Linux EXT4 FS development

Linux EXT4 FS development
 help / color / mirror / Atom feed

* [PATCH v8 4/4] ext4: convert remaining EA inode iput() calls to ext4_put_ea_inode()
From: Yun Zhou @ 2026-06-20  1:39 UTC (permalink / raw)
  To: tytso, adilger.kernel, libaokun, jack, ojaswin, ritesh.list,
	yi.zhang
  Cc: linux-ext4, linux-kernel, yun.zhou
In-Reply-To: <20260620013937.2564269-1-yun.zhou@windriver.com>

Convert all remaining iput() calls on EA inodes that execute under
xattr_sem or a jbd2 handle to use ext4_put_ea_inode().  With i_nlink>=1
and !SB_ACTIVE, a direct iput() would trigger write_inode_now() ->
s_writepages_rwsem, creating a lock ordering violation with the caller's
active jbd2 handle.

Converted sites and why defer is necessary:

- ext4_xattr_inode_inc_ref_all() cleanup: dec_ref undoes the failed
  inc_ref, but the EA inode may be shared so i_nlink remains 1.

- ext4_xattr_inode_dec_ref_all() ENOMEM fallback: ext4_expand_inode_array()
  failed before dec_ref is called, i_nlink=1, jbd2 handle active.

- ext4_xattr_inode_lookup_create() out_err: may be a cache-found inode
  where inc_ref failed; i_nlink remains 1.

- ext4_xattr_set_entry() old_ea_inode: dec_ref was called but the EA
  inode may be shared by other xattr blocks, so i_nlink remains 1.

- ext4_xattr_block_set() new block path: dec_ref drops the "extra" ref
  but inc_ref_all added another, so i_nlink stays 1.

- ext4_xattr_block_set() cleanup: on success no dec_ref was called
  (i_nlink=1); on error dec_ref may leave i_nlink=1 if shared.

- ext4_xattr_ibody_set() error path: dec_ref on a cache-found EA inode
  may leave i_nlink=1 if shared.

- ext4_xattr_ibody_set() success path: newly stored EA inode with
  i_nlink=1, just releasing the lookup reference.

- ext4_xattr_delete_inode() quota loop: iget for quota accounting only,
  no dec_ref called, i_nlink=1, jbd2 handle is active.

Direct iput() calls in pure lookup paths (ext4_xattr_inode_get,
ext4_xattr_inode_cache_find, tmp_inode in ext4_xattr_block_set) are
left unchanged -- these do not hold a jbd2 handle or xattr_sem.

Signed-off-by: Yun Zhou <yun.zhou@windriver.com>
---
 fs/ext4/xattr.c | 40 ++++++++++++++++++++++++++++------------
 1 file changed, 28 insertions(+), 12 deletions(-)

diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 79de182e22e6..08c1bdd5133d 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -1079,6 +1079,13 @@ static int ext4_xattr_inode_inc_ref(handle_t *handle, struct inode *ea_inode)
 	return ext4_xattr_inode_update_ref(handle, ea_inode, 1);
 }
 
+/*
+ * Decrement on-disk reference count of an EA inode.  If refcount reaches 0,
+ * i_nlink is cleared and the inode is added to the orphan list.  Callers
+ * must use ext4_put_ea_inode() (not iput) to release the VFS reference
+ * afterwards, since iput on a nlink=0 inode triggers eviction which may
+ * deadlock if called under xattr_sem or an active jbd2 handle.
+ */
 static int ext4_xattr_inode_dec_ref(handle_t *handle, struct inode *ea_inode)
 {
 	return ext4_xattr_inode_update_ref(handle, ea_inode, -1);
@@ -1106,10 +1113,10 @@ static int ext4_xattr_inode_inc_ref_all(handle_t *handle, struct inode *parent,
 		err = ext4_xattr_inode_inc_ref(handle, ea_inode);
 		if (err) {
 			ext4_warning_inode(ea_inode, "inc ref error %d", err);
-			iput(ea_inode);
+			ext4_put_ea_inode(parent->i_sb, ea_inode);
 			goto cleanup;
 		}
-		iput(ea_inode);
+		ext4_put_ea_inode(parent->i_sb, ea_inode);
 	}
 	return 0;
 
@@ -1135,7 +1142,8 @@ static int ext4_xattr_inode_inc_ref_all(handle_t *handle, struct inode *parent,
 		if (err)
 			ext4_warning_inode(ea_inode, "cleanup dec ref error %d",
 					   err);
-		iput(ea_inode);
+		/* i_nlink may remain 1 if shared; defer for !SB_ACTIVE safety */
+		ext4_put_ea_inode(parent->i_sb, ea_inode);
 	}
 	return saved_err;
 }
@@ -1203,7 +1211,8 @@ ext4_xattr_inode_dec_ref_all(handle_t *handle, struct inode *parent,
 		if (err) {
 			ext4_warning_inode(ea_inode,
 					   "Expand inode array err=%d", err);
-			iput(ea_inode);
+			/* i_nlink=1 (dec_ref not yet called); handle active */
+			ext4_put_ea_inode(parent->i_sb, ea_inode);
 			continue;
 		}
 
@@ -1507,7 +1516,7 @@ static struct inode *ext4_xattr_inode_create(handle_t *handle,
 			if (ext4_xattr_inode_dec_ref(handle, ea_inode))
 				ext4_warning_inode(ea_inode,
 					"cleanup dec ref error %d", err);
-			iput(ea_inode);
+			ext4_put_ea_inode(inode->i_sb, ea_inode);
 			return ERR_PTR(err);
 		}
 
@@ -1617,7 +1626,8 @@ static struct inode *ext4_xattr_inode_lookup_create(handle_t *handle,
 				      ea_inode->i_ino, true /* reusable */);
 	return ea_inode;
 out_err:
-	iput(ea_inode);
+	/* May be cache-found inode with i_nlink=1 (inc_ref failed) */
+	ext4_put_ea_inode(inode->i_sb, ea_inode);
 	ext4_xattr_inode_free_quota(inode, NULL, value_len);
 	return ERR_PTR(err);
 }
@@ -1850,7 +1860,8 @@ static int ext4_xattr_set_entry(struct ext4_xattr_info *i,
 
 	ret = 0;
 out:
-	iput(old_ea_inode);
+	/* old_ea_inode had dec_ref; may still have i_nlink=1 if shared */
+	ext4_put_ea_inode(inode->i_sb, old_ea_inode);
 	return ret;
 }
 
@@ -2152,7 +2163,8 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
 					ext4_warning_inode(ea_inode,
 							   "dec ref error=%d",
 							   error);
-				iput(ea_inode);
+				/* i_nlink stays 1 (inc_ref_all added a ref) */
+				ext4_put_ea_inode(inode->i_sb, ea_inode);
 				ea_inode = NULL;
 			}
 
@@ -2206,7 +2218,8 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
 			ext4_xattr_inode_free_quota(inode, ea_inode,
 						    i_size_read(ea_inode));
 		}
-		iput(ea_inode);
+		/* success: i_nlink=1; error+dec_ref: may still be 1 if shared */
+		ext4_put_ea_inode(inode->i_sb, ea_inode);
 	}
 	if (ce)
 		mb_cache_entry_put(ea_block_cache, ce);
@@ -2288,7 +2301,8 @@ int ext4_xattr_ibody_set(handle_t *handle, struct inode *inode,
 
 			ext4_xattr_inode_free_quota(inode, ea_inode,
 						    i_size_read(ea_inode));
-			iput(ea_inode);
+			/* cache-found ea_inode may retain i_nlink=1 */
+			ext4_put_ea_inode(inode->i_sb, ea_inode);
 		}
 		return error;
 	}
@@ -2300,7 +2314,8 @@ int ext4_xattr_ibody_set(handle_t *handle, struct inode *inode,
 		header->h_magic = cpu_to_le32(0);
 		ext4_clear_inode_state(inode, EXT4_STATE_XATTR);
 	}
-	iput(ea_inode);
+	/* ea_inode has i_nlink=1 (new ref just stored in xattr entry) */
+	ext4_put_ea_inode(inode->i_sb, ea_inode);
 	return 0;
 }
 
@@ -2989,7 +3004,8 @@ int ext4_xattr_delete_inode(handle_t *handle, struct inode *inode,
 					continue;
 				ext4_xattr_inode_free_quota(inode, ea_inode,
 					      le32_to_cpu(entry->e_value_size));
-				iput(ea_inode);
+				/* no dec_ref yet but i_nlink=1; handle is active */
+				ext4_put_ea_inode(inode->i_sb, ea_inode);
 			}
 
 		}
-- 
2.43.0


^ permalink raw reply related

* [PATCH v8 3/4] ext4: introduce ext4_put_ea_inode() for safe deferred iput
From: Yun Zhou @ 2026-06-20  1:39 UTC (permalink / raw)
  To: tytso, adilger.kernel, libaokun, jack, ojaswin, ritesh.list,
	yi.zhang
  Cc: linux-ext4, linux-kernel, yun.zhou
In-Reply-To: <20260620013937.2564269-1-yun.zhou@windriver.com>

Calling iput() on EA inodes while holding xattr_sem or a jbd2 handle
can trigger write_inode_now() -> ext4_writepages() -> s_writepages_rwsem,
creating a lock ordering issue during mount (!SB_ACTIVE).

Add ext4_put_ea_inode() which safely releases EA inode references:
when SB_ACTIVE, it calls iput() directly (write_inode_now cannot be
triggered); during mount (!SB_ACTIVE), it queues the inode on a per-sb
lock-free llist and schedules a delayed worker (1 jiffy) to call iput()
in a clean context without holding any ext4 locks.  The delay allows
multiple inodes to accumulate before the worker runs, reducing context
switches.

Convert the iput in ext4_xattr_block_set()'s "Drop the previous xattr
block" path to use ext4_xattr_inode_array_free_deferred(), which
releases EA inodes via ext4_put_ea_inode().  This path previously called
ext4_xattr_inode_array_free() (synchronous iput) while holding xattr_sem
and a jbd2 handle.

The worker is flushed in ext4_put_super() before quota shutdown to
ensure all pending EA inode cleanup completes while quota accounting
is still active.

Signed-off-by: Yun Zhou <yun.zhou@windriver.com>
---
 fs/ext4/ext4.h  |  5 ++++
 fs/ext4/super.c |  6 ++++
 fs/ext4/xattr.c | 74 ++++++++++++++++++++++++++++++++++++++++++++++++-
 fs/ext4/xattr.h |  2 ++
 4 files changed, 86 insertions(+), 1 deletion(-)

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 94283a991e5c..e31d60f82a63 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1706,6 +1706,11 @@ struct ext4_sb_info {
 	struct ext4_es_stats s_es_stats;
 	struct mb_cache *s_ea_block_cache;
 	struct mb_cache *s_ea_inode_cache;
+
+	/* Deferred iput for EA inodes to avoid lock ordering issues */
+	struct llist_head s_ea_inode_to_free;
+	struct delayed_work s_ea_inode_work;
+
 	spinlock_t s_es_lock ____cacheline_aligned_in_smp;
 
 	/* Journal triggers for checksum computation */
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 6a77db4d3124..5dd7c29a70bc 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1303,6 +1303,8 @@ static void ext4_put_super(struct super_block *sb)
 			 &sb->s_uuid);
 
 	ext4_unregister_li_request(sb);
+	/* Flush deferred EA inode iputs while quota is still active */
+	flush_delayed_work(&sbi->s_ea_inode_work);
 	ext4_quotas_off(sb, EXT4_MAXQUOTAS);
 
 	destroy_workqueue(sbi->rsv_conversion_wq);
@@ -5535,6 +5537,9 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb)
 		needs_recovery = 0;
 	}
 
+	init_llist_head(&sbi->s_ea_inode_to_free);
+	INIT_DELAYED_WORK(&sbi->s_ea_inode_work, ext4_ea_inode_work);
+
 	if (!test_opt(sb, NO_MBCACHE)) {
 		sbi->s_ea_block_cache = ext4_xattr_create_cache();
 		if (!sbi->s_ea_block_cache) {
@@ -5763,6 +5768,7 @@ failed_mount8: __maybe_unused
 	if (EXT4_SB(sb)->rsv_conversion_wq)
 		destroy_workqueue(EXT4_SB(sb)->rsv_conversion_wq);
 failed_mount_wq:
+	flush_delayed_work(&sbi->s_ea_inode_work);
 	ext4_xattr_destroy_cache(sbi->s_ea_inode_cache);
 	sbi->s_ea_inode_cache = NULL;
 
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 982a1f831e22..79de182e22e6 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -117,6 +117,8 @@ const struct xattr_handler * const ext4_xattr_handlers[] = {
 static int
 ext4_expand_inode_array(struct ext4_xattr_inode_array **ea_inode_array,
 			struct inode *inode);
+static void ext4_xattr_inode_array_free_deferred(struct super_block *sb,
+				struct ext4_xattr_inode_array *array);
 
 #ifdef CONFIG_LOCKDEP
 void ext4_xattr_inode_set_class(struct inode *ea_inode)
@@ -2187,7 +2189,8 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
 		ext4_xattr_release_block(handle, inode, bs->bh,
 					 &ea_inode_array,
 					 0 /* extra_credits */);
-		ext4_xattr_inode_array_free(ea_inode_array);
+		ext4_xattr_inode_array_free_deferred(inode->i_sb,
+						     ea_inode_array);
 	}
 	error = 0;
 
@@ -3025,6 +3028,75 @@ void ext4_xattr_inode_array_free(struct ext4_xattr_inode_array *ea_inode_array)
 	kfree(ea_inode_array);
 }
 
+static void ext4_xattr_inode_array_free_deferred(struct super_block *sb,
+				struct ext4_xattr_inode_array *array)
+{
+	int idx;
+
+	if (array == NULL)
+		return;
+
+	for (idx = 0; idx < array->count; ++idx)
+		ext4_put_ea_inode(sb, array->inodes[idx]);
+	kfree(array);
+}
+
+struct ext4_ea_iput_entry {
+	struct llist_node node;
+	struct inode *inode;
+};
+
+/*
+ * Worker function for deferred EA inode iput.  Processes all inodes queued
+ * on s_ea_inode_to_free in a context free of xattr_sem/jbd2 handle locks.
+ */
+void ext4_ea_inode_work(struct work_struct *work)
+{
+	struct ext4_sb_info *sbi = container_of(to_delayed_work(work),
+						struct ext4_sb_info,
+						s_ea_inode_work);
+	struct llist_node *node = llist_del_all(&sbi->s_ea_inode_to_free);
+	struct llist_node *next;
+
+	while (node) {
+		struct ext4_ea_iput_entry *entry = container_of(node,
+				struct ext4_ea_iput_entry, node);
+		next = node->next;
+		iput(entry->inode);
+		kfree(entry);
+		node = next;
+	}
+}
+
+/*
+ * Release a VFS reference on an EA inode after ext4_xattr_inode_dec_ref()
+ * may have set i_nlink=0.  Must be used instead of iput() in any context
+ * where xattr_sem or a jbd2 handle is held, because eviction of a nlink=0
+ * inode can acquire those same locks.
+ *
+ * When SB_ACTIVE, eviction does not call write_inode_now() so direct
+ * iput() is safe.  During mount (!SB_ACTIVE), defer to a workqueue.
+ *
+ * For EA inode references dropped without a preceding dec_ref (e.g.,
+ * lookup-only paths where nlink remains >= 1), plain iput() is safe
+ * and preferred.
+ */
+void ext4_put_ea_inode(struct super_block *sb, struct inode *inode)
+{
+	struct ext4_ea_iput_entry *entry;
+
+	if (!inode)
+		return;
+	if (sb->s_flags & SB_ACTIVE) {
+		iput(inode);
+		return;
+	}
+	entry = kmalloc(sizeof(*entry), GFP_NOFS | __GFP_NOFAIL);
+	entry->inode = inode;
+	llist_add(&entry->node, &EXT4_SB(sb)->s_ea_inode_to_free);
+	schedule_delayed_work(&EXT4_SB(sb)->s_ea_inode_work, 1);
+}
+
 /*
  * ext4_xattr_block_cache_insert()
  *
diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h
index 1fedf44d4fb6..52074537dce5 100644
--- a/fs/ext4/xattr.h
+++ b/fs/ext4/xattr.h
@@ -190,6 +190,8 @@ extern int ext4_xattr_delete_inode(handle_t *handle, struct inode *inode,
 				   struct ext4_xattr_inode_array **array,
 				   int extra_credits);
 extern void ext4_xattr_inode_array_free(struct ext4_xattr_inode_array *array);
+extern void ext4_ea_inode_work(struct work_struct *work);
+extern void ext4_put_ea_inode(struct super_block *sb, struct inode *inode);
 
 extern int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
 			    struct ext4_inode *raw_inode, handle_t *handle);
-- 
2.43.0


^ permalink raw reply related

* [PATCH v8 0/4] ext4: fix xattr iput deadlock with s_writepages_rwsem
From: Yun Zhou @ 2026-06-20  1:39 UTC (permalink / raw)
  To: tytso, adilger.kernel, libaokun, jack, ojaswin, ritesh.list,
	yi.zhang
  Cc: linux-ext4, linux-kernel, yun.zhou

This series fixes a circular lock dependency reported by syzbot:

  s_writepages_rwsem --> jbd2_handle --> xattr_sem --> s_writepages_rwsem

The deadlock occurs when iput() on an EA inode triggers write_inode_now()
while xattr_sem and a jbd2 handle are held.  The triggering path is
during mount-time orphan cleanup (!SB_ACTIVE) where iput_final() calls
write_inode_now() synchronously.

Patch 1 blocks the deadlock by skipping extra isize expansion when
!SB_ACTIVE -- this prevents the xattr manipulation path from being
entered during mount.

Patch 2 is a belt-and-suspenders semantic improvement: an inode under
eviction never needs extra isize expansion.

Patches 3-4 are a structural improvement using a per-sb delayed workqueue:

  Patch 3 introduces ext4_put_ea_inode(), which does direct iput() when
  SB_ACTIVE (zero overhead) and defers to a delayed worker (1 jiffy)
  when !SB_ACTIVE.  The delay allows multiple EA inodes to accumulate
  before the worker runs, reducing context switches.  It also converts
  the first call site (ext4_xattr_block_set release path).

  Patch 4 converts all remaining EA inode iput() calls that execute
  under xattr_sem or a jbd2 handle.  Direct iput() in pure lookup paths
  (ext4_xattr_inode_get, ext4_xattr_inode_cache_find, tmp_inode) is
  left unchanged since these do not hold locks.

Link: https://syzkaller.appspot.com/bug?extid=5d19358d7eb30ffb0cc5

v8:
 - Use delayed_work with a 1-jiffy delay instead of immediate work,
   allowing EA inodes to batch before processing (per reviewer suggestion).
 - Move flush_delayed_work() in ext4_put_super() before ext4_quotas_off()
   to ensure deferred iputs complete while quota is still active.
 - Convert ext4_xattr_inode_inc_ref_all() main loop iput calls and
   ext4_xattr_inode_create() error path to ext4_put_ea_inode() for
   completeness -- eliminates all direct iput of EA inodes under locks.
 - Convert ext4_xattr_inode_dec_ref_all() ENOMEM fallback iput which
   is reachable during !SB_ACTIVE via ext4_evict_inode ->
   ext4_xattr_delete_inode -> ext4_xattr_release_block.
 - Add flush_delayed_work() in __ext4_fill_super() failed_mount_wq
   error path to prevent use-after-free if mount fails.

v7:
 - Replaced the deferred-iput array threading approach (v4-v6) with a
   simpler per-sb workqueue + lock-free llist design.  No function
   signature changes needed.  ext4_put_ea_inode() does direct iput when
   SB_ACTIVE (zero overhead in normal operation) and defers to the
   workqueue only during mount (!SB_ACTIVE).
 - Converted the iput in ext4_xattr_delete_inode()'s quota accounting
   loop to ext4_put_ea_inode() to eliminate a lockdep-reportable lock
   ordering violation (jbd2_handle -> iput -> s_writepages_rwsem).
 - Moved flush_work() before the if (sbi->s_journal) check in
   ext4_put_super() to cover nojournal mode.
 - Split patch 3 into two for easier review: infrastructure + first
   conversion, then remaining mechanical conversions.

v6:
 - ext4_inline_data_truncate(): use local ea_inode_array instead of
   passing NULL, freed after ext4_journal_stop().

v5:
 - Split into 3 patches for easier review.
 - Add explicit !SB_ACTIVE early-return in ext4_try_to_expand_extra_isize()
   to block ALL mount-time paths.

v4:
 - Comprehensive rewrite: thread ea_inode_array through all xattr
   functions, use __GFP_NOFAIL, set EXT4_STATE_NO_EXPAND in evict.

v3:
 - Make ext4_xattr_set_handle() take ea_inode_array output parameter.

v2:
 - Defer iput() in ext4_xattr_block_set() via ea_inode_array.

v1:
 - Set EXT4_STATE_NO_EXPAND in ext4_evict_inode().

Yun Zhou (4):
  ext4: skip extra isize expansion during mount to prevent deadlock
  ext4: set EXT4_STATE_NO_EXPAND in ext4_evict_inode
  ext4: introduce ext4_put_ea_inode() for safe deferred iput
  ext4: convert remaining EA inode iput() calls to ext4_put_ea_inode()

 fs/ext4/ext4.h  |   5 +++
 fs/ext4/inode.c |  11 +++++
 fs/ext4/super.c |   6 +++
 fs/ext4/xattr.c | 114 ++++++++++++++++++++++++++++++++++++++++++------
 fs/ext4/xattr.h |   2 +
 5 files changed, 125 insertions(+), 13 deletions(-)

-- 
2.43.0


^ permalink raw reply

* [PATCH v8 1/4] ext4: skip extra isize expansion during mount to prevent deadlock
From: Yun Zhou @ 2026-06-20  1:39 UTC (permalink / raw)
  To: tytso, adilger.kernel, libaokun, jack, ojaswin, ritesh.list,
	yi.zhang
  Cc: linux-ext4, linux-kernel, yun.zhou
In-Reply-To: <20260620013937.2564269-1-yun.zhou@windriver.com>

ext4_try_to_expand_extra_isize() is called from __ext4_mark_inode_dirty()
while holding an active jbd2 handle.  During mount (!SB_ACTIVE), the
expand path may move xattrs to external blocks and release ea_inodes via
iput().  When !SB_ACTIVE, iput() calls write_inode_now() which acquires
s_writepages_rwsem, creating a circular lock dependency:

  s_writepages_rwsem --> jbd2_handle --> xattr_sem --> s_writepages_rwsem

This can be triggered via:

  ext4_process_orphan() -> ext4_truncate() -> ext4_mark_inode_dirty()
    -> ext4_try_to_expand_extra_isize()

or:

  ext4_evict_inode() -> ext4_mark_inode_dirty()
    -> ext4_try_to_expand_extra_isize()

Skip expansion when !SB_ACTIVE.  This is a minor loss of functionality
(extra isize won't grow for these inodes during mount), which e2fsck
can resolve later if needed.

Reported-by: syzbot+5d19358d7eb30ffb0cc5@syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=5d19358d7eb30ffb0cc5
Fixes: c8585c6fcaf2 ("ext4: fix races between changing inode journal mode and ext4_writepages")
Signed-off-by: Yun Zhou <yun.zhou@windriver.com>
Reviewed-by: Jan Kara <jack@suse.cz>
---
 fs/ext4/inode.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index c2c2d6ac7f3d..09dcfb6bf48c 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -6458,6 +6458,16 @@ static int ext4_try_to_expand_extra_isize(struct inode *inode,
 	if (ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND))
 		return -EOVERFLOW;
 
+	/*
+	 * Skip expansion during mount (!SB_ACTIVE).  Expanding extra isize
+	 * may move xattrs to external blocks and release ea_inodes via iput.
+	 * When !SB_ACTIVE, iput triggers write_inode_now() which acquires
+	 * s_writepages_rwsem, causing a deadlock with the caller's active
+	 * jbd2 handle (lock order: s_writepages_rwsem -> jbd2_handle).
+	 */
+	if (unlikely(!(inode->i_sb->s_flags & SB_ACTIVE)))
+		return -EBUSY;
+
 	/*
 	 * In nojournal mode, we can immediately attempt to expand
 	 * the inode.  When journaled, we first need to obtain extra
-- 
2.43.0


^ permalink raw reply related

* [RFC PATCH v2 6/6] ext4/067: LUFID and encryption+casefold+dirdata
From: Artem Blagodarenko @ 2026-06-19 19:52 UTC (permalink / raw)
  To: linux-ext4; +Cc: adilger.kernel, Artem Blagodarenko
In-Reply-To: <20260619195205.29384-1-ablagodarenko@thelustrecollective.com>

From: Artem Blagodarenko <artem.blagodarenko@gmail.com>

Test ext4 LUFID functionality in the complex combination of
encryption, casefold (case-insensitive), and dirdata features.

Verification uses 'debugfs ls -lD' to check for 'fid:' or 'hash:' markers.
Tests also verify that case-insensitive lookups work correctly and that
encrypted file content is preserved after setting LUFID.

This test validates that LUFID works correctly when encryption and
casefold features are enabled, ensuring feature interactions don't
break the LUFID functionality.

Signed-off-by: Artem Blagodarenko <artem.blagodarenko@gmail.com>
---
 tests/ext4/067     | 137 +++++++++++++++++++++++++++++++++++++++++++++
 tests/ext4/067.out |   4 ++
 2 files changed, 141 insertions(+)

diff --git a/tests/ext4/067 b/tests/ext4/067
new file mode 100755
index 00000000..acb49c40
--- /dev/null
+++ b/tests/ext4/067
@@ -0,0 +1,137 @@
+#! /bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (c) 2026 The Lustre Collective. All Rights Reserved.
+# Author: Artem Blagodarenko <ablagodarenko@thelustrecollective.com>
+#
+# FS QA Test ext4/067
+#
+# Test ext4 LUFID  with encryption and casefold.
+# EXT4_IOC_SET_LUFID is an ioctl that allows LUFID data to be set on a directory entry.
+# directory name hash is stored by EXT4 when casefold and encryption features are enabled.
+
+. ./common/preamble
+_begin_fstest auto quick encrypt casefold
+
+# Import common functions
+. ./common/filter
+. ./common/encrypt
+. ./common/casefold
+. ./common/attr
+. ./common/ext4
+
+_exclude_fs ext2
+_exclude_fs ext3
+
+_require_scratch_nocheck
+_require_scratch_encryption
+_require_scratch_casefold
+_require_command "$SET_LUFID_PROG"
+
+# Check if dirdata feature is supported (required for LUFID IOCTL)
+_require_scratch_dirdata()
+{
+	if test ! -f /sys/fs/ext4/features/dirdata ; then
+		_notrun "dirdata feature not supported by kernel (required for LUFID)"
+	fi
+
+	# Verify that mkfs supports dirdata
+	if ! $MKFS_EXT4_PROG -O dirdata -n $SCRATCH_DEV &>>$seqres.full ; then
+		_notrun "mkfs.ext4 does not support dirdata feature"
+	fi
+
+	# Verify kernel can mount filesystem with encrypt+casefold+dirdata
+	if ! _scratch_mkfs -O encrypt,casefold,dirdata &>>$seqres.full ; then
+		_notrun "failed to create filesystem with encrypt+casefold+dirdata"
+	fi
+	if ! _try_scratch_mount &>>$seqres.full ; then
+		_notrun "kernel cannot mount filesystem with encrypt+casefold+dirdata"
+	fi
+	_scratch_unmount
+}
+
+_require_scratch_dirdata
+
+# Helper to add a v2 encryption key and set policy on a directory
+_setup_encrypted_dir()
+{
+	local dir=$1
+	local raw_key=$(_generate_raw_encryption_key)
+	local keyspec=$(_add_enckey $SCRATCH_MNT "$raw_key" | awk '{print $NF}')
+	_set_encpolicy $dir $keyspec
+	_casefold_set_attr $dir
+	echo $keyspec
+}
+
+# Create a filesystem with encryption, casefold, and dirdata features
+_scratch_mkfs -O encrypt,casefold,dirdata &>>$seqres.full
+_scratch_mount
+
+# Test: Create file in encrypted+casefolded directory and set three 16-byte LUFIDs
+echo "Test: Set three 16-byte LUFIDs on file in encrypted+casefolded directory"
+mkdir $SCRATCH_MNT/encrypted_dir
+_setup_encrypted_dir $SCRATCH_MNT/encrypted_dir > /dev/null
+
+echo "encrypted content" > $SCRATCH_MNT/encrypted_dir/testfile.txt
+
+lufid_payload=$'\xde\xad\xbe\xef\x01\x02\x03\x04\x11\x12\x13\x14\x21\x22\x23\x24\xca\xfe\xba\xbe\x05\x06\x07\x08\x31\x32\x33\x34\x41\x42\x43\x44\xfe\xed\xfa\xce\x09\x0a\x0b\x0c\x51\x52\x53\x54\x61\x62\x63\x64'
+expected_fid='[0xdeadbeef01020304:0x11121314:0x21222324],[0xcafebabe05060708:0x31323334:0x41424344],[0xfeedface090a0b0c:0x51525354:0x61626364]'
+
+# Set three LUFIDs on the file at the same time (48 bytes total: three 16-byte FIDs)
+# First FID:  [part1 (8 bytes):part2 (4 bytes):part3 (4 bytes)]
+# Second FID: [part1 (8 bytes):part2 (4 bytes):part3 (4 bytes)]
+# Third FID:  [part1 (8 bytes):part2 (4 bytes):part3 (4 bytes)]
+set_lufid $SCRATCH_MNT/encrypted_dir testfile.txt "$lufid_payload" >>$seqres.full 2>&1
+if [ $? -ne 0 ]; then
+	echo "FAIL: Could not set three LUFIDs on encrypted+casefolded file"
+	_scratch_unmount
+	_check_scratch_fs
+	status=1
+	exit
+fi
+
+# Verify file is still accessible
+if [ ! -f $SCRATCH_MNT/encrypted_dir/testfile.txt ]; then
+	echo "FAIL: Encrypted file not accessible after setting LUFID"
+	_scratch_unmount
+	_check_scratch_fs
+	status=1
+	exit
+fi
+
+# Verify file content is preserved
+enc_content=$(cat $SCRATCH_MNT/encrypted_dir/testfile.txt 2>/dev/null)
+if [ "$enc_content" != "encrypted content" ]; then
+	echo "FAIL: Encrypted file content not preserved after setting LUFID"
+	_scratch_unmount
+	_check_scratch_fs
+	status=1
+	exit
+fi
+
+# Test case-insensitive lookup still works with LUFID
+if [ ! -f "$SCRATCH_MNT/encrypted_dir/TESTFILE.TXT" ]; then
+	echo "FAIL: Case-insensitive lookup doesn't work with LUFID"
+	_scratch_unmount
+	_check_scratch_fs
+	status=1
+	exit
+fi
+
+echo "Successfully set and verified three 16-byte LUFIDs on encrypted+casefolded file"
+
+# Dump directory structure to verify dirdata
+if ! _dump_dir_structure $SCRATCH_MNT/encrypted_dir testfile.txt "$expected_fid"; then
+	echo "FAIL: Stored LUFID does not match expected value"
+	_scratch_unmount
+	_check_scratch_fs
+	status=1
+	exit
+fi
+
+# Cleanup and verify filesystem
+_scratch_unmount
+_check_scratch_fs
+
+# success, all done
+status=0
+exit
diff --git a/tests/ext4/067.out b/tests/ext4/067.out
new file mode 100644
index 00000000..1c9a8126
--- /dev/null
+++ b/tests/ext4/067.out
@@ -0,0 +1,4 @@
+QA output created by 067
+Test: Set three 16-byte LUFIDs on file in encrypted+casefolded directory
+Successfully set and verified three 16-byte LUFIDs on encrypted+casefolded file
+  Directory structure of encrypted_dir: OK (dirdata verified)
-- 
2.43.7


^ permalink raw reply related

* [RFC PATCH v2 5/6] ext4/066: verify LUFID dirdata operations
From: Artem Blagodarenko @ 2026-06-19 19:52 UTC (permalink / raw)
  To: linux-ext4; +Cc: adilger.kernel, Artem Blagodarenko
In-Reply-To: <20260619195205.29384-1-ablagodarenko@thelustrecollective.com>

From: Artem Blagodarenko <artem.blagodarenko@gmail.com>

Test ext4 LUFID set/get operations on dirdata fields. This test
verifies that the EXT4_IOC_SET_LUFID ioctl can be used
to attach LUFID data to a directory entry and that `debugfs ls -lD`
can read this data.

Verification uses `debugfs ls -lD` to check for `fid:` markers,
indicating the presence of LUFID data in directory entries.

Signed-off-by: Artem Blagodarenko <artem.blagodarenko@gmail.com>
---
 tests/ext4/066     | 158 +++++++++++++++++++++++++++++++++++++++++++++
 tests/ext4/066.out |   4 ++
 2 files changed, 162 insertions(+)

diff --git a/tests/ext4/066 b/tests/ext4/066
new file mode 100755
index 00000000..ae98fb45
--- /dev/null
+++ b/tests/ext4/066
@@ -0,0 +1,158 @@
+#! /bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (c) 2026 The Lustre Collective. All Rights Reserved.
+# Author: Artem Blagodarenko <ablagodarenko@thelustrecollective.com>
+#
+# FS QA Test ext4/066
+#
+# Test the ext4 dirdata feature with LUFID ioctl functionality.
+# LUFID is a 16-byte identifier that can be attached
+# to directory entries. It is used for quick access to file metadata.
+# EXT4_IOC_SET_LUFID is an ioctl that allows LUFID data to be set on a
+# directory entry.
+
+. ./common/preamble
+_begin_fstest auto quick
+
+# Import common functions
+. ./common/filter
+. ./common/ext4
+
+_exclude_fs ext2
+_exclude_fs ext3
+
+_require_scratch_nocheck
+_require_command "$SET_LUFID_PROG"
+
+# Check if dirdata feature is supported (required for LUFID IOCTL)
+_require_scratch_dirdata()
+{
+	if test ! -f /sys/fs/ext4/features/dirdata ; then
+		_notrun "dirdata feature not supported by kernel (required for LUFID)"
+	fi
+
+	# Verify that mkfs supports dirdata
+	if ! $MKFS_EXT4_PROG -O dirdata -n $SCRATCH_DEV &>>$seqres.full ; then
+		_notrun "mkfs.ext4 does not support dirdata feature"
+	fi
+
+	# Verify kernel can mount filesystem with dirdata
+	if ! _scratch_mkfs -O dirdata &>>$seqres.full ; then
+		_notrun "failed to create filesystem with dirdata"
+	fi
+	if ! _try_scratch_mount &>>$seqres.full ; then
+		_notrun "kernel cannot mount filesystem with dirdata"
+	fi
+	_scratch_unmount
+}
+
+_require_scratch_dirdata
+
+_u32_to_le_hex()
+{
+	local v=$1
+	local h
+
+	h=$(printf '%08x' "$((v & 0xffffffff))")
+	printf '%s%s%s%s' "${h:6:2}" "${h:4:2}" "${h:2:2}" "${h:0:2}"
+}
+
+_build_default_expected_fid()
+{
+	local path=$1
+	local inode
+	local version
+	local ino_hi ino_lo
+	local ver_hi ver_lo
+	local seq_hex oid_hex ver_hex
+
+	inode=$(stat -c '%i' "$path") || return 1
+	version=$(debugfs -R "stat <${inode}>" $SCRATCH_DEV 2>/dev/null | \
+		sed -n 's/.*Generation:[[:space:]]*\([0-9xa-fA-F]\+\).*/\1/p' | head -n 1)
+
+	if [ -z "$version" ]; then
+		return 1
+	fi
+
+	ino_hi=$(((inode >> 32) & 0xffffffff))
+	ino_lo=$((inode & 0xffffffff))
+	ver_lo=$((version & 0xffffffff))
+	ver_hi=$(((version >> 32) & 0xffffffff))
+
+	# Match lu_fid cast semantics: set_lufid stores u32 words in native memory
+	# order; debugfs reads lu_fid fields and prints f_seq/f_oid/f_ver.
+	seq_hex="$(_u32_to_le_hex "$ino_hi")$(_u32_to_le_hex "$ino_lo")"
+	oid_hex="$(_u32_to_le_hex "$ver_lo")"
+	ver_hex="$(_u32_to_le_hex "$ver_hi")"
+
+	printf '[0x%x:0x%x:0x%x]' "$((16#$seq_hex))" "$((16#$oid_hex))" \
+		"$((16#$ver_hex))"
+}
+
+# Create a filesystem with dirdata feature
+_scratch_mkfs -O dirdata &>>$seqres.full
+_scratch_mount
+
+# Test: Create file and set multiple 16-byte LUFIDs on the same file
+echo "Test: Set multiple 16-byte LUFIDs on the same file"
+mkdir -p $SCRATCH_MNT/lufid_test
+echo "test content" > $SCRATCH_MNT/lufid_test/testfile.txt
+
+# Set both LUFIDs on the file at the same time (32 bytes total: two 16-byte FIDs)
+# First FID:  [part1 (8 bytes):part2 (4 bytes):part3 (4 bytes)]
+# Second FID: [part1 (8 bytes):part2 (4 bytes):part3 (4 bytes)]
+set_lufid $SCRATCH_MNT/lufid_test testfile.txt >>$seqres.full
+if [ $? -ne 0 ]; then
+	echo "FAIL: Could not set both LUFIDs on testfile.txt"
+	_scratch_unmount
+	_check_scratch_fs
+	status=1
+	exit
+fi
+
+# Verify file is still accessible
+if [ ! -f $SCRATCH_MNT/lufid_test/testfile.txt ]; then
+	echo "FAIL: File not accessible after setting LUFIDs"
+	_scratch_unmount
+	_check_scratch_fs
+	status=1
+	exit
+fi
+
+# Verify file content is preserved
+content=$(cat $SCRATCH_MNT/lufid_test/testfile.txt 2>/dev/null)
+if [ "$content" != "test content" ]; then
+	echo "FAIL: File content not preserved after setting LUFIDs"
+	_scratch_unmount
+	_check_scratch_fs
+	status=1
+	exit
+fi
+
+expected_fid=$(_build_default_expected_fid $SCRATCH_MNT/lufid_test/testfile.txt)
+if [ -z "$expected_fid" ]; then
+	echo "FAIL: Could not calculate expected default LUFID"
+	_scratch_unmount
+	_check_scratch_fs
+	status=1
+	exit
+fi
+
+echo "Successfully set and verified both 16-byte LUFIDs on same file at the same time"
+
+# Dump directory structure to verify dirdata
+if ! _dump_dir_structure $SCRATCH_MNT/lufid_test testfile.txt "$expected_fid"; then
+	echo "FAIL: Stored LUFID does not match expected default value"
+	_scratch_unmount
+	_check_scratch_fs
+	status=1
+	exit
+fi
+
+# Cleanup and verify filesystem
+_scratch_unmount
+_check_scratch_fs
+
+# success, all done
+status=0
+exit
diff --git a/tests/ext4/066.out b/tests/ext4/066.out
new file mode 100644
index 00000000..4ec0fd6d
--- /dev/null
+++ b/tests/ext4/066.out
@@ -0,0 +1,4 @@
+QA output created by 066
+Test: Set multiple 16-byte LUFIDs on the same file
+Successfully set and verified both 16-byte LUFIDs on same file at the same time
+  Directory structure of lufid_test: OK (dirdata verified)
-- 
2.43.7


^ permalink raw reply related

* [RFC PATCH v2 4/6] ext4: add set_lufid utility
From: Artem Blagodarenko @ 2026-06-19 19:52 UTC (permalink / raw)
  To: linux-ext4; +Cc: adilger.kernel, Artem Blagodarenko
In-Reply-To: <20260619195205.29384-1-ablagodarenko@thelustrecollective.com>

From: Artem Blagodarenko <artem.blagodarenko@gmail.com>

EXT4 provides the EXT4_IOC_SET_LUFID ioctl, which allows setting
or replacing the LUFID dirdata field for an existing directory entry.

The set_lufid utility uses this ioctl and accepts a directory path,
directory entry name, and LUFID value as arguments.

This utility is used by subsequent dirdata-related tests.

Signed-off-by: Artem Blagodarenko <artem.blagodarenko@gmail.com>
---
 common/config   |   1 +
 src/Makefile    |   2 +-
 src/set_lufid.c | 196 ++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 198 insertions(+), 1 deletion(-)

diff --git a/common/config b/common/config
index 8468a600..2ca23207 100644
--- a/common/config
+++ b/common/config
@@ -210,6 +210,7 @@ export LVM_PROG="$(type -P lvm)"
 export LSATTR_PROG="$(type -P lsattr)"
 export CHATTR_PROG="$(type -P chattr)"
 export DEBUGFS_PROG="$(type -P debugfs)"
+export SET_LUFID_PROG="$(type -P set_lufid || echo $here/src/set_lufid)"
 export UUIDGEN_PROG="$(type -P uuidgen)"
 export KEYCTL_PROG="$(type -P keyctl)"
 export XZ_PROG="$(type -P xz)"
diff --git a/src/Makefile b/src/Makefile
index 31ac43b2..a1f161b0 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -36,7 +36,7 @@ LINUX_TARGETS = xfsctl bstat t_mtab getdevicesize preallo_rw_pattern_reader \
 	fscrypt-crypt-util bulkstat_null_ocount splice-test chprojid_fail \
 	detached_mounts_propagation ext4_resize t_readdir_3 splice2pipe \
 	uuid_ioctl t_snapshot_deleted_subvolume fiemap-fault min_dio_alignment \
-	rw_hint fs-monitor
+	rw_hint fs-monitor set_lufid
 
 EXTRA_EXECS = dmerror fill2attr fill2fs fill2fs_check scaleread.sh \
 	      btrfs_crc32c_forged_name.py popdir.pl popattr.py \
diff --git a/src/set_lufid.c b/src/set_lufid.c
new file mode 100644
index 00000000..2e75939a
--- /dev/null
+++ b/src/set_lufid.c
@@ -0,0 +1,196 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * set_lufid.c --- Set LUFID on a directory entry using IOCTL
+ *
+ * Copyright (C) 2026 The Lustre Collective. All Rights Reserved.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <dirent.h>
+#include <sys/ioctl.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <errno.h>
+#include <libgen.h>
+#include <stdint.h>
+#include <linux/fs.h>
+
+/* Argument layout for EXT4_IOC_SET_LUFID - should match kernel definition */
+struct ext4_dirdata_fid {
+	__u8 edf_data_len;	/* number of valid bytes in edf_data */
+	char edf_data[255];	/* raw LUFID payload */
+} __attribute__((packed));
+
+struct ext4_set_lufid {
+        __u8 esl_name_len;	/* main() stores strlen(name) + 1 here */
+        char esl_name[255 + 1];	/* NUL-terminated directory entry name */
+        union {
+                char esl_data[1 + 255];	/* raw view: length byte, then payload */
+                struct ext4_dirdata_fid esl_edf;	/* structured view of the same bytes */
+        };
+} __attribute__((packed));
+
+#ifndef EXT4_IOC_SET_LUFID
+#define EXT4_IOC_SET_LUFID	_IOW('f', 47, struct ext4_set_lufid)
+#endif
+
+static void usage(const char *prog)	/* print help to stderr, then exit(1) */
+{
+	fprintf(stderr, "Usage: %s DIRECTORY FILENAME [LUFID]\n", prog);
+	fputs("  DIRECTORY: path to the directory containing the FILENAME\n"
+	      "  FILENAME:  name of the file to set LUFID on\n"
+	      "  LUFID: data to attach (generated if not passed)\n", stderr);
+	exit(1);
+}
+
+static void dump_lufid_payload(const char *data, int len)
+{
+	/* Report the payload size on one line, then every payload byte as two
+	 * lowercase hex digits on a second line; a len <= 0 prints no bytes. */
+	printf("LUFID payload length: %d bytes\n", len);
+	fputs("LUFID payload hex:", stdout);
+	for (int left = len; left > 0; left--)
+		printf(" %02x", (unsigned char)*data++);
+	putchar('\n');
+}
+
+/* Build an IGIF-style default payload: fid[0..1] = inode number (high word
+ * first), fid[2..3] = FS_IOC_GETVERSION result. Returns 0, or -1 on error. */
+static int build_default_lufid(int dir_fd, const char *dir_path, const char *filename,
+			       uint32_t fid[4])
+{
+	unsigned long ver = 0;
+	struct stat st;
+	int rc = -1;
+	int file_fd = openat(dir_fd, filename, O_RDONLY | O_CLOEXEC);
+
+	if (file_fd < 0) {
+		fprintf(stderr, "Error opening %s/%s: %s\n",
+			dir_path, filename, strerror(errno));
+		return -1;
+	}
+
+	if (fstat(file_fd, &st) < 0) {
+		fprintf(stderr, "Error stating %s/%s: %s\n",
+			dir_path, filename, strerror(errno));
+		goto out;
+	}
+
+	if (ioctl(file_fd, FS_IOC_GETVERSION, &ver) < 0) {
+		fprintf(stderr, "Error calling FS_IOC_GETVERSION for %s/%s: %s\n",
+			dir_path, filename, strerror(errno));
+		goto out;
+	}
+
+	/* Widen before shifting: shifting a 32-bit ino_t or long by 32 is UB. */
+	fid[0] = (uint32_t)((uint64_t)st.st_ino >> 32);
+	fid[1] = (uint32_t)st.st_ino;
+	fid[2] = (uint32_t)ver;
+	fid[3] = (uint32_t)((uint64_t)ver >> 32);
+	rc = 0;
+out:
+	close(file_fd);
+	return rc;
+}
+
+int main(int argc, char *argv[])
+{
+	const char *dir_path;
+	const char *filename;
+	const char *lufid_data;
+	DIR *dirp;
+	int fd;
+	int name_len, data_len;
+	struct ext4_set_lufid lufid_args;
+	struct stat st;
+	uint32_t fid[4];
+	int rc;
+
+	if (argc < 2) {
+		usage(argv[0]);
+	}
+
+	dir_path = argv[1];
+	filename = argv[2];
+	name_len = strlen(filename) + 1;	/* +1 for NUL terminator */
+
+	if (name_len == 0 || name_len > 255) {
+		fprintf(stderr, "Error: Invalid filename length: %d (must be 1-256 with NUL)\n",
+			name_len);
+		return 1;
+	}
+
+	/* Check if directory exists and is a directory */
+	if (stat(dir_path, &st) < 0) {
+		fprintf(stderr, "Error accessing %s: %s\n", dir_path, strerror(errno));
+		return 1;
+	}
+
+	if (!S_ISDIR(st.st_mode)) {
+		fprintf(stderr, "Error: %s is not a directory\n", dir_path);
+		return 1;
+	}
+
+	/* Open the directory */
+	dirp = opendir(dir_path);
+	if (!dirp) {
+		fprintf(stderr, "Error opening directory %s: %s\n", dir_path, strerror(errno));
+		return 1;
+	}
+
+	fd = dirfd(dirp);
+	if (fd < 0) {
+		fprintf(stderr, "Error getting directory fd: %s\n", strerror(errno));
+		closedir(dirp);
+		return 1;
+	}
+
+	if (argc > 3) {
+		lufid_data = argv[3];
+		data_len = strlen(lufid_data) + 1;
+	} else {
+		rc = build_default_lufid(fd, dir_path, filename, fid);
+		if (rc) {
+			fprintf(stderr, "Error getting lufid for %s/%s\n",
+				dir_path, filename);
+			closedir(dirp);
+			return 1;
+		}
+		lufid_data = (char *)fid;
+		data_len = sizeof(fid) + 1;
+	}
+
+	if (data_len == 0 || data_len > 255) {
+		fprintf(stderr, "Error: Invalid LUFID data length: %d (must be 1-256 with NUL)\n",
+			data_len);
+		closedir(dirp);
+		return 1;
+	}
+
+	/* Prepare LUFID data */
+	memset(&lufid_args, 0, sizeof(lufid_args));
+	lufid_args.esl_name_len = name_len;
+	lufid_args.esl_edf.edf_data_len = data_len;
+	/* Ensure filename is properly NUL-terminated at the correct position */
+	strncpy(lufid_args.esl_name, filename, name_len - 1);
+	lufid_args.esl_name[name_len - 1] = '\0';
+	memcpy(lufid_args.esl_edf.edf_data, lufid_data, data_len);
+
+	/* Call the ioctl */
+	if (ioctl(fd, EXT4_IOC_SET_LUFID, &lufid_args)) {
+		fprintf(stderr, "Error calling EXT4_IOC_SET_LUFID for %s/%s: %s\n",
+			dir_path, filename, strerror(errno));
+		closedir(dirp);
+		return 1;
+	}
+
+	closedir(dirp);
+	printf("Successfully set LUFID  on %s in directory %s\n", filename, dir_path);
+	dump_lufid_payload(lufid_args.esl_edf.edf_data, data_len);
+
+	return 0;
+}
-- 
2.43.7


^ permalink raw reply related

* [RFC PATCH v2 3/6] ext4/065 encryption + casefold + dirdata feature combination
From: Artem Blagodarenko @ 2026-06-19 19:52 UTC (permalink / raw)
  To: linux-ext4; +Cc: adilger.kernel, Artem Blagodarenko
In-Reply-To: <20260619195205.29384-1-ablagodarenko@thelustrecollective.com>

From: Artem Blagodarenko <artem.blagodarenko@gmail.com>

Test ext4 encryption + casefold + dirdata feature combination.
This test verifies that files created in directories with encryption,
case-insensitive (casefold), and dirdata attributes work correctly.
See ext4/064 for the same test WITHOUT dirdata feature.

Signed-off-by: Artem Blagodarenko <artem.blagodarenko@gmail.com>
---
 tests/ext4/065     | 217 +++++++++++++++++++++++++++++++++++++++++++++
 tests/ext4/065.out |  26 ++++++
 2 files changed, 243 insertions(+)

diff --git a/tests/ext4/065 b/tests/ext4/065
new file mode 100755
index 00000000..0ad7a382
--- /dev/null
+++ b/tests/ext4/065
@@ -0,0 +1,217 @@
+#! /bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (c) 2026 The Lustre Collective.  All Rights Reserved.
+# Author: Artem Blagodarenko <ablagodarenko@thelustrecollective.com>
+#
+# FS QA Test ext4/065
+#
+# Test ext4 encryption + casefold + dirdata feature combination.
+# This test verifies that files created in directories with encryption,
+# case-insensitive (casefold), and dirdata attributes work correctly.
+# See ext4/064 for the same test WITHOUT dirdata feature.
+#
+. ./common/preamble
+_begin_fstest auto quick encrypt casefold
+
+# get standard environment and checks
+. ./common/filter
+. ./common/encrypt
+. ./common/casefold
+. ./common/attr
+. ./common/ext4
+
+_exclude_fs ext2
+_exclude_fs ext3
+
+# Check if dirdata feature is supported and can be used with encrypt+casefold
+_require_scratch_dirdata()
+{
+	if test ! -f /sys/fs/ext4/features/dirdata ; then
+		_notrun "dirdata feature not supported by kernel"
+	fi
+
+	# Debug: log e2fsprogs tool paths and versions
+	echo "=== _require_scratch_dirdata debug info ===" >> $seqres.full
+	echo "E2FSCK_PROG: $E2FSCK_PROG" >> $seqres.full
+	echo "E2FSCK_PROG resolved: $(type -P e2fsck)" >> $seqres.full
+	echo "MKFS_EXT4_PROG: $MKFS_EXT4_PROG" >> $seqres.full
+	echo "fsck -t ext4 resolves to: $(type -P fsck.ext4)" >> $seqres.full
+	$E2FSCK_PROG -V >> $seqres.full 2>&1
+	$MKFS_EXT4_PROG -V >> $seqres.full 2>&1
+	echo "=== end debug info ===" >> $seqres.full
+
+	# Also verify that mkfs supports dirdata
+	if ! $MKFS_EXT4_PROG -O dirdata -n $SCRATCH_DEV &>>$seqres.full ; then
+		_notrun "mkfs.ext4 does not support dirdata feature"
+	fi
+
+	# Verify kernel can mount filesystem with encrypt+casefold+dirdata
+	echo "Running: _scratch_mkfs -O encrypt,casefold,dirdata" >> $seqres.full
+	if ! _scratch_mkfs -O encrypt,casefold,dirdata &>>$seqres.full ; then
+		_notrun "failed to create filesystem with encrypt+casefold+dirdata"
+	fi
+	if ! _try_scratch_mount &>>$seqres.full ; then
+		_notrun "kernel cannot mount filesystem with encrypt+casefold+dirdata"
+	fi
+	_scratch_unmount
+}
+
+_require_scratch_nocheck
+_require_scratch_encryption
+_require_scratch_casefold
+_require_scratch_dirdata
+_require_xfs_io_command "set_encpolicy"
+_require_xfs_io_command "add_enckey"
+
+# Encrypt directory $1 with a freshly added key, casefold it, print the keyspec
+_setup_encrypted_casefold_dir()
+{
+	local target=$1
+	local key_id=$(_add_enckey $SCRATCH_MNT "$(_generate_raw_encryption_key)" | \
+		awk '{print $NF}')
+	_set_encpolicy $target $key_id
+	_casefold_set_attr $target
+	echo $key_id
+}
+
+# Log e2fsprogs tool paths and versions to $seqres.full, then create and mount
+# a filesystem with the encrypt, casefold and dirdata features enabled.
+echo "=== e2fsprogs debug info ===" >> $seqres.full
+echo "E2FSCK_PROG: $E2FSCK_PROG" >> $seqres.full
+echo "E2FSCK_PROG resolved: $(type -P e2fsck)" >> $seqres.full
+echo "MKFS_EXT4_PROG: $MKFS_EXT4_PROG" >> $seqres.full
+echo "FSCK_OPTIONS: $FSCK_OPTIONS" >> $seqres.full
+echo "fsck -t ext4 resolves to: $(type -P fsck.ext4)" >> $seqres.full
+$E2FSCK_PROG -V >> $seqres.full 2>&1
+$MKFS_EXT4_PROG -V >> $seqres.full 2>&1
+echo "=== end e2fsprogs debug info ===" >> $seqres.full
+
+_scratch_mkfs -O encrypt,casefold,dirdata &>>$seqres.full
+_scratch_mount
+
+# Test 1: Create an encrypted + casefolded directory and verify lookups work
+echo "Test 1: Basic encrypted casefold lookup with dirdata"
+mkdir $SCRATCH_MNT/test1
+_setup_encrypted_casefold_dir $SCRATCH_MNT/test1 > /dev/null
+
+# Create file with lowercase, lookup with uppercase
+echo "hello" > $SCRATCH_MNT/test1/testfile.txt
+if [ -f "$SCRATCH_MNT/test1/TESTFILE.TXT" ]; then
+	echo "Case-insensitive lookup works in encrypted dir"
+else
+	echo "FAIL: Case-insensitive lookup failed in encrypted dir"
+fi
+
+# Verify the exact name on disk is preserved
+if _casefold_check_exact_name "$SCRATCH_MNT/test1" "testfile.txt"; then
+	echo "Original filename preserved"
+else
+	echo "FAIL: Original filename not preserved"
+fi
+_dump_dir_structure $SCRATCH_MNT/test1
+
+# Test 2: Create files with different case variations
+echo "Test 2: Conflicting names in encrypted casefold dir"
+mkdir $SCRATCH_MNT/test2
+_setup_encrypted_casefold_dir $SCRATCH_MNT/test2 > /dev/null
+
+echo "first" > $SCRATCH_MNT/test2/MyFile.txt
+# "MYFILE.TXT" is an equivalent name; the golden output expects it to overwrite
+echo "second" > $SCRATCH_MNT/test2/MYFILE.TXT 2>/dev/null
+content=$(cat $SCRATCH_MNT/test2/myfile.txt)
+echo "Content after writes: $content"
+_dump_dir_structure $SCRATCH_MNT/test2
+
+# Test 3: Unicode normalization in encrypted casefold dir
+echo "Test 3: Unicode in encrypted casefold dir"
+mkdir $SCRATCH_MNT/test3
+_setup_encrypted_casefold_dir $SCRATCH_MNT/test3 > /dev/null
+
+# "e" + combining acute (U+0301) versus precomposed e-acute (U+00E9), in UTF-8
+fr_file1=$(echo -e "cafe\xcc\x81.txt")
+fr_file2=$(echo -e "caf\xc3\xa9.txt")
+echo "french" > "$SCRATCH_MNT/test3/$fr_file1"
+if [ -f "$SCRATCH_MNT/test3/$fr_file2" ]; then
+	echo "Unicode normalization works in encrypted dir"
+else
+	echo "FAIL: Unicode normalization failed in encrypted dir"
+fi
+_dump_dir_structure $SCRATCH_MNT/test3
+
+# Test 4: Directory operations in encrypted casefold dir
+echo "Test 4: Directory operations in encrypted casefold dir"
+mkdir $SCRATCH_MNT/test4
+_setup_encrypted_casefold_dir $SCRATCH_MNT/test4 > /dev/null
+
+mkdir $SCRATCH_MNT/test4/SubDir
+if [ -d "$SCRATCH_MNT/test4/SUBDIR" ]; then
+	echo "Directory case-insensitive lookup works"
+else
+	echo "FAIL: Directory case-insensitive lookup failed"
+fi
+_dump_dir_structure $SCRATCH_MNT/test4
+
+# Test 5: Verify inheritance of casefold+encryption in subdirectories
+echo "Test 5: Inheritance of attributes"
+mkdir $SCRATCH_MNT/test5
+_setup_encrypted_casefold_dir $SCRATCH_MNT/test5 > /dev/null
+
+mkdir $SCRATCH_MNT/test5/child
+echo "data" > $SCRATCH_MNT/test5/child/file.txt
+if [ -f "$SCRATCH_MNT/test5/CHILD/FILE.TXT" ]; then
+	echo "Attributes inherited correctly"
+else
+	echo "FAIL: Attributes not inherited"
+fi
+_dump_dir_structure $SCRATCH_MNT/test5
+
+# Test 6: Remove and recreate with different case
+echo "Test 6: Remove and recreate with different case"
+mkdir $SCRATCH_MNT/test6
+_setup_encrypted_casefold_dir $SCRATCH_MNT/test6 > /dev/null
+
+echo "original" > $SCRATCH_MNT/test6/RemoveMe.txt
+rm $SCRATCH_MNT/test6/REMOVEME.TXT
+echo "recreated" > $SCRATCH_MNT/test6/REMOVEME.TXT
+if _casefold_check_exact_name "$SCRATCH_MNT/test6" "REMOVEME.TXT"; then
+	echo "Recreated file has new case"
+else
+	echo "FAIL: Recreated file case incorrect"
+fi
+_dump_dir_structure $SCRATCH_MNT/test6
+
+# Test 7: Hard links in encrypted casefold dir
+echo "Test 7: Hard links in encrypted casefold dir"
+mkdir $SCRATCH_MNT/test7
+_setup_encrypted_casefold_dir $SCRATCH_MNT/test7 > /dev/null
+
+echo "linkdata" > $SCRATCH_MNT/test7/original.txt
+ln $SCRATCH_MNT/test7/original.txt $SCRATCH_MNT/test7/hardlink.txt
+if [ -f "$SCRATCH_MNT/test7/HARDLINK.TXT" ]; then
+	echo "Hard link case-insensitive lookup works"
+else
+	echo "FAIL: Hard link case-insensitive lookup failed"
+fi
+_dump_dir_structure $SCRATCH_MNT/test7
+
+# Cleanup and verify filesystem
+_scratch_unmount
+
+# Golden-output marker, then log e2fsprogs details ahead of _check_scratch_fs
+echo ""
+echo "Dirdata analysis:"
+echo "=== e2fsprogs debug info (before _check_scratch_fs) ===" >> $seqres.full
+echo "E2FSCK_PROG: $E2FSCK_PROG" >> $seqres.full
+echo "E2FSCK_PROG resolved: $(type -P e2fsck)" >> $seqres.full
+echo "fsck -t ext4 resolves to: $(type -P fsck.ext4)" >> $seqres.full
+echo "FSCK_OPTIONS: $FSCK_OPTIONS" >> $seqres.full
+$E2FSCK_PROG -V >> $seqres.full 2>&1
+echo "=== end e2fsprogs debug info ===" >> $seqres.full
+
+_check_scratch_fs
+
+echo "Encrypted casefold tests with dirdata completed"
+
+# success, all done
+status=0
+exit
diff --git a/tests/ext4/065.out b/tests/ext4/065.out
new file mode 100644
index 00000000..c1316430
--- /dev/null
+++ b/tests/ext4/065.out
@@ -0,0 +1,26 @@
+QA output created by 065
+Test 1: Basic encrypted casefold lookup with dirdata
+Case-insensitive lookup works in encrypted dir
+Original filename preserved
+  Directory structure of test1: OK (dirdata verified)
+Test 2: Conflicting names in encrypted casefold dir
+Content after writes: second
+  Directory structure of test2: OK (dirdata verified)
+Test 3: Unicode in encrypted casefold dir
+Unicode normalization works in encrypted dir
+  Directory structure of test3: OK (dirdata verified)
+Test 4: Directory operations in encrypted casefold dir
+Directory case-insensitive lookup works
+  Directory structure of test4: OK (dirdata verified)
+Test 5: Inheritance of attributes
+Attributes inherited correctly
+  Directory structure of test5: OK (dirdata verified)
+Test 6: Remove and recreate with different case
+Recreated file has new case
+  Directory structure of test6: OK (dirdata verified)
+Test 7: Hard links in encrypted casefold dir
+Hard link case-insensitive lookup works
+  Directory structure of test7: OK (dirdata verified)
+
+Dirdata analysis:
+Encrypted casefold tests with dirdata completed
-- 
2.43.7


^ permalink raw reply related

* [RFC PATCH v2 2/6] ext4/064 encryption + casefold feature combination WITHOUT dirdata
From: Artem Blagodarenko @ 2026-06-19 19:52 UTC (permalink / raw)
  To: linux-ext4; +Cc: adilger.kernel, Artem Blagodarenko
In-Reply-To: <20260619195205.29384-1-ablagodarenko@thelustrecollective.com>

From: Artem Blagodarenko <artem.blagodarenko@gmail.com>

This test verifies that files created in directories with both
encryption and case-insensitive (casefold) attributes work correctly.
See ext4/065 for the same test WITH dirdata feature enabled.

Signed-off-by: Artem Blagodarenko <artem.blagodarenko@gmail.com>
---
 tests/ext4/064     | 153 +++++++++++++++++++++++++++++++++++++++++++++
 tests/ext4/064.out |  17 +++++
 2 files changed, 170 insertions(+)

diff --git a/tests/ext4/064 b/tests/ext4/064
new file mode 100755
index 00000000..53450927
--- /dev/null
+++ b/tests/ext4/064
@@ -0,0 +1,153 @@
+#! /bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Copyright (c) 2026 The Lustre Collective.  All Rights Reserved.
+# Author: Artem Blagodarenko <ablagodarenko@thelustrecollective.com>
+#
+# FS QA Test ext4/064
+#
+# Test ext4 encryption + casefold feature combination WITHOUT dirdata.
+# This test verifies that files created in directories with both
+# encryption and case-insensitive (casefold) attributes work correctly.
+# See ext4/065 for the same test WITH dirdata feature enabled.
+#
+. ./common/preamble
+_begin_fstest auto quick encrypt casefold
+
+# get standard environment and checks
+. ./common/filter
+. ./common/encrypt
+. ./common/casefold
+. ./common/attr
+
+_exclude_fs ext2
+_exclude_fs ext3
+
+_require_scratch_nocheck
+_require_scratch_encryption
+_require_scratch_casefold
+_require_xfs_io_command "set_encpolicy"
+_require_xfs_io_command "add_enckey"
+
+# Encrypt directory $1 with a freshly added key, casefold it, print the keyspec
+_setup_encrypted_casefold_dir()
+{
+	local target=$1
+	local key_id=$(_add_enckey $SCRATCH_MNT "$(_generate_raw_encryption_key)" | \
+		awk '{print $NF}')
+	_set_encpolicy $target $key_id
+	_casefold_set_attr $target
+	echo $key_id
+}
+
+# Create and mount a filesystem with both encrypt and casefold (no dirdata)
+_scratch_mkfs -O encrypt,casefold &>>$seqres.full
+_scratch_mount
+
+# Test 1: Create an encrypted + casefolded directory and verify lookups work
+echo "Test 1: Basic encrypted casefold lookup"
+mkdir $SCRATCH_MNT/test1
+_setup_encrypted_casefold_dir $SCRATCH_MNT/test1 > /dev/null
+
+# Create file with lowercase, lookup with uppercase
+echo "hello" > $SCRATCH_MNT/test1/testfile.txt
+if [ -f "$SCRATCH_MNT/test1/TESTFILE.TXT" ]; then
+	echo "Case-insensitive lookup works in encrypted dir"
+else
+	echo "FAIL: Case-insensitive lookup failed in encrypted dir"
+fi
+
+# Verify the exact name on disk is preserved
+if _casefold_check_exact_name "$SCRATCH_MNT/test1" "testfile.txt"; then
+	echo "Original filename preserved"
+else
+	echo "FAIL: Original filename not preserved"
+fi
+
+# Test 2: Create files with different case variations
+echo "Test 2: Conflicting names in encrypted casefold dir"
+mkdir $SCRATCH_MNT/test2
+_setup_encrypted_casefold_dir $SCRATCH_MNT/test2 > /dev/null
+
+echo "first" > $SCRATCH_MNT/test2/MyFile.txt
+# "MYFILE.TXT" is an equivalent name; the golden output expects it to overwrite
+echo "second" > $SCRATCH_MNT/test2/MYFILE.TXT 2>/dev/null
+content=$(cat $SCRATCH_MNT/test2/myfile.txt)
+echo "Content after writes: $content"
+
+# Test 3: Unicode normalization in encrypted casefold dir
+echo "Test 3: Unicode in encrypted casefold dir"
+mkdir $SCRATCH_MNT/test3
+_setup_encrypted_casefold_dir $SCRATCH_MNT/test3 > /dev/null
+
+# "e" + combining acute (U+0301) versus precomposed e-acute (U+00E9), in UTF-8
+fr_file1=$(echo -e "cafe\xcc\x81.txt")
+fr_file2=$(echo -e "caf\xc3\xa9.txt")
+echo "french" > "$SCRATCH_MNT/test3/$fr_file1"
+if [ -f "$SCRATCH_MNT/test3/$fr_file2" ]; then
+	echo "Unicode normalization works in encrypted dir"
+else
+	echo "FAIL: Unicode normalization failed in encrypted dir"
+fi
+
+# Test 4: Directory operations in encrypted casefold dir
+echo "Test 4: Directory operations in encrypted casefold dir"
+mkdir $SCRATCH_MNT/test4
+_setup_encrypted_casefold_dir $SCRATCH_MNT/test4 > /dev/null
+
+mkdir $SCRATCH_MNT/test4/SubDir
+if [ -d "$SCRATCH_MNT/test4/SUBDIR" ]; then
+	echo "Directory case-insensitive lookup works"
+else
+	echo "FAIL: Directory case-insensitive lookup failed"
+fi
+
+# Test 5: Verify inheritance of casefold+encryption in subdirectories
+echo "Test 5: Inheritance of attributes"
+mkdir $SCRATCH_MNT/test5
+_setup_encrypted_casefold_dir $SCRATCH_MNT/test5 > /dev/null
+
+mkdir $SCRATCH_MNT/test5/child
+echo "data" > $SCRATCH_MNT/test5/child/file.txt
+if [ -f "$SCRATCH_MNT/test5/CHILD/FILE.TXT" ]; then
+	echo "Attributes inherited correctly"
+else
+	echo "FAIL: Attributes not inherited"
+fi
+
+# Test 6: Remove and recreate with different case
+echo "Test 6: Remove and recreate with different case"
+mkdir $SCRATCH_MNT/test6
+_setup_encrypted_casefold_dir $SCRATCH_MNT/test6 > /dev/null
+
+echo "original" > $SCRATCH_MNT/test6/RemoveMe.txt
+rm $SCRATCH_MNT/test6/REMOVEME.TXT
+echo "recreated" > $SCRATCH_MNT/test6/REMOVEME.TXT
+if _casefold_check_exact_name "$SCRATCH_MNT/test6" "REMOVEME.TXT"; then
+	echo "Recreated file has new case"
+else
+	echo "FAIL: Recreated file case incorrect"
+fi
+
+# Test 7: Hard links in encrypted casefold dir
+echo "Test 7: Hard links in encrypted casefold dir"
+mkdir $SCRATCH_MNT/test7
+_setup_encrypted_casefold_dir $SCRATCH_MNT/test7 > /dev/null
+
+echo "linkdata" > $SCRATCH_MNT/test7/original.txt
+ln $SCRATCH_MNT/test7/original.txt $SCRATCH_MNT/test7/hardlink.txt
+if [ -f "$SCRATCH_MNT/test7/HARDLINK.TXT" ]; then
+	echo "Hard link case-insensitive lookup works"
+else
+	echo "FAIL: Hard link case-insensitive lookup failed"
+fi
+
+# Cleanup and verify filesystem
+_scratch_unmount
+_check_scratch_fs
+
+echo "Encrypted casefold tests completed"
+
+# success, all done
+status=0
+exit
diff --git a/tests/ext4/064.out b/tests/ext4/064.out
new file mode 100644
index 00000000..0197e51e
--- /dev/null
+++ b/tests/ext4/064.out
@@ -0,0 +1,17 @@
+QA output created by 064
+Test 1: Basic encrypted casefold lookup
+Case-insensitive lookup works in encrypted dir
+Original filename preserved
+Test 2: Conflicting names in encrypted casefold dir
+Content after writes: second
+Test 3: Unicode in encrypted casefold dir
+Unicode normalization works in encrypted dir
+Test 4: Directory operations in encrypted casefold dir
+Directory case-insensitive lookup works
+Test 5: Inheritance of attributes
+Attributes inherited correctly
+Test 6: Remove and recreate with different case
+Recreated file has new case
+Test 7: Hard links in encrypted casefold dir
+Hard link case-insensitive lookup works
+Encrypted casefold tests completed
-- 
2.43.7


^ permalink raw reply related

* [RFC PATCH v2 1/6] ext4: add common helper to check whether dirdata is applied
From: Artem Blagodarenko @ 2026-06-19 19:52 UTC (permalink / raw)
  To: linux-ext4; +Cc: adilger.kernel, Artem Blagodarenko
In-Reply-To: <20260619195205.29384-1-ablagodarenko@thelustrecollective.com>

From: Artem Blagodarenko <artem.blagodarenko@gmail.com>

Add a helper that lists a directory with the -lD flags and checks
whether any dirdata fields exist.

This helper will be used by subsequent dirdata-related patches.

Signed-off-by: Artem Blagodarenko <artem.blagodarenko@gmail.com>
---
 common/ext4 | 34 ++++++++++++++++++++++++++++++++++
 1 file changed, 34 insertions(+)

diff --git a/common/ext4 b/common/ext4
index a2ce456d..47c31db9 100644
--- a/common/ext4
+++ b/common/ext4
@@ -242,3 +242,37 @@ _ext4_get_inum_iflags() {
 	debugfs -R "stat <${inumber}>" "${dev}" 2> /dev/null | \
 			sed -n 's/^.*Flags: \([0-9a-fx]*\).*$/\1/p'
 }
+
+# Helper to dump directory structure with hash info (requires dirdata feature)
+# This is useful for verifying that dirdata is storing hash information
+_dump_dir_structure()
+{
+	local dir=$1
+	local dir_name=$(basename $dir)
+	local expected=$3
+
+	local debugfs_output=$({
+		echo "cd $dir_name"
+		echo "ls -lD ."
+		echo "quit"
+	} | debugfs $SCRATCH_DEV 2>/dev/null)
+
+	# DEBUG: uncomment to see full debugfs output
+	# echo "  [DEBUG] debugfs output for $dir_name:"
+	# echo "$debugfs_output" | grep -v "^debugfs:" | sed 's/^/    /'
+
+	# Check if hash data is present (encryption+casefold+dirdata case)
+	# or if fid data is present (dirdata+encryption or dirdata only case)
+	if echo "$debugfs_output" | grep -q "fid="; then
+		local fid_value=$(echo "$debugfs_output" | grep -o "fid=[^ ]*" | head -1 | sed 's/^fid=//')
+		if [ "$fid_value" = "$expected" ]; then
+			echo "  Directory structure of $dir_name: OK (dirdata verified)"
+		else
+			echo "  Directory structure of $dir_name: FAILED (fid mismatch: got '$fid_value', expected '$expected')"
+		fi
+	elif echo "$debugfs_output" | grep -q "hash="; then
+		echo "  Directory structure of $dir_name: OK (dirdata verified)"
+	else
+		echo "  Directory structure of $dir_name: FAILED (no dirdata)"
+	fi
+}
-- 
2.43.7


^ permalink raw reply related

* [RFC PATCH v2 0/6] ext4: tests for the dirdata feature (encryption+casefold, LUFID)
From: Artem Blagodarenko @ 2026-06-19 19:51 UTC (permalink / raw)
  To: linux-ext4; +Cc: adilger.kernel, Artem Blagodarenko

These tests cover the ext4 "dirdata" feature (storing extra metadata
in directory entries beyond the file name), sent separately from the
kernel and e2fsprogs dirdata patch series for reference and review.

ext4/064 and ext4/065 verify that encryption and case-insensitive
(casefold) directories continue to work both without and with
dirdata enabled. ext4/066 and ext4/067 exercise the LUFID (Locally
Unique File ID) use of dirdata via a new EXT4_IOC_SET_LUFID ioctl,
using a small set_lufid helper utility added in this series.

Changes in v2:
- Ted Ts'o pointed out that the v1 tests exercised the
  encryption+casefold/dirdata feature combination without actually
  validating that the encrypted hash was stored as a dirdata
  attribute.
  ext4/064 and ext4/065 now use the new _dump_dir_structure helper
  (debugfs-based) to dump and check the on-disk directory entry
  content, confirming the hash is actually present as dirdata rather
  than just exercising the feature combination.
- Added ext4/066 and ext4/067, plus a new common/ext4 helper and the
  src/set_lufid.c utility, to directly verify LUFID data is correctly
  stored in and retrieved from dirdata via EXT4_IOC_SET_LUFID, including
  in combination with encryption+casefold.

Artem Blagodarenko (6):
  ext4: add common helper to check whether dirdata is applied
  ext4/064 encryption + casefold feature combination WITHOUT dirdata
  ext4/065 encryption + casefold + dirdata feature combination
  ext4: add set_lufid utility
  ext4/066: verify LUFID dirdata operations
  ext4/067: LUFID and encryption+casefold+dirdata

 common/config      |   1 +
 common/ext4        |  34 +++++++
 src/Makefile       |   2 +-
 src/set_lufid.c    | 196 ++++++++++++++++++++++++++++++++++++++++
 tests/ext4/064     | 153 ++++++++++++++++++++++++++++++++
 tests/ext4/064.out |  17 ++++
 tests/ext4/065     | 217 +++++++++++++++++++++++++++++++++++++++++++++
 tests/ext4/065.out |  26 ++++++
 tests/ext4/066     | 158 +++++++++++++++++++++++++++++++++
 tests/ext4/066.out |   4 +
 tests/ext4/067     | 137 ++++++++++++++++++++++++++++
 tests/ext4/067.out |   4 +
 12 files changed, 948 insertions(+), 1 deletion(-)
 create mode 100644 src/set_lufid.c
 create mode 100755 tests/ext4/064
 create mode 100644 tests/ext4/064.out
 create mode 100755 tests/ext4/065
 create mode 100644 tests/ext4/065.out
 create mode 100755 tests/ext4/066
 create mode 100644 tests/ext4/066.out
 create mode 100755 tests/ext4/067
 create mode 100644 tests/ext4/067.out

-- 
2.43.7


^ permalink raw reply

* [PATCH v3 10/10] ext4: Add EXT4_IOC_SET_LUFID ioctl for setting LUFID on directory entries
From: Artem Blagodarenko @ 2026-06-19 19:10 UTC (permalink / raw)
  To: linux-ext4; +Cc: adilger.kernel, Artem Blagodarenko, Andreas Dilger
In-Reply-To: <20260619191022.27008-1-ablagodarenko@thelustrecollective.com>

From: Artem Blagodarenko <artem.blagodarenko@gmail.com>

Add a new ioctl command that allows setting LUFID (Locally Unique File ID)
data on existing directory entries. This includes:

- ext4_ioctl_set_lufid(): ioctl handler that validates parameters and
  calls the underlying implementation
- ext4_dirdata_set_lufid(): Core function that performs the operation by:
  * Looking up the target directory entry
  * Retrieving the associated inode
  * Deleting the old entry and re-creating it with LUFID data attached

This implementation requires the dirdata feature to be enabled on the
filesystem and properly handles transactions and inode locking to ensure
consistency.

Signed-off-by: Artem Blagodarenko <artem.blagodarenko@gmail.com>
Reviewed-by: Andreas Dilger <adilger@dilger.ca>
---
 fs/ext4/ext4.h            |  15 ++++
 fs/ext4/ioctl.c           |  62 +++++++++++++++++
 fs/ext4/namei.c           | 143 ++++++++++++++++++++++++++++++++++++++
 include/uapi/linux/ext4.h |  13 ++++
 4 files changed, 233 insertions(+)

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 543c8e87f207..0a8b9116dc7b 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1227,6 +1227,7 @@ struct ext4_inode_info {
 #ifdef CONFIG_FS_ENCRYPTION
 	struct fscrypt_inode_info *i_crypt_info;
 #endif
+	void *i_dirdata;
 };
 
 /*
@@ -2601,6 +2602,18 @@ struct ext4_dirent_hash {
 	struct ext4_dir_entry_hash	dh_hash;
 } __packed;
 
+/* Return the LUFID carried by @p, or NULL without dirdata or a LUFID magic. */
+static inline
+struct ext4_dirent_fid *ext4_dentry_get_fid(struct super_block *sb,
+					    struct ext4_dentry_param *p)
+{
+	/* @p may be NULL; its payload only counts when tagged with the magic. */
+	if (!ext4_has_feature_dirdata(sb) || !p ||
+	    p->edp_magic != EXT4_LUFID_MAGIC)
+		return NULL;
+	return &p->edp_dfid;
+}
+
 #define EXT4_FT_DIR_CSUM	0xDE
 
 /*
@@ -3302,6 +3315,8 @@ static inline int ext4_init_new_dir(handle_t *handle, struct inode *dir,
 }
 extern int ext4_dirblock_csum_verify(struct inode *inode,
 				     struct buffer_head *bh);
+extern int ext4_dirdata_set_lufid(struct inode *dir, const char *filename,
+			   int namelen, struct ext4_dentry_param *edp);
 extern int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash,
 				__u32 start_minor_hash, __u32 *next_hash);
 extern int ext4_search_dir(struct buffer_head *bh,
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index c8387e6a2c6e..19d6588092d3 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -1535,6 +1535,65 @@ static int ext4_ioctl_set_tune_sb(struct file *filp,
 	return ret;
 }
 
+/*
+ * ext4_ioctl_set_lufid() - Set LUFID on a directory entry
+ * @filp:	file pointer (parent directory)
+ * @arg:	pointer to ext4_set_lufid structure with filename and LUFID data
+ *
+ * This ioctl allows setting LUFID data on an existing
+ * directory entry. It is called on the parent directory with a filename and
+ * LUFID data.
+ */
+static long ext4_ioctl_set_lufid(struct file *filp, unsigned long arg)
+{
+	struct inode *dir = file_inode(filp);
+	struct ext4_set_lufid lufid_args;
+	struct {
+		__u32 edp_magic;
+		struct ext4_dirent_data_header df_header;
+		char df_fid[255];
+	} edp;
+	int err;
+
+	/* Check if parent is a directory */
+	if (!S_ISDIR(dir->i_mode))
+		return -ENOTDIR;
+
+	/* Copy arguments from user space */
+	if (copy_from_user(&lufid_args, (struct ext4_set_lufid __user *)arg,
+			   sizeof(lufid_args)))
+		return -EFAULT;
+
+	/* Validate parameters */
+	if (lufid_args.esl_name_len == 0 || lufid_args.esl_name_len > EXT4_NAME_LEN)
+		return -EINVAL;
+
+	if (lufid_args.esl_data_len == 0 || lufid_args.esl_data_len > 255)
+		return -EINVAL;
+
+	/* Ensure filename is NUL-terminated and unmodified */
+	if (lufid_args.esl_name[lufid_args.esl_name_len - 1] != '\0')
+		return -EINVAL;
+
+	/* Prepare the dentry param struct with LUFID data */
+	edp.edp_magic = EXT4_LUFID_MAGIC;
+	edp.df_header.ddh_length = lufid_args.esl_data_len;
+	memcpy(edp.df_fid, lufid_args.esl_data, lufid_args.esl_data_len);
+
+	/* Want write access */
+	err = mnt_want_write_file(filp);
+	if (err)
+		return err;
+
+	/* Call the helper function to do the actual work */
+	err = ext4_dirdata_set_lufid(dir, lufid_args.esl_name,
+				    lufid_args.esl_name_len - 1,
+				    (struct ext4_dentry_param *)&edp);
+
+	mnt_drop_write_file(filp);
+	return err;
+}
+
 static long __ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 {
 	struct inode *inode = file_inode(filp);
@@ -1921,6 +1980,8 @@ static long __ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 					      (void __user *)arg);
 	case EXT4_IOC_SET_TUNE_SB_PARAM:
 		return ext4_ioctl_set_tune_sb(filp, (void __user *)arg);
+	case EXT4_IOC_SET_LUFID:
+		return ext4_ioctl_set_lufid(filp, arg);
 	default:
 		return -ENOTTY;
 	}
@@ -2000,6 +2061,7 @@ long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 	case FS_IOC_SETFSLABEL:
 	case EXT4_IOC_GETFSUUID:
 	case EXT4_IOC_SETFSUUID:
+	case EXT4_IOC_SET_LUFID:
 		break;
 	default:
 		return -ENOIOCTLCMD;
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index c8fa8d4940c2..29d5b70b84bf 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -2282,6 +2282,8 @@ static int add_dirent_to_buf(handle_t *handle, struct ext4_filename *fname,
 	if (ext4_has_feature_metadata_csum(inode->i_sb))
 		csum_size = sizeof(struct ext4_dir_entry_tail);
 
+	dfid = ext4_dentry_get_fid(inode->i_sb,
+		(struct ext4_dentry_param *)EXT4_I(inode)->i_dirdata);
 	if (!de) {
 		if (dfid)
 			dlen = dfid->df_header.ddh_length;
@@ -2628,6 +2630,7 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
 {
 	struct inode *dir = d_inode(dentry->d_parent);
 
+	EXT4_I(inode)->i_dirdata = dentry->d_fsdata;
 	if (fscrypt_is_nokey_name(dentry))
 		return -ENOKEY;
 	return __ext4_add_entry(handle, dir, &dentry->d_name, inode);
@@ -4389,6 +4392,146 @@ static int ext4_rename2(struct mnt_idmap *idmap,
 	return ext4_rename(idmap, old_dir, old_dentry, new_dir, new_dentry, flags);
 }
 
+/*
+ * ext4_dirdata_set_lufid() - Set LUFID data on an existing directory entry
+ * @dir:        parent directory inode
+ * @filename:   name of the file in the directory
+ * @namelen:    length of filename
+ * @edp:        pointer to initialized dentry param with LUFID data
+ *
+ * This function finds an existing directory entry, deletes it, and re-creates it
+ * with LUFID data attached. Used by the EXT4_IOC_SET_LUFID ioctl.
+ *
+ * Returns 0 on success, negative error code on failure.
+ */
+int ext4_dirdata_set_lufid(struct inode *dir, const char *filename,
+			    int namelen, struct ext4_dentry_param *edp)
+{
+	struct super_block *sb = dir->i_sb;
+	struct ext4_filename fname;
+	struct ext4_dir_entry_2 *de = NULL;
+	struct buffer_head *bh = NULL;
+	struct inode *inode = NULL;
+	handle_t *handle = NULL;
+	struct qstr d_name;
+	void *old_dirdata = NULL;
+	int err = 0;
+
+	/* Check if dirdata feature is enabled */
+	if (!ext4_has_feature_dirdata(sb))
+		return -ENOTSUPP;
+
+	if (namelen > EXT4_NAME_LEN)
+               return -ENAMETOOLONG;
+        if (namelen != strnlen(filename, namelen + 1))
+               return -EINVAL;
+
+	/* Setup the filename for lookup */
+	d_name.name = filename;
+	d_name.len = namelen;
+
+	/* Lookup the filename in the directory */
+	err = ext4_fname_setup_filename(dir, &d_name, 0, &fname);
+	if (err)
+		goto out_free;
+
+	bh = ext4_find_entry(dir, &d_name, &de, NULL);
+	if (!bh) {
+		err = -ENOENT;
+		goto out_free;
+	}
+
+	/* Get the inode number from the directory entry */
+	inode = ext4_iget(sb, le32_to_cpu(de->inode), EXT4_IGET_NORMAL);
+	if (IS_ERR(inode)) {
+		err = PTR_ERR(inode);
+		inode = NULL;
+		goto out_brelse;
+	}
+
+	/* Start a transaction */
+	handle = ext4_journal_start(dir, EXT4_HT_DIR, 
+				     2 * EXT4_DATA_TRANS_BLOCKS(sb) + 
+				     EXT4_INDEX_EXTRA_TRANS_BLOCKS);
+	if (IS_ERR(handle)) {
+		err = PTR_ERR(handle);
+		handle = NULL;
+		goto out_iput;
+	}
+
+	inode_lock(dir);
+
+	/* Delete the old entry */
+	err = ext4_delete_entry(handle, dir, de, bh);
+	if (err)
+		goto out_unlock;
+
+	brelse(bh);
+	bh = NULL;
+
+	/* Re-add the entry with LUFID data
+	 * We set i_dirdata before adding so the entry can include it
+	 */
+	old_dirdata = EXT4_I(inode)->i_dirdata;
+	EXT4_I(inode)->i_dirdata = edp;
+
+	/* Use ext4_add_entry() to properly handle hash table management
+	 * and block splitting, just like rename does. This ensures the entry
+	 * is placed in the correct hash block and avoids breaking dirhash.
+	 */
+	{
+		struct dentry parent_dentry = { .d_inode = dir };
+		struct dentry new_dentry = {
+			.d_name = d_name,
+			.d_parent = &parent_dentry,
+			.d_inode = inode,  /* Same inode (in-place update) */
+			.d_fsdata = edp,   /* required */
+		};
+		err = ext4_add_entry(handle, &new_dentry, inode);
+	}
+	EXT4_I(inode)->i_dirdata = old_dirdata;
+
+	if (err) {
+		/*
+		 * The original entry was already removed above and the
+		 * re-add with the new LUFID failed; try to restore the
+		 * original entry so the inode isn't left without any
+		 * directory entry pointing at it.
+		 */
+		struct dentry parent_dentry = { .d_inode = dir };
+		struct dentry orig_dentry = {
+			.d_name = d_name,
+			.d_parent = &parent_dentry,
+			.d_inode = inode,
+		};
+		int rollback_err = ext4_add_entry(handle, &orig_dentry, inode);
+
+		if (rollback_err)
+			EXT4_ERROR_INODE(dir,
+				"Failed to set LUFID on '%.*s' (err=%d) and failed to restore the original directory entry (err=%d); inode %llu may be orphaned",
+				namelen, filename, err, rollback_err,
+				inode->i_ino);
+		goto out_unlock;
+	}
+
+	/* Update inode times */
+	inode_set_ctime_current(dir);
+	inode_inc_iversion(dir);
+	ext4_mark_inode_dirty(handle, dir);
+
+out_unlock:
+	inode_unlock(dir);
+	ext4_journal_stop(handle);
+out_iput:
+	iput(inode);
+out_brelse:
+	brelse(bh);
+out_free:
+	ext4_fname_free_filename(&fname);
+
+	return err;
+}
+
 /*
  * directories can handle most operations...
  */
diff --git a/include/uapi/linux/ext4.h b/include/uapi/linux/ext4.h
index 9c683991c32f..9fab8978843b 100644
--- a/include/uapi/linux/ext4.h
+++ b/include/uapi/linux/ext4.h
@@ -35,6 +35,7 @@
 #define EXT4_IOC_SETFSUUID		_IOW('f', 44, struct fsuuid)
 #define EXT4_IOC_GET_TUNE_SB_PARAM	_IOR('f', 45, struct ext4_tune_sb_params)
 #define EXT4_IOC_SET_TUNE_SB_PARAM	_IOW('f', 46, struct ext4_tune_sb_params)
+#define EXT4_IOC_SET_LUFID		_IOW('f', 47, struct ext4_set_lufid)
 
 #define EXT4_IOC_SHUTDOWN _IOR('X', 125, __u32)
 
@@ -92,6 +93,18 @@ struct move_extent {
 	__u64 moved_len;	/* moved block length */
 };
 
+/*
+ * Structure for EXT4_IOC_SET_LUFID
+ * Sets LUFID on a directory entry
+ * Called on parent directory with filename and LUFID data as arguments
+ */
+struct ext4_set_lufid {
+	__u8 esl_name_len;	/* length of filename */
+	char  esl_name[255 + 1]; /* filename (NUL-terminated) */
+	__u8 esl_data_len;	/* length of LUFID data */
+	char  esl_data[255]; /* LUFID data (raw bytes) */
+};
+
 /*
  * Flags used by EXT4_IOC_SHUTDOWN
  */
-- 
2.43.7


^ permalink raw reply related

* [PATCH v3 09/10] ext4: add dirdata set/get helpers
From: Artem Blagodarenko @ 2026-06-19 19:10 UTC (permalink / raw)
  To: linux-ext4; +Cc: adilger.kernel, Artem Blagodarenko, Andreas Dilger
In-Reply-To: <20260619191022.27008-1-ablagodarenko@thelustrecollective.com>

From: Artem Blagodarenko <artem.blagodarenko@gmail.com>

Add helpers to set and retrieve dirdata payload and hook them up at
the appropriate call sites.

Enable dirdata for casefold+encryption hashes and for storing a unique
128-bit file identifier in the directory entry for testing.

Signed-off-by: Artem Blagodarenko <artem.blagodarenko@gmail.com>
Reviewed-by: Andreas Dilger <adilger@dilger.ca>
---
 foofile.txt      |   0
 fs/ext4/ext4.h   |   4 +
 fs/ext4/inline.c |   6 +-
 fs/ext4/namei.c  | 201 +++++++++++++++++++++++++++++++++++++++++------
 4 files changed, 181 insertions(+), 30 deletions(-)

diff --git a/foofile.txt b/foofile.txt
new file mode 100644
index 000000000000..e69de29bb2d1
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index d4ec450c05f2..543c8e87f207 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -3874,6 +3874,10 @@ extern int __ext4_unlink(struct inode *dir, const struct qstr *d_name,
 			 struct inode *inode, struct dentry *dentry);
 extern int __ext4_link(struct inode *dir, struct inode *inode,
 		       const struct qstr *d_name, struct dentry *dentry);
+extern unsigned char ext4_dirdata_get(struct ext4_dir_entry_2 *de,
+				      struct inode *dir,
+				      struct ext4_dirent_fid  *lufid,
+				      struct dx_hash_info *hinfo);
 
 #define S_SHIFT 12
 static const unsigned char ext4_type_by_mode[(S_IFMT >> S_SHIFT) + 1] = {
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
index c57a8ebe4f94..71c395c9a162 100644
--- a/fs/ext4/inline.c
+++ b/fs/ext4/inline.c
@@ -1346,10 +1346,8 @@ int ext4_inlinedir_to_tree(struct file *dir_file,
 			}
 		}
 
-		if (ext4_hash_in_dirent(dir)) {
-			hinfo->hash = EXT4_DIRENT_HASH(de);
-			hinfo->minor_hash = EXT4_DIRENT_MINOR_HASH(de);
-		} else {
+		if (!(ext4_dirdata_get(de, dir, NULL, hinfo) &
+							EXT4_DIRENT_CFHASH)) {
 			err = ext4fs_dirhash(dir, de->name, de->name_len, hinfo);
 			if (err) {
 				ret = err;
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index e230fa1094ff..c8fa8d4940c2 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1097,22 +1097,22 @@ static int htree_dirblock_to_tree(struct file *dir_file,
 			/* silently ignore the rest of the block */
 			break;
 		}
-		if (ext4_hash_in_dirent(dir)) {
-			if (de->name_len && de->inode) {
-				hinfo->hash = EXT4_DIRENT_HASH(de);
-				hinfo->minor_hash = EXT4_DIRENT_MINOR_HASH(de);
-			} else {
-				hinfo->hash = 0;
-				hinfo->minor_hash = 0;
-			}
+		if (de->name_len && de->inode) {
+			/* check for saved hash first, or generate it from name */
+			if (!(ext4_dirdata_get(de, dir, NULL, hinfo) &
+			      EXT4_DIRENT_CFHASH)) {
+				err = ext4fs_dirhash(dir, de->name,
+						     de->name_len, hinfo);
+				if (err < 0) {
+					count = err;
+					goto errout;
+				}
+			 }
 		} else {
-			err = ext4fs_dirhash(dir, de->name,
-					     de->name_len, hinfo);
-			if (err < 0) {
-				count = err;
-				goto errout;
-			}
+			hinfo->hash = 0;
+			hinfo->minor_hash = 0;
 		}
+
 		if ((hinfo->hash < start_hash) ||
 		    ((hinfo->hash == start_hash) &&
 		     (hinfo->minor_hash < start_minor_hash)))
@@ -1290,9 +1290,165 @@ static inline int search_dirblock(struct buffer_head *bh,
  */
 
 /*
- * Create map of hash values, offsets, and sizes, stored at end of block.
- * Returns number of entries mapped.
+ * ext4_dirdata_get() - Read dirdata fields from a directory entry.
+ * @de:         directory entry
+ * @dir:        directory inode (used for fscrypt+casefold hash fallback)
+ * @dfid:      if non-NULL and EXT4_DIRENT_LUFID is set, LUFID data is copied
+ * 		here
+ * @hinfo:	if non-NULL, receives the casefold hash and minor hash
+ *
+ * Reads any dirdata stored in @de.  If the dirdata feature is not enabled,
+ * falls back to reading the hash stored inline after the filename (for
+ * compatibility with the older casefold+fscrypt format).
+ *
+ * Returns a bitmask of EXT4_DIRENT_* flags indicating which fields were read.
  */
+unsigned char ext4_dirdata_get(struct ext4_dir_entry_2 *de, struct inode *dir,
+			       struct ext4_dirent_fid *dfid,
+			       struct dx_hash_info *hinfo)
+{
+	unsigned char ret = 0;
+	unsigned int data_offset = de->name_len + 1;
+	unsigned int rec_len = ext4_rec_len_from_disk(de->rec_len,
+						       dir->i_sb->s_blocksize);
+
+	if (data_offset > rec_len)
+		return ret;
+
+	/* compatibility: hash stored inline after filename (no dirdata) */
+	if (hinfo && !ext4_has_feature_dirdata(dir->i_sb) &&
+	    ext4_hash_in_dirent(dir)) {
+		hinfo->hash = EXT4_DIRENT_HASH(de);
+		hinfo->minor_hash = EXT4_DIRENT_MINOR_HASH(de);
+		ret |= EXT4_DIRENT_CFHASH;
+
+		return ret;
+	}
+
+	/*  EXT4_DIRENT_* are not expected without flag in i_sb */
+	if (de->file_type & EXT4_DIRENT_LUFID) {
+		struct ext4_dirent_fid *disk_fid =
+			(struct ext4_dirent_fid *)(de->name + data_offset);
+		unsigned int dlen;
+
+		if (data_offset + sizeof(disk_fid->df_header) > rec_len)
+			return ret;
+
+		dlen = disk_fid->df_header.ddh_length;
+		if (dlen < sizeof(*disk_fid) || data_offset + dlen > rec_len)
+			return ret;
+
+		if (dfid) {
+			memcpy(dfid, disk_fid->df_fid,
+			       disk_fid->df_header.ddh_length);
+			ret |= EXT4_DIRENT_LUFID;
+		}
+		data_offset += dlen;
+	}
+
+	/* Skip INO64 for now*/
+	if (de->file_type & EXT4_DIRENT_INO64) {
+		struct ext4_dirent_data_header *ddh =
+		       (struct ext4_dirent_data_header *)(de->name + data_offset);
+		unsigned int dlen;
+
+		if (data_offset + sizeof(*ddh) > rec_len)
+			return ret;
+
+		dlen = ddh->ddh_length;
+		if (dlen < sizeof(*ddh) || data_offset + dlen > rec_len)
+			return ret;
+
+		data_offset += dlen;
+	}
+
+	if (!hinfo)
+		return ret;
+
+	if (de->file_type & EXT4_DIRENT_CFHASH) {
+		struct ext4_dirent_hash *dh =
+			(struct ext4_dirent_hash *)(de->name + data_offset);
+		unsigned int dlen;
+
+		dlen = dh->dh_header.ddh_length;
+		if (dlen < sizeof(*dh) || data_offset + dlen > rec_len)
+			return ret;
+
+		hinfo->hash = le32_to_cpu(dh->dh_hash.hash);
+		hinfo->minor_hash = le32_to_cpu(dh->dh_hash.minor_hash);
+		ret |= EXT4_DIRENT_CFHASH;
+	}
+
+	return ret;
+}
+
+/*
+ * ext4_dirdata_set() - Write dirdata fields into a directory entry.
+ * @de:    directory entry (name must already be set)
+ * @dir:   directory inode
+ * @dfid:  LUFID data to store (or NULL)
+ * @fname: filename info carrying the casefold hash
+ *
+ * Writes any required dirdata into @de after the filename.  If the dirdata
+ * feature is not enabled, falls back to writing the hash inline after the
+ * filename (for compatibility with the older casefold+fscrypt format).
+ */
+static void ext4_dirdata_set(struct ext4_dir_entry_2 *de, struct inode *dir,
+			     struct ext4_dirent_fid *dfid,
+			     struct ext4_filename *fname)
+{
+	struct dx_hash_info *hinfo = &fname->hinfo;
+	unsigned int data_offset = de->name_len + 1;
+	unsigned int rec_len = ext4_rec_len_from_disk(de->rec_len,
+						       dir->i_sb->s_blocksize);
+
+
+	if (dfid) {
+		unsigned int dlen = dfid->df_header.ddh_length;
+
+		if (data_offset + dlen > rec_len) {
+			EXT4_ERROR_INODE(dir, "Can not insert FID");
+			return;
+		}
+
+
+		de->name[de->name_len] = 0;
+		memcpy(&de->name[de->name_len + 1], dfid,
+		       dlen);
+		de->file_type |= EXT4_DIRENT_LUFID;
+		data_offset += dlen;
+	}
+
+	if (ext4_hash_in_dirent(dir)) {
+		if (ext4_has_feature_dirdata(dir->i_sb)) {
+			struct ext4_dirent_hash *dh =
+			    (struct ext4_dirent_hash *)(de->name + data_offset);
+
+			if (data_offset + sizeof(*dh) > rec_len) {
+				EXT4_ERROR_INODE(dir, "Can not insert dhash dirdata");
+				return;
+			}
+
+			dh->dh_header.ddh_length = sizeof(*dh);
+			dh->dh_hash.hash = cpu_to_le32(hinfo->hash);
+			dh->dh_hash.minor_hash = cpu_to_le32(hinfo->minor_hash);
+			de->file_type |= EXT4_DIRENT_CFHASH;
+		} else {
+			/* Compatibility: store hash inline after filename */
+			if (data_offset + sizeof(struct ext4_dir_entry_hash) >
+								rec_len) {
+				EXT4_ERROR_INODE(dir, "Can not insert dhash");
+				return;
+			}
+
+			EXT4_DIRENT_HASHES(de)->hash = cpu_to_le32(hinfo->hash);
+			EXT4_DIRENT_HASHES(de)->minor_hash =
+						cpu_to_le32(hinfo->minor_hash);
+		}
+	}
+}
+
+
 static int dx_make_map(struct inode *dir, struct buffer_head *bh,
 		       struct dx_hash_info *hinfo,
 		       struct dx_map_entry *map_tail)
@@ -1312,9 +1468,8 @@ static int dx_make_map(struct inode *dir, struct buffer_head *bh,
 					 ((char *)de) - base))
 			return -EFSCORRUPTED;
 		if (de->name_len && de->inode) {
-			if (ext4_hash_in_dirent(dir))
-				h.hash = EXT4_DIRENT_HASH(de);
-			else {
+			if (!(ext4_dirdata_get(de, dir, NULL, &h) &
+						EXT4_DIRENT_CFHASH)) {
 				int err = ext4fs_dirhash(dir, de->name,
 						     de->name_len, &h);
 				if (err < 0)
@@ -2102,13 +2257,7 @@ void ext4_insert_dentry_data(struct inode *dir, struct inode *inode,
 	ext4_set_de_type(inode->i_sb, de, inode->i_mode);
 	de->name_len = fname_len(fname);
 	memcpy(de->name, fname_name(fname), fname_len(fname));
-	if (ext4_hash_in_dirent(dir)) {
-		struct dx_hash_info *hinfo = &fname->hinfo;
-
-		EXT4_DIRENT_HASHES(de)->hash = cpu_to_le32(hinfo->hash);
-		EXT4_DIRENT_HASHES(de)->minor_hash =
-						cpu_to_le32(hinfo->minor_hash);
-	}
+	ext4_dirdata_set(de, dir, data, fname);
 }
 
 /*
-- 
2.43.7


^ permalink raw reply related

* [PATCH v3 08/10] ext4: dirdata feature
From: Artem Blagodarenko @ 2026-06-19 19:10 UTC (permalink / raw)
  To: linux-ext4
  Cc: adilger.kernel, Artem Blagodarenko, Pravin Shelar, Andreas Dilger
In-Reply-To: <20260619191022.27008-1-ablagodarenko@thelustrecollective.com>

From: Artem Blagodarenko <artem.blagodarenko@gmail.com>

When fscrypt and casefold are enabled together for a directory,
all ext4_dir_entry[_2] in that directory store an 8-byte hash
of the filename after 'name' between 'name_len' and 'rec_len'.

However, there is no clear indication there is important data
stored in these bytes, which are only for padding and alignment
in other directory entries.  This adds complexity to code handling
the on-disk directory entries, and there is no provision for other
metadata to be stored in each dir entry after 'name'.

The dirdata feature adds a mechanism to store multiple metadata
entries in each dir entry after 'name' (including the filename hash).
The unused high 4 bits of 'file_type' are used to indicate whether
additional data fields are stored after 'name'.  If a bit is set,
the corresponding dirdata record is present, starting after a NUL
filename terminator.  If present, a record starts with a 1-byte
length (including the length byte itself) and the data immediately
follows the length byte without any alignment.

This allows up to four different dirdata records to be stored in
each entry, and allows unhandled record bytes to be skipped without
having to process the contents, providing forward compatibility.

If and when the fourth and last dirdata record is needed, it is
recommended to further subdivide it into sub-records, with
the first byte being the total length, and then there being a
second byte that gives the sub-record length, etc. as long as
the total record length is less than 255 bytes.  However, this
would not affect compatibility with the current code since the
record length would allow it to be skipped without processing.

Signed-off-by: Pravin Shelar <pravin.shelar@sun.com>
Signed-off-by: Artem Blagodarenko <artem.blagodarenko@gmail.com>
Reviewed-by: Andreas Dilger <adilger@dilger.ca>
---
 fs/ext4/ext4.h   | 27 +++++++++++++++++++++------
 fs/ext4/inline.c | 19 +++++++++++++++----
 fs/ext4/namei.c  | 43 +++++++++++++++++++++----------------------
 fs/ext4/sysfs.c  |  2 ++
 4 files changed, 59 insertions(+), 32 deletions(-)

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 682dd141072d..d4ec450c05f2 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -2334,6 +2334,7 @@ EXT4_FEATURE_INCOMPAT_FUNCS(casefold,		CASEFOLD)
 					 EXT4_FEATURE_INCOMPAT_FLEX_BG| \
 					 EXT4_FEATURE_INCOMPAT_EA_INODE| \
 					 EXT4_FEATURE_INCOMPAT_MMP | \
+					 EXT4_FEATURE_INCOMPAT_DIRDATA | \
 					 EXT4_FEATURE_INCOMPAT_INLINE_DATA | \
 					 EXT4_FEATURE_INCOMPAT_ENCRYPT | \
 					 EXT4_FEATURE_INCOMPAT_CASEFOLD | \
@@ -3035,10 +3036,18 @@ extern int ext4_find_dest_de(struct inode *dir, struct buffer_head *bh,
 			     struct ext4_filename *fname,
 			     struct ext4_dir_entry_2 **dest_de,
 			     int dlen);
-void ext4_insert_dentry(struct inode *dir, struct inode *inode,
-			struct ext4_dir_entry_2 *de,
-			int buf_size,
-			struct ext4_filename *fname);
+void ext4_insert_dentry_data(struct inode *dir, struct inode *inode,
+			     struct ext4_dir_entry_2 *de,
+			     int buf_size,
+			     struct ext4_filename *fname,
+			     void *data);
+static inline void ext4_insert_dentry(struct inode *dir, struct inode *inode,
+				      struct ext4_dir_entry_2 *de,
+				      int buf_size,
+				      struct ext4_filename *fname)
+{
+	ext4_insert_dentry_data(dir, inode, de, buf_size, fname, NULL);
+}
 static inline void ext4_update_dx_flag(struct inode *inode)
 {
 	if (!ext4_has_feature_dir_index(inode->i_sb) &&
@@ -3283,8 +3292,14 @@ extern int ext4_ext_migrate(struct inode *);
 extern int ext4_ind_migrate(struct inode *inode);
 
 /* namei.c */
-extern int ext4_init_new_dir(handle_t *handle, struct inode *dir,
-			     struct inode *inode);
+extern int ext4_init_new_dir_data(handle_t *handle, struct inode *dir,
+				  struct inode *inode,
+				  const void *data1, const void *data2);
+static inline int ext4_init_new_dir(handle_t *handle, struct inode *dir,
+				    struct inode *inode)
+{
+	return ext4_init_new_dir_data(handle, dir, inode, NULL, NULL);
+}
 extern int ext4_dirblock_csum_verify(struct inode *inode,
 				     struct buffer_head *bh);
 extern int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash,
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
index 5b3faacdf143..c57a8ebe4f94 100644
--- a/fs/ext4/inline.c
+++ b/fs/ext4/inline.c
@@ -973,11 +973,16 @@ static int ext4_add_dirent_to_inline(handle_t *handle,
 				     struct ext4_iloc *iloc,
 				     void *inline_start, int inline_size)
 {
-	int		err;
+	int		err, dlen = 0;
 	struct ext4_dir_entry_2 *de;
+	unsigned char *data = NULL;
+
+	/* Deliver data in any appropriate way here. Now it is NULL */
+	if (data)
+		dlen = (*data) + 1;
 
 	err = ext4_find_dest_de(dir, iloc->bh, inline_start,
-				inline_size, fname, &de, 0);
+				inline_size, fname, &de, dlen);
 	if (err)
 		return err;
 
@@ -986,7 +991,7 @@ static int ext4_add_dirent_to_inline(handle_t *handle,
 					    EXT4_JTR_NONE);
 	if (err)
 		return err;
-	ext4_insert_dentry(dir, inode, de, inline_size, fname);
+	ext4_insert_dentry_data(dir, inode, de, inline_size, fname, NULL);
 
 	ext4_show_inline_dir(dir, iloc->bh, inline_start, inline_size);
 
@@ -1326,7 +1331,13 @@ int ext4_inlinedir_to_tree(struct file *dir_file,
 			pos = EXT4_INLINE_DOTDOT_SIZE;
 		} else {
 			de = (struct ext4_dir_entry_2 *)(dir_buf + pos);
-			pos += ext4_rec_len_from_disk(de->rec_len, inline_size);
+			/* Use ext4_dir_entry_len to account for dirdata extensions */
+			pos += ext4_dir_entry_len(de, dir);
+			/* Validate pos doesn't exceed the buffer to prevent an out-of-bounds access */
+			if (pos > inline_size) {
+				ret = count;
+				goto out;
+			}
 			if (ext4_check_dir_entry(inode, dir_file, de,
 					 iloc.bh, dir_buf,
 					 inline_size, pos)) {
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 7bca813c0627..e230fa1094ff 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -401,23 +401,24 @@ static struct dx_countlimit *get_dx_countlimit(struct inode *inode,
 {
 	struct ext4_dir_entry_2 *de;
 	struct dx_root_info *root;
-	int count_offset;
+	int count_offset, dotdot_rec_len;
 	int blocksize = EXT4_BLOCK_SIZE(inode->i_sb);
 	unsigned int rlen = ext4_rec_len_from_disk(dirent->rec_len, blocksize);
 
-	if (rlen == blocksize)
+	if (rlen == blocksize) {
 		count_offset = sizeof(struct dx_node);
-	else if (rlen == 12) {
-		de = (struct ext4_dir_entry_2 *)(((void *)dirent) + 12);
-		if (ext4_rec_len_from_disk(de->rec_len, blocksize) != blocksize - 12)
+	} else {
+		de = (struct ext4_dir_entry_2 *)(((char *)dirent) + rlen);
+		if (le16_to_cpu(de->rec_len) != (blocksize - rlen))
 			return NULL;
-		root = (struct dx_root_info *)(((void *)de + 12));
+		/* de->rec_len covers whole dx_root block, calculate actual length */
+		dotdot_rec_len = ext4_dir_entry_len(de, inode);
+		root = (struct dx_root_info *)(((char *)de + dotdot_rec_len));
 		if (root->reserved_zero ||
 		    root->info_length != sizeof(struct dx_root_info))
 			return NULL;
-		count_offset = 32;
-	} else
-		return NULL;
+		count_offset = root->info_length + rlen + dotdot_rec_len;
+	}
 
 	if (offset)
 		*offset = count_offset;
@@ -707,7 +708,7 @@ static struct stats dx_show_leaf(struct inode *dir,
 				       (unsigned) ((char *) de - base));
 #endif
 			}
-			space += ext4_dir_rec_len(de->name_len, dir);
+			space += ext4_dir_entry_len(de, dir);
 			names++;
 		}
 		de = ext4_next_entry(de, size);
@@ -2081,13 +2082,10 @@ int ext4_find_dest_de(struct inode *dir, struct buffer_head *bh,
 	return 0;
 }
 
-void ext4_insert_dentry(struct inode *dir,
-			struct inode *inode,
-			struct ext4_dir_entry_2 *de,
-			int buf_size,
-			struct ext4_filename *fname)
+void ext4_insert_dentry_data(struct inode *dir, struct inode *inode,
+			     struct ext4_dir_entry_2 *de, int buf_size,
+			     struct ext4_filename *fname, void *data)
 {
-
 	int nlen, rlen;
 
 	nlen = ext4_dir_entry_len(de, dir);
@@ -2129,15 +2127,15 @@ static int add_dirent_to_buf(handle_t *handle, struct ext4_filename *fname,
 	unsigned int	blocksize = dir->i_sb->s_blocksize;
 	int		csum_size = 0;
 	int		err, err2, dlen = 0;
-	unsigned char	*data = NULL;
+	struct ext4_dirent_fid *dfid = NULL;
 
 	/* Deliver data in any appropriate way here. Now it is NULL */
 	if (ext4_has_feature_metadata_csum(inode->i_sb))
 		csum_size = sizeof(struct ext4_dir_entry_tail);
 
 	if (!de) {
-		if (data)
-			dlen = (*data) + 1;
+		if (dfid)
+			dlen = dfid->df_header.ddh_length;
 		err = ext4_find_dest_de(dir, bh, bh->b_data,
 					blocksize - csum_size, fname, &de, dlen);
 		if (err)
@@ -2152,7 +2150,7 @@ static int add_dirent_to_buf(handle_t *handle, struct ext4_filename *fname,
 	}
 
 	/* By now the buffer is marked for journaling */
-	ext4_insert_dentry(dir, inode, de, blocksize, fname);
+	ext4_insert_dentry_data(dir, inode, de, blocksize, fname, dfid);
 
 	/*
 	 * XXX shouldn't update any times until successful
@@ -2991,8 +2989,9 @@ int ext4_init_dirblock(handle_t *handle, struct inode *inode,
 	return ext4_handle_dirty_dirblock(handle, inode, bh);
 }
 
-int ext4_init_new_dir(handle_t *handle, struct inode *dir,
-			     struct inode *inode)
+int ext4_init_new_dir_data(handle_t *handle, struct inode *dir,
+			   struct inode *inode,
+			   const void *data1, const void *data2)
 {
 	struct buffer_head *dir_block = NULL;
 	ext4_lblk_t block = 0;
diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c
index 923b375e017f..80074fb15ee9 100644
--- a/fs/ext4/sysfs.c
+++ b/fs/ext4/sysfs.c
@@ -362,6 +362,7 @@ EXT4_ATTR_FEATURE(verity);
 #endif
 EXT4_ATTR_FEATURE(metadata_csum_seed);
 EXT4_ATTR_FEATURE(fast_commit);
+EXT4_ATTR_FEATURE(dirdata);
 #if IS_ENABLED(CONFIG_UNICODE) && defined(CONFIG_FS_ENCRYPTION)
 EXT4_ATTR_FEATURE(encrypted_casefold);
 #endif
@@ -385,6 +386,7 @@ static struct attribute *ext4_feat_attrs[] = {
 #endif
 	ATTR_LIST(metadata_csum_seed),
 	ATTR_LIST(fast_commit),
+	ATTR_LIST(dirdata),
 #if IS_ENABLED(CONFIG_UNICODE) && defined(CONFIG_FS_ENCRYPTION)
 	ATTR_LIST(encrypted_casefold),
 #endif
-- 
2.43.7


^ permalink raw reply related

* [PATCH v3 07/10] ext4: rename ext4_dir_rec_len() and clarify dirdata usage
From: Artem Blagodarenko @ 2026-06-19 19:10 UTC (permalink / raw)
  To: linux-ext4; +Cc: adilger.kernel, Artem Blagodarenko, Andreas Dilger
In-Reply-To: <20260619191022.27008-1-ablagodarenko@thelustrecollective.com>

From: Artem Blagodarenko <artem.blagodarenko@gmail.com>

Rename ext4_dir_rec_len() to ext4_dirent_rec_len() to better
reflect that it computes the record length for a directory
entry based on the provided name length.

Update the comment to clarify handling of dirdata-enabled
directories and document the use of ext4_dir_entry_len()
when dirdata is present.

No functional changes.

Signed-off-by: Artem Blagodarenko <artem.blagodarenko@gmail.com>
Reviewed-by: Andreas Dilger <adilger@dilger.ca>
---
 fs/ext4/dir.c    |  9 +++--
 fs/ext4/ext4.h   | 14 +++++---
 fs/ext4/inline.c | 14 ++++----
 fs/ext4/namei.c  | 86 ++++++++++++++++++++++++++++++++----------------
 4 files changed, 78 insertions(+), 45 deletions(-)

diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index 17edd678fa87..012687822b82 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -89,16 +89,15 @@ int __ext4_check_dir_entry(const char *function, unsigned int line,
 	bool fake = is_fake_dir_entry(de);
 	bool has_csum = ext4_has_feature_metadata_csum(dir->i_sb);
 
-	if (unlikely(rlen < ext4_dir_rec_len(1, fake ? NULL : dir)))
+	if (unlikely(rlen < ext4_dirent_rec_len(1, fake ? NULL : dir)))
 		error_msg = "rec_len is smaller than minimal";
 	else if (unlikely(rlen % 4 != 0))
 		error_msg = "rec_len % 4 != 0";
-	else if (unlikely(rlen < ext4_dir_rec_len(de->name_len,
-							fake ? NULL : dir)))
+	else if (unlikely(rlen < ext4_dir_entry_len(de, fake ? NULL : dir)))
 		error_msg = "rec_len is too small for name_len";
 	else if (unlikely(next_offset > size))
 		error_msg = "directory entry overrun";
-	else if (unlikely(next_offset > size - ext4_dir_rec_len(1,
+	else if (unlikely(next_offset > size - ext4_dirent_rec_len(1,
 						  has_csum ? NULL : dir) &&
 			  next_offset != size))
 		error_msg = "directory entry too close to block end";
@@ -245,7 +244,7 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx)
 				 * failure will be detected in the
 				 * dirent test below. */
 				if (ext4_rec_len_from_disk(de->rec_len,
-					sb->s_blocksize) < ext4_dir_rec_len(1,
+					sb->s_blocksize) < ext4_dirent_rec_len(1,
 									inode))
 					break;
 				i += ext4_rec_len_from_disk(de->rec_len,
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index fd979452391f..682dd141072d 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -2616,11 +2616,16 @@ struct ext4_dirent_hash {
  * casefolded and encrypted need to store the hash as well, so we add room for
  * ext4_extended_dir_entry_2. For all entries related to '.' or '..' you should
  * pass NULL for dir, as those entries do not use the extra fields.
+ *
+ * For directories with the dirdata feature, extra data may follow the filename.
+ * Use ext4_dir_entry_len() to compute the length of a directory entry
+ * including any dirdata, or ext4_dirent_rec_len() directly when the total
+ * name_len (including dirdata length) is already known.
  */
-static inline unsigned int ext4_dir_rec_len(__u8 name_len,
+static inline unsigned int ext4_dirent_rec_len(unsigned int name_len,
 						const struct inode *dir)
 {
-	int rec_len = (name_len + 8 + EXT4_DIR_ROUND);
+	unsigned int rec_len = (name_len + 8 + EXT4_DIR_ROUND);
 
 	if (dir && ext4_hash_in_dirent(dir))
 		rec_len += sizeof(struct ext4_dir_entry_hash);
@@ -3028,7 +3033,8 @@ extern void ext4_htree_free_dir_info(struct dir_private_info *p);
 extern int ext4_find_dest_de(struct inode *dir, struct buffer_head *bh,
 			     void *buf, int buf_size,
 			     struct ext4_filename *fname,
-			     struct ext4_dir_entry_2 **dest_de);
+			     struct ext4_dir_entry_2 **dest_de,
+			     int dlen);
 void ext4_insert_dentry(struct inode *dir, struct inode *inode,
 			struct ext4_dir_entry_2 *de,
 			int buf_size,
@@ -4142,7 +4148,7 @@ static inline unsigned int ext4_dir_entry_len(struct ext4_dir_entry_2 *de,
 	unsigned int rec_len = ext4_rec_len_from_disk(de->rec_len, blocksize);
 	unsigned int dirdata = ext4_dirent_get_data_len(de, rec_len);
 
-	return ext4_dir_rec_len(de->name_len + dirdata, dir);
+	return ext4_dirent_rec_len(de->name_len + dirdata, dir);
 }
 
 extern const struct iomap_ops ext4_iomap_ops;
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
index 8045e4ff270c..5b3faacdf143 100644
--- a/fs/ext4/inline.c
+++ b/fs/ext4/inline.c
@@ -977,7 +977,7 @@ static int ext4_add_dirent_to_inline(handle_t *handle,
 	struct ext4_dir_entry_2 *de;
 
 	err = ext4_find_dest_de(dir, iloc->bh, inline_start,
-				inline_size, fname, &de);
+				inline_size, fname, &de, 0);
 	if (err)
 		return err;
 
@@ -1055,7 +1055,7 @@ static int ext4_update_inline_dir(handle_t *handle, struct inode *dir,
 	int old_size = EXT4_I(dir)->i_inline_size - EXT4_MIN_INLINE_DATA_SIZE;
 	int new_size = get_max_inline_xattr_value_size(dir, iloc);
 
-	if (new_size - old_size <= ext4_dir_rec_len(1, NULL))
+	if (new_size - old_size <= ext4_dirent_rec_len(1, NULL))
 		return -ENOSPC;
 
 	ret = ext4_update_inline_data(handle, dir,
@@ -1309,7 +1309,7 @@ int ext4_inlinedir_to_tree(struct file *dir_file,
 			fake.name_len = 1;
 			memcpy(fake.name, ".", 2);
 			fake.rec_len = ext4_rec_len_to_disk(
-					  ext4_dir_rec_len(fake.name_len, NULL),
+					  ext4_dirent_rec_len(fake.name_len, NULL),
 					  inline_size);
 			ext4_set_de_type(inode->i_sb, &fake, S_IFDIR);
 			de = &fake;
@@ -1319,7 +1319,7 @@ int ext4_inlinedir_to_tree(struct file *dir_file,
 			fake.name_len = 2;
 			memcpy(fake.name, "..", 3);
 			fake.rec_len = ext4_rec_len_to_disk(
-					  ext4_dir_rec_len(fake.name_len, NULL),
+					  ext4_dirent_rec_len(fake.name_len, NULL),
 					  inline_size);
 			ext4_set_de_type(inode->i_sb, &fake, S_IFDIR);
 			de = &fake;
@@ -1427,8 +1427,8 @@ int ext4_read_inline_dir(struct file *file,
 	 * So we will use extra_offset and extra_size to indicate them
 	 * during the inline dir iteration.
 	 */
-	dotdot_offset = ext4_dir_rec_len(1, NULL);
-	dotdot_size = dotdot_offset + ext4_dir_rec_len(2, NULL);
+	dotdot_offset = ext4_dirent_rec_len(1, NULL);
+	dotdot_size = dotdot_offset + ext4_dirent_rec_len(2, NULL);
 	extra_offset = dotdot_size - EXT4_INLINE_DOTDOT_SIZE;
 	extra_size = extra_offset + inline_size;
 
@@ -1463,7 +1463,7 @@ int ext4_read_inline_dir(struct file *file,
 			 * failure will be detected in the
 			 * dirent test below. */
 			if (ext4_rec_len_from_disk(de->rec_len, extra_size)
-				< ext4_dir_rec_len(1, NULL))
+				< ext4_dirent_rec_len(1, NULL))
 				break;
 			i += ext4_rec_len_from_disk(de->rec_len,
 						    extra_size);
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index a20ba9a6a15b..7bca813c0627 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -519,13 +519,20 @@ ext4_next_entry(struct ext4_dir_entry_2 *p, unsigned long blocksize)
  * Future: use high four bits of block for coalesce-on-delete flags
  * Mask them off for now.
  */
-static struct dx_root_info *dx_get_dx_info(void *de_buf)
+static struct dx_root_info *dx_get_dx_info(struct inode *dir, void *de_buf)
 {
+	unsigned int blocksize = dir->i_sb->s_blocksize;
+	void *base = de_buf;
+
 	/* get dotdot first */
-	de_buf = de_buf + ext4_dir_rec_len(1, NULL);
+	de_buf += ext4_dir_entry_len(de_buf, dir);
 
 	/* dx root info is after dotdot entry */
-	de_buf = de_buf + ext4_dir_rec_len(2, NULL);
+	de_buf += ext4_dir_entry_len(de_buf, dir);
+
+	if (de_buf < base || (char *)de_buf - (char *)base +
+			      sizeof(struct dx_root_info) > blocksize)
+		return ERR_PTR(-EFSCORRUPTED);
 
 	return (struct dx_root_info *)de_buf;
 }
@@ -576,7 +583,9 @@ static inline unsigned dx_root_limit(struct inode *dir,
 	struct dx_root_info *info;
 	unsigned int entry_space;
 
-	info = dx_get_dx_info(dot_de);
+	info = dx_get_dx_info(dir, dot_de);
+	if (IS_ERR(info))
+		return 0;
 	entry_space = dir->i_sb->s_blocksize - ((char *)info - (char *)dot_de) -
 		info->info_length;
 
@@ -588,7 +597,7 @@ static inline unsigned dx_root_limit(struct inode *dir,
 static inline unsigned dx_node_limit(struct inode *dir)
 {
 	unsigned int entry_space = dir->i_sb->s_blocksize -
-			ext4_dir_rec_len(0, dir);
+			ext4_dirent_rec_len(0, dir);
 
 	if (ext4_has_feature_metadata_csum(dir->i_sb))
 		entry_space -= sizeof(struct dx_tail);
@@ -792,7 +801,9 @@ dx_probe(struct ext4_filename *fname, struct inode *dir,
 	if (IS_ERR(frame->bh))
 		return (struct dx_frame *) frame->bh;
 
-	info = dx_get_dx_info((struct ext4_dir_entry_2 *)frame->bh->b_data);
+	info = dx_get_dx_info(dir, (struct ext4_dir_entry_2 *)frame->bh->b_data);
+	if (IS_ERR(info))
+		goto fail;
 	if (info->hash_version != DX_HASH_TEA &&
 	    info->hash_version != DX_HASH_HALF_MD4 &&
 	    info->hash_version != DX_HASH_LEGACY &&
@@ -937,7 +948,7 @@ dx_probe(struct ext4_filename *fname, struct inode *dir,
 	return ret_err;
 }
 
-static void dx_release(struct dx_frame *frames)
+static void dx_release(struct inode *dir, struct dx_frame *frames)
 {
 	struct dx_root_info *info;
 	int i;
@@ -946,7 +957,9 @@ static void dx_release(struct dx_frame *frames)
 	if (frames[0].bh == NULL)
 		return;
 
-	info = dx_get_dx_info((struct ext4_dir_entry_2 *)frames[0].bh->b_data);
+	info = dx_get_dx_info(dir, (struct ext4_dir_entry_2 *)frames[0].bh->b_data);
+	if (IS_ERR(info))
+		return;
 	/* save local copy, "info" may be freed after brelse() */
 	indirect_levels = info->indirect_levels;
 	for (i = 0; i <= indirect_levels; i++) {
@@ -1058,7 +1071,7 @@ static int htree_dirblock_to_tree(struct file *dir_file,
 	/* csum entries are not larger in the casefolded encrypted case */
 	top = (struct ext4_dir_entry_2 *) ((char *) de +
 					   dir->i_sb->s_blocksize -
-					   ext4_dir_rec_len(0,
+					   ext4_dirent_rec_len(0,
 							   csum ? NULL : dir));
 	/* Check if the directory is encrypted */
 	if (IS_ENCRYPTED(dir)) {
@@ -1252,12 +1265,12 @@ int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash,
 		    (count && ((hashval & 1) == 0)))
 			break;
 	}
-	dx_release(frames);
+	dx_release(dir, frames);
 	dxtrace(printk(KERN_DEBUG "Fill tree: returned %d entries, "
 		       "next hash: %x\n", count, *next_hash));
 	return count;
 errout:
-	dx_release(frames);
+	dx_release(dir, frames);
 	return (err);
 }
 
@@ -1755,7 +1768,7 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir,
 errout:
 	dxtrace(printk(KERN_DEBUG "%s not found\n", fname->usr_fname->name));
 success:
-	dx_release(frames);
+	dx_release(dir, frames);
 	return bh;
 }
 
@@ -1852,7 +1865,7 @@ dx_move_dirents(struct inode *dir, char *from, char *to,
 	while (count--) {
 		struct ext4_dir_entry_2 *de = (struct ext4_dir_entry_2 *)
 						(from + (map->offs<<2));
-		rec_len = ext4_dir_rec_len(de->name_len, dir);
+		rec_len = ext4_dir_entry_len(de, dir);
 
 		memcpy (to, de, rec_len);
 		((struct ext4_dir_entry_2 *) to)->rec_len =
@@ -1885,7 +1898,7 @@ static struct ext4_dir_entry_2 *dx_pack_dirents(struct inode *dir, char *base,
 	while ((char*)de < base + blocksize) {
 		next = ext4_next_entry(de, blocksize);
 		if (de->inode && de->name_len) {
-			rec_len = ext4_dir_rec_len(de->name_len, dir);
+			rec_len = ext4_dir_entry_len(de, dir);
 			if (de > to)
 				memmove(to, de, rec_len);
 			to->rec_len = ext4_rec_len_to_disk(rec_len, blocksize);
@@ -2037,10 +2050,11 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
 int ext4_find_dest_de(struct inode *dir, struct buffer_head *bh,
 		      void *buf, int buf_size,
 		      struct ext4_filename *fname,
-		      struct ext4_dir_entry_2 **dest_de)
+		      struct ext4_dir_entry_2 **dest_de,
+		      int dlen)
 {
 	struct ext4_dir_entry_2 *de;
-	unsigned short reclen = ext4_dir_rec_len(fname_len(fname), dir);
+	unsigned short reclen = ext4_dirent_rec_len(fname_len(fname) + dlen, dir);
 	int nlen, rlen;
 	unsigned int offset = 0;
 	char *top;
@@ -2053,7 +2067,7 @@ int ext4_find_dest_de(struct inode *dir, struct buffer_head *bh,
 			return -EFSCORRUPTED;
 		if (ext4_match(dir, fname, de))
 			return -EEXIST;
-		nlen = ext4_dir_rec_len(de->name_len, dir);
+		nlen = ext4_dir_entry_len(de, dir);
 		rlen = ext4_rec_len_from_disk(de->rec_len, buf_size);
 		if ((de->inode ? rlen - nlen : rlen) >= reclen)
 			break;
@@ -2076,7 +2090,7 @@ void ext4_insert_dentry(struct inode *dir,
 
 	int nlen, rlen;
 
-	nlen = ext4_dir_rec_len(de->name_len, dir);
+	nlen = ext4_dir_entry_len(de, dir);
 	rlen = ext4_rec_len_from_disk(de->rec_len, buf_size);
 	if (de->inode) {
 		struct ext4_dir_entry_2 *de1 =
@@ -2114,14 +2128,18 @@ static int add_dirent_to_buf(handle_t *handle, struct ext4_filename *fname,
 {
 	unsigned int	blocksize = dir->i_sb->s_blocksize;
 	int		csum_size = 0;
-	int		err, err2;
+	int		err, err2, dlen = 0;
+	unsigned char	*data = NULL;
 
+	/* Deliver data in any appropriate way here. Now it is NULL */
 	if (ext4_has_feature_metadata_csum(inode->i_sb))
 		csum_size = sizeof(struct ext4_dir_entry_tail);
 
 	if (!de) {
+		if (data)
+			dlen = (*data) + 1;
 		err = ext4_find_dest_de(dir, bh, bh->b_data,
-					blocksize - csum_size, fname, &de);
+					blocksize - csum_size, fname, &de, dlen);
 		if (err)
 			return err;
 	}
@@ -2276,7 +2294,12 @@ static int make_indexed_dir(handle_t *handle, struct ext4_filename *fname,
 				     blocksize);
 
 	/* initialize hashing info */
-	dx_info = dx_get_dx_info(dot_de);
+	dx_info = dx_get_dx_info(dir, dot_de);
+	if (IS_ERR(dx_info)) {
+		brelse(bh2);
+		brelse(bh);
+		return PTR_ERR(dx_info);
+	}
 	memset(dx_info, 0, sizeof(*dx_info));
 	dx_info->info_length = sizeof(*dx_info);
 	if (ext4_hash_in_dirent(dir))
@@ -2334,7 +2357,7 @@ static int make_indexed_dir(handle_t *handle, struct ext4_filename *fname,
 	 */
 	if (retval)
 		ext4_mark_inode_dirty(handle, dir);
-	dx_release(frames);
+	dx_release(dir, frames);
 	brelse(bh2);
 	return retval;
 }
@@ -2609,8 +2632,13 @@ static int ext4_dx_add_entry(handle_t *handle, struct ext4_filename *fname,
 			/* Set up root */
 			dx_set_count(entries, 1);
 			dx_set_block(entries + 0, newblock);
-			info = dx_get_dx_info((struct ext4_dir_entry_2 *)
+			info = dx_get_dx_info(dir, (struct ext4_dir_entry_2 *)
 					      frames[0].bh->b_data);
+			if (IS_ERR(info)) {
+				err = PTR_ERR(info);
+				brelse(bh2);
+				goto journal_error;
+			}
 			info->indirect_levels += 1;
 			dxtrace(printk(KERN_DEBUG
 				       "Creating %d level index...\n",
@@ -2638,7 +2666,7 @@ static int ext4_dx_add_entry(handle_t *handle, struct ext4_filename *fname,
 	ext4_std_error(dir->i_sb, err); /* this is a no-op if err == 0 */
 cleanup:
 	brelse(bh);
-	dx_release(frames);
+	dx_release(dir, frames);
 	/* @restart is true means htree-path has been changed, we need to
 	 * repeat dx_probe() to find out valid htree-path
 	 */
@@ -2930,7 +2958,7 @@ int ext4_init_dirblock(handle_t *handle, struct inode *inode,
 
 	de->inode = cpu_to_le32(inode->i_ino);
 	de->name_len = 1;
-	de->rec_len = ext4_rec_len_to_disk(ext4_dir_rec_len(de->name_len, NULL),
+	de->rec_len = ext4_rec_len_to_disk(ext4_dirent_rec_len(de->name_len, NULL),
 					   blocksize);
 	memcpy(de->name, ".", 2);
 	ext4_set_de_type(inode->i_sb, de, S_IFDIR);
@@ -2942,7 +2970,7 @@ int ext4_init_dirblock(handle_t *handle, struct inode *inode,
 	ext4_set_de_type(inode->i_sb, de, S_IFDIR);
 	if (inline_buf) {
 		de->rec_len = ext4_rec_len_to_disk(
-					ext4_dir_rec_len(de->name_len, NULL),
+					ext4_dirent_rec_len(de->name_len, NULL),
 					blocksize);
 		de = ext4_next_entry(de, blocksize);
 		header_size = (char *)de - bh->b_data;
@@ -2951,7 +2979,7 @@ int ext4_init_dirblock(handle_t *handle, struct inode *inode,
 			blocksize - csum_size);
 	} else {
 		de->rec_len = ext4_rec_len_to_disk(blocksize -
-					(csum_size + ext4_dir_rec_len(1, NULL)),
+					(csum_size + ext4_dirent_rec_len(1, NULL)),
 					blocksize);
 	}
 
@@ -3074,8 +3102,8 @@ bool ext4_empty_dir(struct inode *inode)
 	}
 
 	sb = inode->i_sb;
-	if (inode->i_size < ext4_dir_rec_len(1, NULL) +
-					ext4_dir_rec_len(2, NULL)) {
+	if (inode->i_size < ext4_dirent_rec_len(1, NULL) +
+					ext4_dirent_rec_len(2, NULL)) {
 		EXT4_ERROR_INODE(inode, "invalid size");
 		return false;
 	}
-- 
2.43.7


^ permalink raw reply related

* [PATCH v3 06/10] ext4: add ext4_dir_entry_len() and harden dirdata parsing
From: Artem Blagodarenko @ 2026-06-19 19:10 UTC (permalink / raw)
  To: linux-ext4; +Cc: adilger.kernel, Artem Blagodarenko, Andreas Dilger
In-Reply-To: <20260619191022.27008-1-ablagodarenko@thelustrecollective.com>

From: Artem Blagodarenko <artem.blagodarenko@gmail.com>

Introduce ext4_dir_entry_len() helper to compute the required
rec_len for a directory entry, taking into account dirdata and
casefold+fscrypt hash space.

Convert ext4_dirent_get_data_len() to take the decoded rec_len
as an argument and add bounds checking when walking dirdata
extensions to avoid overruns on malformed entries.

Update dx_root_limit() to take the '.' dirent and derive the available
entry space from the dx_root_info location returned by dx_get_dx_info(),
instead of open-coding ext4_dir_rec_len() for the '.' and '..' entries.

Signed-off-by: Artem Blagodarenko <artem.blagodarenko@gmail.com>
Reviewed-by: Andreas Dilger <adilger@dilger.ca>
---
 fs/ext4/ext4.h  | 45 ++++++++++++++++++++++++++++++++++++++++++---
 fs/ext4/namei.c | 23 +++++++++++++++--------
 2 files changed, 57 insertions(+), 11 deletions(-)

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 2f29f50a12ac..fd979452391f 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -4075,6 +4075,7 @@ static inline bool ext4_dir_entry_is_tail(struct ext4_dir_entry_2 *de)
 /*
  * ext4_dirent_get_data_len() - Compute the total dirdata length for an entry.
  * @de: directory entry
+ * @rec_len: the record length of the directory entry (decoded)
  *
  * Computes the length of optional data stored after the filename (and its
  * implicit NUL terminator).  Each extension is indicated by a bit in the
@@ -4083,22 +4084,41 @@ static inline bool ext4_dir_entry_is_tail(struct ext4_dir_entry_2 *de)
  *
  * Returns 0 for tail entries and for entries with no dirdata.
  */
-static inline int ext4_dirent_get_data_len(struct ext4_dir_entry_2 *de)
+static inline int ext4_dirent_get_data_len(struct ext4_dir_entry_2 *de,
+					   unsigned int rec_len)
 {
 	__u8 extra_data_flags;
 	struct ext4_dirent_data_header *ddh;
 	int dlen = 0;
+	unsigned int offset;
 
 	if (ext4_dir_entry_is_tail(de))
 		return 0;
 
 	extra_data_flags = (de->file_type & ~EXT4_FT_MASK) >> 4;
-	ddh = (struct ext4_dirent_data_header *)(de->name + de->name_len +
-						 1 /* NUL terminator */);
+	/* offset from start of entry to after filename + NUL */
+	offset = EXT4_BASE_DIR_LEN + de->name_len + 1;
 
+	/* bounds check: ensure we start reading within the entry */
+	if (offset >= rec_len)
+		return 0;
+
+	ddh = (struct ext4_dirent_data_header *)((char *)de + offset);
+ 
 	while (extra_data_flags) {
 		if (extra_data_flags & 1) {
+			/* bounds check before reading ddh_length */
+			if (offset + sizeof(*ddh) >
+			    rec_len)
+				return dlen;
+
+			/* validate ddh_length is reasonable */
+			if (ddh->ddh_length == 0 || ddh->ddh_length >
+			    rec_len - offset)
+				return dlen;
+
 			dlen += ddh->ddh_length + (dlen == 0);
+			offset += ddh->ddh_length;
 			ddh = ext4_dirdata_next(ddh);
 		}
 		extra_data_flags >>= 1;
@@ -4106,6 +4126,25 @@ static inline int ext4_dirent_get_data_len(struct ext4_dir_entry_2 *de)
 	return dlen;
 }
 
+/*
+ * ext4_dir_entry_len() - Compute the required rec_len for a directory entry.
+ * @de:  directory entry (used to read name_len and any dirdata length)
+ * @dir: directory inode (may be NULL for '.' and '..' entries)
+ *
+ * Returns the minimum record length needed to hold @de, rounded up to the
+ * directory alignment and including room for the casefold+fscrypt hash if
+ * the directory requires it.
+ */
+static inline unsigned int ext4_dir_entry_len(struct ext4_dir_entry_2 *de,
+					      const struct inode *dir)
+{
+	unsigned int blocksize = (dir && dir->i_sb) ? dir->i_sb->s_blocksize : 4096;
+	unsigned int rec_len = ext4_rec_len_from_disk(de->rec_len, blocksize);
+	unsigned int dirdata = ext4_dirent_get_data_len(de, rec_len);
+
+	return ext4_dir_rec_len(de->name_len + dirdata, dir);
+}
+
 extern const struct iomap_ops ext4_iomap_ops;
 extern const struct iomap_ops ext4_iomap_report_ops;
 
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 7692cf5184ab..a20ba9a6a15b 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -570,11 +570,15 @@ static inline void dx_set_limit(struct dx_entry *entries, unsigned value)
 	((struct dx_countlimit *) entries)->limit = cpu_to_le16(value);
 }
 
-static inline unsigned dx_root_limit(struct inode *dir, unsigned infosize)
+static inline unsigned dx_root_limit(struct inode *dir,
+	struct ext4_dir_entry_2 *dot_de)
 {
-	unsigned int entry_space = dir->i_sb->s_blocksize -
-			ext4_dir_rec_len(1, NULL) -
-			ext4_dir_rec_len(2, NULL) - infosize;
+	struct dx_root_info *info;
+	unsigned int entry_space;
+
+	info = dx_get_dx_info(dot_de);
+	entry_space = dir->i_sb->s_blocksize - ((char *)info - (char *)dot_de) -
+		info->info_length;
 
 	if (ext4_has_feature_metadata_csum(dir->i_sb))
 		entry_space -= sizeof(struct dx_tail);
@@ -850,10 +854,13 @@ dx_probe(struct ext4_filename *fname, struct inode *dir,
 
 	entries = (struct dx_entry *)(((char *)info) + info->info_length);
 
-	if (dx_get_limit(entries) != dx_root_limit(dir, info->info_length)) {
+	if (dx_get_limit(entries) !=
+	    dx_root_limit(dir, (struct ext4_dir_entry_2 *)frame->bh->b_data)) {
 		ext4_warning_inode(dir, "dx entry: limit %u != root limit %u",
 				   dx_get_limit(entries),
-				   dx_root_limit(dir, info->info_length));
+				   dx_root_limit(dir,
+				   (struct ext4_dir_entry_2 *)frame->bh->b_data
+				   ));
 		goto fail;
 	}
 
@@ -2278,10 +2285,10 @@ static int make_indexed_dir(handle_t *handle, struct ext4_filename *fname,
 		dx_info->hash_version =
 				EXT4_SB(dir->i_sb)->s_def_hash_version;
 
-	entries = (void *)dx_info + sizeof(*dx_info);
+	entries = (void *)dx_info + dx_info->info_length;
 	dx_set_block(entries, 1);
 	dx_set_count(entries, 1);
-	dx_set_limit(entries, dx_root_limit(dir, sizeof(*dx_info)));
+	dx_set_limit(entries, dx_root_limit(dir, dot_de));
 
 	/* Initialize as for dx_probe */
 	fname->hinfo.hash_version = dx_info->hash_version;
-- 
2.43.7


^ permalink raw reply related

* [PATCH v3 05/10] ext4: preserve dirdata bits in get_dtype()
From: Artem Blagodarenko @ 2026-06-19 19:10 UTC (permalink / raw)
  To: linux-ext4; +Cc: adilger.kernel, Artem Blagodarenko, Andreas Dilger
In-Reply-To: <20260619191022.27008-1-ablagodarenko@thelustrecollective.com>

From: Artem Blagodarenko <artem.blagodarenko@gmail.com>

Mask the filetype with EXT4_FT_MASK when indexing
ext4_filetype_table[] to avoid using dirdata bits as an index.

Preserve the extra bits stored in the upper part of filetype and
propagate them to the returned dtype value.

Signed-off-by: Artem Blagodarenko <artem.blagodarenko@gmail.com>
Reviewed-by: Andreas Dilger <adilger@dilger.ca>
---
 fs/ext4/ext4.h | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 98603aa44693..2f29f50a12ac 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -3046,12 +3046,15 @@ static const unsigned char ext4_filetype_table[] = {
 	DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
 };
 
-static inline  unsigned char get_dtype(struct super_block *sb, int filetype)
+static inline unsigned char get_dtype(struct super_block *sb, int filetype)
 {
-	if (!ext4_has_feature_filetype(sb) || filetype >= EXT4_FT_MAX)
+	unsigned char fl_index = filetype & EXT4_FT_MASK;
+
+	if (!ext4_has_feature_filetype(sb) || fl_index >= EXT4_FT_MAX)
 		return DT_UNKNOWN;
 
-	return ext4_filetype_table[filetype];
+	return (ext4_filetype_table[fl_index]) |
+		(filetype & ~EXT4_FT_MASK);
 }
 extern int ext4_check_all_de(struct inode *dir, struct buffer_head *bh,
 			     void *buf, int buf_size);
-- 
2.43.7


^ permalink raw reply related

* [PATCH v3 04/10] ext4: add dirdata format definitions and access helpers
From: Artem Blagodarenko @ 2026-06-19 19:10 UTC (permalink / raw)
  To: linux-ext4
  Cc: adilger.kernel, Artem Blagodarenko, Pravin Shelar, Andreas Dilger
In-Reply-To: <20260619191022.27008-1-ablagodarenko@thelustrecollective.com>

From: Artem Blagodarenko <artem.blagodarenko@gmail.com>

Define the on-disk format for ext4 directory entry extension data.

The upper four bits of de->file_type indicate the presence of
optional data stored after the filename NUL terminator.  This patch
defines flags for LUFID, 64-bit inode numbers, and casefold hash
data stored in that area.

Add struct ext4_dirent_data_header to describe variable-length
extension records and struct ext4_dirent_hash for hash storage used
by casefold and fscrypt.

Provide ext4_dirdata_next() to advance to the next extension record
and ext4_dirent_get_data_len() to compute the total extension data
length associated with a directory entry.

No functional changes.

Signed-off-by: Pravin Shelar <pravin.shelar@sun.com>
Signed-off-by: Artem Blagodarenko <artem.blagodarenko@gmail.com>
Reviewed-by: Andreas Dilger <adilger@dilger.ca>
---
 fs/ext4/ext4.h | 83 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 83 insertions(+)

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 9400bc2858a5..98603aa44693 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -2556,6 +2556,49 @@ struct ext4_dir_entry_tail {
 #define EXT4_FT_SYMLINK		7
 
 #define EXT4_FT_MAX		8
+#define EXT4_FT_MASK		0xf
+
+#if EXT4_FT_MAX > EXT4_FT_MASK
+#error "conflicting EXT4_FT_MAX and EXT4_FT_MASK"
+#endif
+
+/*
+ * d_type has 4 unused bits, so it can hold four types of data. These different
+ * types of data (e.g. fscrypt hash, high 32 bits of 64-bit inode number) can be
+ * stored, in flag order, after file-name in ext4 dirent.
+ *
+ * These flags are added to d_type if ext4 dirent has extra data after
+ * filename. This data length is variable and length is stored in first byte
+ * of data. Data starts after filename NUL byte.
+ */
+#define EXT4_DIRENT_LUFID		0x10
+#define EXT4_DIRENT_INO64		0x20
+#define EXT4_DIRENT_CFHASH		0x40
+
+struct ext4_fid {
+	char    fid[16];     /* 128-bit unique file identifier */
+};
+
+struct ext4_dirent_data_header {
+	/* length of this header + the whole data blob */
+	__u8	ddh_length;
+} __packed;
+
+struct ext4_dirent_fid {
+	struct ext4_dirent_data_header df_header;
+	struct ext4_fid                df_fid[];
+};
+
+#define EXT4_LUFID_MAGIC    0xAD200907UL
+struct ext4_dentry_param {
+	__u32			edp_magic;	/* EXT4_LUFID_MAGIC */
+	struct ext4_dirent_fid	edp_dfid;
+};
+
+struct ext4_dirent_hash {
+	struct ext4_dirent_data_header	dh_header;
+	struct ext4_dir_entry_hash	dh_hash;
+} __packed;
 
 #define EXT4_FT_DIR_CSUM	0xDE
 
@@ -4004,6 +4047,12 @@ static inline void ext4_clear_io_unwritten_flag(ext4_io_end_t *io_end)
 		io_end->flag &= ~EXT4_IO_END_UNWRITTEN;
 }
 
+/*
+ * Advance to the next dirdata record header starting from @ddh.
+ */
+#define ext4_dirdata_next(ddh) \
+	((struct ext4_dirent_data_header *)((char *)(ddh) + (ddh)->ddh_length))
+
 /*
  * ext4_dir_entry_is_tail() - Check if a directory entry is a tail entry.
  * @de: directory entry to check
@@ -4020,6 +4069,40 @@ static inline bool ext4_dir_entry_is_tail(struct ext4_dir_entry_2 *de)
 	       t->det_reserved_ft == EXT4_FT_DIR_CSUM;
 }
 
+/*
+ * ext4_dirent_get_data_len() - Compute the total dirdata length for an entry.
+ * @de: directory entry
+ *
+ * Computes the length of optional data stored after the filename (and its
+ * implicit NUL terminator).  Each extension is indicated by a bit in the
+ * high 4 bits of de->file_type; the first byte of each extension is its
+ * length (including that length byte itself).
+ *
+ * Returns 0 for tail entries and for entries with no dirdata.
+ */
+static inline int ext4_dirent_get_data_len(struct ext4_dir_entry_2 *de)
+{
+	__u8 extra_data_flags;
+	struct ext4_dirent_data_header *ddh;
+	int dlen = 0;
+
+	if (ext4_dir_entry_is_tail(de))
+		return 0;
+
+	extra_data_flags = (de->file_type & ~EXT4_FT_MASK) >> 4;
+	ddh = (struct ext4_dirent_data_header *)(de->name + de->name_len +
+						 1 /* NUL terminator */);
+
+	while (extra_data_flags) {
+		if (extra_data_flags & 1) {
+			dlen += ddh->ddh_length + (dlen == 0);
+			ddh = ext4_dirdata_next(ddh);
+		}
+		extra_data_flags >>= 1;
+	}
+	return dlen;
+}
+
 extern const struct iomap_ops ext4_iomap_ops;
 extern const struct iomap_ops ext4_iomap_report_ops;
 
-- 
2.43.7


^ permalink raw reply related

* [PATCH v3 03/10] ext4: refactor dx_root to support variable dirent sizes
From: Artem Blagodarenko @ 2026-06-19 19:10 UTC (permalink / raw)
  To: linux-ext4
  Cc: adilger.kernel, Artem Blagodarenko, Pravin Shelar, Andreas Dilger
In-Reply-To: <20260619191022.27008-1-ablagodarenko@thelustrecollective.com>

From: Artem Blagodarenko <artem.blagodarenko@gmail.com>

Split the monolithic definition of struct dx_root to separate
dx_root_info from the fake struct ext4_dir_entry_2 dirents, for improved
code readability.  This allows the "." and ".." dirents to have different
sizes if necessary, since we can't assume a rec_len of 12 if the dx_root
dirents have dirdata.
Adds dx_get_dx_info() accessor instead of complex typecast at callers.
Does not change any functionality.

Signed-off-by: Pravin Shelar <pravin.shelar@sun.com>
Signed-off-by: Artem Blagodarenko <artem.blagodarenko@gmail.com>
Reviewed-by: Andreas Dilger <adilger@dilger.ca>
---
 fs/ext4/namei.c | 145 +++++++++++++++++++++++-------------------------
 1 file changed, 70 insertions(+), 75 deletions(-)

diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index f47f072a1a89..7692cf5184ab 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -244,22 +244,13 @@ struct dx_entry
  * hash version mod 4 should never be 0.  Sincerely, the paranoia department.
  */
 
-struct dx_root
+struct dx_root_info
 {
-	struct fake_dirent dot;
-	char dot_name[4];
-	struct fake_dirent dotdot;
-	char dotdot_name[4];
-	struct dx_root_info
-	{
-		__le32 reserved_zero;
-		u8 hash_version;
-		u8 info_length; /* 8 */
-		u8 indirect_levels;
-		u8 unused_flags;
-	}
-	info;
-	struct dx_entry	entries[];
+	__le32 reserved_zero;
+	u8 hash_version;
+	u8 info_length; /* 8 */
+	u8 indirect_levels;
+	u8 unused_flags;
 };
 
 struct dx_node
@@ -528,6 +519,16 @@ ext4_next_entry(struct ext4_dir_entry_2 *p, unsigned long blocksize)
  * Future: use high four bits of block for coalesce-on-delete flags
  * Mask them off for now.
  */
+static struct dx_root_info *dx_get_dx_info(void *de_buf)
+{
+	/* get dotdot first */
+	de_buf = de_buf + ext4_dir_rec_len(1, NULL);
+
+	/* dx root info is after dotdot entry */
+	de_buf = de_buf + ext4_dir_rec_len(2, NULL);
+
+	return (struct dx_root_info *)de_buf;
+}
 
 static inline ext4_lblk_t dx_get_block(struct dx_entry *entry)
 {
@@ -775,7 +776,7 @@ dx_probe(struct ext4_filename *fname, struct inode *dir,
 {
 	unsigned count, indirect, level, i;
 	struct dx_entry *at, *entries, *p, *q, *m;
-	struct dx_root *root;
+	struct dx_root_info *info;
 	struct dx_frame *frame = frame_in;
 	struct dx_frame *ret_err = ERR_PTR(ERR_BAD_DX_DIR);
 	u32 hash;
@@ -787,23 +788,24 @@ dx_probe(struct ext4_filename *fname, struct inode *dir,
 	if (IS_ERR(frame->bh))
 		return (struct dx_frame *) frame->bh;
 
-	root = (struct dx_root *) frame->bh->b_data;
-	if (root->info.hash_version != DX_HASH_TEA &&
-	    root->info.hash_version != DX_HASH_HALF_MD4 &&
-	    root->info.hash_version != DX_HASH_LEGACY &&
-	    root->info.hash_version != DX_HASH_SIPHASH) {
-		ext4_warning_inode(dir, "Unrecognised inode hash code %u",
-				   root->info.hash_version);
+	info = dx_get_dx_info((struct ext4_dir_entry_2 *)frame->bh->b_data);
+	if (info->hash_version != DX_HASH_TEA &&
+	    info->hash_version != DX_HASH_HALF_MD4 &&
+	    info->hash_version != DX_HASH_LEGACY &&
+	    info->hash_version != DX_HASH_SIPHASH) {
+		ext4_warning(dir->i_sb,
+			"Unrecognised inode hash code %d for directory #%llu",
+			info->hash_version, dir->i_ino);
 		goto fail;
 	}
 	if (ext4_hash_in_dirent(dir)) {
-		if (root->info.hash_version != DX_HASH_SIPHASH) {
+		if (info->hash_version != DX_HASH_SIPHASH) {
 			ext4_warning_inode(dir,
 				"Hash in dirent, but hash is not SIPHASH");
 			goto fail;
 		}
 	} else {
-		if (root->info.hash_version == DX_HASH_SIPHASH) {
+		if (info->hash_version == DX_HASH_SIPHASH) {
 			ext4_warning_inode(dir,
 				"Hash code is SIPHASH, but hash not in dirent");
 			goto fail;
@@ -811,7 +813,7 @@ dx_probe(struct ext4_filename *fname, struct inode *dir,
 	}
 	if (fname)
 		hinfo = &fname->hinfo;
-	hinfo->hash_version = root->info.hash_version;
+	hinfo->hash_version = info->hash_version;
 	if (hinfo->hash_version <= DX_HASH_TEA)
 		hinfo->hash_version += EXT4_SB(dir->i_sb)->s_hash_unsigned;
 	hinfo->seed = EXT4_SB(dir->i_sb)->s_hash_seed;
@@ -827,13 +829,13 @@ dx_probe(struct ext4_filename *fname, struct inode *dir,
 	}
 	hash = hinfo->hash;
 
-	if (root->info.unused_flags & 1) {
+	if (info->unused_flags & 1) {
 		ext4_warning_inode(dir, "Unimplemented hash flags: %#06x",
-				   root->info.unused_flags);
+				   info->unused_flags);
 		goto fail;
 	}
 
-	indirect = root->info.indirect_levels;
+	indirect = info->indirect_levels;
 	if (indirect >= ext4_dir_htree_level(dir->i_sb)) {
 		ext4_warning(dir->i_sb,
 			     "Directory (ino: %llu) htree depth %#06x exceed"
@@ -846,14 +848,12 @@ dx_probe(struct ext4_filename *fname, struct inode *dir,
 		goto fail;
 	}
 
-	entries = (struct dx_entry *)(((char *)&root->info) +
-				      root->info.info_length);
+	entries = (struct dx_entry *)(((char *)info) + info->info_length);
 
-	if (dx_get_limit(entries) != dx_root_limit(dir,
-						   root->info.info_length)) {
+	if (dx_get_limit(entries) != dx_root_limit(dir, info->info_length)) {
 		ext4_warning_inode(dir, "dx entry: limit %u != root limit %u",
 				   dx_get_limit(entries),
-				   dx_root_limit(dir, root->info.info_length));
+				   dx_root_limit(dir, info->info_length));
 		goto fail;
 	}
 
@@ -939,7 +939,7 @@ static void dx_release(struct dx_frame *frames)
 	if (frames[0].bh == NULL)
 		return;
 
-	info = &((struct dx_root *)frames[0].bh->b_data)->info;
+	info = dx_get_dx_info((struct ext4_dir_entry_2 *)frames[0].bh->b_data);
 	/* save local copy, "info" may be freed after brelse() */
 	indirect_levels = info->indirect_levels;
 	for (i = 0; i <= indirect_levels; i++) {
@@ -2151,44 +2151,38 @@ static int add_dirent_to_buf(handle_t *handle, struct ext4_filename *fname,
 	return err ? err : err2;
 }
 
-static bool ext4_check_dx_root(struct inode *dir, struct dx_root *root)
+static bool ext4_check_dx_root(struct inode *dir,
+			       struct ext4_dir_entry_2 *dot_de,
+			       struct ext4_dir_entry_2 *dotdot_de,
+			       struct ext4_dir_entry_2 **entry)
 {
-	struct fake_dirent *fde;
 	const char *error_msg;
-	unsigned int rlen;
 	unsigned int blocksize = dir->i_sb->s_blocksize;
-	char *blockend = (char *)root + dir->i_sb->s_blocksize;
+	struct ext4_dir_entry_2 *de = NULL;
 
-	fde = &root->dot;
-	if (unlikely(fde->name_len != 1)) {
+	if (unlikely(dot_de->name_len != 1)) {
 		error_msg = "invalid name_len for '.'";
 		goto corrupted;
 	}
-	if (unlikely(strncmp(root->dot_name, ".", fde->name_len))) {
+	if (unlikely(strncmp(dot_de->name, ".", dot_de->name_len))) {
 		error_msg = "invalid name for '.'";
 		goto corrupted;
 	}
-	rlen = ext4_rec_len_from_disk(fde->rec_len, blocksize);
-	if (unlikely((char *)fde + rlen >= blockend)) {
-		error_msg = "invalid rec_len for '.'";
-		goto corrupted;
-	}
 
-	fde = &root->dotdot;
-	if (unlikely(fde->name_len != 2)) {
+	if (unlikely(dotdot_de->name_len != 2)) {
 		error_msg = "invalid name_len for '..'";
 		goto corrupted;
 	}
-	if (unlikely(strncmp(root->dotdot_name, "..", fde->name_len))) {
+	if (unlikely(strncmp(dotdot_de->name, "..", dotdot_de->name_len))) {
 		error_msg = "invalid name for '..'";
 		goto corrupted;
 	}
-	rlen = ext4_rec_len_from_disk(fde->rec_len, blocksize);
-	if (unlikely((char *)fde + rlen >= blockend)) {
+	de = ext4_next_entry(dotdot_de, blocksize);
+	if ((char *)de >= (((char *)dot_de) + blocksize)) {
 		error_msg = "invalid rec_len for '..'";
 		goto corrupted;
 	}
-
+	*entry = de;
 	return true;
 
 corrupted:
@@ -2206,16 +2200,15 @@ static int make_indexed_dir(handle_t *handle, struct ext4_filename *fname,
 			    struct inode *inode, struct buffer_head *bh)
 {
 	struct buffer_head *bh2;
-	struct dx_root	*root;
 	struct dx_frame	frames[EXT4_HTREE_LEVEL], *frame;
 	struct dx_entry *entries;
-	struct ext4_dir_entry_2	*de, *de2;
+	struct ext4_dir_entry_2	*de, *de2, *dot_de, *dotdot_de;
 	char		*data2, *top;
 	unsigned	len;
 	int		retval;
 	unsigned	blocksize;
 	ext4_lblk_t  block;
-	struct fake_dirent *fde;
+	struct dx_root_info *dx_info;
 	int csum_size = 0;
 
 	if (ext4_has_feature_metadata_csum(inode->i_sb))
@@ -2232,17 +2225,15 @@ static int make_indexed_dir(handle_t *handle, struct ext4_filename *fname,
 		return retval;
 	}
 
-	root = (struct dx_root *) bh->b_data;
-	if (!ext4_check_dx_root(dir, root)) {
+	dot_de = (struct ext4_dir_entry_2 *)bh->b_data;
+	dotdot_de = ext4_next_entry(dot_de, blocksize);
+	if (!ext4_check_dx_root(dir, dot_de, dotdot_de, &de)) {
 		brelse(bh);
 		return -EFSCORRUPTED;
 	}
 
 	/* The 0th block becomes the root, move the dirents out */
-	fde = &root->dotdot;
-	de = (struct ext4_dir_entry_2 *)((char *)fde +
-		ext4_rec_len_from_disk(fde->rec_len, blocksize));
-	len = ((char *) root) + (blocksize - csum_size) - (char *) de;
+	len = ((char *)dot_de) + (blocksize - csum_size) - (char *)de;
 
 	/* Allocate new block for the 0th block's dirents */
 	bh2 = ext4_append(handle, dir, &block);
@@ -2273,24 +2264,27 @@ static int make_indexed_dir(handle_t *handle, struct ext4_filename *fname,
 		ext4_initialize_dirent_tail(bh2, blocksize);
 
 	/* Initialize the root; the dot dirents already exist */
-	de = (struct ext4_dir_entry_2 *) (&root->dotdot);
-	de->rec_len = ext4_rec_len_to_disk(
-			blocksize - ext4_dir_rec_len(2, NULL), blocksize);
-	memset (&root->info, 0, sizeof(root->info));
-	root->info.info_length = sizeof(root->info);
+	dotdot_de->rec_len =
+		ext4_rec_len_to_disk(blocksize - le16_to_cpu(dot_de->rec_len),
+				     blocksize);
+
+	/* initialize hashing info */
+	dx_info = dx_get_dx_info(dot_de);
+	memset(dx_info, 0, sizeof(*dx_info));
+	dx_info->info_length = sizeof(*dx_info);
 	if (ext4_hash_in_dirent(dir))
-		root->info.hash_version = DX_HASH_SIPHASH;
+		dx_info->hash_version = DX_HASH_SIPHASH;
 	else
-		root->info.hash_version =
+		dx_info->hash_version =
 				EXT4_SB(dir->i_sb)->s_def_hash_version;
 
-	entries = root->entries;
+	entries = (void *)dx_info + sizeof(*dx_info);
 	dx_set_block(entries, 1);
 	dx_set_count(entries, 1);
-	dx_set_limit(entries, dx_root_limit(dir, sizeof(root->info)));
+	dx_set_limit(entries, dx_root_limit(dir, sizeof(*dx_info)));
 
 	/* Initialize as for dx_probe */
-	fname->hinfo.hash_version = root->info.hash_version;
+	fname->hinfo.hash_version = dx_info->hash_version;
 	if (fname->hinfo.hash_version <= DX_HASH_TEA)
 		fname->hinfo.hash_version += EXT4_SB(dir->i_sb)->s_hash_unsigned;
 	fname->hinfo.seed = EXT4_SB(dir->i_sb)->s_hash_seed;
@@ -2600,7 +2594,7 @@ static int ext4_dx_add_entry(handle_t *handle, struct ext4_filename *fname,
 			if (restart || err)
 				goto journal_error;
 		} else {
-			struct dx_root *dxroot;
+			struct dx_root_info *info;
 			memcpy((char *) entries2, (char *) entries,
 			       icount * sizeof(struct dx_entry));
 			dx_set_limit(entries2, dx_node_limit(dir));
@@ -2608,8 +2602,9 @@ static int ext4_dx_add_entry(handle_t *handle, struct ext4_filename *fname,
 			/* Set up root */
 			dx_set_count(entries, 1);
 			dx_set_block(entries + 0, newblock);
-			dxroot = (struct dx_root *)frames[0].bh->b_data;
-			dxroot->info.indirect_levels += 1;
+			info = dx_get_dx_info((struct ext4_dir_entry_2 *)
+					      frames[0].bh->b_data);
+			info->indirect_levels += 1;
 			dxtrace(printk(KERN_DEBUG
 				       "Creating %d level index...\n",
-				       dxroot->info.indirect_levels));
+				       info->indirect_levels));
-- 
2.43.7


^ permalink raw reply related

* [PATCH v3 02/10] ext4: add ext4_dir_entry_is_tail()
From: Artem Blagodarenko @ 2026-06-19 19:10 UTC (permalink / raw)
  To: linux-ext4; +Cc: adilger.kernel, Artem Blagodarenko, Andreas Dilger
In-Reply-To: <20260619191022.27008-1-ablagodarenko@thelustrecollective.com>

From: Artem Blagodarenko <artem.blagodarenko@gmail.com>

Replace open-coded checks for directory tail entries with a call
to ext4_dir_entry_is_tail(). This helper will also be used by
upcoming changes.

Signed-off-by: Artem Blagodarenko <artem.blagodarenko@gmail.com>
Reviewed-by: Andreas Dilger <adilger@dilger.ca>
---
 fs/ext4/ext4.h  | 16 ++++++++++++++++
 fs/ext4/namei.c |  7 +------
 2 files changed, 17 insertions(+), 6 deletions(-)

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index b37c136ea3ab..9400bc2858a5 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -4004,6 +4004,22 @@ static inline void ext4_clear_io_unwritten_flag(ext4_io_end_t *io_end)
 		io_end->flag &= ~EXT4_IO_END_UNWRITTEN;
 }
 
+/*
+ * ext4_dir_entry_is_tail() - Check if a directory entry is a tail entry.
+ * @de: directory entry to check
+ *
+ * Returns true if @de is a directory block tail entry (checksum record).
+ */
+static inline bool ext4_dir_entry_is_tail(struct ext4_dir_entry_2 *de)
+{
+	struct ext4_dir_entry_tail *t = (struct ext4_dir_entry_tail *)de;
+
+	return !t->det_reserved_zero1 &&
+	       le16_to_cpu(t->det_rec_len) == sizeof(*t) &&
+	       !t->det_reserved_zero2 &&
+	       t->det_reserved_ft == EXT4_FT_DIR_CSUM;
+}
+
 extern const struct iomap_ops ext4_iomap_ops;
 extern const struct iomap_ops ext4_iomap_report_ops;
 
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 2c951570990f..f47f072a1a89 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -314,7 +314,6 @@ static struct ext4_dir_entry_tail *get_dirent_tail(struct inode *inode,
 						   struct buffer_head *bh)
 {
 	struct ext4_dir_entry_tail *t;
-	int blocksize = EXT4_BLOCK_SIZE(inode->i_sb);
 
 #ifdef PARANOID
 	struct ext4_dir_entry_2 *d, *top;
@@ -334,11 +333,7 @@ static struct ext4_dir_entry_tail *get_dirent_tail(struct inode *inode,
 	t = EXT4_DIRENT_TAIL(bh->b_data, EXT4_BLOCK_SIZE(inode->i_sb));
 #endif
 
-	if (t->det_reserved_zero1 ||
-	    (ext4_rec_len_from_disk(t->det_rec_len, blocksize) !=
-	     sizeof(struct ext4_dir_entry_tail)) ||
-	    t->det_reserved_zero2 ||
-	    t->det_reserved_ft != EXT4_FT_DIR_CSUM)
+	if (!ext4_dir_entry_is_tail((struct ext4_dir_entry_2 *)t))
 		return NULL;
 
 	return t;
-- 
2.43.7


^ permalink raw reply related

* [PATCH v3 01/10] ext4: replace ext4_dir_entry with ext4_dir_entry_2
From: Artem Blagodarenko @ 2026-06-19 19:10 UTC (permalink / raw)
  To: linux-ext4; +Cc: adilger.kernel, Artem Blagodarenko, Andreas Dilger
In-Reply-To: <20260619191022.27008-1-ablagodarenko@thelustrecollective.com>

From: Artem Blagodarenko <artem.blagodarenko@gmail.com>

Replace remaining uses of struct ext4_dir_entry in namei.c
with struct ext4_dir_entry_2.

The code paths affected by this change already depend on the
filetype feature, so using struct ext4_dir_entry_2 is
appropriate and avoids mixing the two directory entry types
unnecessarily.

This change does not affect support for 16-bit rec_len.

Signed-off-by: Artem Blagodarenko <artem.blagodarenko@gmail.com>
Reviewed-by: Andreas Dilger <adilger@dilger.ca>
---
 fs/ext4/namei.c | 38 +++++++++++++++++++-------------------
 1 file changed, 19 insertions(+), 19 deletions(-)

diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index cc49ae04a6f6..2c951570990f 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -102,7 +102,7 @@ static struct buffer_head *ext4_append(handle_t *handle,
 }
 
 static int ext4_dx_csum_verify(struct inode *inode,
-			       struct ext4_dir_entry *dirent);
+			       struct ext4_dir_entry_2 *dirent);
 
 /*
  * Hints to ext4_read_dirblock regarding whether we expect a directory
@@ -128,7 +128,7 @@ static struct buffer_head *__ext4_read_dirblock(struct inode *inode,
 						unsigned int line)
 {
 	struct buffer_head *bh;
-	struct ext4_dir_entry *dirent;
+	struct ext4_dir_entry_2 *dirent;
 	int is_dx_block = 0;
 
 	if (block >= inode->i_size >> inode->i_blkbits) {
@@ -160,7 +160,7 @@ static struct buffer_head *__ext4_read_dirblock(struct inode *inode,
 	}
 	if (!bh)
 		return NULL;
-	dirent = (struct ext4_dir_entry *) bh->b_data;
+	dirent = (struct ext4_dir_entry_2 *) bh->b_data;
 	/* Determine whether or not we have an index block */
 	if (is_dx(inode)) {
 		if (block == 0)
@@ -317,13 +317,13 @@ static struct ext4_dir_entry_tail *get_dirent_tail(struct inode *inode,
 	int blocksize = EXT4_BLOCK_SIZE(inode->i_sb);
 
 #ifdef PARANOID
-	struct ext4_dir_entry *d, *top;
+	struct ext4_dir_entry_2 *d, *top;
 
-	d = (struct ext4_dir_entry *)bh->b_data;
-	top = (struct ext4_dir_entry *)(bh->b_data +
+	d = (struct ext4_dir_entry_2 *)bh->b_data;
+	top = (struct ext4_dir_entry_2 *)(bh->b_data +
 		(blocksize - sizeof(struct ext4_dir_entry_tail)));
 	while (d < top && ext4_rec_len_from_disk(d->rec_len, blocksize))
-		d = (struct ext4_dir_entry *)(((void *)d) +
+		d = (struct ext4_dir_entry_2 *)(((void *)d) +
 		    ext4_rec_len_from_disk(d->rec_len, blocksize));
 
 	if (d != top)
@@ -410,22 +410,22 @@ int ext4_handle_dirty_dirblock(handle_t *handle,
 }
 
 static struct dx_countlimit *get_dx_countlimit(struct inode *inode,
-					       struct ext4_dir_entry *dirent,
+					       struct ext4_dir_entry_2 *dirent,
 					       int *offset)
 {
-	struct ext4_dir_entry *dp;
+	struct ext4_dir_entry_2 *de;
 	struct dx_root_info *root;
 	int count_offset;
 	int blocksize = EXT4_BLOCK_SIZE(inode->i_sb);
 	unsigned int rlen = ext4_rec_len_from_disk(dirent->rec_len, blocksize);
 
 	if (rlen == blocksize)
-		count_offset = 8;
+		count_offset = sizeof(struct dx_node);
 	else if (rlen == 12) {
-		dp = (struct ext4_dir_entry *)(((void *)dirent) + 12);
-		if (ext4_rec_len_from_disk(dp->rec_len, blocksize) != blocksize - 12)
+		de = (struct ext4_dir_entry_2 *)(((void *)dirent) + 12);
+		if (ext4_rec_len_from_disk(de->rec_len, blocksize) != blocksize - 12)
 			return NULL;
-		root = (struct dx_root_info *)(((void *)dp + 12));
+		root = (struct dx_root_info *)(((void *)de + 12));
 		if (root->reserved_zero ||
 		    root->info_length != sizeof(struct dx_root_info))
 			return NULL;
@@ -438,7 +438,7 @@ static struct dx_countlimit *get_dx_countlimit(struct inode *inode,
 	return (struct dx_countlimit *)(((void *)dirent) + count_offset);
 }
 
-static __le32 ext4_dx_csum(struct inode *inode, struct ext4_dir_entry *dirent,
+static __le32 ext4_dx_csum(struct inode *inode, struct ext4_dir_entry_2 *dirent,
 			   int count_offset, int count, struct dx_tail *t)
 {
 	struct ext4_inode_info *ei = EXT4_I(inode);
@@ -456,7 +456,7 @@ static __le32 ext4_dx_csum(struct inode *inode, struct ext4_dir_entry *dirent,
 }
 
 static int ext4_dx_csum_verify(struct inode *inode,
-			       struct ext4_dir_entry *dirent)
+			       struct ext4_dir_entry_2 *dirent)
 {
 	struct dx_countlimit *c;
 	struct dx_tail *t;
@@ -485,7 +485,7 @@ static int ext4_dx_csum_verify(struct inode *inode,
 	return 1;
 }
 
-static void ext4_dx_csum_set(struct inode *inode, struct ext4_dir_entry *dirent)
+static void ext4_dx_csum_set(struct inode *inode, struct ext4_dir_entry_2 *dirent)
 {
 	struct dx_countlimit *c;
 	struct dx_tail *t;
@@ -515,7 +515,7 @@ static inline int ext4_handle_dirty_dx_node(handle_t *handle,
 					    struct inode *inode,
 					    struct buffer_head *bh)
 {
-	ext4_dx_csum_set(inode, (struct ext4_dir_entry *)bh->b_data);
+	ext4_dx_csum_set(inode, (struct ext4_dir_entry_2 *)bh->b_data);
 	return ext4_handle_dirty_metadata(handle, inode, bh);
 }
 
@@ -1488,7 +1488,7 @@ int ext4_search_dir(struct buffer_head *bh, char *search_buf, int buf_size,
 }
 
 static int is_dx_internal_node(struct inode *dir, ext4_lblk_t block,
-			       struct ext4_dir_entry *de)
+			       struct ext4_dir_entry_2 *de)
 {
 	struct super_block *sb = dir->i_sb;
 
@@ -1619,7 +1619,7 @@ static struct buffer_head *__ext4_find_entry(struct inode *dir,
 		}
 		if (!buffer_verified(bh) &&
 		    !is_dx_internal_node(dir, block,
-					 (struct ext4_dir_entry *)bh->b_data) &&
+					 (struct ext4_dir_entry_2 *)bh->b_data) &&
 		    !ext4_dirblock_csum_verify(dir, bh)) {
 			EXT4_ERROR_INODE_ERR(dir, EFSBADCRC,
 					     "checksumming directory "
-- 
2.43.7


^ permalink raw reply related

* [PATCH v3 00/10] Data in direntry (dirdata) feature
From: Artem Blagodarenko @ 2026-06-19 19:10 UTC (permalink / raw)
  To: linux-ext4; +Cc: adilger.kernel, Artem Blagodarenko, syzbot

EXT4 currently stores a hash in the directory entry
(dirent) immediately after the file name to support
simultaneous fscrypt and casefold functionality.

It has been discussed within the EXT4 community that
this hash could instead be stored in dirdata. This
would make it the second (or third, in the case of
64-bit inode counts) user of dirdata.

At the same time, the existing format—where the hash
is placed after the file name—must continue to be
supported. With these patches, EXT4 can handle the
hash in both formats.

The first user of this feature is LUFID
(Locally Unique File ID).

Support for fscrypt and case-insensitive directories
with dirdata enabled has been verified using a
dedicated xfstest submitted to the xfstests list as
a separate patch.

e2fsprogs support is provided in a separate patch
series.

Changes in v3:
- Fixed issues reported by automated review of v2:
  - dx_get_dx_info() and get_dx_countlimit() called
    ext4_dir_entry_len() with the directory inode
    hardcoded to NULL, forcing its blocksize fallback
    to 4096 regardless of the real filesystem blocksize.
    Both now pass the real inode through, and
    dx_get_dx_info() also rejects results that fall
    outside the directory block.
  - ext4_dirdata_get() declared a local "dfid" that
    shadowed the function's own "dfid" output parameter,
    so a requested LUFID copy never reached the caller's
    buffer. Renamed the local and fixed the copy.
  - ext4_dirdata_get()/ext4_dirdata_set() compared
    offsets against the raw on-disk rec_len instead of
    decoding it via ext4_rec_len_from_disk(), which is
    incorrect on big-endian hosts and mishandles the
    "0/65535 means full block" sentinel. Both now decode
    rec_len once and use the decoded value throughout.
  - EXT4_IOC_SET_LUFID deleted the existing directory
    entry before re-adding it with the new LUFID data;
    if the re-add failed, the inode was left with no
    directory entry at all. It now attempts to restore
    the original entry on failure, and loudly flags
    inode corruption if that also fails.
- syzbot ci tested the fix for these issues; per its
  request, this is being submitted with the corresponding
  Tested-by tag below.
- Rebased onto the latest codebase.

Artem Blagodarenko (10):
  ext4: replace ext4_dir_entry with ext4_dir_entry_2
  ext4: add ext4_dir_entry_is_tail()
  ext4: refactor dx_root to support variable dirent sizes
  ext4: add dirdata format definitions and access helpers
  ext4: preserve dirdata bits in get_dtype()
  ext4: add ext4_dir_entry_len() and harden dirdata parsing
  ext4: rename ext4_dir_rec_len() and clarify dirdata usage
  ext4: dirdata feature
  ext4: add dirdata set/get helpers
  ext4: Add EXT4_IOC_SET_LUFID ioctl for setting LUFID on directory
    entries

 foofile.txt               |   0
 fs/ext4/dir.c             |   9 +-
 fs/ext4/ext4.h            | 205 +++++++++++-
 fs/ext4/inline.c          |  37 ++-
 fs/ext4/ioctl.c           |  62 ++++
 fs/ext4/namei.c           | 650 ++++++++++++++++++++++++++++----------
 fs/ext4/sysfs.c           |   2 +
 include/uapi/linux/ext4.h |  13 +
 8 files changed, 780 insertions(+), 198 deletions(-)
 create mode 100644 foofile.txt

Tested-by: syzbot@syzkaller.appspotmail.com
-- 
2.43.7


^ permalink raw reply

* [PATCH RESEND 4/4] libext2fs: add ext2fs_xattrs_release_all() helper
From: Etienne AUJAMES @ 2026-06-19 15:33 UTC (permalink / raw)
  To: linux-ext4, Theodore Ts'o; +Cc: Andreas Dilger, Li Dongyang
In-Reply-To: <ajVdnQUu9tSrKldW@eaujamesFR0130>

This patch adds a helper function ext2fs_xattrs_release_all() which
removes all extended attributes and updates the quota accordingly.

The main purpose of this is to handle ea_inode xattrs in e2fsck when
deleting orphan inodes:

 # e2fsck -yf /tmp/ext4
 e2fsck 1.47.3-wc2 (11-Nov-2025)
 Clearing orphaned inode 12 (uid=0, gid=0, mode=0100644, size=0)
 Pass 1: Checking inodes, blocks, and sizes
 Pass 2: Checking directory structure
 Pass 3: Checking directory connectivity
 Pass 4: Checking reference counts
 Regular filesystem inode 13 has EA_INODE flag set. Clear<y>? yes
 Unattached inode 13
 Connect to /lost+found<y>? yes
 Inode 13 ref count is 2, should be 1.  Fix<y>? yes

fuse2fs, debugfs and mke2fs are updated to use this function and
handle ea_inode on inode deletion.

Update d_xattr_ea_inode to check for the inode deletion case.
Add a regression test: f_orphan_ea_inode

Signed-off-by: Etienne AUJAMES <eaujames@ddn.com>
Change-Id: I4a84a50d43b8b9aab2dfc352a92256c710a3659e
Lustre-bug-id: https://jira.whamcloud.com/browse/LU-20049
---
 debugfs/debugfs.c                |  33 +++++++--
 e2fsck/super.c                   |  67 +++++++++++-------
 lib/ext2fs/ext2fs.h              |   3 +
 lib/ext2fs/ext_attr.c            |  41 +++++++++++
 misc/create_inode_libarchive.c   |  35 ++++-----
 misc/fuse2fs.c                   | 117 +++++++++++--------------------
 tests/d_xattr_ea_inode/expect    |  51 ++++++++++++++
 tests/d_xattr_ea_inode/script    |  55 ++++++++++-----
 tests/f_orphan_ea_inode/expect.1 |   6 ++
 tests/f_orphan_ea_inode/expect.2 |   7 ++
 tests/f_orphan_ea_inode/image.gz | Bin 0 -> 2139 bytes
 tests/f_orphan_ea_inode/name     |   1 +
 tests/f_orphan_ea_inode/script   |   3 +
 13 files changed, 277 insertions(+), 142 deletions(-)
 create mode 100644 tests/f_orphan_ea_inode/expect.1
 create mode 100644 tests/f_orphan_ea_inode/expect.2
 create mode 100644 tests/f_orphan_ea_inode/image.gz
 create mode 100644 tests/f_orphan_ea_inode/name
 create mode 100644 tests/f_orphan_ea_inode/script

diff --git a/debugfs/debugfs.c b/debugfs/debugfs.c
index b9f248be2..d316293d2 100644
--- a/debugfs/debugfs.c
+++ b/debugfs/debugfs.c
@@ -1861,21 +1861,40 @@ static int release_blocks_proc(ext2_filsys fs, blk64_t *blocknr,
 
 static void kill_file_by_inode(ext2_ino_t inode)
 {
-	struct ext2_inode inode_buf;
+	struct ext2_inode_large *inode_buf;
+	size_t inode_size = EXT2_INODE_SIZE(current_fs->super);
+	errcode_t err;
 
-	if (debugfs_read_inode(inode, &inode_buf, 0))
-		return;
-	ext2fs_set_dtime(current_fs,  &inode_buf);
-	if (debugfs_write_inode(inode, &inode_buf, 0))
+	err = ext2fs_get_memzero(inode_size, &inode_buf);
+	if (err)
 		return;
-	if (ext2fs_inode_has_valid_blocks2(current_fs, &inode_buf)) {
+
+	err = ext2fs_read_inode_full(current_fs, inode, EXT2_INODE(inode_buf),
+				     inode_size);
+	if (err) {
+		com_err(__func__, err, "while reading inode %u", inode);
+		goto out;
+	}
+
+	ext2fs_set_dtime(current_fs,  EXT2_INODE(inode_buf));
+	ext2fs_xattrs_release_all(current_fs, inode, inode_buf, inode_size,
+				  NULL);
+	if (ext2fs_inode_has_valid_blocks2(current_fs, EXT2_INODE(inode_buf))) {
 		blk64_t last_cluster = 0;
 		ext2fs_block_iterate3(current_fs, inode, BLOCK_FLAG_READ_ONLY,
 				      NULL, release_blocks_proc, &last_cluster);
 	}
 	printf("\n");
 	ext2fs_inode_alloc_stats2(current_fs, inode, -1,
-				  LINUX_S_ISDIR(inode_buf.i_mode));
+				  LINUX_S_ISDIR(inode_buf->i_mode));
+
+	err = ext2fs_write_inode_full(current_fs, inode, EXT2_INODE(inode_buf),
+				     inode_size);
+	if (err)
+		com_err(__func__, err, "while writing inode %u", inode);
+
+out:
+	ext2fs_free_mem(&inode_buf);
 }
 
 
diff --git a/e2fsck/super.c b/e2fsck/super.c
index c2ccefd54..1a94ba567 100644
--- a/e2fsck/super.c
+++ b/e2fsck/super.c
@@ -156,13 +156,14 @@ static errcode_t truncate_inode_blocks(e2fsck_t ctx, ext2_ino_t ino,
 	struct process_block_struct pb = { 0 };
 	e2_blkcnt_t truncate_block = 0;
 	__u32 truncate_offset = 0;
-	blk64_t blk;
+	blk64_t blk, iblks;
 	int ret_flags;
 	errcode_t retval = 0;
 
 	if (!ext2fs_inode_has_valid_blocks2(fs, EXT2_INODE(inode)))
 		return 0;
 
+	iblks = ext2fs_get_stat_i_blocks(fs, EXT2_INODE(inode));
 	if (inode->i_links_count) {
 		truncate_offset = inode->i_size % fs->blocksize;
 		truncate_block = (e2_blkcnt_t)
@@ -190,6 +191,10 @@ static errcode_t truncate_inode_blocks(e2fsck_t ctx, ext2_ino_t ino,
 			"release_inode_blocks");
 
 	ext2fs_iblk_sub_blocks(fs, EXT2_INODE(inode), pb.truncated_blocks);
+	iblks -= ext2fs_get_stat_i_blocks(fs, EXT2_INODE(inode));
+	if (ctx->qctx)
+		quota_data_sub(ctx->qctx, inode, ino, iblks * 512);
+
 	if (!truncate_offset)
 		return 0;
 
@@ -217,17 +222,19 @@ static errcode_t truncate_inode_blocks(e2fsck_t ctx, ext2_ino_t ino,
  * not deleted.
  */
 static int release_inode_blocks(e2fsck_t ctx, ext2_ino_t ino,
-				struct ext2_inode_large *inode, char *block_buf,
+				struct ext2_inode_large *inode,
+				size_t inode_size, char *block_buf,
 				struct problem_context *pctx)
 {
 	ext2_filsys			fs = ctx->fs;
-	blk64_t				free_blks, ino_blks;
+	blk64_t				free_blks;
+	__u32				free_inodes;
 	char				*buf;
 	errcode_t			err;
 	int				rc = 0;
 
+	free_inodes = fs->super->s_free_inodes_count;
 	free_blks = ext2fs_free_blocks_count(fs->super);
-	ino_blks = ext2fs_get_stat_i_blocks(fs, EXT2_INODE(inode));
 	buf = block_buf + 3 * ctx->fs->blocksize;
 	if (truncate_inode_blocks(ctx, ino, inode, buf, pctx)) {
 		rc = 1;
@@ -236,7 +243,7 @@ static int release_inode_blocks(e2fsck_t ctx, ext2_ino_t ino,
 	if (inode->i_links_count)
 		goto update_counts;
 
-	err = ext2fs_free_ext_attr(fs, ino, inode);
+	err = ext2fs_xattrs_release_all(fs, ino, inode, inode_size, ctx->qctx);
 	if (err) {
 		com_err(__func__, err,
-			_("while calling ext2fs_free_ext_attr for inode %u"),
+			_("while calling ext2fs_xattrs_release_all for inode %u"),
@@ -249,9 +256,8 @@ static int release_inode_blocks(e2fsck_t ctx, ext2_ino_t ino,
 
 update_counts:
 	ctx->free_blocks += ext2fs_free_blocks_count(fs->super) - free_blks;
-	ino_blks -= ext2fs_get_stat_i_blocks(fs, EXT2_INODE(inode));
-	if (ctx->qctx)
-		quota_data_sub(ctx->qctx, inode, 0, ino_blks << 9);
+	free_inodes = fs->super->s_free_inodes_count - free_inodes;
+	ctx->free_inodes += free_inodes;
 
 	return rc;
 }
@@ -312,44 +318,55 @@ static int release_orphan_inode(e2fsck_t ctx, ext2_ino_t *ino, char *block_buf)
 {
 	ext2_filsys fs = ctx->fs;
 	struct problem_context pctx;
-	struct ext2_inode_large inode;
+	struct ext2_inode_large *inode;
+	size_t inode_size = EXT2_INODE_SIZE(fs->super);
 	ext2_ino_t next_ino;
+	int rc = 1;
+
+	if (ext2fs_get_memzero(inode_size, &inode))
+		return 1;
 
-	e2fsck_read_inode_full(ctx, *ino, EXT2_INODE(&inode),
-				sizeof(inode), "release_orphan_inode");
+	e2fsck_read_inode_full(ctx, *ino, EXT2_INODE(inode),
+				     inode_size, __func__);
 	clear_problem_context(&pctx);
 	pctx.ino = *ino;
-	pctx.inode = EXT2_INODE(&inode);
-	pctx.str = inode.i_links_count ? _("Truncating") : _("Clearing");
+	pctx.inode = EXT2_INODE(inode);
+	pctx.str = inode->i_links_count ? _("Truncating") : _("Clearing");
 
 	fix_problem(ctx, PR_0_ORPHAN_CLEAR_INODE, &pctx);
 
-	next_ino = inode.i_dtime;
+	next_ino = inode->i_dtime;
 	if (next_ino &&
 	    ((next_ino < EXT2_FIRST_INODE(fs->super)) ||
 	     (next_ino > fs->super->s_inodes_count))) {
 		pctx.ino = next_ino;
 		fix_problem(ctx, PR_0_ORPHAN_ILLEGAL_INODE, &pctx);
-		return 1;
+		goto out;
 	}
 
-	if (release_inode_blocks(ctx, *ino, &inode, block_buf, &pctx))
-		return 1;
+	if (release_inode_blocks(ctx, *ino, inode, inode_size, block_buf,
+				 &pctx))
+		goto out;
 
-	if (!inode.i_links_count) {
+	if (!inode->i_links_count) {
 		if (ctx->qctx)
-			quota_data_inodes(ctx->qctx, &inode, *ino, -1);
+			quota_data_inodes(ctx->qctx, inode, *ino, -1);
 		ext2fs_inode_alloc_stats2(fs, *ino, -1,
-					  LINUX_S_ISDIR(inode.i_mode));
+					  LINUX_S_ISDIR(inode->i_mode));
 		ctx->free_inodes++;
-		ext2fs_set_dtime(fs, EXT2_INODE(&inode));
+		ext2fs_set_dtime(fs, EXT2_INODE(inode));
 	} else {
-		inode.i_dtime = 0;
+		inode->i_dtime = 0;
 	}
-	e2fsck_write_inode_full(ctx, *ino, EXT2_INODE(&inode),
-				sizeof(inode), "delete_file");
+	e2fsck_write_inode_full(ctx, *ino, EXT2_INODE(inode),
+				      inode_size, __func__);
 	*ino = next_ino;
-	return 0;
+	rc = 0;
+
+out:
+	ext2fs_free_mem(&inode);
+
+	return rc;
 }
 
 struct process_orphan_block_data {
diff --git a/lib/ext2fs/ext2fs.h b/lib/ext2fs/ext2fs.h
index 56de5ea50..cb3f1a3a1 100644
--- a/lib/ext2fs/ext2fs.h
+++ b/lib/ext2fs/ext2fs.h
@@ -1425,6 +1425,9 @@ errcode_t ext2fs_xattr_inode_max_size(ext2_filsys fs, ext2_ino_t ino,
 #define XATTR_HANDLE_FLAG_RAW	0x0001
 errcode_t ext2fs_xattrs_flags(struct ext2_xattr_handle *handle,
 			      unsigned int *new_flags, unsigned int *old_flags);
+errcode_t ext2fs_xattrs_release_all(ext2_filsys fs, ext2_ino_t ino,
+				    struct ext2_inode_large *inode,
+				    size_t inode_size, quota_ctx_t qctx);
 extern void ext2fs_ext_attr_block_rehash(struct ext2_ext_attr_header *header,
 					 struct ext2_ext_attr_entry *end);
 extern __u32 ext2fs_get_ea_inode_hash(struct ext2_inode *inode);
diff --git a/lib/ext2fs/ext_attr.c b/lib/ext2fs/ext_attr.c
index 3b90b70bb..2a2e79acd 100644
--- a/lib/ext2fs/ext_attr.c
+++ b/lib/ext2fs/ext_attr.c
@@ -1868,3 +1868,44 @@ errcode_t ext2fs_xattrs_flags(struct ext2_xattr_handle *handle,
 		handle->flags = *new_flags;
 	return 0;
 }
+
+errcode_t ext2fs_xattrs_release_all(ext2_filsys fs, ext2_ino_t ino,
+				    struct ext2_inode_large *inode,
+				    size_t inode_size, quota_ctx_t qctx)
+{
+	struct ext2_xattr_handle *h;
+	errcode_t err = 0;
+
+	if (!ext2fs_has_feature_ea_inode(fs->super)) {
+		blk64_t blk = ext2fs_file_acl_block(fs, EXT2_INODE(inode));
+
+		if (!blk)
+			return 0;
+
+		err = ext2fs_free_ext_attr(fs, ino, inode);
+		if (err || !qctx)
+			return err;
+
+		quota_data_sub(qctx, inode, ino,
+			       EXT2FS_C2B(fs, 1) * fs->blocksize);
+		return 0;
+	}
+
+	err = ext2fs_xattrs_open_inode(fs, ino, EXT2_INODE(inode), inode_size,
+				       qctx, &h);
+	if (err)
+		return err;
+
+	err = ext2fs_xattrs_read(h);
+	if (err)
+		goto out_close;
+
+	err = ext2fs_xattr_remove_all(h);
+	if (err)
+		goto out_close;
+
+out_close:
+	ext2fs_xattrs_close(&h);
+
+	return err;
+}
diff --git a/misc/create_inode_libarchive.c b/misc/create_inode_libarchive.c
index fadf0721f..4736e8c22 100644
--- a/misc/create_inode_libarchive.c
+++ b/misc/create_inode_libarchive.c
@@ -261,46 +261,49 @@ static inline unsigned int __round_up(unsigned int quantity, unsigned int size)
 static int remove_inode(ext2_filsys fs, ext2_ino_t ino)
 {
 	errcode_t ret = 0;
-	struct ext2_inode_large inode;
+	struct ext2_inode_large *inode;
+	size_t inode_size = EXT2_INODE_SIZE(fs->super);
 
-	memset(&inode, 0, sizeof(inode));
-	ret = ext2fs_read_inode_full(fs, ino, (struct ext2_inode *)&inode,
-				     sizeof(inode));
+	ret = ext2fs_get_memzero(inode_size, &inode);
+	if (ret)
+		return ret;
+
+	ret = ext2fs_read_inode_full(fs, ino, EXT2_INODE(inode), inode_size);
 	if (ret)
 		goto out;
 
-	switch (inode.i_links_count) {
+	switch (inode->i_links_count) {
 	case 0:
-		return 0; /* XXX: already done? */
+		goto out; /* XXX: already done? (ret == 0; frees inode) */
 	case 1:
-		inode.i_links_count--;
-		ext2fs_set_dtime(fs, EXT2_INODE(&inode));
+		inode->i_links_count--;
+		ext2fs_set_dtime(fs, EXT2_INODE(inode));
 		break;
 	default:
-		inode.i_links_count--;
+		inode->i_links_count--;
 	}
 
-	if (inode.i_links_count)
+	if (inode->i_links_count)
 		goto write_out;
 
 	/* Nobody holds this file; free its blocks! */
-	ret = ext2fs_free_ext_attr(fs, ino, &inode);
+	ret = ext2fs_xattrs_release_all(fs, ino, inode, inode_size, NULL);
 	if (ret)
 		goto write_out;
 
-	if (ext2fs_inode_has_valid_blocks2(fs, (struct ext2_inode *)&inode)) {
-		ret = ext2fs_punch(fs, ino, (struct ext2_inode *)&inode, NULL,
-				   0, ~0ULL);
+	if (ext2fs_inode_has_valid_blocks2(fs, EXT2_INODE(inode))) {
+		ret = ext2fs_punch(fs, ino, EXT2_INODE(inode), NULL, 0, ~0ULL);
 		if (ret)
 			goto write_out;
 	}
 
-	ext2fs_inode_alloc_stats2(fs, ino, -1, LINUX_S_ISDIR(inode.i_mode));
+	ext2fs_inode_alloc_stats2(fs, ino, -1, LINUX_S_ISDIR(inode->i_mode));
 
 write_out:
-	ret = ext2fs_write_inode_full(fs, ino, (struct ext2_inode *)&inode,
-				      sizeof(inode));
+	ret = ext2fs_write_inode_full(fs, ino, EXT2_INODE(inode), inode_size);
 out:
+	ext2fs_free_mem(&inode);
+
 	return ret;
 }
 
diff --git a/misc/fuse2fs.c b/misc/fuse2fs.c
index 94e289fab..11141f645 100644
--- a/misc/fuse2fs.c
+++ b/misc/fuse2fs.c
@@ -2274,123 +2274,88 @@ static int fuse2fs_unlink(struct fuse2fs *ff, const char *path,
 	return 0;
 }
 
-static int remove_ea_inodes(struct fuse2fs *ff, ext2_ino_t ino,
-			    struct ext2_inode_large *inode)
+static int remove_inode(struct fuse2fs *ff, ext2_ino_t ino)
 {
 	ext2_filsys fs = ff->fs;
-	struct ext2_xattr_handle *h;
 	errcode_t err;
+	struct ext2_inode_large *inode;
+	size_t inode_size = EXT2_INODE_SIZE(fs->super);
 	int ret = 0;
 
-	/*
-	 * The xattr handle maintains its own private copy of the inode, so
-	 * write ours to disk so that we can read it.
-	 */
-	err = fuse2fs_write_inode(fs, ino, inode);
+	err = ext2fs_get_memzero(inode_size, &inode);
 	if (err)
 		return translate_error(fs, ino, err);
 
-	err = ext2fs_xattrs_open(fs, ino, &h);
-	if (err)
-		return translate_error(fs, ino, err);
-
-	err = ext2fs_xattrs_read(h);
+	err = ext2fs_read_inode_full(fs, ino, EXT2_INODE(inode), inode_size);
 	if (err) {
 		ret = translate_error(fs, ino, err);
-		goto out_close;
-	}
-
-	err = ext2fs_xattr_remove_all(h);
-	if (err) {
-		ret = translate_error(fs, ino, err);
-		goto out_close;
+		goto out;
 	}
-
-out_close:
-	ext2fs_xattrs_close(&h);
-	if (ret)
-		return ret;
-
-	/* Now read the inode back in. */
-	err = fuse2fs_read_inode(fs, ino, inode);
-	if (err)
-		return translate_error(fs, ino, err);
-
-	return 0;
-}
-
-static int remove_inode(struct fuse2fs *ff, ext2_ino_t ino)
-{
-	ext2_filsys fs = ff->fs;
-	errcode_t err;
-	struct ext2_inode_large inode;
-	int ret = 0;
-
-	err = fuse2fs_read_inode(fs, ino, &inode);
-	if (err)
-		return translate_error(fs, ino, err);
-
 	dbg_printf(ff, "%s: put ino=%d links=%d\n", __func__, ino,
-		   inode.i_links_count);
+		   inode->i_links_count);
 
-	if (S_ISDIR(inode.i_mode)) {
+	if (S_ISDIR(inode->i_mode)) {
 		/*
 		 * Caller should have checked that this is an empty directory
 		 * before starting the unlink process.  nlink is usually 2, but
 		 * it could be 1 if this dir ever had more than 65000 subdirs.
 		 * Zero the link count.
 		 */
-		if (!ext2fs_dir_link_empty(EXT2_INODE(&inode)))
-			return translate_error(fs, ino, EXT2_ET_INODE_CORRUPTED);
-		inode.i_links_count = 0;
-		ext2fs_set_dtime(fs, EXT2_INODE(&inode));
+		if (!ext2fs_dir_link_empty(EXT2_INODE(inode))) {
+			ret = translate_error(fs, ino, EXT2_ET_INODE_CORRUPTED);
+			goto out;
+		}
+		inode->i_links_count = 0;
+		ext2fs_set_dtime(fs, EXT2_INODE(inode));
 	} else {
 		/*
 		 * Any other file type can be hardlinked, so all we need to do
 		 * is decrement the nlink.
 		 */
-		if (inode.i_links_count == 0)
-			return translate_error(fs, ino, EXT2_ET_INODE_CORRUPTED);
-		inode.i_links_count--;
-		if (!inode.i_links_count)
-			ext2fs_set_dtime(fs, EXT2_INODE(&inode));
+		if (inode->i_links_count == 0) {
+			ret = translate_error(fs, ino, EXT2_ET_INODE_CORRUPTED);
+			goto out;
+		}
+		inode->i_links_count--;
+		if (!inode->i_links_count)
+			ext2fs_set_dtime(fs, EXT2_INODE(inode));
 	}
 
-	ret = update_ctime(fs, ino, &inode);
+	ret = update_ctime(fs, ino, inode);
 	if (ret)
-		return ret;
+		goto out;
 
 	/* Still linked?  Leave it be. */
-	if (inode.i_links_count)
+	if (inode->i_links_count)
 		goto write_out;
 
-	if (ext2fs_has_feature_ea_inode(fs->super)) {
-		ret = remove_ea_inodes(ff, ino, &inode);
-		if (ret)
-			return ret;
-	}
-
 	/* Nobody holds this file; free its blocks! */
-	err = ext2fs_free_ext_attr(fs, ino, &inode);
-	if (err)
-		return translate_error(fs, ino, err);
+	err = ext2fs_xattrs_release_all(fs, ino, inode, inode_size, NULL);
+	if (err) {
+		ret = translate_error(fs, ino, err);
+		goto out;
+	}
 
-	if (ext2fs_inode_has_valid_blocks2(fs, EXT2_INODE(&inode))) {
-		err = ext2fs_punch(fs, ino, EXT2_INODE(&inode), NULL,
+	if (ext2fs_inode_has_valid_blocks2(fs, EXT2_INODE(inode))) {
+		err = ext2fs_punch(fs, ino, EXT2_INODE(inode), NULL,
 				   0, ~0ULL);
-		if (err)
-			return translate_error(fs, ino, err);
+		if (err) {
+			ret = translate_error(fs, ino, err);
+			goto out;
+		}
 	}
 
 	ext2fs_inode_alloc_stats2(fs, ino, -1,
-				  LINUX_S_ISDIR(inode.i_mode));
+				  LINUX_S_ISDIR(inode->i_mode));
 
 write_out:
-	err = fuse2fs_write_inode(fs, ino, &inode);
+	err = ext2fs_write_inode_full(fs, ino, EXT2_INODE(inode), inode_size);
 	if (err)
-		return translate_error(fs, ino, err);
+		ret = translate_error(fs, ino, err);
+out:
+	ext2fs_free_mem(&inode);
 
-	return 0;
+	return ret;
 }
 
 static int __op_unlink(struct fuse2fs *ff, const char *path)
diff --git a/tests/d_xattr_ea_inode/expect b/tests/d_xattr_ea_inode/expect
index aaad9c5b3..e1878c3dc 100644
--- a/tests/d_xattr_ea_inode/expect
+++ b/tests/d_xattr_ea_inode/expect
@@ -135,3 +135,54 @@ Pass 5: Checking group summary information
 test_filesys: 11/128 files (0.0% non-contiguous), 18/256 blocks
 Exit status is 0
 
+write d_xattr_ea_inode.tmp test_file
+Allocated inode: 12
+Exit status is 0
+
+Generate xattr value (1024 bytes)
+ea_set -f d_xattr_ea_inode.tmp test_file user.test1
+Exit status is 0
+ea_get -f d_xattr_ea_inode.ver.tmp test_file user.test1
+Exit status is 0
+Compare xattr values (1024 bytes)
+stat test_file
+Blockcount: 16
+Exit status is 0
+e2fsck -yf -N test_filesys
+Pass 1: Checking inodes, blocks, and sizes
+Pass 2: Checking directory structure
+Pass 3: Checking directory connectivity
+Pass 4: Checking reference counts
+Pass 5: Checking group summary information
+test_filesys: 12/128 files (0.0% non-contiguous), 20/256 blocks
+Exit status is 0
+
+Generate xattr value (16384 bytes)
+ea_set -f d_xattr_ea_inode.tmp test_file user.test2
+Exit status is 0
+ea_get -f d_xattr_ea_inode.ver.tmp test_file user.test2
+Exit status is 0
+Compare xattr values (16384 bytes)
+stat test_file
+Blockcount: 48
+Exit status is 0
+e2fsck -yf -N test_filesys
+Pass 1: Checking inodes, blocks, and sizes
+Pass 2: Checking directory structure
+Pass 3: Checking directory connectivity
+Pass 4: Checking reference counts
+Pass 5: Checking group summary information
+test_filesys: 13/128 files (0.0% non-contiguous), 24/256 blocks
+Exit status is 0
+
+rm test_file
+
+Exit status is 0
+e2fsck -yf -N test_filesys
+Pass 1: Checking inodes, blocks, and sizes
+Pass 2: Checking directory structure
+Pass 3: Checking directory connectivity
+Pass 4: Checking reference counts
+Pass 5: Checking group summary information
+test_filesys: 11/128 files (0.0% non-contiguous), 18/256 blocks
+Exit status is 0
diff --git a/tests/d_xattr_ea_inode/script b/tests/d_xattr_ea_inode/script
index 84104549c..c24eb6cd5 100644
--- a/tests/d_xattr_ea_inode/script
+++ b/tests/d_xattr_ea_inode/script
@@ -15,32 +15,33 @@ VERIFY_DATA=$test_name.ver.tmp
 echo "debugfs edit extended attributes with ea_inode feature" > $OUT.new
 
 d_xattr_ea_inode_check() {
-	local xattr_size=$1
-	local xattr_name=$2
-	local ea_rm=$3
+	local path=$1
+	local xattr_size=$2
+	local xattr_name=$3
+	local ea_rm=$4
 
 	echo "Generate xattr value ($xattr_size bytes)" >> $OUT.new
 	echo $xattr_size |
 		awk '{srand();for(i=0;i<$1;i++) printf("%c",97+int(rand()*26));}' > $TEST_DATA
 
-	echo "ea_set -f $TEST_DATA / $xattr_name" >> $OUT.new
-	$DEBUGFS -w -R "ea_set -f $TEST_DATA / $xattr_name" $TMPFILE >> $OUT.new 2>&1
+	echo "ea_set -f $TEST_DATA $path $xattr_name" >> $OUT.new
+	$DEBUGFS -w -R "ea_set -f $TEST_DATA $path $xattr_name" $TMPFILE >> $OUT.new 2>&1
 	echo Exit status is $? >> $OUT.new
 
-	echo "ea_get -f $VERIFY_DATA / $xattr_name" >> $OUT.new
-	$DEBUGFS -w -R "ea_get -f $VERIFY_DATA / $xattr_name" $TMPFILE >> $OUT.new 2>&1
+	echo "ea_get -f $VERIFY_DATA $path $xattr_name" >> $OUT.new
+	$DEBUGFS -w -R "ea_get -f $VERIFY_DATA $path $xattr_name" $TMPFILE >> $OUT.new 2>&1
 	echo Exit status is $? >> $OUT.new
 
 	echo "Compare xattr values ($xattr_size bytes)" >> $OUT.new
 	diff -u $TEST_DATA $VERIFY_DATA >> $OUT.new
 
-	echo "stat /" >> $OUT.new
-	($DEBUGFS -c -R "stat /" $TMPFILE | grep -Eo "Blockcount: [0-9]+") >> $OUT.new 2>&1
+	echo "stat $path" >> $OUT.new
+	($DEBUGFS -c -R "stat $path" $TMPFILE | grep -Eo "Blockcount: [0-9]+") >> $OUT.new 2>&1
 	echo Exit status is $? >> $OUT.new
 
 	if $ea_rm; then
-		echo "ea_rm / $xattr_name" >> $OUT.new
-		$DEBUGFS -w -R "ea_rm / $xattr_name" $TMPFILE >> $OUT.new 2>&1
+		echo "ea_rm $path $xattr_name" >> $OUT.new
+		$DEBUGFS -w -R "ea_rm $path $xattr_name" $TMPFILE >> $OUT.new 2>&1
 		echo Exit status is $? >> $OUT.new
 	fi
 
@@ -56,15 +57,33 @@ echo "mke2fs -Fq -b 4096 -O ea_inode test.img 1m" >> $OUT.new
 $MKE2FS -Fq -b 4096 -O ea_inode $TMPFILE 1m > /dev/null 2>&1
 echo Exit status is $? >> $OUT.new
 
-d_xattr_ea_inode_check 8292 user.test1 true
+d_xattr_ea_inode_check / 8292 user.test1 true
 
-d_xattr_ea_inode_check 4097 user.test1 false
-d_xattr_ea_inode_check 102  user.test2 false
-d_xattr_ea_inode_check 5005 user.test2 true
-d_xattr_ea_inode_check 512  user.test1 true
+d_xattr_ea_inode_check / 4097 user.test1 false
+d_xattr_ea_inode_check / 102  user.test2 false
+d_xattr_ea_inode_check / 5005 user.test2 true
+d_xattr_ea_inode_check / 512  user.test1 true
 
-d_xattr_ea_inode_check 1024 user.test1 false
-d_xattr_ea_inode_check 5000 user.test1 true
+d_xattr_ea_inode_check / 1024 user.test1 false
+d_xattr_ea_inode_check / 5000 user.test1 true
+
+# Create and remove a file with ea_inode
+echo "test_file_content" > $TEST_DATA
+echo "write $TEST_DATA test_file" >> $OUT.new
+$DEBUGFS -w -R "write $TEST_DATA test_file" $TMPFILE >> $OUT.new 2>&1
+echo Exit status is $? >> $OUT.new
+echo >> $OUT.new
+
+d_xattr_ea_inode_check test_file 1024  user.test1 false
+d_xattr_ea_inode_check test_file 16384 user.test2 false
+
+echo "rm test_file" >> $OUT.new
+$DEBUGFS -w -R "rm test_file" $TMPFILE >> $OUT.new 2>&1
+echo Exit status is $? >> $OUT.new
+
+echo e2fsck $VERIFY_FSCK_OPT -N test_filesys >> $OUT.new
+$FSCK $VERIFY_FSCK_OPT -N test_filesys $TMPFILE >> $OUT.new 2>&1
+echo Exit status is $? >> $OUT.new
 
 sed -f $cmd_dir/filter.sed $OUT.new > $OUT
 
diff --git a/tests/f_orphan_ea_inode/expect.1 b/tests/f_orphan_ea_inode/expect.1
new file mode 100644
index 000000000..3eba3d718
--- /dev/null
+++ b/tests/f_orphan_ea_inode/expect.1
@@ -0,0 +1,6 @@
+test_filesys: Clearing orphaned inode 12 (uid=0, gid=0, mode=0100644, size=0)
+test_filesys: Clearing orphaned inode 13 (uid=1000, gid=1000, mode=0100644, size=6)
+test_filesys: Clearing orphaned inode 14 (uid=1001, gid=1001, mode=0100644, size=0)
+test_filesys: Clearing orphaned inode 15 (uid=1002, gid=1002, mode=0100644, size=6)
+test_filesys: clean, 13/128 files, 23/256 blocks
+Exit status is 0
diff --git a/tests/f_orphan_ea_inode/expect.2 b/tests/f_orphan_ea_inode/expect.2
new file mode 100644
index 000000000..bf76a5c25
--- /dev/null
+++ b/tests/f_orphan_ea_inode/expect.2
@@ -0,0 +1,7 @@
+Pass 1: Checking inodes, blocks, and sizes
+Pass 2: Checking directory structure
+Pass 3: Checking directory connectivity
+Pass 4: Checking reference counts
+Pass 5: Checking group summary information
+test_filesys: 13/128 files (0.0% non-contiguous), 23/256 blocks
+Exit status is 0
diff --git a/tests/f_orphan_ea_inode/image.gz b/tests/f_orphan_ea_inode/image.gz
new file mode 100644
index 0000000000000000000000000000000000000000..95f0e53aebd45e041b4b1f53be7c374c8144c1b7
GIT binary patch
literal 2139
zcmeH`ZB){C6vzM8Hrp}7g@%~MtYvweYvyWYCGO-yK4OW}V<wpzXd)%L%m=7B52Z~^
zQ={e6O5&KqaG4HETqaXUP01LVmg2*F0G7z3!f&v>*_*xGyS^{(x%d9g@7#Mo=U!CN
zx^;`kZBo{(O(w?UlOWkxD8Vt(5&DxMNiEw;EPH7y+Ew~8s5^)kG8_|gs>t&FoaZqP
zhX^f4FF$nsWQ%V%)-Su+(l6Bj)v?|nzO6mdz3EPOer?uZ1ll)s&Ai{i%Nu890TO2K
zshf0{b{<Zv_<<poPxa+^D#yc4?<l;OGK`3u+<CLg<*w-W<I&Z&TQxBKQZK^k`xB!I
z(xSjWKVP%@5e;nGe!!u<P+^A{(m#3T?#y#3+&#B^K;w?n8!FVs+wQPuX=y9mv_BLa
z=6Qv2PmV?vhTVTy7c;X9{Xof3m)GR|oFiSpu)UCoih#ockGL374P)C>)fKnfhQuM3
ztE3o$LWqHZD0#-n3TAl<f}q;T^&2Retol@MQ}DTA*g>%tf_w_xidVV}P2DUekE)eD
zT(0*hq{s=2DLZ|=cfjpP$NK%@=c#ATI5bt|@jRbe)6n{i=2q(KOUp^UXH_U#Uun@v
zibGUrxGp$!c<IY}PVE{YCR5k~q_VDufa}qrdE~6*s9_O;x6eT`O1@ed`+(FRn2_dR
zy?yp+&_3XFs02d_6Stp^N{Bi#Yr8ZraBTIL_E9OTb@d@SI&nN#{_FNup8eKF^qsOO
zqmqFvSOVkM=-~sio=0EA9y8h{L<fM1e&apib)L>gIAYUPiAMJD3-(ra9L!`-503X&
zkE^6#8;v0xBSQ8}OeIL;lcoEH!L8Ob?!DL*>IBEr%AXAqYhC(lf<e&c!9rpa1t~yN
zROqvHMj-DAi1se&rBWj}t3|)vFRr%EM^&ByvH|gy4IU$Un2?-Mx~$FA^ip}&LeisK
zMvT5Alb4%Cyn)UP1b6qGr{*f{1-#`qLPf<rz=XvKHf=M{A^}#ihNMxl8`(<!IBOM7
z=?Vu{qUa390%0_(p($2JAJg$)p88HTI~3?`q@4qkMSSqK%?ZGaTsb#=nv};~c|3`S
z;tkGKjZz$r3up$s<cf@zu0*Io+u5{!G4)1V1%K%aHgejF{rVU<@yn7Td!-oe>@~D*
z0Vc{mZQ6tXP$#y`cS52**Q|e%QQ8=WpBK$J4K_}_l~<2P(lgK;5~jzBUMu8J=*SB{
zu6o87DJL#ukb`Hs$o89qW=2h^DO_^T0O|gm1q7WOdI3WMASpos)5bw|%t04(St-?I
zI<+XXZ&F+7Op7CM4HI=7$IonKxlbh#x3CCTLZL{v1h~&|gDaytp6nCp!M9BLOtX(e
z)ppMY*hgR0Ug#Q`KxpXdycinpU<q<si|6gN=q#7j;8mhaAKp)|TL9rd#RA&=v>uJ!
zB?fibNa<QO0K9aI&P3I~31I2szuSflXauk3xuJ7kIrh1K({`9b!eg3<AD9nik=amQ
z4A7F1hyxGu5`+1`8GRLqB}7xt%qp6>bru0;9?UEe*0yT@Se{ymzx_(1Yw1fztv6+{
z&4>Pe{l|1(NBH&nZ1g*N%U^UYwDJ`y93DU>!vnq1xwwn%W;V>4G3=h2i~ss#c00=v
i9Rno8r?2x(P~Yhtfp-M{Zvr7`V4?Up>pcjvf&Kz@_kh^|

literal 0
HcmV?d00001

diff --git a/tests/f_orphan_ea_inode/name b/tests/f_orphan_ea_inode/name
new file mode 100644
index 000000000..b892ff960
--- /dev/null
+++ b/tests/f_orphan_ea_inode/name
@@ -0,0 +1 @@
+clearing orphan inodes with ea_inode and quota features
diff --git a/tests/f_orphan_ea_inode/script b/tests/f_orphan_ea_inode/script
new file mode 100644
index 000000000..9650d07d0
--- /dev/null
+++ b/tests/f_orphan_ea_inode/script
@@ -0,0 +1,3 @@
+FSCK_OPT=-p
+SECOND_FSCK_OPT="-yf"
+. $cmd_dir/run_e2fsck
-- 
2.43.7


^ permalink raw reply related

* [PATCH RESEND 3/4] libext2fs: update iblock when using ea_inode feature
From: Etienne AUJAMES @ 2026-06-19 15:32 UTC (permalink / raw)
  To: linux-ext4, Theodore Ts'o; +Cc: Andreas Dilger, Li Dongyang
In-Reply-To: <ajVdnQUu9tSrKldW@eaujamesFR0130>

When an xattr is stored in an ea_inode, the ext2fs_xattrs_* functions do
not update the inode block count.

This patch uses a cached inode to update the block count and quota
when writing xattrs.
It also fixes an xattr removal case: the current ACL block was not
released by ext2fs_xattrs_write().

Add a helper function ext2fs_iblk_get() to get the inode block count
in cluster count unit.

Add ext2fs_xattrs_open_inode() to specify an optional cached inode to
use or update.
The function handles an optional quota context argument to update quota
accounting. It is hard to predict inode quota usage with ea_inode
deduplication.

For testing purposes, modify the debugfs "ea_set" command to handle
input files larger than the FS block size.

Add a regression test: d_xattr_ea_inode

Fixes: 50d0998cfe ("libext2fs: add ea_inode support to set xattr")
Signed-off-by: Etienne AUJAMES <eaujames@ddn.com>
Change-Id: I34733255bb76ffe2386d8cd6c19ce4561be4da3a
Lustre-bug-id: https://jira.whamcloud.com/browse/LU-20049
---
 debugfs/xattrs.c              |  19 ++-
 e2fsck/pass1.c                |  12 +-
 lib/ext2fs/ext2fs.h           |   7 ++
 lib/ext2fs/ext_attr.c         | 227 ++++++++++++++++++++++------------
 lib/ext2fs/i_block.c          |  14 +++
 lib/support/quotaio.h         |   1 -
 tests/d_xattr_ea_inode/expect | 137 ++++++++++++++++++++
 tests/d_xattr_ea_inode/name   |   1 +
 tests/d_xattr_ea_inode/script |  85 +++++++++++++
 9 files changed, 415 insertions(+), 88 deletions(-)
 create mode 100644 tests/d_xattr_ea_inode/expect
 create mode 100644 tests/d_xattr_ea_inode/name
 create mode 100644 tests/d_xattr_ea_inode/script

diff --git a/debugfs/xattrs.c b/debugfs/xattrs.c
index b518941c9..8364281f4 100644
--- a/debugfs/xattrs.c
+++ b/debugfs/xattrs.c
@@ -14,6 +14,7 @@ extern int optind;
 extern char *optarg;
 #endif
 #include <ctype.h>
+#include <unistd.h>
 #include "support/cstring.h"
 
 #include "debugfs.h"
@@ -299,10 +300,24 @@ void do_set_xattr(int argc, ss_argv_t argv, int sci_idx EXT2FS_ATTR((unused)),
 		goto out;
 
 	if (fp) {
-		err = ext2fs_get_mem(current_fs->blocksize, &buf);
+		struct stat st;
+
+		if (fstat(fileno(fp), &st)) {
+			err = errno;
+			goto out;
+		}
+		if (st.st_size > sysconf(_SC_ARG_MAX)) {
+			err = EFBIG;
+			goto out;
+		}
+		err = ext2fs_get_mem(st.st_size, &buf);
 		if (err)
 			goto out;
-		buflen = fread(buf, 1, current_fs->blocksize, fp);
+		buflen = fread(buf, 1, st.st_size, fp);
+		if (ferror(fp)) {
+			err = errno;
+			goto out;
+		}
 	} else {
 		buf = argv[optind + 2];
 		buflen = parse_c_string(buf);
diff --git a/e2fsck/pass1.c b/e2fsck/pass1.c
index fdde76cc2..364128f4d 100644
--- a/e2fsck/pass1.c
+++ b/e2fsck/pass1.c
@@ -968,18 +968,18 @@ static void reserve_block_for_lnf_repair(e2fsck_t ctx)
 
 static errcode_t get_inline_data_ea_size(ext2_filsys fs, ext2_ino_t ino,
 					 struct ext2_inode *inode,
-					 size_t *sz)
+					 size_t inode_size, size_t *sz)
 {
 	void *p;
 	struct ext2_xattr_handle *handle;
 	errcode_t retval;
 
-	retval = ext2fs_xattrs_open(fs, ino, &handle);
+	retval = ext2fs_xattrs_open_inode(fs, ino, inode, inode_size, NULL,
+					  &handle);
 	if (retval)
 		return retval;
 
-	retval = ext2fs_xattrs_read_inode(handle,
-					  (struct ext2_inode_large *)inode);
+	retval = ext2fs_xattrs_read(handle);
 	if (retval)
 		goto err;
 
@@ -1580,6 +1580,7 @@ void e2fsck_pass1(e2fsck_t ctx)
 			size_t size = 0;
 
 			pctx.errcode = get_inline_data_ea_size(fs, ino, inode,
+							       inode_size,
 							       &size);
 			if (!pctx.errcode &&
 			    fix_problem(ctx, PR_1_INLINE_DATA_FEATURE, &pctx)) {
@@ -1603,7 +1604,8 @@ void e2fsck_pass1(e2fsck_t ctx)
 			flags = fs->flags;
 			if (failed_csum)
 				fs->flags |= EXT2_FLAG_IGNORE_CSUM_ERRORS;
-			err = get_inline_data_ea_size(fs, ino, inode, &size);
+			err = get_inline_data_ea_size(fs, ino, inode,
+						      inode_size, &size);
 			fs->flags = (flags & EXT2_FLAG_IGNORE_CSUM_ERRORS) |
 				    (fs->flags & ~EXT2_FLAG_IGNORE_CSUM_ERRORS);
 
diff --git a/lib/ext2fs/ext2fs.h b/lib/ext2fs/ext2fs.h
index c4fcb10be..56de5ea50 100644
--- a/lib/ext2fs/ext2fs.h
+++ b/lib/ext2fs/ext2fs.h
@@ -94,6 +94,8 @@ typedef __s64 __bitwise		ext2_off64_t;
 typedef __s64 __bitwise		e2_blkcnt_t;
 typedef __u32 __bitwise		ext2_dirhash_t;
 
+typedef struct quota_ctx *quota_ctx_t;
+
 #if EXT2_FLAT_INCLUDES
 #include "com_err.h"
 #include "ext2_io.h"
@@ -1408,6 +1410,10 @@ errcode_t ext2fs_xattr_set(struct ext2_xattr_handle *handle,
 errcode_t ext2fs_xattr_remove(struct ext2_xattr_handle *handle,
 			      const char *key);
 errcode_t ext2fs_xattr_remove_all(struct ext2_xattr_handle *handle);
+errcode_t ext2fs_xattrs_open_inode(ext2_filsys fs, ext2_ino_t ino,
+				   struct ext2_inode *inode, size_t inode_size,
+				   quota_ctx_t qctx,
+				   struct ext2_xattr_handle **handle);
 errcode_t ext2fs_xattrs_open(ext2_filsys fs, ext2_ino_t ino,
 			     struct ext2_xattr_handle **handle);
 errcode_t ext2fs_xattrs_close(struct ext2_xattr_handle **handle);
@@ -1607,6 +1613,7 @@ errcode_t ext2fs_iblk_add_blocks(ext2_filsys fs, struct ext2_inode *inode,
 errcode_t ext2fs_iblk_sub_blocks(ext2_filsys fs, struct ext2_inode *inode,
 				 blk64_t num_blocks);
 errcode_t ext2fs_iblk_set(ext2_filsys fs, struct ext2_inode *inode, blk64_t b);
+blk64_t ext2fs_iblk_get(ext2_filsys fs, struct ext2_inode *inode);
 
 /* imager.c */
 extern errcode_t ext2fs_image_inode_write(ext2_filsys fs, int fd, int flags);
diff --git a/lib/ext2fs/ext_attr.c b/lib/ext2fs/ext_attr.c
index 7723d0f91..3b90b70bb 100644
--- a/lib/ext2fs/ext_attr.c
+++ b/lib/ext2fs/ext_attr.c
@@ -22,6 +22,7 @@
 #include "ext2_fs.h"
 #include "ext2_ext_attr.h"
 #include "ext4_acl.h"
+#include "support/quotaio.h"
 
 #include "ext2fsP.h"
 
@@ -361,8 +362,14 @@ struct ext2_xattr_handle {
 	int capacity;
 	int count;
 	int ibody_count;
+	struct ext2_inode *in_inode;
+	size_t in_inode_size;
+	struct ext2_inode_large *alloc_inode;
+	struct ext2_inode_large *inode;
+	size_t inode_size;
 	ext2_ino_t ino;
 	unsigned int flags;
+	quota_ctx_t qctx;
 };
 
 static errcode_t ext2fs_xattrs_expand(struct ext2_xattr_handle *h,
@@ -499,9 +506,11 @@ out:
 	return err;
 }
 
-static errcode_t prep_ea_block_for_write(ext2_filsys fs, ext2_ino_t ino,
-					 struct ext2_inode_large *inode)
+static errcode_t prep_ea_block_for_write(ext2_filsys fs,
+					 struct ext2_xattr_handle *handle)
 {
+	struct ext2_inode_large *inode = handle->inode;
+	ext2_ino_t ino = handle->ino;
 	struct ext2_ext_attr_header *header;
 	void *block_buf = NULL;
 	blk64_t blk, goal;
@@ -541,11 +550,15 @@ static errcode_t prep_ea_block_for_write(ext2_filsys fs, ext2_ino_t ino,
 		if (err)
 			goto out2;
 	} else {
-		/* No block, we must increment i_blocks */
+		/* No block, we must increment i_blocks and quota */
 		err = ext2fs_iblk_add_blocks(fs, (struct ext2_inode *)inode,
 					     1);
 		if (err)
 			goto out;
+
+		if (handle->qctx)
+			quota_data_add(handle->qctx, handle->inode, handle->ino,
+				       EXT2FS_C2B(fs, 1) * fs->blocksize);
 	}
 
 	/* Allocate a block */
@@ -744,8 +757,7 @@ write_xattrs_to_buffer(ext2_filsys fs, struct ext2_xattr *attrs, int count,
 errcode_t ext2fs_xattrs_write(struct ext2_xattr_handle *handle)
 {
 	ext2_filsys fs = handle->fs;
-	const unsigned int inode_size = EXT2_INODE_SIZE(fs->super);
-	struct ext2_inode_large *inode;
+	struct ext2_inode_large *inode = handle->inode;
 	char *start, *block_buf = NULL;
 	struct ext2_ext_attr_header *header;
 	__u32 ea_inode_magic;
@@ -755,21 +767,12 @@ errcode_t ext2fs_xattrs_write(struct ext2_xattr_handle *handle)
 	errcode_t err;
 
 	EXT2_CHECK_MAGIC(handle, EXT2_ET_MAGIC_EA_HANDLE);
-	i = inode_size;
-	if (i < sizeof(*inode))
-		i = sizeof(*inode);
-	err = ext2fs_get_memzero(i, &inode);
-	if (err)
-		return err;
-
-	err = ext2fs_read_inode_full(fs, handle->ino, EXT2_INODE(inode),
-				     inode_size);
-	if (err)
-		goto out;
+	if (!inode)
+		return EINVAL;
 
 	/* If extra_isize isn't set, we need to set it now */
 	if (inode->i_extra_isize == 0 &&
-	    inode_size > EXT2_GOOD_OLD_INODE_SIZE) {
+	    handle->inode_size > EXT2_GOOD_OLD_INODE_SIZE) {
 		char *p = (char *)inode;
 		size_t extra = fs->super->s_want_extra_isize;
 
@@ -778,22 +781,20 @@ errcode_t ext2fs_xattrs_write(struct ext2_xattr_handle *handle)
 		memset(p + EXT2_GOOD_OLD_INODE_SIZE, 0, extra);
 		inode->i_extra_isize = extra;
 	}
-	if (inode->i_extra_isize & 3) {
-		err = EXT2_ET_INODE_CORRUPTED;
-		goto out;
-	}
+	if (inode->i_extra_isize & 3)
+		return EXT2_ET_INODE_CORRUPTED;
 
 	/* Does the inode have space for EA? */
 	if (inode->i_extra_isize < sizeof(inode->i_extra_isize) ||
-	    inode_size <= EXT2_GOOD_OLD_INODE_SIZE + inode->i_extra_isize +
-								sizeof(__u32))
+	    handle->inode_size <= EXT2_GOOD_OLD_INODE_SIZE +
+	    inode->i_extra_isize + sizeof(__u32))
 		goto write_ea_block;
 
 	/* Write the inode EA */
 	ea_inode_magic = EXT2_EXT_ATTR_MAGIC;
 	memcpy(((char *) inode) + EXT2_GOOD_OLD_INODE_SIZE +
 	       inode->i_extra_isize, &ea_inode_magic, sizeof(__u32));
-	storage_size = inode_size - EXT2_GOOD_OLD_INODE_SIZE -
+	storage_size = handle->inode_size - EXT2_GOOD_OLD_INODE_SIZE -
 				inode->i_extra_isize - sizeof(__u32);
 	start = ((char *) inode) + EXT2_GOOD_OLD_INODE_SIZE +
 				inode->i_extra_isize + sizeof(__u32);
@@ -801,17 +802,16 @@ errcode_t ext2fs_xattrs_write(struct ext2_xattr_handle *handle)
 	err = write_xattrs_to_buffer(fs, handle->attrs, handle->ibody_count,
 				     start, storage_size, 0, 0);
 	if (err)
-		goto out;
+		return err;
 write_ea_block:
 	/* Are we done? */
-	if (handle->ibody_count == handle->count &&
-	    !ext2fs_file_acl_block(fs, EXT2_INODE(inode)))
+	if (handle->ibody_count == handle->count)
 		goto skip_ea_block;
 
 	/* Write the EA block */
 	err = ext2fs_get_memzero(fs->blocksize, &block_buf);
 	if (err)
-		goto out;
+		return err;
 
 	storage_size = fs->blocksize - sizeof(struct ext2_ext_attr_header);
 	start = block_buf + sizeof(struct ext2_ext_attr_header);
@@ -820,7 +820,7 @@ write_ea_block:
 				     handle->count - handle->ibody_count, start,
 				     storage_size, start - block_buf, 1);
 	if (err)
-		goto out2;
+		goto out;
 
 	/* Write a header on the EA block */
 	header = (struct ext2_ext_attr_header *) block_buf;
@@ -829,15 +829,15 @@ write_ea_block:
 	header->h_blocks = 1;
 
 	/* Get a new block for writing */
-	err = prep_ea_block_for_write(fs, handle->ino, inode);
+	err = prep_ea_block_for_write(fs, handle);
 	if (err)
-		goto out2;
+		goto out;
 
 	/* Finally, write the new EA block */
 	blk = ext2fs_file_acl_block(fs, EXT2_INODE(inode));
 	err = ext2fs_write_ext_attr3(fs, blk, block_buf, handle->ino);
 	if (err)
-		goto out2;
+		goto out;
 
 skip_ea_block:
 	blk = ext2fs_file_acl_block(fs, (struct ext2_inode *)inode);
@@ -845,19 +845,26 @@ skip_ea_block:
 		/* xattrs shrunk, free the block */
 		err = ext2fs_free_ext_attr(fs, handle->ino, inode);
 		if (err)
-			goto out;
+			return err;
+		if (handle->qctx)
+			quota_data_sub(handle->qctx, inode, handle->ino,
+				       EXT2FS_C2B(fs, 1) * fs->blocksize);
 	}
 
 	/* Write the inode */
 	err = ext2fs_write_inode_full(fs, handle->ino, EXT2_INODE(inode),
-				      inode_size);
+				      handle->inode_size);
 	if (err)
-		goto out2;
+		goto out;
+
+	/* Update the caller cached inode if provided */
+	if (handle->in_inode && handle->in_inode != EXT2_INODE(handle->inode))
+		memcpy(handle->in_inode, EXT2_INODE(handle->inode),
+		       handle->in_inode_size);
 
-out2:
-	ext2fs_free_mem(&block_buf);
 out:
-	ext2fs_free_mem(&inode);
+	ext2fs_free_mem(&block_buf);
+
 	return err;
 }
 
@@ -1130,29 +1137,51 @@ out:
 	return err;
 }
 
-errcode_t ext2fs_xattrs_read(struct ext2_xattr_handle *handle)
+errcode_t ext2fs_xattrs_read(struct ext2_xattr_handle *h)
 {
-	struct ext2_inode_large *inode;
-	size_t inode_size = EXT2_INODE_SIZE(handle->fs->super);
 	errcode_t err;
 
-	EXT2_CHECK_MAGIC(handle, EXT2_ET_MAGIC_EA_HANDLE);
+	EXT2_CHECK_MAGIC(h, EXT2_ET_MAGIC_EA_HANDLE);
+
+	h->inode_size = EXT2_INODE_SIZE(h->fs->super);
+	if (h->inode_size < sizeof(*h->inode))
+		h->inode_size = sizeof(*h->inode);
+
+	/* Use the caller cached inode if possible */
+	if (h->in_inode &&  h->in_inode_size >= h->inode_size) {
+		h->inode = (struct ext2_inode_large *) h->in_inode;
+		goto xattrs_read;
+	}
+
+	/* Flush the caller cached inode if provided */
+	if (h->in_inode) {
+		err = ext2fs_write_inode_full(h->fs, h->ino, h->in_inode,
+					     h->in_inode_size);
+		if (err)
+			goto err;
+	}
 
-	if (inode_size < sizeof(*inode))
-		inode_size = sizeof(*inode);
-	err = ext2fs_get_memzero(inode_size, &inode);
+	err = ext2fs_get_memzero(h->inode_size, &h->alloc_inode);
 	if (err)
 		return err;
 
-	err = ext2fs_read_inode_full(handle->fs, handle->ino, EXT2_INODE(inode),
-				     EXT2_INODE_SIZE(handle->fs->super));
+	err = ext2fs_read_inode_full(h->fs, h->ino, EXT2_INODE(h->alloc_inode),
+				     h->inode_size);
 	if (err)
-		goto out;
+		goto err;
 
-	err = ext2fs_xattrs_read_inode(handle, inode);
+	h->inode = h->alloc_inode;
 
-out:
-	ext2fs_free_mem(&inode);
+xattrs_read:
+	err = ext2fs_xattrs_read_inode(h, h->inode);
+	if (err)
+		goto err;
+
+	return 0;
+
+err:
+	h->inode_size = 0;
+	ext2fs_free_mem(&h->alloc_inode);
 
 	return err;
 }
@@ -1272,12 +1301,15 @@ out:
 	return err;
 }
 
-static errcode_t xattr_create_ea_inode(ext2_filsys fs, const void *value,
-				       size_t value_len, ext2_ino_t *ea_ino)
+static errcode_t xattr_create_ea_inode(struct ext2_xattr_handle *handle,
+				       const void *value, size_t value_len,
+				       ext2_ino_t *ea_ino)
 {
+	ext2_filsys fs = handle->fs;
 	struct ext2_inode inode;
 	ext2_ino_t ino;
 	ext2_file_t file;
+	blk64_t	iblk;
 	__u32 hash;
 	errcode_t ret;
 
@@ -1317,16 +1349,30 @@ static errcode_t xattr_create_ea_inode(ext2_filsys fs, const void *value,
 	if (ret)
 		return ret;
 
+	ret = ext2fs_read_inode(fs, ino, &inode);
+	if (ret)
+		return ret;
+
 	ext2fs_inode_alloc_stats2(fs, ino, 1 /* inuse */, 0 /* isdir */);
+	iblk = ext2fs_iblk_get(fs, &inode);
+	ext2fs_iblk_add_blocks(fs, EXT2_INODE(handle->inode), iblk);
+	if (handle->qctx) {
+		quota_data_add(handle->qctx, handle->inode, handle->ino,
+			       EXT2FS_C2B(fs, iblk) * fs->blocksize);
+		quota_data_inodes(handle->qctx, handle->inode, handle->ino, +1);
+	}
 
 	*ea_ino = ino;
 	return 0;
 }
 
-static errcode_t xattr_inode_dec_ref(ext2_filsys fs, ext2_ino_t ino)
+static errcode_t xattr_inode_dec_ref(struct ext2_xattr_handle *handle,
+				     ext2_ino_t ino)
 {
+	ext2_filsys fs = handle->fs;
 	struct ext2_inode_large inode;
 	__u64 ref_count;
+	blk64_t	iblk;
 	errcode_t ret;
 
 	ret = ext2fs_read_inode_full(fs, ino, (struct ext2_inode *)&inode,
@@ -1338,6 +1384,14 @@ static errcode_t xattr_inode_dec_ref(ext2_filsys fs, ext2_ino_t ino)
 	ref_count--;
 	ext2fs_set_ea_inode_ref(EXT2_INODE(&inode), ref_count);
 
+	iblk = ext2fs_iblk_get(fs, EXT2_INODE(&inode));
+	ext2fs_iblk_sub_blocks(fs, EXT2_INODE(handle->inode), iblk);
+	if (handle->qctx) {
+		quota_data_sub(handle->qctx, handle->inode, handle->ino,
+			       EXT2FS_C2B(fs, iblk) * fs->blocksize);
+		quota_data_inodes(handle->qctx, handle->inode, handle->ino, -1);
+	}
+
 	if (ref_count)
 		goto write_out;
 
@@ -1365,7 +1419,8 @@ out:
 	return ret;
 }
 
-static errcode_t xattr_update_entry(ext2_filsys fs, struct ext2_xattr *x,
+static errcode_t xattr_update_entry(struct ext2_xattr_handle *handle,
+				    struct ext2_xattr *x,
 				    const char *name, const char *short_name,
 				    int index, const void *value,
 				    size_t value_len, int in_inode)
@@ -1390,13 +1445,13 @@ static errcode_t xattr_update_entry(ext2_filsys fs, struct ext2_xattr *x,
 	memcpy(new_value, value, value_len);
 
 	if (in_inode) {
-		ret = xattr_create_ea_inode(fs, value, value_len, &ea_ino);
+		ret = xattr_create_ea_inode(handle, value, value_len, &ea_ino);
 		if (ret)
 			goto fail;
 	}
 
 	if (x->ea_ino) {
-		ret = xattr_inode_dec_ref(fs, x->ea_ino);
+		ret = xattr_inode_dec_ref(handle, x->ea_ino);
 		if (ret)
 			goto fail;
 	}
@@ -1419,7 +1474,7 @@ fail:
 	if (new_value)
 		ext2fs_free_mem(&new_value);
 	if (ea_ino)
-		xattr_inode_dec_ref(fs, ea_ino);
+		xattr_inode_dec_ref(handle, ea_ino);
 	return ret;
 }
 
@@ -1486,7 +1541,7 @@ static errcode_t xattr_array_update(struct ext2_xattr_handle *h,
 		}
 
 		/* Update the existing entry. */
-		ret = xattr_update_entry(h->fs, &h->attrs[old_idx], name,
+		ret = xattr_update_entry(h, &h->attrs[old_idx], name,
 					 shortname, name_idx, value,
 					 value_len, in_inode);
 		if (ret)
@@ -1515,7 +1570,7 @@ static errcode_t xattr_array_update(struct ext2_xattr_handle *h,
 
 	if (old_idx >= 0) {
 		/* Update the existing entry. */
-		ret = xattr_update_entry(h->fs, &h->attrs[old_idx], name,
+		ret = xattr_update_entry(h, &h->attrs[old_idx], name,
 					 shortname, name_idx, value,
 					 value_len, in_inode);
 		if (ret)
@@ -1551,7 +1606,7 @@ add_new:
 			return ret;
 	}
 
-	ret = xattr_update_entry(h->fs, &h->attrs[h->count], name, shortname,
+	ret = xattr_update_entry(h, &h->attrs[h->count], name, shortname,
 				 name_idx, value, value_len, in_inode);
 	if (ret)
 		return ret;
@@ -1594,8 +1649,7 @@ errcode_t ext2fs_xattr_set(struct ext2_xattr_handle *h,
 			   size_t value_len)
 {
 	ext2_filsys fs = h->fs;
-	const int inode_size = EXT2_INODE_SIZE(fs->super);
-	struct ext2_inode_large *inode = NULL;
+	struct ext2_inode_large *inode = h->inode;
 	struct ext2_xattr *x;
 	char *new_value;
 	int ibody_free, block_free;
@@ -1605,6 +1659,8 @@ errcode_t ext2fs_xattr_set(struct ext2_xattr_handle *h,
 	errcode_t ret;
 
 	EXT2_CHECK_MAGIC(h, EXT2_ET_MAGIC_EA_HANDLE);
+	if (!inode || !h->inode_size)
+		return EINVAL;
 
 	ret = ext2fs_get_mem(value_len, &new_value);
 	if (ret)
@@ -1632,23 +1688,14 @@ errcode_t ext2fs_xattr_set(struct ext2_xattr_handle *h,
 			break;
 		}
 	}
-
-	ret = ext2fs_get_memzero(inode_size, &inode);
-	if (ret)
-		goto out;
-	ret = ext2fs_read_inode_full(fs, h->ino,
-				     (struct ext2_inode *)inode,
-				     inode_size);
-	if (ret)
-		goto out;
-	if (inode_size > EXT2_GOOD_OLD_INODE_SIZE) {
+	if (h->inode_size > EXT2_GOOD_OLD_INODE_SIZE) {
 		extra_isize = inode->i_extra_isize;
 		if (extra_isize == 0) {
 			extra_isize = fs->super->s_want_extra_isize;
 			if (extra_isize == 0)
 				extra_isize = sizeof(__u32);
 		}
-		ibody_free = inode_size - EXT2_GOOD_OLD_INODE_SIZE;
+		ibody_free = h->inode_size - EXT2_GOOD_OLD_INODE_SIZE;
 		ibody_free -= extra_isize;
 		/* Extended attribute magic and final null entry. */
 		ibody_free -= sizeof(__u32) * 2;
@@ -1694,8 +1741,6 @@ errcode_t ext2fs_xattr_set(struct ext2_xattr_handle *h,
 write_out:
 	ret = ext2fs_xattrs_write(h);
 out:
-	if (inode)
-		ext2fs_free_mem(&inode);
 	ext2fs_free_mem(&new_value);
 	return ret;
 }
@@ -1712,7 +1757,7 @@ errcode_t ext2fs_xattr_remove(struct ext2_xattr_handle *handle,
 			ext2fs_free_mem(&x->name);
 			ext2fs_free_mem(&x->value);
 			if (x->ea_ino)
-				xattr_inode_dec_ref(handle->fs, x->ea_ino);
+				xattr_inode_dec_ref(handle, x->ea_ino);
 			memmove(x, x + 1, (end - x - 1)*sizeof(*x));
 			memset(end - 1, 0, sizeof(*end));
 			if (x < handle->attrs + handle->ibody_count)
@@ -1736,7 +1781,7 @@ errcode_t ext2fs_xattr_remove_all(struct ext2_xattr_handle *handle)
 		ext2fs_free_mem(&x->name);
 		ext2fs_free_mem(&x->value);
 		if (x->ea_ino)
-			xattr_inode_dec_ref(handle->fs, x->ea_ino);
+			xattr_inode_dec_ref(handle, x->ea_ino);
 	}
 
 	handle->ibody_count = 0;
@@ -1744,8 +1789,14 @@ errcode_t ext2fs_xattr_remove_all(struct ext2_xattr_handle *handle)
 	return ext2fs_xattrs_write(handle);
 }
 
-errcode_t ext2fs_xattrs_open(ext2_filsys fs, ext2_ino_t ino,
-			     struct ext2_xattr_handle **handle)
+/* If the inode size is set to EXT2_INODE_SIZE(fs), the input inode is used
+ * directly. Otherwise, the ext2fs_xattrs_* functions operate on a separate copy
+ * (with inline xattrs) and update the caller's cached inode on write.
+ */
+errcode_t ext2fs_xattrs_open_inode(ext2_filsys fs, ext2_ino_t ino,
+				   struct ext2_inode *inode, size_t inode_size,
+				   quota_ctx_t qctx,
+				   struct ext2_xattr_handle **handle)
 {
 	struct ext2_xattr_handle *h;
 	errcode_t err;
@@ -1754,6 +1805,9 @@ errcode_t ext2fs_xattrs_open(ext2_filsys fs, ext2_ino_t ino,
 	    !ext2fs_has_feature_inline_data(fs->super))
 		return EXT2_ET_MISSING_EA_FEATURE;
 
+	if (inode && inode_size < sizeof(*inode))
+		return EINVAL;
+
 	err = ext2fs_get_memzero(sizeof(*h), &h);
 	if (err)
 		return err;
@@ -1768,11 +1822,23 @@ errcode_t ext2fs_xattrs_open(ext2_filsys fs, ext2_ino_t ino,
 	}
 	h->count = 0;
 	h->ino = ino;
+	h->in_inode = inode;
+	h->in_inode_size = inode_size;
+	h->alloc_inode = NULL;
+	h->inode = NULL;
+	h->inode_size = 0;
 	h->fs = fs;
+	h->qctx = qctx;
 	*handle = h;
 	return 0;
 }
 
+errcode_t ext2fs_xattrs_open(ext2_filsys fs, ext2_ino_t ino,
+			     struct ext2_xattr_handle **handle)
+{
+	return ext2fs_xattrs_open_inode(fs, ino, NULL, 0, NULL, handle);
+}
+
 errcode_t ext2fs_xattrs_close(struct ext2_xattr_handle **handle)
 {
 	struct ext2_xattr_handle *h = *handle;
@@ -1780,6 +1846,7 @@ errcode_t ext2fs_xattrs_close(struct ext2_xattr_handle **handle)
 	EXT2_CHECK_MAGIC(h, EXT2_ET_MAGIC_EA_HANDLE);
 	xattrs_free_keys(h);
 	ext2fs_free_mem(&h->attrs);
+	ext2fs_free_mem(&h->alloc_inode);
 	ext2fs_free_mem(handle);
 	return 0;
 }
diff --git a/lib/ext2fs/i_block.c b/lib/ext2fs/i_block.c
index 2eecf02fc..064a5c989 100644
--- a/lib/ext2fs/i_block.c
+++ b/lib/ext2fs/i_block.c
@@ -88,3 +88,17 @@ errcode_t ext2fs_iblk_set(ext2_filsys fs, struct ext2_inode *inode, blk64_t b)
 		return EOVERFLOW;
 	return 0;
 }
+
+blk64_t ext2fs_iblk_get(ext2_filsys fs, struct ext2_inode *inode)
+{
+	blk64_t	blk = inode->i_blocks;
+
+	if (ext2fs_has_feature_huge_file(fs->super))
+		blk += ((blk64_t) inode->osd2.linux2.l_i_blocks_hi) << 32;
+
+	if (!ext2fs_has_feature_huge_file(fs->super) ||
+	    !(inode->i_flags & EXT4_HUGE_FILE_FL))
+		blk /= fs->blocksize / 512;
+
+	return EXT2FS_B2C(fs, blk);
+}
diff --git a/lib/support/quotaio.h b/lib/support/quotaio.h
index 6152416fb..c76486919 100644
--- a/lib/support/quotaio.h
+++ b/lib/support/quotaio.h
@@ -58,7 +58,6 @@ enum quota_type {
 #define QUOTA_PRJ_BIT (1 << PRJQUOTA)
 #define QUOTA_ALL_BIT (QUOTA_USR_BIT | QUOTA_GRP_BIT | QUOTA_PRJ_BIT)
 
-typedef struct quota_ctx *quota_ctx_t;
 struct dict_t;
 
 struct quota_ctx {
diff --git a/tests/d_xattr_ea_inode/expect b/tests/d_xattr_ea_inode/expect
new file mode 100644
index 000000000..aaad9c5b3
--- /dev/null
+++ b/tests/d_xattr_ea_inode/expect
@@ -0,0 +1,137 @@
+debugfs edit extended attributes with ea_inode feature
+mke2fs -Fq -b 4096 -O ea_inode test.img 1m
+Exit status is 0
+Generate xattr value (8292 bytes)
+ea_set -f d_xattr_ea_inode.tmp / user.test1
+Exit status is 0
+ea_get -f d_xattr_ea_inode.ver.tmp / user.test1
+Exit status is 0
+Compare xattr values (8292 bytes)
+stat /
+Blockcount: 32
+Exit status is 0
+ea_rm / user.test1
+Exit status is 0
+e2fsck -yf -N test_filesys
+Pass 1: Checking inodes, blocks, and sizes
+Pass 2: Checking directory structure
+Pass 3: Checking directory connectivity
+Pass 4: Checking reference counts
+Pass 5: Checking group summary information
+test_filesys: 11/128 files (0.0% non-contiguous), 18/256 blocks
+Exit status is 0
+
+Generate xattr value (4097 bytes)
+ea_set -f d_xattr_ea_inode.tmp / user.test1
+Exit status is 0
+ea_get -f d_xattr_ea_inode.ver.tmp / user.test1
+Exit status is 0
+Compare xattr values (4097 bytes)
+stat /
+Blockcount: 24
+Exit status is 0
+e2fsck -yf -N test_filesys
+Pass 1: Checking inodes, blocks, and sizes
+Pass 2: Checking directory structure
+Pass 3: Checking directory connectivity
+Pass 4: Checking reference counts
+Pass 5: Checking group summary information
+test_filesys: 12/128 files (0.0% non-contiguous), 20/256 blocks
+Exit status is 0
+
+Generate xattr value (102 bytes)
+ea_set -f d_xattr_ea_inode.tmp / user.test2
+Exit status is 0
+ea_get -f d_xattr_ea_inode.ver.tmp / user.test2
+Exit status is 0
+Compare xattr values (102 bytes)
+stat /
+Blockcount: 32
+Exit status is 0
+e2fsck -yf -N test_filesys
+Pass 1: Checking inodes, blocks, and sizes
+Pass 2: Checking directory structure
+Pass 3: Checking directory connectivity
+Pass 4: Checking reference counts
+Pass 5: Checking group summary information
+test_filesys: 12/128 files (0.0% non-contiguous), 21/256 blocks
+Exit status is 0
+
+Generate xattr value (5005 bytes)
+ea_set -f d_xattr_ea_inode.tmp / user.test2
+Exit status is 0
+ea_get -f d_xattr_ea_inode.ver.tmp / user.test2
+Exit status is 0
+Compare xattr values (5005 bytes)
+stat /
+Blockcount: 40
+Exit status is 0
+ea_rm / user.test2
+Exit status is 0
+e2fsck -yf -N test_filesys
+Pass 1: Checking inodes, blocks, and sizes
+Pass 2: Checking directory structure
+Pass 3: Checking directory connectivity
+Pass 4: Checking reference counts
+Pass 5: Checking group summary information
+test_filesys: 12/128 files (0.0% non-contiguous), 20/256 blocks
+Exit status is 0
+
+Generate xattr value (512 bytes)
+ea_set -f d_xattr_ea_inode.tmp / user.test1
+Exit status is 0
+ea_get -f d_xattr_ea_inode.ver.tmp / user.test1
+Exit status is 0
+Compare xattr values (512 bytes)
+stat /
+Blockcount: 16
+Exit status is 0
+ea_rm / user.test1
+Exit status is 0
+e2fsck -yf -N test_filesys
+Pass 1: Checking inodes, blocks, and sizes
+Pass 2: Checking directory structure
+Pass 3: Checking directory connectivity
+Pass 4: Checking reference counts
+Pass 5: Checking group summary information
+test_filesys: 11/128 files (0.0% non-contiguous), 18/256 blocks
+Exit status is 0
+
+Generate xattr value (1024 bytes)
+ea_set -f d_xattr_ea_inode.tmp / user.test1
+Exit status is 0
+ea_get -f d_xattr_ea_inode.ver.tmp / user.test1
+Exit status is 0
+Compare xattr values (1024 bytes)
+stat /
+Blockcount: 16
+Exit status is 0
+e2fsck -yf -N test_filesys
+Pass 1: Checking inodes, blocks, and sizes
+Pass 2: Checking directory structure
+Pass 3: Checking directory connectivity
+Pass 4: Checking reference counts
+Pass 5: Checking group summary information
+test_filesys: 11/128 files (0.0% non-contiguous), 19/256 blocks
+Exit status is 0
+
+Generate xattr value (5000 bytes)
+ea_set -f d_xattr_ea_inode.tmp / user.test1
+Exit status is 0
+ea_get -f d_xattr_ea_inode.ver.tmp / user.test1
+Exit status is 0
+Compare xattr values (5000 bytes)
+stat /
+Blockcount: 24
+Exit status is 0
+ea_rm / user.test1
+Exit status is 0
+e2fsck -yf -N test_filesys
+Pass 1: Checking inodes, blocks, and sizes
+Pass 2: Checking directory structure
+Pass 3: Checking directory connectivity
+Pass 4: Checking reference counts
+Pass 5: Checking group summary information
+test_filesys: 11/128 files (0.0% non-contiguous), 18/256 blocks
+Exit status is 0
+
diff --git a/tests/d_xattr_ea_inode/name b/tests/d_xattr_ea_inode/name
new file mode 100644
index 000000000..9e36dc986
--- /dev/null
+++ b/tests/d_xattr_ea_inode/name
@@ -0,0 +1 @@
+edit extended attributes in debugfs with ea_inode feature
diff --git a/tests/d_xattr_ea_inode/script b/tests/d_xattr_ea_inode/script
new file mode 100644
index 000000000..84104549c
--- /dev/null
+++ b/tests/d_xattr_ea_inode/script
@@ -0,0 +1,85 @@
+#!/bin/bash
+
+if ! test -x $DEBUGFS_EXE; then
+	echo "$test_name: $test_description: skipped (no debugfs)"
+	return 0
+fi
+
+OUT=$test_name.log
+EXP=$test_dir/expect
+VERIFY_FSCK_OPT=-yf
+
+TEST_DATA=$test_name.tmp
+VERIFY_DATA=$test_name.ver.tmp
+
+echo "debugfs edit extended attributes with ea_inode feature" > $OUT.new
+
+d_xattr_ea_inode_check() {
+	local xattr_size=$1
+	local xattr_name=$2
+	local ea_rm=$3
+
+	echo "Generate xattr value ($xattr_size bytes)" >> $OUT.new
+	echo $xattr_size |
+		awk '{srand();for(i=0;i<$1;i++) printf("%c",97+int(rand()*26));}' > $TEST_DATA
+
+	echo "ea_set -f $TEST_DATA / $xattr_name" >> $OUT.new
+	$DEBUGFS -w -R "ea_set -f $TEST_DATA / $xattr_name" $TMPFILE >> $OUT.new 2>&1
+	echo Exit status is $? >> $OUT.new
+
+	echo "ea_get -f $VERIFY_DATA / $xattr_name" >> $OUT.new
+	$DEBUGFS -w -R "ea_get -f $VERIFY_DATA / $xattr_name" $TMPFILE >> $OUT.new 2>&1
+	echo Exit status is $? >> $OUT.new
+
+	echo "Compare xattr values ($xattr_size bytes)" >> $OUT.new
+	diff -u $TEST_DATA $VERIFY_DATA >> $OUT.new
+
+	echo "stat /" >> $OUT.new
+	($DEBUGFS -c -R "stat /" $TMPFILE | grep -Eo "Blockcount: [0-9]+") >> $OUT.new 2>&1
+	echo Exit status is $? >> $OUT.new
+
+	if $ea_rm; then
+		echo "ea_rm / $xattr_name" >> $OUT.new
+		$DEBUGFS -w -R "ea_rm / $xattr_name" $TMPFILE >> $OUT.new 2>&1
+		echo Exit status is $? >> $OUT.new
+	fi
+
+	echo e2fsck $VERIFY_FSCK_OPT -N test_filesys >> $OUT.new
+	$FSCK $VERIFY_FSCK_OPT -N test_filesys $TMPFILE >> $OUT.new 2>&1
+	echo Exit status is $? >> $OUT.new
+	echo >> $OUT.new
+}
+
+truncate -s1M $TMPFILE 2>&1
+
+echo "mke2fs -Fq -b 4096 -O ea_inode test.img 1m" >> $OUT.new
+$MKE2FS -Fq -b 4096 -O ea_inode $TMPFILE 1m > /dev/null 2>&1
+echo Exit status is $? >> $OUT.new
+
+d_xattr_ea_inode_check 8292 user.test1 true
+
+d_xattr_ea_inode_check 4097 user.test1 false
+d_xattr_ea_inode_check 102  user.test2 false
+d_xattr_ea_inode_check 5005 user.test2 true
+d_xattr_ea_inode_check 512  user.test1 true
+
+d_xattr_ea_inode_check 1024 user.test1 false
+d_xattr_ea_inode_check 5000 user.test1 true
+
+sed -f $cmd_dir/filter.sed $OUT.new > $OUT
+
+#
+# Do the verification
+#
+
+rm -f $TMPFILE $TEST_DATA $VERIFY_DATA $OUT.new
+
+if cmp -s $OUT $EXP; then
+	echo "$test_name: $test_description: ok"
+	touch $test_name.ok
+else
+	echo "$test_name: $test_description: failed"
+	diff $DIFF_OPTS $EXP $OUT > $test_name.failed
+fi
+
+unset VERIFY_FSCK_OPT VERIFY_DATA TEST_DATA OUT EXP d_xattr_ea_inode_check
-- 
2.43.7


^ permalink raw reply related

* [PATCH RESEND 2/4] libext2fs: add quota to libext2fs
From: Etienne AUJAMES @ 2026-06-19 15:32 UTC (permalink / raw)
  To: linux-ext4, Theodore Ts'o; +Cc: Andreas Dilger, Li Dongyang
In-Reply-To: <ajVdnQUu9tSrKldW@eaujamesFR0130>

Add the quota-related interface to libext2fs and install the
relevant headers.

Change-Id: I17e6b5aa74e0f1bb1465168a1cf4e03184e003b0
Lustre-bug-id: https://jira.whamcloud.com/browse/LU-13241
Signed-off-by: Li Dongyang <dongyangli@ddn.com>
Signed-off-by: Etienne AUJAMES <eaujames@ddn.com>
---
 lib/ext2fs/Makefile.in | 43 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 43 insertions(+)

diff --git a/lib/ext2fs/Makefile.in b/lib/ext2fs/Makefile.in
index e9a6ced24..0656c4c5c 100644
--- a/lib/ext2fs/Makefile.in
+++ b/lib/ext2fs/Makefile.in
@@ -28,6 +28,8 @@ DEBUG_OBJS= debug_cmds.o extent_cmds.o tst_cmds.o debugfs.o util.o \
 	create_inode_libarchive.o journal.o revoke.o recovery.o \
 	do_journal.o do_orphan.o
 
+QUOTA_LIB_OBJS= mkquota.o quotaio.o quotaio_v2.o quotaio_tree.o dict.o
+
 DEBUG_SRCS= debug_cmds.c extent_cmds.c tst_cmds.c \
 	$(top_srcdir)/debugfs/debugfs.c \
 	$(top_srcdir)/debugfs/util.c \
@@ -57,6 +59,7 @@ DEBUG_SRCS= debug_cmds.c extent_cmds.c tst_cmds.c \
 @TDB_CMT@TDB_OBJ= tdb.o
 
 OBJS= $(DEBUGFS_LIB_OBJS) $(RESIZE_LIB_OBJS) $(E2IMAGE_LIB_OBJS) \
+	$(QUOTA_LIB_OBJS) \
 	$(TEST_IO_LIB_OBJS) \
 	ext2_err.o \
 	alloc.o \
@@ -236,6 +239,7 @@ SRCS= ext2_err.c \
 
 HFILES= bitops.h ext2fs.h ext2_io.h ext2_fs.h ext2_ext_attr.h ext3_extents.h \
 	tdb.h qcow2.h hashmap.h
+QUOTA_HFILES= quotaio.h dqblk_v2.h quotaio_tree.h dict.h
 HFILES_IN=  ext2_err.h ext2_types.h
 
 LIBRARY= libext2fs
@@ -459,6 +463,41 @@ do_orphan.o: $(top_srcdir)/debugfs/do_orphan.c
 	$(E) "	CC $<"
 	$(Q) $(CC) $(DEBUGFS_CFLAGS) -c $< -o $@
 
+mkquota.o: $(top_srcdir)/lib/support/mkquota.c
+	$(E) "	CC $<"
+	$(Q) $(CC) -I$(top_srcdir)/lib/support $(ALL_CFLAGS_STLIB) -c $< -o $@
+@PROFILE_CMT@	$(Q) $(CC) -I$(top_srcdir)/lib/support $(ALL_CFLAGS_STLIB) -g -pg -o profiled/$*.o -c $<
+@ELF_CMT@	$(Q) $(CC) -I$(top_srcdir)/lib/support $(ALL_CFLAGS_SHLIB) -fPIC -shared -o elfshared/$*.o -c $<
+@BSDLIB_CMT@	$(Q) $(CC) -I$(top_srcdir)/lib/support $(ALL_CFLAGS_SHLIB) $(BSDLIB_PIC_FLAG) -o pic/$*.o -c $<
+
+quotaio.o: $(top_srcdir)/lib/support/quotaio.c
+	$(E) "	CC $<"
+	$(Q) $(CC) -I$(top_srcdir)/lib/support $(ALL_CFLAGS_STLIB) -c $< -o $@
+@PROFILE_CMT@	$(Q) $(CC) -I$(top_srcdir)/lib/support $(ALL_CFLAGS_STLIB) -g -pg -o profiled/$*.o -c $<
+@ELF_CMT@	$(Q) $(CC) -I$(top_srcdir)/lib/support $(ALL_CFLAGS_SHLIB) -fPIC -shared -o elfshared/$*.o -c $<
+@BSDLIB_CMT@	$(Q) $(CC) -I$(top_srcdir)/lib/support $(ALL_CFLAGS_SHLIB) $(BSDLIB_PIC_FLAG) -o pic/$*.o -c $<
+
+quotaio_v2.o: $(top_srcdir)/lib/support/quotaio_v2.c
+	$(E) "	CC $<"
+	$(Q) $(CC) -I$(top_srcdir)/lib/support $(ALL_CFLAGS_STLIB) -c $< -o $@
+@PROFILE_CMT@	$(Q) $(CC) -I$(top_srcdir)/lib/support $(ALL_CFLAGS_STLIB) -g -pg -o profiled/$*.o -c $<
+@ELF_CMT@	$(Q) $(CC) -I$(top_srcdir)/lib/support $(ALL_CFLAGS_SHLIB) -fPIC -shared -o elfshared/$*.o -c $<
+@BSDLIB_CMT@	$(Q) $(CC) -I$(top_srcdir)/lib/support $(ALL_CFLAGS_SHLIB) $(BSDLIB_PIC_FLAG) -o pic/$*.o -c $<
+
+quotaio_tree.o: $(top_srcdir)/lib/support/quotaio_tree.c
+	$(E) "	CC $<"
+	$(Q) $(CC) -I$(top_srcdir)/lib/support $(ALL_CFLAGS_STLIB) -c $< -o $@
+@PROFILE_CMT@	$(Q) $(CC) -I$(top_srcdir)/lib/support $(ALL_CFLAGS_STLIB) -g -pg -o profiled/$*.o -c $<
+@ELF_CMT@	$(Q) $(CC) -I$(top_srcdir)/lib/support $(ALL_CFLAGS_SHLIB) -fPIC -shared -o elfshared/$*.o -c $<
+@BSDLIB_CMT@	$(Q) $(CC) -I$(top_srcdir)/lib/support $(ALL_CFLAGS_SHLIB) $(BSDLIB_PIC_FLAG) -o pic/$*.o -c $<
+
+dict.o: $(top_srcdir)/lib/support/dict.c
+	$(E) "	CC $<"
+	$(Q) $(CC) -I$(top_srcdir)/lib/support $(ALL_CFLAGS_STLIB) -c $< -o $@
+@PROFILE_CMT@	$(Q) $(CC) -I$(top_srcdir)/lib/support $(ALL_CFLAGS_STLIB) -g -pg -o profiled/$*.o -c $<
+@ELF_CMT@	$(Q) $(CC) -I$(top_srcdir)/lib/support $(ALL_CFLAGS_SHLIB) -fPIC -shared -o elfshared/$*.o -c $<
+@BSDLIB_CMT@	$(Q) $(CC) -I$(top_srcdir)/lib/support $(ALL_CFLAGS_SHLIB) $(BSDLIB_PIC_FLAG) -o pic/$*.o -c $<
+
 xattrs.o: $(top_srcdir)/debugfs/xattrs.c
 	$(E) "	CC $<"
 	$(Q) $(CC) $(DEBUGFS_CFLAGS) -c $< -o $@
@@ -586,6 +625,10 @@ install:: all $(HFILES) $(HFILES_IN) installdirs ext2fs.pc
 		echo "	INSTALL_DATA $(includedir)/ext2fs/$$i"; \
 		$(INSTALL_DATA) $(srcdir)/$$i $(DESTDIR)$(includedir)/ext2fs/$$i; \
 	done
+	$(Q) for i in $(QUOTA_HFILES); do \
+		echo "	INSTALL_DATA $(includedir)/ext2fs/$$i"; \
+		$(INSTALL_DATA) $(top_srcdir)/lib/support/$$i $(DESTDIR)$(includedir)/ext2fs/$$i; \
+	done
 	$(Q) for i in $(HFILES_IN); do \
 		echo "	INSTALL_DATA $(includedir)/ext2fs/$$i"; \
 		$(INSTALL_DATA) $$i $(DESTDIR)$(includedir)/ext2fs/$$i; \
-- 
2.43.7


^ permalink raw reply related

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox