* [RFC PATCH 1/3] libfs: rework dcache_readdir to use an internal function with callback
2025-03-18 19:41 [RFC PATCH 0/3] create simple libfs directory iterator and make efivarfs use it James Bottomley
@ 2025-03-18 19:41 ` James Bottomley
2025-03-18 21:32 ` Ard Biesheuvel
2025-03-18 19:41 ` [RFC PATCH 2/3] libfs: add simple directory iteration " James Bottomley
` (2 subsequent siblings)
3 siblings, 1 reply; 13+ messages in thread
From: James Bottomley @ 2025-03-18 19:41 UTC (permalink / raw)
To: linux-efi, linux-fsdevel; +Cc: Ard Biesheuvel, Christian Brauner, Al Viro
No functional change. Preparatory to using the internal function to
iterate a directory with just a dentry not a file.
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>
---
fs/libfs.c | 41 +++++++++++++++++++++++++++++------------
1 file changed, 29 insertions(+), 12 deletions(-)
diff --git a/fs/libfs.c b/fs/libfs.c
index 8444f5cc4064..816bfe6c0430 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -189,28 +189,21 @@ EXPORT_SYMBOL(dcache_dir_lseek);
* for ramfs-type trees they can't go away without unlink() or rmdir(),
* both impossible due to the lock on directory.
*/
-
-int dcache_readdir(struct file *file, struct dir_context *ctx)
+static void internal_readdir(struct dentry *dentry, struct dentry *cursor,
+ void *data, bool start,
+ bool (*callback)(void *, struct dentry *))
{
- struct dentry *dentry = file->f_path.dentry;
- struct dentry *cursor = file->private_data;
struct dentry *next = NULL;
struct hlist_node **p;
- if (!dir_emit_dots(file, ctx))
- return 0;
-
- if (ctx->pos == 2)
+ if (start)
p = &dentry->d_children.first;
else
p = &cursor->d_sib.next;
while ((next = scan_positives(cursor, p, 1, next)) != NULL) {
- if (!dir_emit(ctx, next->d_name.name, next->d_name.len,
- d_inode(next)->i_ino,
- fs_umode_to_dtype(d_inode(next)->i_mode)))
+ if (!callback(data, next))
break;
- ctx->pos++;
p = &next->d_sib.next;
}
spin_lock(&dentry->d_lock);
@@ -219,6 +212,30 @@ int dcache_readdir(struct file *file, struct dir_context *ctx)
hlist_add_before(&cursor->d_sib, &next->d_sib);
spin_unlock(&dentry->d_lock);
dput(next);
+}
+
+static bool dcache_readdir_callback(void *data, struct dentry *entry)
+{
+ struct dir_context *ctx = data;
+
+ if (!dir_emit(ctx, entry->d_name.name, entry->d_name.len,
+ d_inode(entry)->i_ino,
+ fs_umode_to_dtype(d_inode(entry)->i_mode)))
+ return false;
+ ctx->pos++;
+ return true;
+}
+
+int dcache_readdir(struct file *file, struct dir_context *ctx)
+{
+ struct dentry *dentry = file->f_path.dentry;
+ struct dentry *cursor = file->private_data;
+
+ if (!dir_emit_dots(file, ctx))
+ return 0;
+
+ internal_readdir(dentry, cursor, ctx, ctx->pos == 2,
+ dcache_readdir_callback);
return 0;
}
--
2.43.0
^ permalink raw reply related [flat|nested] 13+ messages in thread* Re: [RFC PATCH 1/3] libfs: rework dcache_readdir to use an internal function with callback
2025-03-18 19:41 ` [RFC PATCH 1/3] libfs: rework dcache_readdir to use an internal function with callback James Bottomley
@ 2025-03-18 21:32 ` Ard Biesheuvel
2025-03-18 21:49 ` James Bottomley
0 siblings, 1 reply; 13+ messages in thread
From: Ard Biesheuvel @ 2025-03-18 21:32 UTC (permalink / raw)
To: James Bottomley; +Cc: linux-efi, linux-fsdevel, Christian Brauner, Al Viro
Hi James,
Thanks for persisting with this.
On Tue, 18 Mar 2025 at 20:44, James Bottomley
<James.Bottomley@hansenpartnership.com> wrote:
>
> No functional change. Preparatory to using the internal function to
> iterate a directory with just a dentry not a file.
>
> Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>
> ---
> fs/libfs.c | 41 +++++++++++++++++++++++++++++------------
> 1 file changed, 29 insertions(+), 12 deletions(-)
>
> diff --git a/fs/libfs.c b/fs/libfs.c
> index 8444f5cc4064..816bfe6c0430 100644
> --- a/fs/libfs.c
> +++ b/fs/libfs.c
> @@ -189,28 +189,21 @@ EXPORT_SYMBOL(dcache_dir_lseek);
> * for ramfs-type trees they can't go away without unlink() or rmdir(),
> * both impossible due to the lock on directory.
> */
> -
> -int dcache_readdir(struct file *file, struct dir_context *ctx)
> +static void internal_readdir(struct dentry *dentry, struct dentry *cursor,
It might make sense to make this __always_inline, so that the callback
argument is guaranteed to become a compile time constant when the
caller is dcache_readdir(). Otherwise, the indirect call overhead
might impact its performance.
> + void *data, bool start,
> + bool (*callback)(void *, struct dentry *))
> {
> - struct dentry *dentry = file->f_path.dentry;
> - struct dentry *cursor = file->private_data;
> struct dentry *next = NULL;
> struct hlist_node **p;
>
> - if (!dir_emit_dots(file, ctx))
> - return 0;
> -
> - if (ctx->pos == 2)
> + if (start)
> p = &dentry->d_children.first;
> else
> p = &cursor->d_sib.next;
>
> while ((next = scan_positives(cursor, p, 1, next)) != NULL) {
> - if (!dir_emit(ctx, next->d_name.name, next->d_name.len,
> - d_inode(next)->i_ino,
> - fs_umode_to_dtype(d_inode(next)->i_mode)))
> + if (!callback(data, next))
> break;
> - ctx->pos++;
> p = &next->d_sib.next;
> }
> spin_lock(&dentry->d_lock);
> @@ -219,6 +212,30 @@ int dcache_readdir(struct file *file, struct dir_context *ctx)
> hlist_add_before(&cursor->d_sib, &next->d_sib);
> spin_unlock(&dentry->d_lock);
> dput(next);
> +}
> +
> +static bool dcache_readdir_callback(void *data, struct dentry *entry)
> +{
> + struct dir_context *ctx = data;
> +
> + if (!dir_emit(ctx, entry->d_name.name, entry->d_name.len,
> + d_inode(entry)->i_ino,
> + fs_umode_to_dtype(d_inode(entry)->i_mode)))
> + return false;
> + ctx->pos++;
> + return true;
> +}
> +
> +int dcache_readdir(struct file *file, struct dir_context *ctx)
> +{
> + struct dentry *dentry = file->f_path.dentry;
> + struct dentry *cursor = file->private_data;
> +
> + if (!dir_emit_dots(file, ctx))
> + return 0;
> +
> + internal_readdir(dentry, cursor, ctx, ctx->pos == 2,
> + dcache_readdir_callback);
>
> return 0;
> }
> --
> 2.43.0
>
^ permalink raw reply [flat|nested] 13+ messages in thread* Re: [RFC PATCH 1/3] libfs: rework dcache_readdir to use an internal function with callback
2025-03-18 21:32 ` Ard Biesheuvel
@ 2025-03-18 21:49 ` James Bottomley
0 siblings, 0 replies; 13+ messages in thread
From: James Bottomley @ 2025-03-18 21:49 UTC (permalink / raw)
To: Ard Biesheuvel; +Cc: linux-efi, linux-fsdevel, Christian Brauner, Al Viro
On Tue, 2025-03-18 at 22:32 +0100, Ard Biesheuvel wrote:
> Hi James,
>
> Thanks for persisting with this.
Heh, well, it is starting to feel a bit like the swamp I can't get out
of ...
> On Tue, 18 Mar 2025 at 20:44, James Bottomley
> <James.Bottomley@hansenpartnership.com> wrote:
> >
> > No functional change. Preparatory to using the internal function
> > to iterate a directory with just a dentry not a file.
> >
> > Signed-off-by: James Bottomley
> > <James.Bottomley@HansenPartnership.com>
> > ---
> > fs/libfs.c | 41 +++++++++++++++++++++++++++++------------
> > 1 file changed, 29 insertions(+), 12 deletions(-)
> >
> > diff --git a/fs/libfs.c b/fs/libfs.c
> > index 8444f5cc4064..816bfe6c0430 100644
> > --- a/fs/libfs.c
> > +++ b/fs/libfs.c
> > @@ -189,28 +189,21 @@ EXPORT_SYMBOL(dcache_dir_lseek);
> > * for ramfs-type trees they can't go away without unlink() or
> > rmdir(),
> > * both impossible due to the lock on directory.
> > */
> > -
> > -int dcache_readdir(struct file *file, struct dir_context *ctx)
> > +static void internal_readdir(struct dentry *dentry, struct dentry
> > *cursor,
>
> It might make sense to make this __always_inline, so that the
> callback argument is guaranteed to become a compile time constant
> when the caller is dcache_readdir(). Otherwise, the indirect call
> overhead might impact its performance.
I was hoping the compiler would pick that up ... especially as it's a
tail call, but I can add it if necessary.
Regards,
James
^ permalink raw reply [flat|nested] 13+ messages in thread
* [RFC PATCH 2/3] libfs: add simple directory iteration function with callback
2025-03-18 19:41 [RFC PATCH 0/3] create simple libfs directory iterator and make efivarfs use it James Bottomley
2025-03-18 19:41 ` [RFC PATCH 1/3] libfs: rework dcache_readdir to use an internal function with callback James Bottomley
@ 2025-03-18 19:41 ` James Bottomley
2025-03-18 21:33 ` Ard Biesheuvel
2025-03-18 19:41 ` [RFC PATCH 3/3] efivarfs: replace iterate_dir with libfs function simple_iterate_call James Bottomley
2025-03-18 23:45 ` [RFC PATCH 0/3] create simple libfs directory iterator and make efivarfs use it Al Viro
3 siblings, 1 reply; 13+ messages in thread
From: James Bottomley @ 2025-03-18 19:41 UTC (permalink / raw)
To: linux-efi, linux-fsdevel; +Cc: Ard Biesheuvel, Christian Brauner, Al Viro
The current iterate_dir() infrastructure is somewhat cumbersome to use
from within the kernel. Introduce a lighter weight
simple_iterate_dir() function that directly iterates the directory and
executes a callback for each positive dentry.
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>
---
fs/libfs.c | 33 +++++++++++++++++++++++++++++++++
include/linux/fs.h | 2 ++
2 files changed, 35 insertions(+)
diff --git a/fs/libfs.c b/fs/libfs.c
index 816bfe6c0430..37da5fe25242 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -214,6 +214,39 @@ static void internal_readdir(struct dentry *dentry, struct dentry *cursor,
dput(next);
}
+/**
+ * generic_iterate_call - iterate all entries executing @callback
+ *
+ * @dir: directory to iterate over
+ * @data: data passed to callback
+ * @callback: callback to call
+ *
+ * Iterates over all positive dentries that are direct children of
+ * @dir (so doesn't include . and ..) and executes the callback for
+ * each of them. Note that because there's no struct *mnt, the caller
+ * is responsible for pinning the filesystem.
+ *
+ * If the @callback returns true, the iteration will continue and if
+ * it returns @false, it will stop (note that since the cursor is
+ * destroyed the next invocation will go back to the beginning again).
+ *
+ */
+int simple_iterate_call(struct dentry *dir, void *data,
+ bool (*callback)(void *, struct dentry *))
+{
+ struct dentry *cursor = d_alloc_cursor(dir);
+
+ if (!cursor)
+ return -ENOMEM;
+
+ internal_readdir(dir, cursor, data, true, callback);
+
+ dput(cursor);
+
+ return 0;
+}
+EXPORT_SYMBOL(simple_iterate_call);
+
static bool dcache_readdir_callback(void *data, struct dentry *entry)
{
struct dir_context *ctx = data;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 2788df98080f..a84896f0b2d1 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -3531,6 +3531,8 @@ extern int simple_rename(struct mnt_idmap *, struct inode *,
unsigned int);
extern void simple_recursive_removal(struct dentry *,
void (*callback)(struct dentry *));
+extern int simple_iterate_call(struct dentry *dir, void *data,
+ bool (*callback)(void *, struct dentry *));
extern int noop_fsync(struct file *, loff_t, loff_t, int);
extern ssize_t noop_direct_IO(struct kiocb *iocb, struct iov_iter *iter);
extern int simple_empty(struct dentry *);
--
2.43.0
^ permalink raw reply related [flat|nested] 13+ messages in thread* Re: [RFC PATCH 2/3] libfs: add simple directory iteration function with callback
2025-03-18 19:41 ` [RFC PATCH 2/3] libfs: add simple directory iteration " James Bottomley
@ 2025-03-18 21:33 ` Ard Biesheuvel
2025-03-18 21:50 ` James Bottomley
0 siblings, 1 reply; 13+ messages in thread
From: Ard Biesheuvel @ 2025-03-18 21:33 UTC (permalink / raw)
To: James Bottomley; +Cc: linux-efi, linux-fsdevel, Christian Brauner, Al Viro
On Tue, 18 Mar 2025 at 20:45, James Bottomley
<James.Bottomley@hansenpartnership.com> wrote:
>
> The current iterate_dir() infrastructure is somewhat cumbersome to use
> from within the kernel. Introduce a lighter weight
> simple_iterate_dir() function that directly iterates the directory and
> executes a callback for each positive dentry.
>
> Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>
> ---
> fs/libfs.c | 33 +++++++++++++++++++++++++++++++++
> include/linux/fs.h | 2 ++
> 2 files changed, 35 insertions(+)
>
> diff --git a/fs/libfs.c b/fs/libfs.c
> index 816bfe6c0430..37da5fe25242 100644
> --- a/fs/libfs.c
> +++ b/fs/libfs.c
> @@ -214,6 +214,39 @@ static void internal_readdir(struct dentry *dentry, struct dentry *cursor,
> dput(next);
> }
>
> +/**
> + * generic_iterate_call - iterate all entries executing @callback
This name doesn't match the name below.
> + *
> + * @dir: directory to iterate over
> + * @data: data passed to callback
> + * @callback: callback to call
> + *
> + * Iterates over all positive dentries that are direct children of
> + * @dir (so doesn't include . and ..) and executes the callback for
> + * each of them. Note that because there's no struct *mnt, the caller
> + * is responsible for pinning the filesystem.
> + *
> + * If the @callback returns true, the iteration will continue and if
> + * it returns @false, it will stop (note that since the cursor is
> + * destroyed the next invocation will go back to the beginning again).
> + *
> + */
> +int simple_iterate_call(struct dentry *dir, void *data,
> + bool (*callback)(void *, struct dentry *))
> +{
> + struct dentry *cursor = d_alloc_cursor(dir);
> +
> + if (!cursor)
> + return -ENOMEM;
> +
> + internal_readdir(dir, cursor, data, true, callback);
> +
> + dput(cursor);
> +
> + return 0;
> +}
> +EXPORT_SYMBOL(simple_iterate_call);
> +
> static bool dcache_readdir_callback(void *data, struct dentry *entry)
> {
> struct dir_context *ctx = data;
> diff --git a/include/linux/fs.h b/include/linux/fs.h
> index 2788df98080f..a84896f0b2d1 100644
> --- a/include/linux/fs.h
> +++ b/include/linux/fs.h
> @@ -3531,6 +3531,8 @@ extern int simple_rename(struct mnt_idmap *, struct inode *,
> unsigned int);
> extern void simple_recursive_removal(struct dentry *,
> void (*callback)(struct dentry *));
> +extern int simple_iterate_call(struct dentry *dir, void *data,
> + bool (*callback)(void *, struct dentry *));
> extern int noop_fsync(struct file *, loff_t, loff_t, int);
> extern ssize_t noop_direct_IO(struct kiocb *iocb, struct iov_iter *iter);
> extern int simple_empty(struct dentry *);
> --
> 2.43.0
>
^ permalink raw reply [flat|nested] 13+ messages in thread* Re: [RFC PATCH 2/3] libfs: add simple directory iteration function with callback
2025-03-18 21:33 ` Ard Biesheuvel
@ 2025-03-18 21:50 ` James Bottomley
0 siblings, 0 replies; 13+ messages in thread
From: James Bottomley @ 2025-03-18 21:50 UTC (permalink / raw)
To: Ard Biesheuvel; +Cc: linux-efi, linux-fsdevel, Christian Brauner, Al Viro
On Tue, 2025-03-18 at 22:33 +0100, Ard Biesheuvel wrote:
> On Tue, 18 Mar 2025 at 20:45, James Bottomley
> <James.Bottomley@hansenpartnership.com> wrote:
> >
> > The current iterate_dir() infrastructure is somewhat cumbersome to
> > use from within the kernel. Introduce a lighter weight
> > simple_iterate_dir() function that directly iterates the directory
> > and executes a callback for each positive dentry.
> >
> > Signed-off-by: James Bottomley
> > <James.Bottomley@HansenPartnership.com>
> > ---
> > fs/libfs.c | 33 +++++++++++++++++++++++++++++++++
> > include/linux/fs.h | 2 ++
> > 2 files changed, 35 insertions(+)
> >
> > diff --git a/fs/libfs.c b/fs/libfs.c
> > index 816bfe6c0430..37da5fe25242 100644
> > --- a/fs/libfs.c
> > +++ b/fs/libfs.c
> > @@ -214,6 +214,39 @@ static void internal_readdir(struct dentry
> > *dentry, struct dentry *cursor,
> > dput(next);
> > }
> >
> > +/**
> > + * generic_iterate_call - iterate all entries executing @callback
>
> This name doesn't match the name below.
Right, I started out thinking the generic_ prefix was the preferred
one, but then simple_ looked better and I forgot to update the docbook.
Regards,
James
^ permalink raw reply [flat|nested] 13+ messages in thread
* [RFC PATCH 3/3] efivarfs: replace iterate_dir with libfs function simple_iterate_call
2025-03-18 19:41 [RFC PATCH 0/3] create simple libfs directory iterator and make efivarfs use it James Bottomley
2025-03-18 19:41 ` [RFC PATCH 1/3] libfs: rework dcache_readdir to use an internal function with callback James Bottomley
2025-03-18 19:41 ` [RFC PATCH 2/3] libfs: add simple directory iteration " James Bottomley
@ 2025-03-18 19:41 ` James Bottomley
2025-03-18 21:34 ` Ard Biesheuvel
2025-03-18 23:45 ` [RFC PATCH 0/3] create simple libfs directory iterator and make efivarfs use it Al Viro
3 siblings, 1 reply; 13+ messages in thread
From: James Bottomley @ 2025-03-18 19:41 UTC (permalink / raw)
To: linux-efi, linux-fsdevel; +Cc: Ard Biesheuvel, Christian Brauner, Al Viro
This relieves us of the requirement to have a struct path and use file
operations, which greatly simplifies the code. The superblock is now
pinned by the blocking notifier (which is why deregistration moves
above kill_litter_super).
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>
---
fs/efivarfs/super.c | 103 +++-----------------------------------------
1 file changed, 7 insertions(+), 96 deletions(-)
diff --git a/fs/efivarfs/super.c b/fs/efivarfs/super.c
index 81b3c6b7e100..135b0bb9b4b5 100644
--- a/fs/efivarfs/super.c
+++ b/fs/efivarfs/super.c
@@ -393,29 +393,13 @@ static const struct fs_context_operations efivarfs_context_ops = {
.reconfigure = efivarfs_reconfigure,
};
-struct efivarfs_ctx {
- struct dir_context ctx;
- struct super_block *sb;
- struct dentry *dentry;
-};
-
-static bool efivarfs_actor(struct dir_context *ctx, const char *name, int len,
- loff_t offset, u64 ino, unsigned mode)
+static bool efivarfs_iterate_callback(void *data, struct dentry *dentry)
{
unsigned long size;
- struct efivarfs_ctx *ectx = container_of(ctx, struct efivarfs_ctx, ctx);
- struct qstr qstr = { .name = name, .len = len };
- struct dentry *dentry = d_hash_and_lookup(ectx->sb->s_root, &qstr);
- struct inode *inode;
- struct efivar_entry *entry;
+ struct inode *inode = d_inode(dentry);
+ struct efivar_entry *entry = efivar_entry(inode);
int err;
- if (IS_ERR_OR_NULL(dentry))
- return true;
-
- inode = d_inode(dentry);
- entry = efivar_entry(inode);
-
err = efivar_entry_size(entry, &size);
size += sizeof(__u32); /* attributes */
if (err)
@@ -426,12 +410,10 @@ static bool efivarfs_actor(struct dir_context *ctx, const char *name, int len,
inode_unlock(inode);
if (!size) {
- ectx->dentry = dentry;
- return false;
+ pr_info("efivarfs: removing variable %pd\n", dentry);
+ simple_recursive_removal(dentry, NULL);
}
- dput(dentry);
-
return true;
}
@@ -474,33 +456,11 @@ static int efivarfs_check_missing(efi_char16_t *name16, efi_guid_t vendor,
return err;
}
-static void efivarfs_deactivate_super_work(struct work_struct *work)
-{
- struct super_block *s = container_of(work, struct super_block,
- destroy_work);
- /*
- * note: here s->destroy_work is free for reuse (which
- * will happen in deactivate_super)
- */
- deactivate_super(s);
-}
-
-static struct file_system_type efivarfs_type;
-
static int efivarfs_pm_notify(struct notifier_block *nb, unsigned long action,
void *ptr)
{
struct efivarfs_fs_info *sfi = container_of(nb, struct efivarfs_fs_info,
pm_nb);
- struct path path;
- struct efivarfs_ctx ectx = {
- .ctx = {
- .actor = efivarfs_actor,
- },
- .sb = sfi->sb,
- };
- struct file *file;
- struct super_block *s = sfi->sb;
static bool rescan_done = true;
if (action == PM_HIBERNATION_PREPARE) {
@@ -513,64 +473,15 @@ static int efivarfs_pm_notify(struct notifier_block *nb, unsigned long action,
if (rescan_done)
return NOTIFY_DONE;
- /* ensure single superblock is alive and pin it */
- if (!atomic_inc_not_zero(&s->s_active))
- return NOTIFY_DONE;
-
pr_info("efivarfs: resyncing variable state\n");
- path.dentry = sfi->sb->s_root;
-
- /*
- * do not add SB_KERNMOUNT which a single superblock could
- * expose to userspace and which also causes MNT_INTERNAL, see
- * below
- */
- path.mnt = vfs_kern_mount(&efivarfs_type, 0,
- efivarfs_type.name, NULL);
- if (IS_ERR(path.mnt)) {
- pr_err("efivarfs: internal mount failed\n");
- /*
- * We may be the last pinner of the superblock but
- * calling efivarfs_kill_sb from within the notifier
- * here would deadlock trying to unregister it
- */
- INIT_WORK(&s->destroy_work, efivarfs_deactivate_super_work);
- schedule_work(&s->destroy_work);
- return PTR_ERR(path.mnt);
- }
-
- /* path.mnt now has pin on superblock, so this must be above one */
- atomic_dec(&s->s_active);
-
- file = kernel_file_open(&path, O_RDONLY | O_DIRECTORY | O_NOATIME,
- current_cred());
- /*
- * safe even if last put because no MNT_INTERNAL means this
- * will do delayed deactivate_super and not deadlock
- */
- mntput(path.mnt);
- if (IS_ERR(file))
- return NOTIFY_DONE;
-
rescan_done = true;
/*
* First loop over the directory and verify each entry exists,
* removing it if it doesn't
*/
- file->f_pos = 2; /* skip . and .. */
- do {
- ectx.dentry = NULL;
- iterate_dir(file, &ectx.ctx);
- if (ectx.dentry) {
- pr_info("efivarfs: removing variable %pd\n",
- ectx.dentry);
- simple_recursive_removal(ectx.dentry, NULL);
- dput(ectx.dentry);
- }
- } while (ectx.dentry);
- fput(file);
+ simple_iterate_call(sfi->sb->s_root, NULL, efivarfs_iterate_callback);
/*
* then loop over variables, creating them if there's no matching
@@ -609,8 +520,8 @@ static void efivarfs_kill_sb(struct super_block *sb)
struct efivarfs_fs_info *sfi = sb->s_fs_info;
blocking_notifier_chain_unregister(&efivar_ops_nh, &sfi->nb);
- kill_litter_super(sb);
unregister_pm_notifier(&sfi->pm_nb);
+ kill_litter_super(sb);
kfree(sfi);
}
--
2.43.0
^ permalink raw reply related [flat|nested] 13+ messages in thread* Re: [RFC PATCH 3/3] efivarfs: replace iterate_dir with libfs function simple_iterate_call
2025-03-18 19:41 ` [RFC PATCH 3/3] efivarfs: replace iterate_dir with libfs function simple_iterate_call James Bottomley
@ 2025-03-18 21:34 ` Ard Biesheuvel
0 siblings, 0 replies; 13+ messages in thread
From: Ard Biesheuvel @ 2025-03-18 21:34 UTC (permalink / raw)
To: James Bottomley; +Cc: linux-efi, linux-fsdevel, Christian Brauner, Al Viro
On Tue, 18 Mar 2025 at 20:46, James Bottomley
<James.Bottomley@hansenpartnership.com> wrote:
>
> This relieves us of the requirement to have a struct path and use file
> operations, which greatly simplifies the code. The superblock is now
> pinned by the blocking notifier (which is why deregistration moves
> above kill_litter_super).
>
> Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>
> ---
> fs/efivarfs/super.c | 103 +++-----------------------------------------
> 1 file changed, 7 insertions(+), 96 deletions(-)
>
I like this a lot. Assuming it gets blessed by the VFS maintainers,
Reviewed-by: Ard Biesheuvel <ardb@kernel.org>
> diff --git a/fs/efivarfs/super.c b/fs/efivarfs/super.c
> index 81b3c6b7e100..135b0bb9b4b5 100644
> --- a/fs/efivarfs/super.c
> +++ b/fs/efivarfs/super.c
> @@ -393,29 +393,13 @@ static const struct fs_context_operations efivarfs_context_ops = {
> .reconfigure = efivarfs_reconfigure,
> };
>
> -struct efivarfs_ctx {
> - struct dir_context ctx;
> - struct super_block *sb;
> - struct dentry *dentry;
> -};
> -
> -static bool efivarfs_actor(struct dir_context *ctx, const char *name, int len,
> - loff_t offset, u64 ino, unsigned mode)
> +static bool efivarfs_iterate_callback(void *data, struct dentry *dentry)
> {
> unsigned long size;
> - struct efivarfs_ctx *ectx = container_of(ctx, struct efivarfs_ctx, ctx);
> - struct qstr qstr = { .name = name, .len = len };
> - struct dentry *dentry = d_hash_and_lookup(ectx->sb->s_root, &qstr);
> - struct inode *inode;
> - struct efivar_entry *entry;
> + struct inode *inode = d_inode(dentry);
> + struct efivar_entry *entry = efivar_entry(inode);
> int err;
>
> - if (IS_ERR_OR_NULL(dentry))
> - return true;
> -
> - inode = d_inode(dentry);
> - entry = efivar_entry(inode);
> -
> err = efivar_entry_size(entry, &size);
> size += sizeof(__u32); /* attributes */
> if (err)
> @@ -426,12 +410,10 @@ static bool efivarfs_actor(struct dir_context *ctx, const char *name, int len,
> inode_unlock(inode);
>
> if (!size) {
> - ectx->dentry = dentry;
> - return false;
> + pr_info("efivarfs: removing variable %pd\n", dentry);
> + simple_recursive_removal(dentry, NULL);
> }
>
> - dput(dentry);
> -
> return true;
> }
>
> @@ -474,33 +456,11 @@ static int efivarfs_check_missing(efi_char16_t *name16, efi_guid_t vendor,
> return err;
> }
>
> -static void efivarfs_deactivate_super_work(struct work_struct *work)
> -{
> - struct super_block *s = container_of(work, struct super_block,
> - destroy_work);
> - /*
> - * note: here s->destroy_work is free for reuse (which
> - * will happen in deactivate_super)
> - */
> - deactivate_super(s);
> -}
> -
> -static struct file_system_type efivarfs_type;
> -
> static int efivarfs_pm_notify(struct notifier_block *nb, unsigned long action,
> void *ptr)
> {
> struct efivarfs_fs_info *sfi = container_of(nb, struct efivarfs_fs_info,
> pm_nb);
> - struct path path;
> - struct efivarfs_ctx ectx = {
> - .ctx = {
> - .actor = efivarfs_actor,
> - },
> - .sb = sfi->sb,
> - };
> - struct file *file;
> - struct super_block *s = sfi->sb;
> static bool rescan_done = true;
>
> if (action == PM_HIBERNATION_PREPARE) {
> @@ -513,64 +473,15 @@ static int efivarfs_pm_notify(struct notifier_block *nb, unsigned long action,
> if (rescan_done)
> return NOTIFY_DONE;
>
> - /* ensure single superblock is alive and pin it */
> - if (!atomic_inc_not_zero(&s->s_active))
> - return NOTIFY_DONE;
> -
> pr_info("efivarfs: resyncing variable state\n");
>
> - path.dentry = sfi->sb->s_root;
> -
> - /*
> - * do not add SB_KERNMOUNT which a single superblock could
> - * expose to userspace and which also causes MNT_INTERNAL, see
> - * below
> - */
> - path.mnt = vfs_kern_mount(&efivarfs_type, 0,
> - efivarfs_type.name, NULL);
> - if (IS_ERR(path.mnt)) {
> - pr_err("efivarfs: internal mount failed\n");
> - /*
> - * We may be the last pinner of the superblock but
> - * calling efivarfs_kill_sb from within the notifier
> - * here would deadlock trying to unregister it
> - */
> - INIT_WORK(&s->destroy_work, efivarfs_deactivate_super_work);
> - schedule_work(&s->destroy_work);
> - return PTR_ERR(path.mnt);
> - }
> -
> - /* path.mnt now has pin on superblock, so this must be above one */
> - atomic_dec(&s->s_active);
> -
> - file = kernel_file_open(&path, O_RDONLY | O_DIRECTORY | O_NOATIME,
> - current_cred());
> - /*
> - * safe even if last put because no MNT_INTERNAL means this
> - * will do delayed deactivate_super and not deadlock
> - */
> - mntput(path.mnt);
> - if (IS_ERR(file))
> - return NOTIFY_DONE;
> -
> rescan_done = true;
>
> /*
> * First loop over the directory and verify each entry exists,
> * removing it if it doesn't
> */
> - file->f_pos = 2; /* skip . and .. */
> - do {
> - ectx.dentry = NULL;
> - iterate_dir(file, &ectx.ctx);
> - if (ectx.dentry) {
> - pr_info("efivarfs: removing variable %pd\n",
> - ectx.dentry);
> - simple_recursive_removal(ectx.dentry, NULL);
> - dput(ectx.dentry);
> - }
> - } while (ectx.dentry);
> - fput(file);
> + simple_iterate_call(sfi->sb->s_root, NULL, efivarfs_iterate_callback);
>
> /*
> * then loop over variables, creating them if there's no matching
> @@ -609,8 +520,8 @@ static void efivarfs_kill_sb(struct super_block *sb)
> struct efivarfs_fs_info *sfi = sb->s_fs_info;
>
> blocking_notifier_chain_unregister(&efivar_ops_nh, &sfi->nb);
> - kill_litter_super(sb);
> unregister_pm_notifier(&sfi->pm_nb);
> + kill_litter_super(sb);
>
> kfree(sfi);
> }
> --
> 2.43.0
>
>
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [RFC PATCH 0/3] create simple libfs directory iterator and make efivarfs use it
2025-03-18 19:41 [RFC PATCH 0/3] create simple libfs directory iterator and make efivarfs use it James Bottomley
` (2 preceding siblings ...)
2025-03-18 19:41 ` [RFC PATCH 3/3] efivarfs: replace iterate_dir with libfs function simple_iterate_call James Bottomley
@ 2025-03-18 23:45 ` Al Viro
2025-03-18 23:49 ` Al Viro
2025-03-19 16:46 ` James Bottomley
3 siblings, 2 replies; 13+ messages in thread
From: Al Viro @ 2025-03-18 23:45 UTC (permalink / raw)
To: James Bottomley
Cc: linux-efi, linux-fsdevel, Ard Biesheuvel, Christian Brauner
On Tue, Mar 18, 2025 at 03:41:08PM -0400, James Bottomley wrote:
> [Note this is built on top of the previous patch to populate path.mnt]
>
> This turned out to be much simpler than I feared. The first patch
> breaks out the core of the current dcache_readdir() into an internal
> function with a callback (there should be no functional change). The
> second adds a new API, simple_iterate_call(), which loops over the
> dentries in the next level and executes a callback for each one and
> the third which removes all the efivarfs superblock and mnt crud and
> replaces it with this simple callback interface. I think the
> diffstats of the third patch demonstrate how much nicer it is for us:
I suspect that you are making it too generic for its own good.
dcache_readdir() needs to cope with the situation when there are
fuckloads of opened-and-unliked files in there. That's why we
play those games with cursors under if (need_resched()) there.
That's not the case for efivarfs. There you really want just
"grab a reference to the next positive, drop the reference we
were given" and that's it.
IOW, find_next_child() instead of scan_positives(). Export that
and it becomes just a simple loop -
child = NULL;
while ((child = find_next_child(parent, child)) != NULL) {
struct inode *inode = d_inode(child);
struct efivar_entry *entry = efivar_entry(inode);
err = efivar_entry_size(entry, &size);
inode_lock(inode);
i_size_write(inode, err ? 0 : size + sizeof(__u32));
inode_unlock(inode);
if (err)
simple_recursive_removal(child, NULL);
}
and that's it. No callbacks, no cursors, no iterators - just an
export of helper already there.
^ permalink raw reply [flat|nested] 13+ messages in thread* Re: [RFC PATCH 0/3] create simple libfs directory iterator and make efivarfs use it
2025-03-18 23:45 ` [RFC PATCH 0/3] create simple libfs directory iterator and make efivarfs use it Al Viro
@ 2025-03-18 23:49 ` Al Viro
2025-03-19 16:46 ` James Bottomley
1 sibling, 0 replies; 13+ messages in thread
From: Al Viro @ 2025-03-18 23:49 UTC (permalink / raw)
To: James Bottomley
Cc: linux-efi, linux-fsdevel, Ard Biesheuvel, Christian Brauner
On Tue, Mar 18, 2025 at 11:45:05PM +0000, Al Viro wrote:
> and it becomes just a simple loop -
> child = NULL;
> while ((child = find_next_child(parent, child)) != NULL) {
that being root, obviously.
And we might want a better function name than that...
^ permalink raw reply [flat|nested] 13+ messages in thread* Re: [RFC PATCH 0/3] create simple libfs directory iterator and make efivarfs use it
2025-03-18 23:45 ` [RFC PATCH 0/3] create simple libfs directory iterator and make efivarfs use it Al Viro
2025-03-18 23:49 ` Al Viro
@ 2025-03-19 16:46 ` James Bottomley
2025-03-19 18:45 ` James Bottomley
1 sibling, 1 reply; 13+ messages in thread
From: James Bottomley @ 2025-03-19 16:46 UTC (permalink / raw)
To: Al Viro; +Cc: linux-efi, linux-fsdevel, Ard Biesheuvel, Christian Brauner
On Tue, 2025-03-18 at 23:45 +0000, Al Viro wrote:
[...]
> I suspect that you are making it too generic for its own good.
>
> dcache_readdir() needs to cope with the situation when there are
> fuckloads of opened-and-unliked files in there. That's why we
> play those games with cursors under if (need_resched()) there.
Yes, but those games will never really activate for the efivarfs code
because we expect to be mostly positive. What I was aiming for was to
make sure there's no duplication of the subtle code, so someone can't
forget to update it in two places, so doing a callback approach seemed
natural.
> That's not the case for efivarfs. There you really want just
> "grab a reference to the next positive, drop the reference we
> were given" and that's it.
>
> IOW, find_next_child() instead of scan_positives(). Export that
> and it becomes just a simple loop -
> child = NULL;
> while ((child = find_next_child(parent, child)) != NULL) {
> struct inode *inode = d_inode(child);
> struct efivar_entry *entry = efivar_entry(inode);
>
> err = efivar_entry_size(entry, &size);
>
> inode_lock(inode);
> i_size_write(inode, err ? 0 : size + sizeof(__u32));
> inode_unlock(inode);
>
> if (err)
> simple_recursive_removal(child, NULL);
> }
> and that's it. No callbacks, no cursors, no iterators - just an
> export of helper already there.
So we don't mind the callbacks; we have to do it for efivar_init()
lower down anyway. However, I did take a cut at doing this based on
simple positive (see below). I assume you won't like the way I have to
allocate and toss a cursor for each iteration, nor the fact that
there's still the cond_resched() in there? I think I can fix that but
I'd have to slice apart simple_positive(), which is a bigger
undertaking.
> And we might want a better function name than that...
I went for simple_next_child() ...
Regards,
James
---
fs/libfs.c | 41 +++++++++++++++++++++++++++++++++++++++++
include/linux/fs.h | 2 ++
2 files changed, 43 insertions(+)
diff --git a/fs/libfs.c b/fs/libfs.c
index 8444f5cc4064..86f29cc2b85a 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -146,6 +146,47 @@ static struct dentry *scan_positives(struct dentry *cursor,
return found;
}
+/**
+ * simple_next_child - get next child of the parent directory
+ *
+ * @parent: the directory to scan
+ * @child: last child (or NULL to start at the beginning)
+ *
+ * returns the next positive child in sequence with the reference
+ * elevated but if a child is passed in will drop that
+ * reference. Returns NULL on either error or when directory has been
+ * fully scanned.
+ *
+ * The intended use is as an iterator because all the references will
+ * be dropped by the end of the while loop:
+ *
+ * child = NULL
+ * while(child = simple_next_child(parent, child)) {
+ * // do something
+ * }
+ */
+struct dentry *simple_next_child(struct dentry *parent, struct dentry *child)
+{
+ struct hlist_node **p;
+ struct dentry *cursor = d_alloc_cursor(parent);
+
+ if (!cursor) {
+ dput(child);
+ return NULL;
+ }
+
+ if (child)
+ p = &child->d_sib.next;
+ else
+ p = &parent->d_children.first;
+
+ child = scan_positives(cursor, p, 1, child);
+ dput(cursor);
+
+ return child;
+}
+EXPORT_SYMBOL(simple_next_child);
+
loff_t dcache_dir_lseek(struct file *file, loff_t offset, int whence)
{
struct dentry *dentry = file->f_path.dentry;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 2788df98080f..dd84d1c3b8af 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -3531,6 +3531,8 @@ extern int simple_rename(struct mnt_idmap *, struct inode *,
unsigned int);
extern void simple_recursive_removal(struct dentry *,
void (*callback)(struct dentry *));
+extern struct dentry *simple_next_child(struct dentry *parent,
+ struct dentry *child);
extern int noop_fsync(struct file *, loff_t, loff_t, int);
extern ssize_t noop_direct_IO(struct kiocb *iocb, struct iov_iter *iter);
extern int simple_empty(struct dentry *);
--
2.43.0
^ permalink raw reply related [flat|nested] 13+ messages in thread* Re: [RFC PATCH 0/3] create simple libfs directory iterator and make efivarfs use it
2025-03-19 16:46 ` James Bottomley
@ 2025-03-19 18:45 ` James Bottomley
0 siblings, 0 replies; 13+ messages in thread
From: James Bottomley @ 2025-03-19 18:45 UTC (permalink / raw)
To: Al Viro; +Cc: linux-efi, linux-fsdevel, Ard Biesheuvel, Christian Brauner
On Wed, 2025-03-19 at 12:46 -0400, James Bottomley wrote:
> I assume you won't like the way I have to allocate and toss a cursor
> for each iteration, nor the fact that there's still the
> cond_resched() in there? I think I can fix that but I'd have to
> slice apart simple_positive(), which is a bigger undertaking.
So this is what I think that would look like: it still exports the same
simple_next_child() function but doesn't need to allocate cursors or do
any of the cond_resched() stuff. The down side is that the slice is a
lot less easily verified than the other two patches, so this one's
going to need a lot of code inspection to make sure I got it right.
Regards,
James
---
fs/libfs.c | 85 +++++++++++++++++++++++++++++++++++++++-------
include/linux/fs.h | 2 ++
2 files changed, 74 insertions(+), 13 deletions(-)
diff --git a/fs/libfs.c b/fs/libfs.c
index 8444f5cc4064..3a32910f44dc 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -101,6 +101,34 @@ int dcache_dir_close(struct inode *inode, struct
file *file)
}
EXPORT_SYMBOL(dcache_dir_close);
+/*
+ * helper for positive scanning. Requires a nested while loop which should
+ * be broken if this returns false
+ */
+static bool internal_scan_positive(struct hlist_node **p, struct dentry *d,
+ struct dentry **found, loff_t *count)
+{
+ while (*p) {
+ p = &d->d_sib.next;
+
+ // we must at least skip cursors, to avoid livelocks
+ if (d->d_flags & DCACHE_DENTRY_CURSOR)
+ continue;
+
+ if (simple_positive(d) && !--*count) {
+ spin_lock_nested(&d->d_lock, DENTRY_D_LOCK_NESTED);
+ if (simple_positive(d))
+ *found = dget_dlock(d);
+ spin_unlock(&d->d_lock);
+ if (likely(*found))
+ return true;
+ *count = 1;
+ }
+ return false;
+ }
+ return true;
+}
+
/* parent is locked at least shared */
/*
* Returns an element of siblings' list.
@@ -118,19 +146,9 @@ static struct dentry *scan_positives(struct dentry *cursor,
spin_lock(&dentry->d_lock);
while (*p) {
struct dentry *d = hlist_entry(*p, struct dentry, d_sib);
- p = &d->d_sib.next;
- // we must at least skip cursors, to avoid livelocks
- if (d->d_flags & DCACHE_DENTRY_CURSOR)
- continue;
- if (simple_positive(d) && !--count) {
- spin_lock_nested(&d->d_lock, DENTRY_D_LOCK_NESTED);
- if (simple_positive(d))
- found = dget_dlock(d);
- spin_unlock(&d->d_lock);
- if (likely(found))
- break;
- count = 1;
- }
+
+ if (internal_scan_positive(p, d, &found, &count))
+ break;
if (need_resched()) {
if (!hlist_unhashed(&cursor->d_sib))
__hlist_del(&cursor->d_sib);
@@ -146,6 +164,47 @@ static struct dentry *scan_positives(struct dentry *cursor,
return found;
}
+/**
+ * simple_next_child - get next child of the parent directory
+ *
+ * @parent: the directory to scan
+ * @child: last child (or NULL to start at the beginning)
+ *
+ * returns the next positive child in sequence with the reference
+ * elevated but if a child is passed in will drop that
+ * reference. Returns NULL on either error or when directory has been
+ * fully scanned.
+ *
+ * The intended use is as an iterator because all the references will
+ * be dropped by the end of the while loop:
+ *
+ * child = NULL
+ * while(child = simple_next_child(parent, child)) {
+ * // do something
+ * }
+ */
+struct dentry *simple_next_child(struct dentry *parent, struct dentry *child)
+{
+ struct hlist_node **p;
+ loff_t count = 1;
+ struct dentry *found = NULL;
+
+ if (child)
+ p = &child->d_sib.next;
+ else
+ p = &parent->d_children.first;
+
+ while (*p) {
+ struct dentry *d = hlist_entry(*p, struct dentry, d_sib);
+
+ if (internal_scan_positive(p, d, &found, &count))
+ break;
+ }
+ dput(child);
+ return found;
+}
+EXPORT_SYMBOL(simple_next_child);
+
loff_t dcache_dir_lseek(struct file *file, loff_t offset, int whence)
{
struct dentry *dentry = file->f_path.dentry;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 2788df98080f..dd84d1c3b8af 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -3531,6 +3531,8 @@ extern int simple_rename(struct mnt_idmap *, struct inode *,
unsigned int);
extern void simple_recursive_removal(struct dentry *,
void (*callback)(struct dentry *));
+extern struct dentry *simple_next_child(struct dentry *parent,
+ struct dentry *child);
extern int noop_fsync(struct file *, loff_t, loff_t, int);
extern ssize_t noop_direct_IO(struct kiocb *iocb, struct iov_iter *iter);
extern int simple_empty(struct dentry *);
--
2.43.0
^ permalink raw reply related [flat|nested] 13+ messages in thread