linux-xfs.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: "Darrick J. Wong" <djwong@kernel.org>
To: Jan Kara <jack@suse.cz>
Cc: Amir Goldstein <amir73il@gmail.com>,
	cem@kernel.org, hch@lst.de, linux-fsdevel@vger.kernel.org,
	linux-xfs@vger.kernel.org, gabriel@krisman.be,
	Christian Brauner <brauner@kernel.org>
Subject: Re: [PATCH 1/6] iomap: report file IO errors to fsnotify
Date: Wed, 5 Nov 2025 10:28:08 -0800	[thread overview]
Message-ID: <20251105182808.GC196370@frogsfrogsfrogs> (raw)
In-Reply-To: <g2xevmkixxjturg47qv4gokvxvbah275z5slweehj2pvesl3zs@ordfml4v7gaa>

On Wed, Nov 05, 2025 at 03:24:41PM +0100, Jan Kara wrote:
> On Wed 05-11-25 12:14:52, Amir Goldstein wrote:
> > On Wed, Nov 5, 2025 at 12:00 PM Jan Kara <jack@suse.cz> wrote:
> > > > diff --git a/include/linux/fs.h b/include/linux/fs.h
> > > > index 5e4b3a4b24823f..1cb3965db3275c 100644
> > > > --- a/include/linux/fs.h
> > > > +++ b/include/linux/fs.h
> > > > @@ -80,6 +80,7 @@ struct fs_context;
> > > >  struct fs_parameter_spec;
> > > >  struct file_kattr;
> > > >  struct iomap_ops;
> > > > +struct notifier_head;
> > > >
> > > >  extern void __init inode_init(void);
> > > >  extern void __init inode_init_early(void);
> > > > @@ -1587,6 +1588,7 @@ struct super_block {
> > > >
> > > >       spinlock_t              s_inode_wblist_lock;
> > > >       struct list_head        s_inodes_wb;    /* writeback inodes */
> > > > +     struct blocking_notifier_head   s_error_notifier;
> > > >  } __randomize_layout;
> > > >
> > > >  static inline struct user_namespace *i_user_ns(const struct inode *inode)
> > > > @@ -4069,4 +4071,66 @@ static inline bool extensible_ioctl_valid(unsigned int cmd_a,
> > > >       return true;
> > > >  }
> > > >
> > > > +enum fs_error_type {
> > > > +     /* pagecache reads and writes */
> > > > +     FSERR_READAHEAD,
> > > > +     FSERR_WRITEBACK,
> > > > +
> > > > +     /* directio read and writes */
> > > > +     FSERR_DIO_READ,
> > > > +     FSERR_DIO_WRITE,
> > > > +
> > > > +     /* media error */
> > > > +     FSERR_DATA_LOST,
> > > > +
> > > > +     /* filesystem metadata */
> > > > +     FSERR_METADATA,
> > > > +};
> > > > +
> > > > +struct fs_error {
> > > > +     struct work_struct work;
> > > > +     struct super_block *sb;
> > > > +     struct inode *inode;
> > > > +     loff_t pos;
> > > > +     u64 len;
> > > > +     enum fs_error_type type;
> > > > +     int error;
> > > > +};
> > > > +
> > > > +struct fs_error_hook {
> > > > +     struct notifier_block nb;
> > > > +};
> > > > +
> > > > +static inline int sb_hook_error(struct super_block *sb,
> > > > +                             struct fs_error_hook *h)
> > > > +{
> > > > +     return blocking_notifier_chain_register(&sb->s_error_notifier, &h->nb);
> > > > +}
> > > > +
> > > > +static inline void sb_unhook_error(struct super_block *sb,
> > > > +                                struct fs_error_hook *h)
> > > > +{
> > > > +     blocking_notifier_chain_unregister(&sb->s_error_notifier, &h->nb);
> > > > +}
> > > > +
> > > > +static inline void sb_init_error_hook(struct fs_error_hook *h, notifier_fn_t fn)
> > > > +{
> > > > +     h->nb.notifier_call = fn;
> > > > +     h->nb.priority = 0;
> > > > +}
> > > > +
> > > > +void __sb_error(struct super_block *sb, struct inode *inode,
> > > > +             enum fs_error_type type, loff_t pos, u64 len, int error);
> > > > +
> > > > +static inline void sb_error(struct super_block *sb, int error)
> > > > +{
> > > > +     __sb_error(sb, NULL, FSERR_METADATA, 0, 0, error);
> > > > +}
> > > > +
> > > > +static inline void inode_error(struct inode *inode, enum fs_error_type type,
> > > > +                            loff_t pos, u64 len, int error)
> > > > +{
> > > > +     __sb_error(inode->i_sb, inode, type, pos, len, error);
> > > > +}
> > > > +
> > 
> > Apart from the fact that Christian is not going to be happy with this
> > bloat of fs.h shouldn't all this be part of fsnotify.h?
> 
> Point that this maybe doesn't belong to fs.h is a good one. But I don't
> think fsnotify.h is appropriate either because this isn't really part of
> fsnotify. It is a layer on top that's binding fsnotify and notifier chain
> notification. So maybe a new fs_error.h header?

Fine with me, though it'd be a small header.

> > I do not see why ext4 should not use the same workqueue
> > or why any code would need to call fsnotify_sb_error() directly.
> 
> Yes, I guess we can convert ext4 to the same framework but I'm fine with
> cleaning that up later.

Yes, that should be a trivial patch to change fsnotify_sb_error ->
sb_error/inode_error.

> > > > +void __sb_error(struct super_block *sb, struct inode *inode,
> > > > +             enum fs_error_type type, loff_t pos, u64 len, int error)
> > > > +{
> > > > +     struct fs_error *fserr = kzalloc(sizeof(struct fs_error), GFP_ATOMIC);
> > > > +
> > > > +     if (!fserr) {
> > > > +             printk(KERN_ERR
> > > > + "lost fs error report for ino %lu type %u pos 0x%llx len 0x%llx error %d",
> > > > +                             inode ? inode->i_ino : 0, type,
> > > > +                             pos, len, error);
> > > > +             return;
> > > > +     }
> > > > +
> > > > +     if (inode) {
> > > > +             fserr->sb = inode->i_sb;
> > > > +             fserr->inode = igrab(inode);
> > > > +     } else {
> > > > +             fserr->sb = sb;
> > > > +     }
> > > > +     fserr->type = type;
> > > > +     fserr->pos = pos;
> > > > +     fserr->len = len;
> > > > +     fserr->error = error;
> > > > +     INIT_WORK(&fserr->work, handle_sb_error);
> > > > +
> > > > +     schedule_work(&fserr->work);
> > > > +}
> > > > +EXPORT_SYMBOL_GPL(__sb_error);
> > > >
> > 
> > ...
> > We recently discovered that fsnotify_sb_error() calls are exposed to
> > races with generic_shutdown_super():
> > https://lore.kernel.org/linux-fsdevel/scmyycf2trich22v25s6gpe3ib6ejawflwf76znxg7sedqablp@ejfycd34xvpa/

Hrmm.  I've noticed that ever since I added this new patchset, I've been
getting more instances of outright crashes in the timer code, or
workqueue lockups.  I wonder if that UAF is what's going on here...

> > Will punting all FS_ERROR events to workqueue help to improve this
> > situation or will it make it worse?
> 
> Worse. But you raise a really good point which I've missed during my
> review. Currently there's nothing which synchronizes pending works with
> superblock getting destroyed with obvious UAF issues already in
> handle_sb_error().

I wonder, could __sb_error call get_active_super() to obtain an active
reference to the sb, and then deactivate_super() it in the workqueue
callback?  If we can't get an active ref then we presume that the fs is
already shutting down and don't send the event.

The igrab/iput was supposed to prevent the same UAF from happening with
the inode, but I should've checked for a non-null return value.

> > Another question to ask is whether reporting fs error duing fs shutdown
> > is a feature or anti feature?
> 
> I think there must be a point of no return during fs shutdown after which
> we just stop emitting errors.

I agree, once S_ACTIVE hits zero there's no point in sending further
errors.

> > If this is needed then we could change fsnotify_sb_error() to
> > take ino,gen or file handle directly instead of calling filesystem to encode
> > a file handle to report with the event.

That would be another way to do it.  The sole downstream consumer of the
s_error_notifier-based events only cares about ino/gen.

> This lifetime issue is not limited to fsnotify. I think __sb_error() needs
> to check whether the superblock is still alive and synchronize properly
> with sb shutdown (at which point making ext4 use this framework will be a
> net win because it will close this race for ext4 as well).

<nod>

--D

> 								Honza
> -- 
> Jan Kara <jack@suse.com>
> SUSE Labs, CR
> 

  reply	other threads:[~2025-11-05 18:28 UTC|newest]

Thread overview: 38+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-11-05  0:46 [PATCHBOMB v2 6.19] xfs: autonomous self healing Darrick J. Wong
2025-11-05  0:48 ` [PATCHSET V3 1/2] xfs: autonomous self healing of filesystems Darrick J. Wong
2025-11-05  0:48   ` [PATCH 01/22] docs: remove obsolete links in the xfs online repair documentation Darrick J. Wong
2025-11-05  0:48   ` [PATCH 02/22] docs: discuss autonomous self healing in the xfs online repair design doc Darrick J. Wong
2025-11-05  0:49   ` [PATCH 03/22] xfs: create debugfs uuid aliases Darrick J. Wong
2025-11-05  0:49   ` [PATCH 04/22] xfs: create hooks for monitoring health updates Darrick J. Wong
2025-11-05  0:49   ` [PATCH 05/22] xfs: create a filesystem shutdown hook Darrick J. Wong
2025-11-05  0:49   ` [PATCH 06/22] xfs: create hooks for media errors Darrick J. Wong
2025-11-05  0:50   ` [PATCH 07/22] iomap: report buffered read and write io errors to the filesystem Darrick J. Wong
2025-11-05  0:50   ` [PATCH 08/22] iomap: report directio read and write errors to callers Darrick J. Wong
2025-11-05  0:50   ` [PATCH 09/22] xfs: create file io error hooks Darrick J. Wong
2025-11-05  0:51   ` [PATCH 10/22] xfs: create a special file to pass filesystem health to userspace Darrick J. Wong
2025-11-05  0:51   ` [PATCH 11/22] xfs: create event queuing, formatting, and discovery infrastructure Darrick J. Wong
2025-11-05  0:51   ` [PATCH 12/22] xfs: report metadata health events through healthmon Darrick J. Wong
2025-11-05  0:51   ` [PATCH 13/22] xfs: report shutdown " Darrick J. Wong
2025-11-05  0:52   ` [PATCH 14/22] xfs: report media errors " Darrick J. Wong
2025-11-05  0:52   ` [PATCH 15/22] xfs: report file io " Darrick J. Wong
2025-11-05  0:52   ` [PATCH 16/22] xfs: allow reconfiguration of the health monitoring device Darrick J. Wong
2025-11-05  0:52   ` [PATCH 17/22] xfs: validate fds against running healthmon Darrick J. Wong
2025-11-05  0:53   ` [PATCH 18/22] xfs: add media error reporting ioctl Darrick J. Wong
2025-11-05  0:53   ` [PATCH 19/22] xfs: send uevents when major filesystem events happen Darrick J. Wong
2025-11-05  0:53   ` [PATCH 20/22] xfs: merge health monitoring events when possible Darrick J. Wong
2025-11-05  0:53   ` [PATCH 21/22] xfs: restrict healthmon users further Darrick J. Wong
2025-11-05  0:54   ` [PATCH 22/22] xfs: charge healthmon event objects to the memcg of the listening process Darrick J. Wong
2025-11-05  0:48 ` [PATCHSET V3 2/2] iomap: generic file IO error reporting Darrick J. Wong
2025-11-05  0:54   ` [PATCH 1/6] iomap: report file IO errors to fsnotify Darrick J. Wong
2025-11-05 11:00     ` Jan Kara
2025-11-05 11:14       ` Amir Goldstein
2025-11-05 14:24         ` Jan Kara
2025-11-05 18:28           ` Darrick J. Wong [this message]
2025-11-05 19:41             ` Darrick J. Wong
2025-11-06 10:13               ` Jan Kara
2025-11-06 17:06                 ` Darrick J. Wong
2025-11-05  0:54   ` [PATCH 2/6] xfs: switch healthmon to use the iomap I/O error reporting Darrick J. Wong
2025-11-05  0:54   ` [PATCH 3/6] xfs: port notify-failure to use the new vfs io " Darrick J. Wong
2025-11-05  0:55   ` [PATCH 4/6] xfs: remove file I/O error hooks Darrick J. Wong
2025-11-05  0:55   ` [PATCH 5/6] iomap: remove " Darrick J. Wong
2025-11-05  0:55   ` [PATCH 6/6] xfs: report fs metadata errors via fsnotify Darrick J. Wong

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20251105182808.GC196370@frogsfrogsfrogs \
    --to=djwong@kernel.org \
    --cc=amir73il@gmail.com \
    --cc=brauner@kernel.org \
    --cc=cem@kernel.org \
    --cc=gabriel@krisman.be \
    --cc=hch@lst.de \
    --cc=jack@suse.cz \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-xfs@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).