public inbox for linux-ext4@vger.kernel.org
 help / color / mirror / Atom feed
From: "Darrick J. Wong" <djwong@kernel.org>
To: Christian Brauner <brauner@kernel.org>
Cc: linux-ext4@vger.kernel.org, linux-xfs@vger.kernel.org,
	hch@lst.de, linux-fsdevel@vger.kernel.org
Subject: Re: [PATCH 1/4] fs: send uevents for filesystem mount events
Date: Mon, 5 Jan 2026 09:25:47 -0800	[thread overview]
Message-ID: <20260105172547.GA191481@frogsfrogsfrogs> (raw)
In-Reply-To: <20251224-imitieren-flugtauglich-dcef25c57c8d@brauner>

On Wed, Dec 24, 2025 at 01:47:25PM +0100, Christian Brauner wrote:
> On Wed, Dec 17, 2025 at 06:04:29PM -0800, Darrick J. Wong wrote:
> > From: Darrick J. Wong <djwong@kernel.org>
> > 
> > Add the ability to send uevents whenever a filesystem mounts, unmounts,
> > or goes down.  This will enable XFS to start daemons whenever a
> > filesystem is first mounted.
> > 
> > Regrettably, we can't wire this directly into get_tree_bdev_flags or
> > generic_shutdown_super because not all filesystems set up a kobject
> > representation in sysfs, and the VFS has no idea if a filesystem
> > actually does that.
> > 
> > Signed-off-by: "Darrick J. Wong" <djwong@kernel.org>
> > ---
> 
> I have issues with uevents as a mechanism for this. Uevents are tied to
> network namespaces and they are not really namespaced appropriately. Any
> filesystem that hooks into this mechanism will spew uevents into the
> initial network namespace unconditionally. Any container mountable
> filesystem that wants to use this interface will spam the host with
> this event though the event is completely useless without appropriate
> meta information about the relevant mount namespaces and further
> parameters. This is a design dead end going forward imho. So please
> let's not do this.

Ok.  Initially I'd assumed that any xfs mounts would have to be made
initially by whatever's managing the containers and then bindmounted
into an actual container, but fanotify in the associated mountns means
that containers could decide to have their own healer instances with
their own policies.

It had also occurred to me that wouldn't work so well for a
PrivateMounts=yes systemd service that also gets to mount its own xfs
filesystems.  Granted fanotify might not either, but at least this way
we don't have to wind through udev.

> Instead tie this to fanotify which is the right interface for this.
> My suggestion would be to tie this to mount namespaces as that's the
> appropriate object. Fanotify already supports listening for general
> mount/umount events on mount namespaces. So extend it to send filesystem
> creation/destruction events so that a caller may listen on the initial
> mount namespace - where xfs fses can be mounted - you could even make it
> filterable per filesystem type right away.

Hrmm, would that program look something like this?  Please ignore the
weird weakhandle struct, I hastily stapled this together from various
programs.

I'm not that familiar with fanotify, so I'm curious what the rest of you
think of handle_mount_event and main.  In my trivial workstation test it
worked as a POC, but I've not even thrown fstests at it.

--D

#include <errno.h>
#include <err.h>
#include <stdlib.h>
#include <stdio.h>
#include <fcntl.h>
#include <sys/fanotify.h>
#include <sys/types.h>
#include <unistd.h>
#include <linux/mount.h>
#include <sys/syscall.h>
#include <string.h>
#include <sys/wait.h>
#include <limits.h>

/* Handle that names a mounted filesystem by its mount point. */
struct weakhandle {
	/* path of the mount point */
	const char *mntpoint;
};

/*
 * Compute the systemd instance unit name for this mountpoint.
 *
 * Runs "systemd-escape --template <template> --path <mntpoint>" in a child
 * process with its stdout connected to a pipe, and copies the first line of
 * output (without the trailing newline) into @unitname.
 *
 * @wh:          handle whose mntpoint is escaped
 * @template:    unit template name handed to systemd-escape
 * @unitname:    output buffer
 * @unitnamelen: size of @unitname in bytes, including the terminating NUL
 *
 * Returns 0 on success.  Returns -1 with errno set by the failing call if
 * the pipe, fork or stream setup fails, with errno = EINVAL if the output
 * buffer is unusable, or with errno = ENOENT if the child produced no output.
 */
int
weakhandle_instance_unit_name(
	struct weakhandle	*wh,
	const char		*template,
	char			*unitname,
	size_t			unitnamelen)
{
	FILE			*fp;
	char			*s;
	size_t			bytes;
	pid_t			child_pid;
	int			pipe_fds[2];
	int			saved_errno;
	int			ret;

	if (!unitname || unitnamelen == 0) {
		errno = EINVAL;
		return -1;
	}

	ret = pipe(pipe_fds);
	if (ret)
		return -1;

	child_pid = fork();
	if (child_pid < 0) {
		/* don't leak the pipe; keep fork's errno for the caller */
		saved_errno = errno;
		close(pipe_fds[0]);
		close(pipe_fds[1]);
		errno = saved_errno;
		return -1;
	}

	if (!child_pid) {
		/* child process */
		char		*argv[] = {
			"systemd-escape",
			"--template",
			(char *)template,
			"--path",
			(char *)wh->mntpoint,
			NULL,
		};

		/* the child only writes to the pipe */
		close(pipe_fds[0]);

		ret = dup2(pipe_fds[1], STDOUT_FILENO);
		if (ret < 0) {
			perror(wh->mntpoint);
			_exit(EXIT_FAILURE);
		}
		if (pipe_fds[1] != STDOUT_FILENO)
			close(pipe_fds[1]);

		/* execvp only returns if it failed */
		execvp("systemd-escape", argv);
		perror(wh->mntpoint);

		/*
		 * Use _exit so that stdio buffers copied from the parent are
		 * not flushed a second time by the child.
		 */
		_exit(EXIT_FAILURE);
	}

	/*
	 * Drop our copy of the write end before reading.  While the parent
	 * holds it open the pipe never reports EOF, so a child that exits
	 * without writing anything would leave fgets blocked forever.
	 */
	close(pipe_fds[1]);

	/* parent scrapes the output */
	fp = fdopen(pipe_fds[0], "r");
	if (!fp) {
		saved_errno = errno;
		close(pipe_fds[0]);
		while (waitpid(child_pid, NULL, 0) < 0 && errno == EINTR)
			;
		errno = saved_errno;
		return -1;
	}
	s = fgets(unitname, unitnamelen, fp);
	fclose(fp);

	/* reap the child, retrying if a signal interrupts the wait */
	while (waitpid(child_pid, NULL, 0) < 0 && errno == EINTR)
		;

	if (!s) {
		errno = ENOENT;
		return -1;
	}

	/* trim off trailing newline */
	bytes = strlen(s);
	if (bytes > 0 && s[bytes - 1] == '\n')
		s[bytes - 1] = 0;

	return 0;
}

/*
 * Start the xfs_healer systemd service instance for the filesystem mounted
 * at @mntpoint.
 *
 * The instance unit name is derived from the mount point with
 * weakhandle_instance_unit_name(), then "systemctl start --no-block <unit>"
 * is run in a child process.  Progress is reported on stdout and failures on
 * stderr; nothing is returned to the caller.
 */
static void start_healer(const char *mntpoint)
{
	struct weakhandle wh = {
		.mntpoint = mntpoint,
	};
	char svcname[PATH_MAX];
	pid_t child_pid;
	pid_t waited;
	int child_status = 0;
	int ret;

	ret = weakhandle_instance_unit_name(&wh, "xfs_healer@.service",
			svcname, sizeof(svcname));
	if (ret) {
		perror("whiun!");
		return;
	}

	/*
	 * svcname is handed to systemctl unchanged below, so it is already
	 * the complete unit name; print it without an extra prefix.
	 */
	printf("systemctl start %s\n", svcname);

	/* flush now so the child does not inherit pending stdout data */
	fflush(stdout);

	child_pid = fork();
	if (child_pid < 0) {
		perror(mntpoint);
		return;
	}
	if (!child_pid) {
		/* child starts the process */
		char		*argv[] = {
			"systemctl",
			"start",
			"--no-block",
			svcname,
			NULL,
		};

		/* execvp only returns if it failed */
		execvp("systemctl", argv);
		perror("systemctl");

		_exit(EXIT_FAILURE);
	}

	/* parent waits for process, retrying if interrupted by a signal */
	do {
		waited = waitpid(child_pid, &child_status, 0);
	} while (waited < 0 && errno == EINTR);

	if (waited < 0) {
		/* child_status is meaningless if the wait itself failed */
		perror(mntpoint);
		return;
	}

	if (WIFEXITED(child_status) && WEXITSTATUS(child_status) == 0) {
		printf("%s: healer started\n", mntpoint);
		fflush(stdout);
		return;
	}

	fprintf(stderr, "%s: could not start healer\n", mntpoint);
}

/*
 * Look up the mount named by a fanotify mount info record and, if it is an
 * xfs filesystem, start a healer for its mount point.
 *
 * @mnt:       info record carrying the mount id to query
 * @mnt_ns_fd: mount namespace fd passed to statmount in the request
 *
 * The filesystem type and mount point are requested via the statmount
 * syscall and printed to stdout.  A failed syscall is reported with perror
 * and the mount is skipped.
 */
static void find_mount(const struct fanotify_event_info_mnt *mnt,
		int mnt_ns_fd)
{
	const unsigned long long wanted =
		STATMOUNT_FS_TYPE | STATMOUNT_MNT_POINT;
	struct mnt_id_req req = {
		.size = sizeof(req),
		.mnt_id = mnt->mnt_id,
		.mnt_ns_fd = mnt_ns_fd,
		.param = wanted,
	};
	/* room for the fixed reply plus the strings appended after it */
	size_t smbuf_size = sizeof(struct statmount) + 4096;
	struct statmount *smbuf = alloca(smbuf_size);
	const char *fstype;
	const char *mntpoint;
	int ret;

	ret = syscall(SYS_statmount, &req, smbuf, smbuf_size, 0);
	if (ret) {
		perror("statmount");
		return;
	}

	/*
	 * The string offsets are only meaningful for fields the kernel
	 * reports as filled in the returned mask.
	 */
	if ((smbuf->mask & wanted) != wanted) {
		fprintf(stderr,
			"statmount: id 0x%llx: fstype or mountpoint missing\n",
			(unsigned long long)mnt->mnt_id);
		return;
	}

	fstype = smbuf->str + smbuf->fs_type;
	mntpoint = smbuf->str + smbuf->mnt_point;

	printf("mount: id 0x%llx fstype %s mountpoint %s\n",
			(unsigned long long)mnt->mnt_id, fstype, mntpoint);

	if (!strcmp(fstype, "xfs"))
		start_healer(mntpoint);
}

static void handle_mount_event(const struct fanotify_event_metadata *event,
		int mnt_ns_fd)
{
	const struct fanotify_event_info_header *info;
	const struct fanotify_event_info_mnt *mnt;
	int off;

	if (event->fd != FAN_NOFD) {
		printf("Unexpected fd (!= FAN_NOFD)\n");
		return;
	}

	switch (event->mask) {
	case FAN_MNT_ATTACH:
		printf("FAN_MNT_ATTACH (len=%d)\n", event->event_len);
		break;
	case FAN_MNT_DETACH:
		printf("FAN_MNT_DETACH (len=%d)\n", event->event_len);
		break;
	}

	for (off = sizeof(*event) ; off < event->event_len;
	     off += info->len) {
		info = (struct fanotify_event_info_header *)
			((char *) event + off);

		switch (info->info_type) {
		case FAN_EVENT_INFO_TYPE_MNT:
			mnt = (struct fanotify_event_info_mnt *) info;

			printf("\tGeneric Mount Record: len=%d\n",
			       mnt->hdr.len);
			printf("\tmnt_id: %llx\n", mnt->mnt_id);
			find_mount(mnt, mnt_ns_fd);
			break;

		default:
			printf("\tUnknown info type=%d len=%d:\n",
			       info->info_type, info->len);
		}
	}
}

/*
 * Walk every fanotify event contained in @buffer (@len bytes as returned by
 * read) and dispatch it.  Pure attach or detach events go to
 * handle_mount_event(); any other mask is reported as unexpected.  A
 * separator is printed and stdout flushed after each event.
 */
static void handle_notifications(char *buffer, int len, int mnt_ns_fd)
{
	struct fanotify_event_metadata *ev;

	ev = (struct fanotify_event_metadata *) buffer;
	while (FAN_EVENT_OK(ev, len)) {
		if (ev->mask == FAN_MNT_ATTACH || ev->mask == FAN_MNT_DETACH)
			handle_mount_event(ev, mnt_ns_fd);
		else
			printf("unexpected FAN MARK: %llx\n",
					(unsigned long long)ev->mask);

		printf("---\n\n");
		fflush(stdout);

		/* advances to the next event and shrinks len accordingly */
		ev = FAN_EVENT_NEXT(ev, len);
	}
}

/*
 * Watch the current mount namespace for mount attach/detach events and feed
 * every batch of events read from the fanotify fd to handle_notifications().
 *
 * Returns -1 if setup fails; otherwise loops until read fails, at which
 * point the process exits with status 1.
 */
int main(int argc, char *argv[])
{
	/* the buffer is accessed as event structs, so align it for them */
	char buffer[BUFSIZ]
		__attribute__((aligned(__alignof__(struct fanotify_event_metadata))));
	int mnt_ns_fd;
	int fan_fd;
	int ret;

	(void)argc;
	(void)argv;

	mnt_ns_fd = open("/proc/self/ns/mnt", O_RDONLY);
	if (mnt_ns_fd < 0) {
		perror("/proc/self/ns/mnt");
		return -1;
	}

	fan_fd = fanotify_init(FAN_REPORT_MNT, O_RDONLY);
	if (fan_fd < 0) {
		perror("fanotify_init");
		return -1;
	}

	ret = fanotify_mark(fan_fd, FAN_MARK_ADD | FAN_MARK_MNTNS,
			FAN_MNT_ATTACH | FAN_MNT_DETACH, mnt_ns_fd, NULL);
	if (ret) {
		perror("fanotify_mark");
		return -1;
	}

	printf("fanotify active\n");
	fflush(stdout);

	while (1) {
		ssize_t n = read(fan_fd, buffer, sizeof(buffer));

		if (n < 0) {
			/* a signal arriving mid-read is not a real failure */
			if (errno == EINTR)
				continue;
			/* err (not errx) so the errno text is reported */
			err(1, "read");
		}

		handle_notifications(buffer, (int)n, mnt_ns_fd);
	}

	return 0;
}

  parent reply	other threads:[~2026-01-05 17:25 UTC|newest]

Thread overview: 15+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-12-18  2:02 [PATCHSET V4 2/2] fs: send uevents on mount and unmount Darrick J. Wong
2025-12-18  2:04 ` [PATCH 1/4] fs: send uevents for filesystem mount events Darrick J. Wong
2025-12-18  5:26   ` Christoph Hellwig
2025-12-18 19:49     ` Darrick J. Wong
2025-12-18 23:33   ` [PATCH V4.1 " Darrick J. Wong
2025-12-24 12:47   ` [PATCH " Christian Brauner
2025-12-26 23:58     ` Ian Kent
2026-01-05 17:26       ` Darrick J. Wong
2026-01-05 17:25     ` Darrick J. Wong [this message]
2025-12-18  2:04 ` [PATCH 2/4] xfs: send uevents when major filesystem events happen Darrick J. Wong
2025-12-18  5:27   ` Christoph Hellwig
2025-12-18  2:05 ` [PATCH 3/4] ext4: convert ext4_root to a kset Darrick J. Wong
2025-12-18  5:27   ` Christoph Hellwig
2025-12-18  2:05 ` [PATCH 4/4] ext4: send uevents when major filesystem events happen Darrick J. Wong
2025-12-18  5:27   ` Christoph Hellwig

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260105172547.GA191481@frogsfrogsfrogs \
    --to=djwong@kernel.org \
    --cc=brauner@kernel.org \
    --cc=hch@lst.de \
    --cc=linux-ext4@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-xfs@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox