Linux Documentation
 help / color / mirror / Atom feed
* [PATCH 12/24] nfsd: add data structures for handling CB_NOTIFY
From: Jeff Layton @ 2026-04-07 13:21 UTC (permalink / raw)
  To: Alexander Viro, Christian Brauner, Jan Kara, Chuck Lever,
	Alexander Aring, Steven Rostedt, Masami Hiramatsu,
	Mathieu Desnoyers, Jonathan Corbet, Shuah Khan, NeilBrown,
	Olga Kornievskaia, Dai Ngo, Tom Talpey, Trond Myklebust,
	Anna Schumaker, Amir Goldstein
  Cc: Calum Mackay, linux-fsdevel, linux-kernel, linux-trace-kernel,
	linux-doc, linux-nfs, Jeff Layton
In-Reply-To: <20260407-dir-deleg-v1-0-aaf68c478abd@kernel.org>

Add the data structures, allocation helpers, and callback operations
needed for directory delegation CB_NOTIFY support:

- struct nfsd_notify_event: carries fsnotify events for CB_NOTIFY
- struct nfsd4_cb_notify: per-delegation state for notification handling
- Union dl_cb_fattr with dl_cb_notify in nfs4_delegation since a
  delegation is either a regular file delegation or a directory
  delegation, never both

Refactor alloc_init_deleg() into a common __alloc_init_deleg() base
with a pluggable sc_free callback, and add alloc_init_dir_deleg() which
allocates the page array and notify4 buffer needed for CB_NOTIFY
encoding.

Add skeleton nfsd4_cb_notify_ops with done/release handlers that will
be filled in when the notification path is wired up.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
---
 fs/nfsd/nfs4state.c | 117 +++++++++++++++++++++++++++++++++++++++++++++-------
 fs/nfsd/state.h     |  46 ++++++++++++++++++++-
 2 files changed, 147 insertions(+), 16 deletions(-)

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 4afe7e68fb51..b2b8c454fc0f 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -124,6 +124,7 @@ static void free_session(struct nfsd4_session *);
 static const struct nfsd4_callback_ops nfsd4_cb_recall_ops;
 static const struct nfsd4_callback_ops nfsd4_cb_notify_lock_ops;
 static const struct nfsd4_callback_ops nfsd4_cb_getattr_ops;
+static const struct nfsd4_callback_ops nfsd4_cb_notify_ops;
 
 static struct workqueue_struct *laundry_wq;
 
@@ -1121,29 +1122,31 @@ static void block_delegations(struct knfsd_fh *fh)
 }
 
 static struct nfs4_delegation *
-alloc_init_deleg(struct nfs4_client *clp, struct nfs4_file *fp,
-		 struct nfs4_clnt_odstate *odstate, u32 dl_type)
+__alloc_init_deleg(struct nfs4_client *clp, struct nfs4_file *fp,
+		   struct nfs4_clnt_odstate *odstate, u32 dl_type,
+		   void (*sc_free)(struct nfs4_stid *))
 {
 	struct nfs4_delegation *dp;
 	struct nfs4_stid *stid;
 	long n;
 
-	dprintk("NFSD alloc_init_deleg\n");
+	if (delegation_blocked(&fp->fi_fhandle))
+		return NULL;
+
 	n = atomic_long_inc_return(&num_delegations);
 	if (n < 0 || n > max_delegations)
 		goto out_dec;
-	if (delegation_blocked(&fp->fi_fhandle))
-		goto out_dec;
-	stid = nfs4_alloc_stid(clp, deleg_slab, nfs4_free_deleg);
+
+	stid = nfs4_alloc_stid(clp, deleg_slab, sc_free);
 	if (stid == NULL)
 		goto out_dec;
-	dp = delegstateid(stid);
 
 	/*
 	 * delegation seqid's are never incremented.  The 4.1 special
 	 * meaning of seqid 0 isn't meaningful, really, but let's avoid
-	 * 0 anyway just for consistency and use 1:
+	 * 0 anyway just for consistency and use 1.
 	 */
+	dp = delegstateid(stid);
 	dp->dl_stid.sc_stateid.si_generation = 1;
 	INIT_LIST_HEAD(&dp->dl_perfile);
 	INIT_LIST_HEAD(&dp->dl_perclnt);
@@ -1153,19 +1156,75 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_file *fp,
 	dp->dl_type = dl_type;
 	dp->dl_retries = 1;
 	dp->dl_recalled = false;
-	nfsd4_init_cb(&dp->dl_recall, dp->dl_stid.sc_client,
-		      &nfsd4_cb_recall_ops, NFSPROC4_CLNT_CB_RECALL);
-	nfsd4_init_cb(&dp->dl_cb_fattr.ncf_getattr, dp->dl_stid.sc_client,
-			&nfsd4_cb_getattr_ops, NFSPROC4_CLNT_CB_GETATTR);
-	dp->dl_cb_fattr.ncf_file_modified = false;
 	get_nfs4_file(fp);
 	dp->dl_stid.sc_file = fp;
+	nfsd4_init_cb(&dp->dl_recall, dp->dl_stid.sc_client,
+		      &nfsd4_cb_recall_ops, NFSPROC4_CLNT_CB_RECALL);
 	return dp;
 out_dec:
 	atomic_long_dec(&num_delegations);
 	return NULL;
 }
 
+static struct nfs4_delegation *
+alloc_init_deleg(struct nfs4_client *clp, struct nfs4_file *fp,
+		 struct nfs4_clnt_odstate *odstate, u32 dl_type)
+{
+	struct nfs4_delegation *dp;
+
+	dp = __alloc_init_deleg(clp, fp, odstate, dl_type, nfs4_free_deleg);
+	if (!dp)
+		return NULL;
+
+	nfsd4_init_cb(&dp->dl_cb_fattr.ncf_getattr, dp->dl_stid.sc_client,
+			&nfsd4_cb_getattr_ops, NFSPROC4_CLNT_CB_GETATTR);
+	dp->dl_cb_fattr.ncf_file_modified = false;
+	return dp;
+}
+
+static void nfs4_free_dir_deleg(struct nfs4_stid *stid)
+{
+	struct nfs4_delegation	*dp = delegstateid(stid);
+	struct nfsd4_cb_notify *ncn = &dp->dl_cb_notify;
+	int i;
+
+	for (i = 0; i < ncn->ncn_evt_cnt; ++i)
+		nfsd_notify_event_put(ncn->ncn_evt[i]);
+	release_pages(ncn->ncn_pages, NOTIFY4_PAGE_ARRAY_SIZE);
+	kfree(ncn->ncn_nf);
+	nfs4_free_deleg(stid);
+}
+
+/* Allocate a directory delegation along with its CB_NOTIFY encoding buffers. */
+static struct nfs4_delegation *
+alloc_init_dir_deleg(struct nfs4_client *clp, struct nfs4_file *fp)
+{
+	struct nfs4_delegation *dp;
+	struct nfsd4_cb_notify *ncn;
+	int npages;
+
+	dp = __alloc_init_deleg(clp, fp, NULL, NFS4_OPEN_DELEGATE_READ, nfs4_free_dir_deleg);
+	if (!dp)
+		return NULL;
+
+	ncn = &dp->dl_cb_notify;
+	npages = alloc_pages_bulk(GFP_KERNEL, NOTIFY4_PAGE_ARRAY_SIZE, ncn->ncn_pages);
+	if (npages != NOTIFY4_PAGE_ARRAY_SIZE)
+		goto out_put;
+
+	ncn->ncn_nf = kcalloc(NOTIFY4_EVENT_QUEUE_SIZE, sizeof(*ncn->ncn_nf), GFP_KERNEL);
+	if (!ncn->ncn_nf)
+		goto out_put;
+	spin_lock_init(&ncn->ncn_lock);
+	nfsd4_init_cb(&ncn->ncn_cb, dp->dl_stid.sc_client,
+			&nfsd4_cb_notify_ops, NFSPROC4_CLNT_CB_NOTIFY);
+	return dp;
+out_put:
+	/* nfs4_put_stid() is expected to also drop the fp reference taken above */
+	nfs4_put_stid(&dp->dl_stid);
+	return NULL;
+}
+
 void
 nfs4_put_stid(struct nfs4_stid *s)
 {
@@ -3381,6 +3440,30 @@ nfsd4_cb_getattr_release(struct nfsd4_callback *cb)
 	nfs4_put_stid(&dp->dl_stid);
 }
 
+static int
+nfsd4_cb_notify_done(struct nfsd4_callback *cb,
+				struct rpc_task *task)
+{
+	switch (task->tk_status) {
+	case -NFS4ERR_DELAY:
+		rpc_delay(task, 2 * HZ);
+		return 0;
+	default:
+		return 1;
+	}
+}
+
+static void
+nfsd4_cb_notify_release(struct nfsd4_callback *cb)
+{
+	struct nfsd4_cb_notify *ncn =
+			container_of(cb, struct nfsd4_cb_notify, ncn_cb);
+	struct nfs4_delegation *dp =
+			container_of(ncn, struct nfs4_delegation, dl_cb_notify);
+
+	nfs4_put_stid(&dp->dl_stid);
+}
+
 static const struct nfsd4_callback_ops nfsd4_cb_recall_any_ops = {
 	.done		= nfsd4_cb_recall_any_done,
 	.release	= nfsd4_cb_recall_any_release,
@@ -3393,6 +3476,12 @@ static const struct nfsd4_callback_ops nfsd4_cb_getattr_ops = {
 	.opcode		= OP_CB_GETATTR,
 };
 
+static const struct nfsd4_callback_ops nfsd4_cb_notify_ops = {
+	.done		= nfsd4_cb_notify_done,
+	.release	= nfsd4_cb_notify_release,
+	.opcode		= OP_CB_NOTIFY,
+};
+
 static void nfs4_cb_getattr(struct nfs4_cb_fattr *ncf)
 {
 	struct nfs4_delegation *dp =
@@ -9661,7 +9750,7 @@ nfsd_get_dir_deleg(struct nfsd4_compound_state *cstate,
 
 	/* Try to set up the lease */
 	status = -ENOMEM;
-	dp = alloc_init_deleg(clp, fp, NULL, NFS4_OPEN_DELEGATE_READ);
+	dp = alloc_init_dir_deleg(clp, fp);
 	if (!dp)
 		goto out_delegees;
 	if (cstate->current_fh.fh_export)
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index eb5946b0999e..500e07e47909 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -197,6 +197,44 @@ struct nfs4_cb_fattr {
 #define NOTIFY4_EVENT_QUEUE_SIZE	3
 #define NOTIFY4_PAGE_ARRAY_SIZE		1
 
+struct nfsd_notify_event {
+	refcount_t	ne_ref;		// refcount
+	u32		ne_mask;	// FS_* mask from fsnotify callback
+	struct dentry	*ne_dentry;	// dentry reference to target
+	u32		ne_namelen;	// length of ne_name
+	char		ne_name[];	// name of dentry being changed
+};
+
+static inline struct nfsd_notify_event *nfsd_notify_event_get(struct nfsd_notify_event *ne)
+{
+	refcount_inc(&ne->ne_ref);
+	return ne;
+}
+
+static inline void nfsd_notify_event_put(struct nfsd_notify_event *ne)
+{
+	if (refcount_dec_and_test(&ne->ne_ref)) {
+		dput(ne->ne_dentry);
+		kfree(ne);
+	}
+}
+
+/*
+ * Represents a directory delegation. The callback is for handling CB_NOTIFYs.
+ * As notifications from fsnotify come in, allocate a new event, take the ncn_lock,
+ * and add it to the ncn_evt queue. The CB_NOTIFY prepare handler will take the
+ * lock, clean out the list and process it.
+ */
+struct nfsd4_cb_notify {
+	spinlock_t			ncn_lock;	// protects the evt queue and count
+	int				ncn_evt_cnt;	// count of events in ncn_evt
+	int				ncn_nf_cnt;	// count of valid entries in ncn_nf
+	struct nfsd_notify_event	*ncn_evt[NOTIFY4_EVENT_QUEUE_SIZE]; // list of events
+	struct page			*ncn_pages[NOTIFY4_PAGE_ARRAY_SIZE]; // for encoding
+	struct notify4			*ncn_nf;	// array of notify4's to be sent
+	struct nfsd4_callback		ncn_cb;		// notify4 callback
+};
+
 /*
  * Represents a delegation stateid. The nfs4_client holds references to these
  * and they are put when it is being destroyed or when the delegation is
@@ -233,8 +271,12 @@ struct nfs4_delegation {
 	bool			dl_written;
 	bool			dl_setattr;
 
-	/* for CB_GETATTR */
-	struct nfs4_cb_fattr    dl_cb_fattr;
+	union {
+		/* for CB_GETATTR */
+		struct nfs4_cb_fattr    dl_cb_fattr;
+		/* for CB_NOTIFY */
+		struct nfsd4_cb_notify	dl_cb_notify;
+	};
 
 	/* For delegated timestamps */
 	struct timespec64	dl_atime;

-- 
2.53.0


^ permalink raw reply related

* [PATCH 13/24] nfsd: add notification handlers for dir events
From: Jeff Layton @ 2026-04-07 13:21 UTC (permalink / raw)
  To: Alexander Viro, Christian Brauner, Jan Kara, Chuck Lever,
	Alexander Aring, Steven Rostedt, Masami Hiramatsu,
	Mathieu Desnoyers, Jonathan Corbet, Shuah Khan, NeilBrown,
	Olga Kornievskaia, Dai Ngo, Tom Talpey, Trond Myklebust,
	Anna Schumaker, Amir Goldstein
  Cc: Calum Mackay, linux-fsdevel, linux-kernel, linux-trace-kernel,
	linux-doc, linux-nfs, Jeff Layton
In-Reply-To: <20260407-dir-deleg-v1-0-aaf68c478abd@kernel.org>

Add the necessary parts to accept an fsnotify callback for a directory
change event and create a CB_NOTIFY request for it. When a dir nfsd_file
is created, set a handle_event callback to handle the notification.

Use that to allocate a nfsd_notify_event object and then hand off a
reference to each delegation's CB_NOTIFY. If anything fails along the
way, recall any affected delegations.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
---
 fs/nfsd/filecache.c    |  57 ++++++++----
 fs/nfsd/nfs4callback.c |  19 +++-
 fs/nfsd/nfs4state.c    | 248 ++++++++++++++++++++++++++++++++++++++++++++-----
 fs/nfsd/nfs4xdr.c      |  96 +++++++++++++++++++
 fs/nfsd/state.h        |   2 +
 fs/nfsd/xdr4.h         |   3 +
 6 files changed, 383 insertions(+), 42 deletions(-)

diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
index 24511c3208db..56889fca6dca 100644
--- a/fs/nfsd/filecache.c
+++ b/fs/nfsd/filecache.c
@@ -72,6 +72,7 @@ static struct kmem_cache		*nfsd_file_mark_slab;
 static struct list_lru			nfsd_file_lru;
 static unsigned long			nfsd_file_flags;
 static struct fsnotify_group		*nfsd_file_fsnotify_group;
+static struct fsnotify_group		*nfsd_dir_fsnotify_group;
 static struct delayed_work		nfsd_filecache_laundrette;
 static struct rhltable			nfsd_file_rhltable
 						____cacheline_aligned_in_smp;
@@ -147,7 +148,7 @@ static void
 nfsd_file_mark_put(struct nfsd_file_mark *nfm)
 {
 	if (refcount_dec_and_test(&nfm->nfm_ref)) {
-		fsnotify_destroy_mark(&nfm->nfm_mark, nfsd_file_fsnotify_group);
+		fsnotify_destroy_mark(&nfm->nfm_mark, nfm->nfm_mark.group);
 		fsnotify_put_mark(&nfm->nfm_mark);
 	}
 }
@@ -155,35 +156,37 @@ nfsd_file_mark_put(struct nfsd_file_mark *nfm)
 static struct nfsd_file_mark *
 nfsd_file_mark_find_or_create(struct inode *inode)
 {
-	int			err;
-	struct fsnotify_mark	*mark;
 	struct nfsd_file_mark	*nfm = NULL, *new;
+	struct fsnotify_group	*group;
+	struct fsnotify_mark	*mark;
+	int			err;
+
+	group = S_ISDIR(inode->i_mode) ? nfsd_dir_fsnotify_group : nfsd_file_fsnotify_group;
 
 	do {
-		fsnotify_group_lock(nfsd_file_fsnotify_group);
-		mark = fsnotify_find_inode_mark(inode,
-						nfsd_file_fsnotify_group);
+		fsnotify_group_lock(group);
+		mark = fsnotify_find_inode_mark(inode, group);
 		if (mark) {
 			nfm = nfsd_file_mark_get(container_of(mark,
 						 struct nfsd_file_mark,
 						 nfm_mark));
-			fsnotify_group_unlock(nfsd_file_fsnotify_group);
+			fsnotify_group_unlock(group);
 			if (nfm) {
 				fsnotify_put_mark(mark);
 				break;
 			}
 			/* Avoid soft lockup race with nfsd_file_mark_put() */
-			fsnotify_destroy_mark(mark, nfsd_file_fsnotify_group);
+			fsnotify_destroy_mark(mark, group);
 			fsnotify_put_mark(mark);
 		} else {
-			fsnotify_group_unlock(nfsd_file_fsnotify_group);
+			fsnotify_group_unlock(group);
 		}
 
 		/* allocate a new nfm */
 		new = kmem_cache_alloc(nfsd_file_mark_slab, GFP_KERNEL);
 		if (!new)
 			return NULL;
-		fsnotify_init_mark(&new->nfm_mark, nfsd_file_fsnotify_group);
+		fsnotify_init_mark(&new->nfm_mark, group);
 		new->nfm_mark.mask = FS_ATTRIB|FS_DELETE_SELF;
 		refcount_set(&new->nfm_ref, 1);
 
@@ -812,12 +815,25 @@ nfsd_file_fsnotify_handle_event(struct fsnotify_mark *mark, u32 mask,
 	return 0;
 }
 
+static int
+nfsd_dir_fsnotify_handle_event(struct fsnotify_group *group, u32 mask,
+			       const void *data, int data_type, struct inode *dir,
+			       const struct qstr *name, u32 cookie,
+			       struct fsnotify_iter_info *iter_info)
+{
+	return nfsd_handle_dir_event(mask, dir, data, data_type, name);
+}
 
 static const struct fsnotify_ops nfsd_file_fsnotify_ops = {
 	.handle_inode_event = nfsd_file_fsnotify_handle_event,
 	.free_mark = nfsd_file_mark_free,
 };
 
+static const struct fsnotify_ops nfsd_dir_fsnotify_ops = {
+	.handle_event = nfsd_dir_fsnotify_handle_event,
+	.free_mark = nfsd_file_mark_free,
+};
+
 int
 nfsd_file_cache_init(void)
 {
@@ -869,8 +885,7 @@ nfsd_file_cache_init(void)
 		goto out_shrinker;
 	}
 
-	nfsd_file_fsnotify_group = fsnotify_alloc_group(&nfsd_file_fsnotify_ops,
-							0);
+	nfsd_file_fsnotify_group = fsnotify_alloc_group(&nfsd_file_fsnotify_ops, 0);
 	if (IS_ERR(nfsd_file_fsnotify_group)) {
 		pr_err("nfsd: unable to create fsnotify group: %ld\n",
 			PTR_ERR(nfsd_file_fsnotify_group));
@@ -879,11 +894,23 @@ nfsd_file_cache_init(void)
 		goto out_notifier;
 	}
 
+	nfsd_dir_fsnotify_group = fsnotify_alloc_group(&nfsd_dir_fsnotify_ops, 0);
+	if (IS_ERR(nfsd_dir_fsnotify_group)) {
+		pr_err("nfsd: unable to create fsnotify group: %ld\n",
+			PTR_ERR(nfsd_dir_fsnotify_group));
+		ret = PTR_ERR(nfsd_dir_fsnotify_group);
+		nfsd_dir_fsnotify_group = NULL;
+		goto out_notify_group;
+	}
+
 	INIT_DELAYED_WORK(&nfsd_filecache_laundrette, nfsd_file_gc_worker);
 out:
 	if (ret)
 		clear_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags);
 	return ret;
+out_notify_group:
+	fsnotify_put_group(nfsd_file_fsnotify_group);
+	nfsd_file_fsnotify_group = NULL;
 out_notifier:
 	lease_unregister_notifier(&nfsd_file_lease_notifier);
 out_shrinker:
@@ -1223,10 +1250,8 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct net *net,
 open_file:
 	trace_nfsd_file_alloc(nf);
 
-	if (type == S_IFREG)
-		nf->nf_mark = nfsd_file_mark_find_or_create(inode);
-
-	if (type != S_IFREG || nf->nf_mark) {
+	nf->nf_mark = nfsd_file_mark_find_or_create(inode);
+	if (nf->nf_mark) {
 		if (file) {
 			get_file(file);
 			nf->nf_file = file;
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index ea3e7deb06fa..1964a213f80e 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -870,21 +870,30 @@ static void nfs4_xdr_enc_cb_notify(struct rpc_rqst *req,
 				   const void *data)
 {
 	const struct nfsd4_callback *cb = data;
+	struct nfsd4_cb_notify *ncn = container_of(cb, struct nfsd4_cb_notify, ncn_cb);
+	struct nfs4_delegation *dp = container_of(ncn, struct nfs4_delegation, dl_cb_notify);
 	struct nfs4_cb_compound_hdr hdr = {
 		.ident = 0,
 		.minorversion = cb->cb_clp->cl_minorversion,
 	};
-	struct CB_NOTIFY4args args = { };
+	struct CB_NOTIFY4args args;
+	__be32 *p;
 
 	WARN_ON_ONCE(hdr.minorversion == 0);
 
 	encode_cb_compound4args(xdr, &hdr);
 	encode_cb_sequence4args(xdr, cb, &hdr);
 
-	/*
-	 * FIXME: get stateid and fh from delegation. Inline the cna_changes
-	 * buffer, and zero it.
-	 */
+	p = xdr_reserve_space(xdr, 4);
+	*p = cpu_to_be32(OP_CB_NOTIFY);
+
+	args.cna_stateid.seqid = dp->dl_stid.sc_stateid.si_generation;
+	memcpy(&args.cna_stateid.other, &dp->dl_stid.sc_stateid.si_opaque,
+	       ARRAY_SIZE(args.cna_stateid.other));
+	args.cna_fh.len = dp->dl_stid.sc_file->fi_fhandle.fh_size;
+	args.cna_fh.data = dp->dl_stid.sc_file->fi_fhandle.fh_raw;
+	args.cna_changes.count = ncn->ncn_nf_cnt;
+	args.cna_changes.element = ncn->ncn_nf;
 	WARN_ON_ONCE(!xdrgen_encode_CB_NOTIFY4args(xdr, &args));
 
 	hdr.nops++;
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index b2b8c454fc0f..339c3d0bb575 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -55,6 +55,7 @@
 #include "netns.h"
 #include "pnfs.h"
 #include "filecache.h"
+#include "nfs4xdr_gen.h"
 #include "trace.h"
 
 #define NFSDDBG_FACILITY                NFSDDBG_PROC
@@ -3440,15 +3441,125 @@ nfsd4_cb_getattr_release(struct nfsd4_callback *cb)
 	nfs4_put_stid(&dp->dl_stid);
 }
 
+static void nfsd_break_one_deleg(struct nfs4_delegation *dp)
+{
+	bool queued;
+
+	if (test_and_set_bit(NFSD4_CALLBACK_RUNNING, &dp->dl_recall.cb_flags))
+		return;
+
+	/*
+	 * We're assuming the state code never drops its reference
+	 * without first removing the lease.  Since we're in this lease
+	 * callback (and since the lease code is serialized by the
+	 * flc_lock) we know the server hasn't removed the lease yet, and
+	 * we know it's safe to take a reference.
+	 */
+	refcount_inc(&dp->dl_stid.sc_count);
+	queued = nfsd4_run_cb(&dp->dl_recall);
+	WARN_ON_ONCE(!queued);
+	if (!queued)
+		refcount_dec(&dp->dl_stid.sc_count);
+}
+
+/* Encode queued events into ncn_nf; false if none queued or on error (deleg is recalled). */
+static bool
+nfsd4_cb_notify_prepare(struct nfsd4_callback *cb)
+{
+	struct nfsd4_cb_notify *ncn = container_of(cb, struct nfsd4_cb_notify, ncn_cb);
+	struct nfs4_delegation *dp = container_of(ncn, struct nfs4_delegation, dl_cb_notify);
+	struct nfsd_notify_event *events[NOTIFY4_EVENT_QUEUE_SIZE];
+	struct xdr_buf xdr = { .buflen = PAGE_SIZE * NOTIFY4_PAGE_ARRAY_SIZE,
+			       .pages  = ncn->ncn_pages };
+	struct xdr_stream stream;
+	struct nfsd_file *nf;
+	int count, i;
+	bool error = false;
+
+	xdr_init_encode_pages(&stream, &xdr);
+	spin_lock(&ncn->ncn_lock);
+	count = ncn->ncn_evt_cnt;
+
+	/* spurious queueing? */
+	if (count == 0) {
+		spin_unlock(&ncn->ncn_lock);
+		return false;
+	}
+
+	/* we can't keep up! */
+	if (count > NOTIFY4_EVENT_QUEUE_SIZE) {
+		spin_unlock(&ncn->ncn_lock);
+		goto out_recall;
+	}
+
+	memcpy(events, ncn->ncn_evt, sizeof(*events) * count);
+	ncn->ncn_evt_cnt = 0;
+	spin_unlock(&ncn->ncn_lock);
+
+	rcu_read_lock();
+	nf = nfsd_file_get(rcu_dereference(dp->dl_stid.sc_file->fi_deleg_file));
+	rcu_read_unlock();
+	if (!nf) {
+		for (i = 0; i < count; ++i)
+			nfsd_notify_event_put(events[i]);
+		goto out_recall;
+	}
+
+	for (i = 0; i < count; ++i) {
+		struct nfsd_notify_event *nne = events[i];
+
+		if (!error) {
+			u32 *maskp = (u32 *)xdr_reserve_space(&stream, sizeof(*maskp));
+			u8 *p;
+
+			if (!maskp) {
+				error = true;
+				goto put_event;
+			}
+
+			p = nfsd4_encode_notify_event(&stream, nne, dp, nf, maskp);
+			if (!p) {
+				pr_notice("Could not generate CB_NOTIFY from fsnotify mask 0x%x\n",
+					  nne->ne_mask);
+				error = true;
+				goto put_event;
+			}
+
+			ncn->ncn_nf[i].notify_mask.count = 1;
+			ncn->ncn_nf[i].notify_mask.element = maskp;
+			ncn->ncn_nf[i].notify_vals.data = p;
+			ncn->ncn_nf[i].notify_vals.len = (u8 *)stream.p - p;
+		}
+put_event:
+		nfsd_notify_event_put(nne);
+	}
+	if (!error) {
+		ncn->ncn_nf_cnt = count;
+		nfsd_file_put(nf);
+		return true;
+	}
+	nfsd_file_put(nf);
+out_recall:
+	nfsd_break_one_deleg(dp);
+	return false;
+}
+
 static int
 nfsd4_cb_notify_done(struct nfsd4_callback *cb,
 				struct rpc_task *task)
 {
+	struct nfsd4_cb_notify *ncn = container_of(cb, struct nfsd4_cb_notify, ncn_cb);
+	struct nfs4_delegation *dp = container_of(ncn, struct nfs4_delegation, dl_cb_notify);
+
 	switch (task->tk_status) {
 	case -NFS4ERR_DELAY:
 		rpc_delay(task, 2 * HZ);
 		return 0;
 	default:
+		/* For any other hard error, recall the deleg */
+		nfsd_break_one_deleg(dp);
+		fallthrough;
+	case 0:
 		return 1;
 	}
 }
@@ -3477,6 +3588,7 @@ static const struct nfsd4_callback_ops nfsd4_cb_getattr_ops = {
 };
 
 static const struct nfsd4_callback_ops nfsd4_cb_notify_ops = {
+	.prepare	= nfsd4_cb_notify_prepare,
 	.done		= nfsd4_cb_notify_done,
 	.release	= nfsd4_cb_notify_release,
 	.opcode		= OP_CB_NOTIFY,
@@ -5702,27 +5814,6 @@ static const struct nfsd4_callback_ops nfsd4_cb_recall_ops = {
 	.opcode		= OP_CB_RECALL,
 };
 
-static void nfsd_break_one_deleg(struct nfs4_delegation *dp)
-{
-	bool queued;
-
-	if (test_and_set_bit(NFSD4_CALLBACK_RUNNING, &dp->dl_recall.cb_flags))
-		return;
-
-	/*
-	 * We're assuming the state code never drops its reference
-	 * without first removing the lease.  Since we're in this lease
-	 * callback (and since the lease code is serialized by the
-	 * flc_lock) we know the server hasn't removed the lease yet, and
-	 * we know it's safe to take a reference.
-	 */
-	refcount_inc(&dp->dl_stid.sc_count);
-	queued = nfsd4_run_cb(&dp->dl_recall);
-	WARN_ON_ONCE(!queued);
-	if (!queued)
-		refcount_dec(&dp->dl_stid.sc_count);
-}
-
 /* Called from break_lease() with flc_lock held. */
 static bool
 nfsd_break_deleg_cb(struct file_lease *fl)
@@ -9796,3 +9887,118 @@ nfsd_get_dir_deleg(struct nfsd4_compound_state *cstate,
 	put_nfs4_file(fp);
 	return ERR_PTR(status);
 }
+
+static void
+nfsd4_run_cb_notify(struct nfsd4_cb_notify *ncn)
+{
+	struct nfs4_delegation *dp = container_of(ncn, struct nfs4_delegation, dl_cb_notify);
+
+	if (test_and_set_bit(NFSD4_CALLBACK_RUNNING, &ncn->ncn_cb.cb_flags))
+		return;
+
+	if (!refcount_inc_not_zero(&dp->dl_stid.sc_count))
+		clear_bit(NFSD4_CALLBACK_RUNNING, &ncn->ncn_cb.cb_flags);
+	else
+		nfsd4_run_cb(&ncn->ncn_cb);
+}
+
+static struct nfsd_notify_event *
+alloc_nfsd_notify_event(u32 mask, const struct qstr *q, struct dentry *dentry)
+{
+	struct nfsd_notify_event *ne;
+
+	ne = kmalloc(sizeof(*ne) + q->len + 1, GFP_KERNEL);
+	if (!ne)
+		return NULL;
+
+	memcpy(&ne->ne_name, q->name, q->len);
+	refcount_set(&ne->ne_ref, 1);
+	ne->ne_mask = mask;
+	ne->ne_name[q->len] = '\0';
+	ne->ne_namelen = q->len;
+	ne->ne_dentry = dget(dentry);
+	return ne;
+}
+
+static bool
+should_notify_deleg(u32 mask, struct file_lease *fl)
+{
+	/* Only nfsd leases */
+	if (fl->fl_lmops != &nfsd_lease_mng_ops)
+		return false;
+
+	/* Skip if this event wasn't ignored by the lease */
+	if ((mask & FS_DELETE) && !(fl->c.flc_flags & FL_IGN_DIR_DELETE))
+		return false;
+	if ((mask & FS_CREATE) && !(fl->c.flc_flags & FL_IGN_DIR_CREATE))
+		return false;
+	if ((mask & FS_RENAME) && !(fl->c.flc_flags & FL_IGN_DIR_RENAME))
+		return false;
+
+	return true;
+}
+
+static void
+nfsd_recall_all_dir_delegs(const struct inode *dir)
+{
+	struct file_lock_context *ctx = locks_inode_context(dir);
+	struct file_lock_core *flc;
+
+	spin_lock(&ctx->flc_lock);
+	list_for_each_entry(flc, &ctx->flc_lease, flc_list) {
+		struct file_lease *fl = container_of(flc, struct file_lease, c);
+
+		if (fl->fl_lmops == &nfsd_lease_mng_ops)
+			nfsd_break_deleg_cb(fl);
+	}
+	spin_unlock(&ctx->flc_lock);
+}
+
+int
+nfsd_handle_dir_event(u32 mask, const struct inode *dir, const void *data,
+		      int data_type, const struct qstr *name)
+{
+	struct dentry *dentry = fsnotify_data_dentry(data, data_type);
+	struct file_lock_context *ctx;
+	struct file_lock_core *flc;
+	struct nfsd_notify_event *evt;
+
+	/* Don't do anything if this is not an expected event */
+	if (!(mask & (FS_CREATE|FS_DELETE|FS_RENAME)))
+		return 0;
+
+	ctx = locks_inode_context(dir);
+	if (!ctx || list_empty(&ctx->flc_lease))
+		return 0;
+
+	evt = alloc_nfsd_notify_event(mask, name, dentry);
+	if (!evt) {
+		nfsd_recall_all_dir_delegs(dir);
+		return 0;
+	}
+
+	spin_lock(&ctx->flc_lock);
+	list_for_each_entry(flc, &ctx->flc_lease, flc_list) {
+		struct file_lease *fl = container_of(flc, struct file_lease, c);
+		struct nfs4_delegation *dp = flc->flc_owner;
+		struct nfsd4_cb_notify *ncn = &dp->dl_cb_notify;
+
+		if (!should_notify_deleg(mask, fl))
+			continue;
+
+		spin_lock(&ncn->ncn_lock);
+		if (ncn->ncn_evt_cnt >= NOTIFY4_EVENT_QUEUE_SIZE) {
+			/* We're generating notifications too fast. Recall. */
+			spin_unlock(&ncn->ncn_lock);
+			nfsd_break_deleg_cb(fl);
+			continue;
+		}
+		ncn->ncn_evt[ncn->ncn_evt_cnt++] = nfsd_notify_event_get(evt);
+		spin_unlock(&ncn->ncn_lock);
+
+		nfsd4_run_cb_notify(ncn);
+	}
+	spin_unlock(&ctx->flc_lock);
+	nfsd_notify_event_put(evt);
+	return 0;
+}
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 2a0946c630e1..9ad13f96c219 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -4098,6 +4098,102 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr,
 	goto out;
 }
 
+static bool
+nfsd4_setup_notify_entry4(struct notify_entry4 *ne, struct xdr_stream *xdr,
+			  struct dentry *dentry, struct nfs4_delegation *dp,
+			  struct nfsd_file *nf, char *name, u32 namelen)
+{
+	uint32_t *attrmask;
+
+	/* Reserve space for attrmask */
+	attrmask = xdr_reserve_space(xdr, 3 * sizeof(uint32_t));
+	if (!attrmask)
+		return false;
+
+	ne->ne_file.data = name;
+	ne->ne_file.len = namelen;
+	ne->ne_attrs.attrmask.element = attrmask;
+
+	attrmask[0] = 0;
+	attrmask[1] = 0;
+	attrmask[2] = 0;
+	ne->ne_attrs.attr_vals.data = NULL;
+	ne->ne_attrs.attr_vals.len = 0;
+	ne->ne_attrs.attrmask.count = 1;
+	return true;
+}
+
+/**
+ * nfsd4_encode_notify_event - encode a notify event
+ * @xdr: stream to which to encode the event
+ * @nne: nfsd_notify_event to encode
+ * @dp: delegation where the event occurred
+ * @nf: nfsd_file for the delegation, passed on to nfsd4_setup_notify_entry4()
+ * @notify_mask: pointer to word where notification mask should be set
+ *
+ * Encode @nne into @xdr. Returns a pointer to the start of the event, or NULL if
+ * the event couldn't be encoded. NULL is also returned when @nne->ne_mask has
+ * none of FS_DELETE, FS_CREATE or FS_RENAME set. The appropriate bit in the
+ * notify_mask will also be set on success.
+ */
+u8 *nfsd4_encode_notify_event(struct xdr_stream *xdr, struct nfsd_notify_event *nne,
+			      struct nfs4_delegation *dp, struct nfsd_file *nf,
+			      u32 *notify_mask)
+{
+	u8 *p = NULL;
+
+	*notify_mask = 0;
+
+	if (nne->ne_mask & FS_DELETE) {
+		struct notify_remove4 nr = { };
+
+		if (!nfsd4_setup_notify_entry4(&nr.nrm_old_entry, xdr, nne->ne_dentry, dp,
+					       nf, nne->ne_name, nne->ne_namelen))
+			goto out_err;
+		p = (u8 *)xdr->p;
+		if (!xdrgen_encode_notify_remove4(xdr, &nr))
+			goto out_err;
+		*notify_mask |= BIT(NOTIFY4_REMOVE_ENTRY);
+	} else if (nne->ne_mask & FS_CREATE) {
+		struct notify_add4 na = { };
+
+		if (!nfsd4_setup_notify_entry4(&na.nad_new_entry, xdr, nne->ne_dentry, dp,
+					       nf, nne->ne_name, nne->ne_namelen))
+			goto out_err;
+		p = (u8 *)xdr->p;
+		if (!xdrgen_encode_notify_add4(xdr, &na))
+			goto out_err;
+		*notify_mask |= BIT(NOTIFY4_ADD_ENTRY);
+	} else if (nne->ne_mask & FS_RENAME) {
+		struct notify_rename4 nr = { };
+		struct name_snapshot n;
+		bool ret;
+
+		/* Don't send any attributes in the old_entry since they're the same in new */
+		if (!nfsd4_setup_notify_entry4(&nr.nrn_old_entry.nrm_old_entry, xdr,
+					       NULL, dp, nf, nne->ne_name,
+					       nne->ne_namelen))
+			goto out_err;
+
+		take_dentry_name_snapshot(&n, nne->ne_dentry);
+		ret = nfsd4_setup_notify_entry4(&nr.nrn_new_entry.nad_new_entry, xdr,
+					       nne->ne_dentry, dp, nf, (char *)n.name.name,
+					       n.name.len);
+		if (ret) {
+			p = (u8 *)xdr->p;
+			ret = xdrgen_encode_notify_rename4(xdr, &nr);
+		}
+		release_dentry_name_snapshot(&n);
+		if (!ret)
+			goto out_err;
+		*notify_mask |= BIT(NOTIFY4_RENAME_ENTRY);
+	}
+	return p;
+out_err:
+	pr_warn("nfsd: unable to marshal notify event to xdr stream\n");
+	return NULL;
+}
+
 static void svcxdr_init_encode_from_buffer(struct xdr_stream *xdr,
 				struct xdr_buf *buf, __be32 *p, int bytes)
 {
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 500e07e47909..dbeacbb7a5c8 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -897,6 +897,8 @@ bool nfsd4_has_active_async_copies(struct nfs4_client *clp);
 extern struct nfs4_client_reclaim *nfs4_client_to_reclaim(struct xdr_netobj name,
 				struct xdr_netobj princhash, struct nfsd_net *nn);
 extern bool nfs4_has_reclaimed_state(struct xdr_netobj name, struct nfsd_net *nn);
+int nfsd_handle_dir_event(u32 mask, const struct inode *dir, const void *data,
+			  int data_type, const struct qstr *name);
 
 void put_nfs4_file(struct nfs4_file *fi);
 extern void nfs4_put_cpntf_state(struct nfsd_net *nn,
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index 417e9ad9fbb3..d276840aca50 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -955,6 +955,9 @@ __be32 nfsd4_encode_fattr_to_buf(__be32 **p, int words,
 		struct svc_fh *fhp, struct svc_export *exp,
 		struct dentry *dentry,
 		u32 *bmval, struct svc_rqst *, int ignore_crossmnt);
+u8 *nfsd4_encode_notify_event(struct xdr_stream *xdr, struct nfsd_notify_event *nne,
+			      struct nfs4_delegation *dd, struct nfsd_file *nf,
+			      u32 *notify_mask);
 extern __be32 nfsd4_setclientid(struct svc_rqst *rqstp,
 		struct nfsd4_compound_state *, union nfsd4_op_u *u);
 extern __be32 nfsd4_setclientid_confirm(struct svc_rqst *rqstp,

-- 
2.53.0


^ permalink raw reply related

* [PATCH 14/24] nfsd: add tracepoint to dir_event handler
From: Jeff Layton @ 2026-04-07 13:21 UTC (permalink / raw)
  To: Alexander Viro, Christian Brauner, Jan Kara, Chuck Lever,
	Alexander Aring, Steven Rostedt, Masami Hiramatsu,
	Mathieu Desnoyers, Jonathan Corbet, Shuah Khan, NeilBrown,
	Olga Kornievskaia, Dai Ngo, Tom Talpey, Trond Myklebust,
	Anna Schumaker, Amir Goldstein
  Cc: Calum Mackay, linux-fsdevel, linux-kernel, linux-trace-kernel,
	linux-doc, linux-nfs, Jeff Layton
In-Reply-To: <20260407-dir-deleg-v1-0-aaf68c478abd@kernel.org>

Add some extra visibility around the fsnotify handlers.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
---
 fs/nfsd/nfs4state.c |  2 ++
 fs/nfsd/trace.h     | 20 ++++++++++++++++++++
 2 files changed, 22 insertions(+)

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 339c3d0bb575..f3bf572b0ada 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -9963,6 +9963,8 @@ nfsd_handle_dir_event(u32 mask, const struct inode *dir, const void *data,
 	struct file_lock_core *flc;
 	struct nfsd_notify_event *evt;
 
+	trace_nfsd_file_fsnotify_handle_dir_event(mask, dir, name);
+
 	/* Don't do anything if this is not an expected event */
 	if (!(mask & (FS_CREATE|FS_DELETE|FS_RENAME)))
 		return 0;
diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
index 60cacf64181c..3302cb926254 100644
--- a/fs/nfsd/trace.h
+++ b/fs/nfsd/trace.h
@@ -1377,6 +1377,26 @@ TRACE_EVENT(nfsd_file_fsnotify_handle_event,
 			__entry->nlink, __entry->mode, __entry->mask)
 );
 
+TRACE_EVENT(nfsd_file_fsnotify_handle_dir_event,
+	TP_PROTO(u32 mask, const struct inode *dir, const struct qstr *name),
+	TP_ARGS(mask, dir, name),
+	TP_STRUCT__entry(
+		__field(u32, mask)
+		__field(dev_t, s_dev)
+		__field(ino_t, i_ino)
+		__string_len(name, name->name, name->len)
+	),
+	TP_fast_assign(
+		__entry->mask = mask;
+		__entry->s_dev = dir->i_sb->s_dev;
+		__entry->i_ino = dir->i_ino;
+		__assign_str(name);
+	),
+	TP_printk("inode=0x%x:0x%x:0x%lx mask=0x%x name=%s",
+			MAJOR(__entry->s_dev), MINOR(__entry->s_dev),
+			__entry->i_ino, __entry->mask, __get_str(name))
+);
+
 DECLARE_EVENT_CLASS(nfsd_file_gc_class,
 	TP_PROTO(
 		const struct nfsd_file *nf

-- 
2.53.0


^ permalink raw reply related

* [PATCH 15/24] nfsd: apply the notify mask to the delegation when requested
From: Jeff Layton @ 2026-04-07 13:21 UTC (permalink / raw)
  To: Alexander Viro, Christian Brauner, Jan Kara, Chuck Lever,
	Alexander Aring, Steven Rostedt, Masami Hiramatsu,
	Mathieu Desnoyers, Jonathan Corbet, Shuah Khan, NeilBrown,
	Olga Kornievskaia, Dai Ngo, Tom Talpey, Trond Myklebust,
	Anna Schumaker, Amir Goldstein
  Cc: Calum Mackay, linux-fsdevel, linux-kernel, linux-trace-kernel,
	linux-doc, linux-nfs, Jeff Layton
In-Reply-To: <20260407-dir-deleg-v1-0-aaf68c478abd@kernel.org>

If the client requests a directory delegation with notifications
enabled, set the appropriate return mask in gddr_notification[0]. This
will ensure the lease acquisition sets the appropriate ignore mask.

If the client doesn't set NOTIFY4_GFLAG_EXTEND, then don't offer any
notifications, as nfsd won't provide directory offset information, and
"classic" notifications require it.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
---
 fs/nfsd/nfs4proc.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 2797da8cc950..01e3bf9e1839 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -2506,12 +2506,18 @@ nfsd4_verify(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	return status == nfserr_same ? nfs_ok : status;
 }
 
+#define SUPPORTED_NOTIFY_MASK	(BIT(NOTIFY4_REMOVE_ENTRY) |	\
+				 BIT(NOTIFY4_ADD_ENTRY) |	\
+				 BIT(NOTIFY4_RENAME_ENTRY) |	\
+				 BIT(NOTIFY4_GFLAG_EXTEND))
+
 static __be32
 nfsd4_get_dir_delegation(struct svc_rqst *rqstp,
 			 struct nfsd4_compound_state *cstate,
 			 union nfsd4_op_u *u)
 {
 	struct nfsd4_get_dir_delegation *gdd = &u->get_dir_delegation;
+	u32 requested = gdd->gdda_notification_types[0];
 	struct nfs4_delegation *dd;
 	struct nfsd_file *nf;
 	__be32 status;
@@ -2520,6 +2526,12 @@ nfsd4_get_dir_delegation(struct svc_rqst *rqstp,
 	if (status != nfs_ok)
 		return status;
 
+	/* No notifications if you don't set NOTIFY4_GFLAG_EXTEND! */
+	if (!(requested & BIT(NOTIFY4_GFLAG_EXTEND)))
+		requested = 0;
+
+	gdd->gddr_notification[0] = requested & SUPPORTED_NOTIFY_MASK;
+
 	/*
 	 * RFC 8881, section 18.39.3 says:
 	 *

-- 
2.53.0


^ permalink raw reply related

* [PATCH 16/24] nfsd: add helper to marshal a fattr4 from completed args
From: Jeff Layton @ 2026-04-07 13:21 UTC (permalink / raw)
  To: Alexander Viro, Christian Brauner, Jan Kara, Chuck Lever,
	Alexander Aring, Steven Rostedt, Masami Hiramatsu,
	Mathieu Desnoyers, Jonathan Corbet, Shuah Khan, NeilBrown,
	Olga Kornievskaia, Dai Ngo, Tom Talpey, Trond Myklebust,
	Anna Schumaker, Amir Goldstein
  Cc: Calum Mackay, linux-fsdevel, linux-kernel, linux-trace-kernel,
	linux-doc, linux-nfs, Jeff Layton
In-Reply-To: <20260407-dir-deleg-v1-0-aaf68c478abd@kernel.org>

Break the loop that encodes the actual attr_vals field into a separate
function.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
---
 fs/nfsd/nfs4xdr.c | 46 ++++++++++++++++++++++++++--------------------
 1 file changed, 26 insertions(+), 20 deletions(-)

diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 9ad13f96c219..2ba3fcadb742 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -3852,6 +3852,22 @@ static const nfsd4_enc_attr nfsd4_enc_fattr4_encode_ops[] = {
 #endif
 };
 
+static __be32
+nfsd4_encode_attr_vals(struct xdr_stream *xdr, u32 *attrmask, struct nfsd4_fattr_args *args)
+{
+	DECLARE_BITMAP(attr_bitmap, ARRAY_SIZE(nfsd4_enc_fattr4_encode_ops));
+	unsigned long bit;
+	__be32 status;
+
+	bitmap_from_arr32(attr_bitmap, attrmask, ARRAY_SIZE(nfsd4_enc_fattr4_encode_ops));
+	for_each_set_bit(bit, attr_bitmap, ARRAY_SIZE(nfsd4_enc_fattr4_encode_ops)) {
+		status = nfsd4_enc_fattr4_encode_ops[bit](xdr, args);
+		if (status != nfs_ok)
+			return status;
+	}
+	return nfs_ok;
+}
+
 /*
  * Note: @fhp can be NULL; in this case, we might have to compose the filehandle
  * ourselves.
@@ -3862,7 +3878,6 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr,
 		    struct dentry *dentry, const u32 *bmval,
 		    int ignore_crossmnt)
 {
-	DECLARE_BITMAP(attr_bitmap, ARRAY_SIZE(nfsd4_enc_fattr4_encode_ops));
 	struct nfs4_delegation *dp = NULL;
 	struct nfsd4_fattr_args args;
 	struct svc_fh *tempfh = NULL;
@@ -3877,7 +3892,6 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr,
 		.mnt	= exp->ex_path.mnt,
 		.dentry	= dentry,
 	};
-	unsigned long bit;
 
 	WARN_ON_ONCE(bmval[1] & NFSD_WRITEONLY_ATTRS_WORD1);
 	WARN_ON_ONCE(!nfsd_attrs_supported(minorversion, bmval));
@@ -4050,27 +4064,22 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr,
 #endif /* CONFIG_NFSD_V4_POSIX_ACLS */
 
 	/* attrmask */
-	status = nfsd4_encode_bitmap4(xdr, attrmask[0], attrmask[1],
-				      attrmask[2]);
+	status = nfsd4_encode_bitmap4(xdr, attrmask[0], attrmask[1], attrmask[2]);
 	if (status)
-		goto out;
+		return status;
 
 	/* attr_vals */
 	attrlen_offset = xdr->buf->len;
-	if (unlikely(!xdr_reserve_space(xdr, XDR_UNIT)))
-		goto out_resource;
-	bitmap_from_arr32(attr_bitmap, attrmask,
-			  ARRAY_SIZE(nfsd4_enc_fattr4_encode_ops));
-	for_each_set_bit(bit, attr_bitmap,
-			 ARRAY_SIZE(nfsd4_enc_fattr4_encode_ops)) {
-		status = nfsd4_enc_fattr4_encode_ops[bit](xdr, &args);
-		if (status != nfs_ok)
-			goto out;
+	if (unlikely(!xdr_reserve_space(xdr, XDR_UNIT))) {
+		status = nfserr_resource;
+		goto out;
 	}
-	attrlen = cpu_to_be32(xdr->buf->len - attrlen_offset - XDR_UNIT);
-	write_bytes_to_xdr_buf(xdr->buf, attrlen_offset, &attrlen, XDR_UNIT);
-	status = nfs_ok;
 
+	status = nfsd4_encode_attr_vals(xdr, attrmask, &args);
+	if (status == nfs_ok) {
+		attrlen = cpu_to_be32(xdr->buf->len - attrlen_offset - XDR_UNIT);
+		write_bytes_to_xdr_buf(xdr->buf, attrlen_offset, &attrlen, XDR_UNIT);
+	}
 out:
 #ifdef CONFIG_NFSD_V4_POSIX_ACLS
 	if (args.dpacl)
@@ -4093,9 +4102,6 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr,
 out_nfserr:
 	status = nfserrno(err);
 	goto out;
-out_resource:
-	status = nfserr_resource;
-	goto out;
 }
 
 static bool

-- 
2.53.0


^ permalink raw reply related

* [PATCH 17/24] nfsd: allow nfsd4_encode_fattr4_change() to work with no export
From: Jeff Layton @ 2026-04-07 13:21 UTC (permalink / raw)
  To: Alexander Viro, Christian Brauner, Jan Kara, Chuck Lever,
	Alexander Aring, Steven Rostedt, Masami Hiramatsu,
	Mathieu Desnoyers, Jonathan Corbet, Shuah Khan, NeilBrown,
	Olga Kornievskaia, Dai Ngo, Tom Talpey, Trond Myklebust,
	Anna Schumaker, Amir Goldstein
  Cc: Calum Mackay, linux-fsdevel, linux-kernel, linux-trace-kernel,
	linux-doc, linux-nfs, Jeff Layton
In-Reply-To: <20260407-dir-deleg-v1-0-aaf68c478abd@kernel.org>

In the context of a CB_NOTIFY callback, we may not have easy access to
a svc_export. nfsd will not currently grant a delegation on the V4 root,
however, so this should be safe.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
---
 fs/nfsd/nfs4xdr.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 2ba3fcadb742..49ca24851707 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -3257,7 +3257,7 @@ static __be32 nfsd4_encode_fattr4_change(struct xdr_stream *xdr,
 {
 	const struct svc_export *exp = args->exp;
 
-	if (unlikely(exp->ex_flags & NFSEXP_V4ROOT)) {
+	if (exp && unlikely(exp->ex_flags & NFSEXP_V4ROOT)) {
 		u32 flush_time = convert_to_wallclock(exp->cd->flush_time);
 
 		if (xdr_stream_encode_u32(xdr, flush_time) != XDR_UNIT)

-- 
2.53.0


^ permalink raw reply related

* [PATCH 18/24] nfsd: send basic file attributes in CB_NOTIFY
From: Jeff Layton @ 2026-04-07 13:21 UTC (permalink / raw)
  To: Alexander Viro, Christian Brauner, Jan Kara, Chuck Lever,
	Alexander Aring, Steven Rostedt, Masami Hiramatsu,
	Mathieu Desnoyers, Jonathan Corbet, Shuah Khan, NeilBrown,
	Olga Kornievskaia, Dai Ngo, Tom Talpey, Trond Myklebust,
	Anna Schumaker, Amir Goldstein
  Cc: Calum Mackay, linux-fsdevel, linux-kernel, linux-trace-kernel,
	linux-doc, linux-nfs, Jeff Layton
In-Reply-To: <20260407-dir-deleg-v1-0-aaf68c478abd@kernel.org>

In addition to the filename, send attributes about the inode in a
CB_NOTIFY event. This patch just adds the basic inode information that
can be acquired via GETATTR.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
---
 fs/nfsd/nfs4xdr.c | 44 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 44 insertions(+)

diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 49ca24851707..0cdce460f9c8 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -4104,12 +4104,21 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr,
 	goto out;
 }
 
+#define CB_NOTIFY_STATX_REQUEST_MASK (STATX_BASIC_STATS   | \
+				      STATX_BTIME	  | \
+				      STATX_CHANGE_COOKIE)
+
 static bool
 nfsd4_setup_notify_entry4(struct notify_entry4 *ne, struct xdr_stream *xdr,
 			  struct dentry *dentry, struct nfs4_delegation *dp,
 			  struct nfsd_file *nf, char *name, u32 namelen)
 {
+	struct path path =  { .mnt = nf->nf_file->f_path.mnt,
+			      .dentry = dentry };
+	struct nfsd4_fattr_args args = { };
 	uint32_t *attrmask;
+	__be32 status;
+	int ret;
 
 	/* Reserve space for attrmask */
 	attrmask = xdr_reserve_space(xdr, 3 * sizeof(uint32_t));
@@ -4120,6 +4129,41 @@ nfsd4_setup_notify_entry4(struct notify_entry4 *ne, struct xdr_stream *xdr,
 	ne->ne_file.len = namelen;
 	ne->ne_attrs.attrmask.element = attrmask;
 
+	/* FIXME: d_find_alias for inode ? */
+	if (!path.dentry || !d_inode(path.dentry))
+		goto noattrs;
+
+	/*
+	 * It is possible that the client was granted a delegation when a file
+	 * was created. Note that we don't issue a CB_GETATTR here since stale
+	 * attributes are presumably ok.
+	 */
+	ret = vfs_getattr(&path, &args.stat, CB_NOTIFY_STATX_REQUEST_MASK, AT_STATX_SYNC_AS_STAT);
+	if (ret)
+		goto noattrs;
+
+	args.change_attr = nfsd4_change_attribute(&args.stat);
+
+	attrmask[0] = FATTR4_WORD0_TYPE | FATTR4_WORD0_CHANGE |
+		      FATTR4_WORD0_SIZE | FATTR4_WORD0_FILEID;
+	attrmask[1] = FATTR4_WORD1_MODE | FATTR4_WORD1_NUMLINKS | FATTR4_WORD1_RAWDEV |
+		      FATTR4_WORD1_SPACE_USED | FATTR4_WORD1_TIME_ACCESS |
+		      FATTR4_WORD1_TIME_METADATA | FATTR4_WORD1_TIME_MODIFY;
+	attrmask[2] = 0;
+
+	if (args.stat.result_mask & STATX_BTIME)
+		attrmask[1] |= FATTR4_WORD1_TIME_CREATE;
+
+	ne->ne_attrs.attrmask.count = 2;
+	ne->ne_attrs.attr_vals.data = (u8 *)xdr->p;
+
+	status = nfsd4_encode_attr_vals(xdr, attrmask, &args);
+	if (status != nfs_ok)
+		goto noattrs;
+
+	ne->ne_attrs.attr_vals.len = (u8 *)xdr->p - ne->ne_attrs.attr_vals.data;
+	return true;
+noattrs:
 	attrmask[0] = 0;
 	attrmask[1] = 0;
 	attrmask[2] = 0;

-- 
2.53.0


^ permalink raw reply related

* [PATCH 19/24] nfsd: allow encoding a filehandle into fattr4 without a svc_fh
From: Jeff Layton @ 2026-04-07 13:21 UTC (permalink / raw)
  To: Alexander Viro, Christian Brauner, Jan Kara, Chuck Lever,
	Alexander Aring, Steven Rostedt, Masami Hiramatsu,
	Mathieu Desnoyers, Jonathan Corbet, Shuah Khan, NeilBrown,
	Olga Kornievskaia, Dai Ngo, Tom Talpey, Trond Myklebust,
	Anna Schumaker, Amir Goldstein
  Cc: Calum Mackay, linux-fsdevel, linux-kernel, linux-trace-kernel,
	linux-doc, linux-nfs, Jeff Layton
In-Reply-To: <20260407-dir-deleg-v1-0-aaf68c478abd@kernel.org>

The current fattr4 encoder requires a svc_fh in order to encode the
filehandle. This is not available in a CB_NOTIFY callback. Add a new
"fhandle" field to struct nfsd4_fattr_args and copy the filehandle into
there from the svc_fh. CB_NOTIFY will populate it via other means.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
---
 fs/nfsd/nfs4xdr.c | 35 ++++++++++++++++++++---------------
 1 file changed, 20 insertions(+), 15 deletions(-)

diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 0cdce460f9c8..faf0c3d35dee 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -2701,7 +2701,7 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
 }
 
 static __be32 nfsd4_encode_nfs_fh4(struct xdr_stream *xdr,
-				   struct knfsd_fh *fh_handle)
+				   const struct knfsd_fh *fh_handle)
 {
 	return nfsd4_encode_opaque(xdr, fh_handle->fh_raw, fh_handle->fh_size);
 }
@@ -3144,6 +3144,7 @@ struct nfsd4_fattr_args {
 	struct svc_fh		*fhp;
 	struct svc_export	*exp;
 	struct dentry		*dentry;
+	struct knfsd_fh		fhandle;
 	struct kstat		stat;
 	struct kstatfs		statfs;
 	struct nfs4_acl		*acl;
@@ -3359,7 +3360,7 @@ static __be32 nfsd4_encode_fattr4_acl(struct xdr_stream *xdr,
 static __be32 nfsd4_encode_fattr4_filehandle(struct xdr_stream *xdr,
 					     const struct nfsd4_fattr_args *args)
 {
-	return nfsd4_encode_nfs_fh4(xdr, &args->fhp->fh_handle);
+	return nfsd4_encode_nfs_fh4(xdr, &args->fhandle);
 }
 
 static __be32 nfsd4_encode_fattr4_fileid(struct xdr_stream *xdr,
@@ -3969,19 +3970,23 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr,
 		if (err)
 			goto out_nfserr;
 	}
-	if ((attrmask[0] & (FATTR4_WORD0_FILEHANDLE | FATTR4_WORD0_FSID)) &&
-	    !fhp) {
-		tempfh = kmalloc_obj(struct svc_fh);
-		status = nfserr_jukebox;
-		if (!tempfh)
-			goto out;
-		fh_init(tempfh, NFS4_FHSIZE);
-		status = fh_compose(tempfh, exp, dentry, NULL);
-		if (status)
-			goto out;
-		args.fhp = tempfh;
-	} else
-		args.fhp = fhp;
+
+	args.fhp = fhp;
+	if ((attrmask[0] & (FATTR4_WORD0_FILEHANDLE | FATTR4_WORD0_FSID))) {
+		if (!args.fhp) {
+			tempfh = kmalloc_obj(struct svc_fh);
+			status = nfserr_jukebox;
+			if (!tempfh)
+				goto out;
+			fh_init(tempfh, NFS4_FHSIZE);
+			status = fh_compose(tempfh, exp, dentry, NULL);
+			if (status)
+				goto out;
+			args.fhp = tempfh;
+		}
+		if (args.fhp)
+			fh_copy_shallow(&args.fhandle, &args.fhp->fh_handle);
+	}
 
 	if (attrmask[0] & FATTR4_WORD0_ACL) {
 		err = nfsd4_get_nfs4_acl(rqstp, dentry, &args.acl);

-- 
2.53.0


^ permalink raw reply related

* [PATCH 20/24] nfsd: add a fi_connectable flag to struct nfs4_file
From: Jeff Layton @ 2026-04-07 13:21 UTC (permalink / raw)
  To: Alexander Viro, Christian Brauner, Jan Kara, Chuck Lever,
	Alexander Aring, Steven Rostedt, Masami Hiramatsu,
	Mathieu Desnoyers, Jonathan Corbet, Shuah Khan, NeilBrown,
	Olga Kornievskaia, Dai Ngo, Tom Talpey, Trond Myklebust,
	Anna Schumaker, Amir Goldstein
  Cc: Calum Mackay, linux-fsdevel, linux-kernel, linux-trace-kernel,
	linux-doc, linux-nfs, Jeff Layton
In-Reply-To: <20260407-dir-deleg-v1-0-aaf68c478abd@kernel.org>

When encoding a filehandle for a CB_NOTIFY, there is no svc_export
available, but the server needs to know whether to encode a connectable
filehandle. Add a flag to the nfs4_file that tells whether the
svc_export under which a directory delegation was acquired requires
connectable filehandles.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
---
 fs/nfsd/nfs4state.c | 1 +
 fs/nfsd/state.h     | 1 +
 2 files changed, 2 insertions(+)

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index f3bf572b0ada..0580c935d804 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -5153,6 +5153,7 @@ static void nfsd4_file_init(const struct svc_fh *fh, struct nfs4_file *fp)
 	memset(fp->fi_access, 0, sizeof(fp->fi_access));
 	fp->fi_aliased = false;
 	fp->fi_inode = d_inode(fh->fh_dentry);
+	fp->fi_connectable = fh->fh_export->ex_flags & EXPORT_FH_CONNECTABLE;
 #ifdef CONFIG_NFSD_PNFS
 	INIT_LIST_HEAD(&fp->fi_lo_states);
 	atomic_set(&fp->fi_lo_recalls, 0);
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index dbeacbb7a5c8..d060d70c5820 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -745,6 +745,7 @@ struct nfs4_file {
 	int			fi_delegees;
 	struct knfsd_fh		fi_fhandle;
 	bool			fi_had_conflict;
+	bool			fi_connectable;
 #ifdef CONFIG_NFSD_PNFS
 	struct list_head	fi_lo_states;
 	atomic_t		fi_lo_recalls;

-- 
2.53.0


^ permalink raw reply related

* [PATCH 21/24] nfsd: add the filehandle to returned attributes in CB_NOTIFY
From: Jeff Layton @ 2026-04-07 13:21 UTC (permalink / raw)
  To: Alexander Viro, Christian Brauner, Jan Kara, Chuck Lever,
	Alexander Aring, Steven Rostedt, Masami Hiramatsu,
	Mathieu Desnoyers, Jonathan Corbet, Shuah Khan, NeilBrown,
	Olga Kornievskaia, Dai Ngo, Tom Talpey, Trond Myklebust,
	Anna Schumaker, Amir Goldstein
  Cc: Calum Mackay, linux-fsdevel, linux-kernel, linux-trace-kernel,
	linux-doc, linux-nfs, Jeff Layton
In-Reply-To: <20260407-dir-deleg-v1-0-aaf68c478abd@kernel.org>

nfsd's usual fh_compose routine requires a svc_export and fills out a
svc_fh. In the context of a CB_NOTIFY there is no such export to
consult.

Add a new routine that composes a filehandle with only a parent
filehandle and nfs4_file. Use that to fill out the fhandle field in the
nfsd4_fattr_args.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
---
 fs/nfsd/nfs4xdr.c | 37 +++++++++++++++++++++++++++++++++++++
 1 file changed, 37 insertions(+)

diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index faf0c3d35dee..e468cbc087ad 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -4109,6 +4109,39 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr,
 	goto out;
 }
 
+static bool
+setup_notify_fhandle(struct dentry *dentry, struct nfs4_file *fi,
+		     struct nfsd_file *nf, struct nfsd4_fattr_args *args)
+{
+	int fileid_type, fsid_len, maxsize, flags = 0;
+	struct knfsd_fh *fhp = &args->fhandle;
+	struct inode *inode = d_inode(dentry);
+	struct inode *parent = NULL;
+	struct fid *fid;
+
+	fsid_len = key_len(fi->fi_fhandle.fh_fsid_type);
+	fhp->fh_size = 4 + fsid_len;
+
+	/* Copy first 4 bytes + fsid */
+	memcpy(&fhp->fh_raw, &fi->fi_fhandle.fh_raw, fhp->fh_size);
+
+	fid = (struct fid *)(fh_fsid(fhp) + fsid_len/4);
+	maxsize = (NFS4_FHSIZE - fhp->fh_size)/4;
+
+	if (fi->fi_connectable && !S_ISDIR(inode->i_mode)) {
+		parent = d_inode(nf->nf_file->f_path.dentry);
+		flags = EXPORT_FH_CONNECTABLE;
+	}
+
+	fileid_type = exportfs_encode_inode_fh(inode, fid, &maxsize, parent, flags);
+	if (fileid_type < 0)
+		return false;
+
+	fhp->fh_fileid_type = fileid_type;
+	fhp->fh_size += maxsize * 4;
+	return true;
+}
+
 #define CB_NOTIFY_STATX_REQUEST_MASK (STATX_BASIC_STATS   | \
 				      STATX_BTIME	  | \
 				      STATX_CHANGE_COOKIE)
@@ -4118,6 +4151,7 @@ nfsd4_setup_notify_entry4(struct notify_entry4 *ne, struct xdr_stream *xdr,
 			  struct dentry *dentry, struct nfs4_delegation *dp,
 			  struct nfsd_file *nf, char *name, u32 namelen)
 {
+	struct nfs4_file *fi = dp->dl_stid.sc_file;
 	struct path path =  { .mnt = nf->nf_file->f_path.mnt,
 			      .dentry = dentry };
 	struct nfsd4_fattr_args args = { };
@@ -4156,6 +4190,9 @@ nfsd4_setup_notify_entry4(struct notify_entry4 *ne, struct xdr_stream *xdr,
 		      FATTR4_WORD1_TIME_METADATA | FATTR4_WORD1_TIME_MODIFY;
 	attrmask[2] = 0;
 
+	if (setup_notify_fhandle(dentry, fi, nf, &args))
+		attrmask[0] |= FATTR4_WORD0_FILEHANDLE;
+
 	if (args.stat.result_mask & STATX_BTIME)
 		attrmask[1] |= FATTR4_WORD1_TIME_CREATE;
 

-- 
2.53.0


^ permalink raw reply related

* [PATCH 22/24] nfsd: properly track requested child attributes
From: Jeff Layton @ 2026-04-07 13:21 UTC (permalink / raw)
  To: Alexander Viro, Christian Brauner, Jan Kara, Chuck Lever,
	Alexander Aring, Steven Rostedt, Masami Hiramatsu,
	Mathieu Desnoyers, Jonathan Corbet, Shuah Khan, NeilBrown,
	Olga Kornievskaia, Dai Ngo, Tom Talpey, Trond Myklebust,
	Anna Schumaker, Amir Goldstein
  Cc: Calum Mackay, linux-fsdevel, linux-kernel, linux-trace-kernel,
	linux-doc, linux-nfs, Jeff Layton
In-Reply-To: <20260407-dir-deleg-v1-0-aaf68c478abd@kernel.org>

Track the intersection of requested and supported child attributes in
the delegation, and only encode the attributes in that intersection when
sending add/remove/rename updates.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
---
 fs/nfsd/nfs4proc.c  |  2 ++
 fs/nfsd/nfs4state.c | 18 ++++++++++++++++++
 fs/nfsd/nfs4xdr.c   | 15 ++++++---------
 fs/nfsd/state.h     |  3 +++
 4 files changed, 29 insertions(+), 9 deletions(-)

diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 01e3bf9e1839..a807a55dddf9 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -2553,6 +2553,8 @@ nfsd4_get_dir_delegation(struct svc_rqst *rqstp,
 
 	gdd->gddrnf_status = GDD4_OK;
 	memcpy(&gdd->gddr_stateid, &dd->dl_stid.sc_stateid, sizeof(gdd->gddr_stateid));
+	gdd->gddr_child_attributes[0] = dd->dl_child_attrs[0];
+	gdd->gddr_child_attributes[1] = dd->dl_child_attrs[1];
 	nfs4_put_stid(&dd->dl_stid);
 	return nfs_ok;
 }
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 0580c935d804..59a9b1ca836b 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -9780,6 +9780,21 @@ nfsd4_deleg_getattr_conflict(struct svc_rqst *rqstp, struct dentry *dentry,
 	return status;
 }
 
+#define GDD_WORD0_CHILD_ATTRS	(FATTR4_WORD0_TYPE |		\
+				 FATTR4_WORD0_CHANGE |		\
+				 FATTR4_WORD0_SIZE |		\
+				 FATTR4_WORD0_FILEID |		\
+				 FATTR4_WORD0_FILEHANDLE)
+
+#define GDD_WORD1_CHILD_ATTRS	(FATTR4_WORD1_MODE |		\
+				 FATTR4_WORD1_NUMLINKS |	\
+				 FATTR4_WORD1_RAWDEV |		\
+				 FATTR4_WORD1_SPACE_USED |	\
+				 FATTR4_WORD1_TIME_ACCESS |	\
+				 FATTR4_WORD1_TIME_METADATA |	\
+				 FATTR4_WORD1_TIME_MODIFY |	\
+				 FATTR4_WORD1_TIME_CREATE)
+
 /**
  * nfsd_get_dir_deleg - attempt to get a directory delegation
  * @cstate: compound state
@@ -9849,6 +9864,9 @@ nfsd_get_dir_deleg(struct nfsd4_compound_state *cstate,
 		dp->dl_stid.sc_export =
 			exp_get(cstate->current_fh.fh_export);
 
+	dp->dl_child_attrs[0] = gdd->gdda_child_attributes[0] & GDD_WORD0_CHILD_ATTRS;
+	dp->dl_child_attrs[1] = gdd->gdda_child_attributes[1] & GDD_WORD1_CHILD_ATTRS;
+
 	fl = nfs4_alloc_init_lease(dp, gdd->gddr_notification[0]);
 	if (!fl)
 		goto out_put_stid;
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index e468cbc087ad..35646809becb 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -4183,18 +4183,15 @@ nfsd4_setup_notify_entry4(struct notify_entry4 *ne, struct xdr_stream *xdr,
 
 	args.change_attr = nfsd4_change_attribute(&args.stat);
 
-	attrmask[0] = FATTR4_WORD0_TYPE | FATTR4_WORD0_CHANGE |
-		      FATTR4_WORD0_SIZE | FATTR4_WORD0_FILEID;
-	attrmask[1] = FATTR4_WORD1_MODE | FATTR4_WORD1_NUMLINKS | FATTR4_WORD1_RAWDEV |
-		      FATTR4_WORD1_SPACE_USED | FATTR4_WORD1_TIME_ACCESS |
-		      FATTR4_WORD1_TIME_METADATA | FATTR4_WORD1_TIME_MODIFY;
+	attrmask[0] = dp->dl_child_attrs[0];
+	attrmask[1] = dp->dl_child_attrs[1];
 	attrmask[2] = 0;
 
-	if (setup_notify_fhandle(dentry, fi, nf, &args))
-		attrmask[0] |= FATTR4_WORD0_FILEHANDLE;
+	if (!setup_notify_fhandle(dentry, fi, nf, &args))
+		attrmask[0] &= ~FATTR4_WORD0_FILEHANDLE;
 
-	if (args.stat.result_mask & STATX_BTIME)
-		attrmask[1] |= FATTR4_WORD1_TIME_CREATE;
+	if (!(args.stat.result_mask & STATX_BTIME))
+		attrmask[1] &= ~FATTR4_WORD1_TIME_CREATE;
 
 	ne->ne_attrs.attrmask.count = 2;
 	ne->ne_attrs.attr_vals.data = (u8 *)xdr->p;
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index d060d70c5820..7ca5ef9caafe 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -282,6 +282,9 @@ struct nfs4_delegation {
 	struct timespec64	dl_atime;
 	struct timespec64	dl_mtime;
 	struct timespec64	dl_ctime;
+
+	/* For dir delegations */
+	uint32_t		dl_child_attrs[2];
 };
 
 static inline bool deleg_is_read(u32 dl_type)

-- 
2.53.0


^ permalink raw reply related

* [PATCH 23/24] nfsd: track requested dir attributes
From: Jeff Layton @ 2026-04-07 13:21 UTC (permalink / raw)
  To: Alexander Viro, Christian Brauner, Jan Kara, Chuck Lever,
	Alexander Aring, Steven Rostedt, Masami Hiramatsu,
	Mathieu Desnoyers, Jonathan Corbet, Shuah Khan, NeilBrown,
	Olga Kornievskaia, Dai Ngo, Tom Talpey, Trond Myklebust,
	Anna Schumaker, Amir Goldstein
  Cc: Calum Mackay, linux-fsdevel, linux-kernel, linux-trace-kernel,
	linux-doc, linux-nfs, Jeff Layton
In-Reply-To: <20260407-dir-deleg-v1-0-aaf68c478abd@kernel.org>

Track the intersection of requested and supported dir attributes in the
delegation, and only encode the attributes in that intersection when
sending add/remove/rename updates.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
---
 fs/nfsd/nfs4proc.c  |  7 ++++---
 fs/nfsd/nfs4state.c | 14 +++++++++++++-
 fs/nfsd/state.h     |  2 ++
 3 files changed, 19 insertions(+), 4 deletions(-)

diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index a807a55dddf9..82d7c473e4d3 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -2506,9 +2506,10 @@ nfsd4_verify(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	return status == nfserr_same ? nfs_ok : status;
 }
 
-#define SUPPORTED_NOTIFY_MASK	(BIT(NOTIFY4_REMOVE_ENTRY) |	\
-				 BIT(NOTIFY4_ADD_ENTRY) |	\
-				 BIT(NOTIFY4_RENAME_ENTRY) |	\
+#define SUPPORTED_NOTIFY_MASK	(BIT(NOTIFY4_CHANGE_DIR_ATTRS) |	\
+				 BIT(NOTIFY4_REMOVE_ENTRY) |		\
+				 BIT(NOTIFY4_ADD_ENTRY) |		\
+				 BIT(NOTIFY4_RENAME_ENTRY) |		\
 				 BIT(NOTIFY4_GFLAG_EXTEND))
 
 static __be32
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 59a9b1ca836b..c4b6f4d65a47 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -9795,6 +9795,15 @@ nfsd4_deleg_getattr_conflict(struct svc_rqst *rqstp, struct dentry *dentry,
 				 FATTR4_WORD1_TIME_MODIFY |	\
 				 FATTR4_WORD1_TIME_CREATE)
 
+#define GDD_WORD0_DIR_ATTRS	(FATTR4_WORD0_CHANGE |		\
+				 FATTR4_WORD0_SIZE)
+
+#define GDD_WORD1_DIR_ATTRS	(FATTR4_WORD1_NUMLINKS |	\
+				 FATTR4_WORD1_SPACE_USED |	\
+				 FATTR4_WORD1_TIME_ACCESS |	\
+				 FATTR4_WORD1_TIME_METADATA |	\
+				 FATTR4_WORD1_TIME_MODIFY)
+
 /**
  * nfsd_get_dir_deleg - attempt to get a directory delegation
  * @cstate: compound state
@@ -9864,10 +9873,13 @@ nfsd_get_dir_deleg(struct nfsd4_compound_state *cstate,
 		dp->dl_stid.sc_export =
 			exp_get(cstate->current_fh.fh_export);
 
+	dp->dl_notify_mask = gdd->gddr_notification[0];
 	dp->dl_child_attrs[0] = gdd->gdda_child_attributes[0] & GDD_WORD0_CHILD_ATTRS;
 	dp->dl_child_attrs[1] = gdd->gdda_child_attributes[1] & GDD_WORD1_CHILD_ATTRS;
+	dp->dl_dir_attrs[0] = gdd->gdda_dir_attributes[0] & GDD_WORD0_DIR_ATTRS;
+	dp->dl_dir_attrs[1] = gdd->gdda_dir_attributes[1] & GDD_WORD1_DIR_ATTRS;
 
-	fl = nfs4_alloc_init_lease(dp, gdd->gddr_notification[0]);
+	fl = nfs4_alloc_init_lease(dp, dp->dl_notify_mask);
 	if (!fl)
 		goto out_put_stid;
 
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 7ca5ef9caafe..56a3cfb12e65 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -284,7 +284,9 @@ struct nfs4_delegation {
 	struct timespec64	dl_ctime;
 
 	/* For dir delegations */
+	uint32_t		dl_notify_mask;
 	uint32_t		dl_child_attrs[2];
+	uint32_t		dl_dir_attrs[2];
 };
 
 static inline bool deleg_is_read(u32 dl_type)

-- 
2.53.0


^ permalink raw reply related

* [PATCH 24/24] nfsd: add support to CB_NOTIFY for dir attribute changes
From: Jeff Layton @ 2026-04-07 13:21 UTC (permalink / raw)
  To: Alexander Viro, Christian Brauner, Jan Kara, Chuck Lever,
	Alexander Aring, Steven Rostedt, Masami Hiramatsu,
	Mathieu Desnoyers, Jonathan Corbet, Shuah Khan, NeilBrown,
	Olga Kornievskaia, Dai Ngo, Tom Talpey, Trond Myklebust,
	Anna Schumaker, Amir Goldstein
  Cc: Calum Mackay, linux-fsdevel, linux-kernel, linux-trace-kernel,
	linux-doc, linux-nfs, Jeff Layton
In-Reply-To: <20260407-dir-deleg-v1-0-aaf68c478abd@kernel.org>

If the client requested dir attribute change notifications, send those
alongside any set of add/remove/rename events. Note that the server will
still recall the delegation on a SETATTR, so these are only sent for
changes to child dirents.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
---
 fs/nfsd/nfs4state.c | 25 ++++++++++++++++++++--
 fs/nfsd/nfs4xdr.c   | 61 +++++++++++++++++++++++++++++++++++++++++++++--------
 fs/nfsd/xdr4.h      |  2 ++
 3 files changed, 77 insertions(+), 11 deletions(-)

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index c4b6f4d65a47..01a2fb11dc0e 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -3470,10 +3470,15 @@ nfsd4_cb_notify_prepare(struct nfsd4_callback *cb)
 	struct nfsd_notify_event *events[NOTIFY4_EVENT_QUEUE_SIZE];
 	struct xdr_buf xdr = { .buflen = PAGE_SIZE * NOTIFY4_PAGE_ARRAY_SIZE,
 			       .pages  = ncn->ncn_pages };
+	int limit = NOTIFY4_EVENT_QUEUE_SIZE;
 	struct xdr_stream stream;
 	struct nfsd_file *nf;
-	int count, i;
 	bool error = false;
+	int count, i;
+
+	/* Save a slot for dir attr update if requested */
+	if (dp->dl_notify_mask & BIT(NOTIFY4_CHANGE_DIR_ATTRS))
+		--limit;
 
 	xdr_init_encode_pages(&stream, &xdr);
 
@@ -3487,7 +3492,7 @@ nfsd4_cb_notify_prepare(struct nfsd4_callback *cb)
 	}
 
 	/* we can't keep up! */
-	if (count > NOTIFY4_EVENT_QUEUE_SIZE) {
+	if (count > limit) {
 		spin_unlock(&ncn->ncn_lock);
 		goto out_recall;
 	}
@@ -3534,6 +3539,22 @@ nfsd4_cb_notify_prepare(struct nfsd4_callback *cb)
 		nfsd_notify_event_put(nne);
 	}
 	if (!error) {
+		if (dp->dl_notify_mask & BIT(NOTIFY4_CHANGE_DIR_ATTRS)) {
+			u32 *maskp = (u32 *)xdr_reserve_space(&stream, sizeof(*maskp));
+
+			if (maskp) {
+				u8 *p = nfsd4_encode_dir_attr_change(&stream, dp, nf);
+
+				if (p) {
+					*maskp = BIT(NOTIFY4_CHANGE_DIR_ATTRS);
+					ncn->ncn_nf[count].notify_mask.count = 1;
+					ncn->ncn_nf[count].notify_mask.element = maskp;
+					ncn->ncn_nf[count].notify_vals.data = p;
+					ncn->ncn_nf[count].notify_vals.len = (u8 *)stream.p - p;
+					++count;
+				}
+			}
+		}
 		ncn->ncn_nf_cnt = count;
 		nfsd_file_put(nf);
 		return true;
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 35646809becb..6e76502ca149 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -4152,11 +4152,11 @@ nfsd4_setup_notify_entry4(struct notify_entry4 *ne, struct xdr_stream *xdr,
 			  struct nfsd_file *nf, char *name, u32 namelen)
 {
 	struct nfs4_file *fi = dp->dl_stid.sc_file;
-	struct path path =  { .mnt = nf->nf_file->f_path.mnt,
-			      .dentry = dentry };
+	struct path path = nf->nf_file->f_path;
 	struct nfsd4_fattr_args args = { };
 	uint32_t *attrmask;
 	__be32 status;
+	bool parent;
 	int ret;
 
 	/* Reserve space for attrmask */
@@ -4168,6 +4168,9 @@ nfsd4_setup_notify_entry4(struct notify_entry4 *ne, struct xdr_stream *xdr,
 	ne->ne_file.len = namelen;
 	ne->ne_attrs.attrmask.element = attrmask;
 
+	parent = (dentry == path.dentry);
+	path.dentry = dentry;
+
 	/* FIXME: d_find_alias for inode ? */
 	if (!path.dentry || !d_inode(path.dentry))
 		goto noattrs;
@@ -4183,15 +4186,20 @@ nfsd4_setup_notify_entry4(struct notify_entry4 *ne, struct xdr_stream *xdr,
 
 	args.change_attr = nfsd4_change_attribute(&args.stat);
 
-	attrmask[0] = dp->dl_child_attrs[0];
-	attrmask[1] = dp->dl_child_attrs[1];
-	attrmask[2] = 0;
+	if (parent) {
+		attrmask[0] = dp->dl_dir_attrs[0];
+		attrmask[1] = dp->dl_dir_attrs[1];
+	} else {
+		attrmask[0] = dp->dl_child_attrs[0];
+		attrmask[1] = dp->dl_child_attrs[1];
 
-	if (!setup_notify_fhandle(dentry, fi, nf, &args))
-		attrmask[0] &= ~FATTR4_WORD0_FILEHANDLE;
+		if (!setup_notify_fhandle(dentry, fi, nf, &args))
+			attrmask[0] &= ~FATTR4_WORD0_FILEHANDLE;
 
-	if (!(args.stat.result_mask & STATX_BTIME))
-		attrmask[1] &= ~FATTR4_WORD1_TIME_CREATE;
+		if (!(args.stat.result_mask & STATX_BTIME))
+			attrmask[1] &= ~FATTR4_WORD1_TIME_CREATE;
+	}
+	attrmask[2] = 0;
 
 	ne->ne_attrs.attrmask.count = 2;
 	ne->ne_attrs.attr_vals.data = (u8 *)xdr->p;
@@ -4283,6 +4291,41 @@ u8 *nfsd4_encode_notify_event(struct xdr_stream *xdr, struct nfsd_notify_event *
 	return NULL;
 }
 
+/**
+ * nfsd4_encode_dir_attr_change
+ * @xdr: stream to which to encode the fattr4
+ * @dp: delegation where the event occurred
+ * @nf: nfsd_file opened on the directory
+ *
+ * Encode a dir attr change event.
+ */
+u8 *nfsd4_encode_dir_attr_change(struct xdr_stream *xdr, struct nfs4_delegation *dp,
+				 struct nfsd_file *nf)
+{
+	struct dentry *dentry = nf->nf_file->f_path.dentry;
+	struct notify_attr4 na = { };
+	struct name_snapshot n;
+	bool ret;
+	u8 *p = NULL;
+
+	if (!(dp->dl_notify_mask & BIT(NOTIFY4_CHANGE_DIR_ATTRS)))
+		return NULL;
+
+	take_dentry_name_snapshot(&n, dentry);
+	ret = nfsd4_setup_notify_entry4(&na.na_changed_entry, xdr,
+					dentry, dp, nf, (char *)n.name.name,
+					n.name.len);
+
+	/* Don't bother with the event if we're not encoding attrs */
+	if (ret && na.na_changed_entry.ne_attrs.attr_vals.len) {
+		p = (u8 *)xdr->p;
+		if (!xdrgen_encode_notify_attr4(xdr, &na))
+			p = NULL;
+	}
+	release_dentry_name_snapshot(&n);
+	return p;
+}
+
 static void svcxdr_init_encode_from_buffer(struct xdr_stream *xdr,
 				struct xdr_buf *buf, __be32 *p, int bytes)
 {
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index d276840aca50..cf7f0df68d63 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -958,6 +958,8 @@ __be32 nfsd4_encode_fattr_to_buf(__be32 **p, int words,
 u8 *nfsd4_encode_notify_event(struct xdr_stream *xdr, struct nfsd_notify_event *nne,
 			      struct nfs4_delegation *dd, struct nfsd_file *nf,
 			      u32 *notify_mask);
+u8 *nfsd4_encode_dir_attr_change(struct xdr_stream *xdr, struct nfs4_delegation *dp,
+				 struct nfsd_file *nf);
 extern __be32 nfsd4_setclientid(struct svc_rqst *rqstp,
 		struct nfsd4_compound_state *, union nfsd4_op_u *u);
 extern __be32 nfsd4_setclientid_confirm(struct svc_rqst *rqstp,

-- 
2.53.0


^ permalink raw reply related

* [PATCH v8 2/2] PCI: s390: Expose the UID as an arch specific PCI slot attribute
From: Niklas Schnelle @ 2026-04-07 13:24 UTC (permalink / raw)
  To: Bjorn Helgaas, Jonathan Corbet, Lukas Wunner, Shuah Khan
  Cc: Farhan Ali, Alexander Gordeev, Christian Borntraeger,
	Gerald Schaefer, Gerd Bayer, Heiko Carstens, Julian Ruess,
	Matthew Rosato, Peter Oberparleiter, Ramesh Errabolu,
	Sven Schnelle, Vasily Gorbik, linux-doc, linux-kernel, linux-pci,
	linux-s390, Niklas Schnelle
In-Reply-To: <20260407-uid_slot-v8-0-15ae4409d2ce@linux.ibm.com>

On s390, an individual PCI function can generally be identified by two
identifiers, the FID and the UID. Which identifier is used depends on
the scope and the platform configuration.

The first identifier, the FID, is always available and identifies a PCI
device uniquely within a machine. The FID may be virtualized by
hypervisors, but on the LPAR level, the machine scope makes it
impossible to create the same configuration based on FIDs on two
different LPARs of the same machine, and difficult to reuse across
machines.

Such matching LPAR configurations are useful, though, allowing
standardized setups and booting a Linux installation on different LPARs.
To this end the UID, or user-defined identifier, was introduced. While
it is only guaranteed to be unique within an LPAR and only if indicated
by firmware, it allows users to replicate PCI device setups.

On s390, which uses a machine hypervisor, a per PCI function hotplug
model is used. The shortcoming with the UID then is, that it is not
visible to the user without first attaching the PCI function and
accessing the "uid" device attribute. The FID, on the other hand, is
used as the slot name and is thus known even with the PCI function in
standby.

Remedy this shortcoming by providing the UID as an attribute on the slot
allowing the user to identify a PCI function based on the UID without
having to first attach it. Do this via a macro mechanism analogous to
what was introduced by commit 265baca69a07 ("s390/pci: Stop usurping
pdev->dev.groups") for the PCI device attributes.

Reviewed-by: Gerd Bayer <gbayer@linux.ibm.com>
Reviewed-by: Julian Ruess <julianr@linux.ibm.com>
Signed-off-by: Niklas Schnelle <schnelle@linux.ibm.com>
---
 Documentation/arch/s390/pci.rst |  7 +++++++
 arch/s390/include/asm/pci.h     |  4 ++++
 arch/s390/pci/pci_sysfs.c       | 20 ++++++++++++++++++++
 drivers/pci/slot.c              | 13 ++++++++++++-
 4 files changed, 43 insertions(+), 1 deletion(-)

diff --git a/Documentation/arch/s390/pci.rst b/Documentation/arch/s390/pci.rst
index c3476de4f03278d07099aa32cbea0f868b6e9c9c..80f4ba19315994da056a10b4d216d61ff22ea5aa 100644
--- a/Documentation/arch/s390/pci.rst
+++ b/Documentation/arch/s390/pci.rst
@@ -58,6 +58,13 @@ Entries specific to zPCI functions and entries that hold zPCI information.
 
   - /sys/bus/pci/slots/XXXXXXXX/power
 
+  In addition to using the FID as the name of the slot, the slot directory
+  also contains the following s390-specific slot attributes.
+
+  - uid:
+    The User-defined identifier (UID) of the function which may be configured
+    by this slot. See also the corresponding attribute of the device.
+
   A physical function that currently supports a virtual function cannot be
   powered off until all virtual functions are removed with:
   echo 0 > /sys/bus/pci/devices/DDDD:BB:dd.f/sriov_numvf
diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h
index c0ff19dab5807c7e1aabb48a0e9436aac45ec97d..5dcf35f0f325f5f44b28109a1c8d9aef18401035 100644
--- a/arch/s390/include/asm/pci.h
+++ b/arch/s390/include/asm/pci.h
@@ -208,6 +208,10 @@ extern const struct attribute_group zpci_ident_attr_group;
 			    &pfip_attr_group,		 \
 			    &zpci_ident_attr_group,
 
+extern const struct attribute_group zpci_slot_attr_group;
+
+#define ARCH_PCI_SLOT_GROUPS (&zpci_slot_attr_group)
+
 extern unsigned int s390_pci_force_floating __initdata;
 extern unsigned int s390_pci_no_rid;
 
diff --git a/arch/s390/pci/pci_sysfs.c b/arch/s390/pci/pci_sysfs.c
index c2444a23e26c4218832bb91930b5f0ffd498d28f..d98d97df792adb3c7e415a8d374cc2f3a65fbb52 100644
--- a/arch/s390/pci/pci_sysfs.c
+++ b/arch/s390/pci/pci_sysfs.c
@@ -187,6 +187,17 @@ static ssize_t index_show(struct device *dev,
 }
 static DEVICE_ATTR_RO(index);
 
+static ssize_t zpci_uid_slot_show(struct pci_slot *slot, char *buf)
+{
+	struct zpci_dev *zdev = container_of(slot->hotplug, struct zpci_dev,
+					     hotplug_slot);
+
+	return sysfs_emit(buf, "0x%x\n", zdev->uid);
+}
+
+static struct pci_slot_attribute zpci_slot_attr_uid =
+	__ATTR(uid, 0444, zpci_uid_slot_show, NULL);
+
 static umode_t zpci_index_is_visible(struct kobject *kobj,
 				     struct attribute *attr, int n)
 {
@@ -243,6 +254,15 @@ const struct attribute_group pfip_attr_group = {
 	.attrs = pfip_attrs,
 };
 
+static struct attribute *zpci_slot_attrs[] = {
+	&zpci_slot_attr_uid.attr,
+	NULL,
+};
+
+const struct attribute_group zpci_slot_attr_group = {
+	.attrs = zpci_slot_attrs,
+};
+
 static struct attribute *clp_fw_attrs[] = {
 	&uid_checking_attr.attr,
 	NULL,
diff --git a/drivers/pci/slot.c b/drivers/pci/slot.c
index 787311614e5b6ebb39e7284f9b9f205a0a684d6d..2f8fcfbbec24e73d0bb6e40fd04c05a94f518045 100644
--- a/drivers/pci/slot.c
+++ b/drivers/pci/slot.c
@@ -96,7 +96,18 @@ static struct attribute *pci_slot_default_attrs[] = {
 	&pci_slot_attr_cur_speed.attr,
 	NULL,
 };
-ATTRIBUTE_GROUPS(pci_slot_default);
+
+static const struct attribute_group pci_slot_default_group = {
+	.attrs = pci_slot_default_attrs,
+};
+
+static const struct attribute_group *pci_slot_default_groups[] = {
+	&pci_slot_default_group,
+#ifdef ARCH_PCI_SLOT_GROUPS
+	ARCH_PCI_SLOT_GROUPS,
+#endif
+	NULL,
+};
 
 static const struct kobj_type pci_slot_ktype = {
 	.sysfs_ops = &pci_slot_sysfs_ops,

-- 
2.51.0


^ permalink raw reply related

* [PATCH v8 0/2] PCI: s390: Expose the UID as an arch specific PCI slot attribute
From: Niklas Schnelle @ 2026-04-07 13:24 UTC (permalink / raw)
  To: Bjorn Helgaas, Jonathan Corbet, Lukas Wunner, Shuah Khan
  Cc: Farhan Ali, Alexander Gordeev, Christian Borntraeger,
	Gerald Schaefer, Gerd Bayer, Heiko Carstens, Julian Ruess,
	Matthew Rosato, Peter Oberparleiter, Ramesh Errabolu,
	Sven Schnelle, Vasily Gorbik, linux-doc, linux-kernel, linux-pci,
	linux-s390, Niklas Schnelle, Randy Dunlap

Hi all,

Add a mechanism for architecture specific attributes on
PCI slots in order to add the user-defined ID (UID) as an s390 specific
PCI slot attribute. First though improve some issues with the s390 specific
documentation of PCI sysfs attributes noticed during development. 

Also note, I considered adding the UID as a generic slot index attribute
analogous to the PCI device index attribute (SMBIOS index / s390 UID)
but decided against it as this seems rather s390 specific and having
it named UID makes things easier for users and aligns with the existing
separate uid device attribute.

Thanks,
Niklas

v7->v8:
- Fix wrong uid_is_unique on firmware file, it's actually "uid_checking"
  (Gerd)
- Link to v7: https://lore.kernel.org/r/20260407-uid_slot-v7-0-e50f7976124e@linux.ibm.com

Signed-off-by: Niklas Schnelle <schnelle@linux.ibm.com>
---
---

---

---
Niklas Schnelle (2):
      docs: s390/pci: Improve and update PCI documentation
      PCI: s390: Expose the UID as an arch specific PCI slot attribute

 Documentation/arch/s390/pci.rst | 151 +++++++++++++++++++++++++++-------------
 arch/s390/include/asm/pci.h     |   4 ++
 arch/s390/pci/pci_sysfs.c       |  20 ++++++
 drivers/pci/slot.c              |  13 +++-
 4 files changed, 140 insertions(+), 48 deletions(-)
---
base-commit: 591cd656a1bf5ea94a222af5ef2ee76df029c1d2
change-id: 20250923-uid_slot-e3559cf5ca30

Best regards,
-- 
Niklas Schnelle


^ permalink raw reply

* [PATCH v8 1/2] docs: s390/pci: Improve and update PCI documentation
From: Niklas Schnelle @ 2026-04-07 13:24 UTC (permalink / raw)
  To: Bjorn Helgaas, Jonathan Corbet, Lukas Wunner, Shuah Khan
  Cc: Farhan Ali, Alexander Gordeev, Christian Borntraeger,
	Gerald Schaefer, Gerd Bayer, Heiko Carstens, Julian Ruess,
	Matthew Rosato, Peter Oberparleiter, Ramesh Errabolu,
	Sven Schnelle, Vasily Gorbik, linux-doc, linux-kernel, linux-pci,
	linux-s390, Niklas Schnelle, Randy Dunlap
In-Reply-To: <20260407-uid_slot-v8-0-15ae4409d2ce@linux.ibm.com>

Update the s390 specific PCI documentation to better reflect current
behavior and terms such as the handling of Isolated VFs via commit
25f39d3dcb48 ("s390/pci: Ignore RID for isolated VFs").

Add a description for /sys/firmware/clp/uid_checking which was added
in commit b043a81ce3ee ("s390/pci: Expose firmware provided UID Checking
state in sysfs") but missed documentation.

Similarly add documentation for the fidparm attribute added by commit
99ad39306a62 ("s390/pci: Expose FIDPARM attribute in sysfs") and
add a list of pft values and their names.

Finally improve formatting of the different attribute descriptions by
adding a separating colon.

Reviewed-by: Farhan Ali <alifm@linux.ibm.com>
Acked-by: Randy Dunlap <rdunlap@infradead.org>
Tested-by: Randy Dunlap <rdunlap@infradead.org>
Reviewed-by: Matthew Rosato <mjrosato@linux.ibm.com>
Signed-off-by: Niklas Schnelle <schnelle@linux.ibm.com>
---
 Documentation/arch/s390/pci.rst | 144 +++++++++++++++++++++++++++-------------
 1 file changed, 97 insertions(+), 47 deletions(-)

diff --git a/Documentation/arch/s390/pci.rst b/Documentation/arch/s390/pci.rst
index d5755484d8e75c7bf67a350e61bbe04f0452a2fa..c3476de4f03278d07099aa32cbea0f868b6e9c9c 100644
--- a/Documentation/arch/s390/pci.rst
+++ b/Documentation/arch/s390/pci.rst
@@ -6,6 +6,7 @@ S/390 PCI
 
 Authors:
         - Pierre Morel
+        - Niklas Schnelle
 
 Copyright, IBM Corp. 2020
 
@@ -27,14 +28,16 @@ Command line parameters
 debugfs entries
 ---------------
 
-The S/390 debug feature (s390dbf) generates views to hold various debug results in sysfs directories of the form:
+The S/390 debug feature (s390dbf) generates views to hold various debug results
+in sysfs directories of the form:
 
  * /sys/kernel/debug/s390dbf/pci_*/
 
 For example:
 
   - /sys/kernel/debug/s390dbf/pci_msg/sprintf
-    Holds messages from the processing of PCI events, like machine check handling
+
+    holds messages from the processing of PCI events, like machine check handling
     and setting of global functionality, like UID checking.
 
   Change the level of logging to be more or less verbose by piping
@@ -47,87 +50,134 @@ Sysfs entries
 
 Entries specific to zPCI functions and entries that hold zPCI information.
 
-* /sys/bus/pci/slots/XXXXXXXX
+* /sys/bus/pci/slots/XXXXXXXX:
 
-  The slot entries are set up using the function identifier (FID) of the
-  PCI function. The format depicted as XXXXXXXX above is 8 hexadecimal digits
-  with 0 padding and lower case hexadecimal digits.
+  The slot entries are set up using the function identifier (FID) of the PCI
+  function as slot name. The format depicted as XXXXXXXX above is 8 hexadecimal
+  digits with 0 padding and lower case hexadecimal digits.
 
   - /sys/bus/pci/slots/XXXXXXXX/power
 
   A physical function that currently supports a virtual function cannot be
   powered off until all virtual functions are removed with:
-  echo 0 > /sys/bus/pci/devices/XXXX:XX:XX.X/sriov_numvf
+  echo 0 > /sys/bus/pci/devices/DDDD:BB:dd.f/sriov_numvf
 
-* /sys/bus/pci/devices/XXXX:XX:XX.X/
+* /sys/bus/pci/devices/DDDD:BB:dd.f/:
 
-  - function_id
-    A zPCI function identifier that uniquely identifies the function in the Z server.
+  - function_id:
+    The zPCI function identifier (FID) is a 32-bit hexadecimal value that
+    uniquely identifies the PCI function. Unless the hypervisor provides
+    a virtual FID e.g. on KVM this identifier is unique across the machine even
+    between different partitions.
 
-  - function_handle
-    Low-level identifier used for a configured PCI function.
-    It might be useful for debugging.
+  - function_handle:
+    This 32-bit hexadecimal value is a low-level identifier used for a PCI
+    function. Note that the function handle may be changed and become invalid
+    on PCI events and when enabling/disabling the PCI function.
 
-  - pchid
-    Model-dependent location of the I/O adapter.
+  - pchid:
+    This 16-bit hexadecimal value encodes a model-dependent location for
+    the PCI function.
 
-  - pfgid
-    PCI function group ID, functions that share identical functionality
+  - pfgid:
+    PCI function group ID; functions that share identical functionality
     use a common identifier.
     A PCI group defines interrupts, IOMMU, IOTLB, and DMA specifics.
 
-  - vfn
+  - vfn:
     The virtual function number, from 1 to N for virtual functions,
     0 for physical functions.
 
-  - pft
-    The PCI function type
+  - pft:
+    The PCI function type is an s390-specific type attribute. It indicates
+    a more general, usage oriented, type than PCI Specification
+    class/vendor/device identifiers. That is PCI functions with the same pft
+    value may be backed by different hardware implementations. At the same time
+    apart from unclassified functions (pft is 0x00) the same pft value
+    generally implies a similar usage model. Conversely, the same
+    PCI hardware device may appear with different pft values when in a
+    different usage model. For example NETD and NETH VFs may be implemented
+    by the same PCI hardware device but in NETD the parent Physical Function
+    is user managed while with NETH it is platform managed.
 
-  - port
-    The port corresponds to the physical port the function is attached to.
-    It also gives an indication of the physical function a virtual function
-    is attached to.
+    Currently the following PFT values are defined:
 
-  - uid
-    The user identifier (UID) may be defined as part of the machine
-    configuration or the z/VM or KVM guest configuration. If the accompanying
-    uid_is_unique attribute is 1 the platform guarantees that the UID is unique
-    within that instance and no devices with the same UID can be attached
-    during the lifetime of the system.
+    - 0x00 (UNC): Unclassified
+    - 0x02 (ROCE): RoCE Express
+    - 0x05 (ISM): Internal Shared Memory
+    - 0x0a (ROC2): RoCE Express 2
+    - 0x0b (NVMe): NVMe
+    - 0x0c (NETH): Network Express hybrid
+    - 0x0d (CNW): Cloud Network Adapter
+    - 0x0f (NETD): Network Express direct
 
-  - uid_is_unique
-    Indicates whether the user identifier (UID) is guaranteed to be and remain
-    unique within this Linux instance.
+  - port:
+    The port is a decimal value corresponding to the physical port the function
+    is attached to. Virtual Functions (VFs) share the port with their parent
+    Physical Function (PF). A value of 0 indicates that the port attribute is
+    not applicable for that PCI function type.
 
-  - pfip/segmentX
+  - uid:
+    The user-defined identifier (UID) for a PCI function is a 32-bit
+    hexadecimal value. It is defined on a per instance basis as part of the
+    partition, KVM guest, or z/VM guest configuration. If UID Checking is
+    enabled the platform ensures that the UID is unique within that instance
+    and no two PCI functions with the same UID will be visible to the instance.
+
+    Independent of this guarantee and unlike the function ID (FID) the UID may
+    be the same in different partitions within the same machine. This allows
+    PCI configurations in multiple partitions to be made identical in the
+    UID-namespace.
+
+  - uid_is_unique:
+    A 0 or 1 flag indicating whether the user-defined identifier (UID) is
+    guaranteed to be and remain unique within this Linux instance. This
+    platform feature is called UID Checking.
+
+  - pfip/segmentX:
     The segments determine the isolation of a function.
     They correspond to the physical path to the function.
     The more the segments are different, the more the functions are isolated.
 
+  - fidparm:
+    Contains an 8-bit-per-PCI function parameter field in hexadecimal provided
+    by the platform. The meaning of this field is PCI function type specific.
+    For NETH VFs a value of 0x01 indicates that the function supports
+    promiscuous mode.
+
+* /sys/firmware/clp/uid_checking:
+
+  In addition to the per-device uid_is_unique attribute this presents a
+  global indication of whether UID Checking is enabled. This allows users
+  to check for UID Checking even when no PCI functions are configured.
+
 Enumeration and hotplug
 =======================
 
 The PCI address consists of four parts: domain, bus, device and function,
-and is of this form: DDDD:BB:dd.f
+and is of this form: DDDD:BB:dd.f.
 
-* When not using multi-functions (norid is set, or the firmware does not
-  support multi-functions):
+* For a PCI function for which the platform does not expose the RID, the
+  pci=norid kernel parameter is used, or a so-called isolated Virtual Function
+  which does have RID information but is used without its parent Physical
+  Function being part of the same PCI configuration:
 
   - There is only one function per domain.
 
-  - The domain is set from the zPCI function's UID as defined during the
-    LPAR creation.
+  - The domain is set from the zPCI function's UID if UID Checking is on;
+    otherwise the domain ID is generated dynamically and is not stable
+    across reboots or hot plug.
 
-* When using multi-functions (norid parameter is not set),
-  zPCI functions are addressed differently:
+* For a PCI function for which the platform exposes the RID and which
+  is not an Isolated Virtual Function:
 
   - There is still only one bus per domain.
 
-  - There can be up to 256 functions per bus.
+  - There can be up to 256 PCI functions per bus.
 
-  - The domain part of the address of all functions for
-    a multi-Function device is set from the zPCI function's UID as defined
-    in the LPAR creation for the function zero.
+  - The domain part of the address of all functions within the same topology is
+    that of the configured PCI function with the lowest devfn within that
+    topology.
 
-  - New functions will only be ready for use after the function zero
-    (the function with devfn 0) has been enumerated.
+  - Virtual Functions generated by an SR-IOV capable Physical Function only
+    become visible once SR-IOV is enabled.

-- 
2.51.0


^ permalink raw reply related

* Re: [PATCH 0/1] Documentation: leds: leds-class: Document keyboard backlight LED class naming
From: Xavier Bestel @ 2026-04-07 13:26 UTC (permalink / raw)
  To: Hans de Goede, Lee Jones, Pavel Machek, Jonathan Corbet,
	Shuah Khan
  Cc: Rishit Bansal, Carlos Ferreira, Edip Hazuri, Mustafa Ekşi,
	linux-leds, linux-doc
In-Reply-To: <20260406174638.320135-1-johannes.goede@oss.qualcomm.com>

Le lundi 06 avril 2026 à 19:46 +0200, Hans de Goede a écrit :
> Drivers which need this are:
> [...]
> 3. Logitech G710/G710+ gaming keyboards HID driver:
> https://lore.kernel.org/linux-input/20260402075239.3829699-1-xav@bes.tel/
> Posted a week ago, needs an agreement on the LED class dev naming scheme
> to continue.

Indeed. I referenced Documentation/leds/leds-class.rst as if your patch
was already merged. Thank you for tackling this !

Regards,

	Xavier

^ permalink raw reply

* Re: [PATCH v10 12/21] gpu: nova-core: mm: Add unified page table entry wrapper enums
From: Eliot Courtney @ 2026-04-07 13:42 UTC (permalink / raw)
  To: Joel Fernandes, Eliot Courtney, linux-kernel
  Cc: Miguel Ojeda, Boqun Feng, Gary Guo, Bjorn Roy Baron, Benno Lossin,
	Andreas Hindborg, Alice Ryhl, Trevor Gross, Danilo Krummrich,
	Dave Airlie, Daniel Almeida, Koen Koning, dri-devel,
	rust-for-linux, Nikola Djukic, Maarten Lankhorst, Maxime Ripard,
	Thomas Zimmermann, David Airlie, Simona Vetter, Jonathan Corbet,
	Alex Deucher, Christian Koenig, Jani Nikula, Joonas Lahtinen,
	Rodrigo Vivi, Tvrtko Ursulin, Huang Rui, Matthew Auld,
	Matthew Brost, Lucas De Marchi, Thomas Hellstrom, Helge Deller,
	Alex Gaynor, Boqun Feng, John Hubbard, Alistair Popple,
	Timur Tabi, Edwin Peer, Alexandre Courbot, Andrea Righi,
	Andy Ritger, Zhi Wang, Balbir Singh, Philipp Stanner,
	Elle Rhumsaa, alexeyi, joel, linux-doc, amd-gfx, intel-gfx,
	intel-xe, linux-fbdev
In-Reply-To: <5db2aab1-4b65-486e-ad9b-27a108bdb0d6@nvidia.com>

On Tue Apr 7, 2026 at 6:55 AM JST, Joel Fernandes wrote:
>>> +    /// Compute upper bound on page table pages needed for `num_virt_pages`.
>>> +    ///
>>> +    /// Walks from PTE level up through PDE levels, accumulating the tree.
>>> +    pub(crate) fn pt_pages_upper_bound(&self, num_virt_pages: usize) -> usize {
>>> +        let mut total = 0;
>>> +
>>> +        // PTE pages at the leaf level.
>>> +        let pte_epp = self.entries_per_page(self.pte_level());
>>> +        let mut pages_at_level = num_virt_pages.div_ceil(pte_epp);
>>> +        total += pages_at_level;
>>> +
>>> +        // Walk PDE levels bottom-up (reverse of pde_levels()).
>>> +        for &level in self.pde_levels().iter().rev() {
>>> +            let epp = self.entries_per_page(level);
>>> +
>>> +            // How many pages at this level do we need to point to
>>> +            // the previous pages_at_level?
>>> +            pages_at_level = pages_at_level.div_ceil(epp);
>>> +            total += pages_at_level;
>>> +        }
>>> +
>>> +        total
>>> +    }
>>> +}
>>> +
>> 
>> We have a lot of matches on the MMU version here (and below in Pte, Pde,
>> DualPde). What about making MmuVersion into a trait (e.g. Mmu) with
>> associated types for Pte, Pde, DualPde which can implement traits
>> defining their common operations too?
>
> I coded this up and it did not look pretty, there's not much LOC savings and the
> code becomes harder to read because of parametrization of several functions. Also:

Thanks for looking into it. Sorry to be a bother, but would you have a
branch around with the code? I'm curious what didn't look good about it.

>> Then you can parameterise Vmm/PtWalk on this type.
>
> The match still has to be done somewhere, so you end up matching on chipset to call
> the correct parametrized functions versus just passing in the parameter or
> chipset down, in some cases.
>
> For now I am inclined to leave it as is. Also there's a Rust pitfall we all
> learnt during the turing and other patch reviews, sometimes doing a bunch of
> matches is good especially if the number of variants are expected to be fixed
> (in the mm case, version 2 and version 3). Traits have some disadvantages too,
> example dyn traits have to be heap-allocated, parametrizing can increase code size
> (due to monomorphization) etc.

Yeah, it's just this is a lot of matches in a lot of places. And we have
ver2 / ver3 specific code leaking into the general pagetable.rs file. So
it would be really nice if we could find a way to improve this specific
aspect. We can reduce the match to happening in just one file. You can
avoid heap allocation if you would like by making Vmm an enum,
for example, and doing the match based dispatch there at the top of the
API tree, rather than at the bottom where it fans out into a lot more
locations.

>
> thanks,
>
> --
> Joel Fernandes


^ permalink raw reply

* Re: [PATCH] hwmon: (yogafan) various markup improvements
From: Guenter Roeck @ 2026-04-07 13:51 UTC (permalink / raw)
  To: Sergio Melas, Randy Dunlap
  Cc: linux-kernel, linux-hwmon, Jonathan Corbet, Shuah Khan, linux-doc
In-Reply-To: <CAP8e=sLQ9HTy1Wu7TMcrae8w9MD7-eC8Wu-4rLSe1hoLF3buBA@mail.gmail.com>

On 4/7/26 04:04, Sergio Melas wrote:
> Thank you, Randy.
>   I will incorporate these markup improvements into the next version of
> the patch set.
> 

Why would that make sense ? I'll just apply the patch.

Guenter


^ permalink raw reply

* Re: [PATCH] hwmon: (yogafan) various markup improvements
From: Guenter Roeck @ 2026-04-07 13:52 UTC (permalink / raw)
  To: Randy Dunlap
  Cc: linux-kernel, Sergio Melas, linux-hwmon, Jonathan Corbet,
	Shuah Khan, linux-doc
In-Reply-To: <20260407052317.2097791-1-rdunlap@infradead.org>

On Mon, Apr 06, 2026 at 10:23:17PM -0700, Randy Dunlap wrote:
> There are several places in yogafan.rst where it appears that lines
> are meant to be presented on their own but instead they are strung
> together due to the lack of markups. Fix these issues by:
> 
> - using bullets where needed
> - indenting continuation lines of bulleted items
> - using a table where appropriate
> - using a literal block where appropriate
> 
> Fixes: c67c248ca406 ("hwmon: (yogafan) Add support for Lenovo Yoga/Legion fan monitoring")
> Signed-off-by: Randy Dunlap <rdunlap@infradead.org>

Applied.

Thanks,
Guenter

^ permalink raw reply

* Re: [PATCH v10 12/21] gpu: nova-core: mm: Add unified page table entry wrapper enums
From: Joel Fernandes @ 2026-04-07 13:59 UTC (permalink / raw)
  To: Eliot Courtney, linux-kernel
  Cc: Miguel Ojeda, Boqun Feng, Gary Guo, Bjorn Roy Baron, Benno Lossin,
	Andreas Hindborg, Alice Ryhl, Trevor Gross, Danilo Krummrich,
	Dave Airlie, Daniel Almeida, Koen Koning, dri-devel,
	rust-for-linux, Nikola Djukic, Maarten Lankhorst, Maxime Ripard,
	Thomas Zimmermann, David Airlie, Simona Vetter, Jonathan Corbet,
	Alex Deucher, Christian Koenig, Jani Nikula, Joonas Lahtinen,
	Rodrigo Vivi, Tvrtko Ursulin, Huang Rui, Matthew Auld,
	Matthew Brost, Lucas De Marchi, Thomas Hellstrom, Helge Deller,
	Alex Gaynor, Boqun Feng, John Hubbard, Alistair Popple,
	Timur Tabi, Edwin Peer, Alexandre Courbot, Andrea Righi,
	Andy Ritger, Zhi Wang, Balbir Singh, Philipp Stanner,
	Elle Rhumsaa, alexeyi, joel, linux-doc, amd-gfx, intel-gfx,
	intel-xe, linux-fbdev
In-Reply-To: <DHMYSTLVHIFJ.A2BDMPVNZNLS@nvidia.com>

Hi Eliot,

On 4/7/2026 9:42 AM, Eliot Courtney wrote:
> On Tue Apr 7, 2026 at 6:55 AM JST, Joel Fernandes wrote:
>>>> +    /// Compute upper bound on page table pages needed for `num_virt_pages`.
>>>> +    ///
>>>> +    /// Walks from PTE level up through PDE levels, accumulating the tree.
>>>> +    pub(crate) fn pt_pages_upper_bound(&self, num_virt_pages: usize) -> usize {
>>>> +        let mut total = 0;
>>>> +
>>>> +        // PTE pages at the leaf level.
>>>> +        let pte_epp = self.entries_per_page(self.pte_level());
>>>> +        let mut pages_at_level = num_virt_pages.div_ceil(pte_epp);
>>>> +        total += pages_at_level;
>>>> +
>>>> +        // Walk PDE levels bottom-up (reverse of pde_levels()).
>>>> +        for &level in self.pde_levels().iter().rev() {
>>>> +            let epp = self.entries_per_page(level);
>>>> +
>>>> +            // How many pages at this level do we need to point to
>>>> +            // the previous pages_at_level?
>>>> +            pages_at_level = pages_at_level.div_ceil(epp);
>>>> +            total += pages_at_level;
>>>> +        }
>>>> +
>>>> +        total
>>>> +    }
>>>> +}
>>>> +
>>>
>>> We have a lot of matches on the MMU version here (and below in Pte, Pde,
>>> DualPde). What about making MmuVersion into a trait (e.g. Mmu) with
>>> associated types for Pte, Pde, DualPde which can implement traits
>>> defining their common operations too?
>>
>> I coded this up and it did not look pretty, there's not much LOC savings and the
>> code becomes harder to read because of parametrization of several functions. Also:
> 
> Thanks for looking into it. Sorry to be a bother, but would you have a
> branch around with the code? I'm curious what didn't look good about it.

Sorry but I already mentioned that above, the parameterizing of dozens of
function call sites, 3-4 new traits (because each struct like
Pte/Pde/DualPde etc each need their own trait which different MMU versions
implement) etc. The code becomes hard to read and readability is the top
critical criteria for me - I am personally strictly against "Lets use shiny
features in language at the cost of making code unreadable". Because that
translates into bugs and nightmare for maintainability.

I don't have the code at the moment, but if you still want to spend time
on this direction, feel free to share a tree. I am happy to take a look.
>>> Then you can parameterise Vmm/PtWalk on this type.
>>
>> The match still has to be done somewhere, so you end up matching on chipset to call
>> the correct parametrized functions versus just passing in the parameter or
>> chipset down, in some cases.
>>
>> For now I am inclined to leave it as is. Also there's a Rust pitfall we all
>> learnt during the turing and other patch reviews, sometimes doing a bunch of
>> matches is good especially if the number of variants are expected to be fixed
>> (in the mm case, version 2 and version 3). Traits have some disadvantages too,
>> example dyn traits have to be heap-allocated, parametrizing can increase code size
>> (due to monomorphization) etc.
> 
> Yeah, it's just this is a lot of matches in a lot of places. And we have
> ver2 / ver3 specific code leaking into the general pagetable.rs file. So

That's not a leak, that's by design. pagetable.rs is where the matches are
centralized, most of the code changes here on out should happen outside of
this file.

31 out of 42 matches in the mm code are in pagetable.rs, so it is already
centralized.

> it would be really nice if we could find a way to improve this specific
> aspect. We can reduce the match to happening in just one file. 

Assuming we know what we're improving. ;-)

> You can avoid heap allocation if you would like by making Vmm an enum,
> for example, and doing the match based dispatch there at the top of the
> API tree, rather than at the bottom where it fans out into a lot more
> locations.

heap allocation is not always free, this code is sensitive to dynamic
allocations in the kernel, due to MM reclaim and locking. I would like to
keep it simple.

thanks,

--
Joel Fernandes


^ permalink raw reply

* Re: [PATCH] docs: proc: document ProtectionKey in smaps
From: David Hildenbrand (Arm) @ 2026-04-07 14:00 UTC (permalink / raw)
  To: Kevin Brodsky, linux-doc
  Cc: linux-kernel, Yury Khrustalev, Jonathan Corbet, Shuah Khan,
	Dave Hansen, Andrew Morton, Lorenzo Stoakes, Vlastimil Babka,
	Mark Rutland, linux-fsdevel, linux-mm
In-Reply-To: <20260407125133.564182-1-kevin.brodsky@arm.com>

On 4/7/26 14:51, Kevin Brodsky wrote:
> The ProtectionKey entry was added in v4.9; back then it was
> x86-specific, but it now lives in generic code and applies to all
> architectures supporting pkeys (currently x86, power, arm64).
> 
> Time to document it: add a paragraph to proc.rst about the
> ProtectionKey entry.
> 
> Reported-by: Yury Khrustalev <yury.khrustalev@arm.com>
> Signed-off-by: Kevin Brodsky <kevin.brodsky@arm.com>
> ---
> Cc: Jonathan Corbet <corbet@lwn.net>
> Cc: Shuah Khan <skhan@linuxfoundation.org>
> Cc: Dave Hansen <dave.hansen@linux.intel.com>
> Cc: Andrew Morton <akpm@linux-foundation.org>
> Cc: Lorenzo Stoakes <ljs@kernel.org>
> Cc: Vlastimil Babka <vbabka@kernel.org>
> Cc: David Hildenbrand <david@kernel.org>
> Cc: Mark Rutland <mark.rutland@arm.com>
> Cc: linux-fsdevel@vger.kernel.org
> Cc: linux-mm@kvack.org
> ---
>  Documentation/filesystems/proc.rst | 4 ++++
>  1 file changed, 4 insertions(+)
> 
> diff --git a/Documentation/filesystems/proc.rst b/Documentation/filesystems/proc.rst
> index b0c0d1b45b99..d673cad7dbe4 100644
> --- a/Documentation/filesystems/proc.rst
> +++ b/Documentation/filesystems/proc.rst
> @@ -549,6 +549,10 @@ does not take into account swapped out page of underlying shmem objects.
>  naturally aligned THP pages of any currently enabled size. 1 if true, 0
>  otherwise.
>  
> +If both the kernel and the system support protection keys (pkeys),
> +"ProtectionKey" indicates the memory protection key associated with the
> +virtual memory area.

Reviewed-by: David Hildenbrand (Arm) <david@kernel.org>

-- 
Cheers,

David

^ permalink raw reply

* Re: [PATCH v8 1/2] docs: s390/pci: Improve and update PCI documentation
From: Gerd Bayer @ 2026-04-07 14:09 UTC (permalink / raw)
  To: Niklas Schnelle, Bjorn Helgaas, Jonathan Corbet, Lukas Wunner,
	Shuah Khan
  Cc: Farhan Ali, Alexander Gordeev, Christian Borntraeger,
	Gerald Schaefer, Heiko Carstens, Julian Ruess, Matthew Rosato,
	Peter Oberparleiter, Ramesh Errabolu, Sven Schnelle,
	Vasily Gorbik, linux-doc, linux-kernel, linux-pci, linux-s390,
	Randy Dunlap, Gerd Bayer
In-Reply-To: <20260407-uid_slot-v8-1-15ae4409d2ce@linux.ibm.com>

On Tue, 2026-04-07 at 15:24 +0200, Niklas Schnelle wrote:
> Update the s390 specific PCI documentation to better reflect current
> behavior and terms such as the handling of Isolated VFs via commit
> 25f39d3dcb48 ("s390/pci: Ignore RID for isolated VFs").
> 
> Add a description for /sys/firmware/clp/uid_checking which was added
> in commit b043a81ce3ee ("s390/pci: Expose firmware provided UID Checking
> state in sysfs") but missed documentation.
> 
> Similarly add documentation for the fidparm attribute added by commit
> 99ad39306a62 ("s390/pci: Expose FIDPARM attribute in sysfs") and
> add a list of pft values and their names.
> 
> Finally improve formatting of the different attribute descriptions by
> adding a separating colon.
> 
> Reviewed-by: Farhan Ali <alifm@linux.ibm.com>
> Acked-by: Randy Dunlap <rdunlap@infradead.org>
> Tested-by: Randy Dunlap <rdunlap@infradead.org>
> Reviewed-by: Matthew Rosato <mjrosato@linux.ibm.com>
> Signed-off-by: Niklas Schnelle <schnelle@linux.ibm.com>
> ---
>  Documentation/arch/s390/pci.rst | 144 +++++++++++++++++++++++++++-------------
>  1 file changed, 97 insertions(+), 47 deletions(-)
> 
> diff --git a/Documentation/arch/s390/pci.rst b/Documentation/arch/s390/pci.rst
> index d5755484d8e75c7bf67a350e61bbe04f0452a2fa..c3476de4f03278d07099aa32cbea0f868b6e9c9c 100644
> --- a/Documentation/arch/s390/pci.rst
> +++ b/Documentation/arch/s390/pci.rst
> @@ -6,6 +6,7 @@ S/390 PCI
>  
>  Authors:
>          - Pierre Morel
> +        - Niklas Schnelle
>  
>  Copyright, IBM Corp. 2020
>  
> @@ -27,14 +28,16 @@ Command line parameters
>  debugfs entries
>  ---------------
>  
> -The S/390 debug feature (s390dbf) generates views to hold various debug results in sysfs directories of the form:
> +The S/390 debug feature (s390dbf) generates views to hold various debug results
> +in sysfs directories of the form:
>  
>   * /sys/kernel/debug/s390dbf/pci_*/
>  
>  For example:
>  
>    - /sys/kernel/debug/s390dbf/pci_msg/sprintf
> -    Holds messages from the processing of PCI events, like machine check handling
> +
> +    holds messages from the processing of PCI events, like machine check handling
>      and setting of global functionality, like UID checking.
>  
>    Change the level of logging to be more or less verbose by piping
> @@ -47,87 +50,134 @@ Sysfs entries
>  
>  Entries specific to zPCI functions and entries that hold zPCI information.
>  
> -* /sys/bus/pci/slots/XXXXXXXX
> +* /sys/bus/pci/slots/XXXXXXXX:
>  
> -  The slot entries are set up using the function identifier (FID) of the
> -  PCI function. The format depicted as XXXXXXXX above is 8 hexadecimal digits
> -  with 0 padding and lower case hexadecimal digits.
> +  The slot entries are set up using the function identifier (FID) of the PCI
> +  function as slot name. The format depicted as XXXXXXXX above is 8 hexadecimal
> +  digits with 0 padding and lower case hexadecimal digits.
>  
>    - /sys/bus/pci/slots/XXXXXXXX/power
>  
>    A physical function that currently supports a virtual function cannot be
>    powered off until all virtual functions are removed with:
> -  echo 0 > /sys/bus/pci/devices/XXXX:XX:XX.X/sriov_numvf
> +  echo 0 > /sys/bus/pci/devices/DDDD:BB:dd.f/sriov_numvf
>  
> -* /sys/bus/pci/devices/XXXX:XX:XX.X/
> +* /sys/bus/pci/devices/DDDD:BB:dd.f/:
>  
> -  - function_id
> -    A zPCI function identifier that uniquely identifies the function in the Z server.
> +  - function_id:
> +    The zPCI function identifier (FID) is a 32-bit hexadecimal value that
> +    uniquely identifies the PCI function. Unless the hypervisor provides
> +    a virtual FID e.g. on KVM this identifier is unique across the machine even
> +    between different partitions.
>  
> -  - function_handle
> -    Low-level identifier used for a configured PCI function.
> -    It might be useful for debugging.
> +  - function_handle:
> +    This 32-bit hexadecimal value is a low-level identifier used for a PCI
> +    function. Note that the function handle may be changed and become invalid
> +    on PCI events and when enabling/disabling the PCI function.
>  
> -  - pchid
> -    Model-dependent location of the I/O adapter.
> +  - pchid:
> +    This 16-bit hexadecimal value encodes a model-dependent location for
> +    the PCI function.
>  
> -  - pfgid
> -    PCI function group ID, functions that share identical functionality
> +  - pfgid:
> +    PCI function group ID; functions that share identical functionality
>      use a common identifier.
>      A PCI group defines interrupts, IOMMU, IOTLB, and DMA specifics.
>  
> -  - vfn
> +  - vfn:
>      The virtual function number, from 1 to N for virtual functions,
>      0 for physical functions.
>  
> -  - pft
> -    The PCI function type
> +  - pft:
> +    The PCI function type is an s390-specific type attribute. It indicates
> +    a more general, usage oriented, type than PCI Specification
> +    class/vendor/device identifiers. That is PCI functions with the same pft
> +    value may be backed by different hardware implementations. At the same time
> +    apart from unclassified functions (pft is 0x00) the same pft value
> +    generally implies a similar usage model. At the same time the same
> +    PCI hardware device may appear with different pft values when in a
> +    different usage model. For example NETD and NETH VFs may be implemented
> +    by the same PCI hardware device but in NETD the parent Physical Function
> +    is user managed while with NETH it is platform managed.
>  
> -  - port
> -    The port corresponds to the physical port the function is attached to.
> -    It also gives an indication of the physical function a virtual function
> -    is attached to.
> +    Currently the following PFT values are defined:
>  
> -  - uid
> -    The user identifier (UID) may be defined as part of the machine
> -    configuration or the z/VM or KVM guest configuration. If the accompanying
> -    uid_is_unique attribute is 1 the platform guarantees that the UID is unique
> -    within that instance and no devices with the same UID can be attached
> -    during the lifetime of the system.
> +    - 0x00 (UNC): Unclassified
> +    - 0x02 (ROCE): RoCE Express
> +    - 0x05 (ISM): Internal Shared Memory
> +    - 0x0a (ROC2): RoCE Express 2
> +    - 0x0b (NVMe): NVMe
> +    - 0x0c (NETH): Network Express hybrid
> +    - 0x0d (CNW): Cloud Network Adapter
> +    - 0x0f (NETD): Network Express direct
>  
> -  - uid_is_unique
> -    Indicates whether the user identifier (UID) is guaranteed to be and remain
> -    unique within this Linux instance.
> +  - port:
> +    The port is a decimal value corresponding to the physical port the function
> +    is attached to. Virtual Functions (VFs) share the port with their parent
> +    Physical Function (PF). A value of 0 indicates that the port attribute is
> +    not applicable for that PCI function type.
>  
> -  - pfip/segmentX
> +  - uid:
> +    The user-defined identifier (UID) for a PCI function is a 32-bit
> +    hexadecimal value. It is defined on a per instance basis as part of the
> +    partition, KVM guest, or z/VM guest configuration. If UID Checking is
> +    enabled the platform ensures that the UID is unique within that instance
> +    and no two PCI functions with the same UID will be visible to the instance.
> +
> +    Independent of this guarantee and unlike the function ID (FID) the UID may
> +    be the same in different partitions within the same machine. This allows to
> +    create PCI configurations in multiple partitions to be identical in the
> +    UID-namespace.
> +
> +  - uid_is_unique:
> +    A 0 or 1 flag indicating whether the user-defined identifier (UID) is
> +    guaranteed to be and remain unique within this Linux instance. This
> +    platform feature is called UID Checking.
> +
> +  - pfip/segmentX:
>      The segments determine the isolation of a function.
>      They correspond to the physical path to the function.
>      The more the segments are different, the more the functions are isolated.
>  
> +  - fidparm:
> +    Contains an 8-bit-per-PCI function parameter field in hexadecimal provided
> +    by the platform. The meaning of this field is PCI function type specific.
> +    For NETH VFs a value of 0x01 indicates that the function supports
> +    promiscuous mode.
> +
> +* /sys/firmware/clp/uid_checking:
> +
> +  In addition to the per-device uid_is_unique attribute this presents a
> +  global indication of whether UID Checking is enabled. This allows users
> +  to check for UID Checking even when no PCI functions are configured.
> +
>  Enumeration and hotplug
>  =======================
>  
>  The PCI address consists of four parts: domain, bus, device and function,
> -and is of this form: DDDD:BB:dd.f
> +and is of this form: DDDD:BB:dd.f.
>  
> -* When not using multi-functions (norid is set, or the firmware does not
> -  support multi-functions):
> +* For a PCI function for which the platform does not expose the RID, the
> +  pci=norid kernel parameter is used, or a so-called isolated Virtual Function
> +  which does have RID information but is used without its parent Physical
> +  Function being part of the same PCI configuration:
>  
>    - There is only one function per domain.
>  
> -  - The domain is set from the zPCI function's UID as defined during the
> -    LPAR creation.
> +  - The domain is set from the zPCI function's UID if UID Checking is on;
> +    otherwise the domain ID is generated dynamically and is not stable
> +    across reboots or hot plug.
>  
> -* When using multi-functions (norid parameter is not set),
> -  zPCI functions are addressed differently:
> +* For a PCI function for which the platform exposes the RID and which
> +  is not an Isolated Virtual Function:
>  
>    - There is still only one bus per domain.
>  
> -  - There can be up to 256 functions per bus.
> +  - There can be up to 256 PCI functions per bus.
>  
> -  - The domain part of the address of all functions for
> -    a multi-Function device is set from the zPCI function's UID as defined
> -    in the LPAR creation for the function zero.
> +  - The domain part of the address of all functions within the same topology is
> +    that of the configured PCI function with the lowest devfn within that
> +    topology.
>  
> -  - New functions will only be ready for use after the function zero
> -    (the function with devfn 0) has been enumerated.
> +  - Virtual Functions generated by an SR-IOV capable Physical Function only
> +    become visible once SR-IOV is enabled.


LGTM!
Reviewed-by: Gerd Bayer <gbayer@linux.ibm.com>

^ permalink raw reply

* Re: [PATCH v2 07/33] rust: allow globally `clippy::incompatible_msrv`
From: Tamir Duberstein @ 2026-04-07 14:12 UTC (permalink / raw)
  To: Miguel Ojeda
  Cc: Miguel Ojeda, Nathan Chancellor, Nicolas Schier, Danilo Krummrich,
	Andreas Hindborg, Catalin Marinas, Will Deacon, Paul Walmsley,
	Palmer Dabbelt, Albert Ou, Alexandre Courbot, David Airlie,
	Simona Vetter, Brendan Higgins, David Gow, Greg Kroah-Hartman,
	Arve Hjønnevåg, Todd Kjos, Christian Brauner,
	Carlos Llamas, Alice Ryhl, Jonathan Corbet, Boqun Feng, Gary Guo,
	Björn Roy Baron, Benno Lossin, Trevor Gross, rust-for-linux,
	linux-kbuild, Lorenzo Stoakes, Vlastimil Babka, Liam R . Howlett,
	Uladzislau Rezki, linux-block, linux-arm-kernel, Alexandre Ghiti,
	linux-riscv, nouveau, dri-devel, Rae Moar, linux-kselftest,
	kunit-dev, Nick Desaulniers, Bill Wendling, Justin Stitt, llvm,
	linux-kernel, Shuah Khan, linux-doc
In-Reply-To: <CANiq72mJTT7xFnhm-CeOZM_3ZwGdaQ8F2zUbONpPbqJ8g7DokQ@mail.gmail.com>

On Tue, Apr 7, 2026 at 4:37 AM Miguel Ojeda
<miguel.ojeda.sandonis@gmail.com> wrote:
>
> On Mon, Apr 6, 2026 at 5:31 PM Tamir Duberstein <tamird@kernel.org> wrote:
> >
> > You're welcome! Actually it seems the lint was already improved
> > upstream, starting with 1.90.0.
> >
> > Link: https://github.com/rust-lang/rust-clippy/commit/c0dc3b61 [0]
>
> Indeed, I had the PR linked in
> https://github.com/Rust-for-Linux/linux/issues/349, and it is nicer,
> but it would still fire in a case like this patch :(

Ah, that is possibly https://github.com/rust-lang/rust-clippy/issues/14827.

^ permalink raw reply

* Re: [PATCH v7 8/9] KVM: x86: nSVM: Save/restore gPAT with KVM_{GET,SET}_NESTED_STATE
From: Sean Christopherson @ 2026-04-07 14:14 UTC (permalink / raw)
  To: Jim Mattson
  Cc: Paolo Bonzini, Jonathan Corbet, Shuah Khan, Thomas Gleixner,
	Ingo Molnar, Borislav Petkov, Dave Hansen, x86, H. Peter Anvin,
	kvm, linux-doc, linux-kernel, linux-kselftest, Yosry Ahmed
In-Reply-To: <CALMp9eQsd0fRuDE_R57Mn6-N6jCtbmoPAh7Y7CBdMEZJaNSUGQ@mail.gmail.com>

On Mon, Apr 06, 2026, Jim Mattson wrote:
> On Mon, Apr 6, 2026 at 4:47 PM Sean Christopherson <seanjc@google.com> wrote:
> >
> > On Fri, Mar 27, 2026, Jim Mattson wrote:
> > > @@ -1918,6 +1921,7 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu,
> > >       struct vmcb_save_area_cached save_cached;
> > >       struct vmcb_ctrl_area_cached ctl_cached;
> > >       unsigned long cr0;
> > > +     bool use_separate_l2_pat;
> >
> > Land this above "cr0" to preserve the inverted fir tree.
> >
> > >       int ret;
> > >
> > >       BUILD_BUG_ON(sizeof(struct vmcb_control_area) + sizeof(struct vmcb_save_area) >
> > > @@ -1993,6 +1997,18 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu,
> > >           !nested_vmcb_check_save(vcpu, &save_cached, false))
> > >               goto out_free;
> > >
> > > +     /*
> > > +      * Validate gPAT when the shared PAT quirk is disabled (i.e. L2
> > > +      * has its own gPAT). This is done separately from the
> > > +      * vmcb_save_area_cached validation above, because gPAT is L2
> > > +      * state, but the vmcb_save_area_cached is populated with L1 state.
> > > +      */
> > > +     use_separate_l2_pat =
> > > +             (ctl_cached.misc_ctl & SVM_MISC_ENABLE_NP) &&
> > > +             !kvm_check_has_quirk(vcpu->kvm,
> > > +                                  KVM_X86_QUIRK_NESTED_SVM_SHARED_PAT);
> >
> > I vote for either:
> >
> >         use_separate_l2_pat = (ctl_cached.misc_ctl & SVM_MISC_ENABLE_NP) &&
> >                               !kvm_check_has_quirk(vcpu->kvm,
> >                                                    KVM_X86_QUIRK_NESTED_SVM_SHARED_PAT);
> >
> LOL! Aren't you the one who keeps complaining that my indentation
> doesn't line up? Are you schizophrenic?

Huh?  That is aligned.  Perhaps it's whitespace damaged by your MUA?

> > or
> >
> >         use_separate_l2_pat = (ctl_cached.misc_ctl & SVM_MISC_ENABLE_NP);
> >         if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_NESTED_SVM_SHARED_PAT))
> >                 use_separate_l2_pat = false;
> 
> Wow. I really have no idea how to predict what you're going to want
> the code to look like. How is this better than the original?!?

It doesn't immediately wrap after the "=".  Similar to my view on wrapping before
function names[*], I find wrapping immediately after an assignment operator to be
unnecessarily difficult to read as it doesn't provide any context for single-line
searches.

I'm pretty darn consistent in my dislike for that style: I count 26 instances in
arch/x86/kvm that match "\s=\n", and only two of those carry my SoB or R-b.  I
simply missed the wrap in kvm_vcpu_apicv_activated() that was added by commit 
896046474f8d ("KVM: x86: Introduce kvm_x86_call() to simplify static calls of
kvm_x86_ops"), and I'll give myself a pass for commit 8764ed55c970 ("KVM: x86:
Whitelist port 0x7e for pre-incrementing %rip") as that predates treating
checkpatch's 80 char limit as a soft limit.

[*] https://lore.kernel.org/all/CAHk-=wjoLAYG446ZNHfg=GhjSY6nFmuB_wA8fYd5iLBNXjo9Bw@mail.gmail.com


^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox