Linux CIFS filesystem development
 help / color / mirror / Atom feed
* [PATCH 1/7] cifs: Revert "smb: client: Avoid race in open_cached_dir with lease breaks"
@ 2025-06-04 10:18 nspmangalore
  2025-06-04 10:18 ` [PATCH 2/7] cifs: protect cfid accesses with fid_lock nspmangalore
                   ` (5 more replies)
  0 siblings, 6 replies; 18+ messages in thread
From: nspmangalore @ 2025-06-04 10:18 UTC (permalink / raw)
  To: linux-cifs, smfrench, bharathsm.hsk, meetakshisetiyaoss, pc, paul,
	henrique.carvalho, ematsumiya
  Cc: Shyam Prasad N

From: Shyam Prasad N <sprasad@microsoft.com>

This reverts commit 3ca02e63edccb78ef3659bebc68579c7224a6ca2.
Upcoming patches in this code path introduces a new mutex, which
takes care of the same problem which this change fixes, but with
the use of mutex for the same section. So the net effect is that
there will be no blocking with spinlocks held.

Signed-off-by: Shyam Prasad N <sprasad@microsoft.com>
---
 fs/smb/client/cached_dir.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/fs/smb/client/cached_dir.c b/fs/smb/client/cached_dir.c
index 89d2dbbb742c..e6fc92667d41 100644
--- a/fs/smb/client/cached_dir.c
+++ b/fs/smb/client/cached_dir.c
@@ -29,6 +29,7 @@ static struct cached_fid *find_or_create_cached_dir(struct cached_fids *cfids,
 {
 	struct cached_fid *cfid;
 
+	spin_lock(&cfids->cfid_list_lock);
 	list_for_each_entry(cfid, &cfids->entries, entry) {
 		if (!strcmp(cfid->path, path)) {
 			/*
@@ -37,20 +38,25 @@ static struct cached_fid *find_or_create_cached_dir(struct cached_fids *cfids,
 			 * being deleted due to a lease break.
 			 */
 			if (!cfid->time || !cfid->has_lease) {
+				spin_unlock(&cfids->cfid_list_lock);
 				return NULL;
 			}
 			kref_get(&cfid->refcount);
+			spin_unlock(&cfids->cfid_list_lock);
 			return cfid;
 		}
 	}
 	if (lookup_only) {
+		spin_unlock(&cfids->cfid_list_lock);
 		return NULL;
 	}
 	if (cfids->num_entries >= max_cached_dirs) {
+		spin_unlock(&cfids->cfid_list_lock);
 		return NULL;
 	}
 	cfid = init_cached_dir(path);
 	if (cfid == NULL) {
+		spin_unlock(&cfids->cfid_list_lock);
 		return NULL;
 	}
 	cfid->cfids = cfids;
@@ -68,6 +74,7 @@ static struct cached_fid *find_or_create_cached_dir(struct cached_fids *cfids,
 	 */
 	cfid->has_lease = true;
 
+	spin_unlock(&cfids->cfid_list_lock);
 	return cfid;
 }
 
@@ -181,10 +188,8 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon,
 	if (!utf16_path)
 		return -ENOMEM;
 
-	spin_lock(&cfids->cfid_list_lock);
 	cfid = find_or_create_cached_dir(cfids, path, lookup_only, tcon->max_cached_dirs);
 	if (cfid == NULL) {
-		spin_unlock(&cfids->cfid_list_lock);
 		kfree(utf16_path);
 		return -ENOENT;
 	}
@@ -193,6 +198,7 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon,
 	 * Otherwise, it is either a new entry or laundromat worker removed it
 	 * from @cfids->entries.  Caller will put last reference if the latter.
 	 */
+	spin_lock(&cfids->cfid_list_lock);
 	if (cfid->has_lease && cfid->time) {
 		spin_unlock(&cfids->cfid_list_lock);
 		*ret_cfid = cfid;
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH 2/7] cifs: protect cfid accesses with fid_lock
  2025-06-04 10:18 [PATCH 1/7] cifs: Revert "smb: client: Avoid race in open_cached_dir with lease breaks" nspmangalore
@ 2025-06-04 10:18 ` nspmangalore
  2025-06-12  3:24   ` Paul Aurich
  2025-06-04 10:18 ` [PATCH 3/7] cifs: do not return an invalidated cfid nspmangalore
                   ` (4 subsequent siblings)
  5 siblings, 1 reply; 18+ messages in thread
From: nspmangalore @ 2025-06-04 10:18 UTC (permalink / raw)
  To: linux-cifs, smfrench, bharathsm.hsk, meetakshisetiyaoss, pc, paul,
	henrique.carvalho, ematsumiya
  Cc: Shyam Prasad N

From: Shyam Prasad N <sprasad@microsoft.com>

There are several accesses to cfid structure today without
locking fid_lock. This can lead to race conditions that are
hard to debug.

With this change, I'm trying to make sure that accesses to cfid
struct members happen with fid_lock held.

Signed-off-by: Shyam Prasad N <sprasad@microsoft.com>
---
 fs/smb/client/cached_dir.c | 86 ++++++++++++++++++++++----------------
 1 file changed, 49 insertions(+), 37 deletions(-)

diff --git a/fs/smb/client/cached_dir.c b/fs/smb/client/cached_dir.c
index e6fc92667d41..e62d3bebff9a 100644
--- a/fs/smb/client/cached_dir.c
+++ b/fs/smb/client/cached_dir.c
@@ -31,6 +31,7 @@ static struct cached_fid *find_or_create_cached_dir(struct cached_fids *cfids,
 
 	spin_lock(&cfids->cfid_list_lock);
 	list_for_each_entry(cfid, &cfids->entries, entry) {
+		spin_lock(&cfid->fid_lock);
 		if (!strcmp(cfid->path, path)) {
 			/*
 			 * If it doesn't have a lease it is either not yet
@@ -38,13 +39,16 @@ static struct cached_fid *find_or_create_cached_dir(struct cached_fids *cfids,
 			 * being deleted due to a lease break.
 			 */
 			if (!cfid->time || !cfid->has_lease) {
+				spin_unlock(&cfid->fid_lock);
 				spin_unlock(&cfids->cfid_list_lock);
 				return NULL;
 			}
 			kref_get(&cfid->refcount);
+			spin_unlock(&cfid->fid_lock);
 			spin_unlock(&cfids->cfid_list_lock);
 			return cfid;
 		}
+		spin_unlock(&cfid->fid_lock);
 	}
 	if (lookup_only) {
 		spin_unlock(&cfids->cfid_list_lock);
@@ -193,19 +197,20 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon,
 		kfree(utf16_path);
 		return -ENOENT;
 	}
+
 	/*
 	 * Return cached fid if it is valid (has a lease and has a time).
 	 * Otherwise, it is either a new entry or laundromat worker removed it
 	 * from @cfids->entries.  Caller will put last reference if the latter.
 	 */
-	spin_lock(&cfids->cfid_list_lock);
+	spin_lock(&cfid->fid_lock);
 	if (cfid->has_lease && cfid->time) {
-		spin_unlock(&cfids->cfid_list_lock);
+		spin_unlock(&cfid->fid_lock);
 		*ret_cfid = cfid;
 		kfree(utf16_path);
 		return 0;
 	}
-	spin_unlock(&cfids->cfid_list_lock);
+	spin_unlock(&cfid->fid_lock);
 
 	/*
 	 * Skip any prefix paths in @path as lookup_noperm_positive_unlocked() ends up
@@ -220,17 +225,6 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon,
 		goto out;
 	}
 
-	if (!npath[0]) {
-		dentry = dget(cifs_sb->root);
-	} else {
-		dentry = path_to_dentry(cifs_sb, npath);
-		if (IS_ERR(dentry)) {
-			rc = -ENOENT;
-			goto out;
-		}
-	}
-	cfid->dentry = dentry;
-	cfid->tcon = tcon;
 
 	/*
 	 * We do not hold the lock for the open because in case
@@ -302,9 +296,6 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon,
 		}
 		goto oshr_free;
 	}
-	cfid->is_open = true;
-
-	spin_lock(&cfids->cfid_list_lock);
 
 	o_rsp = (struct smb2_create_rsp *)rsp_iov[0].iov_base;
 	oparms.fid->persistent_fid = o_rsp->PersistentFileId;
@@ -315,7 +306,6 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon,
 
 
 	if (o_rsp->OplockLevel != SMB2_OPLOCK_LEVEL_LEASE) {
-		spin_unlock(&cfids->cfid_list_lock);
 		rc = -EINVAL;
 		goto oshr_free;
 	}
@@ -324,21 +314,15 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon,
 				 &oparms.fid->epoch,
 				 oparms.fid->lease_key,
 				 &oplock, NULL, NULL);
-	if (rc) {
-		spin_unlock(&cfids->cfid_list_lock);
+	if (rc)
 		goto oshr_free;
-	}
 
 	rc = -EINVAL;
-	if (!(oplock & SMB2_LEASE_READ_CACHING_HE)) {
-		spin_unlock(&cfids->cfid_list_lock);
+	if (!(oplock & SMB2_LEASE_READ_CACHING_HE))
 		goto oshr_free;
-	}
 	qi_rsp = (struct smb2_query_info_rsp *)rsp_iov[1].iov_base;
-	if (le32_to_cpu(qi_rsp->OutputBufferLength) < sizeof(struct smb2_file_all_info)) {
-		spin_unlock(&cfids->cfid_list_lock);
+	if (le32_to_cpu(qi_rsp->OutputBufferLength) < sizeof(struct smb2_file_all_info))
 		goto oshr_free;
-	}
 	if (!smb2_validate_and_copy_iov(
 				le16_to_cpu(qi_rsp->OutputBufferOffset),
 				sizeof(struct smb2_file_all_info),
@@ -346,10 +330,25 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon,
 				(char *)&cfid->file_all_info))
 		cfid->file_all_info_is_valid = true;
 
-	cfid->time = jiffies;
-	spin_unlock(&cfids->cfid_list_lock);
 	/* At this point the directory handle is fully cached */
 	rc = 0;
+	if (!cfid->dentry) {
+		if (!npath[0]) {
+			dentry = dget(cifs_sb->root);
+		} else {
+			dentry = path_to_dentry(cifs_sb, npath);
+			if (IS_ERR(dentry)) {
+				rc = -ENOENT;
+				goto out;
+			}
+		}
+	}
+	spin_lock(&cfid->fid_lock);
+	cfid->dentry = dentry;
+	cfid->tcon = tcon;
+	cfid->is_open = true;
+	cfid->time = jiffies;
+	spin_unlock(&cfid->fid_lock);
 
 oshr_free:
 	SMB2_open_free(&rqst[0]);
@@ -364,6 +363,7 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon,
 			cfid->on_list = false;
 			cfids->num_entries--;
 		}
+		spin_lock(&cfid->fid_lock);
 		if (cfid->has_lease) {
 			/*
 			 * We are guaranteed to have two references at this
@@ -373,6 +373,7 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon,
 			cfid->has_lease = false;
 			kref_put(&cfid->refcount, smb2_close_cached_fid);
 		}
+		spin_unlock(&cfid->fid_lock);
 		spin_unlock(&cfids->cfid_list_lock);
 
 		kref_put(&cfid->refcount, smb2_close_cached_fid);
@@ -401,13 +402,16 @@ int open_cached_dir_by_dentry(struct cifs_tcon *tcon,
 
 	spin_lock(&cfids->cfid_list_lock);
 	list_for_each_entry(cfid, &cfids->entries, entry) {
+		spin_lock(&cfid->fid_lock);
 		if (dentry && cfid->dentry == dentry) {
 			cifs_dbg(FYI, "found a cached file handle by dentry\n");
 			kref_get(&cfid->refcount);
+			spin_unlock(&cfid->fid_lock);
 			*ret_cfid = cfid;
 			spin_unlock(&cfids->cfid_list_lock);
 			return 0;
 		}
+		spin_unlock(&cfid->fid_lock);
 	}
 	spin_unlock(&cfids->cfid_list_lock);
 	return -ENOENT;
@@ -428,8 +432,11 @@ smb2_close_cached_fid(struct kref *ref)
 	}
 	spin_unlock(&cfid->cfids->cfid_list_lock);
 
-	dput(cfid->dentry);
-	cfid->dentry = NULL;
+	/* no locking necessary as we're the last user of this cfid */
+	if (cfid->dentry) {
+		dput(cfid->dentry);
+		cfid->dentry = NULL;
+	}
 
 	if (cfid->is_open) {
 		rc = SMB2_close(0, cfid->tcon, cfid->fid.persistent_fid,
@@ -452,12 +459,13 @@ void drop_cached_dir_by_name(const unsigned int xid, struct cifs_tcon *tcon,
 		cifs_dbg(FYI, "no cached dir found for rmdir(%s)\n", name);
 		return;
 	}
-	spin_lock(&cfid->cfids->cfid_list_lock);
+	spin_lock(&cfid->fid_lock);
 	if (cfid->has_lease) {
+		/* mark as invalid */
 		cfid->has_lease = false;
 		kref_put(&cfid->refcount, smb2_close_cached_fid);
 	}
-	spin_unlock(&cfid->cfids->cfid_list_lock);
+	spin_unlock(&cfid->fid_lock);
 	close_cached_dir(cfid);
 }
 
@@ -539,6 +547,7 @@ void invalidate_all_cached_dirs(struct cifs_tcon *tcon)
 		cfids->num_entries--;
 		cfid->is_open = false;
 		cfid->on_list = false;
+		spin_lock(&cfid->fid_lock);
 		if (cfid->has_lease) {
 			/*
 			 * The lease was never cancelled from the server,
@@ -547,6 +556,7 @@ void invalidate_all_cached_dirs(struct cifs_tcon *tcon)
 			cfid->has_lease = false;
 		} else
 			kref_get(&cfid->refcount);
+		spin_unlock(&cfid->fid_lock);
 	}
 	/*
 	 * Queue dropping of the dentries once locks have been dropped
@@ -601,6 +611,7 @@ int cached_dir_lease_break(struct cifs_tcon *tcon, __u8 lease_key[16])
 
 	spin_lock(&cfids->cfid_list_lock);
 	list_for_each_entry(cfid, &cfids->entries, entry) {
+		spin_lock(&cfid->fid_lock);
 		if (cfid->has_lease &&
 		    !memcmp(lease_key,
 			    cfid->fid.lease_key,
@@ -613,6 +624,7 @@ int cached_dir_lease_break(struct cifs_tcon *tcon, __u8 lease_key[16])
 			 */
 			list_del(&cfid->entry);
 			cfid->on_list = false;
+			spin_unlock(&cfid->fid_lock);
 			cfids->num_entries--;
 
 			++tcon->tc_count;
@@ -622,6 +634,7 @@ int cached_dir_lease_break(struct cifs_tcon *tcon, __u8 lease_key[16])
 			spin_unlock(&cfids->cfid_list_lock);
 			return true;
 		}
+		spin_unlock(&cfid->fid_lock);
 	}
 	spin_unlock(&cfids->cfid_list_lock);
 	return false;
@@ -657,9 +670,6 @@ static void free_cached_dir(struct cached_fid *cfid)
 	WARN_ON(work_pending(&cfid->close_work));
 	WARN_ON(work_pending(&cfid->put_work));
 
-	dput(cfid->dentry);
-	cfid->dentry = NULL;
-
 	/*
 	 * Delete all cached dirent names
 	 */
@@ -704,6 +714,7 @@ static void cfids_laundromat_worker(struct work_struct *work)
 
 	spin_lock(&cfids->cfid_list_lock);
 	list_for_each_entry_safe(cfid, q, &cfids->entries, entry) {
+		spin_lock(&cfid->fid_lock);
 		if (cfid->time &&
 		    time_after(jiffies, cfid->time + HZ * dir_cache_timeout)) {
 			cfid->on_list = false;
@@ -718,6 +729,7 @@ static void cfids_laundromat_worker(struct work_struct *work)
 			} else
 				kref_get(&cfid->refcount);
 		}
+		spin_unlock(&cfid->fid_lock);
 	}
 	spin_unlock(&cfids->cfid_list_lock);
 
@@ -728,8 +740,8 @@ static void cfids_laundromat_worker(struct work_struct *work)
 		dentry = cfid->dentry;
 		cfid->dentry = NULL;
 		spin_unlock(&cfid->fid_lock);
-
 		dput(dentry);
+
 		if (cfid->is_open) {
 			spin_lock(&cifs_tcp_ses_lock);
 			++cfid->tcon->tc_count;
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH 3/7] cifs: do not return an invalidated cfid
  2025-06-04 10:18 [PATCH 1/7] cifs: Revert "smb: client: Avoid race in open_cached_dir with lease breaks" nspmangalore
  2025-06-04 10:18 ` [PATCH 2/7] cifs: protect cfid accesses with fid_lock nspmangalore
@ 2025-06-04 10:18 ` nspmangalore
  2025-06-04 10:18 ` [PATCH 4/7] cifs: serialize initialization and cleanup of cfid nspmangalore
                   ` (3 subsequent siblings)
  5 siblings, 0 replies; 18+ messages in thread
From: nspmangalore @ 2025-06-04 10:18 UTC (permalink / raw)
  To: linux-cifs, smfrench, bharathsm.hsk, meetakshisetiyaoss, pc, paul,
	henrique.carvalho, ematsumiya
  Cc: Shyam Prasad N

From: Shyam Prasad N <sprasad@microsoft.com>

open_cached_dir should either return an existing valid cfid
or create a new one. Validity of cfid depends on both
cfid->has_lease and cfid->time to be true. However, if has_lease
was invalidated by a worker thread in parallel, we could end up
leaking both a dentry and a server handle.

This change checks if the entry was invalidated and returns
a -ENOENT in such a case.

Signed-off-by: Shyam Prasad N <sprasad@microsoft.com>
---
 fs/smb/client/cached_dir.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/fs/smb/client/cached_dir.c b/fs/smb/client/cached_dir.c
index e62d3bebff9a..94538f52dfc8 100644
--- a/fs/smb/client/cached_dir.c
+++ b/fs/smb/client/cached_dir.c
@@ -199,9 +199,11 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon,
 	}
 
 	/*
+	 * check again that the cfid is valid (with mutex held this time).
 	 * Return cached fid if it is valid (has a lease and has a time).
 	 * Otherwise, it is either a new entry or laundromat worker removed it
-	 * from @cfids->entries.  Caller will put last reference if the latter.
+	 * from @cfids->entries.  If the latter, we drop the refcount and return
+	 * an error to the caller.
 	 */
 	spin_lock(&cfid->fid_lock);
 	if (cfid->has_lease && cfid->time) {
@@ -209,6 +211,12 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon,
 		*ret_cfid = cfid;
 		kfree(utf16_path);
 		return 0;
+	} else if (!cfid->has_lease) {
+		spin_unlock(&cfid->fid_lock);
+		/* drop the ref that we have */
+		kref_put(&cfid->refcount, smb2_close_cached_fid);
+		kfree(utf16_path);
+		return -ENOENT;
 	}
 	spin_unlock(&cfid->fid_lock);
 
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH 4/7] cifs: serialize initialization and cleanup of cfid
  2025-06-04 10:18 [PATCH 1/7] cifs: Revert "smb: client: Avoid race in open_cached_dir with lease breaks" nspmangalore
  2025-06-04 10:18 ` [PATCH 2/7] cifs: protect cfid accesses with fid_lock nspmangalore
  2025-06-04 10:18 ` [PATCH 3/7] cifs: do not return an invalidated cfid nspmangalore
@ 2025-06-04 10:18 ` nspmangalore
  2025-06-12  3:25   ` Paul Aurich
  2025-06-04 10:18 ` [PATCH 5/7] cifs: update the lock ordering comments with new mutex nspmangalore
                   ` (2 subsequent siblings)
  5 siblings, 1 reply; 18+ messages in thread
From: nspmangalore @ 2025-06-04 10:18 UTC (permalink / raw)
  To: linux-cifs, smfrench, bharathsm.hsk, meetakshisetiyaoss, pc, paul,
	henrique.carvalho, ematsumiya
  Cc: Shyam Prasad N

From: Shyam Prasad N <sprasad@microsoft.com>

Today we can have multiple processes calling open_cached_dir
and other workers freeing the cached dir all in parallel.
Although small sections of this code is locked to protect
individual fields, there can be races between these threads
which can be hard to debug.

This patch serializes all initialization and cleanup of
the cfid struct and the associated resources: dentry and
the server handle.

Signed-off-by: Shyam Prasad N <sprasad@microsoft.com>
---
 fs/smb/client/cached_dir.c | 16 ++++++++++++++++
 fs/smb/client/cached_dir.h |  1 +
 2 files changed, 17 insertions(+)

diff --git a/fs/smb/client/cached_dir.c b/fs/smb/client/cached_dir.c
index 94538f52dfc8..2746d693d80a 100644
--- a/fs/smb/client/cached_dir.c
+++ b/fs/smb/client/cached_dir.c
@@ -198,6 +198,12 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon,
 		return -ENOENT;
 	}
 
+	/*
+	 * the following is a critical section. We need to make sure that the
+	 * callers are serialized per-cfid
+	 */
+	mutex_lock(&cfid->cfid_mutex);
+
 	/*
 	 * check again that the cfid is valid (with mutex held this time).
 	 * Return cached fid if it is valid (has a lease and has a time).
@@ -208,11 +214,13 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon,
 	spin_lock(&cfid->fid_lock);
 	if (cfid->has_lease && cfid->time) {
 		spin_unlock(&cfid->fid_lock);
+		mutex_unlock(&cfid->cfid_mutex);
 		*ret_cfid = cfid;
 		kfree(utf16_path);
 		return 0;
 	} else if (!cfid->has_lease) {
 		spin_unlock(&cfid->fid_lock);
+		mutex_unlock(&cfid->cfid_mutex);
 		/* drop the ref that we have */
 		kref_put(&cfid->refcount, smb2_close_cached_fid);
 		kfree(utf16_path);
@@ -229,6 +237,7 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon,
 	 */
 	npath = path_no_prefix(cifs_sb, path);
 	if (IS_ERR(npath)) {
+		mutex_unlock(&cfid->cfid_mutex);
 		rc = PTR_ERR(npath);
 		goto out;
 	}
@@ -389,6 +398,8 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon,
 		*ret_cfid = cfid;
 		atomic_inc(&tcon->num_remote_opens);
 	}
+	mutex_unlock(&cfid->cfid_mutex);
+
 	kfree(utf16_path);
 
 	if (is_replayable_error(rc) &&
@@ -432,6 +443,9 @@ smb2_close_cached_fid(struct kref *ref)
 					       refcount);
 	int rc;
 
+	/* make sure not to race with server open */
+	mutex_lock(&cfid->cfid_mutex);
+
 	spin_lock(&cfid->cfids->cfid_list_lock);
 	if (cfid->on_list) {
 		list_del(&cfid->entry);
@@ -452,6 +466,7 @@ smb2_close_cached_fid(struct kref *ref)
 		if (rc) /* should we retry on -EBUSY or -EAGAIN? */
 			cifs_dbg(VFS, "close cached dir rc %d\n", rc);
 	}
+	mutex_unlock(&cfid->cfid_mutex);
 
 	free_cached_dir(cfid);
 }
@@ -666,6 +681,7 @@ static struct cached_fid *init_cached_dir(const char *path)
 	INIT_LIST_HEAD(&cfid->entry);
 	INIT_LIST_HEAD(&cfid->dirents.entries);
 	mutex_init(&cfid->dirents.de_mutex);
+	mutex_init(&cfid->cfid_mutex);
 	spin_lock_init(&cfid->fid_lock);
 	kref_init(&cfid->refcount);
 	return cfid;
diff --git a/fs/smb/client/cached_dir.h b/fs/smb/client/cached_dir.h
index 1dfe79d947a6..93c936af2253 100644
--- a/fs/smb/client/cached_dir.h
+++ b/fs/smb/client/cached_dir.h
@@ -42,6 +42,7 @@ struct cached_fid {
 	struct kref refcount;
 	struct cifs_fid fid;
 	spinlock_t fid_lock;
+	struct mutex cfid_mutex;
 	struct cifs_tcon *tcon;
 	struct dentry *dentry;
 	struct work_struct put_work;
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH 5/7] cifs: update the lock ordering comments with new mutex
  2025-06-04 10:18 [PATCH 1/7] cifs: Revert "smb: client: Avoid race in open_cached_dir with lease breaks" nspmangalore
                   ` (2 preceding siblings ...)
  2025-06-04 10:18 ` [PATCH 4/7] cifs: serialize initialization and cleanup of cfid nspmangalore
@ 2025-06-04 10:18 ` nspmangalore
  2025-06-04 10:18 ` [PATCH 6/7] cifs: tc_count updates should be done with tc_lock nspmangalore
  2025-06-04 10:18 ` [PATCH 7/7] cifs: add new field to track the last access time of cfid nspmangalore
  5 siblings, 0 replies; 18+ messages in thread
From: nspmangalore @ 2025-06-04 10:18 UTC (permalink / raw)
  To: linux-cifs, smfrench, bharathsm.hsk, meetakshisetiyaoss, pc, paul,
	henrique.carvalho, ematsumiya
  Cc: Shyam Prasad N

From: Shyam Prasad N <sprasad@microsoft.com>

The lock ordering rules listed as comments in cifsglob.h were
missing some lock details and the newly introduced cfid_mutex.

Updated those notes in this commit.

Signed-off-by: Shyam Prasad N <sprasad@microsoft.com>
---
 fs/smb/client/cifsglob.h | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h
index 0c80ca352f3f..a56d52951b00 100644
--- a/fs/smb/client/cifsglob.h
+++ b/fs/smb/client/cifsglob.h
@@ -1989,13 +1989,14 @@ require use of the stronger protocol */
  * TCP_Server_Info->		TCP_Server_Info			cifs_get_tcp_session
  * reconnect_mutex
  * TCP_Server_Info->srv_mutex	TCP_Server_Info			cifs_get_tcp_session
- * cifs_ses->session_mutex		cifs_ses		sesInfoAlloc
- *				cifs_tcon
+ * cifs_ses->session_mutex	cifs_ses			sesInfoAlloc
+ * cached_fid->cfid_mutex	cached_fid			init_cached_dir
  * cifs_tcon->open_file_lock	cifs_tcon->openFileList		tconInfoAlloc
  *				cifs_tcon->pending_opens
  * cifs_tcon->stat_lock		cifs_tcon->bytes_read		tconInfoAlloc
  *				cifs_tcon->bytes_written
  * cifs_tcp_ses_lock		cifs_tcp_ses_list		sesInfoAlloc
+ * cached_fids->cfid_list_lock	cifs_tcon->cfids->entries	init_cached_dirs
  * GlobalMid_Lock		GlobalMaxActiveXid		init_cifs
  *				GlobalCurrentXid
  *				GlobalTotalActiveXid
@@ -2009,21 +2010,24 @@ require use of the stronger protocol */
  *				->oplock_credits
  *				->reconnect_instance
  * cifs_ses->ses_lock		(anything that is not protected by another lock and can change)
+ *								sesInfoAlloc
  * cifs_ses->iface_lock		cifs_ses->iface_list		sesInfoAlloc
  *				->iface_count
  *				->iface_last_update
- * cifs_ses->chan_lock		cifs_ses->chans
+ * cifs_ses->chan_lock		cifs_ses->chans			sesInfoAlloc
  *				->chans_need_reconnect
  *				->chans_in_reconnect
  * cifs_tcon->tc_lock		(anything that is not protected by another lock and can change)
+ *								tcon_info_alloc
  * inode->i_rwsem, taken by fs/netfs/locking.c e.g. should be taken before cifsInodeInfo locks
  * cifsInodeInfo->open_file_lock	cifsInodeInfo->openFileList	cifs_alloc_inode
  * cifsInodeInfo->writers_lock	cifsInodeInfo->writers		cifsInodeInfo_alloc
  * cifsInodeInfo->lock_sem	cifsInodeInfo->llist		cifs_init_once
  *				->can_cache_brlcks
  * cifsInodeInfo->deferred_lock	cifsInodeInfo->deferred_closes	cifsInodeInfo_alloc
- * cached_fids->cfid_list_lock	cifs_tcon->cfids->entries	 init_cached_dirs
- * cifsFileInfo->fh_mutex		cifsFileInfo			cifs_new_fileinfo
+ * cached_fid->fid_lock		(anything that is not protected by another lock and can change)
+ *								init_cached_dir
+ * cifsFileInfo->fh_mutex	cifsFileInfo			cifs_new_fileinfo
  * cifsFileInfo->file_info_lock	cifsFileInfo->count		cifs_new_fileinfo
  *				->invalidHandle			initiate_cifs_search
  *				->oplock_break_cancelled
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH 6/7] cifs: tc_count updates should be done with tc_lock
  2025-06-04 10:18 [PATCH 1/7] cifs: Revert "smb: client: Avoid race in open_cached_dir with lease breaks" nspmangalore
                   ` (3 preceding siblings ...)
  2025-06-04 10:18 ` [PATCH 5/7] cifs: update the lock ordering comments with new mutex nspmangalore
@ 2025-06-04 10:18 ` nspmangalore
  2025-06-12  3:24   ` Paul Aurich
  2025-06-04 10:18 ` [PATCH 7/7] cifs: add new field to track the last access time of cfid nspmangalore
  5 siblings, 1 reply; 18+ messages in thread
From: nspmangalore @ 2025-06-04 10:18 UTC (permalink / raw)
  To: linux-cifs, smfrench, bharathsm.hsk, meetakshisetiyaoss, pc, paul,
	henrique.carvalho, ematsumiya
  Cc: Shyam Prasad N

From: Shyam Prasad N <sprasad@microsoft.com>

We had problems with deadlocks using the cifs_tcp_ses_lock for
protecting a lot of structures. So we broke it down into smaller
spinlocks. cifs_tcon struct fields are protected by tc_lock now.
Hence we should stick to using that.

Fixes: 3fa640d035e5 ("smb: During unmount, ensure all cached dir instances drop their dentry")
Signed-off-by: Shyam Prasad N <sprasad@microsoft.com>
---
 fs/smb/client/cached_dir.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/fs/smb/client/cached_dir.c b/fs/smb/client/cached_dir.c
index 2746d693d80a..4abf5bbd8baf 100644
--- a/fs/smb/client/cached_dir.c
+++ b/fs/smb/client/cached_dir.c
@@ -650,10 +650,12 @@ int cached_dir_lease_break(struct cifs_tcon *tcon, __u8 lease_key[16])
 			spin_unlock(&cfid->fid_lock);
 			cfids->num_entries--;
 
+			spin_lock(&cfid->tcon->tc_lock);
 			++tcon->tc_count;
 			trace_smb3_tcon_ref(tcon->debug_id, tcon->tc_count,
 					    netfs_trace_tcon_ref_get_cached_lease_break);
 			queue_work(cfid_put_wq, &cfid->put_work);
+			spin_unlock(&cfid->tcon->tc_lock);
 			spin_unlock(&cfids->cfid_list_lock);
 			return true;
 		}
@@ -767,11 +769,11 @@ static void cfids_laundromat_worker(struct work_struct *work)
 		dput(dentry);
 
 		if (cfid->is_open) {
-			spin_lock(&cifs_tcp_ses_lock);
+			spin_lock(&cfid->tcon->tc_lock);
 			++cfid->tcon->tc_count;
 			trace_smb3_tcon_ref(cfid->tcon->debug_id, cfid->tcon->tc_count,
 					    netfs_trace_tcon_ref_get_cached_laundromat);
-			spin_unlock(&cifs_tcp_ses_lock);
+			spin_unlock(&cfid->tcon->tc_lock);
 			queue_work(serverclose_wq, &cfid->close_work);
 		} else
 			/*
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH 7/7] cifs: add new field to track the last access time of cfid
  2025-06-04 10:18 [PATCH 1/7] cifs: Revert "smb: client: Avoid race in open_cached_dir with lease breaks" nspmangalore
                   ` (4 preceding siblings ...)
  2025-06-04 10:18 ` [PATCH 6/7] cifs: tc_count updates should be done with tc_lock nspmangalore
@ 2025-06-04 10:18 ` nspmangalore
  2025-07-24 23:05   ` Steve French
  5 siblings, 1 reply; 18+ messages in thread
From: nspmangalore @ 2025-06-04 10:18 UTC (permalink / raw)
  To: linux-cifs, smfrench, bharathsm.hsk, meetakshisetiyaoss, pc, paul,
	henrique.carvalho, ematsumiya
  Cc: Shyam Prasad N

From: Shyam Prasad N <sprasad@microsoft.com>

The handlecache code today tracks the time at which dir lease was
acquired and the laundromat thread uses that to check for old
entries to cleanup.

However, if a directory is actively accessed, it should not
be chosen to expire first.

This change adds a new last_access_time field to cfid and
uses that to decide expiry of the cfid.

Signed-off-by: Shyam Prasad N <sprasad@microsoft.com>
---
 fs/smb/client/cached_dir.c | 6 ++++--
 fs/smb/client/cached_dir.h | 1 +
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/fs/smb/client/cached_dir.c b/fs/smb/client/cached_dir.c
index 4abf5bbd8baf..d432a40f902e 100644
--- a/fs/smb/client/cached_dir.c
+++ b/fs/smb/client/cached_dir.c
@@ -213,6 +213,7 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon,
 	 */
 	spin_lock(&cfid->fid_lock);
 	if (cfid->has_lease && cfid->time) {
+		cfid->last_access_time = jiffies;
 		spin_unlock(&cfid->fid_lock);
 		mutex_unlock(&cfid->cfid_mutex);
 		*ret_cfid = cfid;
@@ -365,6 +366,7 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon,
 	cfid->tcon = tcon;
 	cfid->is_open = true;
 	cfid->time = jiffies;
+	cfid->last_access_time = jiffies;
 	spin_unlock(&cfid->fid_lock);
 
 oshr_free:
@@ -741,8 +743,8 @@ static void cfids_laundromat_worker(struct work_struct *work)
 	spin_lock(&cfids->cfid_list_lock);
 	list_for_each_entry_safe(cfid, q, &cfids->entries, entry) {
 		spin_lock(&cfid->fid_lock);
-		if (cfid->time &&
-		    time_after(jiffies, cfid->time + HZ * dir_cache_timeout)) {
+		if (cfid->last_access_time &&
+		    time_after(jiffies, cfid->last_access_time + HZ * dir_cache_timeout)) {
 			cfid->on_list = false;
 			list_move(&cfid->entry, &entry);
 			cfids->num_entries--;
diff --git a/fs/smb/client/cached_dir.h b/fs/smb/client/cached_dir.h
index 93c936af2253..6d4b9413aa67 100644
--- a/fs/smb/client/cached_dir.h
+++ b/fs/smb/client/cached_dir.h
@@ -39,6 +39,7 @@ struct cached_fid {
 	bool on_list:1;
 	bool file_all_info_is_valid:1;
 	unsigned long time; /* jiffies of when lease was taken */
+	unsigned long last_access_time; /* jiffies of when last accessed */
 	struct kref refcount;
 	struct cifs_fid fid;
 	spinlock_t fid_lock;
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* Re: [PATCH 6/7] cifs: tc_count updates should be done with tc_lock
  2025-06-04 10:18 ` [PATCH 6/7] cifs: tc_count updates should be done with tc_lock nspmangalore
@ 2025-06-12  3:24   ` Paul Aurich
  0 siblings, 0 replies; 18+ messages in thread
From: Paul Aurich @ 2025-06-12  3:24 UTC (permalink / raw)
  To: nspmangalore
  Cc: linux-cifs, smfrench, bharathsm.hsk, meetakshisetiyaoss, pc,
	henrique.carvalho, ematsumiya, Shyam Prasad N

On 2025-06-04 15:48:15 +0530, nspmangalore@gmail.com wrote:
>From: Shyam Prasad N <sprasad@microsoft.com>
>
>We had problems with deadlocks using the cifs_tcp_ses_lock for
>protecting a lot of structures. So we broke it down into smaller
>spinlocks. cifs_tcon struct fields are protected by tc_lock now.
>Hence we should stick to using that.

Is it really safe to adjust this to tc_lock?

There are a number of other points in the smb client that (AFAICT) only hold 
cifs_tcp_ses_lock while adjusting the tc_count (which is why I used that 
here).  See smb2_handle_cancelled_close, smb2_get_dfs_refer, 
smb2_reconnect_server, smb2_find_smb_tcon.

~Paul

>Fixes: 3fa640d035e5 ("smb: During unmount, ensure all cached dir instances drop their dentry")
>Signed-off-by: Shyam Prasad N <sprasad@microsoft.com>
>---
> fs/smb/client/cached_dir.c | 6 ++++--
> 1 file changed, 4 insertions(+), 2 deletions(-)
>
>diff --git a/fs/smb/client/cached_dir.c b/fs/smb/client/cached_dir.c
>index 2746d693d80a..4abf5bbd8baf 100644
>--- a/fs/smb/client/cached_dir.c
>+++ b/fs/smb/client/cached_dir.c
>@@ -650,10 +650,12 @@ int cached_dir_lease_break(struct cifs_tcon *tcon, __u8 lease_key[16])
> 			spin_unlock(&cfid->fid_lock);
> 			cfids->num_entries--;
>
>+			spin_lock(&cfid->tcon->tc_lock);
> 			++tcon->tc_count;
> 			trace_smb3_tcon_ref(tcon->debug_id, tcon->tc_count,
> 					    netfs_trace_tcon_ref_get_cached_lease_break);
> 			queue_work(cfid_put_wq, &cfid->put_work);
>+			spin_unlock(&cfid->tcon->tc_lock);
> 			spin_unlock(&cfids->cfid_list_lock);
> 			return true;
> 		}
>@@ -767,11 +769,11 @@ static void cfids_laundromat_worker(struct work_struct *work)
> 		dput(dentry);
>
> 		if (cfid->is_open) {
>-			spin_lock(&cifs_tcp_ses_lock);
>+			spin_lock(&cfid->tcon->tc_lock);
> 			++cfid->tcon->tc_count;
> 			trace_smb3_tcon_ref(cfid->tcon->debug_id, cfid->tcon->tc_count,
> 					    netfs_trace_tcon_ref_get_cached_laundromat);
>-			spin_unlock(&cifs_tcp_ses_lock);
>+			spin_unlock(&cfid->tcon->tc_lock);
> 			queue_work(serverclose_wq, &cfid->close_work);
> 		} else
> 			/*
>-- 
>2.43.0
>


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 2/7] cifs: protect cfid accesses with fid_lock
  2025-06-04 10:18 ` [PATCH 2/7] cifs: protect cfid accesses with fid_lock nspmangalore
@ 2025-06-12  3:24   ` Paul Aurich
  2025-06-12 13:05     ` Shyam Prasad N
  0 siblings, 1 reply; 18+ messages in thread
From: Paul Aurich @ 2025-06-12  3:24 UTC (permalink / raw)
  To: nspmangalore
  Cc: linux-cifs, smfrench, bharathsm.hsk, meetakshisetiyaoss, pc,
	henrique.carvalho, ematsumiya, Shyam Prasad N

On 2025-06-04 15:48:11 +0530, nspmangalore@gmail.com wrote:
>From: Shyam Prasad N <sprasad@microsoft.com>
>
>There are several accesses to cfid structure today without
>locking fid_lock. This can lead to race conditions that are
>hard to debug.
>
>With this change, I'm trying to make sure that accesses to cfid
>struct members happen with fid_lock held.
>
>Signed-off-by: Shyam Prasad N <sprasad@microsoft.com>
>---
> fs/smb/client/cached_dir.c | 86 ++++++++++++++++++++++----------------
> 1 file changed, 49 insertions(+), 37 deletions(-)
>
>diff --git a/fs/smb/client/cached_dir.c b/fs/smb/client/cached_dir.c
>index e6fc92667d41..e62d3bebff9a 100644
>--- a/fs/smb/client/cached_dir.c
>+++ b/fs/smb/client/cached_dir.c
>@@ -31,6 +31,7 @@ static struct cached_fid *find_or_create_cached_dir(struct cached_fids *cfids,
>
> 	spin_lock(&cfids->cfid_list_lock);
> 	list_for_each_entry(cfid, &cfids->entries, entry) {
>+		spin_lock(&cfid->fid_lock);
> 		if (!strcmp(cfid->path, path)) {
> 			/*
> 			 * If it doesn't have a lease it is either not yet
>@@ -38,13 +39,16 @@ static struct cached_fid *find_or_create_cached_dir(struct cached_fids *cfids,
> 			 * being deleted due to a lease break.
> 			 */
> 			if (!cfid->time || !cfid->has_lease) {
>+				spin_unlock(&cfid->fid_lock);
> 				spin_unlock(&cfids->cfid_list_lock);
> 				return NULL;
> 			}
> 			kref_get(&cfid->refcount);
>+			spin_unlock(&cfid->fid_lock);
> 			spin_unlock(&cfids->cfid_list_lock);
> 			return cfid;
> 		}
>+		spin_unlock(&cfid->fid_lock);
> 	}
> 	if (lookup_only) {
> 		spin_unlock(&cfids->cfid_list_lock);
>@@ -193,19 +197,20 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon,
> 		kfree(utf16_path);
> 		return -ENOENT;
> 	}
>+
> 	/*
> 	 * Return cached fid if it is valid (has a lease and has a time).
> 	 * Otherwise, it is either a new entry or laundromat worker removed it
> 	 * from @cfids->entries.  Caller will put last reference if the latter.
> 	 */
>-	spin_lock(&cfids->cfid_list_lock);
>+	spin_lock(&cfid->fid_lock);
> 	if (cfid->has_lease && cfid->time) {
>-		spin_unlock(&cfids->cfid_list_lock);
>+		spin_unlock(&cfid->fid_lock);
> 		*ret_cfid = cfid;
> 		kfree(utf16_path);
> 		return 0;
> 	}
>-	spin_unlock(&cfids->cfid_list_lock);
>+	spin_unlock(&cfid->fid_lock);
>
> 	/*
> 	 * Skip any prefix paths in @path as lookup_noperm_positive_unlocked() ends up
>@@ -220,17 +225,6 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon,
> 		goto out;
> 	}
>
>-	if (!npath[0]) {
>-		dentry = dget(cifs_sb->root);
>-	} else {
>-		dentry = path_to_dentry(cifs_sb, npath);
>-		if (IS_ERR(dentry)) {
>-			rc = -ENOENT;
>-			goto out;
>-		}
>-	}
>-	cfid->dentry = dentry;
>-	cfid->tcon = tcon;

I think moving this down below to after the "At this point the directory 
handle is fully cached" comment is going to regress c353ee4fb119 ("smb: 
Initialize cfid->tcon before performing network ops").

> 	/*
> 	 * We do not hold the lock for the open because in case
>@@ -302,9 +296,6 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon,
> 		}
> 		goto oshr_free;
> 	}
>-	cfid->is_open = true;

Moving this down below is going to regress part of 66d45ca1350a ("cifs: Check 
the lease context if we actually got a lease"), I think. If parsing the lease 
fails, the cfid is disposed, but since `is_open` is false, the code won't call 
SMB2_close.

>-
>-	spin_lock(&cfids->cfid_list_lock);
>
> 	o_rsp = (struct smb2_create_rsp *)rsp_iov[0].iov_base;
> 	oparms.fid->persistent_fid = o_rsp->PersistentFileId;
>@@ -315,7 +306,6 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon,
>
>
> 	if (o_rsp->OplockLevel != SMB2_OPLOCK_LEVEL_LEASE) {
>-		spin_unlock(&cfids->cfid_list_lock);
> 		rc = -EINVAL;
> 		goto oshr_free;
> 	}
>@@ -324,21 +314,15 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon,
> 				 &oparms.fid->epoch,
> 				 oparms.fid->lease_key,
> 				 &oplock, NULL, NULL);
>-	if (rc) {
>-		spin_unlock(&cfids->cfid_list_lock);
>+	if (rc)
> 		goto oshr_free;
>-	}
>
> 	rc = -EINVAL;
>-	if (!(oplock & SMB2_LEASE_READ_CACHING_HE)) {
>-		spin_unlock(&cfids->cfid_list_lock);
>+	if (!(oplock & SMB2_LEASE_READ_CACHING_HE))
> 		goto oshr_free;
>-	}
> 	qi_rsp = (struct smb2_query_info_rsp *)rsp_iov[1].iov_base;
>-	if (le32_to_cpu(qi_rsp->OutputBufferLength) < sizeof(struct smb2_file_all_info)) {
>-		spin_unlock(&cfids->cfid_list_lock);
>+	if (le32_to_cpu(qi_rsp->OutputBufferLength) < sizeof(struct smb2_file_all_info))
> 		goto oshr_free;
>-	}
> 	if (!smb2_validate_and_copy_iov(
> 				le16_to_cpu(qi_rsp->OutputBufferOffset),
> 				sizeof(struct smb2_file_all_info),
>@@ -346,10 +330,25 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon,
> 				(char *)&cfid->file_all_info))
> 		cfid->file_all_info_is_valid = true;
>
>-	cfid->time = jiffies;
>-	spin_unlock(&cfids->cfid_list_lock);
> 	/* At this point the directory handle is fully cached */
> 	rc = 0;
>+	if (!cfid->dentry) {
>+		if (!npath[0]) {
>+			dentry = dget(cifs_sb->root);
>+		} else {
>+			dentry = path_to_dentry(cifs_sb, npath);
>+			if (IS_ERR(dentry)) {
>+				rc = -ENOENT;
>+				goto out;
>+			}
>+		}
>+	}
>+	spin_lock(&cfid->fid_lock);
>+	cfid->dentry = dentry;
>+	cfid->tcon = tcon;
>+	cfid->is_open = true;
>+	cfid->time = jiffies;
>+	spin_unlock(&cfid->fid_lock);
>
> oshr_free:
> 	SMB2_open_free(&rqst[0]);
>@@ -364,6 +363,7 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon,
> 			cfid->on_list = false;
> 			cfids->num_entries--;
> 		}
>+		spin_lock(&cfid->fid_lock);
> 		if (cfid->has_lease) {
> 			/*
> 			 * We are guaranteed to have two references at this
>@@ -373,6 +373,7 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon,
> 			cfid->has_lease = false;
> 			kref_put(&cfid->refcount, smb2_close_cached_fid);
> 		}
>+		spin_unlock(&cfid->fid_lock);
> 		spin_unlock(&cfids->cfid_list_lock);
>
> 		kref_put(&cfid->refcount, smb2_close_cached_fid);
>@@ -401,13 +402,16 @@ int open_cached_dir_by_dentry(struct cifs_tcon *tcon,
>
> 	spin_lock(&cfids->cfid_list_lock);
> 	list_for_each_entry(cfid, &cfids->entries, entry) {
>+		spin_lock(&cfid->fid_lock);
> 		if (dentry && cfid->dentry == dentry) {
> 			cifs_dbg(FYI, "found a cached file handle by dentry\n");
> 			kref_get(&cfid->refcount);
>+			spin_unlock(&cfid->fid_lock);
> 			*ret_cfid = cfid;
> 			spin_unlock(&cfids->cfid_list_lock);
> 			return 0;
> 		}
>+		spin_unlock(&cfid->fid_lock);
> 	}
> 	spin_unlock(&cfids->cfid_list_lock);
> 	return -ENOENT;
>@@ -428,8 +432,11 @@ smb2_close_cached_fid(struct kref *ref)
> 	}
> 	spin_unlock(&cfid->cfids->cfid_list_lock);
>
>-	dput(cfid->dentry);
>-	cfid->dentry = NULL;
>+	/* no locking necessary as we're the last user of this cfid */
>+	if (cfid->dentry) {
>+		dput(cfid->dentry);
>+		cfid->dentry = NULL;
>+	}
>
> 	if (cfid->is_open) {
> 		rc = SMB2_close(0, cfid->tcon, cfid->fid.persistent_fid,
>@@ -452,12 +459,13 @@ void drop_cached_dir_by_name(const unsigned int xid, struct cifs_tcon *tcon,
> 		cifs_dbg(FYI, "no cached dir found for rmdir(%s)\n", name);
> 		return;
> 	}
>-	spin_lock(&cfid->cfids->cfid_list_lock);
>+	spin_lock(&cfid->fid_lock);
> 	if (cfid->has_lease) {
>+		/* mark as invalid */
> 		cfid->has_lease = false;
> 		kref_put(&cfid->refcount, smb2_close_cached_fid);
> 	}
>-	spin_unlock(&cfid->cfids->cfid_list_lock);
>+	spin_unlock(&cfid->fid_lock);
> 	close_cached_dir(cfid);
> }
>
>@@ -539,6 +547,7 @@ void invalidate_all_cached_dirs(struct cifs_tcon *tcon)
> 		cfids->num_entries--;
> 		cfid->is_open = false;
> 		cfid->on_list = false;
>+		spin_lock(&cfid->fid_lock);
> 		if (cfid->has_lease) {
> 			/*
> 			 * The lease was never cancelled from the server,
>@@ -547,6 +556,7 @@ void invalidate_all_cached_dirs(struct cifs_tcon *tcon)
> 			cfid->has_lease = false;
> 		} else
> 			kref_get(&cfid->refcount);
>+		spin_unlock(&cfid->fid_lock);
> 	}
> 	/*
> 	 * Queue dropping of the dentries once locks have been dropped
>@@ -601,6 +611,7 @@ int cached_dir_lease_break(struct cifs_tcon *tcon, __u8 lease_key[16])
>
> 	spin_lock(&cfids->cfid_list_lock);
> 	list_for_each_entry(cfid, &cfids->entries, entry) {
>+		spin_lock(&cfid->fid_lock);
> 		if (cfid->has_lease &&
> 		    !memcmp(lease_key,
> 			    cfid->fid.lease_key,
>@@ -613,6 +624,7 @@ int cached_dir_lease_break(struct cifs_tcon *tcon, __u8 lease_key[16])
> 			 */
> 			list_del(&cfid->entry);
> 			cfid->on_list = false;
>+			spin_unlock(&cfid->fid_lock);
> 			cfids->num_entries--;
>
> 			++tcon->tc_count;
>@@ -622,6 +634,7 @@ int cached_dir_lease_break(struct cifs_tcon *tcon, __u8 lease_key[16])
> 			spin_unlock(&cfids->cfid_list_lock);
> 			return true;
> 		}
>+		spin_unlock(&cfid->fid_lock);
> 	}
> 	spin_unlock(&cfids->cfid_list_lock);
> 	return false;
>@@ -657,9 +670,6 @@ static void free_cached_dir(struct cached_fid *cfid)
> 	WARN_ON(work_pending(&cfid->close_work));
> 	WARN_ON(work_pending(&cfid->put_work));
>
>-	dput(cfid->dentry);
>-	cfid->dentry = NULL;
>-
> 	/*
> 	 * Delete all cached dirent names
> 	 */
>@@ -704,6 +714,7 @@ static void cfids_laundromat_worker(struct work_struct *work)
>
> 	spin_lock(&cfids->cfid_list_lock);
> 	list_for_each_entry_safe(cfid, q, &cfids->entries, entry) {
>+		spin_lock(&cfid->fid_lock);
> 		if (cfid->time &&
> 		    time_after(jiffies, cfid->time + HZ * dir_cache_timeout)) {
> 			cfid->on_list = false;
>@@ -718,6 +729,7 @@ static void cfids_laundromat_worker(struct work_struct *work)
> 			} else
> 				kref_get(&cfid->refcount);
> 		}
>+		spin_unlock(&cfid->fid_lock);
> 	}
> 	spin_unlock(&cfids->cfid_list_lock);
>
>@@ -728,8 +740,8 @@ static void cfids_laundromat_worker(struct work_struct *work)
> 		dentry = cfid->dentry;
> 		cfid->dentry = NULL;
> 		spin_unlock(&cfid->fid_lock);
>-
> 		dput(dentry);
>+
> 		if (cfid->is_open) {
> 			spin_lock(&cifs_tcp_ses_lock);
> 			++cfid->tcon->tc_count;
>-- 
>2.43.0

~Paul


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 4/7] cifs: serialize initialization and cleanup of cfid
  2025-06-04 10:18 ` [PATCH 4/7] cifs: serialize initialization and cleanup of cfid nspmangalore
@ 2025-06-12  3:25   ` Paul Aurich
  2025-06-12  9:37     ` Shyam Prasad N
  0 siblings, 1 reply; 18+ messages in thread
From: Paul Aurich @ 2025-06-12  3:25 UTC (permalink / raw)
  To: nspmangalore
  Cc: linux-cifs, smfrench, bharathsm.hsk, meetakshisetiyaoss, pc,
	henrique.carvalho, ematsumiya, Shyam Prasad N

On 2025-06-04 15:48:13 +0530, nspmangalore@gmail.com wrote:
>From: Shyam Prasad N <sprasad@microsoft.com>
>
>Today we can have multiple processes calling open_cached_dir
>and other workers freeing the cached dir all in parallel.
>Although small sections of this code is locked to protect
>individual fields, there can be races between these threads
>which can be hard to debug.
>
>This patch serializes all initialization and cleanup of
>the cfid struct and the associated resources: dentry and
>the server handle.
>
>Signed-off-by: Shyam Prasad N <sprasad@microsoft.com>
>---
> fs/smb/client/cached_dir.c | 16 ++++++++++++++++
> fs/smb/client/cached_dir.h |  1 +
> 2 files changed, 17 insertions(+)
>
>diff --git a/fs/smb/client/cached_dir.c b/fs/smb/client/cached_dir.c
>index 94538f52dfc8..2746d693d80a 100644
>--- a/fs/smb/client/cached_dir.c
>+++ b/fs/smb/client/cached_dir.c
>@@ -198,6 +198,12 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon,
> 		return -ENOENT;
> 	}
>
>+	/*
>+	 * the following is a critical section. We need to make sure that the
>+	 * callers are serialized per-cfid
>+	 */
>+	mutex_lock(&cfid->cfid_mutex);
>+
> 	/*
> 	 * check again that the cfid is valid (with mutex held this time).
> 	 * Return cached fid if it is valid (has a lease and has a time).
>@@ -208,11 +214,13 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon,
> 	spin_lock(&cfid->fid_lock);
> 	if (cfid->has_lease && cfid->time) {
> 		spin_unlock(&cfid->fid_lock);
>+		mutex_unlock(&cfid->cfid_mutex);
> 		*ret_cfid = cfid;
> 		kfree(utf16_path);
> 		return 0;
> 	} else if (!cfid->has_lease) {
> 		spin_unlock(&cfid->fid_lock);
>+		mutex_unlock(&cfid->cfid_mutex);
> 		/* drop the ref that we have */
> 		kref_put(&cfid->refcount, smb2_close_cached_fid);
> 		kfree(utf16_path);
>@@ -229,6 +237,7 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon,
> 	 */
> 	npath = path_no_prefix(cifs_sb, path);
> 	if (IS_ERR(npath)) {
>+		mutex_unlock(&cfid->cfid_mutex);

Double mutex_unlock?  (It's also unlocked unconditionally in the 'out' path)

> 		rc = PTR_ERR(npath);
> 		goto out;
> 	}
>@@ -389,6 +398,8 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon,
> 		*ret_cfid = cfid;
> 		atomic_inc(&tcon->num_remote_opens);
> 	}
>+	mutex_unlock(&cfid->cfid_mutex);
>+
> 	kfree(utf16_path);
>
> 	if (is_replayable_error(rc) &&
>@@ -432,6 +443,9 @@ smb2_close_cached_fid(struct kref *ref)
> 					       refcount);
> 	int rc;
>
>+	/* make sure not to race with server open */
>+	mutex_lock(&cfid->cfid_mutex);
>+
> 	spin_lock(&cfid->cfids->cfid_list_lock);
> 	if (cfid->on_list) {
> 		list_del(&cfid->entry);
>@@ -452,6 +466,7 @@ smb2_close_cached_fid(struct kref *ref)
> 		if (rc) /* should we retry on -EBUSY or -EAGAIN? */
> 			cifs_dbg(VFS, "close cached dir rc %d\n", rc);
> 	}
>+	mutex_unlock(&cfid->cfid_mutex);
>
> 	free_cached_dir(cfid);
> }
>@@ -666,6 +681,7 @@ static struct cached_fid *init_cached_dir(const char *path)
> 	INIT_LIST_HEAD(&cfid->entry);
> 	INIT_LIST_HEAD(&cfid->dirents.entries);
> 	mutex_init(&cfid->dirents.de_mutex);
>+	mutex_init(&cfid->cfid_mutex);
> 	spin_lock_init(&cfid->fid_lock);
> 	kref_init(&cfid->refcount);
> 	return cfid;
>diff --git a/fs/smb/client/cached_dir.h b/fs/smb/client/cached_dir.h
>index 1dfe79d947a6..93c936af2253 100644
>--- a/fs/smb/client/cached_dir.h
>+++ b/fs/smb/client/cached_dir.h
>@@ -42,6 +42,7 @@ struct cached_fid {
> 	struct kref refcount;
> 	struct cifs_fid fid;
> 	spinlock_t fid_lock;
>+	struct mutex cfid_mutex;
> 	struct cifs_tcon *tcon;
> 	struct dentry *dentry;
> 	struct work_struct put_work;
>-- 
>2.43.0
>

~Paul


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 4/7] cifs: serialize initialization and cleanup of cfid
  2025-06-12  3:25   ` Paul Aurich
@ 2025-06-12  9:37     ` Shyam Prasad N
  2025-06-12 15:28       ` Steve French
  0 siblings, 1 reply; 18+ messages in thread
From: Shyam Prasad N @ 2025-06-12  9:37 UTC (permalink / raw)
  To: nspmangalore, linux-cifs, smfrench, bharathsm.hsk,
	meetakshisetiyaoss, pc, henrique.carvalho, ematsumiya,
	Shyam Prasad N

On Thu, Jun 12, 2025 at 8:55 AM Paul Aurich <paul@darkrain42.org> wrote:
>
> On 2025-06-04 15:48:13 +0530, nspmangalore@gmail.com wrote:
> >From: Shyam Prasad N <sprasad@microsoft.com>
> >
> >Today we can have multiple processes calling open_cached_dir
> >and other workers freeing the cached dir all in parallel.
> >Although small sections of this code is locked to protect
> >individual fields, there can be races between these threads
> >which can be hard to debug.
> >
> >This patch serializes all initialization and cleanup of
> >the cfid struct and the associated resources: dentry and
> >the server handle.
> >
> >Signed-off-by: Shyam Prasad N <sprasad@microsoft.com>
> >---
> > fs/smb/client/cached_dir.c | 16 ++++++++++++++++
> > fs/smb/client/cached_dir.h |  1 +
> > 2 files changed, 17 insertions(+)
> >
> >diff --git a/fs/smb/client/cached_dir.c b/fs/smb/client/cached_dir.c
> >index 94538f52dfc8..2746d693d80a 100644
> >--- a/fs/smb/client/cached_dir.c
> >+++ b/fs/smb/client/cached_dir.c
> >@@ -198,6 +198,12 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon,
> >               return -ENOENT;
> >       }
> >
> >+      /*
> >+       * the following is a critical section. We need to make sure that the
> >+       * callers are serialized per-cfid
> >+       */
> >+      mutex_lock(&cfid->cfid_mutex);
> >+
> >       /*
> >        * check again that the cfid is valid (with mutex held this time).
> >        * Return cached fid if it is valid (has a lease and has a time).
> >@@ -208,11 +214,13 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon,
> >       spin_lock(&cfid->fid_lock);
> >       if (cfid->has_lease && cfid->time) {
> >               spin_unlock(&cfid->fid_lock);
> >+              mutex_unlock(&cfid->cfid_mutex);
> >               *ret_cfid = cfid;
> >               kfree(utf16_path);
> >               return 0;
> >       } else if (!cfid->has_lease) {
> >               spin_unlock(&cfid->fid_lock);
> >+              mutex_unlock(&cfid->cfid_mutex);
> >               /* drop the ref that we have */
> >               kref_put(&cfid->refcount, smb2_close_cached_fid);
> >               kfree(utf16_path);
> >@@ -229,6 +237,7 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon,
> >        */
> >       npath = path_no_prefix(cifs_sb, path);
> >       if (IS_ERR(npath)) {
> >+              mutex_unlock(&cfid->cfid_mutex);
>
> Double mutex_unlock?  (It's also unlocked unconditionally in the 'out' path)

Good catch!
Steve: Do you want me to submit a v2? Or submit another patch to fix this?

>
> >               rc = PTR_ERR(npath);
> >               goto out;
> >       }
> >@@ -389,6 +398,8 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon,
> >               *ret_cfid = cfid;
> >               atomic_inc(&tcon->num_remote_opens);
> >       }
> >+      mutex_unlock(&cfid->cfid_mutex);
> >+
> >       kfree(utf16_path);
> >
> >       if (is_replayable_error(rc) &&
> >@@ -432,6 +443,9 @@ smb2_close_cached_fid(struct kref *ref)
> >                                              refcount);
> >       int rc;
> >
> >+      /* make sure not to race with server open */
> >+      mutex_lock(&cfid->cfid_mutex);
> >+
> >       spin_lock(&cfid->cfids->cfid_list_lock);
> >       if (cfid->on_list) {
> >               list_del(&cfid->entry);
> >@@ -452,6 +466,7 @@ smb2_close_cached_fid(struct kref *ref)
> >               if (rc) /* should we retry on -EBUSY or -EAGAIN? */
> >                       cifs_dbg(VFS, "close cached dir rc %d\n", rc);
> >       }
> >+      mutex_unlock(&cfid->cfid_mutex);
> >
> >       free_cached_dir(cfid);
> > }
> >@@ -666,6 +681,7 @@ static struct cached_fid *init_cached_dir(const char *path)
> >       INIT_LIST_HEAD(&cfid->entry);
> >       INIT_LIST_HEAD(&cfid->dirents.entries);
> >       mutex_init(&cfid->dirents.de_mutex);
> >+      mutex_init(&cfid->cfid_mutex);
> >       spin_lock_init(&cfid->fid_lock);
> >       kref_init(&cfid->refcount);
> >       return cfid;
> >diff --git a/fs/smb/client/cached_dir.h b/fs/smb/client/cached_dir.h
> >index 1dfe79d947a6..93c936af2253 100644
> >--- a/fs/smb/client/cached_dir.h
> >+++ b/fs/smb/client/cached_dir.h
> >@@ -42,6 +42,7 @@ struct cached_fid {
> >       struct kref refcount;
> >       struct cifs_fid fid;
> >       spinlock_t fid_lock;
> >+      struct mutex cfid_mutex;
> >       struct cifs_tcon *tcon;
> >       struct dentry *dentry;
> >       struct work_struct put_work;
> >--
> >2.43.0
> >
>
> ~Paul
>


-- 
Regards,
Shyam

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 2/7] cifs: protect cfid accesses with fid_lock
  2025-06-12  3:24   ` Paul Aurich
@ 2025-06-12 13:05     ` Shyam Prasad N
  2025-06-12 16:33       ` Paul Aurich
  0 siblings, 1 reply; 18+ messages in thread
From: Shyam Prasad N @ 2025-06-12 13:05 UTC (permalink / raw)
  To: nspmangalore, linux-cifs, smfrench, bharathsm.hsk,
	meetakshisetiyaoss, pc, henrique.carvalho, ematsumiya,
	Shyam Prasad N

On Thu, Jun 12, 2025 at 8:55 AM Paul Aurich <paul@darkrain42.org> wrote:
>
> On 2025-06-04 15:48:11 +0530, nspmangalore@gmail.com wrote:
> >From: Shyam Prasad N <sprasad@microsoft.com>
> >
> >There are several accesses to cfid structure today without
> >locking fid_lock. This can lead to race conditions that are
> >hard to debug.
> >
> >With this change, I'm trying to make sure that accesses to cfid
> >struct members happen with fid_lock held.
> >
> >Signed-off-by: Shyam Prasad N <sprasad@microsoft.com>
> >---
> > fs/smb/client/cached_dir.c | 86 ++++++++++++++++++++++----------------
> > 1 file changed, 49 insertions(+), 37 deletions(-)
> >
> >diff --git a/fs/smb/client/cached_dir.c b/fs/smb/client/cached_dir.c
> >index e6fc92667d41..e62d3bebff9a 100644
> >--- a/fs/smb/client/cached_dir.c
> >+++ b/fs/smb/client/cached_dir.c
> >@@ -31,6 +31,7 @@ static struct cached_fid *find_or_create_cached_dir(struct cached_fids *cfids,
> >
> >       spin_lock(&cfids->cfid_list_lock);
> >       list_for_each_entry(cfid, &cfids->entries, entry) {
> >+              spin_lock(&cfid->fid_lock);
> >               if (!strcmp(cfid->path, path)) {
> >                       /*
> >                        * If it doesn't have a lease it is either not yet
> >@@ -38,13 +39,16 @@ static struct cached_fid *find_or_create_cached_dir(struct cached_fids *cfids,
> >                        * being deleted due to a lease break.
> >                        */
> >                       if (!cfid->time || !cfid->has_lease) {
> >+                              spin_unlock(&cfid->fid_lock);
> >                               spin_unlock(&cfids->cfid_list_lock);
> >                               return NULL;
> >                       }
> >                       kref_get(&cfid->refcount);
> >+                      spin_unlock(&cfid->fid_lock);
> >                       spin_unlock(&cfids->cfid_list_lock);
> >                       return cfid;
> >               }
> >+              spin_unlock(&cfid->fid_lock);
> >       }
> >       if (lookup_only) {
> >               spin_unlock(&cfids->cfid_list_lock);
> >@@ -193,19 +197,20 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon,
> >               kfree(utf16_path);
> >               return -ENOENT;
> >       }
> >+
> >       /*
> >        * Return cached fid if it is valid (has a lease and has a time).
> >        * Otherwise, it is either a new entry or laundromat worker removed it
> >        * from @cfids->entries.  Caller will put last reference if the latter.
> >        */
> >-      spin_lock(&cfids->cfid_list_lock);
> >+      spin_lock(&cfid->fid_lock);
> >       if (cfid->has_lease && cfid->time) {
> >-              spin_unlock(&cfids->cfid_list_lock);
> >+              spin_unlock(&cfid->fid_lock);
> >               *ret_cfid = cfid;
> >               kfree(utf16_path);
> >               return 0;
> >       }
> >-      spin_unlock(&cfids->cfid_list_lock);
> >+      spin_unlock(&cfid->fid_lock);
> >
> >       /*
> >        * Skip any prefix paths in @path as lookup_noperm_positive_unlocked() ends up
> >@@ -220,17 +225,6 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon,
> >               goto out;
> >       }
> >
> >-      if (!npath[0]) {
> >-              dentry = dget(cifs_sb->root);
> >-      } else {
> >-              dentry = path_to_dentry(cifs_sb, npath);
> >-              if (IS_ERR(dentry)) {
> >-                      rc = -ENOENT;
> >-                      goto out;
> >-              }
> >-      }
> >-      cfid->dentry = dentry;
> >-      cfid->tcon = tcon;
>
> I think moving this down below to after the "At this point the directory
> handle is fully cached" comment is going to regress c353ee4fb119 ("smb:
> Initialize cfid->tcon before performing network ops").

Hi Paul,

Are you referring to the tcon reference?
If so, I think the way to fix that is to get a reference (tc_count++)
on tcon right when we add it to the cfid->tcon, and put the reference
in smb2_close_cached_fid.
I think that should take care of any concerns of race. Let me know if
you disagree.

>
> >       /*
> >        * We do not hold the lock for the open because in case
> >@@ -302,9 +296,6 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon,
> >               }
> >               goto oshr_free;
> >       }
> >-      cfid->is_open = true;
>
> Moving this down below is going to regress part of 66d45ca1350a ("cifs: Check
> the lease context if we actually got a lease"), I think. If parsing the lease
> fails, the cfid is disposed, but since `is_open` is false, the code won't call
> SMB2_close.

That's a good point. I'll submit a patch to fix this.

>
> >-
> >-      spin_lock(&cfids->cfid_list_lock);
> >
> >       o_rsp = (struct smb2_create_rsp *)rsp_iov[0].iov_base;
> >       oparms.fid->persistent_fid = o_rsp->PersistentFileId;
> >@@ -315,7 +306,6 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon,
> >
> >
> >       if (o_rsp->OplockLevel != SMB2_OPLOCK_LEVEL_LEASE) {
> >-              spin_unlock(&cfids->cfid_list_lock);
> >               rc = -EINVAL;
> >               goto oshr_free;
> >       }
> >@@ -324,21 +314,15 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon,
> >                                &oparms.fid->epoch,
> >                                oparms.fid->lease_key,
> >                                &oplock, NULL, NULL);
> >-      if (rc) {
> >-              spin_unlock(&cfids->cfid_list_lock);
> >+      if (rc)
> >               goto oshr_free;
> >-      }
> >
> >       rc = -EINVAL;
> >-      if (!(oplock & SMB2_LEASE_READ_CACHING_HE)) {
> >-              spin_unlock(&cfids->cfid_list_lock);
> >+      if (!(oplock & SMB2_LEASE_READ_CACHING_HE))
> >               goto oshr_free;
> >-      }
> >       qi_rsp = (struct smb2_query_info_rsp *)rsp_iov[1].iov_base;
> >-      if (le32_to_cpu(qi_rsp->OutputBufferLength) < sizeof(struct smb2_file_all_info)) {
> >-              spin_unlock(&cfids->cfid_list_lock);
> >+      if (le32_to_cpu(qi_rsp->OutputBufferLength) < sizeof(struct smb2_file_all_info))
> >               goto oshr_free;
> >-      }
> >       if (!smb2_validate_and_copy_iov(
> >                               le16_to_cpu(qi_rsp->OutputBufferOffset),
> >                               sizeof(struct smb2_file_all_info),
> >@@ -346,10 +330,25 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon,
> >                               (char *)&cfid->file_all_info))
> >               cfid->file_all_info_is_valid = true;
> >
> >-      cfid->time = jiffies;
> >-      spin_unlock(&cfids->cfid_list_lock);
> >       /* At this point the directory handle is fully cached */
> >       rc = 0;
> >+      if (!cfid->dentry) {
> >+              if (!npath[0]) {
> >+                      dentry = dget(cifs_sb->root);
> >+              } else {
> >+                      dentry = path_to_dentry(cifs_sb, npath);
> >+                      if (IS_ERR(dentry)) {
> >+                              rc = -ENOENT;
> >+                              goto out;
> >+                      }
> >+              }
> >+      }
> >+      spin_lock(&cfid->fid_lock);
> >+      cfid->dentry = dentry;
> >+      cfid->tcon = tcon;
> >+      cfid->is_open = true;
> >+      cfid->time = jiffies;
> >+      spin_unlock(&cfid->fid_lock);
> >
> > oshr_free:
> >       SMB2_open_free(&rqst[0]);
> >@@ -364,6 +363,7 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon,
> >                       cfid->on_list = false;
> >                       cfids->num_entries--;
> >               }
> >+              spin_lock(&cfid->fid_lock);
> >               if (cfid->has_lease) {
> >                       /*
> >                        * We are guaranteed to have two references at this
> >@@ -373,6 +373,7 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon,
> >                       cfid->has_lease = false;
> >                       kref_put(&cfid->refcount, smb2_close_cached_fid);
> >               }
> >+              spin_unlock(&cfid->fid_lock);
> >               spin_unlock(&cfids->cfid_list_lock);
> >
> >               kref_put(&cfid->refcount, smb2_close_cached_fid);
> >@@ -401,13 +402,16 @@ int open_cached_dir_by_dentry(struct cifs_tcon *tcon,
> >
> >       spin_lock(&cfids->cfid_list_lock);
> >       list_for_each_entry(cfid, &cfids->entries, entry) {
> >+              spin_lock(&cfid->fid_lock);
> >               if (dentry && cfid->dentry == dentry) {
> >                       cifs_dbg(FYI, "found a cached file handle by dentry\n");
> >                       kref_get(&cfid->refcount);
> >+                      spin_unlock(&cfid->fid_lock);
> >                       *ret_cfid = cfid;
> >                       spin_unlock(&cfids->cfid_list_lock);
> >                       return 0;
> >               }
> >+              spin_unlock(&cfid->fid_lock);
> >       }
> >       spin_unlock(&cfids->cfid_list_lock);
> >       return -ENOENT;
> >@@ -428,8 +432,11 @@ smb2_close_cached_fid(struct kref *ref)
> >       }
> >       spin_unlock(&cfid->cfids->cfid_list_lock);
> >
> >-      dput(cfid->dentry);
> >-      cfid->dentry = NULL;
> >+      /* no locking necessary as we're the last user of this cfid */
> >+      if (cfid->dentry) {
> >+              dput(cfid->dentry);
> >+              cfid->dentry = NULL;
> >+      }
> >
> >       if (cfid->is_open) {
> >               rc = SMB2_close(0, cfid->tcon, cfid->fid.persistent_fid,
> >@@ -452,12 +459,13 @@ void drop_cached_dir_by_name(const unsigned int xid, struct cifs_tcon *tcon,
> >               cifs_dbg(FYI, "no cached dir found for rmdir(%s)\n", name);
> >               return;
> >       }
> >-      spin_lock(&cfid->cfids->cfid_list_lock);
> >+      spin_lock(&cfid->fid_lock);
> >       if (cfid->has_lease) {
> >+              /* mark as invalid */
> >               cfid->has_lease = false;
> >               kref_put(&cfid->refcount, smb2_close_cached_fid);
> >       }
> >-      spin_unlock(&cfid->cfids->cfid_list_lock);
> >+      spin_unlock(&cfid->fid_lock);
> >       close_cached_dir(cfid);
> > }
> >
> >@@ -539,6 +547,7 @@ void invalidate_all_cached_dirs(struct cifs_tcon *tcon)
> >               cfids->num_entries--;
> >               cfid->is_open = false;
> >               cfid->on_list = false;
> >+              spin_lock(&cfid->fid_lock);
> >               if (cfid->has_lease) {
> >                       /*
> >                        * The lease was never cancelled from the server,
> >@@ -547,6 +556,7 @@ void invalidate_all_cached_dirs(struct cifs_tcon *tcon)
> >                       cfid->has_lease = false;
> >               } else
> >                       kref_get(&cfid->refcount);
> >+              spin_unlock(&cfid->fid_lock);
> >       }
> >       /*
> >        * Queue dropping of the dentries once locks have been dropped
> >@@ -601,6 +611,7 @@ int cached_dir_lease_break(struct cifs_tcon *tcon, __u8 lease_key[16])
> >
> >       spin_lock(&cfids->cfid_list_lock);
> >       list_for_each_entry(cfid, &cfids->entries, entry) {
> >+              spin_lock(&cfid->fid_lock);
> >               if (cfid->has_lease &&
> >                   !memcmp(lease_key,
> >                           cfid->fid.lease_key,
> >@@ -613,6 +624,7 @@ int cached_dir_lease_break(struct cifs_tcon *tcon, __u8 lease_key[16])
> >                        */
> >                       list_del(&cfid->entry);
> >                       cfid->on_list = false;
> >+                      spin_unlock(&cfid->fid_lock);
> >                       cfids->num_entries--;
> >
> >                       ++tcon->tc_count;
> >@@ -622,6 +634,7 @@ int cached_dir_lease_break(struct cifs_tcon *tcon, __u8 lease_key[16])
> >                       spin_unlock(&cfids->cfid_list_lock);
> >                       return true;
> >               }
> >+              spin_unlock(&cfid->fid_lock);
> >       }
> >       spin_unlock(&cfids->cfid_list_lock);
> >       return false;
> >@@ -657,9 +670,6 @@ static void free_cached_dir(struct cached_fid *cfid)
> >       WARN_ON(work_pending(&cfid->close_work));
> >       WARN_ON(work_pending(&cfid->put_work));
> >
> >-      dput(cfid->dentry);
> >-      cfid->dentry = NULL;
> >-
> >       /*
> >        * Delete all cached dirent names
> >        */
> >@@ -704,6 +714,7 @@ static void cfids_laundromat_worker(struct work_struct *work)
> >
> >       spin_lock(&cfids->cfid_list_lock);
> >       list_for_each_entry_safe(cfid, q, &cfids->entries, entry) {
> >+              spin_lock(&cfid->fid_lock);
> >               if (cfid->time &&
> >                   time_after(jiffies, cfid->time + HZ * dir_cache_timeout)) {
> >                       cfid->on_list = false;
> >@@ -718,6 +729,7 @@ static void cfids_laundromat_worker(struct work_struct *work)
> >                       } else
> >                               kref_get(&cfid->refcount);
> >               }
> >+              spin_unlock(&cfid->fid_lock);
> >       }
> >       spin_unlock(&cfids->cfid_list_lock);
> >
> >@@ -728,8 +740,8 @@ static void cfids_laundromat_worker(struct work_struct *work)
> >               dentry = cfid->dentry;
> >               cfid->dentry = NULL;
> >               spin_unlock(&cfid->fid_lock);
> >-
> >               dput(dentry);
> >+
> >               if (cfid->is_open) {
> >                       spin_lock(&cifs_tcp_ses_lock);
> >                       ++cfid->tcon->tc_count;
> >--
> >2.43.0
>
> ~Paul
>


-- 
Regards,
Shyam

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 4/7] cifs: serialize initialization and cleanup of cfid
  2025-06-12  9:37     ` Shyam Prasad N
@ 2025-06-12 15:28       ` Steve French
  0 siblings, 0 replies; 18+ messages in thread
From: Steve French @ 2025-06-12 15:28 UTC (permalink / raw)
  To: Shyam Prasad N
  Cc: linux-cifs, bharathsm.hsk, meetakshisetiyaoss, pc,
	henrique.carvalho, ematsumiya, Shyam Prasad N

> Steve: Do you want me to submit a v2? Or submit another patch to fix this?

For patches that are not in mainline yet, then better to send a v2, yes

On Thu, Jun 12, 2025 at 4:38 AM Shyam Prasad N <nspmangalore@gmail.com> wrote:
>
> On Thu, Jun 12, 2025 at 8:55 AM Paul Aurich <paul@darkrain42.org> wrote:
> >
> > On 2025-06-04 15:48:13 +0530, nspmangalore@gmail.com wrote:
> > >From: Shyam Prasad N <sprasad@microsoft.com>
> > >
> > >Today we can have multiple processes calling open_cached_dir
> > >and other workers freeing the cached dir all in parallel.
> > >Although small sections of this code is locked to protect
> > >individual fields, there can be races between these threads
> > >which can be hard to debug.
> > >
> > >This patch serializes all initialization and cleanup of
> > >the cfid struct and the associated resources: dentry and
> > >the server handle.
> > >
> > >Signed-off-by: Shyam Prasad N <sprasad@microsoft.com>
> > >---
> > > fs/smb/client/cached_dir.c | 16 ++++++++++++++++
> > > fs/smb/client/cached_dir.h |  1 +
> > > 2 files changed, 17 insertions(+)
> > >
> > >diff --git a/fs/smb/client/cached_dir.c b/fs/smb/client/cached_dir.c
> > >index 94538f52dfc8..2746d693d80a 100644
> > >--- a/fs/smb/client/cached_dir.c
> > >+++ b/fs/smb/client/cached_dir.c
> > >@@ -198,6 +198,12 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon,
> > >               return -ENOENT;
> > >       }
> > >
> > >+      /*
> > >+       * the following is a critical section. We need to make sure that the
> > >+       * callers are serialized per-cfid
> > >+       */
> > >+      mutex_lock(&cfid->cfid_mutex);
> > >+
> > >       /*
> > >        * check again that the cfid is valid (with mutex held this time).
> > >        * Return cached fid if it is valid (has a lease and has a time).
> > >@@ -208,11 +214,13 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon,
> > >       spin_lock(&cfid->fid_lock);
> > >       if (cfid->has_lease && cfid->time) {
> > >               spin_unlock(&cfid->fid_lock);
> > >+              mutex_unlock(&cfid->cfid_mutex);
> > >               *ret_cfid = cfid;
> > >               kfree(utf16_path);
> > >               return 0;
> > >       } else if (!cfid->has_lease) {
> > >               spin_unlock(&cfid->fid_lock);
> > >+              mutex_unlock(&cfid->cfid_mutex);
> > >               /* drop the ref that we have */
> > >               kref_put(&cfid->refcount, smb2_close_cached_fid);
> > >               kfree(utf16_path);
> > >@@ -229,6 +237,7 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon,
> > >        */
> > >       npath = path_no_prefix(cifs_sb, path);
> > >       if (IS_ERR(npath)) {
> > >+              mutex_unlock(&cfid->cfid_mutex);
> >
> > Double mutex_unlock?  (It's also unlocked unconditionally in the 'out' path)
>
> Good catch!
> Steve: Do you want me to submit a v2? Or submit another patch to fix this?
>
> >
> > >               rc = PTR_ERR(npath);
> > >               goto out;
> > >       }
> > >@@ -389,6 +398,8 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon,
> > >               *ret_cfid = cfid;
> > >               atomic_inc(&tcon->num_remote_opens);
> > >       }
> > >+      mutex_unlock(&cfid->cfid_mutex);
> > >+
> > >       kfree(utf16_path);
> > >
> > >       if (is_replayable_error(rc) &&
> > >@@ -432,6 +443,9 @@ smb2_close_cached_fid(struct kref *ref)
> > >                                              refcount);
> > >       int rc;
> > >
> > >+      /* make sure not to race with server open */
> > >+      mutex_lock(&cfid->cfid_mutex);
> > >+
> > >       spin_lock(&cfid->cfids->cfid_list_lock);
> > >       if (cfid->on_list) {
> > >               list_del(&cfid->entry);
> > >@@ -452,6 +466,7 @@ smb2_close_cached_fid(struct kref *ref)
> > >               if (rc) /* should we retry on -EBUSY or -EAGAIN? */
> > >                       cifs_dbg(VFS, "close cached dir rc %d\n", rc);
> > >       }
> > >+      mutex_unlock(&cfid->cfid_mutex);
> > >
> > >       free_cached_dir(cfid);
> > > }
> > >@@ -666,6 +681,7 @@ static struct cached_fid *init_cached_dir(const char *path)
> > >       INIT_LIST_HEAD(&cfid->entry);
> > >       INIT_LIST_HEAD(&cfid->dirents.entries);
> > >       mutex_init(&cfid->dirents.de_mutex);
> > >+      mutex_init(&cfid->cfid_mutex);
> > >       spin_lock_init(&cfid->fid_lock);
> > >       kref_init(&cfid->refcount);
> > >       return cfid;
> > >diff --git a/fs/smb/client/cached_dir.h b/fs/smb/client/cached_dir.h
> > >index 1dfe79d947a6..93c936af2253 100644
> > >--- a/fs/smb/client/cached_dir.h
> > >+++ b/fs/smb/client/cached_dir.h
> > >@@ -42,6 +42,7 @@ struct cached_fid {
> > >       struct kref refcount;
> > >       struct cifs_fid fid;
> > >       spinlock_t fid_lock;
> > >+      struct mutex cfid_mutex;
> > >       struct cifs_tcon *tcon;
> > >       struct dentry *dentry;
> > >       struct work_struct put_work;
> > >--
> > >2.43.0
> > >
> >
> > ~Paul
> >
>
>
> --
> Regards,
> Shyam



-- 
Thanks,

Steve

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 2/7] cifs: protect cfid accesses with fid_lock
  2025-06-12 13:05     ` Shyam Prasad N
@ 2025-06-12 16:33       ` Paul Aurich
  2025-06-12 17:55         ` Shyam Prasad N
  0 siblings, 1 reply; 18+ messages in thread
From: Paul Aurich @ 2025-06-12 16:33 UTC (permalink / raw)
  To: Shyam Prasad N
  Cc: linux-cifs, smfrench, bharathsm.hsk, meetakshisetiyaoss, pc,
	henrique.carvalho, ematsumiya, Shyam Prasad N

On 2025-06-12 18:35:13 +0530, Shyam Prasad N wrote:
>On Thu, Jun 12, 2025 at 8:55 AM Paul Aurich <paul@darkrain42.org> wrote:
>>
>> On 2025-06-04 15:48:11 +0530, nspmangalore@gmail.com wrote:
>> >From: Shyam Prasad N <sprasad@microsoft.com>
>> >
>> >@@ -220,17 +225,6 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon,
>> >               goto out;
>> >       }
>> >
>> >-      if (!npath[0]) {
>> >-              dentry = dget(cifs_sb->root);
>> >-      } else {
>> >-              dentry = path_to_dentry(cifs_sb, npath);
>> >-              if (IS_ERR(dentry)) {
>> >-                      rc = -ENOENT;
>> >-                      goto out;
>> >-              }
>> >-      }
>> >-      cfid->dentry = dentry;
>> >-      cfid->tcon = tcon;
>>
>> I think moving this down below to after the "At this point the directory
>> handle is fully cached" comment is going to regress c353ee4fb119 ("smb:
>> Initialize cfid->tcon before performing network ops").
>
>Hi Paul,
>
>Are you referring to the tcon reference?
>If so, I think the way to fix that is to get a reference (tc_count++)
>on tcon right when we add it to the cfid->tcon, and put the reference
>in smb2_close_cached_fid.
>I think that should take care of any concerns of race. Let me know if
>you disagree.

I don't think that is sufficient.  cfid->tcon needs to be initialized prior to 
the network operations in open_cached_dir.  If we receive a lease break from 
the server and the delayed work involved in that runs *before* cfid->tcon is 
initialized, it leaks a ref.  (cached_dir_lease_break has a direct copy of the 
tcon passed as an arg and takes a ref from that, but cfid->tcon is still NULL)

T1                 T2

open_cached_dir
   <open+lease acquisition>
   (gets far enough along that the file is open and lease key initialized)

                    // receives a lease break from the server
                    cached_dir_lease_break
                      // takes a ref on tcon (via arg to function, not cfid->tcon)
                    cached_dir_put_work
                    cached_dir_offload_close
                      // cifs_put_tcon on cfid->tcon, which is still NULL

open_cached_dir continues on at this point

Another reason you need to initialize cfid->tcon earlier is that in the error 
paths, cfid->tcon needs to be valid so that smb2_close_cached_fid can actually 
close it.  Before c353ee4fb119, the tcon was initialized at the same time 
cfid->is_open was set.

>>
>> >       /*
>> >        * We do not hold the lock for the open because in case
>> >@@ -302,9 +296,6 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon,
>> >               }
>> >               goto oshr_free;
>> >       }
>> >-      cfid->is_open = true;
>>
>> Moving this down below is going to regress part of 66d45ca1350a ("cifs: Check
>> the lease context if we actually got a lease"), I think. If parsing the lease
>> fails, the cfid is disposed, but since `is_open` is false, the code won't call
>> SMB2_close.
>
>That's a good point. I'll submit a patch to fix this.
>

>-- 
>Regards,
>Shyam

-- 
~Paul


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 2/7] cifs: protect cfid accesses with fid_lock
  2025-06-12 16:33       ` Paul Aurich
@ 2025-06-12 17:55         ` Shyam Prasad N
  0 siblings, 0 replies; 18+ messages in thread
From: Shyam Prasad N @ 2025-06-12 17:55 UTC (permalink / raw)
  To: Shyam Prasad N, linux-cifs, smfrench, bharathsm.hsk,
	meetakshisetiyaoss, pc, henrique.carvalho, ematsumiya,
	Shyam Prasad N

On Thu, Jun 12, 2025 at 10:03 PM Paul Aurich <paul@darkrain42.org> wrote:
>
> On 2025-06-12 18:35:13 +0530, Shyam Prasad N wrote:
> >On Thu, Jun 12, 2025 at 8:55 AM Paul Aurich <paul@darkrain42.org> wrote:
> >>
> >> On 2025-06-04 15:48:11 +0530, nspmangalore@gmail.com wrote:
> >> >From: Shyam Prasad N <sprasad@microsoft.com>
> >> >
> >> >@@ -220,17 +225,6 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon,
> >> >               goto out;
> >> >       }
> >> >
> >> >-      if (!npath[0]) {
> >> >-              dentry = dget(cifs_sb->root);
> >> >-      } else {
> >> >-              dentry = path_to_dentry(cifs_sb, npath);
> >> >-              if (IS_ERR(dentry)) {
> >> >-                      rc = -ENOENT;
> >> >-                      goto out;
> >> >-              }
> >> >-      }
> >> >-      cfid->dentry = dentry;
> >> >-      cfid->tcon = tcon;
> >>
> >> I think moving this down below to after the "At this point the directory
> >> handle is fully cached" comment is going to regress c353ee4fb119 ("smb:
> >> Initialize cfid->tcon before performing network ops").
> >
> >Hi Paul,
> >
> >Are you referring to the tcon reference?
> >If so, I think the way to fix that is to get a reference (tc_count++)
> >on tcon right when we add it to the cfid->tcon, and put the reference
> >in smb2_close_cached_fid.
> >I think that should take care of any concerns of race. Let me know if
> >you disagree.
>
> I don't think that is sufficient.  cfid->tcon needs to be initialized prior to
> the network operations in open_cached_dir.  If we receive a lease break from
> the server and the delayed work involved in that runs *before* cfid->tcon is
> initialized, it leaks a ref.  (cached_dir_lease_break has a direct copy of the
> tcon passed as an arg and takes a ref from that, but cfid->tcon is still NULL)
>
> T1                 T2
>
> open_cached_dir
>    <open+lease acquisition>
>    (gets far enough along that the file is open and lease key initialized)
>
>                     // receives a lease break from the server
>                     cached_dir_lease_break
>                       // takes a ref on tcon (via arg to function, not cfid->tcon)
>                     cached_dir_put_work
>                     cached_dir_offload_close
>                       // cifs_put_tcon on cfid->tcon, which is still NULL
>
> open_cached_dir continues on at this point
>
> Another reason you need to initialize cfid->tcon earlier is that in the error
> paths, cfid->tcon needs to be valid so that smb2_close_cached_fid can actually
> close it.  Before c353ee4fb119, the tcon was initialized at the same time
> cfid->is_open was set.

Fair points. I'll put it back where it was. Just protect it with the cfid_lock.
Although I'm now wondering if we'll be hit by some weird race by not
using cfid_mutex in lease break handler and laundromat thread too. :)

>
> >>
> >> >       /*
> >> >        * We do not hold the lock for the open because in case
> >> >@@ -302,9 +296,6 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon,
> >> >               }
> >> >               goto oshr_free;
> >> >       }
> >> >-      cfid->is_open = true;
> >>
> >> Moving this down below is going to regress part of 66d45ca1350a ("cifs: Check
> >> the lease context if we actually got a lease"), I think. If parsing the lease
> >> fails, the cfid is disposed, but since `is_open` is false, the code won't call
> >> SMB2_close.
> >
> >That's a good point. I'll submit a patch to fix this.
> >
>
> >--
> >Regards,
> >Shyam
>
> --
> ~Paul
>


-- 
Regards,
Shyam

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 7/7] cifs: add new field to track the last access time of cfid
  2025-06-04 10:18 ` [PATCH 7/7] cifs: add new field to track the last access time of cfid nspmangalore
@ 2025-07-24 23:05   ` Steve French
  2025-07-25  3:26     ` Steve French
  0 siblings, 1 reply; 18+ messages in thread
From: Steve French @ 2025-07-24 23:05 UTC (permalink / raw)
  To: nspmangalore
  Cc: linux-cifs, bharathsm.hsk, meetakshisetiyaoss, pc, paul,
	henrique.carvalho, ematsumiya, Shyam Prasad N

[-- Attachment #1: Type: text/plain, Size: 3003 bytes --]

Rebased version of patch attached. See attached. Let me know if any
objections.  It solves an obvious important issue (we shouldn't be
timing out directory leases if they are the most recently used ones)


On Wed, Jun 4, 2025 at 5:18 AM <nspmangalore@gmail.com> wrote:
>
> From: Shyam Prasad N <sprasad@microsoft.com>
>
> The handlecache code today tracks the time at which dir lease was
> acquired and the laundromat thread uses that to check for old
> entries to cleanup.
>
> However, if a directory is actively accessed, it should not
> be chosen to expire first.
>
> This change adds a new last_access_time field to cfid and
> uses that to decide expiry of the cfid.
>
> Signed-off-by: Shyam Prasad N <sprasad@microsoft.com>
> ---
>  fs/smb/client/cached_dir.c | 6 ++++--
>  fs/smb/client/cached_dir.h | 1 +
>  2 files changed, 5 insertions(+), 2 deletions(-)
>
> diff --git a/fs/smb/client/cached_dir.c b/fs/smb/client/cached_dir.c
> index 4abf5bbd8baf..d432a40f902e 100644
> --- a/fs/smb/client/cached_dir.c
> +++ b/fs/smb/client/cached_dir.c
> @@ -213,6 +213,7 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon,
>          */
>         spin_lock(&cfid->fid_lock);
>         if (cfid->has_lease && cfid->time) {
> +               cfid->last_access_time = jiffies;
>                 spin_unlock(&cfid->fid_lock);
>                 mutex_unlock(&cfid->cfid_mutex);
>                 *ret_cfid = cfid;
> @@ -365,6 +366,7 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon,
>         cfid->tcon = tcon;
>         cfid->is_open = true;
>         cfid->time = jiffies;
> +       cfid->last_access_time = jiffies;
>         spin_unlock(&cfid->fid_lock);
>
>  oshr_free:
> @@ -741,8 +743,8 @@ static void cfids_laundromat_worker(struct work_struct *work)
>         spin_lock(&cfids->cfid_list_lock);
>         list_for_each_entry_safe(cfid, q, &cfids->entries, entry) {
>                 spin_lock(&cfid->fid_lock);
> -               if (cfid->time &&
> -                   time_after(jiffies, cfid->time + HZ * dir_cache_timeout)) {
> +               if (cfid->last_access_time &&
> +                   time_after(jiffies, cfid->last_access_time + HZ * dir_cache_timeout)) {
>                         cfid->on_list = false;
>                         list_move(&cfid->entry, &entry);
>                         cfids->num_entries--;
> diff --git a/fs/smb/client/cached_dir.h b/fs/smb/client/cached_dir.h
> index 93c936af2253..6d4b9413aa67 100644
> --- a/fs/smb/client/cached_dir.h
> +++ b/fs/smb/client/cached_dir.h
> @@ -39,6 +39,7 @@ struct cached_fid {
>         bool on_list:1;
>         bool file_all_info_is_valid:1;
>         unsigned long time; /* jiffies of when lease was taken */
> +       unsigned long last_access_time; /* jiffies of when last accessed */
>         struct kref refcount;
>         struct cifs_fid fid;
>         spinlock_t fid_lock;
> --
> 2.43.0
>


-- 
Thanks,

Steve

[-- Attachment #2: rebased-add-new-field-to-track-last-access-time-of-cfid.diff --]
[-- Type: text/x-patch, Size: 1725 bytes --]

diff --git a/fs/smb/client/cached_dir.c b/fs/smb/client/cached_dir.c
index 116470b1dfea..b69daeb1301b 100644
--- a/fs/smb/client/cached_dir.c
+++ b/fs/smb/client/cached_dir.c
@@ -195,6 +195,7 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon,
 	 * from @cfids->entries.  Caller will put last reference if the latter.
 	 */
 	if (cfid->has_lease && cfid->time) {
+		cfid->last_access_time = jiffies;
 		spin_unlock(&cfids->cfid_list_lock);
 		*ret_cfid = cfid;
 		kfree(utf16_path);
@@ -363,6 +364,7 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon,
 		cfid->file_all_info_is_valid = true;
 
 	cfid->time = jiffies;
+	cfid->last_access_time = jiffies;
 	spin_unlock(&cfids->cfid_list_lock);
 	/* At this point the directory handle is fully cached */
 	rc = 0;
@@ -730,8 +732,8 @@ static void cfids_laundromat_worker(struct work_struct *work)
 
 	spin_lock(&cfids->cfid_list_lock);
 	list_for_each_entry_safe(cfid, q, &cfids->entries, entry) {
-		if (cfid->time &&
-		    time_after(jiffies, cfid->time + HZ * dir_cache_timeout)) {
+		if (cfid->last_access_time &&
+		    time_after(jiffies, cfid->last_access_time + HZ * dir_cache_timeout)) {
 			cfid->on_list = false;
 			list_move(&cfid->entry, &entry);
 			cfids->num_entries--;
diff --git a/fs/smb/client/cached_dir.h b/fs/smb/client/cached_dir.h
index 070b0962de98..2c262881b7b1 100644
--- a/fs/smb/client/cached_dir.h
+++ b/fs/smb/client/cached_dir.h
@@ -39,6 +39,7 @@ struct cached_fid {
 	bool on_list:1;
 	bool file_all_info_is_valid:1;
 	unsigned long time; /* jiffies of when lease was taken */
+	unsigned long last_access_time; /* jiffies of when last accessed */
 	struct kref refcount;
 	struct cifs_fid fid;
 	spinlock_t fid_lock;

^ permalink raw reply related	[flat|nested] 18+ messages in thread

* Re: [PATCH 7/7] cifs: add new field to track the last access time of cfid
  2025-07-24 23:05   ` Steve French
@ 2025-07-25  3:26     ` Steve French
  2025-07-27 18:23       ` Steve French
  0 siblings, 1 reply; 18+ messages in thread
From: Steve French @ 2025-07-25  3:26 UTC (permalink / raw)
  To: nspmangalore
  Cc: linux-cifs, bharathsm.hsk, meetakshisetiyaoss, pc, paul,
	henrique.carvalho, ematsumiya, Shyam Prasad N

[-- Attachment #1: Type: text/plain, Size: 3295 bytes --]

Updated patch with description


On Thu, Jul 24, 2025 at 6:05 PM Steve French <smfrench@gmail.com> wrote:
>
> Rebased version of patch attached. See attached. Let me know if any
> objections.  It solves an obvious important issue (we shouldn't be
> timing out directory leases if they are the most recently used ones)
>
>
> On Wed, Jun 4, 2025 at 5:18 AM <nspmangalore@gmail.com> wrote:
> >
> > From: Shyam Prasad N <sprasad@microsoft.com>
> >
> > The handlecache code today tracks the time at which dir lease was
> > acquired and the laundromat thread uses that to check for old
> > entries to cleanup.
> >
> > However, if a directory is actively accessed, it should not
> > be chosen to expire first.
> >
> > This change adds a new last_access_time field to cfid and
> > uses that to decide expiry of the cfid.
> >
> > Signed-off-by: Shyam Prasad N <sprasad@microsoft.com>
> > ---
> >  fs/smb/client/cached_dir.c | 6 ++++--
> >  fs/smb/client/cached_dir.h | 1 +
> >  2 files changed, 5 insertions(+), 2 deletions(-)
> >
> > diff --git a/fs/smb/client/cached_dir.c b/fs/smb/client/cached_dir.c
> > index 4abf5bbd8baf..d432a40f902e 100644
> > --- a/fs/smb/client/cached_dir.c
> > +++ b/fs/smb/client/cached_dir.c
> > @@ -213,6 +213,7 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon,
> >          */
> >         spin_lock(&cfid->fid_lock);
> >         if (cfid->has_lease && cfid->time) {
> > +               cfid->last_access_time = jiffies;
> >                 spin_unlock(&cfid->fid_lock);
> >                 mutex_unlock(&cfid->cfid_mutex);
> >                 *ret_cfid = cfid;
> > @@ -365,6 +366,7 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon,
> >         cfid->tcon = tcon;
> >         cfid->is_open = true;
> >         cfid->time = jiffies;
> > +       cfid->last_access_time = jiffies;
> >         spin_unlock(&cfid->fid_lock);
> >
> >  oshr_free:
> > @@ -741,8 +743,8 @@ static void cfids_laundromat_worker(struct work_struct *work)
> >         spin_lock(&cfids->cfid_list_lock);
> >         list_for_each_entry_safe(cfid, q, &cfids->entries, entry) {
> >                 spin_lock(&cfid->fid_lock);
> > -               if (cfid->time &&
> > -                   time_after(jiffies, cfid->time + HZ * dir_cache_timeout)) {
> > +               if (cfid->last_access_time &&
> > +                   time_after(jiffies, cfid->last_access_time + HZ * dir_cache_timeout)) {
> >                         cfid->on_list = false;
> >                         list_move(&cfid->entry, &entry);
> >                         cfids->num_entries--;
> > diff --git a/fs/smb/client/cached_dir.h b/fs/smb/client/cached_dir.h
> > index 93c936af2253..6d4b9413aa67 100644
> > --- a/fs/smb/client/cached_dir.h
> > +++ b/fs/smb/client/cached_dir.h
> > @@ -39,6 +39,7 @@ struct cached_fid {
> >         bool on_list:1;
> >         bool file_all_info_is_valid:1;
> >         unsigned long time; /* jiffies of when lease was taken */
> > +       unsigned long last_access_time; /* jiffies of when last accessed */
> >         struct kref refcount;
> >         struct cifs_fid fid;
> >         spinlock_t fid_lock;
> > --
> > 2.43.0
> >
>
>
> --
> Thanks,
>
> Steve



-- 
Thanks,

Steve

[-- Attachment #2: 0001-cifs-add-new-field-to-track-the-last-access-time-of-.patch --]
[-- Type: text/x-patch, Size: 2538 bytes --]

From e4ae27d15ddeeb8c2763f6d038e2553a48afe34a Mon Sep 17 00:00:00 2001
From: Shyam Prasad N <sprasad@microsoft.com>
Date: Thu, 24 Jul 2025 22:23:53 -0500
Subject: [PATCH] cifs: add new field to track the last access time of cfid

The handlecache code today tracks the time at which dir lease was
acquired and the laundromat thread uses that to check for old
entries to cleanup.

However, if a directory is actively accessed, it should not
be chosen to expire first.

This change adds a new last_access_time field to cfid and
uses that to decide expiry of the cfid.

Signed-off-by: Shyam Prasad N <sprasad@microsoft.com>
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/smb/client/cached_dir.c | 6 ++++--
 fs/smb/client/cached_dir.h | 1 +
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/fs/smb/client/cached_dir.c b/fs/smb/client/cached_dir.c
index 116470b1dfea..b69daeb1301b 100644
--- a/fs/smb/client/cached_dir.c
+++ b/fs/smb/client/cached_dir.c
@@ -195,6 +195,7 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon,
 	 * from @cfids->entries.  Caller will put last reference if the latter.
 	 */
 	if (cfid->has_lease && cfid->time) {
+		cfid->last_access_time = jiffies;
 		spin_unlock(&cfids->cfid_list_lock);
 		*ret_cfid = cfid;
 		kfree(utf16_path);
@@ -363,6 +364,7 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon,
 		cfid->file_all_info_is_valid = true;
 
 	cfid->time = jiffies;
+	cfid->last_access_time = jiffies;
 	spin_unlock(&cfids->cfid_list_lock);
 	/* At this point the directory handle is fully cached */
 	rc = 0;
@@ -730,8 +732,8 @@ static void cfids_laundromat_worker(struct work_struct *work)
 
 	spin_lock(&cfids->cfid_list_lock);
 	list_for_each_entry_safe(cfid, q, &cfids->entries, entry) {
-		if (cfid->time &&
-		    time_after(jiffies, cfid->time + HZ * dir_cache_timeout)) {
+		if (cfid->last_access_time &&
+		    time_after(jiffies, cfid->last_access_time + HZ * dir_cache_timeout)) {
 			cfid->on_list = false;
 			list_move(&cfid->entry, &entry);
 			cfids->num_entries--;
diff --git a/fs/smb/client/cached_dir.h b/fs/smb/client/cached_dir.h
index 070b0962de98..2c262881b7b1 100644
--- a/fs/smb/client/cached_dir.h
+++ b/fs/smb/client/cached_dir.h
@@ -39,6 +39,7 @@ struct cached_fid {
 	bool on_list:1;
 	bool file_all_info_is_valid:1;
 	unsigned long time; /* jiffies of when lease was taken */
+	unsigned long last_access_time; /* jiffies of when last accessed */
 	struct kref refcount;
 	struct cifs_fid fid;
 	spinlock_t fid_lock;
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* Re: [PATCH 7/7] cifs: add new field to track the last access time of cfid
  2025-07-25  3:26     ` Steve French
@ 2025-07-27 18:23       ` Steve French
  0 siblings, 0 replies; 18+ messages in thread
From: Steve French @ 2025-07-27 18:23 UTC (permalink / raw)
  To: nspmangalore
  Cc: linux-cifs, bharathsm.hsk, meetakshisetiyaoss, pc, paul,
	henrique.carvalho, ematsumiya, Shyam Prasad N

One minor improvement we could do - I noticed that this helps workloads like:

"ls /mnt/somedir ; do stuff ; ls /mnt/somedir (uses cached data from
first) ; ... "ls /mnt/somedir"

but it doesn't help

"ls /mnt/somedir ; do stuff ; stat /mnt/somedir ; do stuff ; stat
/mnt/somedir ; etc."

When "ls" is repeated then we bump up the timeout for that directory
lease, but if ls is instead only followed by "stat" or "revalidate"
(not ls/query_dir) of the directory (stat/revalidate is much more
common than ls) then it doesn't bump up the timeout for the directory
lease.

Maybe followon patch can determine when to bump up the timeout when
revalidate/stat is frequently accessing the cached directory metadata

On Thu, Jul 24, 2025 at 10:26 PM Steve French <smfrench@gmail.com> wrote:
>
> Updated patch with description
>
>
> On Thu, Jul 24, 2025 at 6:05 PM Steve French <smfrench@gmail.com> wrote:
> >
> > Rebased version of patch attached. See attached. Let me know if any
> > objections.  It solves an obvious important issue (we shouldn't be
> > timing out directory leases if they are the most recently used ones)
> >
> >
> > On Wed, Jun 4, 2025 at 5:18 AM <nspmangalore@gmail.com> wrote:
> > >
> > > From: Shyam Prasad N <sprasad@microsoft.com>
> > >
> > > The handlecache code today tracks the time at which dir lease was
> > > acquired and the laundromat thread uses that to check for old
> > > entries to cleanup.
> > >
> > > However, if a directory is actively accessed, it should not
> > > be chosen to expire first.
> > >
> > > This change adds a new last_access_time field to cfid and
> > > uses that to decide expiry of the cfid.
> > >
> > > Signed-off-by: Shyam Prasad N <sprasad@microsoft.com>
> > > ---
> > >  fs/smb/client/cached_dir.c | 6 ++++--
> > >  fs/smb/client/cached_dir.h | 1 +
> > >  2 files changed, 5 insertions(+), 2 deletions(-)
> > >
> > > diff --git a/fs/smb/client/cached_dir.c b/fs/smb/client/cached_dir.c
> > > index 4abf5bbd8baf..d432a40f902e 100644
> > > --- a/fs/smb/client/cached_dir.c
> > > +++ b/fs/smb/client/cached_dir.c
> > > @@ -213,6 +213,7 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon,
> > >          */
> > >         spin_lock(&cfid->fid_lock);
> > >         if (cfid->has_lease && cfid->time) {
> > > +               cfid->last_access_time = jiffies;
> > >                 spin_unlock(&cfid->fid_lock);
> > >                 mutex_unlock(&cfid->cfid_mutex);
> > >                 *ret_cfid = cfid;
> > > @@ -365,6 +366,7 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon,
> > >         cfid->tcon = tcon;
> > >         cfid->is_open = true;
> > >         cfid->time = jiffies;
> > > +       cfid->last_access_time = jiffies;
> > >         spin_unlock(&cfid->fid_lock);
> > >
> > >  oshr_free:
> > > @@ -741,8 +743,8 @@ static void cfids_laundromat_worker(struct work_struct *work)
> > >         spin_lock(&cfids->cfid_list_lock);
> > >         list_for_each_entry_safe(cfid, q, &cfids->entries, entry) {
> > >                 spin_lock(&cfid->fid_lock);
> > > -               if (cfid->time &&
> > > -                   time_after(jiffies, cfid->time + HZ * dir_cache_timeout)) {
> > > +               if (cfid->last_access_time &&
> > > +                   time_after(jiffies, cfid->last_access_time + HZ * dir_cache_timeout)) {
> > >                         cfid->on_list = false;
> > >                         list_move(&cfid->entry, &entry);
> > >                         cfids->num_entries--;
> > > diff --git a/fs/smb/client/cached_dir.h b/fs/smb/client/cached_dir.h
> > > index 93c936af2253..6d4b9413aa67 100644
> > > --- a/fs/smb/client/cached_dir.h
> > > +++ b/fs/smb/client/cached_dir.h
> > > @@ -39,6 +39,7 @@ struct cached_fid {
> > >         bool on_list:1;
> > >         bool file_all_info_is_valid:1;
> > >         unsigned long time; /* jiffies of when lease was taken */
> > > +       unsigned long last_access_time; /* jiffies of when last accessed */
> > >         struct kref refcount;
> > >         struct cifs_fid fid;
> > >         spinlock_t fid_lock;
> > > --
> > > 2.43.0
> > >
> >
> >
> > --
> > Thanks,
> >
> > Steve
>
>
>
> --
> Thanks,
>
> Steve



-- 
Thanks,

Steve

^ permalink raw reply	[flat|nested] 18+ messages in thread

end of thread, other threads:[~2025-07-27 18:23 UTC | newest]

Thread overview: 18+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-06-04 10:18 [PATCH 1/7] cifs: Revert "smb: client: Avoid race in open_cached_dir with lease breaks" nspmangalore
2025-06-04 10:18 ` [PATCH 2/7] cifs: protect cfid accesses with fid_lock nspmangalore
2025-06-12  3:24   ` Paul Aurich
2025-06-12 13:05     ` Shyam Prasad N
2025-06-12 16:33       ` Paul Aurich
2025-06-12 17:55         ` Shyam Prasad N
2025-06-04 10:18 ` [PATCH 3/7] cifs: do not return an invalidated cfid nspmangalore
2025-06-04 10:18 ` [PATCH 4/7] cifs: serialize initialization and cleanup of cfid nspmangalore
2025-06-12  3:25   ` Paul Aurich
2025-06-12  9:37     ` Shyam Prasad N
2025-06-12 15:28       ` Steve French
2025-06-04 10:18 ` [PATCH 5/7] cifs: update the lock ordering comments with new mutex nspmangalore
2025-06-04 10:18 ` [PATCH 6/7] cifs: tc_count updates should be done with tc_lock nspmangalore
2025-06-12  3:24   ` Paul Aurich
2025-06-04 10:18 ` [PATCH 7/7] cifs: add new field to track the last access time of cfid nspmangalore
2025-07-24 23:05   ` Steve French
2025-07-25  3:26     ` Steve French
2025-07-27 18:23       ` Steve French

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox