* [PATCH 1/2] nfs: pass explicit offset/count to trace events
@ 2024-07-11 7:17 Christoph Hellwig
2024-07-11 7:17 ` [PATCH 2/2] nfs: split nfs_read_folio Christoph Hellwig
` (2 more replies)
0 siblings, 3 replies; 5+ messages in thread
From: Christoph Hellwig @ 2024-07-11 7:17 UTC (permalink / raw)
To: trondmy, anna; +Cc: chuck.lever, linux-nfs
nfs_folio_length is unsafe to use without having the folio locked and a
check for a NULL ->f_mapping that protects against truncations and can
lead to kernel crashes. E.g. when running xfstests generic/065 with
all nfs trace points enabled.
Follow the model of the XFS trace points and pass in an explіcit offset
and length. This has the additional benefit that these values can
be more accurate as some of the users touch partial folio ranges.
Fixes: eb5654b3b89d ("NFS: Enable tracing of nfs_invalidate_folio() and nfs_launder_folio()")
Reported-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
fs/nfs/file.c | 5 +++--
fs/nfs/nfstrace.h | 36 ++++++++++++++++++++----------------
fs/nfs/read.c | 8 +++++---
fs/nfs/write.c | 8 ++++----
4 files changed, 32 insertions(+), 25 deletions(-)
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 0e2f87120cb840..9aa2ab218c0ac5 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -436,7 +436,7 @@ static void nfs_invalidate_folio(struct folio *folio, size_t offset,
/* Cancel any unstarted writes on this page */
nfs_wb_folio_cancel(inode, folio);
folio_wait_private_2(folio); /* [DEPRECATED] */
- trace_nfs_invalidate_folio(inode, folio);
+ trace_nfs_invalidate_folio(inode, folio_pos(folio) + offset, length);
}
/*
@@ -504,7 +504,8 @@ static int nfs_launder_folio(struct folio *folio)
folio_wait_private_2(folio); /* [DEPRECATED] */
ret = nfs_wb_folio(inode, folio);
- trace_nfs_launder_folio_done(inode, folio, ret);
+ trace_nfs_launder_folio_done(inode, folio_pos(folio),
+ folio_size(folio), ret);
return ret;
}
diff --git a/fs/nfs/nfstrace.h b/fs/nfs/nfstrace.h
index 1e710654af1173..352fdaed407541 100644
--- a/fs/nfs/nfstrace.h
+++ b/fs/nfs/nfstrace.h
@@ -939,10 +939,11 @@ TRACE_EVENT(nfs_sillyrename_unlink,
DECLARE_EVENT_CLASS(nfs_folio_event,
TP_PROTO(
const struct inode *inode,
- struct folio *folio
+ loff_t offset,
+ size_t count
),
- TP_ARGS(inode, folio),
+ TP_ARGS(inode, offset, count),
TP_STRUCT__entry(
__field(dev_t, dev)
@@ -950,7 +951,7 @@ DECLARE_EVENT_CLASS(nfs_folio_event,
__field(u64, fileid)
__field(u64, version)
__field(loff_t, offset)
- __field(u32, count)
+ __field(size_t, count)
),
TP_fast_assign(
@@ -960,13 +961,13 @@ DECLARE_EVENT_CLASS(nfs_folio_event,
__entry->fileid = nfsi->fileid;
__entry->fhandle = nfs_fhandle_hash(&nfsi->fh);
__entry->version = inode_peek_iversion_raw(inode);
- __entry->offset = folio_file_pos(folio);
- __entry->count = nfs_folio_length(folio);
+ __entry->offset = offset,
+ __entry->count = count;
),
TP_printk(
"fileid=%02x:%02x:%llu fhandle=0x%08x version=%llu "
- "offset=%lld count=%u",
+ "offset=%lld count=%zu",
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long long)__entry->fileid,
__entry->fhandle, __entry->version,
@@ -978,18 +979,20 @@ DECLARE_EVENT_CLASS(nfs_folio_event,
DEFINE_EVENT(nfs_folio_event, name, \
TP_PROTO( \
const struct inode *inode, \
- struct folio *folio \
+ loff_t offset, \
+ size_t count \
), \
- TP_ARGS(inode, folio))
+ TP_ARGS(inode, offset, count))
DECLARE_EVENT_CLASS(nfs_folio_event_done,
TP_PROTO(
const struct inode *inode,
- struct folio *folio,
+ loff_t offset,
+ size_t count,
int ret
),
- TP_ARGS(inode, folio, ret),
+ TP_ARGS(inode, offset, count, ret),
TP_STRUCT__entry(
__field(dev_t, dev)
@@ -998,7 +1001,7 @@ DECLARE_EVENT_CLASS(nfs_folio_event_done,
__field(u64, fileid)
__field(u64, version)
__field(loff_t, offset)
- __field(u32, count)
+ __field(size_t, count)
),
TP_fast_assign(
@@ -1008,14 +1011,14 @@ DECLARE_EVENT_CLASS(nfs_folio_event_done,
__entry->fileid = nfsi->fileid;
__entry->fhandle = nfs_fhandle_hash(&nfsi->fh);
__entry->version = inode_peek_iversion_raw(inode);
- __entry->offset = folio_file_pos(folio);
- __entry->count = nfs_folio_length(folio);
+ __entry->offset = offset,
+ __entry->count = count,
__entry->ret = ret;
),
TP_printk(
"fileid=%02x:%02x:%llu fhandle=0x%08x version=%llu "
- "offset=%lld count=%u ret=%d",
+ "offset=%lld count=%zu ret=%d",
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long long)__entry->fileid,
__entry->fhandle, __entry->version,
@@ -1027,10 +1030,11 @@ DECLARE_EVENT_CLASS(nfs_folio_event_done,
DEFINE_EVENT(nfs_folio_event_done, name, \
TP_PROTO( \
const struct inode *inode, \
- struct folio *folio, \
+ loff_t offset, \
+ size_t count, \
int ret \
), \
- TP_ARGS(inode, folio, ret))
+ TP_ARGS(inode, offset, count, ret))
DEFINE_NFS_FOLIO_EVENT(nfs_aop_readpage);
DEFINE_NFS_FOLIO_EVENT_DONE(nfs_aop_readpage_done);
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 036ede4875cab6..571a5d654cf547 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -333,13 +333,15 @@ int nfs_read_add_folio(struct nfs_pageio_descriptor *pgio,
int nfs_read_folio(struct file *file, struct folio *folio)
{
struct inode *inode = file_inode(file);
+ loff_t pos = folio_file_pos(folio);
+ size_t len = folio_size(folio);
struct nfs_pageio_descriptor pgio;
struct nfs_open_context *ctx;
int ret;
- trace_nfs_aop_readpage(inode, folio);
+ trace_nfs_aop_readpage(inode, pos, len);
nfs_inc_stats(inode, NFSIOS_VFSREADPAGE);
- task_io_account_read(folio_size(folio));
+ task_io_account_read(len);
/*
* Try to flush any pending writes to the file..
@@ -383,7 +385,7 @@ int nfs_read_folio(struct file *file, struct folio *folio)
out_put:
put_nfs_open_context(ctx);
out:
- trace_nfs_aop_readpage_done(inode, folio, ret);
+ trace_nfs_aop_readpage_done(inode, pos, len, ret);
return ret;
out_unlock:
folio_unlock(folio);
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index acf2d942d78fe2..54eab063b67851 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -2064,16 +2064,16 @@ int nfs_wb_folio_cancel(struct inode *inode, struct folio *folio)
int nfs_wb_folio(struct inode *inode, struct folio *folio)
{
loff_t range_start = folio_file_pos(folio);
- loff_t range_end = range_start + (loff_t)folio_size(folio) - 1;
+ size_t len = folio_size(folio);
struct writeback_control wbc = {
.sync_mode = WB_SYNC_ALL,
.nr_to_write = 0,
.range_start = range_start,
- .range_end = range_end,
+ .range_end = range_start + len - 1,
};
int ret;
- trace_nfs_writeback_folio(inode, folio);
+ trace_nfs_writeback_folio(inode, range_start, len);
for (;;) {
folio_wait_writeback(folio);
@@ -2091,7 +2091,7 @@ int nfs_wb_folio(struct inode *inode, struct folio *folio)
goto out_error;
}
out_error:
- trace_nfs_writeback_folio_done(inode, folio, ret);
+ trace_nfs_writeback_folio_done(inode, range_start, len, ret);
return ret;
}
--
2.43.0
^ permalink raw reply related [flat|nested] 5+ messages in thread* [PATCH 2/2] nfs: split nfs_read_folio
2024-07-11 7:17 [PATCH 1/2] nfs: pass explicit offset/count to trace events Christoph Hellwig
@ 2024-07-11 7:17 ` Christoph Hellwig
2024-07-14 13:29 ` Sagi Grimberg
2024-07-11 7:18 ` [PATCH 1/2] nfs: pass explicit offset/count to trace events Christoph Hellwig
2024-07-14 13:28 ` Sagi Grimberg
2 siblings, 1 reply; 5+ messages in thread
From: Christoph Hellwig @ 2024-07-11 7:17 UTC (permalink / raw)
To: trondmy, anna; +Cc: chuck.lever, linux-nfs
nfs_read_folio is a bit hard to follow because it mixes highlevel logic
with the actual data read. Split the latter into a helper and update
the comments to be more accurate.
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
fs/nfs/read.c | 69 ++++++++++++++++++++++++++++++---------------------
1 file changed, 41 insertions(+), 28 deletions(-)
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 571a5d654cf547..4b767d5a3524b3 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -325,18 +325,52 @@ int nfs_read_add_folio(struct nfs_pageio_descriptor *pgio,
}
/*
- * Read a page over NFS.
- * We read the page synchronously in the following case:
- * - The error flag is set for this page. This happens only when a
- * previous async read operation failed.
+ * Actually read a folio over the wire.
+ */
+static int nfs_do_read_folio(struct file *file, struct folio *folio)
+{
+ struct inode *inode = file_inode(file);
+ struct nfs_pageio_descriptor pgio;
+ struct nfs_open_context *ctx;
+ int ret;
+
+ ctx = get_nfs_open_context(nfs_file_open_context(file));
+
+ xchg(&ctx->error, 0);
+ nfs_pageio_init_read(&pgio, inode, false,
+ &nfs_async_read_completion_ops);
+
+ ret = nfs_read_add_folio(&pgio, ctx, folio);
+ if (ret)
+ goto out_put;
+
+ nfs_pageio_complete_read(&pgio);
+ nfs_update_delegated_atime(inode);
+ if (pgio.pg_error < 0) {
+ ret = pgio.pg_error;
+ goto out_put;
+ }
+
+ ret = folio_wait_locked_killable(folio);
+ if (!folio_test_uptodate(folio) && !ret)
+ ret = xchg(&ctx->error, 0);
+
+out_put:
+ put_nfs_open_context(ctx);
+ return ret;
+}
+
+/*
+ * Synchronously read a folio.
+ *
+ * This is not heavily used as most users to try an asynchronous
+ * large read through ->readahead first.
*/
int nfs_read_folio(struct file *file, struct folio *folio)
{
struct inode *inode = file_inode(file);
loff_t pos = folio_file_pos(folio);
size_t len = folio_size(folio);
- struct nfs_pageio_descriptor pgio;
- struct nfs_open_context *ctx;
int ret;
trace_nfs_aop_readpage(inode, pos, len);
@@ -361,29 +395,8 @@ int nfs_read_folio(struct file *file, struct folio *folio)
goto out_unlock;
ret = nfs_netfs_read_folio(file, folio);
- if (!ret)
- goto out;
-
- ctx = get_nfs_open_context(nfs_file_open_context(file));
-
- xchg(&ctx->error, 0);
- nfs_pageio_init_read(&pgio, inode, false,
- &nfs_async_read_completion_ops);
-
- ret = nfs_read_add_folio(&pgio, ctx, folio);
if (ret)
- goto out_put;
-
- nfs_pageio_complete_read(&pgio);
- nfs_update_delegated_atime(inode);
- ret = pgio.pg_error < 0 ? pgio.pg_error : 0;
- if (!ret) {
- ret = folio_wait_locked_killable(folio);
- if (!folio_test_uptodate(folio) && !ret)
- ret = xchg(&ctx->error, 0);
- }
-out_put:
- put_nfs_open_context(ctx);
+ ret = nfs_do_read_folio(file, folio);
out:
trace_nfs_aop_readpage_done(inode, pos, len, ret);
return ret;
--
2.43.0
^ permalink raw reply related [flat|nested] 5+ messages in thread* Re: [PATCH 1/2] nfs: pass explicit offset/count to trace events
2024-07-11 7:17 [PATCH 1/2] nfs: pass explicit offset/count to trace events Christoph Hellwig
2024-07-11 7:17 ` [PATCH 2/2] nfs: split nfs_read_folio Christoph Hellwig
@ 2024-07-11 7:18 ` Christoph Hellwig
2024-07-14 13:28 ` Sagi Grimberg
2 siblings, 0 replies; 5+ messages in thread
From: Christoph Hellwig @ 2024-07-11 7:18 UTC (permalink / raw)
To: trondmy, anna; +Cc: chuck.lever, linux-nfs
On Thu, Jul 11, 2024 at 09:17:02AM +0200, Christoph Hellwig wrote:
> lead to kernel crashes. E.g. when running xfstests generic/065 with
> all nfs trace points enabled.
Sorry, this should read generic/095.
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH 1/2] nfs: pass explicit offset/count to trace events
2024-07-11 7:17 [PATCH 1/2] nfs: pass explicit offset/count to trace events Christoph Hellwig
2024-07-11 7:17 ` [PATCH 2/2] nfs: split nfs_read_folio Christoph Hellwig
2024-07-11 7:18 ` [PATCH 1/2] nfs: pass explicit offset/count to trace events Christoph Hellwig
@ 2024-07-14 13:28 ` Sagi Grimberg
2 siblings, 0 replies; 5+ messages in thread
From: Sagi Grimberg @ 2024-07-14 13:28 UTC (permalink / raw)
To: Christoph Hellwig, trondmy, anna; +Cc: chuck.lever, linux-nfs
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2024-07-14 13:29 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2024-07-11 7:17 [PATCH 1/2] nfs: pass explicit offset/count to trace events Christoph Hellwig
2024-07-11 7:17 ` [PATCH 2/2] nfs: split nfs_read_folio Christoph Hellwig
2024-07-14 13:29 ` Sagi Grimberg
2024-07-11 7:18 ` [PATCH 1/2] nfs: pass explicit offset/count to trace events Christoph Hellwig
2024-07-14 13:28 ` Sagi Grimberg
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox