* [PATCH 01/29] xfs: use unsigned ints for non-negative quantities in xfs_attr_remote.c
2024-03-30 0:32 ` [PATCHSET v5.5 2/2] xfs: fs-verity support Darrick J. Wong
@ 2024-03-30 0:36 ` Darrick J. Wong
2024-04-02 9:51 ` Andrey Albershteyn
2024-03-30 0:36 ` [PATCH 02/29] xfs: turn XFS_ATTR3_RMT_BUF_SPACE into a function Darrick J. Wong
` (27 subsequent siblings)
28 siblings, 1 reply; 111+ messages in thread
From: Darrick J. Wong @ 2024-03-30 0:36 UTC (permalink / raw)
To: djwong, ebiggers, aalbersh; +Cc: linux-xfs, linux-fsdevel, fsverity
From: Darrick J. Wong <djwong@kernel.org>
In the next few patches we're going to refactor the attr remote code so
that we can support headerless remote xattr values for storing merkle
tree blocks. For now, let's change the code to use unsigned int to
describe quantities of bytes and blocks that cannot be negative.
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
---
fs/xfs/libxfs/xfs_attr_remote.c | 54 ++++++++++++++++++++-------------------
fs/xfs/libxfs/xfs_attr_remote.h | 2 +
2 files changed, 28 insertions(+), 28 deletions(-)
diff --git a/fs/xfs/libxfs/xfs_attr_remote.c b/fs/xfs/libxfs/xfs_attr_remote.c
index a8de9dc1e998a..c778a3a51792e 100644
--- a/fs/xfs/libxfs/xfs_attr_remote.c
+++ b/fs/xfs/libxfs/xfs_attr_remote.c
@@ -47,13 +47,13 @@
* Each contiguous block has a header, so it is not just a simple attribute
* length to FSB conversion.
*/
-int
+unsigned int
xfs_attr3_rmt_blocks(
- struct xfs_mount *mp,
- int attrlen)
+ struct xfs_mount *mp,
+ unsigned int attrlen)
{
if (xfs_has_crc(mp)) {
- int buflen = XFS_ATTR3_RMT_BUF_SPACE(mp, mp->m_sb.sb_blocksize);
+ unsigned int buflen = XFS_ATTR3_RMT_BUF_SPACE(mp, mp->m_sb.sb_blocksize);
return (attrlen + buflen - 1) / buflen;
}
return XFS_B_TO_FSB(mp, attrlen);
@@ -122,9 +122,9 @@ __xfs_attr3_rmt_read_verify(
{
struct xfs_mount *mp = bp->b_mount;
char *ptr;
- int len;
+ unsigned int len;
xfs_daddr_t bno;
- int blksize = mp->m_attr_geo->blksize;
+ unsigned int blksize = mp->m_attr_geo->blksize;
/* no verification of non-crc buffers */
if (!xfs_has_crc(mp))
@@ -186,7 +186,7 @@ xfs_attr3_rmt_write_verify(
{
struct xfs_mount *mp = bp->b_mount;
xfs_failaddr_t fa;
- int blksize = mp->m_attr_geo->blksize;
+ unsigned int blksize = mp->m_attr_geo->blksize;
char *ptr;
int len;
xfs_daddr_t bno;
@@ -281,20 +281,20 @@ xfs_attr_rmtval_copyout(
struct xfs_buf *bp,
struct xfs_inode *dp,
xfs_ino_t owner,
- int *offset,
- int *valuelen,
+ unsigned int *offset,
+ unsigned int *valuelen,
uint8_t **dst)
{
char *src = bp->b_addr;
xfs_daddr_t bno = xfs_buf_daddr(bp);
- int len = BBTOB(bp->b_length);
- int blksize = mp->m_attr_geo->blksize;
+ unsigned int len = BBTOB(bp->b_length);
+ unsigned int blksize = mp->m_attr_geo->blksize;
ASSERT(len >= blksize);
while (len > 0 && *valuelen > 0) {
- int hdr_size = 0;
- int byte_cnt = XFS_ATTR3_RMT_BUF_SPACE(mp, blksize);
+ unsigned int hdr_size = 0;
+ unsigned int byte_cnt = XFS_ATTR3_RMT_BUF_SPACE(mp, blksize);
byte_cnt = min(*valuelen, byte_cnt);
@@ -330,20 +330,20 @@ xfs_attr_rmtval_copyin(
struct xfs_mount *mp,
struct xfs_buf *bp,
xfs_ino_t ino,
- int *offset,
- int *valuelen,
+ unsigned int *offset,
+ unsigned int *valuelen,
uint8_t **src)
{
char *dst = bp->b_addr;
xfs_daddr_t bno = xfs_buf_daddr(bp);
- int len = BBTOB(bp->b_length);
- int blksize = mp->m_attr_geo->blksize;
+ unsigned int len = BBTOB(bp->b_length);
+ unsigned int blksize = mp->m_attr_geo->blksize;
ASSERT(len >= blksize);
while (len > 0 && *valuelen > 0) {
- int hdr_size;
- int byte_cnt = XFS_ATTR3_RMT_BUF_SPACE(mp, blksize);
+ unsigned int hdr_size;
+ unsigned int byte_cnt = XFS_ATTR3_RMT_BUF_SPACE(mp, blksize);
byte_cnt = min(*valuelen, byte_cnt);
hdr_size = xfs_attr3_rmt_hdr_set(mp, dst, ino, *offset,
@@ -389,12 +389,12 @@ xfs_attr_rmtval_get(
struct xfs_buf *bp;
xfs_dablk_t lblkno = args->rmtblkno;
uint8_t *dst = args->value;
- int valuelen;
+ unsigned int valuelen;
int nmap;
int error;
- int blkcnt = args->rmtblkcnt;
+ unsigned int blkcnt = args->rmtblkcnt;
int i;
- int offset = 0;
+ unsigned int offset = 0;
trace_xfs_attr_rmtval_get(args);
@@ -452,7 +452,7 @@ xfs_attr_rmt_find_hole(
struct xfs_inode *dp = args->dp;
struct xfs_mount *mp = dp->i_mount;
int error;
- int blkcnt;
+ unsigned int blkcnt;
xfs_fileoff_t lfileoff = 0;
/*
@@ -481,11 +481,11 @@ xfs_attr_rmtval_set_value(
struct xfs_bmbt_irec map;
xfs_dablk_t lblkno;
uint8_t *src = args->value;
- int blkcnt;
- int valuelen;
+ unsigned int blkcnt;
+ unsigned int valuelen;
int nmap;
int error;
- int offset = 0;
+ unsigned int offset = 0;
/*
* Roll through the "value", copying the attribute value to the
@@ -645,7 +645,7 @@ xfs_attr_rmtval_invalidate(
struct xfs_da_args *args)
{
xfs_dablk_t lblkno;
- int blkcnt;
+ unsigned int blkcnt;
int error;
/*
diff --git a/fs/xfs/libxfs/xfs_attr_remote.h b/fs/xfs/libxfs/xfs_attr_remote.h
index d097ec6c4dc35..c64b04f91cafd 100644
--- a/fs/xfs/libxfs/xfs_attr_remote.h
+++ b/fs/xfs/libxfs/xfs_attr_remote.h
@@ -6,7 +6,7 @@
#ifndef __XFS_ATTR_REMOTE_H__
#define __XFS_ATTR_REMOTE_H__
-int xfs_attr3_rmt_blocks(struct xfs_mount *mp, int attrlen);
+unsigned int xfs_attr3_rmt_blocks(struct xfs_mount *mp, unsigned int attrlen);
int xfs_attr_rmtval_get(struct xfs_da_args *args);
int xfs_attr_rmtval_stale(struct xfs_inode *ip, struct xfs_bmbt_irec *map,
^ permalink raw reply related [flat|nested] 111+ messages in thread
* Re: [PATCH 01/29] xfs: use unsigned ints for non-negative quantities in xfs_attr_remote.c
2024-03-30 0:36 ` [PATCH 01/29] xfs: use unsigned ints for non-negative quantities in xfs_attr_remote.c Darrick J. Wong
@ 2024-04-02 9:51 ` Andrey Albershteyn
2024-04-02 16:25 ` Darrick J. Wong
0 siblings, 1 reply; 111+ messages in thread
From: Andrey Albershteyn @ 2024-04-02 9:51 UTC (permalink / raw)
To: Darrick J. Wong; +Cc: ebiggers, linux-xfs, linux-fsdevel, fsverity
On 2024-03-29 17:36:19, Darrick J. Wong wrote:
> From: Darrick J. Wong <djwong@kernel.org>
>
> In the next few patches we're going to refactor the attr remote code so
> that we can support headerless remote xattr values for storing merkle
> tree blocks. For now, let's change the code to use unsigned int to
> describe quantities of bytes and blocks that cannot be negative.
>
> Signed-off-by: Darrick J. Wong <djwong@kernel.org>
> ---
> fs/xfs/libxfs/xfs_attr_remote.c | 54 ++++++++++++++++++++-------------------
> fs/xfs/libxfs/xfs_attr_remote.h | 2 +
> 2 files changed, 28 insertions(+), 28 deletions(-)
>
>
> diff --git a/fs/xfs/libxfs/xfs_attr_remote.c b/fs/xfs/libxfs/xfs_attr_remote.c
> index a8de9dc1e998a..c778a3a51792e 100644
> --- a/fs/xfs/libxfs/xfs_attr_remote.c
> +++ b/fs/xfs/libxfs/xfs_attr_remote.c
> @@ -47,13 +47,13 @@
> * Each contiguous block has a header, so it is not just a simple attribute
> * length to FSB conversion.
> */
> -int
> +unsigned int
> xfs_attr3_rmt_blocks(
> - struct xfs_mount *mp,
> - int attrlen)
> + struct xfs_mount *mp,
> + unsigned int attrlen)
> {
> if (xfs_has_crc(mp)) {
> - int buflen = XFS_ATTR3_RMT_BUF_SPACE(mp, mp->m_sb.sb_blocksize);
> + unsigned int buflen = XFS_ATTR3_RMT_BUF_SPACE(mp, mp->m_sb.sb_blocksize);
> return (attrlen + buflen - 1) / buflen;
> }
> return XFS_B_TO_FSB(mp, attrlen);
> @@ -122,9 +122,9 @@ __xfs_attr3_rmt_read_verify(
fsbsize in xfs_attr3_rmt_verify()?
Otherwise, looks good to me:
Reviewed-by: Andrey Albershteyn <aalbersh@redhat.com>
--
- Andrey
^ permalink raw reply [flat|nested] 111+ messages in thread
* Re: [PATCH 01/29] xfs: use unsigned ints for non-negative quantities in xfs_attr_remote.c
2024-04-02 9:51 ` Andrey Albershteyn
@ 2024-04-02 16:25 ` Darrick J. Wong
0 siblings, 0 replies; 111+ messages in thread
From: Darrick J. Wong @ 2024-04-02 16:25 UTC (permalink / raw)
To: Andrey Albershteyn; +Cc: ebiggers, linux-xfs, linux-fsdevel, fsverity
On Tue, Apr 02, 2024 at 11:51:55AM +0200, Andrey Albershteyn wrote:
> On 2024-03-29 17:36:19, Darrick J. Wong wrote:
> > From: Darrick J. Wong <djwong@kernel.org>
> >
> > In the next few patches we're going to refactor the attr remote code so
> > that we can support headerless remote xattr values for storing merkle
> > tree blocks. For now, let's change the code to use unsigned int to
> > describe quantities of bytes and blocks that cannot be negative.
> >
> > Signed-off-by: Darrick J. Wong <djwong@kernel.org>
> > ---
> > fs/xfs/libxfs/xfs_attr_remote.c | 54 ++++++++++++++++++++-------------------
> > fs/xfs/libxfs/xfs_attr_remote.h | 2 +
> > 2 files changed, 28 insertions(+), 28 deletions(-)
> >
> >
> > diff --git a/fs/xfs/libxfs/xfs_attr_remote.c b/fs/xfs/libxfs/xfs_attr_remote.c
> > index a8de9dc1e998a..c778a3a51792e 100644
> > --- a/fs/xfs/libxfs/xfs_attr_remote.c
> > +++ b/fs/xfs/libxfs/xfs_attr_remote.c
> > @@ -47,13 +47,13 @@
> > * Each contiguous block has a header, so it is not just a simple attribute
> > * length to FSB conversion.
> > */
> > -int
> > +unsigned int
> > xfs_attr3_rmt_blocks(
> > - struct xfs_mount *mp,
> > - int attrlen)
> > + struct xfs_mount *mp,
> > + unsigned int attrlen)
> > {
> > if (xfs_has_crc(mp)) {
> > - int buflen = XFS_ATTR3_RMT_BUF_SPACE(mp, mp->m_sb.sb_blocksize);
> > + unsigned int buflen = XFS_ATTR3_RMT_BUF_SPACE(mp, mp->m_sb.sb_blocksize);
> > return (attrlen + buflen - 1) / buflen;
> > }
> > return XFS_B_TO_FSB(mp, attrlen);
> > @@ -122,9 +122,9 @@ __xfs_attr3_rmt_read_verify(
>
> fsbsize in xfs_attr3_rmt_verify()?
Ah, yes, good catch. Fixed.
> Otherwise, looks good to me:
> Reviewed-by: Andrey Albershteyn <aalbersh@redhat.com>
Thanks!
--D
> --
> - Andrey
>
>
^ permalink raw reply [flat|nested] 111+ messages in thread
* [PATCH 02/29] xfs: turn XFS_ATTR3_RMT_BUF_SPACE into a function
2024-03-30 0:32 ` [PATCHSET v5.5 2/2] xfs: fs-verity support Darrick J. Wong
2024-03-30 0:36 ` [PATCH 01/29] xfs: use unsigned ints for non-negative quantities in xfs_attr_remote.c Darrick J. Wong
@ 2024-03-30 0:36 ` Darrick J. Wong
2024-04-02 10:09 ` Andrey Albershteyn
2024-03-30 0:36 ` [PATCH 03/29] xfs: create a helper to compute the blockcount of a max sized remote value Darrick J. Wong
` (26 subsequent siblings)
28 siblings, 1 reply; 111+ messages in thread
From: Darrick J. Wong @ 2024-03-30 0:36 UTC (permalink / raw)
To: djwong, ebiggers, aalbersh; +Cc: linux-xfs, linux-fsdevel, fsverity
From: Darrick J. Wong <djwong@kernel.org>
Turn this into a properly typechecked function, and actually use the
correct blocksize for extended attributes. The function cannot be
static inline because xfsprogs userspace uses it.
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
---
fs/xfs/libxfs/xfs_attr_remote.c | 19 ++++++++++++++++---
fs/xfs/libxfs/xfs_da_format.h | 4 +---
2 files changed, 17 insertions(+), 6 deletions(-)
diff --git a/fs/xfs/libxfs/xfs_attr_remote.c b/fs/xfs/libxfs/xfs_attr_remote.c
index c778a3a51792e..efecebc20ec46 100644
--- a/fs/xfs/libxfs/xfs_attr_remote.c
+++ b/fs/xfs/libxfs/xfs_attr_remote.c
@@ -43,6 +43,19 @@
* the logging system and therefore never have a log item.
*/
+/* How many bytes can be stored in a remote value buffer? */
+inline unsigned int
+xfs_attr3_rmt_buf_space(
+ struct xfs_mount *mp)
+{
+ unsigned int blocksize = mp->m_attr_geo->blksize;
+
+ if (xfs_has_crc(mp))
+ return blocksize - sizeof(struct xfs_attr3_rmt_hdr);
+
+ return blocksize;
+}
+
/*
* Each contiguous block has a header, so it is not just a simple attribute
* length to FSB conversion.
@@ -53,7 +66,7 @@ xfs_attr3_rmt_blocks(
unsigned int attrlen)
{
if (xfs_has_crc(mp)) {
- unsigned int buflen = XFS_ATTR3_RMT_BUF_SPACE(mp, mp->m_sb.sb_blocksize);
+ unsigned int buflen = xfs_attr3_rmt_buf_space(mp);
return (attrlen + buflen - 1) / buflen;
}
return XFS_B_TO_FSB(mp, attrlen);
@@ -294,7 +307,7 @@ xfs_attr_rmtval_copyout(
while (len > 0 && *valuelen > 0) {
unsigned int hdr_size = 0;
- unsigned int byte_cnt = XFS_ATTR3_RMT_BUF_SPACE(mp, blksize);
+ unsigned int byte_cnt = xfs_attr3_rmt_buf_space(mp);
byte_cnt = min(*valuelen, byte_cnt);
@@ -343,7 +356,7 @@ xfs_attr_rmtval_copyin(
while (len > 0 && *valuelen > 0) {
unsigned int hdr_size;
- unsigned int byte_cnt = XFS_ATTR3_RMT_BUF_SPACE(mp, blksize);
+ unsigned int byte_cnt = xfs_attr3_rmt_buf_space(mp);
byte_cnt = min(*valuelen, byte_cnt);
hdr_size = xfs_attr3_rmt_hdr_set(mp, dst, ino, *offset,
diff --git a/fs/xfs/libxfs/xfs_da_format.h b/fs/xfs/libxfs/xfs_da_format.h
index e67045a66ef8f..30c97aecd8115 100644
--- a/fs/xfs/libxfs/xfs_da_format.h
+++ b/fs/xfs/libxfs/xfs_da_format.h
@@ -870,9 +870,7 @@ struct xfs_attr3_rmt_hdr {
#define XFS_ATTR3_RMT_CRC_OFF offsetof(struct xfs_attr3_rmt_hdr, rm_crc)
-#define XFS_ATTR3_RMT_BUF_SPACE(mp, bufsize) \
- ((bufsize) - (xfs_has_crc((mp)) ? \
- sizeof(struct xfs_attr3_rmt_hdr) : 0))
+unsigned int xfs_attr3_rmt_buf_space(struct xfs_mount *mp);
/* Number of bytes in a directory block. */
static inline unsigned int xfs_dir2_dirblock_bytes(struct xfs_sb *sbp)
^ permalink raw reply related [flat|nested] 111+ messages in thread
* Re: [PATCH 02/29] xfs: turn XFS_ATTR3_RMT_BUF_SPACE into a function
2024-03-30 0:36 ` [PATCH 02/29] xfs: turn XFS_ATTR3_RMT_BUF_SPACE into a function Darrick J. Wong
@ 2024-04-02 10:09 ` Andrey Albershteyn
0 siblings, 0 replies; 111+ messages in thread
From: Andrey Albershteyn @ 2024-04-02 10:09 UTC (permalink / raw)
To: Darrick J. Wong; +Cc: ebiggers, linux-xfs, linux-fsdevel, fsverity
On 2024-03-29 17:36:35, Darrick J. Wong wrote:
> From: Darrick J. Wong <djwong@kernel.org>
>
> Turn this into a properly typechecked function, and actually use the
> correct blocksize for extended attributes. The function cannot be
> static inline because xfsprogs userspace uses it.
>
> Signed-off-by: Darrick J. Wong <djwong@kernel.org>
> ---
> fs/xfs/libxfs/xfs_attr_remote.c | 19 ++++++++++++++++---
> fs/xfs/libxfs/xfs_da_format.h | 4 +---
> 2 files changed, 17 insertions(+), 6 deletions(-)
>
Looks good to me:
Reviewed-by: Andrey Albershteyn <aalbersh@redhat.com>
--
- Andrey
^ permalink raw reply [flat|nested] 111+ messages in thread
* [PATCH 03/29] xfs: create a helper to compute the blockcount of a max sized remote value
2024-03-30 0:32 ` [PATCHSET v5.5 2/2] xfs: fs-verity support Darrick J. Wong
2024-03-30 0:36 ` [PATCH 01/29] xfs: use unsigned ints for non-negative quantities in xfs_attr_remote.c Darrick J. Wong
2024-03-30 0:36 ` [PATCH 02/29] xfs: turn XFS_ATTR3_RMT_BUF_SPACE into a function Darrick J. Wong
@ 2024-03-30 0:36 ` Darrick J. Wong
2024-04-02 10:09 ` Andrey Albershteyn
2024-03-30 0:37 ` [PATCH 04/29] xfs: minor cleanups of xfs_attr3_rmt_blocks Darrick J. Wong
` (25 subsequent siblings)
28 siblings, 1 reply; 111+ messages in thread
From: Darrick J. Wong @ 2024-03-30 0:36 UTC (permalink / raw)
To: djwong, ebiggers, aalbersh; +Cc: linux-xfs, linux-fsdevel, fsverity
From: Darrick J. Wong <djwong@kernel.org>
Create a helper function to compute the number of fsblocks needed to
store a maximally-sized extended attribute value.
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
---
fs/xfs/libxfs/xfs_attr.c | 4 ++--
fs/xfs/libxfs/xfs_attr_remote.h | 6 ++++++
fs/xfs/scrub/reap.c | 4 ++--
3 files changed, 10 insertions(+), 4 deletions(-)
diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c
index 45dd3e57615e7..c21def69cf636 100644
--- a/fs/xfs/libxfs/xfs_attr.c
+++ b/fs/xfs/libxfs/xfs_attr.c
@@ -1063,7 +1063,7 @@ xfs_attr_set(
rmt_blks = xfs_attr3_rmt_blocks(mp, args->valuelen);
} else {
XFS_STATS_INC(mp, xs_attr_remove);
- rmt_blks = xfs_attr3_rmt_blocks(mp, XFS_XATTR_SIZE_MAX);
+ rmt_blks = xfs_attr3_max_rmt_blocks(mp);
}
/*
@@ -1228,7 +1228,7 @@ xfs_attr_removename(
ASSERT(!args->trans);
rmt_extents = XFS_IEXT_ATTR_MANIP_CNT(
- xfs_attr3_rmt_blocks(mp, XFS_XATTR_SIZE_MAX));
+ xfs_attr3_max_rmt_blocks(mp));
xfs_init_attr_trans(args, &tres, &total);
error = xfs_trans_alloc_inode(dp, &tres, total, 0, rsvd, &args->trans);
diff --git a/fs/xfs/libxfs/xfs_attr_remote.h b/fs/xfs/libxfs/xfs_attr_remote.h
index c64b04f91cafd..e3c6c7d774bf9 100644
--- a/fs/xfs/libxfs/xfs_attr_remote.h
+++ b/fs/xfs/libxfs/xfs_attr_remote.h
@@ -8,6 +8,12 @@
unsigned int xfs_attr3_rmt_blocks(struct xfs_mount *mp, unsigned int attrlen);
+/* Number of rmt blocks needed to store the maximally sized attr value */
+static inline unsigned int xfs_attr3_max_rmt_blocks(struct xfs_mount *mp)
+{
+ return xfs_attr3_rmt_blocks(mp, XFS_XATTR_SIZE_MAX);
+}
+
int xfs_attr_rmtval_get(struct xfs_da_args *args);
int xfs_attr_rmtval_stale(struct xfs_inode *ip, struct xfs_bmbt_irec *map,
xfs_buf_flags_t incore_flags);
diff --git a/fs/xfs/scrub/reap.c b/fs/xfs/scrub/reap.c
index b8166e19726a4..fbf4d248f0060 100644
--- a/fs/xfs/scrub/reap.c
+++ b/fs/xfs/scrub/reap.c
@@ -227,7 +227,7 @@ xrep_bufscan_max_sectors(
int max_fsbs;
/* Remote xattr values are the largest buffers that we support. */
- max_fsbs = xfs_attr3_rmt_blocks(mp, XFS_XATTR_SIZE_MAX);
+ max_fsbs = xfs_attr3_max_rmt_blocks(mp);
return XFS_FSB_TO_BB(mp, min_t(xfs_extlen_t, fsblocks, max_fsbs));
}
@@ -1070,7 +1070,7 @@ xreap_bmapi_binval(
* of the next hole.
*/
off = imap->br_startoff + imap->br_blockcount;
- max_off = off + xfs_attr3_rmt_blocks(mp, XFS_XATTR_SIZE_MAX);
+ max_off = off + xfs_attr3_max_rmt_blocks(mp);
while (off < max_off) {
struct xfs_bmbt_irec hmap;
int nhmaps = 1;
^ permalink raw reply related [flat|nested] 111+ messages in thread
* [PATCH 04/29] xfs: minor cleanups of xfs_attr3_rmt_blocks
2024-03-30 0:32 ` [PATCHSET v5.5 2/2] xfs: fs-verity support Darrick J. Wong
` (2 preceding siblings ...)
2024-03-30 0:36 ` [PATCH 03/29] xfs: create a helper to compute the blockcount of a max sized remote value Darrick J. Wong
@ 2024-03-30 0:37 ` Darrick J. Wong
2024-04-02 10:11 ` Andrey Albershteyn
2024-03-30 0:37 ` [PATCH 05/29] xfs: add attribute type for fs-verity Darrick J. Wong
` (24 subsequent siblings)
28 siblings, 1 reply; 111+ messages in thread
From: Darrick J. Wong @ 2024-03-30 0:37 UTC (permalink / raw)
To: djwong, ebiggers, aalbersh; +Cc: linux-xfs, linux-fsdevel, fsverity
From: Darrick J. Wong <djwong@kernel.org>
Clean up the type signature of this function since we don't have
negative attr lengths or block counts.
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
---
fs/xfs/libxfs/xfs_attr_remote.c | 16 ++++++++--------
1 file changed, 8 insertions(+), 8 deletions(-)
diff --git a/fs/xfs/libxfs/xfs_attr_remote.c b/fs/xfs/libxfs/xfs_attr_remote.c
index efecebc20ec46..d5add11d0200e 100644
--- a/fs/xfs/libxfs/xfs_attr_remote.c
+++ b/fs/xfs/libxfs/xfs_attr_remote.c
@@ -56,19 +56,19 @@ xfs_attr3_rmt_buf_space(
return blocksize;
}
-/*
- * Each contiguous block has a header, so it is not just a simple attribute
- * length to FSB conversion.
- */
+/* Compute number of fsblocks needed to store a remote attr value */
unsigned int
xfs_attr3_rmt_blocks(
struct xfs_mount *mp,
unsigned int attrlen)
{
- if (xfs_has_crc(mp)) {
- unsigned int buflen = xfs_attr3_rmt_buf_space(mp);
- return (attrlen + buflen - 1) / buflen;
- }
+ /*
+ * Each contiguous block has a header, so it is not just a simple
+ * attribute length to FSB conversion.
+ */
+ if (xfs_has_crc(mp))
+ return howmany(attrlen, xfs_attr3_rmt_buf_space(mp));
+
return XFS_B_TO_FSB(mp, attrlen);
}
^ permalink raw reply related [flat|nested] 111+ messages in thread
* [PATCH 05/29] xfs: add attribute type for fs-verity
2024-03-30 0:32 ` [PATCHSET v5.5 2/2] xfs: fs-verity support Darrick J. Wong
` (3 preceding siblings ...)
2024-03-30 0:37 ` [PATCH 04/29] xfs: minor cleanups of xfs_attr3_rmt_blocks Darrick J. Wong
@ 2024-03-30 0:37 ` Darrick J. Wong
2024-03-30 0:37 ` [PATCH 06/29] xfs: do not use xfs_attr3_rmt_hdr for remote verity value blocks Darrick J. Wong
` (23 subsequent siblings)
28 siblings, 0 replies; 111+ messages in thread
From: Darrick J. Wong @ 2024-03-30 0:37 UTC (permalink / raw)
To: djwong, ebiggers, aalbersh; +Cc: linux-xfs, linux-fsdevel, fsverity
From: Andrey Albershteyn <aalbersh@redhat.com>
The Merkle tree blocks and descriptor are stored in the extended
attributes of the inode. Add a new attribute type for fs-verity
metadata. Add XFS_ATTR_INTERNAL_MASK to skip parent pointer and
fs-verity attributes as those are only for internal use. While we're
at it, add a few comments in relevant places that internally visible
attributes are not supposed to be handled via the interface defined in
xfs_xattr.c.
Signed-off-by: Andrey Albershteyn <aalbersh@redhat.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
---
fs/xfs/libxfs/xfs_da_format.h | 10 +++++++++-
fs/xfs/libxfs/xfs_log_format.h | 1 +
fs/xfs/xfs_ioctl.c | 5 +++++
fs/xfs/xfs_trace.h | 3 ++-
fs/xfs/xfs_xattr.c | 10 ++++++++++
5 files changed, 27 insertions(+), 2 deletions(-)
diff --git a/fs/xfs/libxfs/xfs_da_format.h b/fs/xfs/libxfs/xfs_da_format.h
index 30c97aecd8115..238208e5783b2 100644
--- a/fs/xfs/libxfs/xfs_da_format.h
+++ b/fs/xfs/libxfs/xfs_da_format.h
@@ -715,14 +715,22 @@ struct xfs_attr3_leafblock {
#define XFS_ATTR_ROOT_BIT 1 /* limit access to trusted attrs */
#define XFS_ATTR_SECURE_BIT 2 /* limit access to secure attrs */
#define XFS_ATTR_PARENT_BIT 3 /* parent pointer attrs */
+#define XFS_ATTR_VERITY_BIT 4 /* verity merkle tree and descriptor */
#define XFS_ATTR_INCOMPLETE_BIT 7 /* attr in middle of create/delete */
#define XFS_ATTR_LOCAL (1u << XFS_ATTR_LOCAL_BIT)
#define XFS_ATTR_ROOT (1u << XFS_ATTR_ROOT_BIT)
#define XFS_ATTR_SECURE (1u << XFS_ATTR_SECURE_BIT)
#define XFS_ATTR_PARENT (1u << XFS_ATTR_PARENT_BIT)
+#define XFS_ATTR_VERITY (1u << XFS_ATTR_VERITY_BIT)
#define XFS_ATTR_INCOMPLETE (1u << XFS_ATTR_INCOMPLETE_BIT)
#define XFS_ATTR_NSP_ONDISK_MASK \
- (XFS_ATTR_ROOT | XFS_ATTR_SECURE | XFS_ATTR_PARENT)
+ (XFS_ATTR_ROOT | XFS_ATTR_SECURE | XFS_ATTR_PARENT | \
+ XFS_ATTR_VERITY)
+
+/*
+ * Internal attributes not exposed to the user
+ */
+#define XFS_ATTR_INTERNAL_MASK (XFS_ATTR_PARENT | XFS_ATTR_VERITY)
#define XFS_ATTR_NAMESPACE_STR \
{ XFS_ATTR_LOCAL, "local" }, \
diff --git a/fs/xfs/libxfs/xfs_log_format.h b/fs/xfs/libxfs/xfs_log_format.h
index 4ffa195b79097..4173239c229d5 100644
--- a/fs/xfs/libxfs/xfs_log_format.h
+++ b/fs/xfs/libxfs/xfs_log_format.h
@@ -1050,6 +1050,7 @@ struct xfs_icreate_log {
#define XFS_ATTRI_FILTER_MASK (XFS_ATTR_ROOT | \
XFS_ATTR_SECURE | \
XFS_ATTR_PARENT | \
+ XFS_ATTR_VERITY | \
XFS_ATTR_INCOMPLETE)
/*
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index fce42bc6738c8..9d161e16ccf32 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -360,6 +360,11 @@ static unsigned int
xfs_attr_filter(
u32 ioc_flags)
{
+ /*
+ * Only externally visible attributes should be specified here.
+ * Internally used attributes (such as parent pointers or fs-verity)
+ * should not be exposed to userspace.
+ */
if (ioc_flags & XFS_IOC_ATTR_ROOT)
return XFS_ATTR_ROOT;
if (ioc_flags & XFS_IOC_ATTR_SECURE)
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 4bea7ec438eff..e2992b0115ad2 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -107,7 +107,8 @@ struct xfs_fsrefs;
{ XFS_ATTR_ROOT, "ROOT" }, \
{ XFS_ATTR_SECURE, "SECURE" }, \
{ XFS_ATTR_INCOMPLETE, "INCOMPLETE" }, \
- { XFS_ATTR_PARENT, "PARENT" }
+ { XFS_ATTR_PARENT, "PARENT" }, \
+ { XFS_ATTR_VERITY, "VERITY" }
DECLARE_EVENT_CLASS(xfs_attr_list_class,
TP_PROTO(struct xfs_attr_list_context *ctx),
diff --git a/fs/xfs/xfs_xattr.c b/fs/xfs/xfs_xattr.c
index 17953a0fd22b0..31db9fed3f986 100644
--- a/fs/xfs/xfs_xattr.c
+++ b/fs/xfs/xfs_xattr.c
@@ -20,6 +20,13 @@
#include <linux/posix_acl_xattr.h>
+/*
+ * This file defines interface to work with externally visible extended
+ * attributes, such as those in user, system or security namespaces. This
+ * interface should not be used for internally used attributes (consider
+ * xfs_attr.c).
+ */
+
/*
* Get permission to use log-assisted atomic exchange of file extents.
* Callers must not be running any transactions or hold any ILOCKs.
@@ -218,6 +225,9 @@ xfs_xattr_put_listent(
if (flags & XFS_ATTR_PARENT)
return;
+ if (flags & XFS_ATTR_INTERNAL_MASK)
+ return;
+
if (flags & XFS_ATTR_ROOT) {
#ifdef CONFIG_XFS_POSIX_ACL
if (namelen == SGI_ACL_FILE_SIZE &&
^ permalink raw reply related [flat|nested] 111+ messages in thread
* [PATCH 06/29] xfs: do not use xfs_attr3_rmt_hdr for remote verity value blocks
2024-03-30 0:32 ` [PATCHSET v5.5 2/2] xfs: fs-verity support Darrick J. Wong
` (4 preceding siblings ...)
2024-03-30 0:37 ` [PATCH 05/29] xfs: add attribute type for fs-verity Darrick J. Wong
@ 2024-03-30 0:37 ` Darrick J. Wong
2024-03-30 0:37 ` [PATCH 07/29] xfs: add fs-verity ro-compat flag Darrick J. Wong
` (22 subsequent siblings)
28 siblings, 0 replies; 111+ messages in thread
From: Darrick J. Wong @ 2024-03-30 0:37 UTC (permalink / raw)
To: djwong, ebiggers, aalbersh; +Cc: linux-xfs, linux-fsdevel, fsverity
From: Darrick J. Wong <djwong@kernel.org>
I enabled fsverity for a ~250MB file and noticed the following xattr
keys that got created for the merkle tree. These two merkle tree blocks
are written out in ascending order:
nvlist[52].merkle_off = 0x111000
nvlist[53].valueblk = 0x222
nvlist[53].valuelen = 4096
nvlist[53].merkle_off = 0x112000
nvlist[54].valueblk = 0x224
nvlist[54].valuelen = 4096
Notice that while the valuelen is 4k, the block offset increases by two.
Curious, I then loaded up ablock 0x223:
hdr.magic = 0x5841524d
hdr.offset = 4040
hdr.bytes = 56
hdr.crc = 0xad1b8bd8 (correct)
hdr.uuid = 07d3f25c-e550-4118-8ff5-a45c017ba5ef
hdr.owner = 133
hdr.bno = 442144
hdr.lsn = 0xffffffffffffffff
data = <56 bytes of charns data>
Ugh! Each 4k merkle tree block takes up two fsblocks due to the remote
value header that XFS puts at the start of each remote value block.
That header is 56 bytes long, which is exactly the length of the
spillover here. This isn't good.
The first thing that I tried was enabling fsverity on a bunch of files,
extracting the merkle tree blocks one by one, and testing their
compressibility with gzip, zstd, and xz. Merkle tree blocks are nearly
indistinguishable from random data, with the result that 99% of the
blocks I sampled got larger under compression. So that's out.
Next I decided to try eliminating the xfs_attr3_rmt_hdr header, which
would make verity remote values align perfectly with filesystem blocks.
Because remote value blocks are written out with xfs_bwrite, the lsn
field isn't useful. The merkle tree is itself a bunch of hashes of data
blocks or other merkle tree blocks, which means that a bitflip will
result in a verity failure somewhere in the file. Hence we don't need
to store an explicit crc, and we could just XOR the ondisk merkle tree
contents with selected attributes.
In the end I decided to create a smaller header structure containing
only a magic, the fsuuid, the inode owner, and the ondisk block number.
These values get XORd into the beginning of the merkle tree block to
detect lost writes when we're writing remote XFS_ATTR_VERITY values to
disk, and XORd out when reading them back in.
With this format change applied, the fsverity overhead halves.
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
---
fs/xfs/libxfs/xfs_attr.c | 9 ++-
fs/xfs/libxfs/xfs_attr_leaf.c | 5 +-
fs/xfs/libxfs/xfs_attr_remote.c | 125 ++++++++++++++++++++++++++++++++++-----
fs/xfs/libxfs/xfs_attr_remote.h | 8 ++
fs/xfs/libxfs/xfs_da_format.h | 22 +++++++
fs/xfs/libxfs/xfs_ondisk.h | 2 +
fs/xfs/libxfs/xfs_shared.h | 1
fs/xfs/xfs_attr_inactive.c | 2 -
8 files changed, 150 insertions(+), 24 deletions(-)
diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c
index c21def69cf636..931ec563a7460 100644
--- a/fs/xfs/libxfs/xfs_attr.c
+++ b/fs/xfs/libxfs/xfs_attr.c
@@ -311,7 +311,8 @@ xfs_attr_calc_size(
* Out of line attribute, cannot double split, but
* make room for the attribute value itself.
*/
- uint dblocks = xfs_attr3_rmt_blocks(mp, args->valuelen);
+ uint dblocks = xfs_attr3_rmt_blocks(mp, args->attr_filter,
+ args->valuelen);
nblks += dblocks;
nblks += XFS_NEXTENTADD_SPACE_RES(mp, dblocks, XFS_ATTR_FORK);
}
@@ -1060,7 +1061,8 @@ xfs_attr_set(
return error;
if (!local)
- rmt_blks = xfs_attr3_rmt_blocks(mp, args->valuelen);
+ rmt_blks = xfs_attr3_rmt_blocks(mp, args->attr_filter,
+ args->valuelen);
} else {
XFS_STATS_INC(mp, xs_attr_remove);
rmt_blks = xfs_attr3_max_rmt_blocks(mp);
@@ -1166,7 +1168,8 @@ xfs_attr_setname(
if (!local)
rmt_extents = XFS_IEXT_ATTR_MANIP_CNT(
- xfs_attr3_rmt_blocks(mp, args->valuelen));
+ xfs_attr3_rmt_blocks(mp, args->attr_filter,
+ args->valuelen));
xfs_init_attr_trans(args, &tres, &total);
error = xfs_trans_alloc_inode(dp, &tres, total, 0, rsvd, &args->trans);
diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c
index 856bcf164a0e4..b52aa5c7392ba 100644
--- a/fs/xfs/libxfs/xfs_attr_leaf.c
+++ b/fs/xfs/libxfs/xfs_attr_leaf.c
@@ -1561,7 +1561,8 @@ xfs_attr3_leaf_add_work(
name_rmt->valuelen = 0;
name_rmt->valueblk = 0;
args->rmtblkno = 1;
- args->rmtblkcnt = xfs_attr3_rmt_blocks(mp, args->valuelen);
+ args->rmtblkcnt = xfs_attr3_rmt_blocks(mp, args->attr_filter,
+ args->valuelen);
args->rmtvaluelen = args->valuelen;
}
xfs_trans_log_buf(args->trans, bp,
@@ -2494,6 +2495,7 @@ xfs_attr3_leaf_lookup_int(
args->rmtblkno = be32_to_cpu(name_rmt->valueblk);
args->rmtblkcnt = xfs_attr3_rmt_blocks(
args->dp->i_mount,
+ args->attr_filter,
args->rmtvaluelen);
return -EEXIST;
}
@@ -2542,6 +2544,7 @@ xfs_attr3_leaf_getvalue(
args->rmtvaluelen = be32_to_cpu(name_rmt->valuelen);
args->rmtblkno = be32_to_cpu(name_rmt->valueblk);
args->rmtblkcnt = xfs_attr3_rmt_blocks(args->dp->i_mount,
+ args->attr_filter,
args->rmtvaluelen);
return xfs_attr_copy_value(args, NULL, args->rmtvaluelen);
}
diff --git a/fs/xfs/libxfs/xfs_attr_remote.c b/fs/xfs/libxfs/xfs_attr_remote.c
index d5add11d0200e..b2b787bbc45a0 100644
--- a/fs/xfs/libxfs/xfs_attr_remote.c
+++ b/fs/xfs/libxfs/xfs_attr_remote.c
@@ -43,14 +43,23 @@
* the logging system and therefore never have a log item.
*/
+static inline bool
+xfs_attr3_rmt_has_header(
+ struct xfs_mount *mp,
+ unsigned int attrns)
+{
+ return xfs_has_crc(mp) && !(attrns & XFS_ATTR_VERITY);
+}
+
/* How many bytes can be stored in a remote value buffer? */
inline unsigned int
xfs_attr3_rmt_buf_space(
- struct xfs_mount *mp)
+ struct xfs_mount *mp,
+ unsigned int attrns)
{
unsigned int blocksize = mp->m_attr_geo->blksize;
- if (xfs_has_crc(mp))
+ if (xfs_attr3_rmt_has_header(mp, attrns))
return blocksize - sizeof(struct xfs_attr3_rmt_hdr);
return blocksize;
@@ -60,14 +69,15 @@ xfs_attr3_rmt_buf_space(
unsigned int
xfs_attr3_rmt_blocks(
struct xfs_mount *mp,
+ unsigned int attrns,
unsigned int attrlen)
{
/*
* Each contiguous block has a header, so it is not just a simple
* attribute length to FSB conversion.
*/
- if (xfs_has_crc(mp))
- return howmany(attrlen, xfs_attr3_rmt_buf_space(mp));
+ if (xfs_attr3_rmt_has_header(mp, attrns))
+ return howmany(attrlen, xfs_attr3_rmt_buf_space(mp, attrns));
return XFS_B_TO_FSB(mp, attrlen);
}
@@ -249,6 +259,42 @@ const struct xfs_buf_ops xfs_attr3_rmt_buf_ops = {
.verify_struct = xfs_attr3_rmt_verify_struct,
};
+static void
+xfs_attr3_rmtverity_read_verify(
+ struct xfs_buf *bp)
+{
+}
+
+static xfs_failaddr_t
+xfs_attr3_rmtverity_verify_struct(
+ struct xfs_buf *bp)
+{
+ return NULL;
+}
+
+static void
+xfs_attr3_rmtverity_write_verify(
+ struct xfs_buf *bp)
+{
+}
+
+const struct xfs_buf_ops xfs_attr3_rmtverity_buf_ops = {
+ .name = "xfs_attr3_remote_verity",
+ .magic = { 0, 0 },
+ .verify_read = xfs_attr3_rmtverity_read_verify,
+ .verify_write = xfs_attr3_rmtverity_write_verify,
+ .verify_struct = xfs_attr3_rmtverity_verify_struct,
+};
+
+inline const struct xfs_buf_ops *
+xfs_attr3_remote_buf_ops(
+ unsigned int attrns)
+{
+ if (attrns & XFS_ATTR_VERITY)
+ return &xfs_attr3_rmtverity_buf_ops;
+ return &xfs_attr3_rmt_buf_ops;
+}
+
STATIC int
xfs_attr3_rmt_hdr_set(
struct xfs_mount *mp,
@@ -285,6 +331,40 @@ xfs_attr3_rmt_hdr_set(
return sizeof(struct xfs_attr3_rmt_hdr);
}
+static void
+xfs_attr_rmtverity_transform(
+ struct xfs_buf *bp,
+ xfs_ino_t ino,
+ void *buf,
+ unsigned int byte_cnt)
+{
+ struct xfs_mount *mp = bp->b_mount;
+ struct xfs_attr3_rmtverity_hdr *hdr = buf;
+ char *dst;
+ const char *src;
+ unsigned int i;
+
+ if (byte_cnt >= offsetofend(struct xfs_attr3_rmtverity_hdr, rmv_owner))
+ hdr->rmv_owner ^= cpu_to_be64(ino);
+
+ if (byte_cnt >= offsetofend(struct xfs_attr3_rmtverity_hdr, rmv_blkno))
+ hdr->rmv_blkno ^= cpu_to_be64(xfs_buf_daddr(bp));
+
+ if (byte_cnt >= offsetofend(struct xfs_attr3_rmtverity_hdr, rmv_magic))
+ hdr->rmv_magic ^= cpu_to_be32(XFS_ATTR3_RMTVERITY_MAGIC);
+
+ if (byte_cnt <= offsetof(struct xfs_attr3_rmtverity_hdr, rmv_uuid))
+ return;
+
+ byte_cnt -= offsetof(struct xfs_attr3_rmtverity_hdr, rmv_uuid);
+ byte_cnt = min(byte_cnt, sizeof(uuid_t));
+
+ dst = (void *)&hdr->rmv_uuid;
+ src = (void *)&mp->m_sb.sb_meta_uuid;
+ for (i = 0; i < byte_cnt; i++)
+ dst[i] ^= src[i];
+}
+
/*
* Helper functions to copy attribute data in and out of the one disk extents
*/
@@ -294,6 +374,7 @@ xfs_attr_rmtval_copyout(
struct xfs_buf *bp,
struct xfs_inode *dp,
xfs_ino_t owner,
+ unsigned int attrns,
unsigned int *offset,
unsigned int *valuelen,
uint8_t **dst)
@@ -307,11 +388,11 @@ xfs_attr_rmtval_copyout(
while (len > 0 && *valuelen > 0) {
unsigned int hdr_size = 0;
- unsigned int byte_cnt = xfs_attr3_rmt_buf_space(mp);
+ unsigned int byte_cnt = xfs_attr3_rmt_buf_space(mp, attrns);
byte_cnt = min(*valuelen, byte_cnt);
- if (xfs_has_crc(mp)) {
+ if (xfs_attr3_rmt_has_header(mp, attrns)) {
if (xfs_attr3_rmt_hdr_ok(src, owner, *offset,
byte_cnt, bno)) {
xfs_alert(mp,
@@ -325,6 +406,10 @@ xfs_attr_rmtval_copyout(
memcpy(*dst, src + hdr_size, byte_cnt);
+ if (attrns & XFS_ATTR_VERITY)
+ xfs_attr_rmtverity_transform(bp, dp->i_ino, *dst,
+ byte_cnt);
+
/* roll buffer forwards */
len -= blksize;
src += blksize;
@@ -343,6 +428,7 @@ xfs_attr_rmtval_copyin(
struct xfs_mount *mp,
struct xfs_buf *bp,
xfs_ino_t ino,
+ unsigned int attrns,
unsigned int *offset,
unsigned int *valuelen,
uint8_t **src)
@@ -355,15 +441,20 @@ xfs_attr_rmtval_copyin(
ASSERT(len >= blksize);
while (len > 0 && *valuelen > 0) {
- unsigned int hdr_size;
- unsigned int byte_cnt = xfs_attr3_rmt_buf_space(mp);
+ unsigned int hdr_size = 0;
+ unsigned int byte_cnt = xfs_attr3_rmt_buf_space(mp, attrns);
byte_cnt = min(*valuelen, byte_cnt);
- hdr_size = xfs_attr3_rmt_hdr_set(mp, dst, ino, *offset,
- byte_cnt, bno);
+ if (xfs_attr3_rmt_has_header(mp, attrns))
+ hdr_size = xfs_attr3_rmt_hdr_set(mp, dst, ino, *offset,
+ byte_cnt, bno);
memcpy(dst + hdr_size, *src, byte_cnt);
+ if (attrns & XFS_ATTR_VERITY)
+ xfs_attr_rmtverity_transform(bp, ino, dst + hdr_size,
+ byte_cnt);
+
/*
* If this is the last block, zero the remainder of it.
* Check that we are actually the last block, too.
@@ -408,6 +499,7 @@ xfs_attr_rmtval_get(
unsigned int blkcnt = args->rmtblkcnt;
int i;
unsigned int offset = 0;
+ const struct xfs_buf_ops *ops = xfs_attr3_remote_buf_ops(args->attr_filter);
trace_xfs_attr_rmtval_get(args);
@@ -433,14 +525,15 @@ xfs_attr_rmtval_get(
dblkno = XFS_FSB_TO_DADDR(mp, map[i].br_startblock);
dblkcnt = XFS_FSB_TO_BB(mp, map[i].br_blockcount);
error = xfs_buf_read(mp->m_ddev_targp, dblkno, dblkcnt,
- 0, &bp, &xfs_attr3_rmt_buf_ops);
+ 0, &bp, ops);
if (xfs_metadata_is_sick(error))
xfs_dirattr_mark_sick(args->dp, XFS_ATTR_FORK);
if (error)
return error;
error = xfs_attr_rmtval_copyout(mp, bp, args->dp,
- args->owner, &offset, &valuelen, &dst);
+ args->owner, args->attr_filter,
+ &offset, &valuelen, &dst);
xfs_buf_relse(bp);
if (error)
return error;
@@ -473,7 +566,7 @@ xfs_attr_rmt_find_hole(
* straight byte to FSB conversion and have to take the header space
* into account.
*/
- blkcnt = xfs_attr3_rmt_blocks(mp, args->rmtvaluelen);
+ blkcnt = xfs_attr3_rmt_blocks(mp, args->attr_filter, args->rmtvaluelen);
error = xfs_bmap_first_unused(args->trans, args->dp, blkcnt, &lfileoff,
XFS_ATTR_FORK);
if (error)
@@ -532,10 +625,10 @@ xfs_attr_rmtval_set_value(
error = xfs_buf_get(mp->m_ddev_targp, dblkno, dblkcnt, &bp);
if (error)
return error;
- bp->b_ops = &xfs_attr3_rmt_buf_ops;
+ bp->b_ops = xfs_attr3_remote_buf_ops(args->attr_filter);
- xfs_attr_rmtval_copyin(mp, bp, args->owner, &offset, &valuelen,
- &src);
+ xfs_attr_rmtval_copyin(mp, bp, args->owner, args->attr_filter,
+ &offset, &valuelen, &src);
error = xfs_bwrite(bp); /* GROT: NOTE: synchronous write */
xfs_buf_relse(bp);
diff --git a/fs/xfs/libxfs/xfs_attr_remote.h b/fs/xfs/libxfs/xfs_attr_remote.h
index e3c6c7d774bf9..344fea1b9b50e 100644
--- a/fs/xfs/libxfs/xfs_attr_remote.h
+++ b/fs/xfs/libxfs/xfs_attr_remote.h
@@ -6,12 +6,13 @@
#ifndef __XFS_ATTR_REMOTE_H__
#define __XFS_ATTR_REMOTE_H__
-unsigned int xfs_attr3_rmt_blocks(struct xfs_mount *mp, unsigned int attrlen);
+unsigned int xfs_attr3_rmt_blocks(struct xfs_mount *mp, unsigned int attrns,
+ unsigned int attrlen);
/* Number of rmt blocks needed to store the maximally sized attr value */
static inline unsigned int xfs_attr3_max_rmt_blocks(struct xfs_mount *mp)
{
- return xfs_attr3_rmt_blocks(mp, XFS_XATTR_SIZE_MAX);
+ return xfs_attr3_rmt_blocks(mp, 0, XFS_XATTR_SIZE_MAX);
}
int xfs_attr_rmtval_get(struct xfs_da_args *args);
@@ -23,4 +24,7 @@ int xfs_attr_rmt_find_hole(struct xfs_da_args *args);
int xfs_attr_rmtval_set_value(struct xfs_da_args *args);
int xfs_attr_rmtval_set_blk(struct xfs_attr_intent *attr);
int xfs_attr_rmtval_find_space(struct xfs_attr_intent *attr);
+
+const struct xfs_buf_ops *xfs_attr3_remote_buf_ops(unsigned int attrns);
+
#endif /* __XFS_ATTR_REMOTE_H__ */
diff --git a/fs/xfs/libxfs/xfs_da_format.h b/fs/xfs/libxfs/xfs_da_format.h
index 238208e5783b2..8cbda181c2f48 100644
--- a/fs/xfs/libxfs/xfs_da_format.h
+++ b/fs/xfs/libxfs/xfs_da_format.h
@@ -878,7 +878,27 @@ struct xfs_attr3_rmt_hdr {
#define XFS_ATTR3_RMT_CRC_OFF offsetof(struct xfs_attr3_rmt_hdr, rm_crc)
-unsigned int xfs_attr3_rmt_buf_space(struct xfs_mount *mp);
+unsigned int xfs_attr3_rmt_buf_space(struct xfs_mount *mp, unsigned int attrns);
+
+/*
+ * XFS_ATTR_VERITY remote attribute block format definition
+ *
+ * fsverity stores blocks of a merkle tree in the extended attributes. The
+ * size of these blocks is a power of two, so we'd like to reduce overhead by
+ * not storing a remote header at the start of each ondisk block. Because
+ * merkle tree blocks are themselves hashes of other merkle tree or data
+ * blocks, we can detect bitflips without needing our own checksum. Settle for
+ * XORing the owner, blkno, magic, and metauuid into the start of each ondisk
+ * merkle tree block.
+ */
+#define XFS_ATTR3_RMTVERITY_MAGIC 0x5955434B /* YUCK */
+
+struct xfs_attr3_rmtverity_hdr {
+ __be64 rmv_owner;
+ __be64 rmv_blkno;
+ __be32 rmv_magic;
+ uuid_t rmv_uuid;
+} __packed;
/* Number of bytes in a directory block. */
static inline unsigned int xfs_dir2_dirblock_bytes(struct xfs_sb *sbp)
diff --git a/fs/xfs/libxfs/xfs_ondisk.h b/fs/xfs/libxfs/xfs_ondisk.h
index 61ba994de69ba..d46352d60d645 100644
--- a/fs/xfs/libxfs/xfs_ondisk.h
+++ b/fs/xfs/libxfs/xfs_ondisk.h
@@ -59,6 +59,7 @@ xfs_check_ondisk_structs(void)
XFS_CHECK_STRUCT_SIZE(struct xfs_attr3_leaf_hdr, 80);
XFS_CHECK_STRUCT_SIZE(struct xfs_attr3_leafblock, 80);
XFS_CHECK_STRUCT_SIZE(struct xfs_attr3_rmt_hdr, 56);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_attr3_rmtverity_hdr, 36);
XFS_CHECK_STRUCT_SIZE(struct xfs_da3_blkinfo, 56);
XFS_CHECK_STRUCT_SIZE(struct xfs_da3_intnode, 64);
XFS_CHECK_STRUCT_SIZE(struct xfs_da3_node_hdr, 64);
@@ -206,6 +207,7 @@ xfs_check_ondisk_structs(void)
XFS_CHECK_VALUE(XFS_DQ_BIGTIME_EXPIRY_MIN << XFS_DQ_BIGTIME_SHIFT, 4);
XFS_CHECK_VALUE(XFS_DQ_BIGTIME_EXPIRY_MAX << XFS_DQ_BIGTIME_SHIFT,
16299260424LL);
+
}
#endif /* __XFS_ONDISK_H */
diff --git a/fs/xfs/libxfs/xfs_shared.h b/fs/xfs/libxfs/xfs_shared.h
index 40a4826603074..eb3a674fe1615 100644
--- a/fs/xfs/libxfs/xfs_shared.h
+++ b/fs/xfs/libxfs/xfs_shared.h
@@ -26,6 +26,7 @@ extern const struct xfs_buf_ops xfs_agfl_buf_ops;
extern const struct xfs_buf_ops xfs_agi_buf_ops;
extern const struct xfs_buf_ops xfs_attr3_leaf_buf_ops;
extern const struct xfs_buf_ops xfs_attr3_rmt_buf_ops;
+extern const struct xfs_buf_ops xfs_attr3_rmtverity_buf_ops;
extern const struct xfs_buf_ops xfs_bmbt_buf_ops;
extern const struct xfs_buf_ops xfs_bnobt_buf_ops;
extern const struct xfs_buf_ops xfs_cntbt_buf_ops;
diff --git a/fs/xfs/xfs_attr_inactive.c b/fs/xfs/xfs_attr_inactive.c
index 24fb12986a568..93fa78a230d04 100644
--- a/fs/xfs/xfs_attr_inactive.c
+++ b/fs/xfs/xfs_attr_inactive.c
@@ -110,7 +110,7 @@ xfs_attr3_leaf_inactive(
if (!name_rmt->valueblk)
continue;
- blkcnt = xfs_attr3_rmt_blocks(dp->i_mount,
+ blkcnt = xfs_attr3_rmt_blocks(dp->i_mount, entry->flags,
be32_to_cpu(name_rmt->valuelen));
error = xfs_attr3_rmt_stale(dp,
be32_to_cpu(name_rmt->valueblk), blkcnt);
^ permalink raw reply related [flat|nested] 111+ messages in thread
* [PATCH 07/29] xfs: add fs-verity ro-compat flag
2024-03-30 0:32 ` [PATCHSET v5.5 2/2] xfs: fs-verity support Darrick J. Wong
` (5 preceding siblings ...)
2024-03-30 0:37 ` [PATCH 06/29] xfs: do not use xfs_attr3_rmt_hdr for remote verity value blocks Darrick J. Wong
@ 2024-03-30 0:37 ` Darrick J. Wong
2024-03-30 0:38 ` [PATCH 08/29] xfs: add inode on-disk VERITY flag Darrick J. Wong
` (21 subsequent siblings)
28 siblings, 0 replies; 111+ messages in thread
From: Darrick J. Wong @ 2024-03-30 0:37 UTC (permalink / raw)
To: djwong, ebiggers, aalbersh; +Cc: linux-xfs, linux-fsdevel, fsverity
From: Andrey Albershteyn <aalbersh@redhat.com>
To mark inodes with fs-verity enabled, the new XFS_DIFLAG2_VERITY flag
will be added in a later patch. This requires a ro-compat flag to let
older kernels know that a filesystem with fs-verity cannot be modified.
Signed-off-by: Andrey Albershteyn <aalbersh@redhat.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
---
fs/xfs/libxfs/xfs_format.h | 1 +
fs/xfs/libxfs/xfs_sb.c | 2 ++
fs/xfs/xfs_mount.h | 2 ++
3 files changed, 5 insertions(+)
diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h
index 86a4c51493e7c..1532d37fd1029 100644
--- a/fs/xfs/libxfs/xfs_format.h
+++ b/fs/xfs/libxfs/xfs_format.h
@@ -387,6 +387,7 @@ xfs_sb_has_compat_feature(
#define XFS_SB_FEAT_RO_COMPAT_RMAPBT (1 << 1) /* reverse map btree */
#define XFS_SB_FEAT_RO_COMPAT_REFLINK (1 << 2) /* reflinked files */
#define XFS_SB_FEAT_RO_COMPAT_INOBTCNT (1 << 3) /* inobt block counts */
+#define XFS_SB_FEAT_RO_COMPAT_VERITY (1 << 4) /* fs-verity */
#define XFS_SB_FEAT_RO_COMPAT_ALL \
(XFS_SB_FEAT_RO_COMPAT_FINOBT | \
XFS_SB_FEAT_RO_COMPAT_RMAPBT | \
diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c
index 747d28477b258..39b5083745d0e 100644
--- a/fs/xfs/libxfs/xfs_sb.c
+++ b/fs/xfs/libxfs/xfs_sb.c
@@ -167,6 +167,8 @@ xfs_sb_version_to_features(
features |= XFS_FEAT_REFLINK;
if (sbp->sb_features_ro_compat & XFS_SB_FEAT_RO_COMPAT_INOBTCNT)
features |= XFS_FEAT_INOBTCNT;
+ if (sbp->sb_features_ro_compat & XFS_SB_FEAT_RO_COMPAT_VERITY)
+ features |= XFS_FEAT_VERITY;
if (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_FTYPE)
features |= XFS_FEAT_FTYPE;
if (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_SPINODES)
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index f89632e0de006..08ec154eb0e98 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -313,6 +313,7 @@ typedef struct xfs_mount {
#define XFS_FEAT_NREXT64 (1ULL << 26) /* large extent counters */
#define XFS_FEAT_METADIR (1ULL << 27) /* metadata directory tree */
#define XFS_FEAT_RTGROUPS (1ULL << 28) /* realtime groups */
+#define XFS_FEAT_VERITY (1ULL << 29) /* fs-verity */
/* Mount features */
#define XFS_FEAT_ADD_LOG_FEAT (1ULL << 47) /* can add log incompat features */
@@ -379,6 +380,7 @@ __XFS_HAS_FEAT(needsrepair, NEEDSREPAIR)
__XFS_HAS_FEAT(large_extent_counts, NREXT64)
__XFS_HAS_FEAT(metadir, METADIR)
__XFS_HAS_FEAT(rtgroups, RTGROUPS)
+__XFS_HAS_FEAT(verity, VERITY)
bool xfs_can_add_incompat_log_features(struct xfs_mount *mp, bool want_audit);
^ permalink raw reply related [flat|nested] 111+ messages in thread
* [PATCH 08/29] xfs: add inode on-disk VERITY flag
2024-03-30 0:32 ` [PATCHSET v5.5 2/2] xfs: fs-verity support Darrick J. Wong
` (6 preceding siblings ...)
2024-03-30 0:37 ` [PATCH 07/29] xfs: add fs-verity ro-compat flag Darrick J. Wong
@ 2024-03-30 0:38 ` Darrick J. Wong
2024-03-30 0:38 ` [PATCH 09/29] xfs: initialize fs-verity on file open and cleanup on inode destruction Darrick J. Wong
` (20 subsequent siblings)
28 siblings, 0 replies; 111+ messages in thread
From: Darrick J. Wong @ 2024-03-30 0:38 UTC (permalink / raw)
To: djwong, ebiggers, aalbersh; +Cc: linux-xfs, linux-fsdevel, fsverity
From: Andrey Albershteyn <aalbersh@redhat.com>
Add a flag to mark inodes which have fs-verity enabled on them (i.e. the
descriptor exists and the tree is built).
Signed-off-by: Andrey Albershteyn <aalbersh@redhat.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
---
fs/xfs/libxfs/xfs_format.h | 5 ++++-
fs/xfs/libxfs/xfs_inode_util.c | 2 ++
fs/xfs/xfs_iops.c | 2 ++
3 files changed, 8 insertions(+), 1 deletion(-)
diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h
index 1532d37fd1029..e7ed55f747d01 100644
--- a/fs/xfs/libxfs/xfs_format.h
+++ b/fs/xfs/libxfs/xfs_format.h
@@ -1207,6 +1207,7 @@ static inline void xfs_dinode_put_rdev(struct xfs_dinode *dip, xfs_dev_t rdev)
#define XFS_DIFLAG2_COWEXTSIZE_BIT 2 /* copy on write extent size hint */
#define XFS_DIFLAG2_BIGTIME_BIT 3 /* big timestamps */
#define XFS_DIFLAG2_NREXT64_BIT 4 /* large extent counters */
+#define XFS_DIFLAG2_VERITY_BIT 5 /* inode sealed by fsverity */
#define XFS_DIFLAG2_METADIR_BIT 63 /* filesystem metadata */
#define XFS_DIFLAG2_DAX (1ULL << XFS_DIFLAG2_DAX_BIT)
@@ -1214,6 +1215,7 @@ static inline void xfs_dinode_put_rdev(struct xfs_dinode *dip, xfs_dev_t rdev)
#define XFS_DIFLAG2_COWEXTSIZE (1ULL << XFS_DIFLAG2_COWEXTSIZE_BIT)
#define XFS_DIFLAG2_BIGTIME (1ULL << XFS_DIFLAG2_BIGTIME_BIT)
#define XFS_DIFLAG2_NREXT64 (1ULL << XFS_DIFLAG2_NREXT64_BIT)
+#define XFS_DIFLAG2_VERITY (1ULL << XFS_DIFLAG2_VERITY_BIT)
/*
* The inode contains filesystem metadata and can be found through the metadata
@@ -1242,7 +1244,8 @@ static inline void xfs_dinode_put_rdev(struct xfs_dinode *dip, xfs_dev_t rdev)
#define XFS_DIFLAG2_ANY \
(XFS_DIFLAG2_DAX | XFS_DIFLAG2_REFLINK | XFS_DIFLAG2_COWEXTSIZE | \
- XFS_DIFLAG2_BIGTIME | XFS_DIFLAG2_NREXT64 | XFS_DIFLAG2_METADIR)
+ XFS_DIFLAG2_BIGTIME | XFS_DIFLAG2_NREXT64 | XFS_DIFLAG2_METADIR | \
+ XFS_DIFLAG2_VERITY)
static inline bool xfs_dinode_has_bigtime(const struct xfs_dinode *dip)
{
diff --git a/fs/xfs/libxfs/xfs_inode_util.c b/fs/xfs/libxfs/xfs_inode_util.c
index a448e4a2a3e59..fcea20ad675e8 100644
--- a/fs/xfs/libxfs/xfs_inode_util.c
+++ b/fs/xfs/libxfs/xfs_inode_util.c
@@ -127,6 +127,8 @@ xfs_ip2xflags(
flags |= FS_XFLAG_DAX;
if (ip->i_diflags2 & XFS_DIFLAG2_COWEXTSIZE)
flags |= FS_XFLAG_COWEXTSIZE;
+ if (ip->i_diflags2 & XFS_DIFLAG2_VERITY)
+ flags |= FS_XFLAG_VERITY;
}
if (xfs_inode_has_attr_fork(ip))
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index b453d5966bfc7..b0672af049043 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -1291,6 +1291,8 @@ xfs_diflags_to_iflags(
flags |= S_NOATIME;
if (init && xfs_inode_should_enable_dax(ip))
flags |= S_DAX;
+ if (xflags & FS_XFLAG_VERITY)
+ flags |= S_VERITY;
/*
* S_DAX can only be set during inode initialization and is never set by
^ permalink raw reply related [flat|nested] 111+ messages in thread
* [PATCH 09/29] xfs: initialize fs-verity on file open and cleanup on inode destruction
2024-03-30 0:32 ` [PATCHSET v5.5 2/2] xfs: fs-verity support Darrick J. Wong
` (7 preceding siblings ...)
2024-03-30 0:38 ` [PATCH 08/29] xfs: add inode on-disk VERITY flag Darrick J. Wong
@ 2024-03-30 0:38 ` Darrick J. Wong
2024-03-30 0:38 ` [PATCH 10/29] xfs: don't allow to enable DAX on fs-verity sealed inode Darrick J. Wong
` (19 subsequent siblings)
28 siblings, 0 replies; 111+ messages in thread
From: Darrick J. Wong @ 2024-03-30 0:38 UTC (permalink / raw)
To: djwong, ebiggers, aalbersh; +Cc: linux-xfs, linux-fsdevel, fsverity
From: Andrey Albershteyn <aalbersh@redhat.com>
fs-verity will read and attach metadata (not the tree itself) from
disk for those inodes which already have fs-verity enabled.
Signed-off-by: Andrey Albershteyn <aalbersh@redhat.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
---
fs/xfs/xfs_file.c | 8 ++++++++
fs/xfs/xfs_super.c | 2 ++
2 files changed, 10 insertions(+)
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 6162d6c12b76d..ce57f5007308a 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -32,6 +32,7 @@
#include <linux/mman.h>
#include <linux/fadvise.h>
#include <linux/mount.h>
+#include <linux/fsverity.h>
static const struct vm_operations_struct xfs_file_vm_ops;
@@ -1413,10 +1414,17 @@ xfs_file_open(
struct inode *inode,
struct file *file)
{
+ int error;
+
if (xfs_is_shutdown(XFS_M(inode->i_sb)))
return -EIO;
file->f_mode |= FMODE_NOWAIT | FMODE_BUF_RASYNC | FMODE_BUF_WASYNC |
FMODE_DIO_PARALLEL_WRITE | FMODE_CAN_ODIRECT;
+
+ error = fsverity_file_open(inode, file);
+ if (error)
+ return error;
+
return generic_file_open(inode, file);
}
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 5304004646b40..42a1e1f23d3b3 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -52,6 +52,7 @@
#include <linux/magic.h>
#include <linux/fs_context.h>
#include <linux/fs_parser.h>
+#include <linux/fsverity.h>
static const struct super_operations xfs_super_operations;
@@ -671,6 +672,7 @@ xfs_fs_destroy_inode(
ASSERT(!rwsem_is_locked(&inode->i_rwsem));
XFS_STATS_INC(ip->i_mount, vn_rele);
XFS_STATS_INC(ip->i_mount, vn_remove);
+ fsverity_cleanup_inode(inode);
xfs_inode_mark_reclaimable(ip);
}
^ permalink raw reply related [flat|nested] 111+ messages in thread
* [PATCH 10/29] xfs: don't allow to enable DAX on fs-verity sealed inode
2024-03-30 0:32 ` [PATCHSET v5.5 2/2] xfs: fs-verity support Darrick J. Wong
` (8 preceding siblings ...)
2024-03-30 0:38 ` [PATCH 09/29] xfs: initialize fs-verity on file open and cleanup on inode destruction Darrick J. Wong
@ 2024-03-30 0:38 ` Darrick J. Wong
2024-03-30 0:38 ` [PATCH 11/29] xfs: disable direct read path for fs-verity files Darrick J. Wong
` (18 subsequent siblings)
28 siblings, 0 replies; 111+ messages in thread
From: Darrick J. Wong @ 2024-03-30 0:38 UTC (permalink / raw)
To: djwong, ebiggers, aalbersh; +Cc: linux-xfs, linux-fsdevel, fsverity
From: Andrey Albershteyn <aalbersh@redhat.com>
fs-verity doesn't support DAX. Forbid the filesystem from enabling DAX on
inodes which already have fs-verity enabled. The opposite case is checked
when fs-verity is being enabled: it won't be enabled if DAX is.
Signed-off-by: Andrey Albershteyn <aalbersh@redhat.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
[djwong: fix typo in subject]
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
---
fs/xfs/xfs_iops.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index b0672af049043..bc8528457a95e 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -1263,6 +1263,8 @@ xfs_inode_should_enable_dax(
return false;
if (!xfs_inode_supports_dax(ip))
return false;
+ if (ip->i_diflags2 & XFS_DIFLAG2_VERITY)
+ return false;
if (xfs_has_dax_always(ip->i_mount))
return true;
if (ip->i_diflags2 & XFS_DIFLAG2_DAX)
^ permalink raw reply related [flat|nested] 111+ messages in thread
* [PATCH 11/29] xfs: disable direct read path for fs-verity files
2024-03-30 0:32 ` [PATCHSET v5.5 2/2] xfs: fs-verity support Darrick J. Wong
` (9 preceding siblings ...)
2024-03-30 0:38 ` [PATCH 10/29] xfs: don't allow to enable DAX on fs-verity sealed inode Darrick J. Wong
@ 2024-03-30 0:38 ` Darrick J. Wong
2024-03-30 0:39 ` [PATCH 12/29] xfs: widen flags argument to the xfs_iflags_* helpers Darrick J. Wong
` (17 subsequent siblings)
28 siblings, 0 replies; 111+ messages in thread
From: Darrick J. Wong @ 2024-03-30 0:38 UTC (permalink / raw)
To: djwong, ebiggers, aalbersh; +Cc: linux-xfs, linux-fsdevel, fsverity
From: Andrey Albershteyn <aalbersh@redhat.com>
The direct I/O path is not supported on verity files. Attempts to use the
direct I/O path on such files should fall back to the buffered I/O path.
Signed-off-by: Andrey Albershteyn <aalbersh@redhat.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
[djwong: fix braces]
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
---
fs/xfs/xfs_file.c | 15 ++++++++++++---
1 file changed, 12 insertions(+), 3 deletions(-)
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index ce57f5007308a..c0b3e8146b753 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -268,7 +268,8 @@ xfs_file_dax_read(
struct kiocb *iocb,
struct iov_iter *to)
{
- struct xfs_inode *ip = XFS_I(iocb->ki_filp->f_mapping->host);
+ struct inode *inode = iocb->ki_filp->f_mapping->host;
+ struct xfs_inode *ip = XFS_I(inode);
ssize_t ret = 0;
trace_xfs_file_dax_read(iocb, to);
@@ -321,10 +322,18 @@ xfs_file_read_iter(
if (IS_DAX(inode))
ret = xfs_file_dax_read(iocb, to);
- else if (iocb->ki_flags & IOCB_DIRECT)
+ else if ((iocb->ki_flags & IOCB_DIRECT) && !fsverity_active(inode))
ret = xfs_file_dio_read(iocb, to);
- else
+ else {
+ /*
+ * In case fs-verity is enabled, we also fall back to the
+ * buffered read from the direct read path. Therefore,
+ * IOCB_DIRECT is set and needs to be cleared (see
+ * generic_file_read_iter())
+ */
+ iocb->ki_flags &= ~IOCB_DIRECT;
ret = xfs_file_buffered_read(iocb, to);
+ }
if (ret > 0)
XFS_STATS_ADD(mp, xs_read_bytes, ret);
^ permalink raw reply related [flat|nested] 111+ messages in thread
* [PATCH 12/29] xfs: widen flags argument to the xfs_iflags_* helpers
2024-03-30 0:32 ` [PATCHSET v5.5 2/2] xfs: fs-verity support Darrick J. Wong
` (10 preceding siblings ...)
2024-03-30 0:38 ` [PATCH 11/29] xfs: disable direct read path for fs-verity files Darrick J. Wong
@ 2024-03-30 0:39 ` Darrick J. Wong
2024-04-02 12:37 ` Andrey Albershteyn
2024-03-30 0:39 ` [PATCH 13/29] xfs: add fs-verity support Darrick J. Wong
` (16 subsequent siblings)
28 siblings, 1 reply; 111+ messages in thread
From: Darrick J. Wong @ 2024-03-30 0:39 UTC (permalink / raw)
To: djwong, ebiggers, aalbersh; +Cc: linux-xfs, linux-fsdevel, fsverity
From: Darrick J. Wong <djwong@kernel.org>
xfs_inode.i_flags is an unsigned long, so make these helpers take that
as the flags argument instead of unsigned short. This is needed for the
next patch.
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
---
fs/xfs/xfs_inode.h | 14 +++++++-------
1 file changed, 7 insertions(+), 7 deletions(-)
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index f6744e4fabc27..5a202706fc4a4 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -211,13 +211,13 @@ xfs_new_eof(struct xfs_inode *ip, xfs_fsize_t new_size)
* i_flags helper functions
*/
static inline void
-__xfs_iflags_set(xfs_inode_t *ip, unsigned short flags)
+__xfs_iflags_set(xfs_inode_t *ip, unsigned long flags)
{
ip->i_flags |= flags;
}
static inline void
-xfs_iflags_set(xfs_inode_t *ip, unsigned short flags)
+xfs_iflags_set(xfs_inode_t *ip, unsigned long flags)
{
spin_lock(&ip->i_flags_lock);
__xfs_iflags_set(ip, flags);
@@ -225,7 +225,7 @@ xfs_iflags_set(xfs_inode_t *ip, unsigned short flags)
}
static inline void
-xfs_iflags_clear(xfs_inode_t *ip, unsigned short flags)
+xfs_iflags_clear(xfs_inode_t *ip, unsigned long flags)
{
spin_lock(&ip->i_flags_lock);
ip->i_flags &= ~flags;
@@ -233,13 +233,13 @@ xfs_iflags_clear(xfs_inode_t *ip, unsigned short flags)
}
static inline int
-__xfs_iflags_test(xfs_inode_t *ip, unsigned short flags)
+__xfs_iflags_test(xfs_inode_t *ip, unsigned long flags)
{
return (ip->i_flags & flags);
}
static inline int
-xfs_iflags_test(xfs_inode_t *ip, unsigned short flags)
+xfs_iflags_test(xfs_inode_t *ip, unsigned long flags)
{
int ret;
spin_lock(&ip->i_flags_lock);
@@ -249,7 +249,7 @@ xfs_iflags_test(xfs_inode_t *ip, unsigned short flags)
}
static inline int
-xfs_iflags_test_and_clear(xfs_inode_t *ip, unsigned short flags)
+xfs_iflags_test_and_clear(xfs_inode_t *ip, unsigned long flags)
{
int ret;
@@ -262,7 +262,7 @@ xfs_iflags_test_and_clear(xfs_inode_t *ip, unsigned short flags)
}
static inline int
-xfs_iflags_test_and_set(xfs_inode_t *ip, unsigned short flags)
+xfs_iflags_test_and_set(xfs_inode_t *ip, unsigned long flags)
{
int ret;
^ permalink raw reply related [flat|nested] 111+ messages in thread
* Re: [PATCH 12/29] xfs: widen flags argument to the xfs_iflags_* helpers
2024-03-30 0:39 ` [PATCH 12/29] xfs: widen flags argument to the xfs_iflags_* helpers Darrick J. Wong
@ 2024-04-02 12:37 ` Andrey Albershteyn
2024-04-02 16:27 ` Darrick J. Wong
0 siblings, 1 reply; 111+ messages in thread
From: Andrey Albershteyn @ 2024-04-02 12:37 UTC (permalink / raw)
To: Darrick J. Wong; +Cc: ebiggers, linux-xfs, linux-fsdevel, fsverity
On 2024-03-29 17:39:11, Darrick J. Wong wrote:
> From: Darrick J. Wong <djwong@kernel.org>
>
> xfs_inode.i_flags is an unsigned long, so make these helpers take that
> as the flags argument instead of unsigned short. This is needed for the
> next patch.
>
> Signed-off-by: Darrick J. Wong <djwong@kernel.org>
> ---
> fs/xfs/xfs_inode.h | 14 +++++++-------
> 1 file changed, 7 insertions(+), 7 deletions(-)
>
>
Would it also make sense to flip iflags to unsigned long in
xfs_iget_cache_miss()?
Looks good to me:
Reviewed-by: Andrey Albershteyn <aalbersh@redhat.com>
--
- Andrey
^ permalink raw reply [flat|nested] 111+ messages in thread
* Re: [PATCH 12/29] xfs: widen flags argument to the xfs_iflags_* helpers
2024-04-02 12:37 ` Andrey Albershteyn
@ 2024-04-02 16:27 ` Darrick J. Wong
0 siblings, 0 replies; 111+ messages in thread
From: Darrick J. Wong @ 2024-04-02 16:27 UTC (permalink / raw)
To: Andrey Albershteyn; +Cc: ebiggers, linux-xfs, linux-fsdevel, fsverity
On Tue, Apr 02, 2024 at 02:37:50PM +0200, Andrey Albershteyn wrote:
> On 2024-03-29 17:39:11, Darrick J. Wong wrote:
> > From: Darrick J. Wong <djwong@kernel.org>
> >
> > xfs_inode.i_flags is an unsigned long, so make these helpers take that
> > as the flags argument instead of unsigned short. This is needed for the
> > next patch.
> >
> > Signed-off-by: Darrick J. Wong <djwong@kernel.org>
> > ---
> > fs/xfs/xfs_inode.h | 14 +++++++-------
> > 1 file changed, 7 insertions(+), 7 deletions(-)
> >
> >
>
> Would it also make sense to flip iflags to unsigned long in
> xfs_iget_cache_miss()?
I think it could pass XFS_INEW directly to xfs_iflags_set and skip the
iflags local variable completely. IIRC it /was/ used to set dontcache
back when that was an xfs-specific flag.
> Looks good to me:
> Reviewed-by: Andrey Albershteyn <aalbersh@redhat.com>
Thanks!
--D
> --
> - Andrey
>
>
^ permalink raw reply [flat|nested] 111+ messages in thread
* [PATCH 13/29] xfs: add fs-verity support
2024-03-30 0:32 ` [PATCHSET v5.5 2/2] xfs: fs-verity support Darrick J. Wong
` (11 preceding siblings ...)
2024-03-30 0:39 ` [PATCH 12/29] xfs: widen flags argument to the xfs_iflags_* helpers Darrick J. Wong
@ 2024-03-30 0:39 ` Darrick J. Wong
2024-04-02 8:42 ` Andrey Albershteyn
2024-03-30 0:39 ` [PATCH 14/29] xfs: create a per-mount shrinker for verity inodes merkle tree blocks Darrick J. Wong
` (15 subsequent siblings)
28 siblings, 1 reply; 111+ messages in thread
From: Darrick J. Wong @ 2024-03-30 0:39 UTC (permalink / raw)
To: djwong, ebiggers, aalbersh; +Cc: linux-xfs, linux-fsdevel, fsverity
From: Andrey Albershteyn <aalbersh@redhat.com>
Add integration with fs-verity. XFS stores fs-verity metadata in
the extended file attributes. The metadata consists of the verity
descriptor and Merkle tree blocks.
The descriptor is stored under the "vdesc" extended attribute. The
Merkle tree blocks are stored under binary indexes which are offsets
into the Merkle tree.
When fs-verity is enabled on an inode, the XFS_IVERITY_CONSTRUCTION
flag is set, meaning that the Merkle tree is being built. The
initialization ends with storing the verity descriptor and setting
the inode on-disk flag (XFS_DIFLAG2_VERITY).
The verification on read is done in the read path of iomap.
Signed-off-by: Andrey Albershteyn <aalbersh@redhat.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
[djwong: replace caching implementation with an xarray, other cleanups]
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
---
fs/xfs/Makefile | 2
fs/xfs/libxfs/xfs_attr.c | 41 +++
fs/xfs/libxfs/xfs_attr.h | 1
fs/xfs/libxfs/xfs_da_format.h | 14 +
fs/xfs/libxfs/xfs_ondisk.h | 3
fs/xfs/libxfs/xfs_verity.c | 58 ++++
fs/xfs/libxfs/xfs_verity.h | 13 +
fs/xfs/xfs_fsverity.c | 559 +++++++++++++++++++++++++++++++++++++++++
fs/xfs/xfs_fsverity.h | 20 +
fs/xfs/xfs_icache.c | 4
fs/xfs/xfs_inode.h | 5
fs/xfs/xfs_super.c | 17 +
fs/xfs/xfs_trace.h | 32 ++
13 files changed, 769 insertions(+)
create mode 100644 fs/xfs/libxfs/xfs_verity.c
create mode 100644 fs/xfs/libxfs/xfs_verity.h
create mode 100644 fs/xfs/xfs_fsverity.c
create mode 100644 fs/xfs/xfs_fsverity.h
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index 702f2ddc918a1..a4b2f54914a87 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -57,6 +57,7 @@ xfs-y += $(addprefix libxfs/, \
xfs_trans_resv.o \
xfs_trans_space.o \
xfs_types.o \
+ xfs_verity.o \
)
# xfs_rtbitmap is shared with libxfs
xfs-$(CONFIG_XFS_RT) += $(addprefix libxfs/, \
@@ -142,6 +143,7 @@ xfs-$(CONFIG_XFS_POSIX_ACL) += xfs_acl.o
xfs-$(CONFIG_SYSCTL) += xfs_sysctl.o
xfs-$(CONFIG_COMPAT) += xfs_ioctl32.o
xfs-$(CONFIG_EXPORTFS_BLOCK_OPS) += xfs_pnfs.o
+xfs-$(CONFIG_FS_VERITY) += xfs_fsverity.o
# notify failure
ifeq ($(CONFIG_MEMORY_FAILURE),y)
diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c
index 931ec563a7460..c3f686411e378 100644
--- a/fs/xfs/libxfs/xfs_attr.c
+++ b/fs/xfs/libxfs/xfs_attr.c
@@ -27,6 +27,7 @@
#include "xfs_attr_item.h"
#include "xfs_xattr.h"
#include "xfs_parent.h"
+#include "xfs_verity.h"
struct kmem_cache *xfs_attr_intent_cache;
@@ -1262,6 +1263,43 @@ xfs_attr_removename(
goto out_unlock;
}
+/*
+ * Retrieve the value stored in the xattr structure under @args->name.
+ *
+ * The caller must have initialized @args and must not hold any ILOCKs.
+ *
+ * Returns -ENOATTR if the name did not already exist.
+ */
+int
+xfs_attr_getname(
+ struct xfs_da_args *args)
+{
+ unsigned int lock_mode;
+ int error;
+
+ ASSERT(!args->trans);
+
+ error = xfs_trans_alloc_empty(args->dp->i_mount, &args->trans);
+ if (error)
+ return error;
+
+ lock_mode = xfs_ilock_attr_map_shared(args->dp);
+
+ /* Make sure the attr fork iext tree is loaded */
+ if (xfs_inode_hasattr(args->dp)) {
+ error = xfs_iread_extents(args->trans, args->dp, XFS_ATTR_FORK);
+ if (error)
+ goto out_unlock;
+ }
+
+ error = xfs_attr_get_ilocked(args);
+out_unlock:
+ xfs_iunlock(args->dp, lock_mode);
+ xfs_trans_cancel(args->trans);
+ args->trans = NULL;
+ return error;
+}
+
/*========================================================================
* External routines when attribute list is inside the inode
*========================================================================*/
@@ -1743,6 +1781,9 @@ xfs_attr_namecheck(
if (!xfs_attr_check_namespace(attr_flags))
return false;
+ if (attr_flags & XFS_ATTR_VERITY)
+ return xfs_verity_namecheck(attr_flags, name, length);
+
/*
* MAXNAMELEN includes the trailing null, but (name/length) leave it
* out, so use >= for the length check.
diff --git a/fs/xfs/libxfs/xfs_attr.h b/fs/xfs/libxfs/xfs_attr.h
index 958bb9e41ddb3..3e43d715bcdd2 100644
--- a/fs/xfs/libxfs/xfs_attr.h
+++ b/fs/xfs/libxfs/xfs_attr.h
@@ -561,6 +561,7 @@ void xfs_init_attr_trans(struct xfs_da_args *args, struct xfs_trans_res *tres,
int xfs_attr_setname(struct xfs_da_args *args, bool rsvd);
int xfs_attr_removename(struct xfs_da_args *args, bool rsvd);
+int xfs_attr_getname(struct xfs_da_args *args);
/*
* Check to see if the attr should be upgraded from non-existent or shortform to
diff --git a/fs/xfs/libxfs/xfs_da_format.h b/fs/xfs/libxfs/xfs_da_format.h
index 8cbda181c2f48..679cf5b4ad4be 100644
--- a/fs/xfs/libxfs/xfs_da_format.h
+++ b/fs/xfs/libxfs/xfs_da_format.h
@@ -922,4 +922,18 @@ struct xfs_parent_rec {
__be32 p_gen;
} __packed;
+/*
+ * fs-verity attribute name format
+ *
+ * Merkle tree blocks are stored under extended attributes of the inode. The
+ * names of the attributes are byte offsets into the merkle tree.
+ */
+struct xfs_merkle_key {
+ __be64 mk_offset;
+};
+
+/* ondisk xattr name used for the fsverity descriptor */
+#define XFS_VERITY_DESCRIPTOR_NAME "vdesc"
+#define XFS_VERITY_DESCRIPTOR_NAME_LEN (sizeof(XFS_VERITY_DESCRIPTOR_NAME) - 1)
+
#endif /* __XFS_DA_FORMAT_H__ */
diff --git a/fs/xfs/libxfs/xfs_ondisk.h b/fs/xfs/libxfs/xfs_ondisk.h
index d46352d60d645..e927bb778ffdc 100644
--- a/fs/xfs/libxfs/xfs_ondisk.h
+++ b/fs/xfs/libxfs/xfs_ondisk.h
@@ -208,6 +208,9 @@ xfs_check_ondisk_structs(void)
XFS_CHECK_VALUE(XFS_DQ_BIGTIME_EXPIRY_MAX << XFS_DQ_BIGTIME_SHIFT,
16299260424LL);
+ /* fs-verity xattrs */
+ XFS_CHECK_STRUCT_SIZE(struct xfs_merkle_key, 8);
+ XFS_CHECK_VALUE(sizeof(XFS_VERITY_DESCRIPTOR_NAME), 6);
}
#endif /* __XFS_ONDISK_H */
diff --git a/fs/xfs/libxfs/xfs_verity.c b/fs/xfs/libxfs/xfs_verity.c
new file mode 100644
index 0000000000000..bda38b3c19698
--- /dev/null
+++ b/fs/xfs/libxfs/xfs_verity.c
@@ -0,0 +1,58 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2023 Red Hat, Inc.
+ */
+#include "xfs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_da_format.h"
+#include "xfs_da_btree.h"
+#include "xfs_trans_resv.h"
+#include "xfs_mount.h"
+#include "xfs_inode.h"
+#include "xfs_log_format.h"
+#include "xfs_attr.h"
+#include "xfs_verity.h"
+
+/* Set a merkle tree offset in preparation for setting merkle tree attrs. */
+void
+xfs_merkle_key_to_disk(
+ struct xfs_merkle_key *key,
+ uint64_t offset)
+{
+ key->mk_offset = cpu_to_be64(offset);
+}
+
+/* Retrieve the merkle tree offset from the attr data. */
+uint64_t
+xfs_merkle_key_from_disk(
+ const void *attr_name,
+ int namelen)
+{
+ const struct xfs_merkle_key *key = attr_name;
+
+ ASSERT(namelen == sizeof(struct xfs_merkle_key));
+
+ return be64_to_cpu(key->mk_offset);
+}
+
+/* Return true if verity attr name is valid. */
+bool
+xfs_verity_namecheck(
+ unsigned int attr_flags,
+ const void *name,
+ int namelen)
+{
+ if (!(attr_flags & XFS_ATTR_VERITY))
+ return false;
+
+ /*
+ * Merkle tree pages are stored under u64 indexes; verity descriptor
+ * blocks are held in a named attribute.
+ */
+ if (namelen != sizeof(struct xfs_merkle_key) &&
+ namelen != XFS_VERITY_DESCRIPTOR_NAME_LEN)
+ return false;
+
+ return true;
+}
diff --git a/fs/xfs/libxfs/xfs_verity.h b/fs/xfs/libxfs/xfs_verity.h
new file mode 100644
index 0000000000000..c01cc0678bc04
--- /dev/null
+++ b/fs/xfs/libxfs/xfs_verity.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2022 Red Hat, Inc.
+ */
+#ifndef __XFS_VERITY_H__
+#define __XFS_VERITY_H__
+
+void xfs_merkle_key_to_disk(struct xfs_merkle_key *key, uint64_t offset);
+uint64_t xfs_merkle_key_from_disk(const void *attr_name, int namelen);
+bool xfs_verity_namecheck(unsigned int attr_flags, const void *name,
+ int namelen);
+
+#endif /* __XFS_VERITY_H__ */
diff --git a/fs/xfs/xfs_fsverity.c b/fs/xfs/xfs_fsverity.c
new file mode 100644
index 0000000000000..a4a52575fb3d5
--- /dev/null
+++ b/fs/xfs/xfs_fsverity.c
@@ -0,0 +1,559 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2023 Red Hat, Inc.
+ */
+#include "xfs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_da_format.h"
+#include "xfs_da_btree.h"
+#include "xfs_trans_resv.h"
+#include "xfs_mount.h"
+#include "xfs_inode.h"
+#include "xfs_log_format.h"
+#include "xfs_attr.h"
+#include "xfs_verity.h"
+#include "xfs_bmap_util.h"
+#include "xfs_log_format.h"
+#include "xfs_trans.h"
+#include "xfs_attr_leaf.h"
+#include "xfs_trace.h"
+#include "xfs_quota.h"
+#include "xfs_fsverity.h"
+#include <linux/fsverity.h>
+
+/*
+ * Merkle Tree Block Cache
+ * =======================
+ *
+ * fsverity requires that the filesystem implement caching of ondisk merkle
+ * tree blocks. XFS stores merkle tree blocks in the extended attribute data,
+ * which makes it important to keep copies in memory for as long as possible.
+ * This is performed by allocating the data blob structure defined below,
+ * passing the data portion of the blob to xfs_attr_get, and later adding the
+ * data blob to an xarray embedded in the xfs_inode structure.
+ *
+ * The xarray structure indexes merkle tree blocks by the offset given to us by
+ * fsverity, which drastically reduces lookups. First, it eliminates the need
+ * to walk the xattr structure to find the remote block containing the merkle
+ * tree block. Second, access to each block in the xattr structure requires a
+ * lookup in the incore extent btree.
+ */
+struct xfs_merkle_blob {
+ /* refcount of this item; the cache holds its own ref */
+ refcount_t refcount;
+
+ unsigned long flags;
+
+ /* Pointer to the merkle tree block, which is power-of-2 sized */
+ void *data;
+};
+
+#define XFS_MERKLE_BLOB_VERIFIED_BIT (0) /* fsverity validated this */
+
+/*
+ * Allocate a merkle tree blob object to prepare for reading a merkle tree
+ * object from disk.
+ */
+static inline struct xfs_merkle_blob *
+xfs_merkle_blob_alloc(
+ unsigned int blocksize)
+{
+ struct xfs_merkle_blob *mk;
+
+ mk = kmalloc(sizeof(struct xfs_merkle_blob), GFP_KERNEL);
+ if (!mk)
+ return NULL;
+
+ mk->data = kvzalloc(blocksize, GFP_KERNEL);
+ if (!mk->data) {
+ kfree(mk);
+ return NULL;
+ }
+
+ /* Caller owns this refcount. */
+ refcount_set(&mk->refcount, 1);
+ mk->flags = 0;
+ return mk;
+}
+
+/* Free a merkle tree blob. */
+static inline void
+xfs_merkle_blob_rele(
+ struct xfs_merkle_blob *mk)
+{
+ if (refcount_dec_and_test(&mk->refcount)) {
+ kvfree(mk->data);
+ kfree(mk);
+ }
+}
+
+/* Initialize the merkle tree block cache */
+void
+xfs_fsverity_cache_init(
+ struct xfs_inode *ip)
+{
+ xa_init(&ip->i_merkle_blocks);
+}
+
+/*
+ * Drop all the merkle tree blocks out of the cache. Caller must ensure that
+ * there are no active references to cache items.
+ */
+void
+xfs_fsverity_cache_drop(
+ struct xfs_inode *ip)
+{
+ XA_STATE(xas, &ip->i_merkle_blocks, 0);
+ struct xfs_merkle_blob *mk;
+ unsigned long flags;
+
+ xas_lock_irqsave(&xas, flags);
+ xas_for_each(&xas, mk, ULONG_MAX) {
+ ASSERT(refcount_read(&mk->refcount) == 1);
+
+ trace_xfs_fsverity_cache_drop(ip, xas.xa_index, _RET_IP_);
+
+ xas_store(&xas, NULL);
+ xfs_merkle_blob_rele(mk);
+ }
+ xas_unlock_irqrestore(&xas, flags);
+}
+
+/* Destroy the merkle tree block cache */
+void
+xfs_fsverity_cache_destroy(
+ struct xfs_inode *ip)
+{
+ ASSERT(xa_empty(&ip->i_merkle_blocks));
+
+ /*
+ * xa_destroy calls xas_lock from rcu freeing softirq context, so
+ * we must use xa*_lock_irqsave.
+ */
+ xa_destroy(&ip->i_merkle_blocks);
+}
+
+/* Return a cached merkle tree block, or NULL. */
+static struct xfs_merkle_blob *
+xfs_fsverity_cache_load(
+ struct xfs_inode *ip,
+ unsigned long key)
+{
+ XA_STATE(xas, &ip->i_merkle_blocks, key);
+ struct xfs_merkle_blob *mk;
+
+ /* Look up the cached item and try to get an active ref. */
+ rcu_read_lock();
+ do {
+ mk = xas_load(&xas);
+ if (xa_is_zero(mk))
+ mk = NULL;
+ } while (xas_retry(&xas, mk) ||
+ (mk && !refcount_inc_not_zero(&mk->refcount)));
+ rcu_read_unlock();
+
+ if (!mk)
+ return NULL;
+
+ trace_xfs_fsverity_cache_load(ip, key, _RET_IP_);
+ return mk;
+}
+
+/*
+ * Try to store a merkle tree block in the cache with the given key.
+ *
+ * If the merkle tree block is not already in the cache, the given block @mk
+ * will be added to the cache and returned.  The cache takes its own reference
+ * and the caller retains its active reference to @mk.
+ *
+ * If there was already a merkle block in the cache, it will be returned to
+ * the caller with an active reference.  @mk will be untouched.
+ *
+ * If the xarray could not be updated (e.g. no memory for a new node), @mk is
+ * returned without being cached.  The caller still holds the only reference,
+ * so the blob goes away when the caller releases it.
+ */
+static struct xfs_merkle_blob *
+xfs_fsverity_cache_store(
+	struct xfs_inode	*ip,
+	unsigned long		key,
+	struct xfs_merkle_blob	*mk)
+{
+	struct xfs_merkle_blob	*old;
+	unsigned long		flags;
+
+	trace_xfs_fsverity_cache_store(ip, key, _RET_IP_);
+
+	/*
+	 * Either replace a NULL entry with mk, or take an active ref to
+	 * whatever's currently there.  Interrupts are disabled while we hold
+	 * the lock, so the xarray must not be allowed to drop the lock and
+	 * sleep in the allocator; hence the non-blocking gfp flags.
+	 */
+	xa_lock_irqsave(&ip->i_merkle_blocks, flags);
+	do {
+		old = __xa_cmpxchg(&ip->i_merkle_blocks, key, NULL, mk,
+				GFP_NOWAIT | __GFP_NOWARN);
+	} while (old && !xa_is_err(old) &&
+		 !refcount_inc_not_zero(&old->refcount));
+	xa_unlock_irqrestore(&ip->i_merkle_blocks, flags);
+
+	/*
+	 * A failed store is reported as an internal error entry, which is not
+	 * a blob and must never be dereferenced.  Caching is only an
+	 * optimization, so carry on with the uncached blob.
+	 */
+	if (xa_is_err(old))
+		return mk;
+
+	if (old == NULL) {
+		/*
+		 * There was no previous value.  @mk is now live in the cache.
+		 * Bump the active refcount to transfer ownership to the cache
+		 * and return @mk to the caller.
+		 */
+		refcount_inc(&mk->refcount);
+		return mk;
+	}
+
+	/*
+	 * We obtained an active reference to a previous value in the cache.
+	 * Return it to the caller.
+	 */
+	return old;
+}
+
+/*
+ * Initialize an args structure to load or store the fsverity descriptor.
+ * Caller must ensure @args is zeroed except for value and valuelen.
+ */
+static inline void
+xfs_fsverity_init_vdesc_args(
+	struct xfs_inode	*ip,
+	struct xfs_da_args	*args)
+{
+	args->geo = ip->i_mount->m_attr_geo;
+	args->whichfork = XFS_ATTR_FORK;
+	args->attr_filter = XFS_ATTR_VERITY;
+	args->op_flags = XFS_DA_OP_OKNOENT;
+	args->dp = ip;
+	args->owner = ip->i_ino;
+	args->name = XFS_VERITY_DESCRIPTOR_NAME;
+	args->namelen = XFS_VERITY_DESCRIPTOR_NAME_LEN;
+	xfs_attr_sethash(args);
+}
+
+/*
+ * Initialize an args structure to load or store a merkle tree block.
+ * Caller must ensure @args is zeroed except for value and valuelen.
+ */
+static inline void
+xfs_fsverity_init_merkle_args(
+	struct xfs_inode	*ip,
+	struct xfs_merkle_key	*key,
+	uint64_t		merkleoff,
+	struct xfs_da_args	*args)
+{
+	xfs_merkle_key_to_disk(key, merkleoff);
+	args->geo = ip->i_mount->m_attr_geo;
+	args->whichfork = XFS_ATTR_FORK;
+	args->attr_filter = XFS_ATTR_VERITY;
+	args->op_flags = XFS_DA_OP_OKNOENT;
+	args->dp = ip;
+	args->owner = ip->i_ino;
+	args->name = (const uint8_t *)key;
+	args->namelen = sizeof(struct xfs_merkle_key);
+	xfs_attr_sethash(args);
+}
+
+/* Delete the verity descriptor. */
+static int
+xfs_fsverity_delete_descriptor(
+ struct xfs_inode *ip)
+{
+ struct xfs_da_args args = { };
+
+ xfs_fsverity_init_vdesc_args(ip, &args);
+ return xfs_attr_removename(&args, false);
+}
+
+/* Delete a merkle tree block. */
+static int
+xfs_fsverity_delete_merkle_block(
+ struct xfs_inode *ip,
+ u64 offset)
+{
+ struct xfs_merkle_key name;
+ struct xfs_da_args args = { };
+
+ xfs_fsverity_init_merkle_args(ip, &name, offset, &args);
+ return xfs_attr_removename(&args, false);
+}
+
+/* Retrieve the verity descriptor. */
+static int
+xfs_fsverity_get_descriptor(
+ struct inode *inode,
+ void *buf,
+ size_t buf_size)
+{
+ struct xfs_inode *ip = XFS_I(inode);
+ struct xfs_da_args args = {
+ .value = buf,
+ .valuelen = buf_size,
+ };
+ int error = 0;
+
+ /*
+ * The fact that (returned attribute size) == (provided buf_size) is
+ * checked by xfs_attr_copy_value() (returns -ERANGE). No descriptor
+ * is treated as a short read so that common fsverity code will
+ * complain.
+ */
+ xfs_fsverity_init_vdesc_args(ip, &args);
+ error = xfs_attr_getname(&args);
+ if (error == -ENOATTR)
+ return 0;
+ if (error)
+ return error;
+
+ return args.valuelen;
+}
+
+/*
+ * Clear out old fsverity metadata before we start building a new one. This
+ * could happen if, say, we crashed while building fsverity data.
+ */
+static int
+xfs_fsverity_delete_stale_metadata(
+ struct xfs_inode *ip,
+ u64 new_tree_size,
+ unsigned int tree_blocksize)
+{
+ u64 offset;
+ int error = 0;
+
+ /*
+ * Delete as many merkle tree blocks in increasing blkno order until we
+ * don't find any more. That ought to be good enough for avoiding
+ * dead bloat without excessive runtime.
+ */
+ for (offset = new_tree_size; !error; offset += tree_blocksize) {
+ if (fatal_signal_pending(current))
+ return -EINTR;
+ error = xfs_fsverity_delete_merkle_block(ip, offset);
+ if (error)
+ break;
+ }
+
+ return error != -ENOATTR ? error : 0;
+}
+
+/*
+ * Prepare to enable fsverity by clearing old metadata.
+ *
+ * Marks the inode as being under merkle tree construction, attaches dquots,
+ * and removes any merkle tree blocks at or beyond @merkle_tree_size.  Returns
+ * -EINVAL for DAX inodes, -EBUSY if construction is already in progress, or
+ * the error from dquot attachment or stale metadata removal.
+ */
+static int
+xfs_fsverity_begin_enable(
+	struct file		*filp,
+	u64			merkle_tree_size,
+	unsigned int		tree_blocksize)
+{
+	struct inode		*inode = file_inode(filp);
+	struct xfs_inode	*ip = XFS_I(inode);
+	int			error;
+
+	xfs_assert_ilocked(ip, XFS_IOLOCK_EXCL);
+
+	if (IS_DAX(inode))
+		return -EINVAL;
+
+	if (xfs_iflags_test_and_set(ip, XFS_VERITY_CONSTRUCTION))
+		return -EBUSY;
+
+	error = xfs_qm_dqattach(ip);
+	if (error)
+		goto out_clear;
+
+	error = xfs_fsverity_delete_stale_metadata(ip, merkle_tree_size,
+			tree_blocksize);
+	if (error)
+		goto out_clear;
+
+	return 0;
+
+out_clear:
+	/*
+	 * The construction flag is otherwise only cleared when enablement
+	 * ends.  Don't let it outlive a failed begin, or every later attempt
+	 * on this incore inode would fail with -EBUSY.
+	 */
+	xfs_iflags_clear(ip, XFS_VERITY_CONSTRUCTION);
+	return error;
+}
+
+/* Try to remove all the fsverity metadata after a failed enablement. */
+static int
+xfs_fsverity_delete_metadata(
+ struct xfs_inode *ip,
+ u64 merkle_tree_size,
+ unsigned int tree_blocksize)
+{
+ u64 offset;
+ int error;
+
+ if (!merkle_tree_size)
+ return 0;
+
+ for (offset = 0; offset < merkle_tree_size; offset += tree_blocksize) {
+ if (fatal_signal_pending(current))
+ return -EINTR;
+ error = xfs_fsverity_delete_merkle_block(ip, offset);
+ if (error == -ENOATTR)
+ error = 0;
+ if (error)
+ return error;
+ }
+
+ error = xfs_fsverity_delete_descriptor(ip);
+ return error != -ENOATTR ? error : 0;
+}
+
+/*
+ * Complete (or fail) the process of enabling fsverity.
+ *
+ * If @desc is non-NULL, store the descriptor in the verity xattr namespace and
+ * synchronously commit the ondisk verity flag.  If @desc is NULL the caller is
+ * reporting that enablement failed.  In both failure cases (NULL @desc, or an
+ * error while persisting the descriptor or flag) try to remove the merkle tree
+ * blocks and descriptor that were written for this attempt.
+ *
+ * The construction flag is always cleared.  Returns 0 or a negative errno from
+ * persisting the descriptor or the inode flag; a cleanup failure is only
+ * logged.
+ */
+static int
+xfs_fsverity_end_enable(
+	struct file		*filp,
+	const void		*desc,
+	size_t			desc_size,
+	u64			merkle_tree_size,
+	unsigned int		tree_blocksize)
+{
+	struct xfs_da_args	args = {
+		.value		= (void *)desc,
+		.valuelen	= desc_size,
+	};
+	struct inode		*inode = file_inode(filp);
+	struct xfs_inode	*ip = XFS_I(inode);
+	struct xfs_mount	*mp = ip->i_mount;
+	struct xfs_trans	*tp;
+	int			error = 0;
+
+	xfs_assert_ilocked(ip, XFS_IOLOCK_EXCL);
+
+	/* fs-verity failed, just cleanup */
+	if (desc == NULL)
+		goto out;
+
+	xfs_fsverity_init_vdesc_args(ip, &args);
+	error = xfs_attr_setname(&args, false);
+	if (error)
+		goto out;
+
+	/* Set fsverity inode flag */
+	error = xfs_trans_alloc_inode(ip, &M_RES(mp)->tr_ichange,
+			0, 0, false, &tp);
+	if (error)
+		goto out;
+
+	/*
+	 * Ensure that we've persisted the verity information before we enable
+	 * it on the inode and tell the caller we have sealed the inode.
+	 */
+	ip->i_diflags2 |= XFS_DIFLAG2_VERITY;
+
+	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+	xfs_trans_set_sync(tp);
+
+	error = xfs_trans_commit(tp);
+	xfs_iunlock(ip, XFS_ILOCK_EXCL);
+
+	if (!error)
+		inode->i_flags |= S_VERITY;
+
+out:
+	/*
+	 * A NULL descriptor arrives here with error == 0, so test for it
+	 * explicitly; otherwise the half-built metadata would be left behind.
+	 */
+	if (error || !desc) {
+		int	error2;
+
+		error2 = xfs_fsverity_delete_metadata(ip,
+				merkle_tree_size, tree_blocksize);
+		if (error2)
+			xfs_alert(ip->i_mount,
+ "ino 0x%llx failed to clean up new fsverity metadata, err %d",
+					ip->i_ino, error2);
+	}
+
+	xfs_iflags_clear(ip, XFS_VERITY_CONSTRUCTION);
+	return error;
+}
+
+/* Retrieve a merkle tree block. */
+static int
+xfs_fsverity_read_merkle(
+ const struct fsverity_readmerkle *req,
+ struct fsverity_blockbuf *block)
+{
+ struct xfs_inode *ip = XFS_I(req->inode);
+ struct xfs_merkle_key name;
+ struct xfs_da_args args = {
+ .valuelen = block->size,
+ };
+ struct xfs_merkle_blob *mk, *new_mk;
+ unsigned long key = block->offset >> req->log_blocksize;
+ int error;
+
+ ASSERT(block->offset >> req->log_blocksize <= ULONG_MAX);
+
+ /* Is the block already cached? */
+ mk = xfs_fsverity_cache_load(ip, key);
+ if (mk)
+ goto out_hit;
+
+ new_mk = xfs_merkle_blob_alloc(block->size);
+ if (!new_mk)
+ return -ENOMEM;
+ args.value = new_mk->data;
+
+ /* Read the block in from disk and try to store it in the cache. */
+ xfs_fsverity_init_merkle_args(ip, &name, block->offset, &args);
+ error = xfs_attr_getname(&args);
+ if (error)
+ goto out_new_mk;
+
+ if (!args.valuelen) {
+ error = -ENODATA;
+ goto out_new_mk;
+ }
+
+ mk = xfs_fsverity_cache_store(ip, key, new_mk);
+ if (mk != new_mk) {
+ /*
+ * We raced with another thread to populate the cache and lost.
+ * Free the new cache blob and continue with the existing one.
+ */
+ xfs_merkle_blob_rele(new_mk);
+ }
+
+out_hit:
+ block->kaddr = (void *)mk->data;
+ block->context = mk;
+ block->verified = test_bit(XFS_MERKLE_BLOB_VERIFIED_BIT, &mk->flags);
+
+ return 0;
+
+out_new_mk:
+ xfs_merkle_blob_rele(new_mk);
+ return error;
+}
+
+/* Write a merkle tree block. */
+static int
+xfs_fsverity_write_merkle(
+ const struct fsverity_writemerkle *req,
+ const void *buf,
+ u64 pos,
+ unsigned int size)
+{
+ struct inode *inode = req->inode;
+ struct xfs_inode *ip = XFS_I(inode);
+ struct xfs_merkle_key name;
+ struct xfs_da_args args = {
+ .value = (void *)buf,
+ .valuelen = size,
+ };
+
+ xfs_fsverity_init_merkle_args(ip, &name, pos, &args);
+ return xfs_attr_setname(&args, false);
+}
+
+/* Drop a cached merkle tree block. */
+static void
+xfs_fsverity_drop_merkle(
+ struct fsverity_blockbuf *block)
+{
+ struct xfs_merkle_blob *mk = block->context;
+
+ if (block->verified)
+ set_bit(XFS_MERKLE_BLOB_VERIFIED_BIT, &mk->flags);
+ xfs_merkle_blob_rele(mk);
+ block->kaddr = NULL;
+ block->context = NULL;
+}
+
+const struct fsverity_operations xfs_fsverity_ops = {
+ .begin_enable_verity = xfs_fsverity_begin_enable,
+ .end_enable_verity = xfs_fsverity_end_enable,
+ .get_verity_descriptor = xfs_fsverity_get_descriptor,
+ .read_merkle_tree_block = xfs_fsverity_read_merkle,
+ .write_merkle_tree_block = xfs_fsverity_write_merkle,
+ .drop_merkle_tree_block = xfs_fsverity_drop_merkle,
+};
diff --git a/fs/xfs/xfs_fsverity.h b/fs/xfs/xfs_fsverity.h
new file mode 100644
index 0000000000000..277a9f856f518
--- /dev/null
+++ b/fs/xfs/xfs_fsverity.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2022 Red Hat, Inc.
+ */
+#ifndef __XFS_FSVERITY_H__
+#define __XFS_FSVERITY_H__
+
+#ifdef CONFIG_FS_VERITY
+void xfs_fsverity_cache_init(struct xfs_inode *ip);
+void xfs_fsverity_cache_drop(struct xfs_inode *ip);
+void xfs_fsverity_cache_destroy(struct xfs_inode *ip);
+
+extern const struct fsverity_operations xfs_fsverity_ops;
+#else
+# define xfs_fsverity_cache_init(ip) ((void)0)
+# define xfs_fsverity_cache_drop(ip) ((void)0)
+# define xfs_fsverity_cache_destroy(ip) ((void)0)
+#endif /* CONFIG_FS_VERITY */
+
+#endif /* __XFS_FSVERITY_H__ */
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index 01bbdbec6663f..0757062c318d0 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -28,6 +28,7 @@
#include "xfs_da_format.h"
#include "xfs_dir2.h"
#include "xfs_imeta.h"
+#include "xfs_fsverity.h"
#include <linux/iversion.h>
@@ -118,6 +119,7 @@ xfs_inode_alloc(
spin_lock_init(&ip->i_ioend_lock);
ip->i_next_unlinked = NULLAGINO;
ip->i_prev_unlinked = 0;
+ xfs_fsverity_cache_init(ip);
return ip;
}
@@ -129,6 +131,8 @@ xfs_inode_free_callback(
struct inode *inode = container_of(head, struct inode, i_rcu);
struct xfs_inode *ip = XFS_I(inode);
+ xfs_fsverity_cache_destroy(ip);
+
switch (VFS_I(ip)->i_mode & S_IFMT) {
case S_IFREG:
case S_IFDIR:
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 5a202706fc4a4..70c5700132b3e 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -96,6 +96,9 @@ typedef struct xfs_inode {
spinlock_t i_ioend_lock;
struct work_struct i_ioend_work;
struct list_head i_ioend_list;
+#ifdef CONFIG_FS_VERITY
+ struct xarray i_merkle_blocks;
+#endif
} xfs_inode_t;
static inline bool xfs_inode_on_unlinked_list(const struct xfs_inode *ip)
@@ -391,6 +394,8 @@ static inline bool xfs_inode_needs_cow_around(struct xfs_inode *ip)
*/
#define XFS_IREMAPPING (1U << 15)
+#define XFS_VERITY_CONSTRUCTION (1U << 16) /* merkle tree construction */
+
/* All inode state flags related to inode reclaim. */
#define XFS_ALL_IRECLAIM_FLAGS (XFS_IRECLAIMABLE | \
XFS_IRECLAIM | \
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 42a1e1f23d3b3..4e398884c46ae 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -30,6 +30,7 @@
#include "xfs_filestream.h"
#include "xfs_quota.h"
#include "xfs_sysfs.h"
+#include "xfs_fsverity.h"
#include "xfs_ondisk.h"
#include "xfs_rmap_item.h"
#include "xfs_refcount_item.h"
@@ -53,6 +54,7 @@
#include <linux/fs_context.h>
#include <linux/fs_parser.h>
#include <linux/fsverity.h>
+#include <linux/iomap.h>
static const struct super_operations xfs_super_operations;
@@ -672,6 +674,8 @@ xfs_fs_destroy_inode(
ASSERT(!rwsem_is_locked(&inode->i_rwsem));
XFS_STATS_INC(ip->i_mount, vn_rele);
XFS_STATS_INC(ip->i_mount, vn_remove);
+ if (fsverity_active(inode))
+ xfs_fsverity_cache_drop(ip);
fsverity_cleanup_inode(inode);
xfs_inode_mark_reclaimable(ip);
}
@@ -1534,6 +1538,9 @@ xfs_fs_fill_super(
sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP | QTYPE_MASK_PRJ;
#endif
sb->s_op = &xfs_super_operations;
+#ifdef CONFIG_FS_VERITY
+ sb->s_vop = &xfs_fsverity_ops;
+#endif
/*
* Delay mount work if the debug hook is set. This is debug
@@ -1775,10 +1782,20 @@ xfs_fs_fill_super(
xfs_warn(mp,
"EXPERIMENTAL parent pointer feature enabled. Use at your own risk!");
+ if (xfs_has_verity(mp))
+ xfs_alert(mp,
+ "EXPERIMENTAL fsverity feature in use. Use at your own risk!");
+
error = xfs_mountfs(mp);
if (error)
goto out_filestream_unmount;
+#ifdef CONFIG_FS_VERITY
+ error = iomap_init_fsverity(mp->m_super);
+ if (error)
+ goto out_unmount;
+#endif
+
root = igrab(VFS_I(mp->m_rootip));
if (!root) {
error = -ENOENT;
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index e2992b0115ad2..86a8702c1e27c 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -5908,6 +5908,38 @@ TRACE_EVENT(xfs_growfs_check_rtgeom,
);
#endif /* CONFIG_XFS_RT */
+#ifdef CONFIG_FS_VERITY
+DECLARE_EVENT_CLASS(xfs_fsverity_cache_class,
+ TP_PROTO(struct xfs_inode *ip, unsigned long key, unsigned long caller_ip),
+ TP_ARGS(ip, key, caller_ip),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(xfs_ino_t, ino)
+ __field(unsigned long, key)
+ __field(void *, caller_ip)
+ ),
+ TP_fast_assign(
+ __entry->dev = ip->i_mount->m_super->s_dev;
+ __entry->ino = ip->i_ino;
+ __entry->key = key;
+ __entry->caller_ip = (void *)caller_ip;
+ ),
+ TP_printk("dev %d:%d ino 0x%llx key 0x%lx caller %pS",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->ino,
+ __entry->key,
+ __entry->caller_ip)
+)
+
+#define DEFINE_XFS_FSVERITY_CACHE_EVENT(name) \
+DEFINE_EVENT(xfs_fsverity_cache_class, name, \
+ TP_PROTO(struct xfs_inode *ip, unsigned long key, unsigned long caller_ip), \
+ TP_ARGS(ip, key, caller_ip))
+DEFINE_XFS_FSVERITY_CACHE_EVENT(xfs_fsverity_cache_load);
+DEFINE_XFS_FSVERITY_CACHE_EVENT(xfs_fsverity_cache_store);
+DEFINE_XFS_FSVERITY_CACHE_EVENT(xfs_fsverity_cache_drop);
+#endif /* CONFIG_FS_VERITY */
+
#endif /* _TRACE_XFS_H */
#undef TRACE_INCLUDE_PATH
^ permalink raw reply related [flat|nested] 111+ messages in thread
* Re: [PATCH 13/29] xfs: add fs-verity support
2024-03-30 0:39 ` [PATCH 13/29] xfs: add fs-verity support Darrick J. Wong
@ 2024-04-02 8:42 ` Andrey Albershteyn
2024-04-02 16:34 ` Darrick J. Wong
0 siblings, 1 reply; 111+ messages in thread
From: Andrey Albershteyn @ 2024-04-02 8:42 UTC (permalink / raw)
To: Darrick J. Wong; +Cc: ebiggers, linux-xfs, linux-fsdevel, fsverity, hch
On 2024-03-29 17:39:27, Darrick J. Wong wrote:
> From: Andrey Albershteyn <aalbersh@redhat.com>
>
> Add integration with fs-verity. XFS stores fs-verity metadata in
> extended file attributes. The metadata consists of the verity
> descriptor and Merkle tree blocks.
>
> The descriptor is stored under "vdesc" extended attribute. The
> Merkle tree blocks are stored under binary indexes which are offsets
> into the Merkle tree.
>
> When fs-verity is enabled on an inode, the XFS_VERITY_CONSTRUCTION
> flag is set, meaning that the Merkle tree is being built. The
> initialization ends with storing the verity descriptor and setting
> the inode on-disk flag (XFS_DIFLAG2_VERITY).
>
> The verification on read is done in read path of iomap.
>
> Signed-off-by: Andrey Albershteyn <aalbersh@redhat.com>
> Reviewed-by: Darrick J. Wong <djwong@kernel.org>
> [djwong: replace caching implementation with an xarray, other cleanups]
> Signed-off-by: Darrick J. Wong <djwong@kernel.org>
> ---
> fs/xfs/Makefile | 2
> fs/xfs/libxfs/xfs_attr.c | 41 +++
> fs/xfs/libxfs/xfs_attr.h | 1
> fs/xfs/libxfs/xfs_da_format.h | 14 +
> fs/xfs/libxfs/xfs_ondisk.h | 3
> fs/xfs/libxfs/xfs_verity.c | 58 ++++
> fs/xfs/libxfs/xfs_verity.h | 13 +
> fs/xfs/xfs_fsverity.c | 559 +++++++++++++++++++++++++++++++++++++++++
> fs/xfs/xfs_fsverity.h | 20 +
> fs/xfs/xfs_icache.c | 4
> fs/xfs/xfs_inode.h | 5
> fs/xfs/xfs_super.c | 17 +
> fs/xfs/xfs_trace.h | 32 ++
> 13 files changed, 769 insertions(+)
> create mode 100644 fs/xfs/libxfs/xfs_verity.c
> create mode 100644 fs/xfs/libxfs/xfs_verity.h
> create mode 100644 fs/xfs/xfs_fsverity.c
> create mode 100644 fs/xfs/xfs_fsverity.h
>
>
> diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
> index 702f2ddc918a1..a4b2f54914a87 100644
> --- a/fs/xfs/Makefile
> +++ b/fs/xfs/Makefile
> @@ -57,6 +57,7 @@ xfs-y += $(addprefix libxfs/, \
> xfs_trans_resv.o \
> xfs_trans_space.o \
> xfs_types.o \
> + xfs_verity.o \
> )
> # xfs_rtbitmap is shared with libxfs
> xfs-$(CONFIG_XFS_RT) += $(addprefix libxfs/, \
> @@ -142,6 +143,7 @@ xfs-$(CONFIG_XFS_POSIX_ACL) += xfs_acl.o
> xfs-$(CONFIG_SYSCTL) += xfs_sysctl.o
> xfs-$(CONFIG_COMPAT) += xfs_ioctl32.o
> xfs-$(CONFIG_EXPORTFS_BLOCK_OPS) += xfs_pnfs.o
> +xfs-$(CONFIG_FS_VERITY) += xfs_fsverity.o
>
> # notify failure
> ifeq ($(CONFIG_MEMORY_FAILURE),y)
> diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c
> index 931ec563a7460..c3f686411e378 100644
> --- a/fs/xfs/libxfs/xfs_attr.c
> +++ b/fs/xfs/libxfs/xfs_attr.c
> @@ -27,6 +27,7 @@
> #include "xfs_attr_item.h"
> #include "xfs_xattr.h"
> #include "xfs_parent.h"
> +#include "xfs_verity.h"
>
> struct kmem_cache *xfs_attr_intent_cache;
>
> @@ -1262,6 +1263,43 @@ xfs_attr_removename(
> goto out_unlock;
> }
>
> +/*
> + * Retrieve the value stored in the xattr structure under @args->name.
> + *
> + * The caller must have initialized @args and must not hold any ILOCKs.
> + *
> + * Returns -ENOATTR if the name did not already exist.
> + */
> +int
> +xfs_attr_getname(
> + struct xfs_da_args *args)
> +{
> + unsigned int lock_mode;
> + int error;
> +
> + ASSERT(!args->trans);
> +
> + error = xfs_trans_alloc_empty(args->dp->i_mount, &args->trans);
> + if (error)
> + return error;
> +
> + lock_mode = xfs_ilock_attr_map_shared(args->dp);
> +
> + /* Make sure the attr fork iext tree is loaded */
> + if (xfs_inode_hasattr(args->dp)) {
> + error = xfs_iread_extents(args->trans, args->dp, XFS_ATTR_FORK);
> + if (error)
> + goto out_unlock;
> + }
> +
> + error = xfs_attr_get_ilocked(args);
> +out_unlock:
> + xfs_iunlock(args->dp, lock_mode);
> + xfs_trans_cancel(args->trans);
> + args->trans = NULL;
> + return error;
> +}
> +
> /*========================================================================
> * External routines when attribute list is inside the inode
> *========================================================================*/
> @@ -1743,6 +1781,9 @@ xfs_attr_namecheck(
> if (!xfs_attr_check_namespace(attr_flags))
> return false;
>
> + if (attr_flags & XFS_ATTR_VERITY)
> + return xfs_verity_namecheck(attr_flags, name, length);
> +
> /*
> * MAXNAMELEN includes the trailing null, but (name/length) leave it
> * out, so use >= for the length check.
> diff --git a/fs/xfs/libxfs/xfs_attr.h b/fs/xfs/libxfs/xfs_attr.h
> index 958bb9e41ddb3..3e43d715bcdd2 100644
> --- a/fs/xfs/libxfs/xfs_attr.h
> +++ b/fs/xfs/libxfs/xfs_attr.h
> @@ -561,6 +561,7 @@ void xfs_init_attr_trans(struct xfs_da_args *args, struct xfs_trans_res *tres,
>
> int xfs_attr_setname(struct xfs_da_args *args, bool rsvd);
> int xfs_attr_removename(struct xfs_da_args *args, bool rsvd);
> +int xfs_attr_getname(struct xfs_da_args *args);
>
> /*
> * Check to see if the attr should be upgraded from non-existent or shortform to
> diff --git a/fs/xfs/libxfs/xfs_da_format.h b/fs/xfs/libxfs/xfs_da_format.h
> index 8cbda181c2f48..679cf5b4ad4be 100644
> --- a/fs/xfs/libxfs/xfs_da_format.h
> +++ b/fs/xfs/libxfs/xfs_da_format.h
> @@ -922,4 +922,18 @@ struct xfs_parent_rec {
> __be32 p_gen;
> } __packed;
>
> +/*
> + * fs-verity attribute name format
> + *
> + * Merkle tree blocks are stored under extended attributes of the inode. The
> + * names of the attributes are byte offsets into the merkle tree.
> + */
> +struct xfs_merkle_key {
> + __be64 mk_offset;
> +};
> +
> +/* ondisk xattr name used for the fsverity descriptor */
> +#define XFS_VERITY_DESCRIPTOR_NAME "vdesc"
> +#define XFS_VERITY_DESCRIPTOR_NAME_LEN (sizeof(XFS_VERITY_DESCRIPTOR_NAME) - 1)
> +
> #endif /* __XFS_DA_FORMAT_H__ */
> diff --git a/fs/xfs/libxfs/xfs_ondisk.h b/fs/xfs/libxfs/xfs_ondisk.h
> index d46352d60d645..e927bb778ffdc 100644
> --- a/fs/xfs/libxfs/xfs_ondisk.h
> +++ b/fs/xfs/libxfs/xfs_ondisk.h
> @@ -208,6 +208,9 @@ xfs_check_ondisk_structs(void)
> XFS_CHECK_VALUE(XFS_DQ_BIGTIME_EXPIRY_MAX << XFS_DQ_BIGTIME_SHIFT,
> 16299260424LL);
>
> + /* fs-verity xattrs */
> + XFS_CHECK_STRUCT_SIZE(struct xfs_merkle_key, 8);
> + XFS_CHECK_VALUE(sizeof(XFS_VERITY_DESCRIPTOR_NAME), 6);
> }
>
> #endif /* __XFS_ONDISK_H */
> diff --git a/fs/xfs/libxfs/xfs_verity.c b/fs/xfs/libxfs/xfs_verity.c
> new file mode 100644
> index 0000000000000..bda38b3c19698
> --- /dev/null
> +++ b/fs/xfs/libxfs/xfs_verity.c
> @@ -0,0 +1,58 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +/*
> + * Copyright (C) 2023 Red Hat, Inc.
> + */
> +#include "xfs.h"
> +#include "xfs_shared.h"
> +#include "xfs_format.h"
> +#include "xfs_da_format.h"
> +#include "xfs_da_btree.h"
> +#include "xfs_trans_resv.h"
> +#include "xfs_mount.h"
> +#include "xfs_inode.h"
> +#include "xfs_log_format.h"
> +#include "xfs_attr.h"
> +#include "xfs_verity.h"
> +
> +/* Set a merkle tree offset in preparation for setting merkle tree attrs. */
> +void
> +xfs_merkle_key_to_disk(
> + struct xfs_merkle_key *key,
> + uint64_t offset)
> +{
> + key->mk_offset = cpu_to_be64(offset);
> +}
> +
> +/* Retrieve the merkle tree offset from the attr data. */
> +uint64_t
> +xfs_merkle_key_from_disk(
> + const void *attr_name,
> + int namelen)
> +{
> + const struct xfs_merkle_key *key = attr_name;
> +
> + ASSERT(namelen == sizeof(struct xfs_merkle_key));
> +
> + return be64_to_cpu(key->mk_offset);
> +}
> +
> +/* Return true if verity attr name is valid. */
> +bool
> +xfs_verity_namecheck(
> + unsigned int attr_flags,
> + const void *name,
> + int namelen)
> +{
> + if (!(attr_flags & XFS_ATTR_VERITY))
> + return false;
> +
> + /*
> + * Merkle tree pages are stored under u64 indexes; verity descriptor
> + * blocks are held in a named attribute.
> + */
> + if (namelen != sizeof(struct xfs_merkle_key) &&
> + namelen != XFS_VERITY_DESCRIPTOR_NAME_LEN)
> + return false;
> +
> + return true;
> +}
> diff --git a/fs/xfs/libxfs/xfs_verity.h b/fs/xfs/libxfs/xfs_verity.h
> new file mode 100644
> index 0000000000000..c01cc0678bc04
> --- /dev/null
> +++ b/fs/xfs/libxfs/xfs_verity.h
> @@ -0,0 +1,13 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +/*
> + * Copyright (C) 2022 Red Hat, Inc.
> + */
> +#ifndef __XFS_VERITY_H__
> +#define __XFS_VERITY_H__
> +
> +void xfs_merkle_key_to_disk(struct xfs_merkle_key *key, uint64_t offset);
> +uint64_t xfs_merkle_key_from_disk(const void *attr_name, int namelen);
> +bool xfs_verity_namecheck(unsigned int attr_flags, const void *name,
> + int namelen);
> +
> +#endif /* __XFS_VERITY_H__ */
> diff --git a/fs/xfs/xfs_fsverity.c b/fs/xfs/xfs_fsverity.c
> new file mode 100644
> index 0000000000000..a4a52575fb3d5
> --- /dev/null
> +++ b/fs/xfs/xfs_fsverity.c
> @@ -0,0 +1,559 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +/*
> + * Copyright (C) 2023 Red Hat, Inc.
> + */
> +#include "xfs.h"
> +#include "xfs_shared.h"
> +#include "xfs_format.h"
> +#include "xfs_da_format.h"
> +#include "xfs_da_btree.h"
> +#include "xfs_trans_resv.h"
> +#include "xfs_mount.h"
> +#include "xfs_inode.h"
> +#include "xfs_log_format.h"
> +#include "xfs_attr.h"
> +#include "xfs_verity.h"
> +#include "xfs_bmap_util.h"
> +#include "xfs_log_format.h"
> +#include "xfs_trans.h"
> +#include "xfs_attr_leaf.h"
> +#include "xfs_trace.h"
> +#include "xfs_quota.h"
> +#include "xfs_fsverity.h"
> +#include <linux/fsverity.h>
> +
> +/*
> + * Merkle Tree Block Cache
> + * =======================
> + *
> + * fsverity requires that the filesystem implement caching of ondisk merkle
> + * tree blocks. XFS stores merkle tree blocks in the extended attribute data,
> + * which makes it important to keep copies in memory for as long as possible.
> + * This is performed by allocating the data blob structure defined below,
> + * passing the data portion of the blob to xfs_attr_get, and later adding the
> + * data blob to an xarray embedded in the xfs_inode structure.
> + *
> + * The xarray structure indexes merkle tree blocks by the offset given to us by
> + * fsverity, which drastically reduces lookups. First, it eliminates the need
> + * to walk the xattr structure to find the remote block containing the merkle
> + * tree block. Second, access to each block in the xattr structure requires a
> + * lookup in the incore extent btree.
> + */
> +struct xfs_merkle_blob {
> + /* refcount of this item; the cache holds its own ref */
> + refcount_t refcount;
> +
> + unsigned long flags;
> +
> + /* Pointer to the merkle tree block, which is power-of-2 sized */
> + void *data;
> +};
> +
> +#define XFS_MERKLE_BLOB_VERIFIED_BIT (0) /* fsverity validated this */
> +
> +/*
> + * Allocate a merkle tree blob object to prepare for reading a merkle tree
> + * object from disk.
> + */
> +static inline struct xfs_merkle_blob *
> +xfs_merkle_blob_alloc(
> + unsigned int blocksize)
> +{
> + struct xfs_merkle_blob *mk;
> +
> + mk = kmalloc(sizeof(struct xfs_merkle_blob), GFP_KERNEL);
> + if (!mk)
> + return NULL;
> +
> + mk->data = kvzalloc(blocksize, GFP_KERNEL);
> + if (!mk->data) {
> + kfree(mk);
> + return NULL;
> + }
> +
> + /* Caller owns this refcount. */
> + refcount_set(&mk->refcount, 1);
> + mk->flags = 0;
> + return mk;
> +}
> +
> +/* Drop a ref to a merkle tree blob; free it once the last ref is gone. */
> +static inline void
> +xfs_merkle_blob_rele(
> + struct xfs_merkle_blob *mk)
> +{
> + if (refcount_dec_and_test(&mk->refcount)) {
> + kvfree(mk->data);
> + kfree(mk);
> + }
> +}
> +
> +/* Initialize the merkle tree block cache */
> +void
> +xfs_fsverity_cache_init(
> + struct xfs_inode *ip)
> +{
> + xa_init(&ip->i_merkle_blocks);
> +}
> +
> +/*
> + * Drop all the merkle tree blocks out of the cache. Caller must ensure that
> + * there are no active references to cache items.
> + */
> +void
> +xfs_fsverity_cache_drop(
> + struct xfs_inode *ip)
> +{
> + XA_STATE(xas, &ip->i_merkle_blocks, 0);
> + struct xfs_merkle_blob *mk;
> + unsigned long flags;
> +
> + xas_lock_irqsave(&xas, flags);
> + xas_for_each(&xas, mk, ULONG_MAX) {
> + ASSERT(refcount_read(&mk->refcount) == 1);
> +
> + trace_xfs_fsverity_cache_drop(ip, xas.xa_index, _RET_IP_);
> +
> + xas_store(&xas, NULL);
> + xfs_merkle_blob_rele(mk);
> + }
> + xas_unlock_irqrestore(&xas, flags);
> +}
> +
> +/* Destroy the merkle tree block cache */
> +void
> +xfs_fsverity_cache_destroy(
> + struct xfs_inode *ip)
> +{
> + ASSERT(xa_empty(&ip->i_merkle_blocks));
> +
> + /*
> + * xa_destroy calls xas_lock from rcu freeing softirq context, so
> + * we must use xa*_lock_irqsave.
> + */
> + xa_destroy(&ip->i_merkle_blocks);
> +}
> +
> +/* Return a cached merkle tree block, or NULL. */
> +static struct xfs_merkle_blob *
> +xfs_fsverity_cache_load(
> + struct xfs_inode *ip,
> + unsigned long key)
> +{
> + XA_STATE(xas, &ip->i_merkle_blocks, key);
> + struct xfs_merkle_blob *mk;
> +
> + /* Look up the cached item and try to get an active ref. */
> + rcu_read_lock();
> + do {
> + mk = xas_load(&xas);
> + if (xa_is_zero(mk))
> + mk = NULL;
> + } while (xas_retry(&xas, mk) ||
> + (mk && !refcount_inc_not_zero(&mk->refcount)));
> + rcu_read_unlock();
> +
> + if (!mk)
> + return NULL;
> +
> + trace_xfs_fsverity_cache_load(ip, key, _RET_IP_);
> + return mk;
> +}
> +
> +/*
> + * Try to store a merkle tree block in the cache with the given key.
> + *
> + * If the merkle tree block is not already in the cache, the given block @mk
> + * will be added to the cache and returned. The caller retains its active
> + * reference to @mk.
> + *
> + * If there was already a merkle block in the cache, it will be returned to
> + * the caller with an active reference. @mk will be untouched.
> + */
> +static struct xfs_merkle_blob *
> +xfs_fsverity_cache_store(
> + struct xfs_inode *ip,
> + unsigned long key,
> + struct xfs_merkle_blob *mk)
> +{
> + struct xfs_merkle_blob *old;
> + unsigned long flags;
> +
> + trace_xfs_fsverity_cache_store(ip, key, _RET_IP_);
> +
> + /*
> + * Either replace a NULL entry with mk, or take an active ref to
> + * whatever's currently there.
> + */
> + xa_lock_irqsave(&ip->i_merkle_blocks, flags);
> + do {
> + old = __xa_cmpxchg(&ip->i_merkle_blocks, key, NULL, mk,
> + GFP_KERNEL);
> + } while (old && !refcount_inc_not_zero(&old->refcount));
> + xa_unlock_irqrestore(&ip->i_merkle_blocks, flags);
> +
> + if (old == NULL) {
> + /*
> + * There was no previous value. @mk is now live in the cache.
> + * Bump the active refcount to transfer ownership to the cache
> + * and return @mk to the caller.
> + */
> + refcount_inc(&mk->refcount);
> + return mk;
> + }
> +
> + /*
> + * We obtained an active reference to a previous value in the cache.
> + * Return it to the caller.
> + */
> + return old;
> +}
> +
> +/*
> + * Initialize an args structure to load or store the fsverity descriptor.
> + * Caller must ensure @args is zeroed except for value and valuelen.
> + */
> +static inline void
> +xfs_fsverity_init_vdesc_args(
> + struct xfs_inode *ip,
> + struct xfs_da_args *args)
> +{
> + args->geo = ip->i_mount->m_attr_geo;
> + args->whichfork = XFS_ATTR_FORK,
> + args->attr_filter = XFS_ATTR_VERITY;
> + args->op_flags = XFS_DA_OP_OKNOENT;
> + args->dp = ip;
> + args->owner = ip->i_ino;
> + args->name = XFS_VERITY_DESCRIPTOR_NAME;
> + args->namelen = XFS_VERITY_DESCRIPTOR_NAME_LEN;
> + xfs_attr_sethash(args);
> +}
> +
> +/*
> + * Initialize an args structure to load or store a merkle tree block.
> + * Caller must ensure @args is zeroed except for value and valuelen.
> + */
> +static inline void
> +xfs_fsverity_init_merkle_args(
> + struct xfs_inode *ip,
> + struct xfs_merkle_key *key,
> + uint64_t merkleoff,
> + struct xfs_da_args *args)
> +{
> + xfs_merkle_key_to_disk(key, merkleoff);
> + args->geo = ip->i_mount->m_attr_geo;
> + args->whichfork = XFS_ATTR_FORK,
> + args->attr_filter = XFS_ATTR_VERITY;
> + args->op_flags = XFS_DA_OP_OKNOENT;
> + args->dp = ip;
> + args->owner = ip->i_ino;
> + args->name = (const uint8_t *)key;
> + args->namelen = sizeof(struct xfs_merkle_key);
> + xfs_attr_sethash(args);
> +}
> +
> +/* Delete the verity descriptor. */
> +static int
> +xfs_fsverity_delete_descriptor(
> + struct xfs_inode *ip)
> +{
> + struct xfs_da_args args = { };
> +
> + xfs_fsverity_init_vdesc_args(ip, &args);
> + return xfs_attr_removename(&args, false);
> +}
> +
> +/* Delete a merkle tree block. */
> +static int
> +xfs_fsverity_delete_merkle_block(
> + struct xfs_inode *ip,
> + u64 offset)
> +{
> + struct xfs_merkle_key name;
> + struct xfs_da_args args = { };
> +
> + xfs_fsverity_init_merkle_args(ip, &name, offset, &args);
> + return xfs_attr_removename(&args, false);
> +}
> +
> +/* Retrieve the verity descriptor. */
> +static int
> +xfs_fsverity_get_descriptor(
> + struct inode *inode,
> + void *buf,
> + size_t buf_size)
> +{
> + struct xfs_inode *ip = XFS_I(inode);
> + struct xfs_da_args args = {
> + .value = buf,
> + .valuelen = buf_size,
> + };
> + int error = 0;
> +
> + /*
> + * The fact that (returned attribute size) == (provided buf_size) is
> + * checked by xfs_attr_copy_value() (returns -ERANGE). No descriptor
> + * is treated as a short read so that common fsverity code will
> + * complain.
> + */
> + xfs_fsverity_init_vdesc_args(ip, &args);
> + error = xfs_attr_getname(&args);
> + if (error == -ENOATTR)
> + return 0;
> + if (error)
> + return error;
> +
> + return args.valuelen;
> +}
> +
> +/*
> + * Clear out old fsverity metadata before we start building a new one. This
> + * could happen if, say, we crashed while building fsverity data.
> + */
> +static int
> +xfs_fsverity_delete_stale_metadata(
> + struct xfs_inode *ip,
> + u64 new_tree_size,
> + unsigned int tree_blocksize)
> +{
> + u64 offset;
> + int error = 0;
> +
> + /*
> + * Delete merkle tree blocks in increasing offset order until we don't
> + * find any more. That ought to be good enough for avoiding dead bloat
> + * without excessive runtime.
> + */
> + for (offset = new_tree_size; !error; offset += tree_blocksize) {
> + if (fatal_signal_pending(current))
> + return -EINTR;
> + error = xfs_fsverity_delete_merkle_block(ip, offset);
> + if (error)
> + break;
> + }
> +
> + return error != -ENOATTR ? error : 0;
> +}
> +
> +/* Prepare to enable fsverity by clearing old metadata. */
> +static int
> +xfs_fsverity_begin_enable(
> + struct file *filp,
> + u64 merkle_tree_size,
> + unsigned int tree_blocksize)
> +{
> + struct inode *inode = file_inode(filp);
> + struct xfs_inode *ip = XFS_I(inode);
> + int error;
> +
> + xfs_assert_ilocked(ip, XFS_IOLOCK_EXCL);
> +
> + if (IS_DAX(inode))
> + return -EINVAL;
> +
> + if (xfs_iflags_test_and_set(ip, XFS_VERITY_CONSTRUCTION))
> + return -EBUSY;
> +
> + error = xfs_qm_dqattach(ip);
> + if (error)
> + return error;
> +
> + return xfs_fsverity_delete_stale_metadata(ip, merkle_tree_size,
> + tree_blocksize);
> +}
> +
> +/* Try to remove all the fsverity metadata after a failed enablement. */
> +static int
> +xfs_fsverity_delete_metadata(
> + struct xfs_inode *ip,
> + u64 merkle_tree_size,
> + unsigned int tree_blocksize)
> +{
> + u64 offset;
> + int error;
> +
> + if (!merkle_tree_size)
> + return 0;
> +
> + for (offset = 0; offset < merkle_tree_size; offset += tree_blocksize) {
> + if (fatal_signal_pending(current))
> + return -EINTR;
> + error = xfs_fsverity_delete_merkle_block(ip, offset);
> + if (error == -ENOATTR)
> + error = 0;
> + if (error)
> + return error;
> + }
> +
> + error = xfs_fsverity_delete_descriptor(ip);
> + return error != -ENOATTR ? error : 0;
> +}
> +
> +/* Complete (or fail) the process of enabling fsverity. */
> +static int
> +xfs_fsverity_end_enable(
> + struct file *filp,
> + const void *desc,
> + size_t desc_size,
> + u64 merkle_tree_size,
> + unsigned int tree_blocksize)
> +{
> + struct xfs_da_args args = {
> + .value = (void *)desc,
> + .valuelen = desc_size,
> + };
> + struct inode *inode = file_inode(filp);
> + struct xfs_inode *ip = XFS_I(inode);
> + struct xfs_mount *mp = ip->i_mount;
> + struct xfs_trans *tp;
> + int error = 0;
> +
> + xfs_assert_ilocked(ip, XFS_IOLOCK_EXCL);
> +
> + /* fs-verity failed, just cleanup */
> + if (desc == NULL)
> + goto out;
> +
> + xfs_fsverity_init_vdesc_args(ip, &args);
> + error = xfs_attr_setname(&args, false);
> + if (error)
> + goto out;
> +
> + /* Set fsverity inode flag */
> + error = xfs_trans_alloc_inode(ip, &M_RES(mp)->tr_ichange,
> + 0, 0, false, &tp);
> + if (error)
> + goto out;
> +
> + /*
> + * Ensure that we've persisted the verity information before we enable
> + * it on the inode and tell the caller we have sealed the inode.
> + */
> + ip->i_diflags2 |= XFS_DIFLAG2_VERITY;
> +
> + xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
> + xfs_trans_set_sync(tp);
> +
> + error = xfs_trans_commit(tp);
> + xfs_iunlock(ip, XFS_ILOCK_EXCL);
> +
> + if (!error)
> + inode->i_flags |= S_VERITY;
> +
> +out:
> + if (error) {
> + int error2;
> +
> + error2 = xfs_fsverity_delete_metadata(ip,
> + merkle_tree_size, tree_blocksize);
> + if (error2)
> + xfs_alert(ip->i_mount,
> + "ino 0x%llx failed to clean up new fsverity metadata, err %d",
> + ip->i_ino, error2);
> + }
> +
> + xfs_iflags_clear(ip, XFS_VERITY_CONSTRUCTION);
> + return error;
> +}
> +
> +/* Retrieve a merkle tree block. */
> +static int
> +xfs_fsverity_read_merkle(
> + const struct fsverity_readmerkle *req,
> + struct fsverity_blockbuf *block)
> +{
> + struct xfs_inode *ip = XFS_I(req->inode);
> + struct xfs_merkle_key name;
> + struct xfs_da_args args = {
> + .valuelen = block->size,
> + };
> + struct xfs_merkle_blob *mk, *new_mk;
> + unsigned long key = block->offset >> req->log_blocksize;
> + int error;
> +
> + ASSERT(block->offset >> req->log_blocksize <= ULONG_MAX);
> +
> + /* Is the block already cached? */
> + mk = xfs_fsverity_cache_load(ip, key);
> + if (mk)
> + goto out_hit;
> +
> + new_mk = xfs_merkle_blob_alloc(block->size);
> + if (!new_mk)
> + return -ENOMEM;
> + args.value = new_mk->data;
> +
> + /* Read the block in from disk and try to store it in the cache. */
> + xfs_fsverity_init_merkle_args(ip, &name, block->offset, &args);
> + error = xfs_attr_getname(&args);
> + if (error)
> + goto out_new_mk;
> +
> + if (!args.valuelen) {
> + error = -ENODATA;
> + goto out_new_mk;
> + }
> +
> + mk = xfs_fsverity_cache_store(ip, key, new_mk);
> + if (mk != new_mk) {
> + /*
> + * We raced with another thread to populate the cache and lost.
> + * Free the new cache blob and continue with the existing one.
> + */
> + xfs_merkle_blob_rele(new_mk);
> + }
> +
> +out_hit:
> + block->kaddr = (void *)mk->data;
> + block->context = mk;
> + block->verified = test_bit(XFS_MERKLE_BLOB_VERIFIED_BIT, &mk->flags);
> +
> + return 0;
> +
> +out_new_mk:
> + xfs_merkle_blob_rele(new_mk);
> + return error;
> +}
> +
> +/* Write a merkle tree block. */
> +static int
> +xfs_fsverity_write_merkle(
> + const struct fsverity_writemerkle *req,
> + const void *buf,
> + u64 pos,
> + unsigned int size)
> +{
> + struct inode *inode = req->inode;
> + struct xfs_inode *ip = XFS_I(inode);
> + struct xfs_merkle_key name;
> + struct xfs_da_args args = {
> + .value = (void *)buf,
> + .valuelen = size,
> + };
> +
> + xfs_fsverity_init_merkle_args(ip, &name, pos, &args);
> + return xfs_attr_setname(&args, false);
> +}
> +
> +/* Drop a cached merkle tree block. */
> +static void
> +xfs_fsverity_drop_merkle(
> + struct fsverity_blockbuf *block)
> +{
> + struct xfs_merkle_blob *mk = block->context;
> +
> + if (block->verified)
> + set_bit(XFS_MERKLE_BLOB_VERIFIED_BIT, &mk->flags);
> + xfs_merkle_blob_rele(mk);
> + block->kaddr = NULL;
> + block->context = NULL;
> +}
> +
> +const struct fsverity_operations xfs_fsverity_ops = {
> + .begin_enable_verity = xfs_fsverity_begin_enable,
> + .end_enable_verity = xfs_fsverity_end_enable,
> + .get_verity_descriptor = xfs_fsverity_get_descriptor,
> + .read_merkle_tree_block = xfs_fsverity_read_merkle,
> + .write_merkle_tree_block = xfs_fsverity_write_merkle,
> + .drop_merkle_tree_block = xfs_fsverity_drop_merkle,
> +};
> diff --git a/fs/xfs/xfs_fsverity.h b/fs/xfs/xfs_fsverity.h
> new file mode 100644
> index 0000000000000..277a9f856f518
> --- /dev/null
> +++ b/fs/xfs/xfs_fsverity.h
> @@ -0,0 +1,20 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +/*
> + * Copyright (C) 2022 Red Hat, Inc.
> + */
> +#ifndef __XFS_FSVERITY_H__
> +#define __XFS_FSVERITY_H__
> +
> +#ifdef CONFIG_FS_VERITY
> +void xfs_fsverity_cache_init(struct xfs_inode *ip);
> +void xfs_fsverity_cache_drop(struct xfs_inode *ip);
> +void xfs_fsverity_cache_destroy(struct xfs_inode *ip);
> +
> +extern const struct fsverity_operations xfs_fsverity_ops;
> +#else
> +# define xfs_fsverity_cache_init(ip) ((void)0)
> +# define xfs_fsverity_cache_drop(ip) ((void)0)
> +# define xfs_fsverity_cache_destroy(ip) ((void)0)
> +#endif /* CONFIG_FS_VERITY */
> +
> +#endif /* __XFS_FSVERITY_H__ */
> diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
> index 01bbdbec6663f..0757062c318d0 100644
> --- a/fs/xfs/xfs_icache.c
> +++ b/fs/xfs/xfs_icache.c
> @@ -28,6 +28,7 @@
> #include "xfs_da_format.h"
> #include "xfs_dir2.h"
> #include "xfs_imeta.h"
> +#include "xfs_fsverity.h"
>
> #include <linux/iversion.h>
>
> @@ -118,6 +119,7 @@ xfs_inode_alloc(
> spin_lock_init(&ip->i_ioend_lock);
> ip->i_next_unlinked = NULLAGINO;
> ip->i_prev_unlinked = 0;
> + xfs_fsverity_cache_init(ip);
>
> return ip;
> }
> @@ -129,6 +131,8 @@ xfs_inode_free_callback(
> struct inode *inode = container_of(head, struct inode, i_rcu);
> struct xfs_inode *ip = XFS_I(inode);
>
> + xfs_fsverity_cache_destroy(ip);
> +
> switch (VFS_I(ip)->i_mode & S_IFMT) {
> case S_IFREG:
> case S_IFDIR:
> diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
> index 5a202706fc4a4..70c5700132b3e 100644
> --- a/fs/xfs/xfs_inode.h
> +++ b/fs/xfs/xfs_inode.h
> @@ -96,6 +96,9 @@ typedef struct xfs_inode {
> spinlock_t i_ioend_lock;
> struct work_struct i_ioend_work;
> struct list_head i_ioend_list;
> +#ifdef CONFIG_FS_VERITY
> + struct xarray i_merkle_blocks;
> +#endif
So, is this fine like this or do you plan to change it to a per-ag
mapping? I suppose Christoph is against adding it to inodes [1]
[1]: https://lore.kernel.org/linux-xfs/ZfecSzBoVDW5328l@infradead.org/
> } xfs_inode_t;
>
> static inline bool xfs_inode_on_unlinked_list(const struct xfs_inode *ip)
> @@ -391,6 +394,8 @@ static inline bool xfs_inode_needs_cow_around(struct xfs_inode *ip)
> */
> #define XFS_IREMAPPING (1U << 15)
>
> +#define XFS_VERITY_CONSTRUCTION (1U << 16) /* merkle tree construction */
> +
> /* All inode state flags related to inode reclaim. */
> #define XFS_ALL_IRECLAIM_FLAGS (XFS_IRECLAIMABLE | \
> XFS_IRECLAIM | \
> diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
> index 42a1e1f23d3b3..4e398884c46ae 100644
> --- a/fs/xfs/xfs_super.c
> +++ b/fs/xfs/xfs_super.c
> @@ -30,6 +30,7 @@
> #include "xfs_filestream.h"
> #include "xfs_quota.h"
> #include "xfs_sysfs.h"
> +#include "xfs_fsverity.h"
> #include "xfs_ondisk.h"
> #include "xfs_rmap_item.h"
> #include "xfs_refcount_item.h"
> @@ -53,6 +54,7 @@
> #include <linux/fs_context.h>
> #include <linux/fs_parser.h>
> #include <linux/fsverity.h>
> +#include <linux/iomap.h>
>
> static const struct super_operations xfs_super_operations;
>
> @@ -672,6 +674,8 @@ xfs_fs_destroy_inode(
> ASSERT(!rwsem_is_locked(&inode->i_rwsem));
> XFS_STATS_INC(ip->i_mount, vn_rele);
> XFS_STATS_INC(ip->i_mount, vn_remove);
> + if (fsverity_active(inode))
> + xfs_fsverity_cache_drop(ip);
> fsverity_cleanup_inode(inode);
> xfs_inode_mark_reclaimable(ip);
> }
> @@ -1534,6 +1538,9 @@ xfs_fs_fill_super(
> sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP | QTYPE_MASK_PRJ;
> #endif
> sb->s_op = &xfs_super_operations;
> +#ifdef CONFIG_FS_VERITY
> + sb->s_vop = &xfs_fsverity_ops;
> +#endif
>
> /*
> * Delay mount work if the debug hook is set. This is debug
> @@ -1775,10 +1782,20 @@ xfs_fs_fill_super(
> xfs_warn(mp,
> "EXPERIMENTAL parent pointer feature enabled. Use at your own risk!");
>
> + if (xfs_has_verity(mp))
> + xfs_alert(mp,
> + "EXPERIMENTAL fsverity feature in use. Use at your own risk!");
> +
> error = xfs_mountfs(mp);
> if (error)
> goto out_filestream_unmount;
>
> +#ifdef CONFIG_FS_VERITY
> + error = iomap_init_fsverity(mp->m_super);
> + if (error)
> + goto out_unmount;
> +#endif
> +
> root = igrab(VFS_I(mp->m_rootip));
> if (!root) {
> error = -ENOENT;
> diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
> index e2992b0115ad2..86a8702c1e27c 100644
> --- a/fs/xfs/xfs_trace.h
> +++ b/fs/xfs/xfs_trace.h
> @@ -5908,6 +5908,38 @@ TRACE_EVENT(xfs_growfs_check_rtgeom,
> );
> #endif /* CONFIG_XFS_RT */
>
> +#ifdef CONFIG_FS_VERITY
> +DECLARE_EVENT_CLASS(xfs_fsverity_cache_class,
> + TP_PROTO(struct xfs_inode *ip, unsigned long key, unsigned long caller_ip),
> + TP_ARGS(ip, key, caller_ip),
> + TP_STRUCT__entry(
> + __field(dev_t, dev)
> + __field(xfs_ino_t, ino)
> + __field(unsigned long, key)
> + __field(void *, caller_ip)
> + ),
> + TP_fast_assign(
> + __entry->dev = ip->i_mount->m_super->s_dev;
> + __entry->ino = ip->i_ino;
> + __entry->key = key;
> + __entry->caller_ip = (void *)caller_ip;
> + ),
> + TP_printk("dev %d:%d ino 0x%llx key 0x%lx caller %pS",
> + MAJOR(__entry->dev), MINOR(__entry->dev),
> + __entry->ino,
> + __entry->key,
> + __entry->caller_ip)
> +)
> +
> +#define DEFINE_XFS_FSVERITY_CACHE_EVENT(name) \
> +DEFINE_EVENT(xfs_fsverity_cache_class, name, \
> + TP_PROTO(struct xfs_inode *ip, unsigned long key, unsigned long caller_ip), \
> + TP_ARGS(ip, key, caller_ip))
> +DEFINE_XFS_FSVERITY_CACHE_EVENT(xfs_fsverity_cache_load);
> +DEFINE_XFS_FSVERITY_CACHE_EVENT(xfs_fsverity_cache_store);
> +DEFINE_XFS_FSVERITY_CACHE_EVENT(xfs_fsverity_cache_drop);
> +#endif /* CONFIG_FS_VERITY */
> +
> #endif /* _TRACE_XFS_H */
>
> #undef TRACE_INCLUDE_PATH
>
--
- Andrey
^ permalink raw reply [flat|nested] 111+ messages in thread
* Re: [PATCH 13/29] xfs: add fs-verity support
2024-04-02 8:42 ` Andrey Albershteyn
@ 2024-04-02 16:34 ` Darrick J. Wong
2024-04-25 1:14 ` Darrick J. Wong
0 siblings, 1 reply; 111+ messages in thread
From: Darrick J. Wong @ 2024-04-02 16:34 UTC (permalink / raw)
To: Andrey Albershteyn; +Cc: ebiggers, linux-xfs, linux-fsdevel, fsverity, hch
On Tue, Apr 02, 2024 at 10:42:44AM +0200, Andrey Albershteyn wrote:
> On 2024-03-29 17:39:27, Darrick J. Wong wrote:
> > From: Andrey Albershteyn <aalbersh@redhat.com>
> >
> > Add integration with fs-verity. XFS stores fs-verity metadata in
> > the extended file attributes. The metadata consists of a verity
> > descriptor and Merkle tree blocks.
> >
> > The descriptor is stored under "vdesc" extended attribute. The
> > Merkle tree blocks are stored under binary indexes which are offsets
> > into the Merkle tree.
> >
> > When fs-verity is enabled on an inode, the XFS_VERITY_CONSTRUCTION
> > flag is set, meaning that the Merkle tree is being built. The
> > initialization ends with storing the verity descriptor and setting
> > the inode on-disk flag (XFS_DIFLAG2_VERITY).
> >
> > The verification on read is done in read path of iomap.
> >
> > Signed-off-by: Andrey Albershteyn <aalbersh@redhat.com>
> > Reviewed-by: Darrick J. Wong <djwong@kernel.org>
> > [djwong: replace caching implementation with an xarray, other cleanups]
> > Signed-off-by: Darrick J. Wong <djwong@kernel.org>
> > ---
> > fs/xfs/Makefile | 2
> > fs/xfs/libxfs/xfs_attr.c | 41 +++
> > fs/xfs/libxfs/xfs_attr.h | 1
> > fs/xfs/libxfs/xfs_da_format.h | 14 +
> > fs/xfs/libxfs/xfs_ondisk.h | 3
> > fs/xfs/libxfs/xfs_verity.c | 58 ++++
> > fs/xfs/libxfs/xfs_verity.h | 13 +
> > fs/xfs/xfs_fsverity.c | 559 +++++++++++++++++++++++++++++++++++++++++
> > fs/xfs/xfs_fsverity.h | 20 +
> > fs/xfs/xfs_icache.c | 4
> > fs/xfs/xfs_inode.h | 5
> > fs/xfs/xfs_super.c | 17 +
> > fs/xfs/xfs_trace.h | 32 ++
> > 13 files changed, 769 insertions(+)
> > create mode 100644 fs/xfs/libxfs/xfs_verity.c
> > create mode 100644 fs/xfs/libxfs/xfs_verity.h
> > create mode 100644 fs/xfs/xfs_fsverity.c
> > create mode 100644 fs/xfs/xfs_fsverity.h
> >
> >
<snip>
> > diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
> > index 5a202706fc4a4..70c5700132b3e 100644
> > --- a/fs/xfs/xfs_inode.h
> > +++ b/fs/xfs/xfs_inode.h
> > @@ -96,6 +96,9 @@ typedef struct xfs_inode {
> > spinlock_t i_ioend_lock;
> > struct work_struct i_ioend_work;
> > struct list_head i_ioend_list;
> > +#ifdef CONFIG_FS_VERITY
> > + struct xarray i_merkle_blocks;
> > +#endif
>
> So, is this fine like this or do you plan to change it to per-ag
> mapping? I suppose Christoph against adding it to inodes [1]
>
> [1]: https://lore.kernel.org/linux-xfs/ZfecSzBoVDW5328l@infradead.org/
Still working on it. hch and I have been nitpicking the parent pointers
patchset. I think a per-ag rhashtable would work in principle, but I
don't know how well it will handle a 128-bit key.
--D
> > } xfs_inode_t;
> >
> > static inline bool xfs_inode_on_unlinked_list(const struct xfs_inode *ip)
> > @@ -391,6 +394,8 @@ static inline bool xfs_inode_needs_cow_around(struct xfs_inode *ip)
> > */
> > #define XFS_IREMAPPING (1U << 15)
> >
> > +#define XFS_VERITY_CONSTRUCTION (1U << 16) /* merkle tree construction */
> > +
> > /* All inode state flags related to inode reclaim. */
> > #define XFS_ALL_IRECLAIM_FLAGS (XFS_IRECLAIMABLE | \
> > XFS_IRECLAIM | \
> > diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
> > index 42a1e1f23d3b3..4e398884c46ae 100644
> > --- a/fs/xfs/xfs_super.c
> > +++ b/fs/xfs/xfs_super.c
> > @@ -30,6 +30,7 @@
> > #include "xfs_filestream.h"
> > #include "xfs_quota.h"
> > #include "xfs_sysfs.h"
> > +#include "xfs_fsverity.h"
> > #include "xfs_ondisk.h"
> > #include "xfs_rmap_item.h"
> > #include "xfs_refcount_item.h"
> > @@ -53,6 +54,7 @@
> > #include <linux/fs_context.h>
> > #include <linux/fs_parser.h>
> > #include <linux/fsverity.h>
> > +#include <linux/iomap.h>
> >
> > static const struct super_operations xfs_super_operations;
> >
> > @@ -672,6 +674,8 @@ xfs_fs_destroy_inode(
> > ASSERT(!rwsem_is_locked(&inode->i_rwsem));
> > XFS_STATS_INC(ip->i_mount, vn_rele);
> > XFS_STATS_INC(ip->i_mount, vn_remove);
> > + if (fsverity_active(inode))
> > + xfs_fsverity_cache_drop(ip);
> > fsverity_cleanup_inode(inode);
> > xfs_inode_mark_reclaimable(ip);
> > }
> > @@ -1534,6 +1538,9 @@ xfs_fs_fill_super(
> > sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP | QTYPE_MASK_PRJ;
> > #endif
> > sb->s_op = &xfs_super_operations;
> > +#ifdef CONFIG_FS_VERITY
> > + sb->s_vop = &xfs_fsverity_ops;
> > +#endif
> >
> > /*
> > * Delay mount work if the debug hook is set. This is debug
> > @@ -1775,10 +1782,20 @@ xfs_fs_fill_super(
> > xfs_warn(mp,
> > "EXPERIMENTAL parent pointer feature enabled. Use at your own risk!");
> >
> > + if (xfs_has_verity(mp))
> > + xfs_alert(mp,
> > + "EXPERIMENTAL fsverity feature in use. Use at your own risk!");
> > +
> > error = xfs_mountfs(mp);
> > if (error)
> > goto out_filestream_unmount;
> >
> > +#ifdef CONFIG_FS_VERITY
> > + error = iomap_init_fsverity(mp->m_super);
> > + if (error)
> > + goto out_unmount;
> > +#endif
> > +
> > root = igrab(VFS_I(mp->m_rootip));
> > if (!root) {
> > error = -ENOENT;
> > diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
> > index e2992b0115ad2..86a8702c1e27c 100644
> > --- a/fs/xfs/xfs_trace.h
> > +++ b/fs/xfs/xfs_trace.h
> > @@ -5908,6 +5908,38 @@ TRACE_EVENT(xfs_growfs_check_rtgeom,
> > );
> > #endif /* CONFIG_XFS_RT */
> >
> > +#ifdef CONFIG_FS_VERITY
> > +DECLARE_EVENT_CLASS(xfs_fsverity_cache_class,
> > + TP_PROTO(struct xfs_inode *ip, unsigned long key, unsigned long caller_ip),
> > + TP_ARGS(ip, key, caller_ip),
> > + TP_STRUCT__entry(
> > + __field(dev_t, dev)
> > + __field(xfs_ino_t, ino)
> > + __field(unsigned long, key)
> > + __field(void *, caller_ip)
> > + ),
> > + TP_fast_assign(
> > + __entry->dev = ip->i_mount->m_super->s_dev;
> > + __entry->ino = ip->i_ino;
> > + __entry->key = key;
> > + __entry->caller_ip = (void *)caller_ip;
> > + ),
> > + TP_printk("dev %d:%d ino 0x%llx key 0x%lx caller %pS",
> > + MAJOR(__entry->dev), MINOR(__entry->dev),
> > + __entry->ino,
> > + __entry->key,
> > + __entry->caller_ip)
> > +)
> > +
> > +#define DEFINE_XFS_FSVERITY_CACHE_EVENT(name) \
> > +DEFINE_EVENT(xfs_fsverity_cache_class, name, \
> > + TP_PROTO(struct xfs_inode *ip, unsigned long key, unsigned long caller_ip), \
> > + TP_ARGS(ip, key, caller_ip))
> > +DEFINE_XFS_FSVERITY_CACHE_EVENT(xfs_fsverity_cache_load);
> > +DEFINE_XFS_FSVERITY_CACHE_EVENT(xfs_fsverity_cache_store);
> > +DEFINE_XFS_FSVERITY_CACHE_EVENT(xfs_fsverity_cache_drop);
> > +#endif /* CONFIG_XFS_VERITY */
> > +
> > #endif /* _TRACE_XFS_H */
> >
> > #undef TRACE_INCLUDE_PATH
> >
>
> --
> - Andrey
>
>
^ permalink raw reply [flat|nested] 111+ messages in thread
* Re: [PATCH 13/29] xfs: add fs-verity support
2024-04-02 16:34 ` Darrick J. Wong
@ 2024-04-25 1:14 ` Darrick J. Wong
0 siblings, 0 replies; 111+ messages in thread
From: Darrick J. Wong @ 2024-04-25 1:14 UTC (permalink / raw)
To: Andrey Albershteyn; +Cc: ebiggers, linux-xfs, linux-fsdevel, fsverity, hch
On Tue, Apr 02, 2024 at 09:34:53AM -0700, Darrick J. Wong wrote:
> On Tue, Apr 02, 2024 at 10:42:44AM +0200, Andrey Albershteyn wrote:
> > On 2024-03-29 17:39:27, Darrick J. Wong wrote:
> > > From: Andrey Albershteyn <aalbersh@redhat.com>
> > >
> > > Add integration with fs-verity. The XFS store fs-verity metadata in
> > > the extended file attributes. The metadata consist of verity
> > > descriptor and Merkle tree blocks.
> > >
> > > The descriptor is stored under "vdesc" extended attribute. The
> > > Merkle tree blocks are stored under binary indexes which are offsets
> > > into the Merkle tree.
> > >
> > > When fs-verity is enabled on an inode, the XFS_IVERITY_CONSTRUCTION
> > > flag is set meaning that the Merkle tree is being build. The
> > > initialization ends with storing of verity descriptor and setting
> > > inode on-disk flag (XFS_DIFLAG2_VERITY).
> > >
> > > The verification on read is done in read path of iomap.
> > >
> > > Signed-off-by: Andrey Albershteyn <aalbersh@redhat.com>
> > > Reviewed-by: Darrick J. Wong <djwong@kernel.org>
> > > [djwong: replace caching implementation with an xarray, other cleanups]
> > > Signed-off-by: Darrick J. Wong <djwong@kernel.org>
> > > ---
> > > fs/xfs/Makefile | 2
> > > fs/xfs/libxfs/xfs_attr.c | 41 +++
> > > fs/xfs/libxfs/xfs_attr.h | 1
> > > fs/xfs/libxfs/xfs_da_format.h | 14 +
> > > fs/xfs/libxfs/xfs_ondisk.h | 3
> > > fs/xfs/libxfs/xfs_verity.c | 58 ++++
> > > fs/xfs/libxfs/xfs_verity.h | 13 +
> > > fs/xfs/xfs_fsverity.c | 559 +++++++++++++++++++++++++++++++++++++++++
> > > fs/xfs/xfs_fsverity.h | 20 +
> > > fs/xfs/xfs_icache.c | 4
> > > fs/xfs/xfs_inode.h | 5
> > > fs/xfs/xfs_super.c | 17 +
> > > fs/xfs/xfs_trace.h | 32 ++
> > > 13 files changed, 769 insertions(+)
> > > create mode 100644 fs/xfs/libxfs/xfs_verity.c
> > > create mode 100644 fs/xfs/libxfs/xfs_verity.h
> > > create mode 100644 fs/xfs/xfs_fsverity.c
> > > create mode 100644 fs/xfs/xfs_fsverity.h
> > >
> > >
>
> <snip>
>
> > > diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
> > > index 5a202706fc4a4..70c5700132b3e 100644
> > > --- a/fs/xfs/xfs_inode.h
> > > +++ b/fs/xfs/xfs_inode.h
> > > @@ -96,6 +96,9 @@ typedef struct xfs_inode {
> > > spinlock_t i_ioend_lock;
> > > struct work_struct i_ioend_work;
> > > struct list_head i_ioend_list;
> > > +#ifdef CONFIG_FS_VERITY
> > > + struct xarray i_merkle_blocks;
> > > +#endif
> >
> > So, is this fine like this or do you plan to change it to per-ag
> > mapping? I suppose Christoph against adding it to inodes [1]
> >
> > [1]: https://lore.kernel.org/linux-xfs/ZfecSzBoVDW5328l@infradead.org/
>
> Still working on it. hch and I have been nitpicking the parent pointers
> patchset. I think a per-ag rhashtable would work in principle, but I
> don't know how well it will handle a 128-bit key.
Update: works fine, and now we don't need to add 16 bytes of overhead to
every xfs_inode everywhere.
--D
> --D
>
> > > } xfs_inode_t;
> > >
> > > static inline bool xfs_inode_on_unlinked_list(const struct xfs_inode *ip)
> > > @@ -391,6 +394,8 @@ static inline bool xfs_inode_needs_cow_around(struct xfs_inode *ip)
> > > */
> > > #define XFS_IREMAPPING (1U << 15)
> > >
> > > +#define XFS_VERITY_CONSTRUCTION (1U << 16) /* merkle tree construction */
> > > +
> > > /* All inode state flags related to inode reclaim. */
> > > #define XFS_ALL_IRECLAIM_FLAGS (XFS_IRECLAIMABLE | \
> > > XFS_IRECLAIM | \
> > > diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
> > > index 42a1e1f23d3b3..4e398884c46ae 100644
> > > --- a/fs/xfs/xfs_super.c
> > > +++ b/fs/xfs/xfs_super.c
> > > @@ -30,6 +30,7 @@
> > > #include "xfs_filestream.h"
> > > #include "xfs_quota.h"
> > > #include "xfs_sysfs.h"
> > > +#include "xfs_fsverity.h"
> > > #include "xfs_ondisk.h"
> > > #include "xfs_rmap_item.h"
> > > #include "xfs_refcount_item.h"
> > > @@ -53,6 +54,7 @@
> > > #include <linux/fs_context.h>
> > > #include <linux/fs_parser.h>
> > > #include <linux/fsverity.h>
> > > +#include <linux/iomap.h>
> > >
> > > static const struct super_operations xfs_super_operations;
> > >
> > > @@ -672,6 +674,8 @@ xfs_fs_destroy_inode(
> > > ASSERT(!rwsem_is_locked(&inode->i_rwsem));
> > > XFS_STATS_INC(ip->i_mount, vn_rele);
> > > XFS_STATS_INC(ip->i_mount, vn_remove);
> > > + if (fsverity_active(inode))
> > > + xfs_fsverity_cache_drop(ip);
> > > fsverity_cleanup_inode(inode);
> > > xfs_inode_mark_reclaimable(ip);
> > > }
> > > @@ -1534,6 +1538,9 @@ xfs_fs_fill_super(
> > > sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP | QTYPE_MASK_PRJ;
> > > #endif
> > > sb->s_op = &xfs_super_operations;
> > > +#ifdef CONFIG_FS_VERITY
> > > + sb->s_vop = &xfs_fsverity_ops;
> > > +#endif
> > >
> > > /*
> > > * Delay mount work if the debug hook is set. This is debug
> > > @@ -1775,10 +1782,20 @@ xfs_fs_fill_super(
> > > xfs_warn(mp,
> > > "EXPERIMENTAL parent pointer feature enabled. Use at your own risk!");
> > >
> > > + if (xfs_has_verity(mp))
> > > + xfs_alert(mp,
> > > + "EXPERIMENTAL fsverity feature in use. Use at your own risk!");
> > > +
> > > error = xfs_mountfs(mp);
> > > if (error)
> > > goto out_filestream_unmount;
> > >
> > > +#ifdef CONFIG_FS_VERITY
> > > + error = iomap_init_fsverity(mp->m_super);
> > > + if (error)
> > > + goto out_unmount;
> > > +#endif
> > > +
> > > root = igrab(VFS_I(mp->m_rootip));
> > > if (!root) {
> > > error = -ENOENT;
> > > diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
> > > index e2992b0115ad2..86a8702c1e27c 100644
> > > --- a/fs/xfs/xfs_trace.h
> > > +++ b/fs/xfs/xfs_trace.h
> > > @@ -5908,6 +5908,38 @@ TRACE_EVENT(xfs_growfs_check_rtgeom,
> > > );
> > > #endif /* CONFIG_XFS_RT */
> > >
> > > +#ifdef CONFIG_FS_VERITY
> > > +DECLARE_EVENT_CLASS(xfs_fsverity_cache_class,
> > > + TP_PROTO(struct xfs_inode *ip, unsigned long key, unsigned long caller_ip),
> > > + TP_ARGS(ip, key, caller_ip),
> > > + TP_STRUCT__entry(
> > > + __field(dev_t, dev)
> > > + __field(xfs_ino_t, ino)
> > > + __field(unsigned long, key)
> > > + __field(void *, caller_ip)
> > > + ),
> > > + TP_fast_assign(
> > > + __entry->dev = ip->i_mount->m_super->s_dev;
> > > + __entry->ino = ip->i_ino;
> > > + __entry->key = key;
> > > + __entry->caller_ip = (void *)caller_ip;
> > > + ),
> > > + TP_printk("dev %d:%d ino 0x%llx key 0x%lx caller %pS",
> > > + MAJOR(__entry->dev), MINOR(__entry->dev),
> > > + __entry->ino,
> > > + __entry->key,
> > > + __entry->caller_ip)
> > > +)
> > > +
> > > +#define DEFINE_XFS_FSVERITY_CACHE_EVENT(name) \
> > > +DEFINE_EVENT(xfs_fsverity_cache_class, name, \
> > > + TP_PROTO(struct xfs_inode *ip, unsigned long key, unsigned long caller_ip), \
> > > + TP_ARGS(ip, key, caller_ip))
> > > +DEFINE_XFS_FSVERITY_CACHE_EVENT(xfs_fsverity_cache_load);
> > > +DEFINE_XFS_FSVERITY_CACHE_EVENT(xfs_fsverity_cache_store);
> > > +DEFINE_XFS_FSVERITY_CACHE_EVENT(xfs_fsverity_cache_drop);
> > > +#endif /* CONFIG_XFS_VERITY */
> > > +
> > > #endif /* _TRACE_XFS_H */
> > >
> > > #undef TRACE_INCLUDE_PATH
> > >
> >
> > --
> > - Andrey
> >
> >
>
^ permalink raw reply [flat|nested] 111+ messages in thread
* [PATCH 14/29] xfs: create a per-mount shrinker for verity inodes merkle tree blocks
2024-03-30 0:32 ` [PATCHSET v5.5 2/2] xfs: fs-verity support Darrick J. Wong
` (12 preceding siblings ...)
2024-03-30 0:39 ` [PATCH 13/29] xfs: add fs-verity support Darrick J. Wong
@ 2024-03-30 0:39 ` Darrick J. Wong
2024-04-05 3:16 ` Eric Biggers
2024-03-30 0:39 ` [PATCH 15/29] xfs: create an icache tag for files with cached " Darrick J. Wong
` (14 subsequent siblings)
28 siblings, 1 reply; 111+ messages in thread
From: Darrick J. Wong @ 2024-03-30 0:39 UTC (permalink / raw)
To: djwong, ebiggers, aalbersh; +Cc: linux-xfs, linux-fsdevel, fsverity
From: Darrick J. Wong <djwong@kernel.org>
Create a shrinker for an entire filesystem that will walk the inodes
looking for inodes that are caching merkle tree blocks, and invoke
shrink functions on that cache. The actual details of shrinking merkle
tree caches are left for subsequent patches.
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
---
fs/xfs/xfs_fsverity.c | 77 +++++++++++++++++++++++++++++++++++++++++++++++++
fs/xfs/xfs_fsverity.h | 5 +++
fs/xfs/xfs_mount.c | 10 ++++++
fs/xfs/xfs_mount.h | 6 ++++
fs/xfs/xfs_trace.h | 20 +++++++++++++
5 files changed, 117 insertions(+), 1 deletion(-)
diff --git a/fs/xfs/xfs_fsverity.c b/fs/xfs/xfs_fsverity.c
index a4a52575fb3d5..46640a495e705 100644
--- a/fs/xfs/xfs_fsverity.c
+++ b/fs/xfs/xfs_fsverity.c
@@ -20,6 +20,7 @@
#include "xfs_trace.h"
#include "xfs_quota.h"
#include "xfs_fsverity.h"
+#include "xfs_icache.h"
#include <linux/fsverity.h>
/*
@@ -276,6 +277,82 @@ xfs_fsverity_delete_merkle_block(
return xfs_attr_removename(&args, false);
}
+/* Count the merkle tree blocks that we might be able to reclaim. */
+static unsigned long
+xfs_fsverity_shrinker_count(
+ struct shrinker *shrink,
+ struct shrink_control *sc)
+{
+ struct xfs_mount *mp = shrink->private_data;
+ s64 count;
+
+ if (!xfs_has_verity(mp))
+ return SHRINK_EMPTY;
+
+ count = percpu_counter_sum_positive(&mp->m_verity_blocks);
+
+ trace_xfs_fsverity_shrinker_count(mp, count, _RET_IP_);
+ return min_t(s64, ULONG_MAX, count);
+}
+
+/* Actually try to reclaim merkle tree blocks. */
+static unsigned long
+xfs_fsverity_shrinker_scan(
+ struct shrinker *shrink,
+ struct shrink_control *sc)
+{
+ struct xfs_mount *mp = shrink->private_data;
+
+ if (!xfs_has_verity(mp))
+ return SHRINK_STOP;
+
+ return 0;
+}
+
+/* Register a shrinker so we can release cached merkle tree blocks. */
+int
+xfs_fsverity_register_shrinker(
+ struct xfs_mount *mp)
+{
+ int error;
+
+ if (!xfs_has_verity(mp))
+ return 0;
+
+ error = percpu_counter_init(&mp->m_verity_blocks, 0, GFP_KERNEL);
+ if (error)
+ return error;
+
+ mp->m_verity_shrinker = shrinker_alloc(0, "xfs-verity:%s",
+ mp->m_super->s_id);
+ if (!mp->m_verity_shrinker) {
+ percpu_counter_destroy(&mp->m_verity_blocks);
+ return -ENOMEM;
+ }
+
+ mp->m_verity_shrinker->count_objects = xfs_fsverity_shrinker_count;
+ mp->m_verity_shrinker->scan_objects = xfs_fsverity_shrinker_scan;
+ mp->m_verity_shrinker->seeks = 0;
+ mp->m_verity_shrinker->private_data = mp;
+
+ shrinker_register(mp->m_verity_shrinker);
+
+ return 0;
+}
+
+/* Unregister the merkle tree block shrinker. */
+void
+xfs_fsverity_unregister_shrinker(struct xfs_mount *mp)
+{
+ if (!xfs_has_verity(mp))
+ return;
+
+ ASSERT(percpu_counter_sum(&mp->m_verity_blocks) == 0);
+
+ shrinker_free(mp->m_verity_shrinker);
+ percpu_counter_destroy(&mp->m_verity_blocks);
+}
+
/* Retrieve the verity descriptor. */
static int
xfs_fsverity_get_descriptor(
diff --git a/fs/xfs/xfs_fsverity.h b/fs/xfs/xfs_fsverity.h
index 277a9f856f518..7148e0c4dde1f 100644
--- a/fs/xfs/xfs_fsverity.h
+++ b/fs/xfs/xfs_fsverity.h
@@ -10,11 +10,16 @@ void xfs_fsverity_cache_init(struct xfs_inode *ip);
void xfs_fsverity_cache_drop(struct xfs_inode *ip);
void xfs_fsverity_cache_destroy(struct xfs_inode *ip);
+int xfs_fsverity_register_shrinker(struct xfs_mount *mp);
+void xfs_fsverity_unregister_shrinker(struct xfs_mount *mp);
+
extern const struct fsverity_operations xfs_fsverity_ops;
#else
# define xfs_fsverity_cache_init(ip) ((void)0)
# define xfs_fsverity_cache_drop(ip) ((void)0)
# define xfs_fsverity_cache_destroy(ip) ((void)0)
+# define xfs_fsverity_register_shrinker(mp) (0)
+# define xfs_fsverity_unregister_shrinker(mp) ((void)0)
#endif /* CONFIG_FS_VERITY */
#endif /* __XFS_FSVERITY_H__ */
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 20949adb5f80b..1e6a0bc933897 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -38,6 +38,7 @@
#include "xfs_rtgroup.h"
#include "xfs_rtrmap_btree.h"
#include "xfs_rtrefcount_btree.h"
+#include "xfs_fsverity.h"
#include "scrub/stats.h"
static DEFINE_MUTEX(xfs_uuid_table_mutex);
@@ -918,6 +919,10 @@ xfs_mountfs(
if (error)
goto out_fail_wait;
+ error = xfs_fsverity_register_shrinker(mp);
+ if (error)
+ goto out_inodegc_shrinker;
+
/*
* Log's mount-time initialization. The first part of recovery can place
* some items on the AIL, to be handled when recovery is finished or
@@ -928,7 +933,7 @@ xfs_mountfs(
XFS_FSB_TO_BB(mp, sbp->sb_logblocks));
if (error) {
xfs_warn(mp, "log mount failed");
- goto out_inodegc_shrinker;
+ goto out_verity_shrinker;
}
error = xfs_mountfs_set_perm_log_features(mp);
@@ -1137,6 +1142,8 @@ xfs_mountfs(
*/
xfs_unmount_flush_inodes(mp);
xfs_log_mount_cancel(mp);
+ out_verity_shrinker:
+ xfs_fsverity_unregister_shrinker(mp);
out_inodegc_shrinker:
shrinker_free(mp->m_inodegc_shrinker);
out_fail_wait:
@@ -1228,6 +1235,7 @@ xfs_unmountfs(
#if defined(DEBUG)
xfs_errortag_clearall(mp);
#endif
+ xfs_fsverity_unregister_shrinker(mp);
shrinker_free(mp->m_inodegc_shrinker);
xfs_free_rtgroups(mp);
xfs_free_perag(mp);
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 08ec154eb0e98..2c354da8fa55b 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -274,6 +274,12 @@ typedef struct xfs_mount {
/* Hook to feed dirent updates to an active online repair. */
struct xfs_hooks m_dir_update_hooks;
+
+#ifdef CONFIG_FS_VERITY
+ /* shrinker and cached blocks count for merkle trees */
+ struct shrinker *m_verity_shrinker;
+ struct percpu_counter m_verity_blocks;
+#endif
} xfs_mount_t;
#define M_IGEO(mp) (&(mp)->m_ino_geo)
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 86a8702c1e27c..e3edd43661bd9 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -5938,6 +5938,26 @@ DEFINE_EVENT(xfs_fsverity_cache_class, name, \
DEFINE_XFS_FSVERITY_CACHE_EVENT(xfs_fsverity_cache_load);
DEFINE_XFS_FSVERITY_CACHE_EVENT(xfs_fsverity_cache_store);
DEFINE_XFS_FSVERITY_CACHE_EVENT(xfs_fsverity_cache_drop);
+
+TRACE_EVENT(xfs_fsverity_shrinker_count,
+ TP_PROTO(struct xfs_mount *mp, unsigned long long count,
+ unsigned long caller_ip),
+ TP_ARGS(mp, count, caller_ip),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(unsigned long long, count)
+ __field(void *, caller_ip)
+ ),
+ TP_fast_assign(
+ __entry->dev = mp->m_super->s_dev;
+ __entry->count = count;
+ __entry->caller_ip = (void *)caller_ip;
+ ),
+ TP_printk("dev %d:%d count %llu caller %pS",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->count,
+ __entry->caller_ip)
+)
#endif /* CONFIG_XFS_VERITY */
#endif /* _TRACE_XFS_H */
^ permalink raw reply related [flat|nested] 111+ messages in thread
* Re: [PATCH 14/29] xfs: create a per-mount shrinker for verity inodes merkle tree blocks
2024-03-30 0:39 ` [PATCH 14/29] xfs: create a per-mount shrinker for verity inodes merkle tree blocks Darrick J. Wong
@ 2024-04-05 3:16 ` Eric Biggers
2024-04-24 17:39 ` Darrick J. Wong
0 siblings, 1 reply; 111+ messages in thread
From: Eric Biggers @ 2024-04-05 3:16 UTC (permalink / raw)
To: Darrick J. Wong; +Cc: aalbersh, linux-xfs, linux-fsdevel, fsverity
On Fri, Mar 29, 2024 at 05:39:43PM -0700, Darrick J. Wong wrote:
> +/* Count the merkle tree blocks that we might be able to reclaim. */
> +static unsigned long
> +xfs_fsverity_shrinker_count(
> + struct shrinker *shrink,
> + struct shrink_control *sc)
> +{
> + struct xfs_mount *mp = shrink->private_data;
> + s64 count;
> +
> + if (!xfs_has_verity(mp))
> + return SHRINK_EMPTY;
> +
> + count = percpu_counter_sum_positive(&mp->m_verity_blocks);
> +
> + trace_xfs_fsverity_shrinker_count(mp, count, _RET_IP_);
> + return min_t(s64, ULONG_MAX, count);
On 64-bit systems this always returns ULONG_MAX.
- Eric
^ permalink raw reply [flat|nested] 111+ messages in thread
* Re: [PATCH 14/29] xfs: create a per-mount shrinker for verity inodes merkle tree blocks
2024-04-05 3:16 ` Eric Biggers
@ 2024-04-24 17:39 ` Darrick J. Wong
0 siblings, 0 replies; 111+ messages in thread
From: Darrick J. Wong @ 2024-04-24 17:39 UTC (permalink / raw)
To: Eric Biggers; +Cc: aalbersh, linux-xfs, linux-fsdevel, fsverity
On Thu, Apr 04, 2024 at 11:16:46PM -0400, Eric Biggers wrote:
> On Fri, Mar 29, 2024 at 05:39:43PM -0700, Darrick J. Wong wrote:
> > +/* Count the merkle tree blocks that we might be able to reclaim. */
> > +static unsigned long
> > +xfs_fsverity_shrinker_count(
> > + struct shrinker *shrink,
> > + struct shrink_control *sc)
> > +{
> > + struct xfs_mount *mp = shrink->private_data;
> > + s64 count;
> > +
> > + if (!xfs_has_verity(mp))
> > + return SHRINK_EMPTY;
> > +
> > + count = percpu_counter_sum_positive(&mp->m_verity_blocks);
> > +
> > + trace_xfs_fsverity_shrinker_count(mp, count, _RET_IP_);
> > + return min_t(s64, ULONG_MAX, count);
>
> On 64-bit systems this always returns ULONG_MAX.
Oops, I think I meant u64 there. It's confusing to me that
percpu_counter_sum_positive returns a signed type. :(
--D
> - Eric
>
^ permalink raw reply [flat|nested] 111+ messages in thread
* [PATCH 15/29] xfs: create an icache tag for files with cached merkle tree blocks
2024-03-30 0:32 ` [PATCHSET v5.5 2/2] xfs: fs-verity support Darrick J. Wong
` (13 preceding siblings ...)
2024-03-30 0:39 ` [PATCH 14/29] xfs: create a per-mount shrinker for verity inodes merkle tree blocks Darrick J. Wong
@ 2024-03-30 0:39 ` Darrick J. Wong
2024-03-30 0:40 ` [PATCH 16/29] xfs: shrink verity blob cache Darrick J. Wong
` (13 subsequent siblings)
28 siblings, 0 replies; 111+ messages in thread
From: Darrick J. Wong @ 2024-03-30 0:39 UTC (permalink / raw)
To: djwong, ebiggers, aalbersh; +Cc: linux-xfs, linux-fsdevel, fsverity
From: Darrick J. Wong <djwong@kernel.org>
Create a radix tree tag for the inode cache so that merkle tree block
shrinkers can find verity inodes quickly.
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
---
fs/xfs/xfs_fsverity.c | 30 ++++++++++++++++++
fs/xfs/xfs_fsverity.h | 4 ++
fs/xfs/xfs_icache.c | 81 +++++++++++++++++++++++++++++++++++++++++++++++++
fs/xfs/xfs_icache.h | 8 +++++
fs/xfs/xfs_trace.h | 23 ++++++++++++++
5 files changed, 145 insertions(+), 1 deletion(-)
diff --git a/fs/xfs/xfs_fsverity.c b/fs/xfs/xfs_fsverity.c
index 46640a495e705..37876ce612540 100644
--- a/fs/xfs/xfs_fsverity.c
+++ b/fs/xfs/xfs_fsverity.c
@@ -295,18 +295,46 @@ xfs_fsverity_shrinker_count(
return min_t(s64, ULONG_MAX, count);
}
+struct xfs_fsverity_scan {
+ struct xfs_icwalk icw;
+ struct shrink_control *sc;
+
+ unsigned long scanned;
+ unsigned long freed;
+};
+
+/* Scan an inode as part of a verity scan. */
+int
+xfs_fsverity_scan_inode(
+ struct xfs_inode *ip,
+ struct xfs_icwalk *icw)
+{
+ xfs_irele(ip);
+ return 0;
+}
+
/* Actually try to reclaim merkle tree blocks. */
static unsigned long
xfs_fsverity_shrinker_scan(
struct shrinker *shrink,
struct shrink_control *sc)
{
+ struct xfs_fsverity_scan vs = {
+ .sc = sc,
+ };
struct xfs_mount *mp = shrink->private_data;
+ int error;
if (!xfs_has_verity(mp))
return SHRINK_STOP;
- return 0;
+ error = xfs_icwalk_verity(mp, &vs.icw);
+ if (error)
+ xfs_alert(mp, "%s: verity scan failed, error %d", __func__,
+ error);
+
+ trace_xfs_fsverity_shrinker_scan(mp, vs.scanned, vs.freed, _RET_IP_);
+ return vs.freed;
}
/* Register a shrinker so we can release cached merkle tree blocks. */
diff --git a/fs/xfs/xfs_fsverity.h b/fs/xfs/xfs_fsverity.h
index 7148e0c4dde1f..21ba0d82f26d8 100644
--- a/fs/xfs/xfs_fsverity.h
+++ b/fs/xfs/xfs_fsverity.h
@@ -13,6 +13,9 @@ void xfs_fsverity_cache_destroy(struct xfs_inode *ip);
int xfs_fsverity_register_shrinker(struct xfs_mount *mp);
void xfs_fsverity_unregister_shrinker(struct xfs_mount *mp);
+struct xfs_icwalk;
+int xfs_fsverity_scan_inode(struct xfs_inode *ip, struct xfs_icwalk *icw);
+
extern const struct fsverity_operations xfs_fsverity_ops;
#else
# define xfs_fsverity_cache_init(ip) ((void)0)
@@ -20,6 +23,7 @@ extern const struct fsverity_operations xfs_fsverity_ops;
# define xfs_fsverity_cache_destroy(ip) ((void)0)
# define xfs_fsverity_register_shrinker(mp) (0)
# define xfs_fsverity_unregister_shrinker(mp) ((void)0)
+# define xfs_fsverity_scan_inode(ip, icw) (0)
#endif /* CONFIG_FS_VERITY */
#endif /* __XFS_FSVERITY_H__ */
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index 0757062c318d0..424133f900739 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -38,6 +38,8 @@
#define XFS_ICI_RECLAIM_TAG 0
/* Inode has speculative preallocations (posteof or cow) to clean. */
#define XFS_ICI_BLOCKGC_TAG 1
+/* Inode has incore merkle tree blocks */
+#define XFS_ICI_VERITY_TAG 2
/*
* The goal for walking incore inodes. These can correspond with incore inode
@@ -47,6 +49,7 @@ enum xfs_icwalk_goal {
/* Goals directly associated with tagged inodes. */
XFS_ICWALK_BLOCKGC = XFS_ICI_BLOCKGC_TAG,
XFS_ICWALK_RECLAIM = XFS_ICI_RECLAIM_TAG,
+ XFS_ICWALK_VERITY = XFS_ICI_VERITY_TAG,
};
static int xfs_icwalk(struct xfs_mount *mp,
@@ -1649,6 +1652,7 @@ xfs_icwalk_igrab(
{
switch (goal) {
case XFS_ICWALK_BLOCKGC:
+ case XFS_ICWALK_VERITY:
return xfs_blockgc_igrab(ip);
case XFS_ICWALK_RECLAIM:
return xfs_reclaim_igrab(ip, icw);
@@ -1677,6 +1681,9 @@ xfs_icwalk_process_inode(
case XFS_ICWALK_RECLAIM:
xfs_reclaim_inode(ip, pag);
break;
+ case XFS_ICWALK_VERITY:
+ error = xfs_fsverity_scan_inode(ip, icw);
+ break;
}
return error;
}
@@ -1793,6 +1800,80 @@ xfs_icwalk_ag(
return last_error;
}
+#ifdef CONFIG_FS_VERITY
+/* Mark this inode as having cached merkle tree blocks */
+void
+xfs_inode_set_verity_tag(
+ struct xfs_inode *ip)
+{
+ struct xfs_mount *mp = ip->i_mount;
+ struct xfs_perag *pag;
+
+ pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
+ if (!pag)
+ return;
+
+ spin_lock(&pag->pag_ici_lock);
+ xfs_perag_set_inode_tag(pag, XFS_INO_TO_AGINO(mp, ip->i_ino),
+ XFS_ICI_VERITY_TAG);
+ spin_unlock(&pag->pag_ici_lock);
+ xfs_perag_put(pag);
+}
+
+/* Mark this inode as not having cached merkle tree blocks */
+void
+xfs_inode_clear_verity_tag(
+ struct xfs_inode *ip)
+{
+ struct xfs_mount *mp = ip->i_mount;
+ struct xfs_perag *pag;
+
+ pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
+ if (!pag)
+ return;
+
+ spin_lock(&pag->pag_ici_lock);
+ xfs_perag_clear_inode_tag(pag, XFS_INO_TO_AGINO(mp, ip->i_ino),
+ XFS_ICI_VERITY_TAG);
+ spin_unlock(&pag->pag_ici_lock);
+ xfs_perag_put(pag);
+}
+
+/* Walk all the verity inodes in the filesystem. */
+int
+xfs_icwalk_verity(
+ struct xfs_mount *mp,
+ struct xfs_icwalk *icw)
+{
+ struct xfs_perag *pag;
+ xfs_agnumber_t agno = 0;
+ int error = 0;
+
+ for_each_perag_tag(mp, agno, pag, XFS_ICWALK_VERITY) {
+ error = xfs_icwalk_ag(pag, XFS_ICWALK_VERITY, icw);
+ if (error)
+ break;
+
+ if ((icw->icw_flags & XFS_ICWALK_FLAG_SCAN_LIMIT) &&
+ icw->icw_scan_limit <= 0) {
+ xfs_perag_rele(pag);
+ break;
+ }
+ }
+
+ return error;
+}
+
+/* Stop a verity incore walk scan. */
+void
+xfs_icwalk_verity_stop(
+ struct xfs_icwalk *icw)
+{
+ icw->icw_flags |= XFS_ICWALK_FLAG_SCAN_LIMIT;
+ icw->icw_scan_limit = -1;
+}
+#endif /* CONFIG_FS_VERITY */
+
/* Walk all incore inodes to achieve a given goal. */
static int
xfs_icwalk(
diff --git a/fs/xfs/xfs_icache.h b/fs/xfs/xfs_icache.h
index 905944dafbe53..621ce0078e08b 100644
--- a/fs/xfs/xfs_icache.h
+++ b/fs/xfs/xfs_icache.h
@@ -81,4 +81,12 @@ void xfs_inodegc_stop(struct xfs_mount *mp);
void xfs_inodegc_start(struct xfs_mount *mp);
int xfs_inodegc_register_shrinker(struct xfs_mount *mp);
+#ifdef CONFIG_FS_VERITY
+int xfs_icwalk_verity(struct xfs_mount *mp, struct xfs_icwalk *icw);
+void xfs_icwalk_verity_stop(struct xfs_icwalk *icw);
+
+void xfs_inode_set_verity_tag(struct xfs_inode *ip);
+void xfs_inode_clear_verity_tag(struct xfs_inode *ip);
+#endif /* CONFIG_FS_VERITY */
+
#endif
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index e3edd43661bd9..a5b811c1731d7 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -5958,6 +5958,29 @@ TRACE_EVENT(xfs_fsverity_shrinker_count,
__entry->count,
__entry->caller_ip)
)
+
+TRACE_EVENT(xfs_fsverity_shrinker_scan,
+ TP_PROTO(struct xfs_mount *mp, unsigned long scanned,
+ unsigned long freed, unsigned long caller_ip),
+ TP_ARGS(mp, scanned, freed, caller_ip),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(unsigned long, scanned)
+ __field(unsigned long, freed)
+ __field(void *, caller_ip)
+ ),
+ TP_fast_assign(
+ __entry->dev = mp->m_super->s_dev;
+ __entry->scanned = scanned;
+ __entry->freed = freed;
+ __entry->caller_ip = (void *)caller_ip;
+ ),
+ TP_printk("dev %d:%d scanned %lu freed %lu caller %pS",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->scanned,
+ __entry->freed,
+ __entry->caller_ip)
+)
#endif /* CONFIG_XFS_VERITY */
#endif /* _TRACE_XFS_H */
^ permalink raw reply related [flat|nested] 111+ messages in thread
* [PATCH 16/29] xfs: shrink verity blob cache
2024-03-30 0:32 ` [PATCHSET v5.5 2/2] xfs: fs-verity support Darrick J. Wong
` (14 preceding siblings ...)
2024-03-30 0:39 ` [PATCH 15/29] xfs: create an icache tag for files with cached " Darrick J. Wong
@ 2024-03-30 0:40 ` Darrick J. Wong
2024-03-30 0:40 ` [PATCH 17/29] xfs: only allow the verity iflag for regular files Darrick J. Wong
` (12 subsequent siblings)
28 siblings, 0 replies; 111+ messages in thread
From: Darrick J. Wong @ 2024-03-30 0:40 UTC (permalink / raw)
To: djwong, ebiggers, aalbersh; +Cc: linux-xfs, linux-fsdevel, fsverity
From: Darrick J. Wong <djwong@kernel.org>
Add some shrinkers so that reclaim can free cached merkle tree blocks
when memory is tight. We add a shrinkref variable to bias reclaim
against freeing the upper levels of the merkle tree in the hope of
maintaining read performance.
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
---
fs/xfs/xfs_fsverity.c | 91 ++++++++++++++++++++++++++++++++++++++++++++++++-
fs/xfs/xfs_trace.h | 1 +
2 files changed, 90 insertions(+), 2 deletions(-)
diff --git a/fs/xfs/xfs_fsverity.c b/fs/xfs/xfs_fsverity.c
index 37876ce612540..d675b0f71bde5 100644
--- a/fs/xfs/xfs_fsverity.c
+++ b/fs/xfs/xfs_fsverity.c
@@ -44,6 +44,9 @@ struct xfs_merkle_blob {
/* refcount of this item; the cache holds its own ref */
refcount_t refcount;
+ /* number of times the shrinker should ignore this item */
+ atomic_t shrinkref;
+
unsigned long flags;
/* Pointer to the merkle tree block, which is power-of-2 sized */
@@ -74,6 +77,7 @@ xfs_merkle_blob_alloc(
/* Caller owns this refcount. */
refcount_set(&mk->refcount, 1);
+ atomic_set(&mk->shrinkref, 0);
mk->flags = 0;
return mk;
}
@@ -106,8 +110,10 @@ xfs_fsverity_cache_drop(
struct xfs_inode *ip)
{
XA_STATE(xas, &ip->i_merkle_blocks, 0);
+ struct xfs_mount *mp = ip->i_mount;
struct xfs_merkle_blob *mk;
unsigned long flags;
+ s64 freed = 0;
xas_lock_irqsave(&xas, flags);
xas_for_each(&xas, mk, ULONG_MAX) {
@@ -115,10 +121,13 @@ xfs_fsverity_cache_drop(
trace_xfs_fsverity_cache_drop(ip, xas.xa_index, _RET_IP_);
+ freed++;
xas_store(&xas, NULL);
xfs_merkle_blob_rele(mk);
}
+ percpu_counter_sub(&mp->m_verity_blocks, freed);
xas_unlock_irqrestore(&xas, flags);
+ xfs_inode_clear_verity_tag(ip);
}
/* Destroy the merkle tree block cache */
@@ -177,6 +186,7 @@ xfs_fsverity_cache_store(
unsigned long key,
struct xfs_merkle_blob *mk)
{
+ struct xfs_mount *mp = ip->i_mount;
struct xfs_merkle_blob *old;
unsigned long flags;
@@ -191,6 +201,8 @@ xfs_fsverity_cache_store(
old = __xa_cmpxchg(&ip->i_merkle_blocks, key, NULL, mk,
GFP_KERNEL);
} while (old && !refcount_inc_not_zero(&old->refcount));
+ if (!old)
+ percpu_counter_add(&mp->m_verity_blocks, 1);
xa_unlock_irqrestore(&ip->i_merkle_blocks, flags);
if (old == NULL) {
@@ -303,12 +315,73 @@ struct xfs_fsverity_scan {
unsigned long freed;
};
+/* Reclaim inactive merkle tree blocks that have run out of second chances. */
+static void
+xfs_fsverity_cache_reclaim(
+ struct xfs_inode *ip,
+ struct xfs_fsverity_scan *vs)
+{
+ XA_STATE(xas, &ip->i_merkle_blocks, 0);
+ struct xfs_mount *mp = ip->i_mount;
+ struct xfs_merkle_blob *mk;
+ unsigned long flags;
+ s64 freed = 0;
+
+ xas_lock_irqsave(&xas, flags);
+ xas_for_each(&xas, mk, ULONG_MAX) {
+ /*
+ * Tell the shrinker that we scanned this merkle tree block,
+ * even if we don't remove it.
+ */
+ vs->scanned++;
+ if (vs->sc->nr_to_scan-- == 0)
+ break;
+
+ /* Retain if there are active references */
+ if (refcount_read(&mk->refcount) > 1)
+ continue;
+
+ /* Ignore if the item still has lru refcount */
+ if (atomic_add_unless(&mk->shrinkref, -1, 0))
+ continue;
+
+ trace_xfs_fsverity_cache_reclaim(ip, xas.xa_index, _RET_IP_);
+
+ freed++;
+ xas_store(&xas, NULL);
+ xfs_merkle_blob_rele(mk);
+ }
+ percpu_counter_sub(&mp->m_verity_blocks, freed);
+ xas_unlock_irqrestore(&xas, flags);
+
+ /*
+ * Try to clear the verity tree tag if we reclaimed all the cached
+ * blocks. On the flag setting side, we should have IOLOCK_SHARED.
+ */
+ xfs_ilock(ip, XFS_IOLOCK_EXCL);
+ if (xa_empty(&ip->i_merkle_blocks))
+ xfs_inode_clear_verity_tag(ip);
+ xfs_iunlock(ip, XFS_IOLOCK_EXCL);
+
+ vs->freed += freed;
+}
+
/* Scan an inode as part of a verity scan. */
int
xfs_fsverity_scan_inode(
- struct xfs_inode *ip,
- struct xfs_icwalk *icw)
+ struct xfs_inode *ip,
+ struct xfs_icwalk *icw)
{
+ struct xfs_fsverity_scan *vs;
+
+ vs = container_of(icw, struct xfs_fsverity_scan, icw);
+
+ if (vs->sc->nr_to_scan > 0)
+ xfs_fsverity_cache_reclaim(ip, vs);
+
+ if (vs->sc->nr_to_scan == 0)
+ xfs_icwalk_verity_stop(icw);
+
xfs_irele(ip);
return 0;
}
@@ -606,6 +679,13 @@ xfs_fsverity_read_merkle(
* Free the new cache blob and continue with the existing one.
*/
xfs_merkle_blob_rele(new_mk);
+ } else {
+ /*
+ * We added this merkle tree block to the cache; tag the inode
+ * so that reclaim will scan this inode. The caller holds
+ * IOLOCK_SHARED this will not race with the shrinker.
+ */
+ xfs_inode_set_verity_tag(ip);
}
out_hit:
@@ -613,6 +693,13 @@ xfs_fsverity_read_merkle(
block->context = mk;
block->verified = test_bit(XFS_MERKLE_BLOB_VERIFIED_BIT, &mk->flags);
+ /*
+ * Prioritize keeping the root-adjacent levels cached if this isn't a
+ * streaming read.
+ */
+ if (req->level >= 0)
+ atomic_set(&mk->shrinkref, req->level + 1);
+
return 0;
out_new_mk:
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index a5b811c1731d7..ac7201a24b107 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -5938,6 +5938,7 @@ DEFINE_EVENT(xfs_fsverity_cache_class, name, \
DEFINE_XFS_FSVERITY_CACHE_EVENT(xfs_fsverity_cache_load);
DEFINE_XFS_FSVERITY_CACHE_EVENT(xfs_fsverity_cache_store);
DEFINE_XFS_FSVERITY_CACHE_EVENT(xfs_fsverity_cache_drop);
+DEFINE_XFS_FSVERITY_CACHE_EVENT(xfs_fsverity_cache_reclaim);
TRACE_EVENT(xfs_fsverity_shrinker_count,
TP_PROTO(struct xfs_mount *mp, unsigned long long count,
^ permalink raw reply related [flat|nested] 111+ messages in thread
* [PATCH 17/29] xfs: only allow the verity iflag for regular files
2024-03-30 0:32 ` [PATCHSET v5.5 2/2] xfs: fs-verity support Darrick J. Wong
` (15 preceding siblings ...)
2024-03-30 0:40 ` [PATCH 16/29] xfs: shrink verity blob cache Darrick J. Wong
@ 2024-03-30 0:40 ` Darrick J. Wong
2024-04-02 12:52 ` Andrey Albershteyn
2024-03-30 0:40 ` [PATCH 18/29] xfs: don't store trailing zeroes of merkle tree blocks Darrick J. Wong
` (11 subsequent siblings)
28 siblings, 1 reply; 111+ messages in thread
From: Darrick J. Wong @ 2024-03-30 0:40 UTC (permalink / raw)
To: djwong, ebiggers, aalbersh; +Cc: linux-xfs, linux-fsdevel, fsverity
From: Darrick J. Wong <djwong@kernel.org>
Only regular files can have fsverity enabled on them, so check this in
the inode verifier.
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
---
fs/xfs/libxfs/xfs_inode_buf.c | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c
index adc457da52ef0..dae0f27d3961b 100644
--- a/fs/xfs/libxfs/xfs_inode_buf.c
+++ b/fs/xfs/libxfs/xfs_inode_buf.c
@@ -695,6 +695,14 @@ xfs_dinode_verify(
!xfs_has_rtreflink(mp))
return __this_address;
+ /* only regular files can have fsverity */
+ if (flags2 & XFS_DIFLAG2_VERITY) {
+ if (!xfs_has_verity(mp))
+ return __this_address;
+ if ((mode & S_IFMT) != S_IFREG)
+ return __this_address;
+ }
+
/* COW extent size hint validation */
fa = xfs_inode_validate_cowextsize(mp, be32_to_cpu(dip->di_cowextsize),
mode, flags, flags2);
^ permalink raw reply related [flat|nested] 111+ messages in thread
* Re: [PATCH 17/29] xfs: only allow the verity iflag for regular files
2024-03-30 0:40 ` [PATCH 17/29] xfs: only allow the verity iflag for regular files Darrick J. Wong
@ 2024-04-02 12:52 ` Andrey Albershteyn
0 siblings, 0 replies; 111+ messages in thread
From: Andrey Albershteyn @ 2024-04-02 12:52 UTC (permalink / raw)
To: Darrick J. Wong; +Cc: ebiggers, linux-xfs, linux-fsdevel, fsverity
On 2024-03-29 17:40:30, Darrick J. Wong wrote:
> From: Darrick J. Wong <djwong@kernel.org>
>
> Only regular files can have fsverity enabled on them, so check this in
> the inode verifier.
>
> Signed-off-by: Darrick J. Wong <djwong@kernel.org>
> ---
> fs/xfs/libxfs/xfs_inode_buf.c | 8 ++++++++
> 1 file changed, 8 insertions(+)
>
>
> diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c
> index adc457da52ef0..dae0f27d3961b 100644
> --- a/fs/xfs/libxfs/xfs_inode_buf.c
> +++ b/fs/xfs/libxfs/xfs_inode_buf.c
> @@ -695,6 +695,14 @@ xfs_dinode_verify(
> !xfs_has_rtreflink(mp))
> return __this_address;
>
> + /* only regular files can have fsverity */
> + if (flags2 & XFS_DIFLAG2_VERITY) {
> + if (!xfs_has_verity(mp))
> + return __this_address;
> + if ((mode & S_IFMT) != S_IFREG)
> + return __this_address;
> + }
> +
> /* COW extent size hint validation */
> fa = xfs_inode_validate_cowextsize(mp, be32_to_cpu(dip->di_cowextsize),
> mode, flags, flags2);
>
Looks good to me:
Reviewed-by: Andrey Albershteyn <aalbersh@redhat.com>
--
- Andrey
^ permalink raw reply [flat|nested] 111+ messages in thread
* [PATCH 18/29] xfs: don't store trailing zeroes of merkle tree blocks
2024-03-30 0:32 ` [PATCHSET v5.5 2/2] xfs: fs-verity support Darrick J. Wong
` (16 preceding siblings ...)
2024-03-30 0:40 ` [PATCH 17/29] xfs: only allow the verity iflag for regular files Darrick J. Wong
@ 2024-03-30 0:40 ` Darrick J. Wong
2024-03-30 0:41 ` [PATCH 19/29] xfs: use merkle tree offset as attr hash Darrick J. Wong
` (10 subsequent siblings)
28 siblings, 0 replies; 111+ messages in thread
From: Darrick J. Wong @ 2024-03-30 0:40 UTC (permalink / raw)
To: djwong, ebiggers, aalbersh; +Cc: linux-xfs, linux-fsdevel, fsverity
From: Darrick J. Wong <djwong@kernel.org>
As a minor space optimization, don't store trailing zeroes of merkle
tree blocks to reduce space consumption and copying overhead. This
really only affects the rightmost blocks at each level of the tree.
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Andrey Albershteyn <aalbersh@redhat.com>
---
fs/xfs/xfs_fsverity.c | 11 ++++++-----
1 file changed, 6 insertions(+), 5 deletions(-)
diff --git a/fs/xfs/xfs_fsverity.c b/fs/xfs/xfs_fsverity.c
index d675b0f71bde5..8d87d411a9ccb 100644
--- a/fs/xfs/xfs_fsverity.c
+++ b/fs/xfs/xfs_fsverity.c
@@ -667,11 +667,6 @@ xfs_fsverity_read_merkle(
if (error)
goto out_new_mk;
- if (!args.valuelen) {
- error = -ENODATA;
- goto out_new_mk;
- }
-
mk = xfs_fsverity_cache_store(ip, key, new_mk);
if (mk != new_mk) {
/*
@@ -722,6 +717,12 @@ xfs_fsverity_write_merkle(
.value = (void *)buf,
.valuelen = size,
};
+ const char *p = buf + size - 1;
+
+ /* Don't store trailing zeroes. */
+ while (p >= (const char *)buf && *p == 0)
+ p--;
+ args.valuelen = p - (const char *)buf + 1;
xfs_fsverity_init_merkle_args(ip, &name, pos, &args);
return xfs_attr_setname(&args, false);
^ permalink raw reply related [flat|nested] 111+ messages in thread
* [PATCH 19/29] xfs: use merkle tree offset as attr hash
2024-03-30 0:32 ` [PATCHSET v5.5 2/2] xfs: fs-verity support Darrick J. Wong
` (17 preceding siblings ...)
2024-03-30 0:40 ` [PATCH 18/29] xfs: don't store trailing zeroes of merkle tree blocks Darrick J. Wong
@ 2024-03-30 0:41 ` Darrick J. Wong
2024-03-30 0:41 ` [PATCH 20/29] xfs: don't bother storing merkle tree blocks for zeroed data blocks Darrick J. Wong
` (9 subsequent siblings)
28 siblings, 0 replies; 111+ messages in thread
From: Darrick J. Wong @ 2024-03-30 0:41 UTC (permalink / raw)
To: djwong, ebiggers, aalbersh; +Cc: linux-xfs, linux-fsdevel, fsverity
From: Darrick J. Wong <djwong@kernel.org>
I was exploring the fsverity metadata with xfs_db after creating a 220MB
verity file, and I noticed the following in the debugger output:
entries[0-75] = [hashval,nameidx,incomplete,root,secure,local,parent,verity]
0:[0,4076,0,0,0,0,0,1]
1:[0,1472,0,0,0,1,0,1]
2:[0x800,4056,0,0,0,0,0,1]
3:[0x800,4036,0,0,0,0,0,1]
...
72:[0x12000,2716,0,0,0,0,0,1]
73:[0x12000,2696,0,0,0,0,0,1]
74:[0x12800,2676,0,0,0,0,0,1]
75:[0x12800,2656,0,0,0,0,0,1]
...
nvlist[0].merkle_off = 0x18000
nvlist[1].merkle_off = 0
nvlist[2].merkle_off = 0x19000
nvlist[3].merkle_off = 0x1000
...
nvlist[71].merkle_off = 0x5b000
nvlist[72].merkle_off = 0x44000
nvlist[73].merkle_off = 0x5c000
nvlist[74].merkle_off = 0x45000
nvlist[75].merkle_off = 0x5d000
Within just this attr leaf block, there are 76 attr entries, but only 38
distinct hash values. There are 415 merkle tree blocks for this file,
but we already have hash collisions. This isn't good performance from
the standard da hash function because we're mostly shifting and rolling
zeroes around.
However, we don't even have to do that much work -- the merkle tree
block keys are themselves u64 values. Truncate that value to 32 bits
(the size of xfs_dahash_t) and use that for the hash. We won't have any
collisions between merkle tree blocks until that tree grows to 2^32
blocks. On a 4k block filesystem, we won't hit that unless the file
contains more than 2^49 bytes, assuming sha256.
As a side effect, the keys for merkle tree blocks get written out in
roughly sequential order, though I didn't observe any change in
performance.
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Andrey Albershteyn <aalbersh@redhat.com>
---
fs/xfs/libxfs/xfs_attr.c | 2 ++
fs/xfs/libxfs/xfs_da_format.h | 6 ++++++
fs/xfs/libxfs/xfs_verity.c | 16 ++++++++++++++++
fs/xfs/libxfs/xfs_verity.h | 1 +
4 files changed, 25 insertions(+)
diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c
index c3f686411e378..3d3335148a212 100644
--- a/fs/xfs/libxfs/xfs_attr.c
+++ b/fs/xfs/libxfs/xfs_attr.c
@@ -431,6 +431,8 @@ xfs_attr_hashval(
if (attr_flags & XFS_ATTR_PARENT)
return xfs_parent_hashattr(mp, name, namelen, value, valuelen);
+ if (attr_flags & XFS_ATTR_VERITY)
+ return xfs_verity_hashname(name, namelen);
return xfs_attr_hashname(name, namelen);
}
diff --git a/fs/xfs/libxfs/xfs_da_format.h b/fs/xfs/libxfs/xfs_da_format.h
index 679cf5b4ad4be..4f5fd22ac4f96 100644
--- a/fs/xfs/libxfs/xfs_da_format.h
+++ b/fs/xfs/libxfs/xfs_da_format.h
@@ -936,4 +936,10 @@ struct xfs_merkle_key {
#define XFS_VERITY_DESCRIPTOR_NAME "vdesc"
#define XFS_VERITY_DESCRIPTOR_NAME_LEN (sizeof(XFS_VERITY_DESCRIPTOR_NAME) - 1)
+/*
+ * Merkle tree blocks cannot be smaller than 1k in size, so the hash function
+ * can right-shift the merkle offset by this amount without losing anything.
+ */
+#define XFS_VERITY_HASH_SHIFT (10)
+
#endif /* __XFS_DA_FORMAT_H__ */
diff --git a/fs/xfs/libxfs/xfs_verity.c b/fs/xfs/libxfs/xfs_verity.c
index bda38b3c19698..d72f04043fe5e 100644
--- a/fs/xfs/libxfs/xfs_verity.c
+++ b/fs/xfs/libxfs/xfs_verity.c
@@ -56,3 +56,19 @@ xfs_verity_namecheck(
return true;
}
+
+/*
+ * Compute name hash for a verity attribute. For merkle tree blocks, we want
+ * to use the merkle tree block offset as the hash value to avoid collisions
+ * between blocks unless the merkle tree becomes larger than 2^32 blocks.
+ */
+xfs_dahash_t
+xfs_verity_hashname(
+ const uint8_t *name,
+ unsigned int namelen)
+{
+ if (namelen != sizeof(struct xfs_merkle_key))
+ return xfs_attr_hashname(name, namelen);
+
+ return xfs_merkle_key_from_disk(name, namelen) >> XFS_VERITY_HASH_SHIFT;
+}
diff --git a/fs/xfs/libxfs/xfs_verity.h b/fs/xfs/libxfs/xfs_verity.h
index c01cc0678bc04..72e41ecd046f1 100644
--- a/fs/xfs/libxfs/xfs_verity.h
+++ b/fs/xfs/libxfs/xfs_verity.h
@@ -9,5 +9,6 @@ void xfs_merkle_key_to_disk(struct xfs_merkle_key *key, uint64_t offset);
uint64_t xfs_merkle_key_from_disk(const void *attr_name, int namelen);
bool xfs_verity_namecheck(unsigned int attr_flags, const void *name,
int namelen);
+xfs_dahash_t xfs_verity_hashname(const uint8_t *name, unsigned int namelen);
#endif /* __XFS_VERITY_H__ */
^ permalink raw reply related [flat|nested] 111+ messages in thread
* [PATCH 20/29] xfs: don't bother storing merkle tree blocks for zeroed data blocks
2024-03-30 0:32 ` [PATCHSET v5.5 2/2] xfs: fs-verity support Darrick J. Wong
` (18 preceding siblings ...)
2024-03-30 0:41 ` [PATCH 19/29] xfs: use merkle tree offset as attr hash Darrick J. Wong
@ 2024-03-30 0:41 ` Darrick J. Wong
2024-03-30 0:41 ` [PATCH 21/29] xfs: add fs-verity ioctls Darrick J. Wong
` (8 subsequent siblings)
28 siblings, 0 replies; 111+ messages in thread
From: Darrick J. Wong @ 2024-03-30 0:41 UTC (permalink / raw)
To: djwong, ebiggers, aalbersh; +Cc: linux-xfs, linux-fsdevel, fsverity
From: Darrick J. Wong <djwong@kernel.org>
Now that fsverity tells our merkle tree io functions about what a hash
of a data block full of zeroes looks like, we can use this information
to avoid writing out merkle tree blocks for sparse regions of the file.
For verified gold master images this can save quite a bit of overhead.
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Andrey Albershteyn <aalbersh@redhat.com>
---
fs/xfs/xfs_fsverity.c | 37 ++++++++++++++++++++++++++++++++++---
1 file changed, 34 insertions(+), 3 deletions(-)
diff --git a/fs/xfs/xfs_fsverity.c b/fs/xfs/xfs_fsverity.c
index 8d87d411a9ccb..2806466ceaeab 100644
--- a/fs/xfs/xfs_fsverity.c
+++ b/fs/xfs/xfs_fsverity.c
@@ -664,6 +664,20 @@ xfs_fsverity_read_merkle(
/* Read the block in from disk and try to store it in the cache. */
xfs_fsverity_init_merkle_args(ip, &name, block->offset, &args);
error = xfs_attr_getname(&args);
+ if (error == -ENOATTR) {
+ u8 *p;
+ unsigned int i;
+
+ /*
+ * No attribute found. Synthesize a buffer full of the zero
+ * digests on the assumption that we elided them at write time.
+ */
+ for (i = 0, p = new_mk->data;
+ i < block->size;
+ i += req->digest_size, p += req->digest_size)
+ memcpy(p, req->zero_digest, req->digest_size);
+ error = 0;
+ }
if (error)
goto out_new_mk;
@@ -717,12 +731,29 @@ xfs_fsverity_write_merkle(
.value = (void *)buf,
.valuelen = size,
};
- const char *p = buf + size - 1;
+ const char *p;
+ unsigned int i;
- /* Don't store trailing zeroes. */
+ /*
+ * If this is a block full of hashes of zeroed blocks, don't bother
+ * storing the block. We can synthesize them later.
+ */
+ for (i = 0, p = buf;
+ i < size;
+ i += req->digest_size, p += req->digest_size)
+ if (memcmp(p, req->zero_digest, req->digest_size))
+ break;
+ if (i == size)
+ return 0;
+
+ /*
+ * Don't store trailing zeroes. Store at least one byte so that the
+ * block cannot be mistaken for an elided one.
+ */
+ p = buf + size - 1;
while (p >= (const char *)buf && *p == 0)
p--;
- args.valuelen = p - (const char *)buf + 1;
+ args.valuelen = max(1, p - (const char *)buf + 1);
xfs_fsverity_init_merkle_args(ip, &name, pos, &args);
return xfs_attr_setname(&args, false);
^ permalink raw reply related [flat|nested] 111+ messages in thread
* [PATCH 21/29] xfs: add fs-verity ioctls
2024-03-30 0:32 ` [PATCHSET v5.5 2/2] xfs: fs-verity support Darrick J. Wong
` (19 preceding siblings ...)
2024-03-30 0:41 ` [PATCH 20/29] xfs: don't bother storing merkle tree blocks for zeroed data blocks Darrick J. Wong
@ 2024-03-30 0:41 ` Darrick J. Wong
2024-03-30 0:41 ` [PATCH 22/29] xfs: advertise fs-verity being available on filesystem Darrick J. Wong
` (7 subsequent siblings)
28 siblings, 0 replies; 111+ messages in thread
From: Darrick J. Wong @ 2024-03-30 0:41 UTC (permalink / raw)
To: djwong, ebiggers, aalbersh; +Cc: linux-xfs, linux-fsdevel, fsverity
From: Andrey Albershteyn <aalbersh@redhat.com>
Add fs-verity ioctls to enable verity, dump metadata (descriptor and Merkle
tree pages), and obtain a file's digest.
Signed-off-by: Andrey Albershteyn <aalbersh@redhat.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
[djwong: remove unnecessary casting]
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
---
fs/xfs/xfs_ioctl.c | 16 ++++++++++++++++
1 file changed, 16 insertions(+)
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index 9d161e16ccf32..0aa0ceb9ec153 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -50,6 +50,7 @@
#include <linux/fileattr.h>
#include <linux/security.h>
#include <linux/fsnotify.h>
+#include <linux/fsverity.h>
/*
* xfs_find_handle maps from userspace xfs_fsop_handlereq structure to
@@ -2711,6 +2712,21 @@ xfs_file_ioctl(
case XFS_IOC_MAP_FREESP:
return xfs_ioc_map_freesp(filp, arg);
+ case FS_IOC_ENABLE_VERITY:
+ if (!xfs_has_verity(mp))
+ return -EOPNOTSUPP;
+ return fsverity_ioctl_enable(filp, arg);
+
+ case FS_IOC_MEASURE_VERITY:
+ if (!xfs_has_verity(mp))
+ return -EOPNOTSUPP;
+ return fsverity_ioctl_measure(filp, arg);
+
+ case FS_IOC_READ_VERITY_METADATA:
+ if (!xfs_has_verity(mp))
+ return -EOPNOTSUPP;
+ return fsverity_ioctl_read_metadata(filp, arg);
+
default:
return -ENOTTY;
}
^ permalink raw reply related [flat|nested] 111+ messages in thread
* [PATCH 22/29] xfs: advertise fs-verity being available on filesystem
2024-03-30 0:32 ` [PATCHSET v5.5 2/2] xfs: fs-verity support Darrick J. Wong
` (20 preceding siblings ...)
2024-03-30 0:41 ` [PATCH 21/29] xfs: add fs-verity ioctls Darrick J. Wong
@ 2024-03-30 0:41 ` Darrick J. Wong
2024-04-02 13:44 ` Andrey Albershteyn
2024-03-30 0:42 ` [PATCH 23/29] xfs: make scrub aware of verity dinode flag Darrick J. Wong
` (6 subsequent siblings)
28 siblings, 1 reply; 111+ messages in thread
From: Darrick J. Wong @ 2024-03-30 0:41 UTC (permalink / raw)
To: djwong, ebiggers, aalbersh; +Cc: linux-xfs, linux-fsdevel, fsverity
From: Darrick J. Wong <djwong@kernel.org>
Advertise that this filesystem supports fsverity.
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
---
fs/xfs/libxfs/xfs_fs.h | 1 +
fs/xfs/libxfs/xfs_sb.c | 2 ++
2 files changed, 3 insertions(+)
diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h
index 6ede243fbecf7..af45a246eb1c1 100644
--- a/fs/xfs/libxfs/xfs_fs.h
+++ b/fs/xfs/libxfs/xfs_fs.h
@@ -247,6 +247,7 @@ typedef struct xfs_fsop_resblks {
/* file range exchange available to userspace */
#define XFS_FSOP_GEOM_FLAGS_EXCHANGE_RANGE (1 << 24)
+#define XFS_FSOP_GEOM_FLAGS_VERITY (1U << 28) /* fs-verity */
#define XFS_FSOP_GEOM_FLAGS_METADIR (1U << 29) /* metadata directories */
#define XFS_FSOP_GEOM_FLAGS_PARENT (1U << 30) /* parent pointers */
diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c
index 39b5083745d0e..24e22a2dea51c 100644
--- a/fs/xfs/libxfs/xfs_sb.c
+++ b/fs/xfs/libxfs/xfs_sb.c
@@ -1427,6 +1427,8 @@ xfs_fs_geometry(
geo->flags |= XFS_FSOP_GEOM_FLAGS_EXCHANGE_RANGE;
if (xfs_has_metadir(mp))
geo->flags |= XFS_FSOP_GEOM_FLAGS_METADIR;
+ if (xfs_has_verity(mp))
+ geo->flags |= XFS_FSOP_GEOM_FLAGS_VERITY;
geo->rtsectsize = sbp->sb_blocksize;
geo->dirblocksize = xfs_dir2_dirblock_bytes(sbp);
^ permalink raw reply related [flat|nested] 111+ messages in thread
* Re: [PATCH 22/29] xfs: advertise fs-verity being available on filesystem
2024-03-30 0:41 ` [PATCH 22/29] xfs: advertise fs-verity being available on filesystem Darrick J. Wong
@ 2024-04-02 13:44 ` Andrey Albershteyn
0 siblings, 0 replies; 111+ messages in thread
From: Andrey Albershteyn @ 2024-04-02 13:44 UTC (permalink / raw)
To: Darrick J. Wong; +Cc: ebiggers, linux-xfs, linux-fsdevel, fsverity
On 2024-03-29 17:41:48, Darrick J. Wong wrote:
> From: Darrick J. Wong <djwong@kernel.org>
>
> Advertise that this filesystem supports fsverity.
>
> Signed-off-by: Darrick J. Wong <djwong@kernel.org>
> ---
> fs/xfs/libxfs/xfs_fs.h | 1 +
> fs/xfs/libxfs/xfs_sb.c | 2 ++
> 2 files changed, 3 insertions(+)
>
>
> diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h
> index 6ede243fbecf7..af45a246eb1c1 100644
> --- a/fs/xfs/libxfs/xfs_fs.h
> +++ b/fs/xfs/libxfs/xfs_fs.h
> @@ -247,6 +247,7 @@ typedef struct xfs_fsop_resblks {
> /* file range exchange available to userspace */
> #define XFS_FSOP_GEOM_FLAGS_EXCHANGE_RANGE (1 << 24)
>
> +#define XFS_FSOP_GEOM_FLAGS_VERITY (1U << 28) /* fs-verity */
> #define XFS_FSOP_GEOM_FLAGS_METADIR (1U << 29) /* metadata directories */
> #define XFS_FSOP_GEOM_FLAGS_PARENT (1U << 30) /* parent pointers */
>
> diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c
> index 39b5083745d0e..24e22a2dea51c 100644
> --- a/fs/xfs/libxfs/xfs_sb.c
> +++ b/fs/xfs/libxfs/xfs_sb.c
> @@ -1427,6 +1427,8 @@ xfs_fs_geometry(
> geo->flags |= XFS_FSOP_GEOM_FLAGS_EXCHANGE_RANGE;
> if (xfs_has_metadir(mp))
> geo->flags |= XFS_FSOP_GEOM_FLAGS_METADIR;
> + if (xfs_has_verity(mp))
> + geo->flags |= XFS_FSOP_GEOM_FLAGS_VERITY;
> geo->rtsectsize = sbp->sb_blocksize;
> geo->dirblocksize = xfs_dir2_dirblock_bytes(sbp);
>
>
Looks good to me:
Reviewed-by: Andrey Albershteyn <aalbersh@redhat.com>
--
- Andrey
^ permalink raw reply [flat|nested] 111+ messages in thread
* [PATCH 23/29] xfs: make scrub aware of verity dinode flag
2024-03-30 0:32 ` [PATCHSET v5.5 2/2] xfs: fs-verity support Darrick J. Wong
` (21 preceding siblings ...)
2024-03-30 0:41 ` [PATCH 22/29] xfs: advertise fs-verity being available on filesystem Darrick J. Wong
@ 2024-03-30 0:42 ` Darrick J. Wong
2024-03-30 0:42 ` [PATCH 24/29] xfs: teach online repair to evaluate fsverity xattrs Darrick J. Wong
` (5 subsequent siblings)
28 siblings, 0 replies; 111+ messages in thread
From: Darrick J. Wong @ 2024-03-30 0:42 UTC (permalink / raw)
To: djwong, ebiggers, aalbersh; +Cc: linux-xfs, linux-fsdevel, fsverity
From: Andrey Albershteyn <aalbersh@redhat.com>
fs-verity adds a new inode flag, which causes scrub to fail because the
flag is not yet known to it.
Signed-off-by: Andrey Albershteyn <aalbersh@redhat.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
---
fs/xfs/scrub/attr.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/fs/xfs/scrub/attr.c b/fs/xfs/scrub/attr.c
index 1608d1e316c99..2e8a2b2e82fbd 100644
--- a/fs/xfs/scrub/attr.c
+++ b/fs/xfs/scrub/attr.c
@@ -514,7 +514,7 @@ xchk_xattr_rec(
/* Retrieve the entry and check it. */
hash = be32_to_cpu(ent->hashval);
badflags = ~(XFS_ATTR_LOCAL | XFS_ATTR_ROOT | XFS_ATTR_SECURE |
- XFS_ATTR_INCOMPLETE | XFS_ATTR_PARENT);
+ XFS_ATTR_INCOMPLETE | XFS_ATTR_PARENT | XFS_ATTR_VERITY);
if ((ent->flags & badflags) != 0) {
xchk_da_set_corrupt(ds, level);
return error;
^ permalink raw reply related [flat|nested] 111+ messages in thread
* [PATCH 24/29] xfs: teach online repair to evaluate fsverity xattrs
2024-03-30 0:32 ` [PATCHSET v5.5 2/2] xfs: fs-verity support Darrick J. Wong
` (22 preceding siblings ...)
2024-03-30 0:42 ` [PATCH 23/29] xfs: make scrub aware of verity dinode flag Darrick J. Wong
@ 2024-03-30 0:42 ` Darrick J. Wong
2024-04-02 15:42 ` Andrey Albershteyn
2024-03-30 0:42 ` [PATCH 25/29] xfs: report verity failures through the health system Darrick J. Wong
` (4 subsequent siblings)
28 siblings, 1 reply; 111+ messages in thread
From: Darrick J. Wong @ 2024-03-30 0:42 UTC (permalink / raw)
To: djwong, ebiggers, aalbersh; +Cc: linux-xfs, linux-fsdevel, fsverity
From: Darrick J. Wong <djwong@kernel.org>
Teach online repair to check for unused fsverity metadata and purge it
on reconstruction.
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
---
fs/xfs/scrub/attr.c | 139 ++++++++++++++++++++++++++++++++++++++++++++
fs/xfs/scrub/attr.h | 6 ++
fs/xfs/scrub/attr_repair.c | 50 ++++++++++++++++
fs/xfs/scrub/trace.c | 1
fs/xfs/scrub/trace.h | 31 ++++++++++
5 files changed, 226 insertions(+), 1 deletion(-)
diff --git a/fs/xfs/scrub/attr.c b/fs/xfs/scrub/attr.c
index 2e8a2b2e82fbd..be121625c14f0 100644
--- a/fs/xfs/scrub/attr.c
+++ b/fs/xfs/scrub/attr.c
@@ -18,6 +18,7 @@
#include "xfs_attr_leaf.h"
#include "xfs_attr_sf.h"
#include "xfs_parent.h"
+#include "xfs_verity.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/dabtree.h"
@@ -25,6 +26,8 @@
#include "scrub/listxattr.h"
#include "scrub/repair.h"
+#include <linux/fsverity.h>
+
/* Free the buffers linked from the xattr buffer. */
static void
xchk_xattr_buf_cleanup(
@@ -126,6 +129,53 @@ xchk_setup_xattr_buf(
return 0;
}
+#ifdef CONFIG_FS_VERITY
+/*
+ * Obtain merkle tree geometry information for a verity file so that we can
+ * perform sanity checks of the fsverity xattrs.
+ *
+ * The geometry is stored in the xattr scrub buffer (sc->buf). If the
+ * geometry lookup fails with -ENODATA or -EFSCORRUPTED, the scrub is marked
+ * incomplete, merkle_blocksize is zeroed, and setup continues. Returns 0 on
+ * success, or the error from whichever helper failed. Note that the ILOCK
+ * and transaction are only re-established on the success path; the error
+ * returns below leave them dropped.
+ */
+STATIC int
+xchk_xattr_setup_verity(
+ struct xfs_scrub *sc)
+{
+ struct xchk_xattr_buf *ab;
+ int error;
+
+ /*
+ * Drop the ILOCK and the transaction because loading the fsverity
+ * metadata will call into the xattr code. S_VERITY is enabled with
+ * IOLOCK_EXCL held, so it should not change here.
+ */
+ xchk_iunlock(sc, XFS_ILOCK_EXCL);
+ xchk_trans_cancel(sc);
+
+ error = xchk_setup_xattr_buf(sc, 0);
+ if (error)
+ return error;
+
+ ab = sc->buf;
+ error = fsverity_merkle_tree_geometry(VFS_I(sc->ip),
+ &ab->merkle_blocksize, &ab->merkle_tree_size);
+ if (error == -ENODATA || error == -EFSCORRUPTED) {
+ /*
+ * fsverity metadata corrupt, cannot complete checks. Zero the
+ * blocksize so that xchk_xattr_verity knows that no geometry
+ * is available.
+ */
+ xchk_set_incomplete(sc);
+ ab->merkle_blocksize = 0;
+ error = 0;
+ }
+ if (error)
+ return error;
+
+ error = xchk_trans_alloc(sc, 0); /* replaces the transaction cancelled above */
+ if (error)
+ return error;
+
+ xchk_ilock(sc, XFS_ILOCK_EXCL); /* retake the lock dropped above */
+ return 0;
+}
+#else
+# define xchk_xattr_setup_verity(...) (0)
+#endif /* CONFIG_FS_VERITY */
+
/* Set us up to scrub an inode's extended attributes. */
int
xchk_setup_xattr(
@@ -150,9 +200,89 @@ xchk_setup_xattr(
return error;
}
- return xchk_setup_inode_contents(sc, 0);
+ error = xchk_setup_inode_contents(sc, 0);
+ if (error)
+ return error;
+
+ if (IS_VERITY(VFS_I(sc->ip))) {
+ error = xchk_xattr_setup_verity(sc);
+ if (error)
+ return error;
+ }
+
+ return error;
}
+#ifdef CONFIG_FS_VERITY
+/*
+ * Check the merkle tree xattrs.
+ *
+ * Called for an xattr that carries XFS_ATTR_VERITY. @blkno is the attr fork
+ * block that gets flagged when corruption is found; @name/@namelen are the
+ * attr name and @valuelen is the length of its value. Outcomes:
+ *
+ * - corrupt: verity xattr on a filesystem without the verity feature, a
+ * merkle tree block value larger than the merkle block size, or a name that
+ * is neither a merkle key nor the verity descriptor name;
+ * - preen: verity xattr on a file that is not a verity file, or a merkle
+ * tree block whose offset is at or beyond the end of the tree;
+ * - no checks at all if the merkle tree geometry could not be loaded.
+ */
+STATIC void
+xchk_xattr_verity(
+ struct xfs_scrub *sc,
+ xfs_dablk_t blkno,
+ const unsigned char *name,
+ unsigned int namelen,
+ unsigned int valuelen)
+{
+ struct xchk_xattr_buf *ab = sc->buf;
+
+ /* Non-verity filesystems should never have verity xattrs. */
+ if (!xfs_has_verity(sc->mp)) {
+ xchk_fblock_set_corrupt(sc, XFS_ATTR_FORK, blkno);
+ return;
+ }
+
+ /*
+ * Any verity metadata on a non-verity file are leftovers from a
+ * previous attempt to enable verity.
+ */
+ if (!IS_VERITY(VFS_I(sc->ip))) {
+ xchk_ino_set_preen(sc, sc->ip->i_ino);
+ return;
+ }
+
+ /* Zero blocksize occurs if we couldn't load the merkle tree data. */
+ if (ab->merkle_blocksize == 0)
+ return;
+
+ switch (namelen) {
+ case sizeof(struct xfs_merkle_key):
+ /* Oversized blocks are not allowed */
+ if (valuelen > ab->merkle_blocksize) {
+ xchk_fblock_set_corrupt(sc, XFS_ATTR_FORK, blkno);
+ return;
+ }
+ break;
+ case XFS_VERITY_DESCRIPTOR_NAME_LEN:
+ /* Has to match the descriptor xattr name */
+ if (memcmp(name, XFS_VERITY_DESCRIPTOR_NAME, namelen))
+ xchk_fblock_set_corrupt(sc, XFS_ATTR_FORK, blkno);
+ return; /* the offset check below applies to merkle keys only */
+ default:
+ xchk_fblock_set_corrupt(sc, XFS_ATTR_FORK, blkno);
+ return;
+ }
+
+ /*
+ * Merkle tree blocks beyond the end of the tree are leftovers from
+ * a previous failed attempt to enable verity.
+ */
+ if (xfs_merkle_key_from_disk(name, namelen) >= ab->merkle_tree_size)
+ xchk_ino_set_preen(sc, sc->ip->i_ino);
+}
+#else
+static void
+xchk_xattr_verity(
+ struct xfs_scrub *sc,
+ xfs_dablk_t blkno,
+ const unsigned char *name,
+ unsigned int namelen,
+ unsigned int valuelen)
+{
+ /*
+ * Should never see verity xattrs when verity is not enabled, so any
+ * such xattr is flagged as corruption of the attr fork block.
+ */
+ xchk_fblock_set_corrupt(sc, XFS_ATTR_FORK, blkno);
+}
+#endif /* CONFIG_FS_VERITY */
+
/* Extended Attributes */
/*
@@ -211,6 +341,13 @@ xchk_xattr_actor(
return -ECANCELED;
}
+ /* Check verity xattr geometry */
+ if (attr_flags & XFS_ATTR_VERITY) {
+ xchk_xattr_verity(sc, args.blkno, name, namelen, valuelen);
+ if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
+ return -ECANCELED;
+ }
+
/*
* Local and shortform xattr values are stored in the attr leaf block,
* so we don't need to retrieve the value from a remote block to detect
diff --git a/fs/xfs/scrub/attr.h b/fs/xfs/scrub/attr.h
index 7db58af56646b..40b8c12384f55 100644
--- a/fs/xfs/scrub/attr.h
+++ b/fs/xfs/scrub/attr.h
@@ -22,6 +22,12 @@ struct xchk_xattr_buf {
/* Memory buffer used to extract xattr values. */
void *value;
size_t value_sz;
+
+#ifdef CONFIG_FS_VERITY
+ /* Geometry of the merkle tree attached to this verity file. */
+ u64 merkle_tree_size;
+ unsigned int merkle_blocksize;
+#endif
};
bool xchk_xattr_set_map(struct xfs_scrub *sc, unsigned long *map,
diff --git a/fs/xfs/scrub/attr_repair.c b/fs/xfs/scrub/attr_repair.c
index 7c5e52ceae82e..040138610ae94 100644
--- a/fs/xfs/scrub/attr_repair.c
+++ b/fs/xfs/scrub/attr_repair.c
@@ -29,6 +29,7 @@
#include "xfs_exchrange.h"
#include "xfs_acl.h"
#include "xfs_parent.h"
+#include "xfs_verity.h"
#include "scrub/xfs_scrub.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
@@ -159,6 +160,44 @@ xrep_setup_xattr(
return xrep_tempfile_create(sc, S_IFREG);
}
+#ifdef CONFIG_FS_VERITY
+/*
+ * Decide if we want to salvage this fsverity attribute. Verity attributes
+ * are only kept for a verity file on a filesystem with the verity feature,
+ * and only if the name is either the verity descriptor name or a merkle tree
+ * key whose value is no larger than a merkle tree block and whose offset
+ * lies inside the tree. Returns true to salvage the attribute.
+ *
+ * NOTE(review): if the merkle tree geometry could not be loaded during
+ * setup, ab->merkle_blocksize is presumably zero, in which case every merkle
+ * tree block with a nonempty value is rejected here -- confirm that
+ * discarding them is intended.
+ */
+static bool
+xrep_xattr_want_salvage_verity(
+ struct xrep_xattr *rx,
+ const void *name,
+ int namelen,
+ int valuelen)
+{
+ struct xchk_xattr_buf *ab = rx->sc->buf;
+
+ if (!xfs_has_verity(rx->sc->mp))
+ return false;
+ if (!IS_VERITY(VFS_I(rx->sc->ip)))
+ return false;
+
+ switch (namelen) {
+ case sizeof(struct xfs_merkle_key):
+ /* Oversized blocks are not allowed */
+ if (valuelen > ab->merkle_blocksize)
+ return false;
+ break;
+ case XFS_VERITY_DESCRIPTOR_NAME_LEN:
+ /* Has to match the descriptor xattr name */
+ return !memcmp(name, XFS_VERITY_DESCRIPTOR_NAME, namelen);
+ default:
+ return false;
+ }
+
+ /*
+ * Merkle tree blocks beyond the end of the tree are leftovers from
+ * a previous failed attempt to enable verity.
+ */
+ return xfs_merkle_key_from_disk(name, namelen) < ab->merkle_tree_size;
+}
+#else
+# define xrep_xattr_want_salvage_verity(...) (false)
+#endif /* CONFIG_FS_VERITY */
+
/*
* Decide if we want to salvage this attribute. We don't bother with
* incomplete or oversized keys or values. The @value parameter can be null
@@ -183,6 +222,9 @@ xrep_xattr_want_salvage(
return false;
if (attr_flags & XFS_ATTR_PARENT)
return xfs_parent_valuecheck(rx->sc->mp, value, valuelen);
+ if (attr_flags & XFS_ATTR_VERITY)
+ return xrep_xattr_want_salvage_verity(rx, name, namelen,
+ valuelen);
return true;
}
@@ -216,6 +258,11 @@ xrep_xattr_salvage_key(
trace_xrep_xattr_salvage_pptr(rx->sc->ip, flags, name,
key.namelen, value, valuelen);
+ } else if (flags & XFS_ATTR_VERITY) {
+ key.namelen = namelen;
+
+ trace_xrep_xattr_salvage_verity(rx->sc->ip, flags, name,
+ key.namelen, value, valuelen);
} else {
while (i < namelen && name[i] != 0)
i++;
@@ -667,6 +714,9 @@ xrep_xattr_insert_rec(
trace_xrep_xattr_insert_pptr(rx->sc->tempip, key->flags,
ab->name, key->namelen, ab->value,
key->valuelen);
+ else if (key->flags & XFS_ATTR_VERITY)
+ trace_xrep_xattr_insert_verity(rx->sc->ip, key->flags, ab->name,
+ key->namelen, ab->value, key->valuelen);
else
trace_xrep_xattr_insert_rec(rx->sc->tempip, key->flags,
ab->name, key->namelen, key->valuelen);
diff --git a/fs/xfs/scrub/trace.c b/fs/xfs/scrub/trace.c
index 6d8acb2f63d8a..69c234f2a4b32 100644
--- a/fs/xfs/scrub/trace.c
+++ b/fs/xfs/scrub/trace.c
@@ -22,6 +22,7 @@
#include "xfs_parent.h"
#include "xfs_imeta.h"
#include "xfs_rtgroup.h"
+#include "xfs_verity.h"
#include "scrub/scrub.h"
#include "scrub/xfile.h"
#include "scrub/xfarray.h"
diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h
index 6fd91c13f25ff..787f409799a06 100644
--- a/fs/xfs/scrub/trace.h
+++ b/fs/xfs/scrub/trace.h
@@ -3069,6 +3069,37 @@ DEFINE_EVENT(xrep_pptr_salvage_class, name, \
DEFINE_XREP_PPTR_SALVAGE_EVENT(xrep_xattr_salvage_pptr);
DEFINE_XREP_PPTR_SALVAGE_EVENT(xrep_xattr_insert_pptr);
+DECLARE_EVENT_CLASS(xrep_verity_salvage_class, /* verity xattr events for attr repair */
+ TP_PROTO(struct xfs_inode *ip, unsigned int flags, const void *name,
+ unsigned int namelen, const void *value, unsigned int valuelen),
+ TP_ARGS(ip, flags, name, namelen, value, valuelen), /* flags, value, valuelen are not recorded */
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(xfs_ino_t, ino)
+ __field(unsigned long long, merkle_off)
+ ),
+ TP_fast_assign(
+ __entry->dev = ip->i_mount->m_super->s_dev;
+ __entry->ino = ip->i_ino;
+ if (namelen == sizeof(struct xfs_merkle_key))
+ __entry->merkle_off = xfs_merkle_key_from_disk(name,
+ namelen);
+ else
+ __entry->merkle_off = -1ULL; /* name is not sized like a merkle key */
+ ),
+ TP_printk("dev %d:%d ino 0x%llx merkle_off 0x%llx",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->ino,
+ __entry->merkle_off)
+)
+#define DEFINE_XREP_VERITY_SALVAGE_EVENT(name) \
+DEFINE_EVENT(xrep_verity_salvage_class, name, \
+ TP_PROTO(struct xfs_inode *ip, unsigned int flags, const void *name, \
+ unsigned int namelen, const void *value, unsigned int valuelen), \
+ TP_ARGS(ip, flags, name, namelen, value, valuelen))
+DEFINE_XREP_VERITY_SALVAGE_EVENT(xrep_xattr_salvage_verity);
+DEFINE_XREP_VERITY_SALVAGE_EVENT(xrep_xattr_insert_verity);
+
TRACE_EVENT(xrep_xattr_class,
TP_PROTO(struct xfs_inode *ip, struct xfs_inode *arg_ip),
TP_ARGS(ip, arg_ip),
^ permalink raw reply related [flat|nested] 111+ messages in thread
* Re: [PATCH 24/29] xfs: teach online repair to evaluate fsverity xattrs
2024-03-30 0:42 ` [PATCH 24/29] xfs: teach online repair to evaluate fsverity xattrs Darrick J. Wong
@ 2024-04-02 15:42 ` Andrey Albershteyn
2024-04-02 16:42 ` Darrick J. Wong
0 siblings, 1 reply; 111+ messages in thread
From: Andrey Albershteyn @ 2024-04-02 15:42 UTC (permalink / raw)
To: Darrick J. Wong; +Cc: ebiggers, linux-xfs, linux-fsdevel, fsverity
On 2024-03-29 17:42:19, Darrick J. Wong wrote:
> From: Darrick J. Wong <djwong@kernel.org>
>
> Teach online repair to check for unused fsverity metadata and purge it
> on reconstruction.
>
> Signed-off-by: Darrick J. Wong <djwong@kernel.org>
> ---
> fs/xfs/scrub/attr.c | 139 ++++++++++++++++++++++++++++++++++++++++++++
> fs/xfs/scrub/attr.h | 6 ++
> fs/xfs/scrub/attr_repair.c | 50 ++++++++++++++++
> fs/xfs/scrub/trace.c | 1
> fs/xfs/scrub/trace.h | 31 ++++++++++
> 5 files changed, 226 insertions(+), 1 deletion(-)
>
>
> diff --git a/fs/xfs/scrub/attr.c b/fs/xfs/scrub/attr.c
> index 2e8a2b2e82fbd..be121625c14f0 100644
> --- a/fs/xfs/scrub/attr.c
> +++ b/fs/xfs/scrub/attr.c
> @@ -18,6 +18,7 @@
> #include "xfs_attr_leaf.h"
> #include "xfs_attr_sf.h"
> #include "xfs_parent.h"
> +#include "xfs_verity.h"
> #include "scrub/scrub.h"
> #include "scrub/common.h"
> #include "scrub/dabtree.h"
> @@ -25,6 +26,8 @@
> #include "scrub/listxattr.h"
> #include "scrub/repair.h"
>
> +#include <linux/fsverity.h>
> +
> /* Free the buffers linked from the xattr buffer. */
> static void
> xchk_xattr_buf_cleanup(
> @@ -126,6 +129,53 @@ xchk_setup_xattr_buf(
> return 0;
> }
>
> +#ifdef CONFIG_FS_VERITY
> +/*
> + * Obtain merkle tree geometry information for a verity file so that we can
> + * perform sanity checks of the fsverity xattrs.
> + */
> +STATIC int
> +xchk_xattr_setup_verity(
> + struct xfs_scrub *sc)
> +{
> + struct xchk_xattr_buf *ab;
> + int error;
> +
> + /*
> + * Drop the ILOCK and the transaction because loading the fsverity
> + * metadata will call into the xattr code. S_VERITY is enabled with
> + * IOLOCK_EXCL held, so it should not change here.
> + */
> + xchk_iunlock(sc, XFS_ILOCK_EXCL);
> + xchk_trans_cancel(sc);
> +
> + error = xchk_setup_xattr_buf(sc, 0);
> + if (error)
> + return error;
> +
> + ab = sc->buf;
> + error = fsverity_merkle_tree_geometry(VFS_I(sc->ip),
> + &ab->merkle_blocksize, &ab->merkle_tree_size);
> + if (error == -ENODATA || error == -EFSCORRUPTED) {
> + /* fsverity metadata corrupt, cannot complete checks */
> + xchk_set_incomplete(sc);
> + ab->merkle_blocksize = 0;
> + error = 0;
> + }
> + if (error)
> + return error;
> +
> + error = xchk_trans_alloc(sc, 0);
> + if (error)
> + return error;
> +
> + xchk_ilock(sc, XFS_ILOCK_EXCL);
> + return 0;
> +}
> +#else
> +# define xchk_xattr_setup_verity(...) (0)
> +#endif /* CONFIG_FS_VERITY */
> +
> /* Set us up to scrub an inode's extended attributes. */
> int
> xchk_setup_xattr(
> @@ -150,9 +200,89 @@ xchk_setup_xattr(
> return error;
> }
>
> - return xchk_setup_inode_contents(sc, 0);
> + error = xchk_setup_inode_contents(sc, 0);
> + if (error)
> + return error;
> +
> + if (IS_VERITY(VFS_I(sc->ip))) {
> + error = xchk_xattr_setup_verity(sc);
> + if (error)
> + return error;
> + }
> +
> + return error;
> }
>
> +#ifdef CONFIG_FS_VERITY
> +/* Check the merkle tree xattrs. */
> +STATIC void
> +xchk_xattr_verity(
> + struct xfs_scrub *sc,
> + xfs_dablk_t blkno,
> + const unsigned char *name,
> + unsigned int namelen,
> + unsigned int valuelen)
> +{
> + struct xchk_xattr_buf *ab = sc->buf;
> +
> + /* Non-verity filesystems should never have verity xattrs. */
> + if (!xfs_has_verity(sc->mp)) {
> + xchk_fblock_set_corrupt(sc, XFS_ATTR_FORK, blkno);
> + return;
> + }
> +
> + /*
> + * Any verity metadata on a non-verity file are leftovers from a
> + * previous attempt to enable verity.
> + */
> + if (!IS_VERITY(VFS_I(sc->ip))) {
> + xchk_ino_set_preen(sc, sc->ip->i_ino);
> + return;
> + }
> +
> + /* Zero blocksize occurs if we couldn't load the merkle tree data. */
> + if (ab->merkle_blocksize == 0)
> + return;
> +
> + switch (namelen) {
> + case sizeof(struct xfs_merkle_key):
> + /* Oversized blocks are not allowed */
> + if (valuelen > ab->merkle_blocksize) {
> + xchk_fblock_set_corrupt(sc, XFS_ATTR_FORK, blkno);
> + return;
> + }
> + break;
> + case XFS_VERITY_DESCRIPTOR_NAME_LEN:
> + /* Has to match the descriptor xattr name */
> + if (memcmp(name, XFS_VERITY_DESCRIPTOR_NAME, namelen))
> + xchk_fblock_set_corrupt(sc, XFS_ATTR_FORK, blkno);
> + return;
> + default:
> + xchk_fblock_set_corrupt(sc, XFS_ATTR_FORK, blkno);
> + return;
> + }
> +
> + /*
> + * Merkle tree blocks beyond the end of the tree are leftovers from
> + * a previous failed attempt to enable verity.
> + */
> + if (xfs_merkle_key_from_disk(name, namelen) >= ab->merkle_tree_size)
> + xchk_ino_set_preen(sc, sc->ip->i_ino);
The other case which probably can be detected is if we start
removing the tree and it gets interrupted (starting blocks missing).
This can be checked by iterating over the xattrs names up to
->merkle_tree_size. But I'm not sure if online repair can store
state over xattrs validation.
Also, only pair of valid descriptor and valid tree is something of
use, but I'm not sure if all of this is in scope of online repair.
Otherwise, looks good to me:
Reviewed-by: Andrey Albershteyn <aalbersh@redhat.com>
--
- Andrey
^ permalink raw reply [flat|nested] 111+ messages in thread
* Re: [PATCH 24/29] xfs: teach online repair to evaluate fsverity xattrs
2024-04-02 15:42 ` Andrey Albershteyn
@ 2024-04-02 16:42 ` Darrick J. Wong
0 siblings, 0 replies; 111+ messages in thread
From: Darrick J. Wong @ 2024-04-02 16:42 UTC (permalink / raw)
To: Andrey Albershteyn; +Cc: ebiggers, linux-xfs, linux-fsdevel, fsverity
On Tue, Apr 02, 2024 at 05:42:04PM +0200, Andrey Albershteyn wrote:
> On 2024-03-29 17:42:19, Darrick J. Wong wrote:
> > From: Darrick J. Wong <djwong@kernel.org>
> >
> > Teach online repair to check for unused fsverity metadata and purge it
> > on reconstruction.
> >
> > Signed-off-by: Darrick J. Wong <djwong@kernel.org>
> > ---
> > fs/xfs/scrub/attr.c | 139 ++++++++++++++++++++++++++++++++++++++++++++
> > fs/xfs/scrub/attr.h | 6 ++
> > fs/xfs/scrub/attr_repair.c | 50 ++++++++++++++++
> > fs/xfs/scrub/trace.c | 1
> > fs/xfs/scrub/trace.h | 31 ++++++++++
> > 5 files changed, 226 insertions(+), 1 deletion(-)
> >
> >
> > diff --git a/fs/xfs/scrub/attr.c b/fs/xfs/scrub/attr.c
> > index 2e8a2b2e82fbd..be121625c14f0 100644
> > --- a/fs/xfs/scrub/attr.c
> > +++ b/fs/xfs/scrub/attr.c
> > @@ -18,6 +18,7 @@
> > #include "xfs_attr_leaf.h"
> > #include "xfs_attr_sf.h"
> > #include "xfs_parent.h"
> > +#include "xfs_verity.h"
> > #include "scrub/scrub.h"
> > #include "scrub/common.h"
> > #include "scrub/dabtree.h"
> > @@ -25,6 +26,8 @@
> > #include "scrub/listxattr.h"
> > #include "scrub/repair.h"
> >
> > +#include <linux/fsverity.h>
> > +
> > /* Free the buffers linked from the xattr buffer. */
> > static void
> > xchk_xattr_buf_cleanup(
> > @@ -126,6 +129,53 @@ xchk_setup_xattr_buf(
> > return 0;
> > }
> >
> > +#ifdef CONFIG_FS_VERITY
> > +/*
> > + * Obtain merkle tree geometry information for a verity file so that we can
> > + * perform sanity checks of the fsverity xattrs.
> > + */
> > +STATIC int
> > +xchk_xattr_setup_verity(
> > + struct xfs_scrub *sc)
> > +{
> > + struct xchk_xattr_buf *ab;
> > + int error;
> > +
> > + /*
> > + * Drop the ILOCK and the transaction because loading the fsverity
> > + * metadata will call into the xattr code. S_VERITY is enabled with
> > + * IOLOCK_EXCL held, so it should not change here.
> > + */
> > + xchk_iunlock(sc, XFS_ILOCK_EXCL);
> > + xchk_trans_cancel(sc);
> > +
> > + error = xchk_setup_xattr_buf(sc, 0);
> > + if (error)
> > + return error;
> > +
> > + ab = sc->buf;
> > + error = fsverity_merkle_tree_geometry(VFS_I(sc->ip),
> > + &ab->merkle_blocksize, &ab->merkle_tree_size);
> > + if (error == -ENODATA || error == -EFSCORRUPTED) {
> > + /* fsverity metadata corrupt, cannot complete checks */
> > + xchk_set_incomplete(sc);
> > + ab->merkle_blocksize = 0;
> > + error = 0;
> > + }
> > + if (error)
> > + return error;
> > +
> > + error = xchk_trans_alloc(sc, 0);
> > + if (error)
> > + return error;
> > +
> > + xchk_ilock(sc, XFS_ILOCK_EXCL);
> > + return 0;
> > +}
> > +#else
> > +# define xchk_xattr_setup_verity(...) (0)
> > +#endif /* CONFIG_FS_VERITY */
> > +
> > /* Set us up to scrub an inode's extended attributes. */
> > int
> > xchk_setup_xattr(
> > @@ -150,9 +200,89 @@ xchk_setup_xattr(
> > return error;
> > }
> >
> > - return xchk_setup_inode_contents(sc, 0);
> > + error = xchk_setup_inode_contents(sc, 0);
> > + if (error)
> > + return error;
> > +
> > + if (IS_VERITY(VFS_I(sc->ip))) {
> > + error = xchk_xattr_setup_verity(sc);
> > + if (error)
> > + return error;
> > + }
> > +
> > + return error;
> > }
> >
> > +#ifdef CONFIG_FS_VERITY
> > +/* Check the merkle tree xattrs. */
> > +STATIC void
> > +xchk_xattr_verity(
> > + struct xfs_scrub *sc,
> > + xfs_dablk_t blkno,
> > + const unsigned char *name,
> > + unsigned int namelen,
> > + unsigned int valuelen)
> > +{
> > + struct xchk_xattr_buf *ab = sc->buf;
> > +
> > + /* Non-verity filesystems should never have verity xattrs. */
> > + if (!xfs_has_verity(sc->mp)) {
> > + xchk_fblock_set_corrupt(sc, XFS_ATTR_FORK, blkno);
> > + return;
> > + }
> > +
> > + /*
> > + * Any verity metadata on a non-verity file are leftovers from a
> > + * previous attempt to enable verity.
> > + */
> > + if (!IS_VERITY(VFS_I(sc->ip))) {
> > + xchk_ino_set_preen(sc, sc->ip->i_ino);
> > + return;
> > + }
> > +
> > + /* Zero blocksize occurs if we couldn't load the merkle tree data. */
> > + if (ab->merkle_blocksize == 0)
> > + return;
> > +
> > + switch (namelen) {
> > + case sizeof(struct xfs_merkle_key):
> > + /* Oversized blocks are not allowed */
> > + if (valuelen > ab->merkle_blocksize) {
> > + xchk_fblock_set_corrupt(sc, XFS_ATTR_FORK, blkno);
> > + return;
> > + }
> > + break;
> > + case XFS_VERITY_DESCRIPTOR_NAME_LEN:
> > + /* Has to match the descriptor xattr name */
> > + if (memcmp(name, XFS_VERITY_DESCRIPTOR_NAME, namelen))
> > + xchk_fblock_set_corrupt(sc, XFS_ATTR_FORK, blkno);
> > + return;
> > + default:
> > + xchk_fblock_set_corrupt(sc, XFS_ATTR_FORK, blkno);
> > + return;
> > + }
> > +
> > + /*
> > + * Merkle tree blocks beyond the end of the tree are leftovers from
> > + * a previous failed attempt to enable verity.
> > + */
> > + if (xfs_merkle_key_from_disk(name, namelen) >= ab->merkle_tree_size)
> > + xchk_ino_set_preen(sc, sc->ip->i_ino);
>
> The other case which probably can be detected is if we start
> removing the tree and it gets interrupted (starting blocks missing).
> This can be checked by iterating over the xattrs names up to
> ->merkle_tree_size. But I'm not sure if online repair can store
> state over xattrs validation.
It can; you'd just have to amend the xchk_xattr_buf to store whatever
extra data you want. That said, if IS_VERITY() isn't true, then we'll
flag the xattr structure for any XFS_ATTR_VERITY attrs:
/*
* Any verity metadata on a non-verity file are leftovers from a
* previous attempt to enable verity.
*/
if (!IS_VERITY(VFS_I(sc->ip))) {
xchk_ino_set_preen(sc, sc->ip->i_ino);
return;
}
And attr_repair.c will not salvage the attrs when it reconstructs the
attr structure.
> Also, only pair of valid descriptor and valid tree is something of
> use, but I'm not sure if all of this is in scope of online repair.
Not here -- the xfsprogs verity patchset amends xfs_scrub phase 6 to
look for verity files so that it can open them and read the contents to
see if any IO errors occur. That will catch missing/inconsistent bits
in the fsverity metadata.
> Otherwise, looks good to me:
> Reviewed-by: Andrey Albershteyn <aalbersh@redhat.com>
Thanks!
--D
> --
> - Andrey
>
>
^ permalink raw reply [flat|nested] 111+ messages in thread
* [PATCH 25/29] xfs: report verity failures through the health system
2024-03-30 0:32 ` [PATCHSET v5.5 2/2] xfs: fs-verity support Darrick J. Wong
` (23 preceding siblings ...)
2024-03-30 0:42 ` [PATCH 24/29] xfs: teach online repair to evaluate fsverity xattrs Darrick J. Wong
@ 2024-03-30 0:42 ` Darrick J. Wong
2024-04-02 16:16 ` Andrey Albershteyn
2024-03-30 0:42 ` [PATCH 26/29] xfs: clear the verity iflag when not appropriate Darrick J. Wong
` (3 subsequent siblings)
28 siblings, 1 reply; 111+ messages in thread
From: Darrick J. Wong @ 2024-03-30 0:42 UTC (permalink / raw)
To: djwong, ebiggers, aalbersh; +Cc: linux-xfs, linux-fsdevel, fsverity
From: Darrick J. Wong <djwong@kernel.org>
Record verity failures and report them through the health system.
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
---
fs/xfs/libxfs/xfs_fs.h | 1 +
fs/xfs/libxfs/xfs_health.h | 4 +++-
fs/xfs/xfs_fsverity.c | 11 +++++++++++
fs/xfs/xfs_health.c | 1 +
4 files changed, 16 insertions(+), 1 deletion(-)
diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h
index af45a246eb1c1..d22f3423ddc76 100644
--- a/fs/xfs/libxfs/xfs_fs.h
+++ b/fs/xfs/libxfs/xfs_fs.h
@@ -426,6 +426,7 @@ struct xfs_bulkstat {
#define XFS_BS_SICK_SYMLINK (1 << 6) /* symbolic link remote target */
#define XFS_BS_SICK_PARENT (1 << 7) /* parent pointers */
#define XFS_BS_SICK_DIRTREE (1 << 8) /* directory tree structure */
+#define XFS_BS_SICK_DATA (1 << 9) /* file data */
/*
* Project quota id helpers (previously projid was 16bit only
diff --git a/fs/xfs/libxfs/xfs_health.h b/fs/xfs/libxfs/xfs_health.h
index 89b80e957917e..0f8533335e25f 100644
--- a/fs/xfs/libxfs/xfs_health.h
+++ b/fs/xfs/libxfs/xfs_health.h
@@ -105,6 +105,7 @@ struct xfs_rtgroup;
/* Don't propagate sick status to ag health summary during inactivation */
#define XFS_SICK_INO_FORGET (1 << 12)
#define XFS_SICK_INO_DIRTREE (1 << 13) /* directory tree structure */
+#define XFS_SICK_INO_DATA (1 << 14) /* file data */
/* Primary evidence of health problems in a given group. */
#define XFS_SICK_FS_PRIMARY (XFS_SICK_FS_COUNTERS | \
@@ -143,7 +144,8 @@ struct xfs_rtgroup;
XFS_SICK_INO_XATTR | \
XFS_SICK_INO_SYMLINK | \
XFS_SICK_INO_PARENT | \
- XFS_SICK_INO_DIRTREE)
+ XFS_SICK_INO_DIRTREE | \
+ XFS_SICK_INO_DATA)
#define XFS_SICK_INO_ZAPPED (XFS_SICK_INO_BMBTD_ZAPPED | \
XFS_SICK_INO_BMBTA_ZAPPED | \
diff --git a/fs/xfs/xfs_fsverity.c b/fs/xfs/xfs_fsverity.c
index 2806466ceaeab..bfa5c70beec24 100644
--- a/fs/xfs/xfs_fsverity.c
+++ b/fs/xfs/xfs_fsverity.c
@@ -21,6 +21,7 @@
#include "xfs_quota.h"
#include "xfs_fsverity.h"
#include "xfs_icache.h"
+#include "xfs_health.h"
#include <linux/fsverity.h>
/*
@@ -773,6 +774,15 @@ xfs_fsverity_drop_merkle(
block->context = NULL;
}
+static void
+xfs_fsverity_fail_validation(
+ struct inode *inode,
+ loff_t pos,
+ size_t len)
+{
+ xfs_inode_mark_sick(XFS_I(inode), XFS_SICK_INO_DATA);
+}
+
const struct fsverity_operations xfs_fsverity_ops = {
.begin_enable_verity = xfs_fsverity_begin_enable,
.end_enable_verity = xfs_fsverity_end_enable,
@@ -780,4 +790,5 @@ const struct fsverity_operations xfs_fsverity_ops = {
.read_merkle_tree_block = xfs_fsverity_read_merkle,
.write_merkle_tree_block = xfs_fsverity_write_merkle,
.drop_merkle_tree_block = xfs_fsverity_drop_merkle,
+ .fail_validation = xfs_fsverity_fail_validation,
};
diff --git a/fs/xfs/xfs_health.c b/fs/xfs/xfs_health.c
index 33059d979857a..ce7385c207d37 100644
--- a/fs/xfs/xfs_health.c
+++ b/fs/xfs/xfs_health.c
@@ -591,6 +591,7 @@ static const struct ioctl_sick_map ino_map[] = {
{ XFS_SICK_INO_DIR_ZAPPED, XFS_BS_SICK_DIR },
{ XFS_SICK_INO_SYMLINK_ZAPPED, XFS_BS_SICK_SYMLINK },
{ XFS_SICK_INO_DIRTREE, XFS_BS_SICK_DIRTREE },
+ { XFS_SICK_INO_DATA, XFS_BS_SICK_DATA },
{ 0, 0 },
};
^ permalink raw reply related [flat|nested] 111+ messages in thread
* [PATCH 26/29] xfs: clear the verity iflag when not appropriate
2024-03-30 0:32 ` [PATCHSET v5.5 2/2] xfs: fs-verity support Darrick J. Wong
` (24 preceding siblings ...)
2024-03-30 0:42 ` [PATCH 25/29] xfs: report verity failures through the health system Darrick J. Wong
@ 2024-03-30 0:42 ` Darrick J. Wong
2024-04-02 16:26 ` Andrey Albershteyn
2024-03-30 0:43 ` [PATCH 27/29] xfs: make it possible to disable fsverity Darrick J. Wong
` (2 subsequent siblings)
28 siblings, 1 reply; 111+ messages in thread
From: Darrick J. Wong @ 2024-03-30 0:42 UTC (permalink / raw)
To: djwong, ebiggers, aalbersh; +Cc: linux-xfs, linux-fsdevel, fsverity
From: Darrick J. Wong <djwong@kernel.org>
Clear the verity inode flag if the fs doesn't support verity or if it
isn't a regular file. This will clean up a busted inode enough that we
will be able to iget it.
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
---
fs/xfs/scrub/inode_repair.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/fs/xfs/scrub/inode_repair.c b/fs/xfs/scrub/inode_repair.c
index fb8d1ba1f35c0..30e62f00a17a6 100644
--- a/fs/xfs/scrub/inode_repair.c
+++ b/fs/xfs/scrub/inode_repair.c
@@ -566,6 +566,8 @@ xrep_dinode_flags(
dip->di_nrext64_pad = 0;
else if (dip->di_version >= 3)
dip->di_v3_pad = 0;
+ if (!xfs_has_verity(mp) || !S_ISREG(mode))
+ flags2 &= ~XFS_DIFLAG2_VERITY;
if (flags2 & XFS_DIFLAG2_METADIR) {
xfs_failaddr_t fa;
^ permalink raw reply related [flat|nested] 111+ messages in thread
* Re: [PATCH 26/29] xfs: clear the verity iflag when not appropriate
2024-03-30 0:42 ` [PATCH 26/29] xfs: clear the verity iflag when not appropriate Darrick J. Wong
@ 2024-04-02 16:26 ` Andrey Albershteyn
0 siblings, 0 replies; 111+ messages in thread
From: Andrey Albershteyn @ 2024-04-02 16:26 UTC (permalink / raw)
To: Darrick J. Wong; +Cc: ebiggers, linux-xfs, linux-fsdevel, fsverity
On 2024-03-29 17:42:50, Darrick J. Wong wrote:
> From: Darrick J. Wong <djwong@kernel.org>
>
> Clear the verity inode flag if the fs doesn't support verity or if it
> isn't a regular file. This will clean up a busted inode enough that we
> will be able to iget it.
>
> Signed-off-by: Darrick J. Wong <djwong@kernel.org>
> ---
> fs/xfs/scrub/inode_repair.c | 2 ++
> 1 file changed, 2 insertions(+)
>
>
> diff --git a/fs/xfs/scrub/inode_repair.c b/fs/xfs/scrub/inode_repair.c
> index fb8d1ba1f35c0..30e62f00a17a6 100644
> --- a/fs/xfs/scrub/inode_repair.c
> +++ b/fs/xfs/scrub/inode_repair.c
> @@ -566,6 +566,8 @@ xrep_dinode_flags(
> dip->di_nrext64_pad = 0;
> else if (dip->di_version >= 3)
> dip->di_v3_pad = 0;
> + if (!xfs_has_verity(mp) || !S_ISREG(mode))
> + flags2 &= ~XFS_DIFLAG2_VERITY;
>
> if (flags2 & XFS_DIFLAG2_METADIR) {
> xfs_failaddr_t fa;
>
Looks good to me:
Reviewed-by: Andrey Albershteyn <aalbersh@redhat.com>
--
- Andrey
^ permalink raw reply [flat|nested] 111+ messages in thread
* [PATCH 27/29] xfs: make it possible to disable fsverity
2024-03-30 0:32 ` [PATCHSET v5.5 2/2] xfs: fs-verity support Darrick J. Wong
` (25 preceding siblings ...)
2024-03-30 0:42 ` [PATCH 26/29] xfs: clear the verity iflag when not appropriate Darrick J. Wong
@ 2024-03-30 0:43 ` Darrick J. Wong
2024-04-02 17:15 ` Andrey Albershteyn
2024-04-02 23:25 ` Eric Biggers
2024-03-30 0:43 ` [PATCH 28/29] xfs: allow verity files to be opened even if the fsverity metadata is damaged Darrick J. Wong
2024-03-30 0:43 ` [PATCH 29/29] xfs: enable ro-compat fs-verity flag Darrick J. Wong
28 siblings, 2 replies; 111+ messages in thread
From: Darrick J. Wong @ 2024-03-30 0:43 UTC (permalink / raw)
To: djwong, ebiggers, aalbersh; +Cc: linux-xfs, linux-fsdevel, fsverity
From: Darrick J. Wong <djwong@kernel.org>
Create an experimental ioctl so that we can turn off fsverity.
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
---
fs/xfs/libxfs/xfs_fs_staging.h | 3 ++
fs/xfs/xfs_fsverity.c | 73 ++++++++++++++++++++++++++++++++++++++++
fs/xfs/xfs_fsverity.h | 3 ++
fs/xfs/xfs_ioctl.c | 6 +++
4 files changed, 85 insertions(+)
diff --git a/fs/xfs/libxfs/xfs_fs_staging.h b/fs/xfs/libxfs/xfs_fs_staging.h
index 899a56a569d50..4c29167a2b190 100644
--- a/fs/xfs/libxfs/xfs_fs_staging.h
+++ b/fs/xfs/libxfs/xfs_fs_staging.h
@@ -229,4 +229,7 @@ struct xfs_map_freesp {
*/
#define XFS_IOC_MAP_FREESP _IOWR('X', 64, struct xfs_map_freesp)
+/* Turn off fs-verity */
+#define FS_IOC_DISABLE_VERITY _IO('f', 133)
+
#endif /* __XFS_FS_STAGING_H__ */
diff --git a/fs/xfs/xfs_fsverity.c b/fs/xfs/xfs_fsverity.c
index bfa5c70beec24..f57d8acbd858a 100644
--- a/fs/xfs/xfs_fsverity.c
+++ b/fs/xfs/xfs_fsverity.c
@@ -792,3 +792,76 @@ const struct fsverity_operations xfs_fsverity_ops = {
.drop_merkle_tree_block = xfs_fsverity_drop_merkle,
.fail_validation = xfs_fsverity_fail_validation,
};
+
+/* Turn off fs-verity. */
+int
+xfs_fsverity_disable(
+ struct file *file)
+{
+ struct inode *inode = file_inode(file);
+ struct xfs_inode *ip = XFS_I(inode);
+ struct xfs_mount *mp = ip->i_mount;
+ struct xfs_trans *tp;
+ u64 merkle_tree_size;
+ unsigned int merkle_blocksize;
+ int error;
+
+ BUILD_BUG_ON(FS_IOC_DISABLE_VERITY == FS_IOC_ENABLE_VERITY);
+
+ if (!xfs_has_verity(mp))
+ return -EOPNOTSUPP;
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ xfs_ilock(ip, XFS_IOLOCK_EXCL);
+
+ if (!IS_VERITY(inode)) {
+ error = 0;
+ goto out_iolock;
+ }
+
+ if (xfs_iflags_test(ip, XFS_VERITY_CONSTRUCTION)) {
+ error = -EBUSY;
+ goto out_iolock;
+ }
+
+ error = xfs_qm_dqattach(ip);
+ if (error)
+ goto out_iolock;
+
+ error = fsverity_merkle_tree_geometry(inode, &merkle_blocksize,
+ &merkle_tree_size);
+ if (error)
+ goto out_iolock;
+
+ xfs_fsverity_cache_drop(ip);
+
+ /* Clear fsverity inode flag */
+ error = xfs_trans_alloc_inode(ip, &M_RES(mp)->tr_ichange, 0, 0, false,
+ &tp);
+ if (error)
+ goto out_iolock;
+
+ ip->i_diflags2 &= ~XFS_DIFLAG2_VERITY;
+
+ xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+ xfs_trans_set_sync(tp);
+
+ error = xfs_trans_commit(tp);
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+ if (error)
+ goto out_iolock;
+
+ inode->i_flags &= ~S_VERITY;
+ fsverity_cleanup_inode(inode);
+
+ /* Remove the fsverity xattrs. */
+ error = xfs_fsverity_delete_metadata(ip, merkle_tree_size,
+ merkle_blocksize);
+ if (error)
+ goto out_iolock;
+
+out_iolock:
+ xfs_iunlock(ip, XFS_IOLOCK_EXCL);
+ return error;
+}
diff --git a/fs/xfs/xfs_fsverity.h b/fs/xfs/xfs_fsverity.h
index 21ba0d82f26d8..4b9fff6b0d2c4 100644
--- a/fs/xfs/xfs_fsverity.h
+++ b/fs/xfs/xfs_fsverity.h
@@ -17,6 +17,8 @@ struct xfs_icwalk;
int xfs_fsverity_scan_inode(struct xfs_inode *ip, struct xfs_icwalk *icw);
extern const struct fsverity_operations xfs_fsverity_ops;
+
+int xfs_fsverity_disable(struct file *file);
#else
# define xfs_fsverity_cache_init(ip) ((void)0)
# define xfs_fsverity_cache_drop(ip) ((void)0)
@@ -24,6 +26,7 @@ extern const struct fsverity_operations xfs_fsverity_ops;
# define xfs_fsverity_register_shrinker(mp) (0)
# define xfs_fsverity_unregister_shrinker(mp) ((void)0)
# define xfs_fsverity_scan_inode(ip, icw) (0)
+# define xfs_fsverity_disable(ip) (-EOPNOTSUPP)
#endif /* CONFIG_FS_VERITY */
#endif /* __XFS_FSVERITY_H__ */
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index 0aa0ceb9ec153..24deaaf5eb0f5 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -44,6 +44,7 @@
#include "xfs_file.h"
#include "xfs_exchrange.h"
#include "xfs_rtgroup.h"
+#include "xfs_fsverity.h"
#include <linux/mount.h>
#include <linux/namei.h>
@@ -2712,6 +2713,11 @@ xfs_file_ioctl(
case XFS_IOC_MAP_FREESP:
return xfs_ioc_map_freesp(filp, arg);
+#ifdef CONFIG_XFS_EXPERIMENTAL_IOCTLS
+ case FS_IOC_DISABLE_VERITY:
+ return xfs_fsverity_disable(filp);
+#endif
+
case FS_IOC_ENABLE_VERITY:
if (!xfs_has_verity(mp))
return -EOPNOTSUPP;
^ permalink raw reply related [flat|nested] 111+ messages in thread
* Re: [PATCH 27/29] xfs: make it possible to disable fsverity
2024-03-30 0:43 ` [PATCH 27/29] xfs: make it possible to disable fsverity Darrick J. Wong
@ 2024-04-02 17:15 ` Andrey Albershteyn
2024-04-02 23:25 ` Eric Biggers
1 sibling, 0 replies; 111+ messages in thread
From: Andrey Albershteyn @ 2024-04-02 17:15 UTC (permalink / raw)
To: Darrick J. Wong; +Cc: ebiggers, linux-xfs, linux-fsdevel, fsverity
On 2024-03-29 17:43:06, Darrick J. Wong wrote:
> From: Darrick J. Wong <djwong@kernel.org>
>
> Create an experimental ioctl so that we can turn off fsverity.
>
> Signed-off-by: Darrick J. Wong <djwong@kernel.org>
> ---
> fs/xfs/libxfs/xfs_fs_staging.h | 3 ++
> fs/xfs/xfs_fsverity.c | 73 ++++++++++++++++++++++++++++++++++++++++
> fs/xfs/xfs_fsverity.h | 3 ++
> fs/xfs/xfs_ioctl.c | 6 +++
> 4 files changed, 85 insertions(+)
>
>
Looks good to me:
Reviewed-by: Andrey Albershteyn <aalbersh@redhat.com>
--
- Andrey
^ permalink raw reply [flat|nested] 111+ messages in thread
* Re: [PATCH 27/29] xfs: make it possible to disable fsverity
2024-03-30 0:43 ` [PATCH 27/29] xfs: make it possible to disable fsverity Darrick J. Wong
2024-04-02 17:15 ` Andrey Albershteyn
@ 2024-04-02 23:25 ` Eric Biggers
2024-04-03 1:26 ` Darrick J. Wong
1 sibling, 1 reply; 111+ messages in thread
From: Eric Biggers @ 2024-04-02 23:25 UTC (permalink / raw)
To: Darrick J. Wong; +Cc: aalbersh, linux-xfs, linux-fsdevel, fsverity
On Fri, Mar 29, 2024 at 05:43:06PM -0700, Darrick J. Wong wrote:
> From: Darrick J. Wong <djwong@kernel.org>
>
> Create an experimental ioctl so that we can turn off fsverity.
The concept of "experimental ioctls" seems problematic. What if people start
relying on them? Linux tends not to have "experimental" system calls, and
probably for good reason...
Also, what is the use case for this ioctl? Is it necessary to have this when
userspace can already just replace a verity file with a copy that has verity
disabled? That's less efficient, but it does not require any kernel support and
does not require CAP_SYS_ADMIN.
And of course, if we do add this ioctl it shouldn't be XFS-specific.
- Eric
^ permalink raw reply [flat|nested] 111+ messages in thread
* Re: [PATCH 27/29] xfs: make it possible to disable fsverity
2024-04-02 23:25 ` Eric Biggers
@ 2024-04-03 1:26 ` Darrick J. Wong
0 siblings, 0 replies; 111+ messages in thread
From: Darrick J. Wong @ 2024-04-03 1:26 UTC (permalink / raw)
To: Eric Biggers; +Cc: aalbersh, linux-xfs, linux-fsdevel, fsverity
On Tue, Apr 02, 2024 at 04:25:10PM -0700, Eric Biggers wrote:
> On Fri, Mar 29, 2024 at 05:43:06PM -0700, Darrick J. Wong wrote:
> > From: Darrick J. Wong <djwong@kernel.org>
> >
> > Create an experimental ioctl so that we can turn off fsverity.
>
> The concept of "experimental ioctls" seems problematic. What if people start
> relying on them? Linux tends not to have "experimental" system calls, and
> probably for good reason...
They're trapped in my enormous backlog of patches. They get this
special treatment so that I can show them to developers without anyone
getting any fancy ideas about merging them. Once I get close enough to
actually consider merging it, I'll move it out from under EXPERIMENTAL.
IOWs: I'm not planning to push xfs_fs_staging.h itself to upstream ever.
> Also, what is the use case for this ioctl? Is it necessary to have this when
> userspace can already just replace a verity file with a copy that has verity
> disabled? That's less efficient, but it does not require any kernel support and
> does not require CAP_SYS_ADMIN.
No, of course it isn't needed if replacing the file is easy. That
however assumes that replacing /is/ easy.
The use case for this is: "I enabled fsverity on my backup volume so I
could detect bitrot, then the primary disk died, and when I went to
restore the primary, I got a verity error."
Being able to read known-bad corrupted contents is less bad than losing
the entire file or having to do surgery with xfs_db to turn off
fsverity.
Just for my own convenience, this would enable me to try out fsverity in
a few places while being able to undo it quickly if <cough> we end up
changing the ondisk format during review.
> And of course, if we do add this ioctl it shouldn't be XFS-specific.
Yes, this is a proof of concept. I'd lift it to fs/verity/ if you
accept the premise.
--D
> - Eric
>
^ permalink raw reply [flat|nested] 111+ messages in thread
* [PATCH 28/29] xfs: allow verity files to be opened even if the fsverity metadata is damaged
2024-03-30 0:32 ` [PATCHSET v5.5 2/2] xfs: fs-verity support Darrick J. Wong
` (26 preceding siblings ...)
2024-03-30 0:43 ` [PATCH 27/29] xfs: make it possible to disable fsverity Darrick J. Wong
@ 2024-03-30 0:43 ` Darrick J. Wong
2024-04-02 18:04 ` Andrey Albershteyn
2024-04-02 20:00 ` Colin Walters
2024-03-30 0:43 ` [PATCH 29/29] xfs: enable ro-compat fs-verity flag Darrick J. Wong
28 siblings, 2 replies; 111+ messages in thread
From: Darrick J. Wong @ 2024-03-30 0:43 UTC (permalink / raw)
To: djwong, ebiggers, aalbersh; +Cc: linux-xfs, linux-fsdevel, fsverity
From: Darrick J. Wong <djwong@kernel.org>
There are more things that one can do with an open file descriptor on
XFS -- query extended attributes, scan for metadata damage, repair
metadata, etc. None of this is possible if the fsverity metadata are
damaged, because that prevents the file from being opened.
Ignore a selective set of error codes that we know fsverity_file_open to
return if the verity descriptor is nonsense.
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
---
fs/iomap/buffered-io.c | 8 ++++++++
fs/xfs/xfs_file.c | 19 ++++++++++++++++++-
2 files changed, 26 insertions(+), 1 deletion(-)
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index 9f9d929dfeebc..e68a15b72dbdd 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -487,6 +487,14 @@ static loff_t iomap_readpage_iter(const struct iomap_iter *iter,
size_t poff, plen;
sector_t sector;
+ /*
+ * If this verity file hasn't been activated, fail read attempts. This
+ * can happen if the calling filesystem allows files to be opened even
+ * with damaged verity metadata.
+ */
+ if (IS_VERITY(iter->inode) && !fsverity_active(iter->inode))
+ return -EIO;
+
if (iomap->type == IOMAP_INLINE)
return iomap_read_inline_data(iter, folio);
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index c0b3e8146b753..36034eaefbf55 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -1431,8 +1431,25 @@ xfs_file_open(
FMODE_DIO_PARALLEL_WRITE | FMODE_CAN_ODIRECT;
error = fsverity_file_open(inode, file);
- if (error)
+ switch (error) {
+ case -EFBIG:
+ case -EINVAL:
+ case -EMSGSIZE:
+ case -EFSCORRUPTED:
+ /*
+ * Be selective about which fsverity errors we propagate to
+ * userspace; we still want to be able to open this file even
+ * if reads don't work. Someone might want to perform an
+ * online repair.
+ */
+ if (has_capability_noaudit(current, CAP_SYS_ADMIN))
+ break;
return error;
+ case 0:
+ break;
+ default:
+ return error;
+ }
return generic_file_open(inode, file);
}
^ permalink raw reply related [flat|nested] 111+ messages in thread
* Re: [PATCH 28/29] xfs: allow verity files to be opened even if the fsverity metadata is damaged
2024-03-30 0:43 ` [PATCH 28/29] xfs: allow verity files to be opened even if the fsverity metadata is damaged Darrick J. Wong
@ 2024-04-02 18:04 ` Andrey Albershteyn
2024-04-02 20:00 ` Colin Walters
1 sibling, 0 replies; 111+ messages in thread
From: Andrey Albershteyn @ 2024-04-02 18:04 UTC (permalink / raw)
To: Darrick J. Wong; +Cc: ebiggers, linux-xfs, linux-fsdevel, fsverity
On 2024-03-29 17:43:22, Darrick J. Wong wrote:
> From: Darrick J. Wong <djwong@kernel.org>
>
> There are more things that one can do with an open file descriptor on
> XFS -- query extended attributes, scan for metadata damage, repair
> metadata, etc. None of this is possible if the fsverity metadata are
> damaged, because that prevents the file from being opened.
>
> Ignore a selective set of error codes that we know fsverity_file_open to
> return if the verity descriptor is nonsense.
>
> Signed-off-by: Darrick J. Wong <djwong@kernel.org>
> ---
> fs/iomap/buffered-io.c | 8 ++++++++
> fs/xfs/xfs_file.c | 19 ++++++++++++++++++-
> 2 files changed, 26 insertions(+), 1 deletion(-)
>
>
Looks good to me:
Reviewed-by: Andrey Albershteyn <aalbersh@redhat.com>
--
- Andrey
^ permalink raw reply [flat|nested] 111+ messages in thread
* Re: [PATCH 28/29] xfs: allow verity files to be opened even if the fsverity metadata is damaged
2024-03-30 0:43 ` [PATCH 28/29] xfs: allow verity files to be opened even if the fsverity metadata is damaged Darrick J. Wong
2024-04-02 18:04 ` Andrey Albershteyn
@ 2024-04-02 20:00 ` Colin Walters
2024-04-02 22:52 ` Darrick J. Wong
1 sibling, 1 reply; 111+ messages in thread
From: Colin Walters @ 2024-04-02 20:00 UTC (permalink / raw)
To: Darrick J. Wong, Eric Biggers, aalbersh; +Cc: xfs, linux-fsdevel, fsverity
On Fri, Mar 29, 2024, at 8:43 PM, Darrick J. Wong wrote:
> From: Darrick J. Wong <djwong@kernel.org>
>
> There are more things that one can do with an open file descriptor on
> XFS -- query extended attributes, scan for metadata damage, repair
> metadata, etc. None of this is possible if the fsverity metadata are
> damaged, because that prevents the file from being opened.
>
> Ignore a selective set of error codes that we know fsverity_file_open to
> return if the verity descriptor is nonsense.
>
> Signed-off-by: Darrick J. Wong <djwong@kernel.org>
> ---
> fs/iomap/buffered-io.c | 8 ++++++++
> fs/xfs/xfs_file.c | 19 ++++++++++++++++++-
> 2 files changed, 26 insertions(+), 1 deletion(-)
>
>
> diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
> index 9f9d929dfeebc..e68a15b72dbdd 100644
> --- a/fs/iomap/buffered-io.c
> +++ b/fs/iomap/buffered-io.c
> @@ -487,6 +487,14 @@ static loff_t iomap_readpage_iter(const struct
> iomap_iter *iter,
> size_t poff, plen;
> sector_t sector;
>
> + /*
> + * If this verity file hasn't been activated, fail read attempts. This
> + * can happen if the calling filesystem allows files to be opened even
> + * with damaged verity metadata.
> + */
> + if (IS_VERITY(iter->inode) && !fsverity_active(iter->inode))
> + return -EIO;
> +
> if (iomap->type == IOMAP_INLINE)
> return iomap_read_inline_data(iter, folio);
>
> diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
> index c0b3e8146b753..36034eaefbf55 100644
> --- a/fs/xfs/xfs_file.c
> +++ b/fs/xfs/xfs_file.c
> @@ -1431,8 +1431,25 @@ xfs_file_open(
> FMODE_DIO_PARALLEL_WRITE | FMODE_CAN_ODIRECT;
>
> error = fsverity_file_open(inode, file);
> - if (error)
> + switch (error) {
> + case -EFBIG:
> + case -EINVAL:
> + case -EMSGSIZE:
> + case -EFSCORRUPTED:
> + /*
> + * Be selective about which fsverity errors we propagate to
> + * userspace; we still want to be able to open this file even
> + * if reads don't work. Someone might want to perform an
> + * online repair.
> + */
> + if (has_capability_noaudit(current, CAP_SYS_ADMIN))
> + break;
As I understand it, fsverity (and dm-verity) are desirable in cases with high safety and integrity requirements, where the goal is for the system to "fail closed" if errors in general are detected -- that is, on anything that would leave the system in an ill-defined state.
A lot of ambient processes are going to have CAP_SYS_ADMIN and this will just swallow these errors for those (will things on the EFSCORRUPTED path at least have been logged by a lower-level function?)... whereas this is only needed for a very few tools.
At least for composefs the quoted cases of "query extended attributes, scan for metadata damage, repair metadata" are all things that canonically live in the composefs metadata (EROFS) blob, so in theory there's a lot less of a need to query/inspect it for those use cases. (Maybe for composefs we should force canonicalize all the underlying files to have mode 0400 and no xattrs or something and add that to its repair).
I hesitate to say it but maybe there should be some ioctl for online repair use cases only, or perhaps a new O_NOVERITY special flag to openat2()?
^ permalink raw reply [flat|nested] 111+ messages in thread
* Re: [PATCH 28/29] xfs: allow verity files to be opened even if the fsverity metadata is damaged
2024-04-02 20:00 ` Colin Walters
@ 2024-04-02 22:52 ` Darrick J. Wong
2024-04-02 23:45 ` Eric Biggers
2024-04-03 0:10 ` Colin Walters
0 siblings, 2 replies; 111+ messages in thread
From: Darrick J. Wong @ 2024-04-02 22:52 UTC (permalink / raw)
To: Colin Walters; +Cc: Eric Biggers, aalbersh, xfs, linux-fsdevel, fsverity
On Tue, Apr 02, 2024 at 04:00:06PM -0400, Colin Walters wrote:
>
>
> On Fri, Mar 29, 2024, at 8:43 PM, Darrick J. Wong wrote:
> > From: Darrick J. Wong <djwong@kernel.org>
> >
> > There are more things that one can do with an open file descriptor on
> > XFS -- query extended attributes, scan for metadata damage, repair
> > metadata, etc. None of this is possible if the fsverity metadata are
> > damaged, because that prevents the file from being opened.
> >
> > Ignore a selective set of error codes that we know fsverity_file_open to
> > return if the verity descriptor is nonsense.
> >
> > Signed-off-by: Darrick J. Wong <djwong@kernel.org>
> > ---
> > fs/iomap/buffered-io.c | 8 ++++++++
> > fs/xfs/xfs_file.c | 19 ++++++++++++++++++-
> > 2 files changed, 26 insertions(+), 1 deletion(-)
> >
> >
> > diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
> > index 9f9d929dfeebc..e68a15b72dbdd 100644
> > --- a/fs/iomap/buffered-io.c
> > +++ b/fs/iomap/buffered-io.c
> > @@ -487,6 +487,14 @@ static loff_t iomap_readpage_iter(const struct
> > iomap_iter *iter,
> > size_t poff, plen;
> > sector_t sector;
> >
> > + /*
> > + * If this verity file hasn't been activated, fail read attempts. This
> > + * can happen if the calling filesystem allows files to be opened even
> > + * with damaged verity metadata.
> > + */
> > + if (IS_VERITY(iter->inode) && !fsverity_active(iter->inode))
> > + return -EIO;
> > +
> > if (iomap->type == IOMAP_INLINE)
> > return iomap_read_inline_data(iter, folio);
> >
> > diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
> > index c0b3e8146b753..36034eaefbf55 100644
> > --- a/fs/xfs/xfs_file.c
> > +++ b/fs/xfs/xfs_file.c
> > @@ -1431,8 +1431,25 @@ xfs_file_open(
> > FMODE_DIO_PARALLEL_WRITE | FMODE_CAN_ODIRECT;
> >
> > error = fsverity_file_open(inode, file);
> > - if (error)
> > + switch (error) {
> > + case -EFBIG:
> > + case -EINVAL:
> > + case -EMSGSIZE:
> > + case -EFSCORRUPTED:
> > + /*
> > + * Be selective about which fsverity errors we propagate to
> > + * userspace; we still want to be able to open this file even
> > + * if reads don't work. Someone might want to perform an
> > + * online repair.
> > + */
> > + if (has_capability_noaudit(current, CAP_SYS_ADMIN))
> > + break;
>
> As I understand it, fsverity (and dm-verity) are desirable in
> high-safety and integrity requirement cases where the goal is for the
> system to "fail closed" if errors in general are detected; anything
> that would have the system be in an ill-defined state.
Is "open() fails if verity metadata are trashed" a hard requirement?
Reads will still fail due to (iomap) readahead returning EIO for a file
that is IS_VERITY() && !fsverity_active(). This is (afaict) the state
you end up with when the fsverity open fails. ext4/f2fs don't do that,
but they also don't have online fsck so once a file's dead it's dead.
> A lot of ambient processes are going to have CAP_SYS_ADMIN and this
> will just swallow these errors for those (will things the EFSCORRUPTED
> path at least have been logged by a lower level function?)...whereas
> this is only needed just for a very few tools.
>
> At least for composefs the quoted cases of "query extended attributes,
> scan for metadata damage, repair metadata" are all things that
> canonically live in the composefs metadata (EROFS) blob, so in theory
> there's a lot less of a need to query/inspect it for those use cases.
> (Maybe for composefs we should force canonicalize all the underlying
> files to have mode 0400 and no xattrs or something and add that to its
> repair).
<shrug> I don't know if regular (i.e. non-verity) xattrs are one of the
things that get frozen by verity? Storing fsverity metadata in private
namespace xattrs is unique to xfs.
> I hesitate to say it but maybe there should be some ioctl for online
> repair use cases only, or perhaps a new O_NOVERITY special flag to
> openat2()?
"openat2 but without meddling from the VFS"? Tempting... ;)
--D
^ permalink raw reply [flat|nested] 111+ messages in thread
* Re: [PATCH 28/29] xfs: allow verity files to be opened even if the fsverity metadata is damaged
2024-04-02 22:52 ` Darrick J. Wong
@ 2024-04-02 23:45 ` Eric Biggers
2024-04-03 1:34 ` Darrick J. Wong
2024-04-03 0:10 ` Colin Walters
1 sibling, 1 reply; 111+ messages in thread
From: Eric Biggers @ 2024-04-02 23:45 UTC (permalink / raw)
To: Darrick J. Wong; +Cc: Colin Walters, aalbersh, xfs, linux-fsdevel, fsverity
On Tue, Apr 02, 2024 at 03:52:16PM -0700, Darrick J. Wong wrote:
> On Tue, Apr 02, 2024 at 04:00:06PM -0400, Colin Walters wrote:
> >
> >
> > On Fri, Mar 29, 2024, at 8:43 PM, Darrick J. Wong wrote:
> > > From: Darrick J. Wong <djwong@kernel.org>
> > >
> > > There are more things that one can do with an open file descriptor on
> > > XFS -- query extended attributes, scan for metadata damage, repair
> > > metadata, etc. None of this is possible if the fsverity metadata are
> > > damaged, because that prevents the file from being opened.
> > >
> > > Ignore a selective set of error codes that we know fsverity_file_open to
> > > return if the verity descriptor is nonsense.
> > >
> > > Signed-off-by: Darrick J. Wong <djwong@kernel.org>
> > > ---
> > > fs/iomap/buffered-io.c | 8 ++++++++
> > > fs/xfs/xfs_file.c | 19 ++++++++++++++++++-
> > > 2 files changed, 26 insertions(+), 1 deletion(-)
> > >
> > >
> > > diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
> > > index 9f9d929dfeebc..e68a15b72dbdd 100644
> > > --- a/fs/iomap/buffered-io.c
> > > +++ b/fs/iomap/buffered-io.c
> > > @@ -487,6 +487,14 @@ static loff_t iomap_readpage_iter(const struct
> > > iomap_iter *iter,
> > > size_t poff, plen;
> > > sector_t sector;
> > >
> > > + /*
> > > + * If this verity file hasn't been activated, fail read attempts. This
> > > + * can happen if the calling filesystem allows files to be opened even
> > > + * with damaged verity metadata.
> > > + */
> > > + if (IS_VERITY(iter->inode) && !fsverity_active(iter->inode))
> > > + return -EIO;
> > > +
> > > if (iomap->type == IOMAP_INLINE)
> > > return iomap_read_inline_data(iter, folio);
> > >
> > > diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
> > > index c0b3e8146b753..36034eaefbf55 100644
> > > --- a/fs/xfs/xfs_file.c
> > > +++ b/fs/xfs/xfs_file.c
> > > @@ -1431,8 +1431,25 @@ xfs_file_open(
> > > FMODE_DIO_PARALLEL_WRITE | FMODE_CAN_ODIRECT;
> > >
> > > error = fsverity_file_open(inode, file);
> > > - if (error)
> > > + switch (error) {
> > > + case -EFBIG:
> > > + case -EINVAL:
> > > + case -EMSGSIZE:
> > > + case -EFSCORRUPTED:
> > > + /*
> > > + * Be selective about which fsverity errors we propagate to
> > > + * userspace; we still want to be able to open this file even
> > > + * if reads don't work. Someone might want to perform an
> > > + * online repair.
> > > + */
> > > + if (has_capability_noaudit(current, CAP_SYS_ADMIN))
> > > + break;
> >
> > As I understand it, fsverity (and dm-verity) are desirable in
> > high-safety and integrity requirement cases where the goal is for the
> > system to "fail closed" if errors in general are detected; anything
> > that would have the system be in an ill-defined state.
>
> Is "open() fails if verity metadata are trashed" a hard requirement?
>
> Reads will still fail due to (iomap) readahead returning EIO for a file
> that is IS_VERITY() && !fsverity_active(). This is (afaict) the state
> you end up with when the fsverity open fails. ext4/f2fs don't do that,
> but they also don't have online fsck so once a file's dead it's dead.
>
We really should have the same behavior on all filesystems, and that behavior
should be documented in Documentation/filesystems/fsverity.rst. I guess you
want this for XFS_IOC_SCRUB_METADATA? That takes in an inode number directly,
in xfs_scrub_metadata::sm_ino; does it even need to be executed on the same file
it's checking? Anyway, allowing the open means that the case of IS_VERITY() &&
!fsverity_active() needs to be handled later in any case when I/O may be done to
the file. We need to be super careful to ensure that all cases are handled.
Even just considering this patchset and XFS only, it looks like you got it wrong
in xfs_file_read_iter(). You're allowing direct I/O to files that have
IS_VERITY() && !fsverity_active().
This change also invalidates the documentation for fsverity_active() which is:
/**
* fsverity_active() - do reads from the inode need to go through fs-verity?
* @inode: inode to check
*
* This checks whether ->i_verity_info has been set.
*
* Filesystems call this from ->readahead() to check whether the pages need to
* be verified or not. Don't use IS_VERITY() for this purpose; it's subject to
* a race condition where the file is being read concurrently with
* FS_IOC_ENABLE_VERITY completing. (S_VERITY is set before ->i_verity_info.)
*
* Return: true if reads need to go through fs-verity, otherwise false
*/
I think that if you'd like to move forward with this, it would take a patchset
that brings the behavior to all filesystems and considers all callers of
fsverity_active().
Another consideration will be whether the fsverity builtin signature not
matching the file, not being trusted, or being malformed counts as "the fsverity
metadata being damaged".
- Eric
^ permalink raw reply [flat|nested] 111+ messages in thread
* Re: [PATCH 28/29] xfs: allow verity files to be opened even if the fsverity metadata is damaged
2024-04-02 23:45 ` Eric Biggers
@ 2024-04-03 1:34 ` Darrick J. Wong
0 siblings, 0 replies; 111+ messages in thread
From: Darrick J. Wong @ 2024-04-03 1:34 UTC (permalink / raw)
To: Eric Biggers; +Cc: Colin Walters, aalbersh, xfs, linux-fsdevel, fsverity
On Tue, Apr 02, 2024 at 04:45:58PM -0700, Eric Biggers wrote:
> On Tue, Apr 02, 2024 at 03:52:16PM -0700, Darrick J. Wong wrote:
> > On Tue, Apr 02, 2024 at 04:00:06PM -0400, Colin Walters wrote:
> > >
> > >
> > > On Fri, Mar 29, 2024, at 8:43 PM, Darrick J. Wong wrote:
> > > > From: Darrick J. Wong <djwong@kernel.org>
> > > >
> > > > There are more things that one can do with an open file descriptor on
> > > > XFS -- query extended attributes, scan for metadata damage, repair
> > > > metadata, etc. None of this is possible if the fsverity metadata are
> > > > damaged, because that prevents the file from being opened.
> > > >
> > > > Ignore a selective set of error codes that we know fsverity_file_open to
> > > > return if the verity descriptor is nonsense.
> > > >
> > > > Signed-off-by: Darrick J. Wong <djwong@kernel.org>
> > > > ---
> > > > fs/iomap/buffered-io.c | 8 ++++++++
> > > > fs/xfs/xfs_file.c | 19 ++++++++++++++++++-
> > > > 2 files changed, 26 insertions(+), 1 deletion(-)
> > > >
> > > >
> > > > diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
> > > > index 9f9d929dfeebc..e68a15b72dbdd 100644
> > > > --- a/fs/iomap/buffered-io.c
> > > > +++ b/fs/iomap/buffered-io.c
> > > > @@ -487,6 +487,14 @@ static loff_t iomap_readpage_iter(const struct
> > > > iomap_iter *iter,
> > > > size_t poff, plen;
> > > > sector_t sector;
> > > >
> > > > + /*
> > > > + * If this verity file hasn't been activated, fail read attempts. This
> > > > + * can happen if the calling filesystem allows files to be opened even
> > > > + * with damaged verity metadata.
> > > > + */
> > > > + if (IS_VERITY(iter->inode) && !fsverity_active(iter->inode))
> > > > + return -EIO;
> > > > +
> > > > if (iomap->type == IOMAP_INLINE)
> > > > return iomap_read_inline_data(iter, folio);
> > > >
> > > > diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
> > > > index c0b3e8146b753..36034eaefbf55 100644
> > > > --- a/fs/xfs/xfs_file.c
> > > > +++ b/fs/xfs/xfs_file.c
> > > > @@ -1431,8 +1431,25 @@ xfs_file_open(
> > > > FMODE_DIO_PARALLEL_WRITE | FMODE_CAN_ODIRECT;
> > > >
> > > > error = fsverity_file_open(inode, file);
> > > > - if (error)
> > > > + switch (error) {
> > > > + case -EFBIG:
> > > > + case -EINVAL:
> > > > + case -EMSGSIZE:
> > > > + case -EFSCORRUPTED:
> > > > + /*
> > > > + * Be selective about which fsverity errors we propagate to
> > > > + * userspace; we still want to be able to open this file even
> > > > + * if reads don't work. Someone might want to perform an
> > > > + * online repair.
> > > > + */
> > > > + if (has_capability_noaudit(current, CAP_SYS_ADMIN))
> > > > + break;
> > >
> > > As I understand it, fsverity (and dm-verity) are desirable in
> > > high-safety and integrity requirement cases where the goal is for the
> > > system to "fail closed" if errors in general are detected; anything
> > > that would have the system be in an ill-defined state.
> >
> > Is "open() fails if verity metadata are trashed" a hard requirement?
> >
> > Reads will still fail due to (iomap) readahead returning EIO for a file
> > that is IS_VERITY() && !fsverity_active(). This is (afaict) the state
> > you end up with when the fsverity open fails. ext4/f2fs don't do that,
> > but they also don't have online fsck so once a file's dead it's dead.
> >
>
> We really should have the same behavior on all filesystems, and that behavior
> should be documented in Documentation/filesystems/fsverity.rst. I guess you
> want this for XFS_IOC_SCRUB_METADATA?
Yes. xfs_scrub tries to open every regular file that it can, but if the
fsverity metadata is too badly damaged then the open() returns EMSGSIZE
or EINVAL or something. The EMSGSIZE is particularly nasty since it's
not listed in the openat() manpage as a possible error code, which
surprised me.
> That takes in an inode number directly,
> in xfs_scrub_metadata::sm_ino; does it even need to be executed on the same file
> it's checking?
<nod> The metadata repairs themselves can use scrub-by-handle mode, so
it's not *so* hard to handle it gracefully.
> Anyway, allowing the open means that the case of IS_VERITY() &&
> !fsverity_active() needs to be handled later in any case when I/O may be done to
> the file. We need to be super careful to ensure that all cases are handled.
I /think/ most everything else is gated on IS_VERITY, right?
> Even just considering this patchset and XFS only, it looks like you got it wrong
> in xfs_file_read_iter(). You're allowing direct I/O to files that have
> IS_VERITY() && !fsverity_active().
Ahaha, yeah, that needs to be changed to:
else if ((iocb->ki_flags & IOCB_DIRECT) && !IS_VERITY(inode))
ret = xfs_file_dio_read(iocb, to);
Good catch.
> This change also invalidates the documentation for fsverity_active() which is:
>
> /**
> * fsverity_active() - do reads from the inode need to go through fs-verity?
> * @inode: inode to check
> *
> * This checks whether ->i_verity_info has been set.
> *
> * Filesystems call this from ->readahead() to check whether the pages need to
> * be verified or not. Don't use IS_VERITY() for this purpose; it's subject to
> * a race condition where the file is being read concurrently with
> * FS_IOC_ENABLE_VERITY completing. (S_VERITY is set before ->i_verity_info.)
> *
> * Return: true if reads need to go through fs-verity, otherwise false
> */
>
> I think that if you'd like to move forward with this, it would take a patchset
> that brings the behavior to all filesystems and considers all callers of
> fsverity_active().
<nod> If you think it's a reasonable thing to allow, then I'll of course
apply it to btrfs/ext4/f2fs.
> Another consideration will be whether the fsverity builtin signature not
> matching the file, not being trusted, or being malformed counts as "the fsverity
> metadata being damaged".
<shrug> Can you easily check that in the open routine? I figured that
signature validation problems would manifest as read errors.
--D
> - Eric
>
^ permalink raw reply [flat|nested] 111+ messages in thread
* Re: [PATCH 28/29] xfs: allow verity files to be opened even if the fsverity metadata is damaged
2024-04-02 22:52 ` Darrick J. Wong
2024-04-02 23:45 ` Eric Biggers
@ 2024-04-03 0:10 ` Colin Walters
2024-04-03 1:39 ` Darrick J. Wong
2024-04-03 8:35 ` Alexander Larsson
1 sibling, 2 replies; 111+ messages in thread
From: Colin Walters @ 2024-04-03 0:10 UTC (permalink / raw)
To: Darrick J. Wong
Cc: Eric Biggers, Andrey Albershteyn, xfs, linux-fsdevel, fsverity,
Alexander Larsson
[cc alexl@, retained quotes for context]
On Tue, Apr 2, 2024, at 6:52 PM, Darrick J. Wong wrote:
> On Tue, Apr 02, 2024 at 04:00:06PM -0400, Colin Walters wrote:
>>
>>
>> On Fri, Mar 29, 2024, at 8:43 PM, Darrick J. Wong wrote:
>> > From: Darrick J. Wong <djwong@kernel.org>
>> >
>> > There are more things that one can do with an open file descriptor on
>> > XFS -- query extended attributes, scan for metadata damage, repair
>> > metadata, etc. None of this is possible if the fsverity metadata are
>> > damaged, because that prevents the file from being opened.
>> >
>> > Ignore a selective set of error codes that we know fsverity_file_open to
>> > return if the verity descriptor is nonsense.
>> >
>> > Signed-off-by: Darrick J. Wong <djwong@kernel.org>
>> > ---
>> > fs/iomap/buffered-io.c | 8 ++++++++
>> > fs/xfs/xfs_file.c | 19 ++++++++++++++++++-
>> > 2 files changed, 26 insertions(+), 1 deletion(-)
>> >
>> >
>> > diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
>> > index 9f9d929dfeebc..e68a15b72dbdd 100644
>> > --- a/fs/iomap/buffered-io.c
>> > +++ b/fs/iomap/buffered-io.c
>> > @@ -487,6 +487,14 @@ static loff_t iomap_readpage_iter(const struct
>> > iomap_iter *iter,
>> > size_t poff, plen;
>> > sector_t sector;
>> >
>> > + /*
>> > + * If this verity file hasn't been activated, fail read attempts. This
>> > + * can happen if the calling filesystem allows files to be opened even
>> > + * with damaged verity metadata.
>> > + */
>> > + if (IS_VERITY(iter->inode) && !fsverity_active(iter->inode))
>> > + return -EIO;
>> > +
>> > if (iomap->type == IOMAP_INLINE)
>> > return iomap_read_inline_data(iter, folio);
>> >
>> > diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
>> > index c0b3e8146b753..36034eaefbf55 100644
>> > --- a/fs/xfs/xfs_file.c
>> > +++ b/fs/xfs/xfs_file.c
>> > @@ -1431,8 +1431,25 @@ xfs_file_open(
>> > FMODE_DIO_PARALLEL_WRITE | FMODE_CAN_ODIRECT;
>> >
>> > error = fsverity_file_open(inode, file);
>> > - if (error)
>> > + switch (error) {
>> > + case -EFBIG:
>> > + case -EINVAL:
>> > + case -EMSGSIZE:
>> > + case -EFSCORRUPTED:
>> > + /*
>> > + * Be selective about which fsverity errors we propagate to
>> > + * userspace; we still want to be able to open this file even
>> > + * if reads don't work. Someone might want to perform an
>> > + * online repair.
>> > + */
>> > + if (has_capability_noaudit(current, CAP_SYS_ADMIN))
>> > + break;
>>
>> As I understand it, fsverity (and dm-verity) are desirable in
>> high-safety and integrity requirement cases where the goal is for the
>> system to "fail closed" if errors in general are detected; anything
>> that would have the system be in an ill-defined state.
>
> Is "open() fails if verity metadata are trashed" a hard requirement?
I can't say authoritatively, but I do want to ensure we've dug into the semantics here, and I agree with Eric that it would make the most sense to have this be consistent across filesystems.
> Reads will still fail due to (iomap) readahead returning EIO for a file
> that is IS_VERITY() && !fsverity_active(). This is (afaict) the state
> you end up with when the fsverity open fails. ext4/f2fs don't do that,
> but they also don't have online fsck so once a file's dead it's dead.
OK, right. Allowing an open() but having read() fail seems like it doesn't weaken things too much in reality. I think what makes me uncomfortable is the error-swallowing; but yes, in theory we should get the same or similar error on a subsequent read().
> <shrug> I don't know if regular (i.e. non-verity) xattrs are one of the
> things that get frozen by verity? Storing fsverity metadata in private
> namespace xattrs is unique to xfs.
No, verity only covers file contents, no other metadata. This is one of the rationales for composefs (e.g. ensuring things like the suid bit, security.selinux xattr etc. are covered as well as in general complete filesystem trees).
>> I hesitate to say it but maybe there should be some ioctl for online
>> repair use cases only, or perhaps a new O_NOVERITY special flag to
>> openat2()?
>
> "openat2 but without meddling from the VFS"? Tempting... ;)
Or really any lower-level, even filesystem-specific, API for the online fsck case.
Adding a blanket new special case for all CAP_SYS_ADMIN processes covers a lot of things that don't need that.
^ permalink raw reply [flat|nested] 111+ messages in thread
* Re: [PATCH 28/29] xfs: allow verity files to be opened even if the fsverity metadata is damaged
2024-04-03 0:10 ` Colin Walters
@ 2024-04-03 1:39 ` Darrick J. Wong
2024-04-03 1:59 ` Dave Chinner
2024-04-03 8:35 ` Alexander Larsson
1 sibling, 1 reply; 111+ messages in thread
From: Darrick J. Wong @ 2024-04-03 1:39 UTC (permalink / raw)
To: Colin Walters
Cc: Eric Biggers, Andrey Albershteyn, xfs, linux-fsdevel, fsverity,
Alexander Larsson
On Tue, Apr 02, 2024 at 08:10:15PM -0400, Colin Walters wrote:
> [cc alexl@, retained quotes for context]
>
> On Tue, Apr 2, 2024, at 6:52 PM, Darrick J. Wong wrote:
> > On Tue, Apr 02, 2024 at 04:00:06PM -0400, Colin Walters wrote:
> >>
> >>
> >> On Fri, Mar 29, 2024, at 8:43 PM, Darrick J. Wong wrote:
> >> > From: Darrick J. Wong <djwong@kernel.org>
> >> >
> >> > There are more things that one can do with an open file descriptor on
> >> > XFS -- query extended attributes, scan for metadata damage, repair
> >> > metadata, etc. None of this is possible if the fsverity metadata are
> >> > damaged, because that prevents the file from being opened.
> >> >
> >> > Ignore a selective set of error codes that we know fsverity_file_open to
> >> > return if the verity descriptor is nonsense.
> >> >
> >> > Signed-off-by: Darrick J. Wong <djwong@kernel.org>
> >> > ---
> >> > fs/iomap/buffered-io.c | 8 ++++++++
> >> > fs/xfs/xfs_file.c | 19 ++++++++++++++++++-
> >> > 2 files changed, 26 insertions(+), 1 deletion(-)
> >> >
> >> >
> >> > diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
> >> > index 9f9d929dfeebc..e68a15b72dbdd 100644
> >> > --- a/fs/iomap/buffered-io.c
> >> > +++ b/fs/iomap/buffered-io.c
> >> > @@ -487,6 +487,14 @@ static loff_t iomap_readpage_iter(const struct
> >> > iomap_iter *iter,
> >> > size_t poff, plen;
> >> > sector_t sector;
> >> >
> >> > + /*
> >> > + * If this verity file hasn't been activated, fail read attempts. This
> >> > + * can happen if the calling filesystem allows files to be opened even
> >> > + * with damaged verity metadata.
> >> > + */
> >> > + if (IS_VERITY(iter->inode) && !fsverity_active(iter->inode))
> >> > + return -EIO;
> >> > +
> >> > if (iomap->type == IOMAP_INLINE)
> >> > return iomap_read_inline_data(iter, folio);
> >> >
> >> > diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
> >> > index c0b3e8146b753..36034eaefbf55 100644
> >> > --- a/fs/xfs/xfs_file.c
> >> > +++ b/fs/xfs/xfs_file.c
> >> > @@ -1431,8 +1431,25 @@ xfs_file_open(
> >> > FMODE_DIO_PARALLEL_WRITE | FMODE_CAN_ODIRECT;
> >> >
> >> > error = fsverity_file_open(inode, file);
> >> > - if (error)
> >> > + switch (error) {
> >> > + case -EFBIG:
> >> > + case -EINVAL:
> >> > + case -EMSGSIZE:
> >> > + case -EFSCORRUPTED:
> >> > + /*
> >> > + * Be selective about which fsverity errors we propagate to
> >> > + * userspace; we still want to be able to open this file even
> >> > + * if reads don't work. Someone might want to perform an
> >> > + * online repair.
> >> > + */
> >> > + if (has_capability_noaudit(current, CAP_SYS_ADMIN))
> >> > + break;
> >>
> >> As I understand it, fsverity (and dm-verity) are desirable in
> >> high-safety and integrity requirement cases where the goal is for the
> >> system to "fail closed" if errors in general are detected; anything
> >> that would have the system be in an ill-defined state.
> >
> > Is "open() fails if verity metadata are trashed" a hard requirement?
>
> I can't say authoritatively, but I do want to ensure we've dug into
> the semantics here, and I agree with Eric that it would make the most
> sense to have this be consistent across filesystems.
>
> > Reads will still fail due to (iomap) readahead returning EIO for a file
> > that is IS_VERITY() && !fsverity_active(). This is (afaict) the state
> > you end up with when the fsverity open fails. ext4/f2fs don't do that,
> > but they also don't have online fsck so once a file's dead it's dead.
>
> OK, right. Allowing an open() but having read() fail seems like it
> doesn't weaken things too much in reality. I think what makes me
> uncomfortable is the error-swallowing; but yes, in theory we should
> get the same or similar error on a subsequent read().
<nod> I /could/ write up some tests to make sure that happens.
> > <shrug> I don't know if regular (i.e. non-verity) xattrs are one of the
> > things that get frozen by verity? Storing fsverity metadata in private
> > namespace xattrs is unique to xfs.
>
> No, verity only covers file contents, no other metadata. This is one
> of the rationales for composefs (e.g. ensuring things like the suid
> bit, security.selinux xattr etc. are covered as well as in general
> complete filesystem trees).
>
> >> I hesitate to say it but maybe there should be some ioctl for online
> >> repair use cases only, or perhaps a new O_NOVERITY special flag to
> >> openat2()?
> >
> > "openat2 but without meddling from the VFS"? Tempting... ;)
>
> Or really any lower level even filesystem-specific API for the online
> fsck case. Adding a blanket new special case for all CAP_SYS_ADMIN
> processes covers a lot of things that don't need that.
I suppose there could be an O_NOVALIDATION to turn off data checksum
validation on btrfs/bcachefs too. But then you'd want careful
controls on who gets to use it. Maybe not liblzma_la-crc64-fast.o.
--D
^ permalink raw reply [flat|nested] 111+ messages in thread
* Re: [PATCH 28/29] xfs: allow verity files to be opened even if the fsverity metadata is damaged
2024-04-03 1:39 ` Darrick J. Wong
@ 2024-04-03 1:59 ` Dave Chinner
2024-04-03 3:19 ` Darrick J. Wong
0 siblings, 1 reply; 111+ messages in thread
From: Dave Chinner @ 2024-04-03 1:59 UTC (permalink / raw)
To: Darrick J. Wong
Cc: Colin Walters, Eric Biggers, Andrey Albershteyn, xfs,
linux-fsdevel, fsverity, Alexander Larsson
On Tue, Apr 02, 2024 at 06:39:03PM -0700, Darrick J. Wong wrote:
> On Tue, Apr 02, 2024 at 08:10:15PM -0400, Colin Walters wrote:
> > >> I hesitate to say it but maybe there should be some ioctl for online
> > >> repair use cases only, or perhaps a new O_NOVERITY special flag to
> > >> openat2()?
> > >
> > > "openat2 but without meddling from the VFS"? Tempting... ;)
> >
> > Or really any lower level even filesystem-specific API for the online
> > fsck case. Adding a blanket new special case for all CAP_SYS_ADMIN
> > processes covers a lot of things that don't need that.
>
> I suppose there could be an O_NOVALIDATION to turn off data checksum
> validation on btrfs/bcachefs too. But then you'd want to careful
> controls on who gets to use it. Maybe not liblzma_la-crc64-fast.o.
Just use XFS_IOC_OPEN_BY_HANDLE same as xfs_fsr and xfsdump do. The
handle can be built in userspace from the inode bulkstat
information, and for typical inode contents verification purposes we
don't actually need path-based open access to the inodes. That would
then mean we can simply add our own open flag to return a fd that
can do data operations that short-circuit verification...
Cheers,
Dave.
--
Dave Chinner
david@fromorbit.com
^ permalink raw reply [flat|nested] 111+ messages in thread
* Re: [PATCH 28/29] xfs: allow verity files to be opened even if the fsverity metadata is damaged
2024-04-03 1:59 ` Dave Chinner
@ 2024-04-03 3:19 ` Darrick J. Wong
2024-04-03 22:22 ` Dave Chinner
0 siblings, 1 reply; 111+ messages in thread
From: Darrick J. Wong @ 2024-04-03 3:19 UTC (permalink / raw)
To: Dave Chinner
Cc: Colin Walters, Eric Biggers, Andrey Albershteyn, xfs,
linux-fsdevel, fsverity, Alexander Larsson
On Wed, Apr 03, 2024 at 12:59:22PM +1100, Dave Chinner wrote:
> On Tue, Apr 02, 2024 at 06:39:03PM -0700, Darrick J. Wong wrote:
> > On Tue, Apr 02, 2024 at 08:10:15PM -0400, Colin Walters wrote:
> > > >> I hesitate to say it but maybe there should be some ioctl for online
> > > >> repair use cases only, or perhaps a new O_NOVERITY special flag to
> > > >> openat2()?
> > > >
> > > > "openat2 but without meddling from the VFS"? Tempting... ;)
> > >
> > > Or really any lower level even filesystem-specific API for the online
> > > fsck case. Adding a blanket new special case for all CAP_SYS_ADMIN
> > > processes covers a lot of things that don't need that.
> >
> > I suppose there could be an O_NOVALIDATION to turn off data checksum
> > validation on btrfs/bcachefs too. But then you'd want to careful
> > controls on who gets to use it. Maybe not liblzma_la-crc64-fast.o.
>
> Just use XFS_IOC_OPEN_BY_HANDLE same as xfs_fsr and xfsdump do. The
> handle can be build in userspace from the inode bulkstat
> information, and for typical inode contents verification purposes we
> don't actually need path-based open access to the inodes. That would
> then mean we can simple add our own open flag to return a fd that
> can do data operations that short-circuit verification...
Heh, ok. Are there any private flags that get passed via
xfs_fsop_handlereq_t::oflags? Or does that mean defining a top level
O_FLAG that cannot be passed through openat but /can/ be sent via
open_by_handle?
--D
> Cheers,
>
> Dave.
> --
> Dave Chinner
> david@fromorbit.com
>
^ permalink raw reply [flat|nested] 111+ messages in thread
* Re: [PATCH 28/29] xfs: allow verity files to be opened even if the fsverity metadata is damaged
2024-04-03 3:19 ` Darrick J. Wong
@ 2024-04-03 22:22 ` Dave Chinner
0 siblings, 0 replies; 111+ messages in thread
From: Dave Chinner @ 2024-04-03 22:22 UTC (permalink / raw)
To: Darrick J. Wong
Cc: Colin Walters, Eric Biggers, Andrey Albershteyn, xfs,
linux-fsdevel, fsverity, Alexander Larsson
On Tue, Apr 02, 2024 at 08:19:10PM -0700, Darrick J. Wong wrote:
> On Wed, Apr 03, 2024 at 12:59:22PM +1100, Dave Chinner wrote:
> > On Tue, Apr 02, 2024 at 06:39:03PM -0700, Darrick J. Wong wrote:
> > > On Tue, Apr 02, 2024 at 08:10:15PM -0400, Colin Walters wrote:
> > > > >> I hesitate to say it but maybe there should be some ioctl for online
> > > > >> repair use cases only, or perhaps a new O_NOVERITY special flag to
> > > > >> openat2()?
> > > > >
> > > > > "openat2 but without meddling from the VFS"? Tempting... ;)
> > > >
> > > > Or really any lower level even filesystem-specific API for the online
> > > > fsck case. Adding a blanket new special case for all CAP_SYS_ADMIN
> > > > processes covers a lot of things that don't need that.
> > >
> > > I suppose there could be an O_NOVALIDATION to turn off data checksum
> > > validation on btrfs/bcachefs too. But then you'd want to careful
> > > controls on who gets to use it. Maybe not liblzma_la-crc64-fast.o.
> >
> > Just use XFS_IOC_OPEN_BY_HANDLE same as xfs_fsr and xfsdump do. The
> > handle can be build in userspace from the inode bulkstat
> > information, and for typical inode contents verification purposes we
> > don't actually need path-based open access to the inodes. That would
> > then mean we can simple add our own open flag to return a fd that
> > can do data operations that short-circuit verification...
>
> Heh, ok. Are there any private flags that get passed via
> xfs_fsop_handlereq_t::oflags? Or does that mean defining a top level
> O_FLAG that cannot be passed through openat but /can/ be sent via
> open_by_handle?
AIUI, open flags are arch specific, but I don't think any use the
high bits of the 32 bit space they are defined in. So I think we
could probably use the high bits in that field for our own purposes
and not get conflicts with generic open flags...
-Dave.
--
Dave Chinner
david@fromorbit.com
^ permalink raw reply [flat|nested] 111+ messages in thread
* Re: [PATCH 28/29] xfs: allow verity files to be opened even if the fsverity metadata is damaged
2024-04-03 0:10 ` Colin Walters
2024-04-03 1:39 ` Darrick J. Wong
@ 2024-04-03 8:35 ` Alexander Larsson
1 sibling, 0 replies; 111+ messages in thread
From: Alexander Larsson @ 2024-04-03 8:35 UTC (permalink / raw)
To: Colin Walters, Darrick J. Wong
Cc: Eric Biggers, Andrey Albershteyn, xfs, linux-fsdevel, fsverity
On Tue, 2024-04-02 at 20:10 -0400, Colin Walters wrote:
> [cc alexl@, retained quotes for context]
>
> On Tue, Apr 2, 2024, at 6:52 PM, Darrick J. Wong wrote:
> > On Tue, Apr 02, 2024 at 04:00:06PM -0400, Colin Walters wrote:
> > >
> > >
> > > On Fri, Mar 29, 2024, at 8:43 PM, Darrick J. Wong wrote:
> > > > From: Darrick J. Wong <djwong@kernel.org>
> > > >
> > > > There are more things that one can do with an open file
> > > > descriptor on
> > > > XFS -- query extended attributes, scan for metadata damage,
> > > > repair
> > > > metadata, etc. None of this is possible if the fsverity
> > > > metadata are
> > > > damaged, because that prevents the file from being opened.
> > > >
> > > > Ignore a selective set of error codes that we know
> > > > fsverity_file_open to
> > > > return if the verity descriptor is nonsense.
> > > >
> > > > Signed-off-by: Darrick J. Wong <djwong@kernel.org>
> > > > ---
> > > > fs/iomap/buffered-io.c | 8 ++++++++
> > > > fs/xfs/xfs_file.c | 19 ++++++++++++++++++-
> > > > 2 files changed, 26 insertions(+), 1 deletion(-)
> > > >
> > > >
> > > > diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
> > > > index 9f9d929dfeebc..e68a15b72dbdd 100644
> > > > --- a/fs/iomap/buffered-io.c
> > > > +++ b/fs/iomap/buffered-io.c
> > > > @@ -487,6 +487,14 @@ static loff_t iomap_readpage_iter(const
> > > > struct
> > > > iomap_iter *iter,
> > > > size_t poff, plen;
> > > > sector_t sector;
> > > >
> > > > + /*
> > > > + * If this verity file hasn't been activated, fail
> > > > read attempts. This
> > > > + * can happen if the calling filesystem allows files
> > > > to be opened even
> > > > + * with damaged verity metadata.
> > > > + */
> > > > + if (IS_VERITY(iter->inode) && !fsverity_active(iter-
> > > > >inode))
> > > > + return -EIO;
> > > > +
> > > > if (iomap->type == IOMAP_INLINE)
> > > > return iomap_read_inline_data(iter, folio);
> > > >
> > > > diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
> > > > index c0b3e8146b753..36034eaefbf55 100644
> > > > --- a/fs/xfs/xfs_file.c
> > > > +++ b/fs/xfs/xfs_file.c
> > > > @@ -1431,8 +1431,25 @@ xfs_file_open(
> > > > FMODE_DIO_PARALLEL_WRITE |
> > > > FMODE_CAN_ODIRECT;
> > > >
> > > > error = fsverity_file_open(inode, file);
> > > > - if (error)
> > > > + switch (error) {
> > > > + case -EFBIG:
> > > > + case -EINVAL:
> > > > + case -EMSGSIZE:
> > > > + case -EFSCORRUPTED:
> > > > + /*
> > > > + * Be selective about which fsverity errors we
> > > > propagate to
> > > > + * userspace; we still want to be able to open
> > > > this file even
> > > > + * if reads don't work. Someone might want to
> > > > perform an
> > > > + * online repair.
> > > > + */
> > > > + if (has_capability_noaudit(current,
> > > > CAP_SYS_ADMIN))
> > > > + break;
> > >
> > > As I understand it, fsverity (and dm-verity) are desirable in
> > > high-safety and integrity requirement cases where the goal is for
> > > the
> > > system to "fail closed" if errors in general are detected;
> > > anything
> > > that would have the system be in an ill-defined state.
> >
> > Is "open() fails if verity metadata are trashed" a hard
> > requirement?
>
> I can't say authoritatively, but I do want to ensure we've dug into
> the semantics here, and I agree with Eric that it would make the most
> sense to have this be consistent across filesystems.
In terms of userspace I think this semantic change is fine. Even if the
metadata is broken we will still not see any non-validated data. It's
as if we didn't try to use the broken fsverity metadata until it needed
to be used. I agree with others though that having the same behavior
across all filesystems would make sense. Also, it might be useful
information that the filesystem has an error, so maybe we should log
the swallowed errors.
For kernel use, in overlayfs when using verity_mode=require, we do use
open() (in ovl_validate_verity) to trigger the initialization of
fsverity_info. However I took a look at this code, and it seems to
properly handle (i.e. fail) the case where IS_VERITY(inode) is true but
there is no fsverity_info after open.
Similarly, IMA (in ima_get_verity_digest) relies on the digest loaded
from the header. But it also seems to handle this case correctly.
> > Reads will still fail due to (iomap) readahead returning EIO for a
> > file
> > that is IS_VERITY() && !fsverity_active(). This is (afaict) the
> > state
> > you end up with when the fsverity open fails. ext4/f2fs don't do
> > that,
> > but they also don't have online fsck so once a file's dead it's
> > dead.
>
> OK, right. Allowing an open() but having read() fail seems like it
> doesn't weaken things too much in reality. I think what makes me
> uncomfortable is the error-swallowing; but yes, in theory we should
> get the same or similar error on a subsequent read().
If anything the explicit error list seems a bit fragile to me. What if
the underlying fs reported some new error when reading the metadata,
should we then suddenly fail here when we didn't before?
>
--
=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
=-=-=
Alexander Larsson Red Hat,
Inc
alexl@redhat.com alexander.larsson@gmail.com
He's a lonely alcoholic firefighter looking for a cure to the poison
coursing through his veins. She's a tortured insomniac Hell's Angel on
the trail of a serial killer. They fight crime!
^ permalink raw reply [flat|nested] 111+ messages in thread
* [PATCH 29/29] xfs: enable ro-compat fs-verity flag
2024-03-30 0:32 ` [PATCHSET v5.5 2/2] xfs: fs-verity support Darrick J. Wong
` (27 preceding siblings ...)
2024-03-30 0:43 ` [PATCH 28/29] xfs: allow verity files to be opened even if the fsverity metadata is damaged Darrick J. Wong
@ 2024-03-30 0:43 ` Darrick J. Wong
28 siblings, 0 replies; 111+ messages in thread
From: Darrick J. Wong @ 2024-03-30 0:43 UTC (permalink / raw)
To: djwong, ebiggers, aalbersh; +Cc: linux-xfs, linux-fsdevel, fsverity
From: Andrey Albershteyn <aalbersh@redhat.com>
Finalize fs-verity integration in XFS by making the kernel fs-verity
aware via the ro-compat feature flag.
Signed-off-by: Andrey Albershteyn <aalbersh@redhat.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
[djwong: add spaces]
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
---
fs/xfs/libxfs/xfs_format.h | 9 +++++----
1 file changed, 5 insertions(+), 4 deletions(-)
diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h
index e7ed55f747d01..5e2342c56d499 100644
--- a/fs/xfs/libxfs/xfs_format.h
+++ b/fs/xfs/libxfs/xfs_format.h
@@ -389,10 +389,11 @@ xfs_sb_has_compat_feature(
#define XFS_SB_FEAT_RO_COMPAT_INOBTCNT (1 << 3) /* inobt block counts */
#define XFS_SB_FEAT_RO_COMPAT_VERITY (1 << 4) /* fs-verity */
#define XFS_SB_FEAT_RO_COMPAT_ALL \
- (XFS_SB_FEAT_RO_COMPAT_FINOBT | \
- XFS_SB_FEAT_RO_COMPAT_RMAPBT | \
- XFS_SB_FEAT_RO_COMPAT_REFLINK| \
- XFS_SB_FEAT_RO_COMPAT_INOBTCNT)
+ (XFS_SB_FEAT_RO_COMPAT_FINOBT | \
+ XFS_SB_FEAT_RO_COMPAT_RMAPBT | \
+ XFS_SB_FEAT_RO_COMPAT_REFLINK | \
+ XFS_SB_FEAT_RO_COMPAT_INOBTCNT | \
+ XFS_SB_FEAT_RO_COMPAT_VERITY)
#define XFS_SB_FEAT_RO_COMPAT_UNKNOWN ~XFS_SB_FEAT_RO_COMPAT_ALL
static inline bool
xfs_sb_has_ro_compat_feature(
^ permalink raw reply related [flat|nested] 111+ messages in thread