* [PATCH] xfs: disallow atomic writes on DAX
@ 2025-07-17 15:19 John Garry
2025-07-17 16:02 ` Darrick J. Wong
0 siblings, 1 reply; 4+ messages in thread
From: John Garry @ 2025-07-17 15:19 UTC (permalink / raw)
To: djwong, hch, cem; +Cc: linux-xfs, John Garry
Atomic writes are not currently supported for DAX, but two problems exist:
- we may go down DAX write path for IOCB_ATOMIC, which does not handle
IOCB_ATOMIC properly
- we report non-zero atomic write limits in statx (for DAX inodes)
We may want atomic writes support on DAX in future, but just disallow for
now.
For this, ensure when IOCB_ATOMIC is set that we check the write size
versus the atomic write min and max before branching off to the DAX write
path. This is not strictly required for DAX, as we should not get this far
in the write path as FMODE_CAN_ATOMIC_WRITE should not be set.
In addition, due to reflink being supported for DAX, we automatically get
CoW-based atomic writes support being advertised. Remedy this by
disallowing atomic writes for a DAX inode for both sw and hw modes.
Finally make atomic write size and DAX mount always mutually exclusive.
Reported-by: "Darrick J. Wong" <djwong@kernel.org>
Fixes: 9dffc58f2384 ("xfs: update atomic write limits")
Signed-off-by: John Garry <john.g.garry@oracle.com>
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index db21b5a4b881..84876f41da93 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -1102,9 +1102,6 @@ xfs_file_write_iter(
if (xfs_is_shutdown(ip->i_mount))
return -EIO;
- if (IS_DAX(inode))
- return xfs_file_dax_write(iocb, from);
-
if (iocb->ki_flags & IOCB_ATOMIC) {
if (ocount < xfs_get_atomic_write_min(ip))
return -EINVAL;
@@ -1117,6 +1114,9 @@ xfs_file_write_iter(
return ret;
}
+ if (IS_DAX(inode))
+ return xfs_file_dax_write(iocb, from);
+
if (iocb->ki_flags & IOCB_DIRECT) {
/*
* Allow a directio write to fall back to a buffered
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 07fbdcc4cbf5..b142cd4f446a 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -356,11 +356,22 @@ static inline bool xfs_inode_has_bigrtalloc(const struct xfs_inode *ip)
(XFS_IS_REALTIME_INODE(ip) ? \
(ip)->i_mount->m_rtdev_targp : (ip)->i_mount->m_ddev_targp)
-static inline bool xfs_inode_can_hw_atomic_write(const struct xfs_inode *ip)
+static inline bool xfs_inode_can_hw_atomic_write(struct xfs_inode *ip)
{
+ if (IS_DAX(VFS_I(ip)))
+ return false;
+
return xfs_inode_buftarg(ip)->bt_awu_max > 0;
}
+static inline bool xfs_inode_can_sw_atomic_write(struct xfs_inode *ip)
+{
+ if (IS_DAX(VFS_I(ip)))
+ return false;
+
+ return xfs_can_sw_atomic_write(ip->i_mount);
+}
+
/*
* In-core inode flags.
*/
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 01e597290eb5..b39a19dbc386 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -616,7 +616,8 @@ xfs_get_atomic_write_min(
* write of exactly one single fsblock if the bdev will make that
* guarantee for us.
*/
- if (xfs_inode_can_hw_atomic_write(ip) || xfs_can_sw_atomic_write(mp))
+ if (xfs_inode_can_hw_atomic_write(ip) ||
+ xfs_inode_can_sw_atomic_write(ip))
return mp->m_sb.sb_blocksize;
return 0;
@@ -633,7 +634,7 @@ xfs_get_atomic_write_max(
* write of exactly one single fsblock if the bdev will make that
* guarantee for us.
*/
- if (!xfs_can_sw_atomic_write(mp)) {
+ if (!xfs_inode_can_sw_atomic_write(ip)) {
if (xfs_inode_can_hw_atomic_write(ip))
return mp->m_sb.sb_blocksize;
return 0;
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 0b690bc119d7..6a5543e08198 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -745,6 +745,12 @@ xfs_set_max_atomic_write_opt(
ASSERT(max_write <= U32_MAX);
+ if (xfs_has_dax_always(mp)) {
+ xfs_warn(mp,
+ "atomic writes not supported for DAX");
+ return -EINVAL;
+ }
+
/* generic_atomic_write_valid enforces power of two length */
if (!is_power_of_2(new_max_bytes)) {
xfs_warn(mp,
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 97de44c32272..3c858b42a54a 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -474,11 +474,6 @@ static inline bool xfs_has_nonzoned(const struct xfs_mount *mp)
return !xfs_has_zoned(mp);
}
-static inline bool xfs_can_sw_atomic_write(struct xfs_mount *mp)
-{
- return xfs_has_reflink(mp);
-}
-
/*
* Some features are always on for v5 file systems, allow the compiler to
* eliminiate dead code when building without v4 support.
@@ -534,6 +529,14 @@ __XFS_HAS_FEAT(dax_never, DAX_NEVER)
__XFS_HAS_FEAT(norecovery, NORECOVERY)
__XFS_HAS_FEAT(nouuid, NOUUID)
+static inline bool xfs_can_sw_atomic_write(struct xfs_mount *mp)
+{
+ if (xfs_has_dax_always(mp))
+ return false;
+
+ return xfs_has_reflink(mp);
+}
+
/*
* Operational mount state flags
*
--
2.43.5
^ permalink raw reply related [flat|nested] 4+ messages in thread
* Re: [PATCH] xfs: disallow atomic writes on DAX
2025-07-17 15:19 [PATCH] xfs: disallow atomic writes on DAX John Garry
@ 2025-07-17 16:02 ` Darrick J. Wong
2025-07-18 8:15 ` John Garry
0 siblings, 1 reply; 4+ messages in thread
From: Darrick J. Wong @ 2025-07-17 16:02 UTC (permalink / raw)
To: John Garry; +Cc: hch, cem, linux-xfs
On Thu, Jul 17, 2025 at 03:19:00PM +0000, John Garry wrote:
> Atomic writes are not currently supported for DAX, but two problems exist:
> - we may go down DAX write path for IOCB_ATOMIC, which does not handle
> IOCB_ATOMIC properly
> - we report non-zero atomic write limits in statx (for DAX inodes)
>
> We may want atomic writes support on DAX in future, but just disallow for
> now.
>
> For this, ensure when IOCB_ATOMIC is set that we check the write size
> versus the atomic write min and max before branching off to the DAX write
> path. This is not strictly required for DAX, as we should not get this far
> in the write path as FMODE_CAN_ATOMIC_WRITE should not be set.
>
> In addition, due to reflink being supported for DAX, we automatically get
> CoW-based atomic writes support being advertised. Remedy this by
> disallowing atomic writes for a DAX inode for both sw and hw modes.
...because it's fsdax and who's really going to test/use software atomic
writes there ?
> Finally make atomic write size and DAX mount always mutually exclusive.
Why? You could have a rt-on-pmem filesystem with S_DAX files, and still
want to do atomic writes to files on the data device.
> Reported-by: "Darrick J. Wong" <djwong@kernel.org>
> Fixes: 9dffc58f2384 ("xfs: update atomic write limits")
> Signed-off-by: John Garry <john.g.garry@oracle.com>
>
> diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
> index db21b5a4b881..84876f41da93 100644
> --- a/fs/xfs/xfs_file.c
> +++ b/fs/xfs/xfs_file.c
> @@ -1102,9 +1102,6 @@ xfs_file_write_iter(
> if (xfs_is_shutdown(ip->i_mount))
> return -EIO;
>
> - if (IS_DAX(inode))
> - return xfs_file_dax_write(iocb, from);
> -
> if (iocb->ki_flags & IOCB_ATOMIC) {
> if (ocount < xfs_get_atomic_write_min(ip))
> return -EINVAL;
> @@ -1117,6 +1114,9 @@ xfs_file_write_iter(
> return ret;
> }
>
> + if (IS_DAX(inode))
> + return xfs_file_dax_write(iocb, from);
> +
> if (iocb->ki_flags & IOCB_DIRECT) {
> /*
> * Allow a directio write to fall back to a buffered
> diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
> index 07fbdcc4cbf5..b142cd4f446a 100644
> --- a/fs/xfs/xfs_inode.h
> +++ b/fs/xfs/xfs_inode.h
> @@ -356,11 +356,22 @@ static inline bool xfs_inode_has_bigrtalloc(const struct xfs_inode *ip)
> (XFS_IS_REALTIME_INODE(ip) ? \
> (ip)->i_mount->m_rtdev_targp : (ip)->i_mount->m_ddev_targp)
>
> -static inline bool xfs_inode_can_hw_atomic_write(const struct xfs_inode *ip)
> +static inline bool xfs_inode_can_hw_atomic_write(struct xfs_inode *ip)
Why drop const here? VFS_IC() should be sufficient, I think.
--D
> {
> + if (IS_DAX(VFS_I(ip)))
> + return false;
> +
> return xfs_inode_buftarg(ip)->bt_awu_max > 0;
> }
>
> +static inline bool xfs_inode_can_sw_atomic_write(struct xfs_inode *ip)
> +{
> + if (IS_DAX(VFS_I(ip)))
> + return false;
> +
> + return xfs_can_sw_atomic_write(ip->i_mount);
> +}
> +
> /*
> * In-core inode flags.
> */
> diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
> index 01e597290eb5..b39a19dbc386 100644
> --- a/fs/xfs/xfs_iops.c
> +++ b/fs/xfs/xfs_iops.c
> @@ -616,7 +616,8 @@ xfs_get_atomic_write_min(
> * write of exactly one single fsblock if the bdev will make that
> * guarantee for us.
> */
> - if (xfs_inode_can_hw_atomic_write(ip) || xfs_can_sw_atomic_write(mp))
> + if (xfs_inode_can_hw_atomic_write(ip) ||
> + xfs_inode_can_sw_atomic_write(ip))
> return mp->m_sb.sb_blocksize;
>
> return 0;
> @@ -633,7 +634,7 @@ xfs_get_atomic_write_max(
> * write of exactly one single fsblock if the bdev will make that
> * guarantee for us.
> */
> - if (!xfs_can_sw_atomic_write(mp)) {
> + if (!xfs_inode_can_sw_atomic_write(ip)) {
> if (xfs_inode_can_hw_atomic_write(ip))
> return mp->m_sb.sb_blocksize;
> return 0;
> diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
> index 0b690bc119d7..6a5543e08198 100644
> --- a/fs/xfs/xfs_mount.c
> +++ b/fs/xfs/xfs_mount.c
> @@ -745,6 +745,12 @@ xfs_set_max_atomic_write_opt(
>
> ASSERT(max_write <= U32_MAX);
>
> + if (xfs_has_dax_always(mp)) {
> + xfs_warn(mp,
> + "atomic writes not supported for DAX");
> + return -EINVAL;
> + }
> +
> /* generic_atomic_write_valid enforces power of two length */
> if (!is_power_of_2(new_max_bytes)) {
> xfs_warn(mp,
> diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
> index 97de44c32272..3c858b42a54a 100644
> --- a/fs/xfs/xfs_mount.h
> +++ b/fs/xfs/xfs_mount.h
> @@ -474,11 +474,6 @@ static inline bool xfs_has_nonzoned(const struct xfs_mount *mp)
> return !xfs_has_zoned(mp);
> }
>
> -static inline bool xfs_can_sw_atomic_write(struct xfs_mount *mp)
> -{
> - return xfs_has_reflink(mp);
> -}
> -
> /*
> * Some features are always on for v5 file systems, allow the compiler to
> * eliminiate dead code when building without v4 support.
> @@ -534,6 +529,14 @@ __XFS_HAS_FEAT(dax_never, DAX_NEVER)
> __XFS_HAS_FEAT(norecovery, NORECOVERY)
> __XFS_HAS_FEAT(nouuid, NOUUID)
>
> +static inline bool xfs_can_sw_atomic_write(struct xfs_mount *mp)
> +{
> + if (xfs_has_dax_always(mp))
> + return false;
> +
> + return xfs_has_reflink(mp);
> +}
> +
> /*
> * Operational mount state flags
> *
> --
> 2.43.5
>
>
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH] xfs: disallow atomic writes on DAX
2025-07-17 16:02 ` Darrick J. Wong
@ 2025-07-18 8:15 ` John Garry
2025-07-18 19:38 ` Darrick J. Wong
0 siblings, 1 reply; 4+ messages in thread
From: John Garry @ 2025-07-18 8:15 UTC (permalink / raw)
To: Darrick J. Wong; +Cc: hch, cem, linux-xfs
On 17/07/2025 17:02, Darrick J. Wong wrote:
> On Thu, Jul 17, 2025 at 03:19:00PM +0000, John Garry wrote:
>> Atomic writes are not currently supported for DAX, but two problems exist:
>> - we may go down DAX write path for IOCB_ATOMIC, which does not handle
>> IOCB_ATOMIC properly
>> - we report non-zero atomic write limits in statx (for DAX inodes)
>>
>> We may want atomic writes support on DAX in future, but just disallow for
>> now.
>>
>> For this, ensure when IOCB_ATOMIC is set that we check the write size
>> versus the atomic write min and max before branching off to the DAX write
>> path. This is not strictly required for DAX, as we should not get this far
>> in the write path as FMODE_CAN_ATOMIC_WRITE should not be set.
>>
>> In addition, due to reflink being supported for DAX, we automatically get
>> CoW-based atomic writes support being advertised. Remedy this by
>> disallowing atomic writes for a DAX inode for both sw and hw modes.
>
> ...because it's fsdax and who's really going to test/use software atomic
> writes there ?
Right
>
>> Finally make atomic write size and DAX mount always mutually exclusive.
>
> Why? You could have a rt-on-pmem filesystem with S_DAX files, and still
> want to do atomic writes to files on the data device.
How can I test that, i.e. put something on data device?
I tried something like this:
$mkfs.xfs -f -m rmapbt=0,reflink=1 -d rtinherit=1 -r rtdev=/dev/pmem0
/dev/pmem1
$mount /dev/pmem1 mnt -o dax=always,rtdev=/dev/pmem0 -o
max_atomic_write=16k
$mkdir mnt/non_rt
$xfs_io -c "chattr -t" mnt/non_rt/ #make non-rt
$touch mnt/non_rt/file
$xfs_io -c "lsattr -v" mnt/non_rt/file
[has-xattr] mnt/non_rt/file
$xfs_io -c "statx -r -m 0x10000" mnt/non_rt/file
---
stat.atomic_write_unit_min = 0
stat.atomic_write_unit_max = 0
stat.atomic_write_segments_max = 0
---
I thought that losing the rtinherit flag would put the file on the data
device. From adding some kernel debug, it seems that this file is still
IS_DAX() == true
>
>> Reported-by: "Darrick J. Wong" <djwong@kernel.org>
>> Fixes: 9dffc58f2384 ("xfs: update atomic write limits")
>> Signed-off-by: John Garry <john.g.garry@oracle.com>
>>
>> diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
>> index db21b5a4b881..84876f41da93 100644
>> --- a/fs/xfs/xfs_file.c
>> +++ b/fs/xfs/xfs_file.c
>> @@ -1102,9 +1102,6 @@ xfs_file_write_iter(
>> if (xfs_is_shutdown(ip->i_mount))
>> return -EIO;
>>
>> - if (IS_DAX(inode))
>> - return xfs_file_dax_write(iocb, from);
>> -
>> if (iocb->ki_flags & IOCB_ATOMIC) {
>> if (ocount < xfs_get_atomic_write_min(ip))
>> return -EINVAL;
>> @@ -1117,6 +1114,9 @@ xfs_file_write_iter(
>> return ret;
>> }
>>
>> + if (IS_DAX(inode))
>> + return xfs_file_dax_write(iocb, from);
>> +
>> if (iocb->ki_flags & IOCB_DIRECT) {
>> /*
>> * Allow a directio write to fall back to a buffered
>> diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
>> index 07fbdcc4cbf5..b142cd4f446a 100644
>> --- a/fs/xfs/xfs_inode.h
>> +++ b/fs/xfs/xfs_inode.h
>> @@ -356,11 +356,22 @@ static inline bool xfs_inode_has_bigrtalloc(const struct xfs_inode *ip)
>> (XFS_IS_REALTIME_INODE(ip) ? \
>> (ip)->i_mount->m_rtdev_targp : (ip)->i_mount->m_ddev_targp)
>>
>> -static inline bool xfs_inode_can_hw_atomic_write(const struct xfs_inode *ip)
>> +static inline bool xfs_inode_can_hw_atomic_write(struct xfs_inode *ip)
>
> Why drop const here? VFS_IC() should be sufficient, I think.
>
I dropped that const as I got a complaint about ignoring the const when
passing to VFS_I(). But, as you say, I can use VFS_IC()
Thanks,
John
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH] xfs: disallow atomic writes on DAX
2025-07-18 8:15 ` John Garry
@ 2025-07-18 19:38 ` Darrick J. Wong
0 siblings, 0 replies; 4+ messages in thread
From: Darrick J. Wong @ 2025-07-18 19:38 UTC (permalink / raw)
To: John Garry; +Cc: hch, cem, linux-xfs
On Fri, Jul 18, 2025 at 09:15:07AM +0100, John Garry wrote:
> On 17/07/2025 17:02, Darrick J. Wong wrote:
> > On Thu, Jul 17, 2025 at 03:19:00PM +0000, John Garry wrote:
> > > Atomic writes are not currently supported for DAX, but two problems exist:
> > > - we may go down DAX write path for IOCB_ATOMIC, which does not handle
> > > IOCB_ATOMIC properly
> > > - we report non-zero atomic write limits in statx (for DAX inodes)
> > >
> > > We may want atomic writes support on DAX in future, but just disallow for
> > > now.
> > >
> > > For this, ensure when IOCB_ATOMIC is set that we check the write size
> > > versus the atomic write min and max before branching off to the DAX write
> > > path. This is not strictly required for DAX, as we should not get this far
> > > in the write path as FMODE_CAN_ATOMIC_WRITE should not be set.
> > >
> > > In addition, due to reflink being supported for DAX, we automatically get
> > > CoW-based atomic writes support being advertised. Remedy this by
> > > disallowing atomic writes for a DAX inode for both sw and hw modes.
> >
> > ...because it's fsdax and who's really going to test/use software atomic
> > writes there ?
>
> Right
>
> >
> > > Finally make atomic write size and DAX mount always mutually exclusive.
> >
> > Why? You could have a rt-on-pmem filesystem with S_DAX files, and still
> > want to do atomic writes to files on the data device.
>
> How can I test that, i.e. put something on data device?
>
> I tried something like this:
>
> $mkfs.xfs -f -m rmapbt=0,reflink=1 -d rtinherit=1 -r rtdev=/dev/pmem0
> /dev/pmem1
> $mount /dev/pmem1 mnt -o dax=always,rtdev=/dev/pmem0 -o
> max_atomic_write=16k
> $mkdir mnt/non_rt
> $xfs_io -c "chattr -t" mnt/non_rt/ #make non-rt
> $touch mnt/non_rt/file
> $xfs_io -c "lsattr -v" mnt/non_rt/file
> [has-xattr] mnt/non_rt/file
> $xfs_io -c "statx -r -m 0x10000" mnt/non_rt/file
> ---
> stat.atomic_write_unit_min = 0
> stat.atomic_write_unit_max = 0
> stat.atomic_write_segments_max = 0
> ---
>
> I thought that losing the rtinherit flag would put the file on the data
> device. From adding some kernel debug, it seems that this file is still
> IS_DAX() == true
The data device should be a regular scsi disk, not pmem.
--D
> >
> > > Reported-by: "Darrick J. Wong" <djwong@kernel.org>
> > > Fixes: 9dffc58f2384 ("xfs: update atomic write limits")
> > > Signed-off-by: John Garry <john.g.garry@oracle.com>
> > >
> > > diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
> > > index db21b5a4b881..84876f41da93 100644
> > > --- a/fs/xfs/xfs_file.c
> > > +++ b/fs/xfs/xfs_file.c
> > > @@ -1102,9 +1102,6 @@ xfs_file_write_iter(
> > > if (xfs_is_shutdown(ip->i_mount))
> > > return -EIO;
> > > - if (IS_DAX(inode))
> > > - return xfs_file_dax_write(iocb, from);
> > > -
> > > if (iocb->ki_flags & IOCB_ATOMIC) {
> > > if (ocount < xfs_get_atomic_write_min(ip))
> > > return -EINVAL;
> > > @@ -1117,6 +1114,9 @@ xfs_file_write_iter(
> > > return ret;
> > > }
> > > + if (IS_DAX(inode))
> > > + return xfs_file_dax_write(iocb, from);
> > > +
> > > if (iocb->ki_flags & IOCB_DIRECT) {
> > > /*
> > > * Allow a directio write to fall back to a buffered
> > > diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
> > > index 07fbdcc4cbf5..b142cd4f446a 100644
> > > --- a/fs/xfs/xfs_inode.h
> > > +++ b/fs/xfs/xfs_inode.h
> > > @@ -356,11 +356,22 @@ static inline bool xfs_inode_has_bigrtalloc(const struct xfs_inode *ip)
> > > (XFS_IS_REALTIME_INODE(ip) ? \
> > > (ip)->i_mount->m_rtdev_targp : (ip)->i_mount->m_ddev_targp)
> > > -static inline bool xfs_inode_can_hw_atomic_write(const struct xfs_inode *ip)
> > > +static inline bool xfs_inode_can_hw_atomic_write(struct xfs_inode *ip)
> >
> > Why drop const here? VFS_IC() should be sufficient, I think.
> >
>
> I dropped that const as I got a complaint about ignoring the const when
> passing to VFS_I(). But, as you say, I can use VFS_IC()
>
> Thanks,
> John
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2025-07-18 19:38 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-07-17 15:19 [PATCH] xfs: disallow atomic writes on DAX John Garry
2025-07-17 16:02 ` Darrick J. Wong
2025-07-18 8:15 ` John Garry
2025-07-18 19:38 ` Darrick J. Wong
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).