* [PATCH v6 1/2] Return bytes transferred for partial direct I/O
@ 2018-01-29 14:57 Goldwyn Rodrigues
2018-01-29 14:57 ` [PATCH 2/2] xfs: remove assert to check bytes returned Goldwyn Rodrigues
2018-01-29 19:04 ` [PATCH v6 1/2] Return bytes transferred for partial direct I/O Randy Dunlap
0 siblings, 2 replies; 5+ messages in thread
From: Goldwyn Rodrigues @ 2018-01-29 14:57 UTC (permalink / raw)
To: linux-fsdevel; +Cc: linux-block, linux-xfs, axboe, ak, Goldwyn Rodrigues
From: Goldwyn Rodrigues <rgoldwyn@suse.com>
In case direct I/O encounters an error midway, it returns the error.
Instead it should be returning the number of bytes transferred so far.
Test case for filesystems (with ENOSPC):
1. Create an almost full filesystem
2. Create a file, say /mnt/lastfile, until the filesystem is full.
3. Direct write() with count > sizeof /mnt/lastfile.
Result: write() returns -ENOSPC. However, file content has data written
in step 3.
Added a sysctl entry: dio_short_writes which is on by default. This is
to support applications which expect either and error or the bytes submitted
as a return value for the write calls.
This fixes fstest generic/472.
Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
---
Changes since v1:
- incorporated iomap and block devices
Changes since v2:
- realized that file size was not increasing when performing a (partial)
direct I/O because end_io function was receiving the error instead of
size. Fixed.
Changes since v3:
- [hch] initialize transferred with dio->size and use transferred instead
of dio->size.
Changes since v4:
- Refreshed to v4.14
Changes since v5:
- Added /proc/sys/fs/dio_short_writes (default 1) to guard older applications
which expect write(fd, buf, count) returns either count or error.
Documentation/sysctl/fs.txt | 14 ++++++++++++++
fs/block_dev.c | 2 +-
fs/direct-io.c | 7 +++++--
fs/iomap.c | 23 ++++++++++++-----------
include/linux/fs.h | 1 +
kernel/sysctl.c | 9 +++++++++
6 files changed, 42 insertions(+), 14 deletions(-)
diff --git a/Documentation/sysctl/fs.txt b/Documentation/sysctl/fs.txt
index 6c00c1e2743f..72e213d62511 100644
--- a/Documentation/sysctl/fs.txt
+++ b/Documentation/sysctl/fs.txt
@@ -22,6 +22,7 @@ Currently, these files are in /proc/sys/fs:
- aio-max-nr
- aio-nr
- dentry-state
+- dio_short_writes
- dquot-max
- dquot-nr
- file-max
@@ -76,6 +77,19 @@ dcache isn't pruned yet.
==============================================================
+dio_short_writes:
+
+In case Direct I/O encounters an transient error, it returns
+the errorcode, even if it has performed part of the write.
+This flag, if on (default), will return the number of bytes written
+so far, as the write(2) symantics are. However, some older applications
+still consider a direct write as an error if all of the I/O
+submitted is not complete. ie write(file, count, buf) != count.
+This option can be disabled on systems in order to support
+existing applications which do not expect short writes.
+
+==============================================================
+
dquot-max & dquot-nr:
The file dquot-max shows the maximum number of cached disk
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 4a181fcb5175..49d94360bb51 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -409,7 +409,7 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
if (!ret)
ret = blk_status_to_errno(dio->bio.bi_status);
- if (likely(!ret))
+ if (likely(dio->size))
ret = dio->size;
bio_put(&dio->bio);
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 3aafb3343a65..9bd15be64c25 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -151,6 +151,7 @@ struct dio {
} ____cacheline_aligned_in_smp;
static struct kmem_cache *dio_cache __read_mostly;
+unsigned int sysctl_dio_short_writes = 1;
/*
* How many pages are in the queue?
@@ -262,7 +263,7 @@ static ssize_t dio_complete(struct dio *dio, ssize_t ret, unsigned int flags)
ret = dio->page_errors;
if (ret == 0)
ret = dio->io_error;
- if (ret == 0)
+ if (!sysctl_dio_short_writes && (ret == 0))
ret = transferred;
if (dio->end_io) {
@@ -310,7 +311,9 @@ static ssize_t dio_complete(struct dio *dio, ssize_t ret, unsigned int flags)
}
kmem_cache_free(dio_cache, dio);
- return ret;
+ if (!sysctl_dio_short_writes)
+ return ret;
+ return transferred ? transferred : ret;
}
static void dio_aio_complete_work(struct work_struct *work)
diff --git a/fs/iomap.c b/fs/iomap.c
index 47d29ccffaef..a8d6908dc0de 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -716,23 +716,24 @@ static ssize_t iomap_dio_complete(struct iomap_dio *dio)
struct kiocb *iocb = dio->iocb;
struct inode *inode = file_inode(iocb->ki_filp);
loff_t offset = iocb->ki_pos;
- ssize_t ret;
+ ssize_t err;
+ ssize_t transferred = dio->size;
if (dio->end_io) {
- ret = dio->end_io(iocb,
- dio->error ? dio->error : dio->size,
- dio->flags);
+ err = dio->end_io(iocb,
+ (transferred && sysctl_dio_short_writes) ?
+ transferred : dio->error,
+ dio->flags);
} else {
- ret = dio->error;
+ err = dio->error;
}
- if (likely(!ret)) {
- ret = dio->size;
+ if (likely(transferred)) {
/* check for short read */
- if (offset + ret > dio->i_size &&
+ if (offset + transferred > dio->i_size &&
!(dio->flags & IOMAP_DIO_WRITE))
- ret = dio->i_size - offset;
- iocb->ki_pos += ret;
+ transferred = dio->i_size - offset;
+ iocb->ki_pos += transferred;
}
/*
@@ -759,7 +760,7 @@ static ssize_t iomap_dio_complete(struct iomap_dio *dio)
inode_dio_end(file_inode(iocb->ki_filp));
kfree(dio);
- return ret;
+ return (transferred && sysctl_dio_short_writes) ? transferred : err;
}
static void iomap_dio_complete_work(struct work_struct *work)
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 511fbaabf624..a25652e5ae1b 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1469,6 +1469,7 @@ static inline void i_gid_write(struct inode *inode, gid_t gid)
}
extern struct timespec current_time(struct inode *inode);
+extern unsigned int sysctl_dio_short_writes;
/*
* Snapshotting support.
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 557d46728577..362a9c3156f1 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1844,6 +1844,15 @@ static struct ctl_table fs_table[] = {
.proc_handler = proc_dointvec_minmax,
.extra1 = &one,
},
+ {
+ .procname = "dio_short_writes",
+ .data = &sysctl_dio_short_writes,
+ .maxlen = sizeof(int),
+ .mode = 0600,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &zero,
+ .extra2 = &one,
+ },
{ }
};
--
2.16.1
^ permalink raw reply related [flat|nested] 5+ messages in thread
* [PATCH 2/2] xfs: remove assert to check bytes returned
2018-01-29 14:57 [PATCH v6 1/2] Return bytes transferred for partial direct I/O Goldwyn Rodrigues
@ 2018-01-29 14:57 ` Goldwyn Rodrigues
2018-01-29 18:50 ` Jens Axboe
2018-01-29 19:04 ` [PATCH v6 1/2] Return bytes transferred for partial direct I/O Randy Dunlap
1 sibling, 1 reply; 5+ messages in thread
From: Goldwyn Rodrigues @ 2018-01-29 14:57 UTC (permalink / raw)
To: linux-fsdevel; +Cc: linux-block, linux-xfs, axboe, ak, Goldwyn Rodrigues
From: Goldwyn Rodrigues <rgoldwyn@suse.com>
Since we can return less than count in case of partial direct
writes, remove the ASSERT.
Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
Acked-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
---
fs/xfs/xfs_file.c | 6 ------
1 file changed, 6 deletions(-)
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 8601275cc5e6..8fc4dbf66910 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -590,12 +590,6 @@ xfs_file_dio_aio_write(
ret = iomap_dio_rw(iocb, from, &xfs_iomap_ops, xfs_dio_write_end_io);
out:
xfs_iunlock(ip, iolock);
-
- /*
- * No fallback to buffered IO on errors for XFS, direct IO will either
- * complete fully or fail.
- */
- ASSERT(ret < 0 || ret == count);
return ret;
}
--
2.16.1
^ permalink raw reply related [flat|nested] 5+ messages in thread
* Re: [PATCH 2/2] xfs: remove assert to check bytes returned
2018-01-29 14:57 ` [PATCH 2/2] xfs: remove assert to check bytes returned Goldwyn Rodrigues
@ 2018-01-29 18:50 ` Jens Axboe
0 siblings, 0 replies; 5+ messages in thread
From: Jens Axboe @ 2018-01-29 18:50 UTC (permalink / raw)
To: Goldwyn Rodrigues, linux-fsdevel
Cc: linux-block, linux-xfs, ak, Goldwyn Rodrigues
On 1/29/18 7:57 AM, Goldwyn Rodrigues wrote:
> From: Goldwyn Rodrigues <rgoldwyn@suse.com>
>
> Since we can return less than count in case of partial direct
> writes, remove the ASSERT.
>
> Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
> Acked-by: Dave Chinner <dchinner@redhat.com>
> Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Minot nit - you should probably reorder this patch before the other one,
otherwise you'll have a point in the tree where it can knowingly
trigger.
--
Jens Axboe
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH v6 1/2] Return bytes transferred for partial direct I/O
2018-01-29 14:57 [PATCH v6 1/2] Return bytes transferred for partial direct I/O Goldwyn Rodrigues
2018-01-29 14:57 ` [PATCH 2/2] xfs: remove assert to check bytes returned Goldwyn Rodrigues
@ 2018-01-29 19:04 ` Randy Dunlap
2018-01-30 17:31 ` Goldwyn Rodrigues
1 sibling, 1 reply; 5+ messages in thread
From: Randy Dunlap @ 2018-01-29 19:04 UTC (permalink / raw)
To: Goldwyn Rodrigues, linux-fsdevel
Cc: linux-block, linux-xfs, axboe, ak, Goldwyn Rodrigues
On 01/29/2018 06:57 AM, Goldwyn Rodrigues wrote:
> From: Goldwyn Rodrigues <rgoldwyn@suse.com>
>
> diff --git a/Documentation/sysctl/fs.txt b/Documentation/sysctl/fs.txt
> index 6c00c1e2743f..72e213d62511 100644
> --- a/Documentation/sysctl/fs.txt
> +++ b/Documentation/sysctl/fs.txt
> @@ -76,6 +77,19 @@ dcache isn't pruned yet.
>
> ==============================================================
>
> +dio_short_writes:
> +
> +In case Direct I/O encounters an transient error, it returns
a transient
> +the errorcode, even if it has performed part of the write.
error code,
> +This flag, if on (default), will return the number of bytes written
> +so far, as the write(2) symantics are. However, some older applications
semantics
> +still consider a direct write as an error if all of the I/O
> +submitted is not complete. ie write(file, count, buf) != count.
I.e.
> +This option can be disabled on systems in order to support
> +existing applications which do not expect short writes.
and if my system has a mix of older applications and new ones,
will they all work just fine?
thanks,
--
~Randy
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH v6 1/2] Return bytes transferred for partial direct I/O
2018-01-29 19:04 ` [PATCH v6 1/2] Return bytes transferred for partial direct I/O Randy Dunlap
@ 2018-01-30 17:31 ` Goldwyn Rodrigues
0 siblings, 0 replies; 5+ messages in thread
From: Goldwyn Rodrigues @ 2018-01-30 17:31 UTC (permalink / raw)
To: Randy Dunlap, linux-fsdevel
Cc: linux-block, linux-xfs, axboe, ak, Goldwyn Rodrigues
On 01/29/2018 01:04 PM, Randy Dunlap wrote:
> On 01/29/2018 06:57 AM, Goldwyn Rodrigues wrote:
>> From: Goldwyn Rodrigues <rgoldwyn@suse.com>
>>
>
>> diff --git a/Documentation/sysctl/fs.txt b/Documentation/sysctl/fs.txt
>> index 6c00c1e2743f..72e213d62511 100644
>> --- a/Documentation/sysctl/fs.txt
>> +++ b/Documentation/sysctl/fs.txt
>> @@ -76,6 +77,19 @@ dcache isn't pruned yet.
>>
>> ==============================================================
>>
>> +dio_short_writes:
>> +
>> +In case Direct I/O encounters an transient error, it returns
>
> a transient
>
>> +the errorcode, even if it has performed part of the write.
>
> error code,
>
>> +This flag, if on (default), will return the number of bytes written
>> +so far, as the write(2) symantics are. However, some older applications
>
> semantics
>
>> +still consider a direct write as an error if all of the I/O
>> +submitted is not complete. ie write(file, count, buf) != count.
>
> I.e.
>
>> +This option can be disabled on systems in order to support
>> +existing applications which do not expect short writes.
Thanks for the comments. I will incorporate the language changes.
>
> and if my system has a mix of older applications and new ones,
> will they all work just fine?
>
Newer applications should treat the error as nothing is written.
But yes, I tried doing it through prctl for an individual processes, but
did not find a free bit to stick it in.
--
Goldwyn
^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2018-01-30 17:31 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2018-01-29 14:57 [PATCH v6 1/2] Return bytes transferred for partial direct I/O Goldwyn Rodrigues
2018-01-29 14:57 ` [PATCH 2/2] xfs: remove assert to check bytes returned Goldwyn Rodrigues
2018-01-29 18:50 ` Jens Axboe
2018-01-29 19:04 ` [PATCH v6 1/2] Return bytes transferred for partial direct I/O Randy Dunlap
2018-01-30 17:31 ` Goldwyn Rodrigues
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).