Linux network filesystem support library
 help / color / mirror / Atom feed
* [PATCH v2] erofs: support direct IO for ondemand mode
@ 2024-07-26  2:06 Hongbo Li
  2024-07-29 12:09 ` Jingbo Xu
  0 siblings, 1 reply; 3+ messages in thread
From: Hongbo Li @ 2024-07-26  2:06 UTC (permalink / raw)
  To: xiang, chao, huyue2, jefflexu, dhavale, dhowells
  Cc: linux-erofs, netfs, lihongbo22

erofs over fscache cannot handle the direct read io. When the file
is opened with O_DIRECT flag, -EINVAL will reback. We support the
DIO in erofs over fscache by bypassing the erofs page cache and
reading target data into ubuf from fscache's file directly.

The alignment for buffer memory, offset and size now is restricted
by erofs, since `i_blocksize` is enough for the under filesystems.

Signed-off-by: Hongbo Li <lihongbo22@huawei.com>

---
v2:
  - Change the directIO helper name to erofs_fscache_direct_io
  - Add some io interception when begin direct io
  - Optimize the direct io logical in erofs_fscache_data_read_slice

v1:
  - https://lore.kernel.org/linux-erofs/6f1a5c1c-d44d-4561-9377-b935ff4531f2@huawei.com/T/#t
---
 fs/erofs/data.c    |  3 ++
 fs/erofs/fscache.c | 84 +++++++++++++++++++++++++++++++++++++++++-----
 2 files changed, 78 insertions(+), 9 deletions(-)

diff --git a/fs/erofs/data.c b/fs/erofs/data.c
index 8be60797ea2f..dbfafe358de4 100644
--- a/fs/erofs/data.c
+++ b/fs/erofs/data.c
@@ -391,6 +391,9 @@ static ssize_t erofs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
 		     iov_iter_alignment(to)) & blksize_mask)
 			return -EINVAL;
 
+		if (erofs_is_fscache_mode(inode->i_sb))
+			return generic_file_read_iter(iocb, to);
+
 		return iomap_dio_rw(iocb, to, &erofs_iomap_ops,
 				    NULL, 0, NULL, 0);
 	}
diff --git a/fs/erofs/fscache.c b/fs/erofs/fscache.c
index fda16eedafb5..e3e6c579eb81 100644
--- a/fs/erofs/fscache.c
+++ b/fs/erofs/fscache.c
@@ -35,6 +35,8 @@ struct erofs_fscache_io {
 
 struct erofs_fscache_rq {
 	struct address_space	*mapping;	/* The mapping being accessed */
+	struct iov_iter		*iter;		/* Destination for direct io */
+	struct completion	done;		/* Sync for direct io */
 	loff_t			start;		/* Start position */
 	size_t			len;		/* Length of the request */
 	size_t			submitted;	/* Length of submitted */
@@ -76,7 +78,11 @@ static void erofs_fscache_req_put(struct erofs_fscache_rq *req)
 {
 	if (!refcount_dec_and_test(&req->ref))
 		return;
-	erofs_fscache_req_complete(req);
+
+	if (req->iter)
+		complete(&req->done);
+	else
+		erofs_fscache_req_complete(req);
 	kfree(req);
 }
 
@@ -88,6 +94,7 @@ static struct erofs_fscache_rq *erofs_fscache_req_alloc(struct address_space *ma
 	if (!req)
 		return NULL;
 	req->mapping = mapping;
+	req->iter = NULL;
 	req->start = start;
 	req->len = len;
 	refcount_set(&req->ref, 1);
@@ -258,6 +265,7 @@ static int erofs_fscache_data_read_slice(struct erofs_fscache_rq *req)
 	struct address_space *mapping = req->mapping;
 	struct inode *inode = mapping->host;
 	struct super_block *sb = inode->i_sb;
+	struct iov_iter *dest_iter = req->iter;
 	struct erofs_fscache_io *io;
 	struct erofs_map_blocks map;
 	struct erofs_map_dev mdev;
@@ -270,33 +278,45 @@ static int erofs_fscache_data_read_slice(struct erofs_fscache_rq *req)
 	if (ret)
 		return ret;
 
+	count = req->len - req->submitted;
 	if (map.m_flags & EROFS_MAP_META) {
 		struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
 		struct iov_iter iter;
-		size_t size = map.m_llen;
+		struct iov_iter *iterp;
+		size_t size = min_t(size_t, map.m_llen, count);
 		void *src;
+		ssize_t filled = 0;
 
 		src = erofs_read_metabuf(&buf, sb, map.m_pa, EROFS_KMAP);
 		if (IS_ERR(src))
 			return PTR_ERR(src);
 
 		iov_iter_xarray(&iter, ITER_DEST, &mapping->i_pages, pos, PAGE_SIZE);
-		if (copy_to_iter(src, size, &iter) != size) {
+		iterp = (dest_iter) ?: &iter;
+		if (copy_to_iter(src, size, iterp) != size) {
 			erofs_put_metabuf(&buf);
 			return -EFAULT;
 		}
-		iov_iter_zero(PAGE_SIZE - size, &iter);
+
+		filled += size;
+		if (!dest_iter) {
+			iov_iter_zero(PAGE_SIZE - size, iterp);
+			filled += (PAGE_SIZE - size);
+		}
+
 		erofs_put_metabuf(&buf);
-		req->submitted += PAGE_SIZE;
+		req->submitted += filled;
 		return 0;
 	}
 
-	count = req->len - req->submitted;
 	if (!(map.m_flags & EROFS_MAP_MAPPED)) {
 		struct iov_iter iter;
 
-		iov_iter_xarray(&iter, ITER_DEST, &mapping->i_pages, pos, count);
-		iov_iter_zero(count, &iter);
+		if (!dest_iter) {
+			iov_iter_xarray(&iter, ITER_DEST, &mapping->i_pages, pos, count);
+			iov_iter_zero(count, &iter);
+		}
+
 		req->submitted += count;
 		return 0;
 	}
@@ -315,9 +335,17 @@ static int erofs_fscache_data_read_slice(struct erofs_fscache_rq *req)
 	io = erofs_fscache_req_io_alloc(req);
 	if (!io)
 		return -ENOMEM;
-	iov_iter_xarray(&io->iter, ITER_DEST, &mapping->i_pages, pos, count);
+
+	if (dest_iter)
+		iov_iter_ubuf(&io->iter, ITER_DEST,
+			dest_iter->ubuf + dest_iter->iov_offset, count);
+	else
+		iov_iter_xarray(&io->iter, ITER_DEST, &mapping->i_pages, pos, count);
+
 	ret = erofs_fscache_read_io_async(mdev.m_fscache->cookie,
 			mdev.m_pa + (pos - map.m_la), io);
+	if (dest_iter)
+		iov_iter_advance(dest_iter, io->iter.iov_offset);
 	erofs_fscache_req_io_put(io);
 
 	req->submitted += count;
@@ -373,6 +401,43 @@ static void erofs_fscache_readahead(struct readahead_control *rac)
 	erofs_fscache_req_put(req);
 }
 
+static ssize_t erofs_fscache_direct_io(struct kiocb *iocb, struct iov_iter *iter)
+{
+	struct file *file = iocb->ki_filp;
+	size_t count = iov_iter_count(iter);
+	size_t i_size = i_size_read(file->f_inode);
+	struct erofs_fscache_rq *req;
+	struct completion *ctr;
+	ssize_t rsize;
+	int ret;
+
+	if (unlikely(iov_iter_rw(iter) == WRITE))
+		return -EROFS;
+
+	if (unlikely(iocb->ki_pos >= i_size))
+		return 0;
+
+	if (count + iocb->ki_pos > i_size)
+		count = i_size - iocb->ki_pos;
+
+	if (!count)
+		return 0;
+
+	req = erofs_fscache_req_alloc(file->f_mapping, iocb->ki_pos, count);
+	if (!req)
+		return -ENOMEM;
+
+	req->iter = iter;
+	init_completion(&req->done);
+	ctr = &req->done;
+	ret = erofs_fscache_data_read(req);
+	rsize = (ret == 0) ? (ssize_t)req->submitted : ret;
+	erofs_fscache_req_put(req);
+	wait_for_completion(ctr);
+
+	return rsize;
+}
+
 static const struct address_space_operations erofs_fscache_meta_aops = {
 	.read_folio = erofs_fscache_meta_read_folio,
 };
@@ -380,6 +445,7 @@ static const struct address_space_operations erofs_fscache_meta_aops = {
 const struct address_space_operations erofs_fscache_access_aops = {
 	.read_folio = erofs_fscache_read_folio,
 	.readahead = erofs_fscache_readahead,
+	.direct_IO = erofs_fscache_direct_io,
 };
 
 static void erofs_fscache_domain_put(struct erofs_domain *domain)
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [PATCH v2] erofs: support direct IO for ondemand mode
  2024-07-26  2:06 [PATCH v2] erofs: support direct IO for ondemand mode Hongbo Li
@ 2024-07-29 12:09 ` Jingbo Xu
  2024-07-29 13:42   ` Hongbo Li
  0 siblings, 1 reply; 3+ messages in thread
From: Jingbo Xu @ 2024-07-29 12:09 UTC (permalink / raw)
  To: Hongbo Li, xiang, chao, huyue2, dhavale, dhowells; +Cc: linux-erofs, netfs



On 7/26/24 10:06 AM, Hongbo Li wrote:
> erofs over fscache cannot handle the direct read io. When the file
> is opened with O_DIRECT flag, -EINVAL will reback. We support the
> DIO in erofs over fscache by bypassing the erofs page cache and
> reading target data into ubuf from fscache's file directly.
> 
> The alignment for buffer memory, offset and size now is restricted
> by erofs, since `i_blocksize` is enough for the under filesystems.
> 
> Signed-off-by: Hongbo Li <lihongbo22@huawei.com>
> 
> ---
> v2:
>   - Change the directIO helper name to erofs_fscache_direct_io
>   - Add some io interception when begin direct io
>   - Optimize the direct io logical in erofs_fscache_data_read_slice
> 
> v1:
>   - https://lore.kernel.org/linux-erofs/6f1a5c1c-d44d-4561-9377-b935ff4531f2@huawei.com/T/#t
> ---
>  fs/erofs/data.c    |  3 ++
>  fs/erofs/fscache.c | 84 +++++++++++++++++++++++++++++++++++++++++-----
>  2 files changed, 78 insertions(+), 9 deletions(-)
> 
> diff --git a/fs/erofs/data.c b/fs/erofs/data.c
> index 8be60797ea2f..dbfafe358de4 100644
> --- a/fs/erofs/data.c
> +++ b/fs/erofs/data.c
> @@ -391,6 +391,9 @@ static ssize_t erofs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
>  		     iov_iter_alignment(to)) & blksize_mask)
>  			return -EINVAL;
>  
> +		if (erofs_is_fscache_mode(inode->i_sb))
> +			return generic_file_read_iter(iocb, to);
> +
>  		return iomap_dio_rw(iocb, to, &erofs_iomap_ops,
>  				    NULL, 0, NULL, 0);
>  	}
> diff --git a/fs/erofs/fscache.c b/fs/erofs/fscache.c
> index fda16eedafb5..e3e6c579eb81 100644
> --- a/fs/erofs/fscache.c
> +++ b/fs/erofs/fscache.c
> @@ -35,6 +35,8 @@ struct erofs_fscache_io {
>  
>  struct erofs_fscache_rq {
>  	struct address_space	*mapping;	/* The mapping being accessed */
> +	struct iov_iter		*iter;		/* Destination for direct io */
> +	struct completion	done;		/* Sync for direct io */
>  	loff_t			start;		/* Start position */
>  	size_t			len;		/* Length of the request */
>  	size_t			submitted;	/* Length of submitted */
> @@ -76,7 +78,11 @@ static void erofs_fscache_req_put(struct erofs_fscache_rq *req)
>  {
>  	if (!refcount_dec_and_test(&req->ref))
>  		return;
> -	erofs_fscache_req_complete(req);
> +
> +	if (req->iter)
> +		complete(&req->done);
> +	else
> +		erofs_fscache_req_complete(req);
>  	kfree(req);
>  }
>  
> @@ -88,6 +94,7 @@ static struct erofs_fscache_rq *erofs_fscache_req_alloc(struct address_space *ma
>  	if (!req)
>  		return NULL;
>  	req->mapping = mapping;
> +	req->iter = NULL;
>  	req->start = start;
>  	req->len = len;
>  	refcount_set(&req->ref, 1);
> @@ -258,6 +265,7 @@ static int erofs_fscache_data_read_slice(struct erofs_fscache_rq *req)
>  	struct address_space *mapping = req->mapping;
>  	struct inode *inode = mapping->host;
>  	struct super_block *sb = inode->i_sb;
> +	struct iov_iter *dest_iter = req->iter;
>  	struct erofs_fscache_io *io;
>  	struct erofs_map_blocks map;
>  	struct erofs_map_dev mdev;
> @@ -270,33 +278,45 @@ static int erofs_fscache_data_read_slice(struct erofs_fscache_rq *req)
>  	if (ret)
>  		return ret;
>  
> +	count = req->len - req->submitted;
>  	if (map.m_flags & EROFS_MAP_META) {
>  		struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
>  		struct iov_iter iter;
> -		size_t size = map.m_llen;
> +		struct iov_iter *iterp;
> +		size_t size = min_t(size_t, map.m_llen, count);
>  		void *src;
> +		ssize_t filled = 0;
>  
>  		src = erofs_read_metabuf(&buf, sb, map.m_pa, EROFS_KMAP);
>  		if (IS_ERR(src))
>  			return PTR_ERR(src);
>  
>  		iov_iter_xarray(&iter, ITER_DEST, &mapping->i_pages, pos, PAGE_SIZE);
> -		if (copy_to_iter(src, size, &iter) != size) {
> +		iterp = (dest_iter) ?: &iter;

Could we also reuse erofs_fscache_rq->iter for the non-direct-IO case?
That is, initializing erofs_fscache_rq->iter before calling
erofs_fscache_data_read() just like what erofs_fscache_direct_io() does,
so that we don't need disturbing the iter here inside
erofs_fscache_data_read_slice().

> +		if (copy_to_iter(src, size, iterp) != size) {
>  			erofs_put_metabuf(&buf);
>  			return -EFAULT;
>  		}
> -		iov_iter_zero(PAGE_SIZE - size, &iter);
> +
> +		filled += size;
> +		if (!dest_iter) {
> +			iov_iter_zero(PAGE_SIZE - size, iterp);
> +			filled += (PAGE_SIZE - size);
> +		}
> +

Don't we need also zeroing the remaining part for direct IO, e.g. when
direct reading the last inlined part of a file of
EROFS_INODE_FLAT_INLINE layout?

>  		erofs_put_metabuf(&buf);
> -		req->submitted += PAGE_SIZE;
> +		req->submitted += filled;
>  		return 0;
>  	}
>  
> -	count = req->len - req->submitted;
>  	if (!(map.m_flags & EROFS_MAP_MAPPED)) {
>  		struct iov_iter iter;
>  
> -		iov_iter_xarray(&iter, ITER_DEST, &mapping->i_pages, pos, count);
> -		iov_iter_zero(count, &iter);
> +		if (!dest_iter) {
> +			iov_iter_xarray(&iter, ITER_DEST, &mapping->i_pages, pos, count);
> +			iov_iter_zero(count, &iter);
> +		}
> +

Ditto.  Don't we need handling the unmapped case for direct IO, e.g.
when direct reading a sparse file?


>  		req->submitted += count;
>  		return 0;
>  	}
> @@ -315,9 +335,17 @@ static int erofs_fscache_data_read_slice(struct erofs_fscache_rq *req)
>  	io = erofs_fscache_req_io_alloc(req);
>  	if (!io)
>  		return -ENOMEM;
> -	iov_iter_xarray(&io->iter, ITER_DEST, &mapping->i_pages, pos, count);
> +
> +	if (dest_iter)
> +		iov_iter_ubuf(&io->iter, ITER_DEST,
> +			dest_iter->ubuf + dest_iter->iov_offset, count);
> +	else
> +		iov_iter_xarray(&io->iter, ITER_DEST, &mapping->i_pages, pos, count);
> +
>  	ret = erofs_fscache_read_io_async(mdev.m_fscache->cookie,
>  			mdev.m_pa + (pos - map.m_la), io);
> +	if (dest_iter)
> +		iov_iter_advance(dest_iter, io->iter.iov_offset);
>  	erofs_fscache_req_io_put(io);
>  
>  	req->submitted += count;
> @@ -373,6 +401,43 @@ static void erofs_fscache_readahead(struct readahead_control *rac)
>  	erofs_fscache_req_put(req);
>  }
>  
> +static ssize_t erofs_fscache_direct_io(struct kiocb *iocb, struct iov_iter *iter)
> +{
> +	struct file *file = iocb->ki_filp;
> +	size_t count = iov_iter_count(iter);
> +	size_t i_size = i_size_read(file->f_inode);
> +	struct erofs_fscache_rq *req;
> +	struct completion *ctr;
> +	ssize_t rsize;
> +	int ret;
> +
> +	if (unlikely(iov_iter_rw(iter) == WRITE))
> +		return -EROFS;

We will get here by no way since .write_iter() is not implemented at
all, right?

> +
> +	if (unlikely(iocb->ki_pos >= i_size))
> +		return 0;
> +
> +	if (count + iocb->ki_pos > i_size)
> +		count = i_size - iocb->ki_pos;
> +
> +	if (!count)
> +		return 0;
> +
> +	req = erofs_fscache_req_alloc(file->f_mapping, iocb->ki_pos, count);
> +	if (!req)
> +		return -ENOMEM;
> +
> +	req->iter = iter;
> +	init_completion(&req->done);
> +	ctr = &req->done;
> +	ret = erofs_fscache_data_read(req);
> +	rsize = (ret == 0) ? (ssize_t)req->submitted : ret;
> +	erofs_fscache_req_put(req);

If we get error in erofs_fscache_data_read(), the above
erofs_fscache_req_put() will put the last reference of erofs_fscache_rq
and free the erofs_fscache_rq structure.  Then the following
wait_for_completion() will cause UAF.

> +	wait_for_completion(ctr);
> +
> +	return rsize;
> +}
> +
>  static const struct address_space_operations erofs_fscache_meta_aops = {
>  	.read_folio = erofs_fscache_meta_read_folio,
>  };
> @@ -380,6 +445,7 @@ static const struct address_space_operations erofs_fscache_meta_aops = {
>  const struct address_space_operations erofs_fscache_access_aops = {
>  	.read_folio = erofs_fscache_read_folio,
>  	.readahead = erofs_fscache_readahead,
> +	.direct_IO = erofs_fscache_direct_io,
>  };
>  
>  static void erofs_fscache_domain_put(struct erofs_domain *domain)

-- 
Thanks,
Jingbo

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH v2] erofs: support direct IO for ondemand mode
  2024-07-29 12:09 ` Jingbo Xu
@ 2024-07-29 13:42   ` Hongbo Li
  0 siblings, 0 replies; 3+ messages in thread
From: Hongbo Li @ 2024-07-29 13:42 UTC (permalink / raw)
  To: Jingbo Xu, xiang, chao, huyue2, dhavale, dhowells; +Cc: linux-erofs, netfs



On 2024/7/29 20:09, Jingbo Xu wrote:
> 
> 
> On 7/26/24 10:06 AM, Hongbo Li wrote:
>> erofs over fscache cannot handle the direct read io. When the file
>> is opened with O_DIRECT flag, -EINVAL will reback. We support the
>> DIO in erofs over fscache by bypassing the erofs page cache and
>> reading target data into ubuf from fscache's file directly.
>>
>> The alignment for buffer memory, offset and size now is restricted
>> by erofs, since `i_blocksize` is enough for the under filesystems.
>>
>> Signed-off-by: Hongbo Li <lihongbo22@huawei.com>
>>
>> ---
>> v2:
>>    - Change the directIO helper name to erofs_fscache_direct_io
>>    - Add some io interception when begin direct io
>>    - Optimize the direct io logical in erofs_fscache_data_read_slice
>>
>> v1:
>>    - https://lore.kernel.org/linux-erofs/6f1a5c1c-d44d-4561-9377-b935ff4531f2@huawei.com/T/#t
>> ---
>>   fs/erofs/data.c    |  3 ++
>>   fs/erofs/fscache.c | 84 +++++++++++++++++++++++++++++++++++++++++-----
>>   2 files changed, 78 insertions(+), 9 deletions(-)
>>
>> diff --git a/fs/erofs/data.c b/fs/erofs/data.c
>> index 8be60797ea2f..dbfafe358de4 100644
>> --- a/fs/erofs/data.c
>> +++ b/fs/erofs/data.c
>> @@ -391,6 +391,9 @@ static ssize_t erofs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
>>   		     iov_iter_alignment(to)) & blksize_mask)
>>   			return -EINVAL;
>>   
>> +		if (erofs_is_fscache_mode(inode->i_sb))
>> +			return generic_file_read_iter(iocb, to);
>> +
>>   		return iomap_dio_rw(iocb, to, &erofs_iomap_ops,
>>   				    NULL, 0, NULL, 0);
>>   	}
>> diff --git a/fs/erofs/fscache.c b/fs/erofs/fscache.c
>> index fda16eedafb5..e3e6c579eb81 100644
>> --- a/fs/erofs/fscache.c
>> +++ b/fs/erofs/fscache.c
>> @@ -35,6 +35,8 @@ struct erofs_fscache_io {
>>   
>>   struct erofs_fscache_rq {
>>   	struct address_space	*mapping;	/* The mapping being accessed */
>> +	struct iov_iter		*iter;		/* Destination for direct io */
>> +	struct completion	done;		/* Sync for direct io */
>>   	loff_t			start;		/* Start position */
>>   	size_t			len;		/* Length of the request */
>>   	size_t			submitted;	/* Length of submitted */
>> @@ -76,7 +78,11 @@ static void erofs_fscache_req_put(struct erofs_fscache_rq *req)
>>   {
>>   	if (!refcount_dec_and_test(&req->ref))
>>   		return;
>> -	erofs_fscache_req_complete(req);
>> +
>> +	if (req->iter)
>> +		complete(&req->done);
>> +	else
>> +		erofs_fscache_req_complete(req);
>>   	kfree(req);
>>   }
>>   
>> @@ -88,6 +94,7 @@ static struct erofs_fscache_rq *erofs_fscache_req_alloc(struct address_space *ma
>>   	if (!req)
>>   		return NULL;
>>   	req->mapping = mapping;
>> +	req->iter = NULL;
>>   	req->start = start;
>>   	req->len = len;
>>   	refcount_set(&req->ref, 1);
>> @@ -258,6 +265,7 @@ static int erofs_fscache_data_read_slice(struct erofs_fscache_rq *req)
>>   	struct address_space *mapping = req->mapping;
>>   	struct inode *inode = mapping->host;
>>   	struct super_block *sb = inode->i_sb;
>> +	struct iov_iter *dest_iter = req->iter;
>>   	struct erofs_fscache_io *io;
>>   	struct erofs_map_blocks map;
>>   	struct erofs_map_dev mdev;
>> @@ -270,33 +278,45 @@ static int erofs_fscache_data_read_slice(struct erofs_fscache_rq *req)
>>   	if (ret)
>>   		return ret;
>>   
>> +	count = req->len - req->submitted;
>>   	if (map.m_flags & EROFS_MAP_META) {
>>   		struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
>>   		struct iov_iter iter;
>> -		size_t size = map.m_llen;
>> +		struct iov_iter *iterp;
>> +		size_t size = min_t(size_t, map.m_llen, count);
>>   		void *src;
>> +		ssize_t filled = 0;
>>   
>>   		src = erofs_read_metabuf(&buf, sb, map.m_pa, EROFS_KMAP);
>>   		if (IS_ERR(src))
>>   			return PTR_ERR(src);
>>   
>>   		iov_iter_xarray(&iter, ITER_DEST, &mapping->i_pages, pos, PAGE_SIZE);
>> -		if (copy_to_iter(src, size, &iter) != size) {
>> +		iterp = (dest_iter) ?: &iter;
> 
> Could we also reuse erofs_fscache_rq->iter for the non-direct-IO case?
> That is, initializing erofs_fscache_rq->iter before calling
> erofs_fscache_data_read() just like what erofs_fscache_direct_io() does,
> so that we don't need disturbing the iter here inside
> erofs_fscache_data_read_slice().
> 
>> +		if (copy_to_iter(src, size, iterp) != size) {
>>   			erofs_put_metabuf(&buf);
>>   			return -EFAULT;
>>   		}
>> -		iov_iter_zero(PAGE_SIZE - size, &iter);
>> +
>> +		filled += size;
>> +		if (!dest_iter) {
>> +			iov_iter_zero(PAGE_SIZE - size, iterp);
>> +			filled += (PAGE_SIZE - size);
>> +		}
>> +
> 
> Don't we need also zeroing the remaining part for direct IO, e.g. when
> direct reading the last inlined part of a file of
> EROFS_INODE_FLAT_INLINE layout?
> 
When direct IO, iter pointer the userspace buffer. When direct reading 
the last inlined part of a file, only the target length of buffer (valid 
data) can be written. If we zeroing the remaining part, we may read the 
extra data that not below to this file. This is different with copying 
data to page cache.
>>   		erofs_put_metabuf(&buf);
>> -		req->submitted += PAGE_SIZE;
>> +		req->submitted += filled;
>>   		return 0;
>>   	}
>>   
>> -	count = req->len - req->submitted;
>>   	if (!(map.m_flags & EROFS_MAP_MAPPED)) {
>>   		struct iov_iter iter;
>>   
>> -		iov_iter_xarray(&iter, ITER_DEST, &mapping->i_pages, pos, count);
>> -		iov_iter_zero(count, &iter);
>> +		if (!dest_iter) {
>> +			iov_iter_xarray(&iter, ITER_DEST, &mapping->i_pages, pos, count);
>> +			iov_iter_zero(count, &iter);
>> +		}
>> +
> 
> Ditto.  Don't we need handling the unmapped case for direct IO, e.g.
> when direct reading a sparse file?
> 
Do you mean that this flag represents the file with the hole?
 From the code, it seems the file is at the end. Maybe I misunderstood 
this flag.
> 
>>   		req->submitted += count;
>>   		return 0;
>>   	}
>> @@ -315,9 +335,17 @@ static int erofs_fscache_data_read_slice(struct erofs_fscache_rq *req)
>>   	io = erofs_fscache_req_io_alloc(req);
>>   	if (!io)
>>   		return -ENOMEM;
>> -	iov_iter_xarray(&io->iter, ITER_DEST, &mapping->i_pages, pos, count);
>> +
>> +	if (dest_iter)
>> +		iov_iter_ubuf(&io->iter, ITER_DEST,
>> +			dest_iter->ubuf + dest_iter->iov_offset, count);
>> +	else
>> +		iov_iter_xarray(&io->iter, ITER_DEST, &mapping->i_pages, pos, count);
>> +
>>   	ret = erofs_fscache_read_io_async(mdev.m_fscache->cookie,
>>   			mdev.m_pa + (pos - map.m_la), io);
>> +	if (dest_iter)
>> +		iov_iter_advance(dest_iter, io->iter.iov_offset);
>>   	erofs_fscache_req_io_put(io);
>>   
>>   	req->submitted += count;
>> @@ -373,6 +401,43 @@ static void erofs_fscache_readahead(struct readahead_control *rac)
>>   	erofs_fscache_req_put(req);
>>   }
>>   
>> +static ssize_t erofs_fscache_direct_io(struct kiocb *iocb, struct iov_iter *iter)
>> +{
>> +	struct file *file = iocb->ki_filp;
>> +	size_t count = iov_iter_count(iter);
>> +	size_t i_size = i_size_read(file->f_inode);
>> +	struct erofs_fscache_rq *req;
>> +	struct completion *ctr;
>> +	ssize_t rsize;
>> +	int ret;
>> +
>> +	if (unlikely(iov_iter_rw(iter) == WRITE))
>> +		return -EROFS;
> 
> We will get here by no way since .write_iter() is not implemented at
> all, right?
> 
Yeah, you are right.
>> +
>> +	if (unlikely(iocb->ki_pos >= i_size))
>> +		return 0;
>> +
>> +	if (count + iocb->ki_pos > i_size)
>> +		count = i_size - iocb->ki_pos;
>> +
>> +	if (!count)
>> +		return 0;
>> +
>> +	req = erofs_fscache_req_alloc(file->f_mapping, iocb->ki_pos, count);
>> +	if (!req)
>> +		return -ENOMEM;
>> +
>> +	req->iter = iter;
>> +	init_completion(&req->done);
>> +	ctr = &req->done;
>> +	ret = erofs_fscache_data_read(req);
>> +	rsize = (ret == 0) ? (ssize_t)req->submitted : ret;
>> +	erofs_fscache_req_put(req);
> 
> If we get error in erofs_fscache_data_read(), the above
> erofs_fscache_req_put() will put the last reference of erofs_fscache_rq
> and free the erofs_fscache_rq structure.  Then the following
> wait_for_completion() will cause UAF.
Yeah, there is indeed a UAF issue. I thought I could just use the 
temporary variable `ctr` to keep the ref, but is in stack space and 
doesn't work.



Thanks,
Hongbo
> 
>> +	wait_for_completion(ctr);
>> +
>> +	return rsize;
>> +}
>> +
>>   static const struct address_space_operations erofs_fscache_meta_aops = {
>>   	.read_folio = erofs_fscache_meta_read_folio,
>>   };
>> @@ -380,6 +445,7 @@ static const struct address_space_operations erofs_fscache_meta_aops = {
>>   const struct address_space_operations erofs_fscache_access_aops = {
>>   	.read_folio = erofs_fscache_read_folio,
>>   	.readahead = erofs_fscache_readahead,
>> +	.direct_IO = erofs_fscache_direct_io,
>>   };
>>   
>>   static void erofs_fscache_domain_put(struct erofs_domain *domain)
> 

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2024-07-29 13:42 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2024-07-26  2:06 [PATCH v2] erofs: support direct IO for ondemand mode Hongbo Li
2024-07-29 12:09 ` Jingbo Xu
2024-07-29 13:42   ` Hongbo Li

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox