public inbox for linux-xfs@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH] Introduce SEEK_DATA/SEEK_HOLE support to XFS V7
@ 2012-02-06 14:40 Jeff Liu
  2012-02-07 14:19 ` Mark Tinguely
  2012-02-08  5:01 ` Dave Chinner
  0 siblings, 2 replies; 5+ messages in thread
From: Jeff Liu @ 2012-02-06 14:40 UTC (permalink / raw)
  To: xfs; +Cc: Christoph Hellwig, Ben Myers, Mark Tinguely, Chris Mason

Hello,

There is one bug fix in this version, in xfs_seek_data()/xfs_seek_hole(), call xfs_bmapi_read() or
xfs_bmap_first_unused() maybe failed, they should return ENXIO in this case.
Thanks Mark for pointing this out!


Signed-off-by: Jie Liu <jeff.liu@oracle.com>

---
 fs/xfs/xfs_file.c |  172 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 files changed, 171 insertions(+), 1 deletions(-)

diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 753ed9b..3822b15 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -1141,8 +1141,178 @@ xfs_vm_page_mkwrite(
 	return block_page_mkwrite(vma, vmf, xfs_get_blocks);
 }
 
+STATIC loff_t
+xfs_seek_data(
+	struct file		*file,
+	loff_t			start,
+	u32			type)
+{
+	struct inode		*inode = file->f_mapping->host;
+	struct xfs_inode	*ip = XFS_I(inode);
+	struct xfs_mount	*mp = ip->i_mount;
+	loff_t			uninitialized_var(offset);
+	xfs_fsize_t		isize;
+	xfs_fileoff_t		fsbno;
+	xfs_filblks_t		len;
+	uint			lock;
+	int			error;
+
+	lock = xfs_ilock_map_shared(ip);
+
+	isize = i_size_read(inode);
+	if (start >= isize) {
+		error = ENXIO;
+		goto out_unlock;
+	}
+
+	fsbno = XFS_B_TO_FSBT(mp, start);
+	len = XFS_B_TO_FSB(mp, isize);
+	for (;;) {
+		struct xfs_bmbt_irec	map[2];
+		int			nmap = 2;
+		loff_t			seekoff;
+
+		error = xfs_bmapi_read(ip, fsbno, len - fsbno, map, &nmap,
+				       XFS_BMAPI_ENTIRE);
+		if (error) {
+			error = ENXIO;
+			goto out_unlock;
+		}
+
+		/* No extents at given offset, must be beyond EOF */
+		if (nmap == 0) {
+			error = ENXIO;
+			goto out_unlock;
+		}
+
+		seekoff = XFS_FSB_TO_B(mp, fsbno);
+
+		if ((map[0].br_state == XFS_EXT_NORM &&
+		     !isnullstartblock(map[0].br_startblock)) ||
+		    map[0].br_startblock == DELAYSTARTBLOCK) {
+			offset = max_t(loff_t, seekoff,
+				       XFS_FSB_TO_B(mp, map[0].br_startoff));
+			break;
+		} else if (map[0].br_state == XFS_EXT_UNWRITTEN) {
+			offset = max_t(loff_t, seekoff,
+				       XFS_FSB_TO_B(mp, map[0].br_startoff));
+			break;
+		} else if (map[0].br_startblock == HOLESTARTBLOCK) {
+			if (nmap == 1) {
+				error = ENXIO;
+				goto out_unlock;
+			}
+
+			if ((map[1].br_state == XFS_EXT_NORM &&
+			     !isnullstartblock(map[1].br_startblock)) ||
+			    map[1].br_startblock == DELAYSTARTBLOCK) {
+				offset = max_t(loff_t, seekoff,
+					XFS_FSB_TO_B(mp, map[1].br_startoff));
+				break;
+			} else if (map[1].br_state == XFS_EXT_UNWRITTEN) {
+				offset = max_t(loff_t, seekoff,
+					XFS_FSB_TO_B(mp, map[1].br_startoff));
+				break;
+			} else if (map[1].br_startblock == HOLESTARTBLOCK) {
+				fsbno = map[1].br_startoff +
+					map[1].br_blockcount;
+			} else {
+				BUG();
+			}
+		} else {
+			BUG();
+		}
+
+		if (XFS_FSB_TO_B(mp, fsbno) > isize) {
+			error = ENXIO;
+			goto out_unlock;
+		}
+	}
+
+	if (offset < start)
+		offset = start;
+
+	if (offset != file->f_pos)
+		file->f_pos = offset;
+
+out_unlock:
+	xfs_iunlock_map_shared(ip, lock);
+
+	if (error)
+		return -error;
+	return offset;
+}
+
+STATIC loff_t
+xfs_seek_hole(
+	struct file		*file,
+	loff_t			start,
+	u32			type)
+{
+	struct inode		*inode = file->f_mapping->host;
+	struct xfs_inode	*ip = XFS_I(inode);
+	struct xfs_mount	*mp = ip->i_mount;
+	loff_t			uninitialized_var(offset);
+	loff_t			holeoff;
+	xfs_fsize_t		isize;
+	xfs_fileoff_t		fsbno;
+	uint			lock;
+	int			error;
+
+	lock = xfs_ilock_map_shared(ip);
+
+	isize = i_size_read(inode);
+	if (start >= isize) {
+		error = ENXIO;
+		goto out_unlock;
+	}
+
+	fsbno = XFS_B_TO_FSBT(mp, start);
+	error = xfs_bmap_first_unused(NULL, ip, 1, &fsbno, XFS_DATA_FORK);
+	if (error) {
+		error = ENXIO;
+		goto out_unlock;
+	}
+
+	holeoff = XFS_FSB_TO_B(mp, fsbno);
+	if (holeoff <= start)
+		offset = start;
+	else
+		offset = min_t(loff_t, holeoff, isize);
+
+	if (offset != file->f_pos)
+		file->f_pos = offset;
+
+out_unlock:
+	xfs_iunlock_map_shared(ip, lock);
+
+	if (error)
+		return -error;
+	return offset;
+}
+
+STATIC loff_t
+xfs_file_llseek(
+	struct file	*file,
+	loff_t		offset,
+	int		origin)
+{
+	switch (origin) {
+	case SEEK_END:
+	case SEEK_CUR:
+	case SEEK_SET:
+		return generic_file_llseek(file, offset, origin);
+	case SEEK_DATA:
+		return xfs_seek_data(file, offset, origin);
+	case SEEK_HOLE:
+		return xfs_seek_hole(file, offset, origin);
+	default:
+		return -EINVAL;
+	}
+}
+
 const struct file_operations xfs_file_operations = {
-	.llseek		= generic_file_llseek,
+	.llseek		= xfs_file_llseek,
 	.read		= do_sync_read,
 	.write		= do_sync_write,
 	.aio_read	= xfs_file_aio_read,
-- 
1.7.9

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH] Introduce SEEK_DATA/SEEK_HOLE support to XFS V7
  2012-02-06 14:40 [PATCH] Introduce SEEK_DATA/SEEK_HOLE support to XFS V7 Jeff Liu
@ 2012-02-07 14:19 ` Mark Tinguely
  2012-02-08  5:01 ` Dave Chinner
  1 sibling, 0 replies; 5+ messages in thread
From: Mark Tinguely @ 2012-02-07 14:19 UTC (permalink / raw)
  To: jeff.liu; +Cc: Christoph Hellwig, Ben Myers, Chris Mason, xfs

On 02/06/12 08:40, Jeff Liu wrote:
> Hello,
>
> There is one bug fix in this version, in xfs_seek_data()/xfs_seek_hole(), call xfs_bmapi_read() or
> xfs_bmap_first_unused() maybe failed, they should return ENXIO in this case.
> Thanks Mark for pointing this out!
>
>
> Signed-off-by: Jie Liu<jeff.liu@oracle.com>
>
> ---
>   fs/xfs/xfs_file.c |  172 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
>   1 files changed, 171 insertions(+), 1 deletions(-)
>
> diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
> index 753ed9b..3822b15 100644
> --- a/fs/xfs/xfs_file.c
> +++ b/fs/xfs/xfs_file.c
> @@ -1141,8 +1141,178 @@ xfs_vm_page_mkwrite(
>   	return block_page_mkwrite(vma, vmf, xfs_get_blocks);


Looks good.

Reviewed-by: Mark Tinguely <tinguely@sgi.com>

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] Introduce SEEK_DATA/SEEK_HOLE support to XFS V7
  2012-02-06 14:40 [PATCH] Introduce SEEK_DATA/SEEK_HOLE support to XFS V7 Jeff Liu
  2012-02-07 14:19 ` Mark Tinguely
@ 2012-02-08  5:01 ` Dave Chinner
  2012-02-08 13:10   ` Jeff Liu
  1 sibling, 1 reply; 5+ messages in thread
From: Dave Chinner @ 2012-02-08  5:01 UTC (permalink / raw)
  To: Jeff Liu; +Cc: Christoph Hellwig, Ben Myers, Mark Tinguely, Chris Mason, xfs

On Mon, Feb 06, 2012 at 10:40:44PM +0800, Jeff Liu wrote:
> Hello,
> 
> There is one bug fix in this version, in xfs_seek_data()/xfs_seek_hole(), call xfs_bmapi_read() or
> xfs_bmap_first_unused() maybe failed, they should return ENXIO in this case.
> Thanks Mark for pointing this out!
> 
> 
> Signed-off-by: Jie Liu <jeff.liu@oracle.com>

Can you post a final version with the real commit message attached?

The normal way of making comments like this about a patch posting is
to put the comments after the first "---" line, like the diffstat is
below....

As it is,my comments are mainly about error handling and putting in
some comments to explain exactly why the code ended up this way....

> ---
>  fs/xfs/xfs_file.c |  172 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
>  1 files changed, 171 insertions(+), 1 deletions(-)
> 
> diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
> index 753ed9b..3822b15 100644
> --- a/fs/xfs/xfs_file.c
> +++ b/fs/xfs/xfs_file.c
> @@ -1141,8 +1141,178 @@ xfs_vm_page_mkwrite(
>  	return block_page_mkwrite(vma, vmf, xfs_get_blocks);
>  }
>  
> +STATIC loff_t
> +xfs_seek_data(
> +	struct file		*file,
> +	loff_t			start,
> +	u32			type)
> +{
> +	struct inode		*inode = file->f_mapping->host;
> +	struct xfs_inode	*ip = XFS_I(inode);
> +	struct xfs_mount	*mp = ip->i_mount;
> +	loff_t			uninitialized_var(offset);
> +	xfs_fsize_t		isize;
> +	xfs_fileoff_t		fsbno;
> +	xfs_filblks_t		len;
> +	uint			lock;
> +	int			error;
> +
> +	lock = xfs_ilock_map_shared(ip);
> +
> +	isize = i_size_read(inode);
> +	if (start >= isize) {
> +		error = ENXIO;
> +		goto out_unlock;
> +	}
> +
> +	fsbno = XFS_B_TO_FSBT(mp, start);
> +	len = XFS_B_TO_FSB(mp, isize);

It's not entirely obvious why len is based on isize rather than the
(isize - start), the range being mapped. A comment might be in order
so we don't make silly mistakes reading the code in a couple of
years time.

> +	for (;;) {
> +		struct xfs_bmbt_irec	map[2];
> +		int			nmap = 2;
> +		loff_t			seekoff;
> +
> +		error = xfs_bmapi_read(ip, fsbno, len - fsbno, map, &nmap,
> +				       XFS_BMAPI_ENTIRE);
> +		if (error) {
> +			error = ENXIO;
> +			goto out_unlock;
> +		}

I don't think that is correct. ENXIO means "offset beyond EOF", and
this will typically only return errors due to extent tree corruption
or filesystem shutdown. I'd just return the error as it stands.

> +		/* No extents at given offset, must be beyond EOF */
> +		if (nmap == 0) {
> +			error = ENXIO;
> +			goto out_unlock;
> +		}

But we can't be beyond EOF - we've already checked that. Hence we
should *always* get a mapping back. To fail to get one back is a
sign of extent tree corruption, I think, so this should probably be
a XFS_WANT_CORRUPTED_GOTO() case....

> +		seekoff = XFS_FSB_TO_B(mp, fsbno);
> +
> +		if ((map[0].br_state == XFS_EXT_NORM &&
> +		     !isnullstartblock(map[0].br_startblock)) ||
> +		    map[0].br_startblock == DELAYSTARTBLOCK) {

So this skips holes and unwritten regions.

> +			offset = max_t(loff_t, seekoff,
> +				       XFS_FSB_TO_B(mp, map[0].br_startoff));
> +			break;
> +		} else if (map[0].br_state == XFS_EXT_UNWRITTEN) {
> +			offset = max_t(loff_t, seekoff,
> +				       XFS_FSB_TO_B(mp, map[0].br_startoff));
> +			break;

But unwritten regions have an identical offset caclulation to
delayed and written regions. So that entire piece of logic becomes:

		if (map[0].br_startblock != HOLESTARTBLOCK)) {
			offset = max_t(loff_t, seekoff,
				       XFS_FSB_TO_B(mp, map[0].br_startoff));
			break;
		} else {

A comment might be in order there, too, indicating why we are
handling unwritten regions as data, just like written and delalloc
regions....

> +		} else if (map[0].br_startblock == HOLESTARTBLOCK) {
> +			if (nmap == 1) {
> +				error = ENXIO;
> +				goto out_unlock;
> +			}
> +
> +			if ((map[1].br_state == XFS_EXT_NORM &&
> +			     !isnullstartblock(map[1].br_startblock)) ||
> +			    map[1].br_startblock == DELAYSTARTBLOCK) {
> +				offset = max_t(loff_t, seekoff,
> +					XFS_FSB_TO_B(mp, map[1].br_startoff));
> +				break;
> +			} else if (map[1].br_state == XFS_EXT_UNWRITTEN) {
> +				offset = max_t(loff_t, seekoff,
> +					XFS_FSB_TO_B(mp, map[1].br_startoff));
> +				break;
> +			} else if (map[1].br_startblock == HOLESTARTBLOCK) {
> +				fsbno = map[1].br_startoff +
> +					map[1].br_blockcount;

Same again:
			if (map[1].br_startblock != HOLESTARTBLOCK) {
				offset = max_t(loff_t, seekoff
					XFS_FSB_TO_B(mp, map[1].br_startoff));
				break;
			} else {
				fsbno = map[1].br_startoff +
					map[1].br_blockcount;
			}

> +			} else {
> +				BUG();
> +			}
> +		} else {
> +			BUG();
> +		}

Panicing the machine just because the filesystem might be corrupted
in not a very nice way to handle the error. Given that we don't even
need to handle wierd map states here (because the xfs_bmapi_read()
will have found any corruption during the lookup) i don't think this
is at all necessary.

> +
> +		if (XFS_FSB_TO_B(mp, fsbno) > isize) {
> +			error = ENXIO;
> +			goto out_unlock;
> +		}
> +	}
> +
> +	if (offset < start)
> +		offset = start;
> +
> +	if (offset != file->f_pos)
> +		file->f_pos = offset;
> +
> +out_unlock:
> +	xfs_iunlock_map_shared(ip, lock);
> +
> +	if (error)
> +		return -error;
> +	return offset;
> +}
> +
> +STATIC loff_t
> +xfs_seek_hole(
> +	struct file		*file,
> +	loff_t			start,
> +	u32			type)
> +{
> +	struct inode		*inode = file->f_mapping->host;
> +	struct xfs_inode	*ip = XFS_I(inode);
> +	struct xfs_mount	*mp = ip->i_mount;
> +	loff_t			uninitialized_var(offset);
> +	loff_t			holeoff;
> +	xfs_fsize_t		isize;
> +	xfs_fileoff_t		fsbno;
> +	uint			lock;
> +	int			error;
> +
> +	lock = xfs_ilock_map_shared(ip);
> +
> +	isize = i_size_read(inode);
> +	if (start >= isize) {
> +		error = ENXIO;
> +		goto out_unlock;
> +	}
> +
> +	fsbno = XFS_B_TO_FSBT(mp, start);
> +	error = xfs_bmap_first_unused(NULL, ip, 1, &fsbno, XFS_DATA_FORK);
> +	if (error) {
> +		error = ENXIO;
> +		goto out_unlock;
> +	}

Same comment here about error handling. xfs_bmap_first_unused()
failing usually indicates a corruption, not a "offset beyond EOF",
so we should be returning the error that the filesystem has returned
rather than ENXIO.

> +
> +	holeoff = XFS_FSB_TO_B(mp, fsbno);
> +	if (holeoff <= start)
> +		offset = start;
> +	else
> +		offset = min_t(loff_t, holeoff, isize);
> +
> +	if (offset != file->f_pos)
> +		file->f_pos = offset;
> +
> +out_unlock:
> +	xfs_iunlock_map_shared(ip, lock);
> +
> +	if (error)
> +		return -error;
> +	return offset;
> +}
> +
> +STATIC loff_t
> +xfs_file_llseek(
> +	struct file	*file,
> +	loff_t		offset,
> +	int		origin)
> +{
> +	switch (origin) {
> +	case SEEK_END:
> +	case SEEK_CUR:
> +	case SEEK_SET:
> +		return generic_file_llseek(file, offset, origin);
> +	case SEEK_DATA:
> +		return xfs_seek_data(file, offset, origin);
> +	case SEEK_HOLE:
> +		return xfs_seek_hole(file, offset, origin);
> +	default:
> +		return -EINVAL;
> +	}
> +}
> +
>  const struct file_operations xfs_file_operations = {
> -	.llseek		= generic_file_llseek,
> +	.llseek		= xfs_file_llseek,
>  	.read		= do_sync_read,
>  	.write		= do_sync_write,
>  	.aio_read	= xfs_file_aio_read,
> -- 
> 1.7.9
> 
> _______________________________________________
> xfs mailing list
> xfs@oss.sgi.com
> http://oss.sgi.com/mailman/listinfo/xfs
> 

-- 
Dave Chinner
david@fromorbit.com

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] Introduce SEEK_DATA/SEEK_HOLE support to XFS V7
  2012-02-08  5:01 ` Dave Chinner
@ 2012-02-08 13:10   ` Jeff Liu
  2012-02-08 23:06     ` Dave Chinner
  0 siblings, 1 reply; 5+ messages in thread
From: Jeff Liu @ 2012-02-08 13:10 UTC (permalink / raw)
  To: Dave Chinner
  Cc: Christoph Hellwig, Ben Myers, Mark Tinguely, Chris Mason, xfs

Hi Dave,

Thanks for your review.
On 02/08/2012 01:01 PM, Dave Chinner wrote:

> On Mon, Feb 06, 2012 at 10:40:44PM +0800, Jeff Liu wrote:
>> Hello,
>>
>> There is one bug fix in this version, in xfs_seek_data()/xfs_seek_hole(), call xfs_bmapi_read() or
>> xfs_bmap_first_unused() maybe failed, they should return ENXIO in this case.
>> Thanks Mark for pointing this out!
>>
>>
>> Signed-off-by: Jie Liu <jeff.liu@oracle.com>
> 
> Can you post a final version with the real commit message attached?
> 
> The normal way of making comments like this about a patch posting is
> to put the comments after the first "---" line, like the diffstat is
> below....

> 
> As it is,my comments are mainly about error handling and putting in
> some comments to explain exactly why the code ended up this way....

Ok, the commit message will added in next post.

But I still have a concern about error handing as below.

> 
>> ---
>>  fs/xfs/xfs_file.c |  172 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
>>  1 files changed, 171 insertions(+), 1 deletions(-)
>>
>> diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
>> index 753ed9b..3822b15 100644
>> --- a/fs/xfs/xfs_file.c
>> +++ b/fs/xfs/xfs_file.c
>> @@ -1141,8 +1141,178 @@ xfs_vm_page_mkwrite(
>>  	return block_page_mkwrite(vma, vmf, xfs_get_blocks);
>>  }
>>  
>> +STATIC loff_t
>> +xfs_seek_data(
>> +	struct file		*file,
>> +	loff_t			start,
>> +	u32			type)
>> +{
>> +	struct inode		*inode = file->f_mapping->host;
>> +	struct xfs_inode	*ip = XFS_I(inode);
>> +	struct xfs_mount	*mp = ip->i_mount;
>> +	loff_t			uninitialized_var(offset);
>> +	xfs_fsize_t		isize;
>> +	xfs_fileoff_t		fsbno;
>> +	xfs_filblks_t		len;
>> +	uint			lock;
>> +	int			error;
>> +
>> +	lock = xfs_ilock_map_shared(ip);
>> +
>> +	isize = i_size_read(inode);
>> +	if (start >= isize) {
>> +		error = ENXIO;
>> +		goto out_unlock;
>> +	}
>> +
>> +	fsbno = XFS_B_TO_FSBT(mp, start);
>> +	len = XFS_B_TO_FSB(mp, isize);
> 
> It's not entirely obvious why len is based on isize rather than the
> (isize - start), the range being mapped. A comment might be in order
> so we don't make silly mistakes reading the code in a couple of
> years time.
> 
>> +	for (;;) {
>> +		struct xfs_bmbt_irec	map[2];
>> +		int			nmap = 2;
>> +		loff_t			seekoff;
>> +
>> +		error = xfs_bmapi_read(ip, fsbno, len - fsbno, map, &nmap,
>> +				       XFS_BMAPI_ENTIRE);
>> +		if (error) {
>> +			error = ENXIO;
>> +			goto out_unlock;
>> +		}
> 
> I don't think that is correct. ENXIO means "offset beyond EOF", and
> this will typically only return errors due to extent tree corruption
> or filesystem shutdown. I'd just return the error as it stands.

Currently, both OCFS2 and Btrfs return ENXIO in case of their particular extent fetch routine failed.
So return other internal errors in XFS will introduce incompatibility IMHO.
But just as you pointed out, as well as ENXIO semantics defined at http://linux.die.net/man/2/lseek,
return ENXIO is not proper to internal issue, and that might confuse user in such situation for return value check up.

Maybe it's better to reach a consensus with other file systems with SEEK_DATA/SEEK_HOLE supports, I'd suggest
either:
1) return the error as it stands in particular file system.
or:
2) return EIO or another meaningful error number.
look all those file systems need to fix up accordingly.

>> +		/* No extents at given offset, must be beyond EOF */
>> +		if (nmap == 0) {
>> +			error = ENXIO;
>> +			goto out_unlock;
>> +		}
> 
> But we can't be beyond EOF - we've already checked that. Hence we
> should *always* get a mapping back. To fail to get one back is a
> sign of extent tree corruption, I think, so this should probably be
> a XFS_WANT_CORRUPTED_GOTO() case....

checking "nmap == 0" also has another option, since there might have continuous hole extents in mapped file range,
i.e, both map[0] and map[1] are holes(for now, I have not yet worked out a test case to verify this scenario).
I still need some time to verify that.

> 
>> +		seekoff = XFS_FSB_TO_B(mp, fsbno);
>> +
>> +		if ((map[0].br_state == XFS_EXT_NORM &&
>> +		     !isnullstartblock(map[0].br_startblock)) ||
>> +		    map[0].br_startblock == DELAYSTARTBLOCK) {
> 
> So this skips holes and unwritten regions.
> 
>> +			offset = max_t(loff_t, seekoff,
>> +				       XFS_FSB_TO_B(mp, map[0].br_startoff));
>> +			break;
>> +		} else if (map[0].br_state == XFS_EXT_UNWRITTEN) {
>> +			offset = max_t(loff_t, seekoff,
>> +				       XFS_FSB_TO_B(mp, map[0].br_startoff));
>> +			break;

Using "map[0].br_startblock != HOLESTARTBLOCK" to simplify the logic for offset calculation is cool!
However, treat unwritten extent as data is temporarily for us, it will finally split up to an individual check when
trying to add dirty data lookup. Given that, how about if we keep the current logic so that we don't need much
code change in future, does it make sense? :)

> 
> But unwritten regions have an identical offset caclulation to
> delayed and written regions. So that entire piece of logic becomes:
> 
> 		if (map[0].br_startblock != HOLESTARTBLOCK)) {
> 			offset = max_t(loff_t, seekoff,
> 				       XFS_FSB_TO_B(mp, map[0].br_startoff));
> 			break;
> 		} else {

> 
> A comment might be in order there, too, indicating why we are
> handling unwritten regions as data, just like written and delalloc
> regions....

Ok.

> 
>> +		} else if (map[0].br_startblock == HOLESTARTBLOCK) {
>> +			if (nmap == 1) {
>> +				error = ENXIO;
>> +				goto out_unlock;
>> +			}
>> +
>> +			if ((map[1].br_state == XFS_EXT_NORM &&
>> +			     !isnullstartblock(map[1].br_startblock)) ||
>> +			    map[1].br_startblock == DELAYSTARTBLOCK) {
>> +				offset = max_t(loff_t, seekoff,
>> +					XFS_FSB_TO_B(mp, map[1].br_startoff));
>> +				break;
>> +			} else if (map[1].br_state == XFS_EXT_UNWRITTEN) {
>> +				offset = max_t(loff_t, seekoff,
>> +					XFS_FSB_TO_B(mp, map[1].br_startoff));
>> +				break;
>> +			} else if (map[1].br_startblock == HOLESTARTBLOCK) {
>> +				fsbno = map[1].br_startoff +
>> +					map[1].br_blockcount;
> 
> Same again:
> 			if (map[1].br_startblock != HOLESTARTBLOCK) {
> 				offset = max_t(loff_t, seekoff
> 					XFS_FSB_TO_B(mp, map[1].br_startoff));
> 				break;
> 			} else {
> 				fsbno = map[1].br_startoff +
> 					map[1].br_blockcount;
> 			}
> 
>> +			} else {
>> +				BUG();
>> +			}
>> +		} else {
>> +			BUG();
>> +		}
> 
> Panicing the machine just because the filesystem might be corrupted
> in not a very nice way to handle the error. Given that we don't even
> need to handle wierd map states here (because the xfs_bmapi_read()
> will have found any corruption during the lookup) i don't think this
> is at all necessary.

Yes, looks we can safely remove them. :)

I'll take care below things too.

Thanks,
-Jeff

> 
>> +
>> +		if (XFS_FSB_TO_B(mp, fsbno) > isize) {
>> +			error = ENXIO;
>> +			goto out_unlock;
>> +		}
>> +	}
>> +
>> +	if (offset < start)
>> +		offset = start;
>> +
>> +	if (offset != file->f_pos)
>> +		file->f_pos = offset;
>> +
>> +out_unlock:
>> +	xfs_iunlock_map_shared(ip, lock);
>> +
>> +	if (error)
>> +		return -error;
>> +	return offset;
>> +}
>> +
>> +STATIC loff_t
>> +xfs_seek_hole(
>> +	struct file		*file,
>> +	loff_t			start,
>> +	u32			type)
>> +{
>> +	struct inode		*inode = file->f_mapping->host;
>> +	struct xfs_inode	*ip = XFS_I(inode);
>> +	struct xfs_mount	*mp = ip->i_mount;
>> +	loff_t			uninitialized_var(offset);
>> +	loff_t			holeoff;
>> +	xfs_fsize_t		isize;
>> +	xfs_fileoff_t		fsbno;
>> +	uint			lock;
>> +	int			error;
>> +
>> +	lock = xfs_ilock_map_shared(ip);
>> +
>> +	isize = i_size_read(inode);
>> +	if (start >= isize) {
>> +		error = ENXIO;
>> +		goto out_unlock;
>> +	}
>> +
>> +	fsbno = XFS_B_TO_FSBT(mp, start);
>> +	error = xfs_bmap_first_unused(NULL, ip, 1, &fsbno, XFS_DATA_FORK);
>> +	if (error) {
>> +		error = ENXIO;
>> +		goto out_unlock;
>> +	}
> 
> Same comment here about error handling. xfs_bmap_first_unused()
> failing usually indicates a corruption, not a "offset beyond EOF",
> so we should be returning the error that the filesystem has returned
> rather than ENXIO.
> 
>> +
>> +	holeoff = XFS_FSB_TO_B(mp, fsbno);
>> +	if (holeoff <= start)
>> +		offset = start;
>> +	else
>> +		offset = min_t(loff_t, holeoff, isize);
>> +
>> +	if (offset != file->f_pos)
>> +		file->f_pos = offset;
>> +
>> +out_unlock:
>> +	xfs_iunlock_map_shared(ip, lock);
>> +
>> +	if (error)
>> +		return -error;
>> +	return offset;
>> +}
>> +
>> +STATIC loff_t
>> +xfs_file_llseek(
>> +	struct file	*file,
>> +	loff_t		offset,
>> +	int		origin)
>> +{
>> +	switch (origin) {
>> +	case SEEK_END:
>> +	case SEEK_CUR:
>> +	case SEEK_SET:
>> +		return generic_file_llseek(file, offset, origin);
>> +	case SEEK_DATA:
>> +		return xfs_seek_data(file, offset, origin);
>> +	case SEEK_HOLE:
>> +		return xfs_seek_hole(file, offset, origin);
>> +	default:
>> +		return -EINVAL;
>> +	}
>> +}
>> +
>>  const struct file_operations xfs_file_operations = {
>> -	.llseek		= generic_file_llseek,
>> +	.llseek		= xfs_file_llseek,
>>  	.read		= do_sync_read,
>>  	.write		= do_sync_write,
>>  	.aio_read	= xfs_file_aio_read,
>> -- 
>> 1.7.9
>>
>> _______________________________________________
>> xfs mailing list
>> xfs@oss.sgi.com
>> http://oss.sgi.com/mailman/listinfo/xfs
>>
> 


_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] Introduce SEEK_DATA/SEEK_HOLE support to XFS V7
  2012-02-08 13:10   ` Jeff Liu
@ 2012-02-08 23:06     ` Dave Chinner
  0 siblings, 0 replies; 5+ messages in thread
From: Dave Chinner @ 2012-02-08 23:06 UTC (permalink / raw)
  To: Jeff Liu; +Cc: Christoph Hellwig, Ben Myers, Mark Tinguely, Chris Mason, xfs

On Wed, Feb 08, 2012 at 09:10:12PM +0800, Jeff Liu wrote:
> Hi Dave,
> 
> Thanks for your review.
> On 02/08/2012 01:01 PM, Dave Chinner wrote:
> 
> > On Mon, Feb 06, 2012 at 10:40:44PM +0800, Jeff Liu wrote:
> >> Hello,
> >>
> >> There is one bug fix in this version, in xfs_seek_data()/xfs_seek_hole(), call xfs_bmapi_read() or
> >> xfs_bmap_first_unused() maybe failed, they should return ENXIO in this case.
> >> Thanks Mark for pointing this out!
> >>
> >>
> >> Signed-off-by: Jie Liu <jeff.liu@oracle.com>
> > 
> > Can you post a final version with the real commit message attached?
> > 
> > The normal way of making comments like this about a patch posting is
> > to put the comments after the first "---" line, like the diffstat is
> > below....
> 
> > 
> > As it is,my comments are mainly about error handling and putting in
> > some comments to explain exactly why the code ended up this way....
> 
> Ok, the commit message will added in next post.
> 
> But I still have a concern about error handing as below.

.....
> >> +	for (;;) {
> >> +		struct xfs_bmbt_irec	map[2];
> >> +		int			nmap = 2;
> >> +		loff_t			seekoff;
> >> +
> >> +		error = xfs_bmapi_read(ip, fsbno, len - fsbno, map, &nmap,
> >> +				       XFS_BMAPI_ENTIRE);
> >> +		if (error) {
> >> +			error = ENXIO;
> >> +			goto out_unlock;
> >> +		}
> > 
> > I don't think that is correct. ENXIO means "offset beyond EOF", and
> > this will typically only return errors due to extent tree corruption
> > or filesystem shutdown. I'd just return the error as it stands.
> 
> Currently, both OCFS2 and Btrfs return ENXIO in case of their
> particular extent fetch routine failed.

That doesn't mean it is the best thing to do. The seek has not
succeeded and the current file position is now undefined. We did not
seek past the EOF - an extent lookup failure indicates we don't even
know where the EOF  or even the next hole or data area exists.

Hence the correct thing to do here is return a fatal error, not
something that the application can interpret as a successful
operation....

> So return other internal errors in XFS will introduce
> incompatibility IMHO.

Filesystems return different errors to the same syscall in lots of
places. Indeed, XFS will return EUCLEAN to just about any syscall
when the filesystem has been shut down to indicate a fatal error,
and that's not documented in a single syscall man page....

> But just as you pointed out, as well as ENXIO semantics defined at
> http://linux.die.net/man/2/lseek, return ENXIO is not proper to
> internal issue, and that might confuse user in such situation for
> return value check up.

Exactly, because it is valid for applications to be expecting ENXIO
when trying to find a specific data or hole location in a file (e.g.
is there any hole I can fill? EXNIO == no holes in the file)

> Maybe it's better to reach a consensus with other file systems with SEEK_DATA/SEEK_HOLE supports, I'd suggest
> either:
> 1) return the error as it stands in particular file system.
> or:
> 2) return EIO or another meaningful error number.
> look all those file systems need to fix up accordingly.

See above - filesystems are free to return errors according to the
problem that has occurred - it doesn't need to be defined in the man
page. XFS policy is to return EUCLEAN when a corruption is detected,
EIO when a read error has occurred, etc.  Other filesystems are free
to do treat these error conditions however they want, so I don't see
any particular need for standardisation here...

> >> +		/* No extents at given offset, must be beyond EOF */
> >> +		if (nmap == 0) {
> >> +			error = ENXIO;
> >> +			goto out_unlock;
> >> +		}
> > 
> > But we can't be beyond EOF - we've already checked that. Hence we
> > should *always* get a mapping back. To fail to get one back is a
> > sign of extent tree corruption, I think, so this should probably be
> > a XFS_WANT_CORRUPTED_GOTO() case....
> 
> checking "nmap == 0" also has another option, since there might have continuous hole extents in mapped file range,
> i.e, both map[0] and map[1] are holes(for now, I have not yet worked out a test case to verify this scenario).

If it lands in a hole, a hole mapping will be returned for the given
range.  IOWs, xfs_bmapi_read will always return a mapping of some
kind for a range that is within EOF. Hence nmap == 0 is an
indication of something gone very wrong...

> I still need some time to verify that.

Create a sparse file of a petabyte size, write a block in the first
1MB, then write a block in the last MB. The see what gets
returned....

> >> +		seekoff = XFS_FSB_TO_B(mp, fsbno);
> >> +
> >> +		if ((map[0].br_state == XFS_EXT_NORM &&
> >> +		     !isnullstartblock(map[0].br_startblock)) ||
> >> +		    map[0].br_startblock == DELAYSTARTBLOCK) {
> > 
> > So this skips holes and unwritten regions.
> > 
> >> +			offset = max_t(loff_t, seekoff,
> >> +				       XFS_FSB_TO_B(mp, map[0].br_startoff));
> >> +			break;
> >> +		} else if (map[0].br_state == XFS_EXT_UNWRITTEN) {
> >> +			offset = max_t(loff_t, seekoff,
> >> +				       XFS_FSB_TO_B(mp, map[0].br_startoff));
> >> +			break;
> 
> Using "map[0].br_startblock != HOLESTARTBLOCK" to simplify the logic for offset calculation is cool!
> However, treat unwritten extent as data is temporarily for us, it will finally split up to an individual check when
> trying to add dirty data lookup. Given that, how about if we keep the current logic so that we don't need much
> code change in future, does it make sense? :)

We can change the code easily enough to discriminate between written
and unwritten extents once we work out how best to do that. So right
now I'd prefer simple code that documents the cases we support very
clearly.

Cheers,

Dave.
-- 
Dave Chinner
david@fromorbit.com

_______________________________________________
xfs mailing list
xfs@oss.sgi.com
http://oss.sgi.com/mailman/listinfo/xfs

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2012-02-08 23:07 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2012-02-06 14:40 [PATCH] Introduce SEEK_DATA/SEEK_HOLE support to XFS V7 Jeff Liu
2012-02-07 14:19 ` Mark Tinguely
2012-02-08  5:01 ` Dave Chinner
2012-02-08 13:10   ` Jeff Liu
2012-02-08 23:06     ` Dave Chinner

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox