* [PATCH v9 01/19] fs: Fix rw_hint validation
2024-01-30 21:48 [PATCH v9 00/19] Pass data lifetime information to SCSI disk devices Bart Van Assche
@ 2024-01-30 21:48 ` Bart Van Assche
2024-01-31 13:56 ` Christian Brauner
2024-01-31 14:51 ` Kanchan Joshi
2024-01-30 21:48 ` [PATCH v9 02/19] fs: Verify write lifetime constants at compile time Bart Van Assche
` (17 subsequent siblings)
18 siblings, 2 replies; 42+ messages in thread
From: Bart Van Assche @ 2024-01-30 21:48 UTC (permalink / raw)
To: Martin K . Petersen
Cc: linux-scsi, linux-block, linux-fsdevel, Jens Axboe,
Christoph Hellwig, Daejun Park, Kanchan Joshi, Bart Van Assche,
Jeff Layton, Chuck Lever, Stephen Rothwell, Alexander Viro,
Christian Brauner
Reject values that are valid rw_hints after truncation but not before
truncation by passing an untruncated value to rw_hint_valid().
Reviewed-by: Christoph Hellwig <hch@lst.de>
Cc: Jeff Layton <jlayton@kernel.org>
Cc: Chuck Lever <chuck.lever@oracle.com>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Fixes: 5657cb0797c4 ("fs/fcntl: use copy_to/from_user() for u64 types")
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
---
fs/fcntl.c | 12 +++++-------
1 file changed, 5 insertions(+), 7 deletions(-)
diff --git a/fs/fcntl.c b/fs/fcntl.c
index c80a6acad742..3ff707bf2743 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -268,7 +268,7 @@ static int f_getowner_uids(struct file *filp, unsigned long arg)
}
#endif
-static bool rw_hint_valid(enum rw_hint hint)
+static bool rw_hint_valid(u64 hint)
{
switch (hint) {
case RWH_WRITE_LIFE_NOT_SET:
@@ -288,19 +288,17 @@ static long fcntl_rw_hint(struct file *file, unsigned int cmd,
{
struct inode *inode = file_inode(file);
u64 __user *argp = (u64 __user *)arg;
- enum rw_hint hint;
- u64 h;
+ u64 hint;
switch (cmd) {
case F_GET_RW_HINT:
- h = inode->i_write_hint;
- if (copy_to_user(argp, &h, sizeof(*argp)))
+ hint = inode->i_write_hint;
+ if (copy_to_user(argp, &hint, sizeof(*argp)))
return -EFAULT;
return 0;
case F_SET_RW_HINT:
- if (copy_from_user(&h, argp, sizeof(h)))
+ if (copy_from_user(&hint, argp, sizeof(hint)))
return -EFAULT;
- hint = (enum rw_hint) h;
if (!rw_hint_valid(hint))
return -EINVAL;
^ permalink raw reply related [flat|nested] 42+ messages in thread
* Re: [PATCH v9 01/19] fs: Fix rw_hint validation
2024-01-30 21:48 ` [PATCH v9 01/19] fs: Fix rw_hint validation Bart Van Assche
@ 2024-01-31 13:56 ` Christian Brauner
2024-01-31 21:07 ` Bart Van Assche
2024-02-22 2:46 ` Bart Van Assche
2024-01-31 14:51 ` Kanchan Joshi
1 sibling, 2 replies; 42+ messages in thread
From: Christian Brauner @ 2024-01-31 13:56 UTC (permalink / raw)
To: Bart Van Assche
Cc: Martin K . Petersen, linux-scsi, linux-block, linux-fsdevel,
Jens Axboe, Christoph Hellwig, Daejun Park, Kanchan Joshi,
Jeff Layton, Chuck Lever, Stephen Rothwell, Alexander Viro
On Tue, Jan 30, 2024 at 01:48:27PM -0800, Bart Van Assche wrote:
> Reject values that are valid rw_hints after truncation but not before
> truncation by passing an untruncated value to rw_hint_valid().
>
> Reviewed-by: Christoph Hellwig <hch@lst.de>
> Cc: Jeff Layton <jlayton@kernel.org>
> Cc: Chuck Lever <chuck.lever@oracle.com>
> Cc: Jens Axboe <axboe@kernel.dk>
> Cc: Stephen Rothwell <sfr@canb.auug.org.au>
> Fixes: 5657cb0797c4 ("fs/fcntl: use copy_to/from_user() for u64 types")
> Signed-off-by: Bart Van Assche <bvanassche@acm.org>
> ---
The fs parts of this should go through a vfs tree as this is vfs infra.
I can then give you a stable tag that you can merge and base the big
block and scsi bits on. It'll minimize merge conflicts and make it
easier to coordinate imho.
> fs/fcntl.c | 12 +++++-------
> 1 file changed, 5 insertions(+), 7 deletions(-)
>
> diff --git a/fs/fcntl.c b/fs/fcntl.c
> index c80a6acad742..3ff707bf2743 100644
> --- a/fs/fcntl.c
> +++ b/fs/fcntl.c
> @@ -268,7 +268,7 @@ static int f_getowner_uids(struct file *filp, unsigned long arg)
> }
> #endif
>
> -static bool rw_hint_valid(enum rw_hint hint)
> +static bool rw_hint_valid(u64 hint)
> {
> switch (hint) {
> case RWH_WRITE_LIFE_NOT_SET:
> @@ -288,19 +288,17 @@ static long fcntl_rw_hint(struct file *file, unsigned int cmd,
> {
> struct inode *inode = file_inode(file);
> u64 __user *argp = (u64 __user *)arg;
> - enum rw_hint hint;
> - u64 h;
> + u64 hint;
>
> switch (cmd) {
> case F_GET_RW_HINT:
> - h = inode->i_write_hint;
> - if (copy_to_user(argp, &h, sizeof(*argp)))
> + hint = inode->i_write_hint;
> + if (copy_to_user(argp, &hint, sizeof(*argp)))
> return -EFAULT;
> return 0;
> case F_SET_RW_HINT:
> - if (copy_from_user(&h, argp, sizeof(h)))
> + if (copy_from_user(&hint, argp, sizeof(hint)))
> return -EFAULT;
> - hint = (enum rw_hint) h;
> if (!rw_hint_valid(hint))
> return -EINVAL;
>
^ permalink raw reply [flat|nested] 42+ messages in thread
* Re: [PATCH v9 01/19] fs: Fix rw_hint validation
2024-01-31 13:56 ` Christian Brauner
@ 2024-01-31 21:07 ` Bart Van Assche
2024-02-22 2:46 ` Bart Van Assche
1 sibling, 0 replies; 42+ messages in thread
From: Bart Van Assche @ 2024-01-31 21:07 UTC (permalink / raw)
To: Christian Brauner
Cc: Martin K . Petersen, linux-scsi, linux-block, linux-fsdevel,
Jens Axboe, Christoph Hellwig, Daejun Park, Kanchan Joshi,
Jeff Layton, Chuck Lever, Stephen Rothwell, Alexander Viro
On 1/31/24 05:56, Christian Brauner wrote:
> The fs parts of this should go through a vfs tree as this is vfs infra.
> I can then give you a stable tag that you can merge and base the big
> block and scsi bits on. It'll minimize merge conflicts and make it
> easier to coordinate imho.
The fs parts have been posted on the fs-devel mailing list. See also
https://lore.kernel.org/all/20240131205237.3540210-1-bvanassche@acm.org/
Thanks,
Bart.
^ permalink raw reply [flat|nested] 42+ messages in thread
* Re: [PATCH v9 01/19] fs: Fix rw_hint validation
2024-01-31 13:56 ` Christian Brauner
2024-01-31 21:07 ` Bart Van Assche
@ 2024-02-22 2:46 ` Bart Van Assche
2024-02-22 8:51 ` Christian Brauner
1 sibling, 1 reply; 42+ messages in thread
From: Bart Van Assche @ 2024-02-22 2:46 UTC (permalink / raw)
To: Christian Brauner
Cc: Martin K . Petersen, linux-scsi, linux-block, linux-fsdevel,
Jens Axboe, Christoph Hellwig, Daejun Park, Kanchan Joshi,
Jeff Layton, Chuck Lever, Stephen Rothwell, Alexander Viro
On 1/31/24 05:56, Christian Brauner wrote:
> The fs parts of this should go through a vfs tree as this is vfs infra.
> I can then give you a stable tag that you can merge and base the big
> block and scsi bits on. It'll minimize merge conflicts and make it
> easier to coordinate imho.
Hi Christian,
It would be appreciated if such a stable tag could be created on the vfs.rw
branch.
Thanks,
Bart.
^ permalink raw reply [flat|nested] 42+ messages in thread
* Re: [PATCH v9 01/19] fs: Fix rw_hint validation
2024-02-22 2:46 ` Bart Van Assche
@ 2024-02-22 8:51 ` Christian Brauner
0 siblings, 0 replies; 42+ messages in thread
From: Christian Brauner @ 2024-02-22 8:51 UTC (permalink / raw)
To: Bart Van Assche
Cc: Martin K . Petersen, linux-scsi, linux-block, linux-fsdevel,
Jens Axboe, Christoph Hellwig, Daejun Park, Kanchan Joshi,
Jeff Layton, Chuck Lever, Stephen Rothwell, Alexander Viro
On Wed, Feb 21, 2024 at 06:46:51PM -0800, Bart Van Assche wrote:
> On 1/31/24 05:56, Christian Brauner wrote:
> > The fs parts of this should go through a vfs tree as this is vfs infra.
> > I can then give you a stable tag that you can merge and base the big
> > block and scsi bits on. It'll minimize merge conflicts and make it
> > easier to coordinate imho.
>
> Hi Christian,
>
> It would be appreciated if such a stable tag could be created on the vfs.rw
> branch.
Isn't the stable branch I gave you weeks ago enough?
It's usually what I do with Jens.
But fwiw, see I've added the vfs-6.9.rw_hint tag. Pushed.
^ permalink raw reply [flat|nested] 42+ messages in thread
* Re: [PATCH v9 01/19] fs: Fix rw_hint validation
2024-01-30 21:48 ` [PATCH v9 01/19] fs: Fix rw_hint validation Bart Van Assche
2024-01-31 13:56 ` Christian Brauner
@ 2024-01-31 14:51 ` Kanchan Joshi
1 sibling, 0 replies; 42+ messages in thread
From: Kanchan Joshi @ 2024-01-31 14:51 UTC (permalink / raw)
To: Bart Van Assche, Martin K . Petersen
Cc: linux-scsi, linux-block, linux-fsdevel, Jens Axboe,
Christoph Hellwig, Daejun Park, Jeff Layton, Chuck Lever,
Stephen Rothwell, Alexander Viro, Christian Brauner
On 1/31/2024 3:18 AM, Bart Van Assche wrote:
> Reject values that are valid rw_hints after truncation but not before
> truncation by passing an untruncated value to rw_hint_valid().
Reviewed-by: Kanchan Joshi <joshi.k@samsung.com>
^ permalink raw reply [flat|nested] 42+ messages in thread
* [PATCH v9 02/19] fs: Verify write lifetime constants at compile time
2024-01-30 21:48 [PATCH v9 00/19] Pass data lifetime information to SCSI disk devices Bart Van Assche
2024-01-30 21:48 ` [PATCH v9 01/19] fs: Fix rw_hint validation Bart Van Assche
@ 2024-01-30 21:48 ` Bart Van Assche
2024-01-30 21:48 ` [PATCH v9 03/19] fs: Split fcntl_rw_hint() Bart Van Assche
` (16 subsequent siblings)
18 siblings, 0 replies; 42+ messages in thread
From: Bart Van Assche @ 2024-01-30 21:48 UTC (permalink / raw)
To: Martin K . Petersen
Cc: linux-scsi, linux-block, linux-fsdevel, Jens Axboe,
Christoph Hellwig, Daejun Park, Kanchan Joshi, Bart Van Assche,
Johannes Thumshirn, Alexander Viro, Christian Brauner,
Jeff Layton, Chuck Lever
The code in fs/fcntl.c converts RWH_* constants to and from WRITE_LIFE_*
constants using casts. Verify at compile time that these casts will yield
the intended effect.
Reviewed-by: Christoph Hellwig <hch@lst.de>
Suggested-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
---
fs/fcntl.c | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 3ff707bf2743..f3bc4662455f 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -270,6 +270,13 @@ static int f_getowner_uids(struct file *filp, unsigned long arg)
static bool rw_hint_valid(u64 hint)
{
+ BUILD_BUG_ON(WRITE_LIFE_NOT_SET != RWH_WRITE_LIFE_NOT_SET);
+ BUILD_BUG_ON(WRITE_LIFE_NONE != RWH_WRITE_LIFE_NONE);
+ BUILD_BUG_ON(WRITE_LIFE_SHORT != RWH_WRITE_LIFE_SHORT);
+ BUILD_BUG_ON(WRITE_LIFE_MEDIUM != RWH_WRITE_LIFE_MEDIUM);
+ BUILD_BUG_ON(WRITE_LIFE_LONG != RWH_WRITE_LIFE_LONG);
+ BUILD_BUG_ON(WRITE_LIFE_EXTREME != RWH_WRITE_LIFE_EXTREME);
+
switch (hint) {
case RWH_WRITE_LIFE_NOT_SET:
case RWH_WRITE_LIFE_NONE:
^ permalink raw reply related [flat|nested] 42+ messages in thread
* [PATCH v9 03/19] fs: Split fcntl_rw_hint()
2024-01-30 21:48 [PATCH v9 00/19] Pass data lifetime information to SCSI disk devices Bart Van Assche
2024-01-30 21:48 ` [PATCH v9 01/19] fs: Fix rw_hint validation Bart Van Assche
2024-01-30 21:48 ` [PATCH v9 02/19] fs: Verify write lifetime constants at compile time Bart Van Assche
@ 2024-01-30 21:48 ` Bart Van Assche
2024-01-31 14:52 ` Kanchan Joshi
2024-01-30 21:48 ` [PATCH v9 04/19] fs: Move enum rw_hint into a new header file Bart Van Assche
` (15 subsequent siblings)
18 siblings, 1 reply; 42+ messages in thread
From: Bart Van Assche @ 2024-01-30 21:48 UTC (permalink / raw)
To: Martin K . Petersen
Cc: linux-scsi, linux-block, linux-fsdevel, Jens Axboe,
Christoph Hellwig, Daejun Park, Kanchan Joshi, Bart Van Assche,
Jeff Layton, Chuck Lever, Stephen Rothwell, Alexander Viro,
Christian Brauner
Split fcntl_rw_hint() such that there is one helper function per fcntl. No
functionality is changed by this patch.
Reviewed-by: Christoph Hellwig <hch@lst.de>
Suggested-by: Christoph Hellwig <hch@lst.de>
Cc: Jeff Layton <jlayton@kernel.org>
Cc: Chuck Lever <chuck.lever@oracle.com>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
---
fs/fcntl.c | 47 ++++++++++++++++++++++++++---------------------
1 file changed, 26 insertions(+), 21 deletions(-)
diff --git a/fs/fcntl.c b/fs/fcntl.c
index f3bc4662455f..5fa2d95114bf 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -290,32 +290,35 @@ static bool rw_hint_valid(u64 hint)
}
}
-static long fcntl_rw_hint(struct file *file, unsigned int cmd,
- unsigned long arg)
+static long fcntl_get_rw_hint(struct file *file, unsigned int cmd,
+ unsigned long arg)
{
struct inode *inode = file_inode(file);
u64 __user *argp = (u64 __user *)arg;
- u64 hint;
+ u64 hint = inode->i_write_hint;
- switch (cmd) {
- case F_GET_RW_HINT:
- hint = inode->i_write_hint;
- if (copy_to_user(argp, &hint, sizeof(*argp)))
- return -EFAULT;
- return 0;
- case F_SET_RW_HINT:
- if (copy_from_user(&hint, argp, sizeof(hint)))
- return -EFAULT;
- if (!rw_hint_valid(hint))
- return -EINVAL;
+ if (copy_to_user(argp, &hint, sizeof(*argp)))
+ return -EFAULT;
+ return 0;
+}
- inode_lock(inode);
- inode->i_write_hint = hint;
- inode_unlock(inode);
- return 0;
- default:
+static long fcntl_set_rw_hint(struct file *file, unsigned int cmd,
+ unsigned long arg)
+{
+ struct inode *inode = file_inode(file);
+ u64 __user *argp = (u64 __user *)arg;
+ u64 hint;
+
+ if (copy_from_user(&hint, argp, sizeof(hint)))
+ return -EFAULT;
+ if (!rw_hint_valid(hint))
return -EINVAL;
- }
+
+ inode_lock(inode);
+ inode->i_write_hint = hint;
+ inode_unlock(inode);
+
+ return 0;
}
static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
@@ -421,8 +424,10 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
err = memfd_fcntl(filp, cmd, argi);
break;
case F_GET_RW_HINT:
+ err = fcntl_get_rw_hint(filp, cmd, arg);
+ break;
case F_SET_RW_HINT:
- err = fcntl_rw_hint(filp, cmd, arg);
+ err = fcntl_set_rw_hint(filp, cmd, arg);
break;
default:
break;
^ permalink raw reply related [flat|nested] 42+ messages in thread
* Re: [PATCH v9 03/19] fs: Split fcntl_rw_hint()
2024-01-30 21:48 ` [PATCH v9 03/19] fs: Split fcntl_rw_hint() Bart Van Assche
@ 2024-01-31 14:52 ` Kanchan Joshi
0 siblings, 0 replies; 42+ messages in thread
From: Kanchan Joshi @ 2024-01-31 14:52 UTC (permalink / raw)
To: Bart Van Assche, Martin K . Petersen
Cc: linux-scsi, linux-block, linux-fsdevel, Jens Axboe,
Christoph Hellwig, Daejun Park, Jeff Layton, Chuck Lever,
Stephen Rothwell, Alexander Viro, Christian Brauner
On 1/31/2024 3:18 AM, Bart Van Assche wrote:
> Split fcntl_rw_hint() such that there is one helper function per fcntl. No
> functionality is changed by this patch.
Reviewed-by: Kanchan Joshi <joshi.k@samsung.com>
^ permalink raw reply [flat|nested] 42+ messages in thread
* [PATCH v9 04/19] fs: Move enum rw_hint into a new header file
2024-01-30 21:48 [PATCH v9 00/19] Pass data lifetime information to SCSI disk devices Bart Van Assche
` (2 preceding siblings ...)
2024-01-30 21:48 ` [PATCH v9 03/19] fs: Split fcntl_rw_hint() Bart Van Assche
@ 2024-01-30 21:48 ` Bart Van Assche
2024-01-31 7:48 ` Chao Yu
2024-01-30 21:48 ` [PATCH v9 05/19] fs: Propagate write hints to the struct block_device inode Bart Van Assche
` (14 subsequent siblings)
18 siblings, 1 reply; 42+ messages in thread
From: Bart Van Assche @ 2024-01-30 21:48 UTC (permalink / raw)
To: Martin K . Petersen
Cc: linux-scsi, linux-block, linux-fsdevel, Jens Axboe,
Christoph Hellwig, Daejun Park, Kanchan Joshi, Bart Van Assche,
Alexander Viro, Christian Brauner, Jan Kara, Jaegeuk Kim, Chao Yu,
Jeff Layton, Chuck Lever
Move enum rw_hint into a new header file to prepare for using this data
type in the block layer. Add the attribute __packed to reduce the space
occupied by instances of this data type from four bytes to one byte.
Change the data type of i_write_hint from u8 into enum rw_hint.
Reviewed-by: Christoph Hellwig <hch@lst.de>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Christian Brauner <brauner@kernel.org>
Cc: Jan Kara <jack@suse.cz>
Cc: Christoph Hellwig <hch@lst.de>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
---
fs/f2fs/f2fs.h | 1 +
fs/fcntl.c | 1 +
fs/inode.c | 1 +
include/linux/fs.h | 16 ++--------------
include/linux/rw_hint.h | 24 ++++++++++++++++++++++++
5 files changed, 29 insertions(+), 14 deletions(-)
create mode 100644 include/linux/rw_hint.h
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 65294e3b0bef..01fde6d44bf6 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -24,6 +24,7 @@
#include <linux/blkdev.h>
#include <linux/quotaops.h>
#include <linux/part_stat.h>
+#include <linux/rw_hint.h>
#include <crypto/hash.h>
#include <linux/fscrypt.h>
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 5fa2d95114bf..fc73c5fae43c 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -27,6 +27,7 @@
#include <linux/memfd.h>
#include <linux/compat.h>
#include <linux/mount.h>
+#include <linux/rw_hint.h>
#include <linux/poll.h>
#include <asm/siginfo.h>
diff --git a/fs/inode.c b/fs/inode.c
index 91048c4c9c9e..1aba6c0bf26a 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -20,6 +20,7 @@
#include <linux/ratelimit.h>
#include <linux/list_lru.h>
#include <linux/iversion.h>
+#include <linux/rw_hint.h>
#include <trace/events/writeback.h>
#include "internal.h"
diff --git a/include/linux/fs.h b/include/linux/fs.h
index ed5966a70495..bdabda5dc364 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -43,6 +43,7 @@
#include <linux/cred.h>
#include <linux/mnt_idmapping.h>
#include <linux/slab.h>
+#include <linux/rw_hint.h>
#include <asm/byteorder.h>
#include <uapi/linux/fs.h>
@@ -309,19 +310,6 @@ struct address_space;
struct writeback_control;
struct readahead_control;
-/*
- * Write life time hint values.
- * Stored in struct inode as u8.
- */
-enum rw_hint {
- WRITE_LIFE_NOT_SET = 0,
- WRITE_LIFE_NONE = RWH_WRITE_LIFE_NONE,
- WRITE_LIFE_SHORT = RWH_WRITE_LIFE_SHORT,
- WRITE_LIFE_MEDIUM = RWH_WRITE_LIFE_MEDIUM,
- WRITE_LIFE_LONG = RWH_WRITE_LIFE_LONG,
- WRITE_LIFE_EXTREME = RWH_WRITE_LIFE_EXTREME,
-};
-
/* Match RWF_* bits to IOCB bits */
#define IOCB_HIPRI (__force int) RWF_HIPRI
#define IOCB_DSYNC (__force int) RWF_DSYNC
@@ -677,7 +665,7 @@ struct inode {
spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */
unsigned short i_bytes;
u8 i_blkbits;
- u8 i_write_hint;
+ enum rw_hint i_write_hint;
blkcnt_t i_blocks;
#ifdef __NEED_I_SIZE_ORDERED
diff --git a/include/linux/rw_hint.h b/include/linux/rw_hint.h
new file mode 100644
index 000000000000..309ca72f2dfb
--- /dev/null
+++ b/include/linux/rw_hint.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_RW_HINT_H
+#define _LINUX_RW_HINT_H
+
+#include <linux/build_bug.h>
+#include <linux/compiler_attributes.h>
+#include <uapi/linux/fcntl.h>
+
+/* Block storage write lifetime hint values. */
+enum rw_hint {
+ WRITE_LIFE_NOT_SET = RWH_WRITE_LIFE_NOT_SET,
+ WRITE_LIFE_NONE = RWH_WRITE_LIFE_NONE,
+ WRITE_LIFE_SHORT = RWH_WRITE_LIFE_SHORT,
+ WRITE_LIFE_MEDIUM = RWH_WRITE_LIFE_MEDIUM,
+ WRITE_LIFE_LONG = RWH_WRITE_LIFE_LONG,
+ WRITE_LIFE_EXTREME = RWH_WRITE_LIFE_EXTREME,
+} __packed;
+
+/* Sparse ignores __packed annotations on enums, hence the #ifndef below. */
+#ifndef __CHECKER__
+static_assert(sizeof(enum rw_hint) == 1);
+#endif
+
+#endif /* _LINUX_RW_HINT_H */
^ permalink raw reply related [flat|nested] 42+ messages in thread
* Re: [PATCH v9 04/19] fs: Move enum rw_hint into a new header file
2024-01-30 21:48 ` [PATCH v9 04/19] fs: Move enum rw_hint into a new header file Bart Van Assche
@ 2024-01-31 7:48 ` Chao Yu
0 siblings, 0 replies; 42+ messages in thread
From: Chao Yu @ 2024-01-31 7:48 UTC (permalink / raw)
To: Bart Van Assche, Martin K . Petersen
Cc: linux-scsi, linux-block, linux-fsdevel, Jens Axboe,
Christoph Hellwig, Daejun Park, Kanchan Joshi, Alexander Viro,
Christian Brauner, Jan Kara, Jaegeuk Kim, Jeff Layton,
Chuck Lever
On 2024/1/31 5:48, Bart Van Assche wrote:
> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> index 65294e3b0bef..01fde6d44bf6 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -24,6 +24,7 @@
> #include <linux/blkdev.h>
> #include <linux/quotaops.h>
> #include <linux/part_stat.h>
> +#include <linux/rw_hint.h>
> #include <crypto/hash.h>
For f2fs part,
Acked-by: Chao Yu <chao@kernel.org>
Thanks,
^ permalink raw reply [flat|nested] 42+ messages in thread
* [PATCH v9 05/19] fs: Propagate write hints to the struct block_device inode
2024-01-30 21:48 [PATCH v9 00/19] Pass data lifetime information to SCSI disk devices Bart Van Assche
` (3 preceding siblings ...)
2024-01-30 21:48 ` [PATCH v9 04/19] fs: Move enum rw_hint into a new header file Bart Van Assche
@ 2024-01-30 21:48 ` Bart Van Assche
2024-01-31 14:55 ` Kanchan Joshi
2024-01-30 21:48 ` [PATCH v9 06/19] block, fs: Restore the per-bio/request data lifetime fields Bart Van Assche
` (13 subsequent siblings)
18 siblings, 1 reply; 42+ messages in thread
From: Bart Van Assche @ 2024-01-30 21:48 UTC (permalink / raw)
To: Martin K . Petersen
Cc: linux-scsi, linux-block, linux-fsdevel, Jens Axboe,
Christoph Hellwig, Daejun Park, Kanchan Joshi, Bart Van Assche,
Alexander Viro, Christian Brauner, Jeff Layton, Chuck Lever
Write hints applied with F_SET_RW_HINT on a block device affect the
block device inode only. Propagate these hints to the inode associated
with struct block_device because that is the inode used when writing
back dirty pages.
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Christian Brauner <brauner@kernel.org>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Jeff Layton <jlayton@kernel.org>
Cc: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
---
fs/fcntl.c | 11 +++++++++++
1 file changed, 11 insertions(+)
diff --git a/fs/fcntl.c b/fs/fcntl.c
index fc73c5fae43c..cfb52c3a4577 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -319,6 +319,17 @@ static long fcntl_set_rw_hint(struct file *file, unsigned int cmd,
inode->i_write_hint = hint;
inode_unlock(inode);
+ /*
+ * file->f_mapping->host may differ from inode. As an example,
+ * blkdev_open() modifies file->f_mapping.
+ */
+ if (file->f_mapping->host != inode) {
+ inode = file->f_mapping->host;
+ inode_lock(inode);
+ inode->i_write_hint = hint;
+ inode_unlock(inode);
+ }
+
return 0;
}
^ permalink raw reply related [flat|nested] 42+ messages in thread
* Re: [PATCH v9 05/19] fs: Propagate write hints to the struct block_device inode
2024-01-30 21:48 ` [PATCH v9 05/19] fs: Propagate write hints to the struct block_device inode Bart Van Assche
@ 2024-01-31 14:55 ` Kanchan Joshi
0 siblings, 0 replies; 42+ messages in thread
From: Kanchan Joshi @ 2024-01-31 14:55 UTC (permalink / raw)
To: Bart Van Assche, Martin K . Petersen
Cc: linux-scsi, linux-block, linux-fsdevel, Jens Axboe,
Christoph Hellwig, Daejun Park, Alexander Viro, Christian Brauner,
Jeff Layton, Chuck Lever
On 1/31/2024 3:18 AM, Bart Van Assche wrote:
> Write hints applied with F_SET_RW_HINT on a block device affect the
> block device inode only. Propagate these hints to the inode associated
> with struct block_device because that is the inode used when writing
> back dirty pages.
Reviewed-by: Kanchan Joshi <joshi.k@samsung.com>
^ permalink raw reply [flat|nested] 42+ messages in thread
* [PATCH v9 06/19] block, fs: Restore the per-bio/request data lifetime fields
2024-01-30 21:48 [PATCH v9 00/19] Pass data lifetime information to SCSI disk devices Bart Van Assche
` (4 preceding siblings ...)
2024-01-30 21:48 ` [PATCH v9 05/19] fs: Propagate write hints to the struct block_device inode Bart Van Assche
@ 2024-01-30 21:48 ` Bart Van Assche
2024-01-31 14:55 ` Kanchan Joshi
2024-01-30 21:48 ` [PATCH v9 07/19] fs/f2fs: Restore the whint_mode mount option Bart Van Assche
` (12 subsequent siblings)
18 siblings, 1 reply; 42+ messages in thread
From: Bart Van Assche @ 2024-01-30 21:48 UTC (permalink / raw)
To: Martin K . Petersen
Cc: linux-scsi, linux-block, linux-fsdevel, Jens Axboe,
Christoph Hellwig, Daejun Park, Kanchan Joshi, Bart Van Assche,
Alexander Viro, Christian Brauner
Restore support for passing data lifetime information from filesystems to
block drivers. This patch reverts commit b179c98f7697 ("block: Remove
request.write_hint") and commit c75e707fe1aa ("block: remove the
per-bio/request write hint").
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Christoph Hellwig <hch@lst.de>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
---
block/bio.c | 2 ++
block/blk-crypto-fallback.c | 1 +
block/blk-merge.c | 8 ++++++++
block/blk-mq.c | 2 ++
block/bounce.c | 1 +
block/fops.c | 3 +++
fs/buffer.c | 12 ++++++++----
fs/direct-io.c | 2 ++
fs/iomap/buffered-io.c | 2 ++
fs/iomap/direct-io.c | 1 +
fs/mpage.c | 1 +
include/linux/blk-mq.h | 2 ++
include/linux/blk_types.h | 2 ++
13 files changed, 35 insertions(+), 4 deletions(-)
diff --git a/block/bio.c b/block/bio.c
index b9642a41f286..c9223e9d31da 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -251,6 +251,7 @@ void bio_init(struct bio *bio, struct block_device *bdev, struct bio_vec *table,
bio->bi_opf = opf;
bio->bi_flags = 0;
bio->bi_ioprio = 0;
+ bio->bi_write_hint = 0;
bio->bi_status = 0;
bio->bi_iter.bi_sector = 0;
bio->bi_iter.bi_size = 0;
@@ -813,6 +814,7 @@ static int __bio_clone(struct bio *bio, struct bio *bio_src, gfp_t gfp)
{
bio_set_flag(bio, BIO_CLONED);
bio->bi_ioprio = bio_src->bi_ioprio;
+ bio->bi_write_hint = bio_src->bi_write_hint;
bio->bi_iter = bio_src->bi_iter;
if (bio->bi_bdev) {
diff --git a/block/blk-crypto-fallback.c b/block/blk-crypto-fallback.c
index e6468eab2681..b1e7415f8439 100644
--- a/block/blk-crypto-fallback.c
+++ b/block/blk-crypto-fallback.c
@@ -172,6 +172,7 @@ static struct bio *blk_crypto_fallback_clone_bio(struct bio *bio_src)
if (bio_flagged(bio_src, BIO_REMAPPED))
bio_set_flag(bio, BIO_REMAPPED);
bio->bi_ioprio = bio_src->bi_ioprio;
+ bio->bi_write_hint = bio_src->bi_write_hint;
bio->bi_iter.bi_sector = bio_src->bi_iter.bi_sector;
bio->bi_iter.bi_size = bio_src->bi_iter.bi_size;
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 2d470cf2173e..2a06fd33039d 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -810,6 +810,10 @@ static struct request *attempt_merge(struct request_queue *q,
if (rq_data_dir(req) != rq_data_dir(next))
return NULL;
+ /* Don't merge requests with different write hints. */
+ if (req->write_hint != next->write_hint)
+ return NULL;
+
if (req->ioprio != next->ioprio)
return NULL;
@@ -937,6 +941,10 @@ bool blk_rq_merge_ok(struct request *rq, struct bio *bio)
if (!bio_crypt_rq_ctx_compatible(rq, bio))
return false;
+ /* Don't merge requests with different write hints. */
+ if (rq->write_hint != bio->bi_write_hint)
+ return false;
+
if (rq->ioprio != bio_prio(bio))
return false;
diff --git a/block/blk-mq.c b/block/blk-mq.c
index aa87fcfda1ec..34ceb15d2ea4 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2585,6 +2585,7 @@ static void blk_mq_bio_to_request(struct request *rq, struct bio *bio,
rq->cmd_flags |= REQ_FAILFAST_MASK;
rq->__sector = bio->bi_iter.bi_sector;
+ rq->write_hint = bio->bi_write_hint;
blk_rq_bio_prep(rq, bio, nr_segs);
/* This can't fail, since GFP_NOIO includes __GFP_DIRECT_RECLAIM. */
@@ -3185,6 +3186,7 @@ int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
}
rq->nr_phys_segments = rq_src->nr_phys_segments;
rq->ioprio = rq_src->ioprio;
+ rq->write_hint = rq_src->write_hint;
if (rq->bio && blk_crypto_rq_bio_prep(rq, rq->bio, gfp_mask) < 0)
goto free_and_out;
diff --git a/block/bounce.c b/block/bounce.c
index 7cfcb242f9a1..d6a5219f29dd 100644
--- a/block/bounce.c
+++ b/block/bounce.c
@@ -169,6 +169,7 @@ static struct bio *bounce_clone_bio(struct bio *bio_src)
if (bio_flagged(bio_src, BIO_REMAPPED))
bio_set_flag(bio, BIO_REMAPPED);
bio->bi_ioprio = bio_src->bi_ioprio;
+ bio->bi_write_hint = bio_src->bi_write_hint;
bio->bi_iter.bi_sector = bio_src->bi_iter.bi_sector;
bio->bi_iter.bi_size = bio_src->bi_iter.bi_size;
diff --git a/block/fops.c b/block/fops.c
index 0cf8cf72cdfa..ab0e37d1dc48 100644
--- a/block/fops.c
+++ b/block/fops.c
@@ -73,6 +73,7 @@ static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb,
bio_init(&bio, bdev, vecs, nr_pages, dio_bio_write_op(iocb));
}
bio.bi_iter.bi_sector = pos >> SECTOR_SHIFT;
+ bio.bi_write_hint = file_inode(iocb->ki_filp)->i_write_hint;
bio.bi_ioprio = iocb->ki_ioprio;
ret = bio_iov_iter_get_pages(&bio, iter);
@@ -203,6 +204,7 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
for (;;) {
bio->bi_iter.bi_sector = pos >> SECTOR_SHIFT;
+ bio->bi_write_hint = file_inode(iocb->ki_filp)->i_write_hint;
bio->bi_private = dio;
bio->bi_end_io = blkdev_bio_end_io;
bio->bi_ioprio = iocb->ki_ioprio;
@@ -321,6 +323,7 @@ static ssize_t __blkdev_direct_IO_async(struct kiocb *iocb,
dio->flags = 0;
dio->iocb = iocb;
bio->bi_iter.bi_sector = pos >> SECTOR_SHIFT;
+ bio->bi_write_hint = file_inode(iocb->ki_filp)->i_write_hint;
bio->bi_end_io = blkdev_bio_end_io_async;
bio->bi_ioprio = iocb->ki_ioprio;
diff --git a/fs/buffer.c b/fs/buffer.c
index d3bcf601d3e5..eb7d3ded2c33 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -55,7 +55,7 @@
static int fsync_buffers_list(spinlock_t *lock, struct list_head *list);
static void submit_bh_wbc(blk_opf_t opf, struct buffer_head *bh,
- struct writeback_control *wbc);
+ enum rw_hint hint, struct writeback_control *wbc);
#define BH_ENTRY(list) list_entry((list), struct buffer_head, b_assoc_buffers)
@@ -1889,7 +1889,8 @@ int __block_write_full_folio(struct inode *inode, struct folio *folio,
do {
struct buffer_head *next = bh->b_this_page;
if (buffer_async_write(bh)) {
- submit_bh_wbc(REQ_OP_WRITE | write_flags, bh, wbc);
+ submit_bh_wbc(REQ_OP_WRITE | write_flags, bh,
+ inode->i_write_hint, wbc);
nr_underway++;
}
bh = next;
@@ -1944,7 +1945,8 @@ int __block_write_full_folio(struct inode *inode, struct folio *folio,
struct buffer_head *next = bh->b_this_page;
if (buffer_async_write(bh)) {
clear_buffer_dirty(bh);
- submit_bh_wbc(REQ_OP_WRITE | write_flags, bh, wbc);
+ submit_bh_wbc(REQ_OP_WRITE | write_flags, bh,
+ inode->i_write_hint, wbc);
nr_underway++;
}
bh = next;
@@ -2756,6 +2758,7 @@ static void end_bio_bh_io_sync(struct bio *bio)
}
static void submit_bh_wbc(blk_opf_t opf, struct buffer_head *bh,
+ enum rw_hint write_hint,
struct writeback_control *wbc)
{
const enum req_op op = opf & REQ_OP_MASK;
@@ -2783,6 +2786,7 @@ static void submit_bh_wbc(blk_opf_t opf, struct buffer_head *bh,
fscrypt_set_bio_crypt_ctx_bh(bio, bh, GFP_NOIO);
bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
+ bio->bi_write_hint = write_hint;
__bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh));
@@ -2802,7 +2806,7 @@ static void submit_bh_wbc(blk_opf_t opf, struct buffer_head *bh,
void submit_bh(blk_opf_t opf, struct buffer_head *bh)
{
- submit_bh_wbc(opf, bh, NULL);
+ submit_bh_wbc(opf, bh, WRITE_LIFE_NOT_SET, NULL);
}
EXPORT_SYMBOL(submit_bh);
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 60456263a338..62c97ff9e852 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -410,6 +410,8 @@ dio_bio_alloc(struct dio *dio, struct dio_submit *sdio,
bio->bi_end_io = dio_bio_end_io;
if (dio->is_pinned)
bio_set_flag(bio, BIO_PAGE_PINNED);
+ bio->bi_write_hint = file_inode(dio->iocb->ki_filp)->i_write_hint;
+
sdio->bio = bio;
sdio->logical_offset_in_bio = sdio->cur_page_fs_offset;
}
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index 093c4515b22a..c961511bece1 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -1667,6 +1667,7 @@ iomap_alloc_ioend(struct inode *inode, struct iomap_writepage_ctx *wpc,
REQ_OP_WRITE | wbc_to_write_flags(wbc),
GFP_NOFS, &iomap_ioend_bioset);
bio->bi_iter.bi_sector = sector;
+ bio->bi_write_hint = inode->i_write_hint;
wbc_init_bio(wbc, bio);
ioend = container_of(bio, struct iomap_ioend, io_inline_bio);
@@ -1697,6 +1698,7 @@ iomap_chain_bio(struct bio *prev)
new = bio_alloc(prev->bi_bdev, BIO_MAX_VECS, prev->bi_opf, GFP_NOFS);
bio_clone_blkg_association(new, prev);
new->bi_iter.bi_sector = bio_end_sector(prev);
+ new->bi_write_hint = prev->bi_write_hint;
bio_chain(prev, new);
bio_get(prev); /* for iomap_finish_ioend */
diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c
index bcd3f8cf5ea4..f3b43d223a46 100644
--- a/fs/iomap/direct-io.c
+++ b/fs/iomap/direct-io.c
@@ -380,6 +380,7 @@ static loff_t iomap_dio_bio_iter(const struct iomap_iter *iter,
fscrypt_set_bio_crypt_ctx(bio, inode, pos >> inode->i_blkbits,
GFP_KERNEL);
bio->bi_iter.bi_sector = iomap_sector(iomap, pos);
+ bio->bi_write_hint = inode->i_write_hint;
bio->bi_ioprio = dio->iocb->ki_ioprio;
bio->bi_private = dio;
bio->bi_end_io = iomap_dio_bio_end_io;
diff --git a/fs/mpage.c b/fs/mpage.c
index 738882e0766d..fa8b99a199fa 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -605,6 +605,7 @@ static int __mpage_writepage(struct folio *folio, struct writeback_control *wbc,
GFP_NOFS);
bio->bi_iter.bi_sector = first_block << (blkbits - 9);
wbc_init_bio(wbc, bio);
+ bio->bi_write_hint = inode->i_write_hint;
}
/*
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 7a8150a5f051..492b0128b5d9 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -8,6 +8,7 @@
#include <linux/scatterlist.h>
#include <linux/prefetch.h>
#include <linux/srcu.h>
+#include <linux/rw_hint.h>
struct blk_mq_tags;
struct blk_flush_queue;
@@ -135,6 +136,7 @@ struct request {
struct blk_crypto_keyslot *crypt_keyslot;
#endif
+ enum rw_hint write_hint;
unsigned short ioprio;
enum mq_rq_state state;
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index f288c94374b3..12d87cef2c03 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -10,6 +10,7 @@
#include <linux/bvec.h>
#include <linux/device.h>
#include <linux/ktime.h>
+#include <linux/rw_hint.h>
struct bio_set;
struct bio;
@@ -269,6 +270,7 @@ struct bio {
*/
unsigned short bi_flags; /* BIO_* below */
unsigned short bi_ioprio;
+ enum rw_hint bi_write_hint;
blk_status_t bi_status;
atomic_t __bi_remaining;
^ permalink raw reply related [flat|nested] 42+ messages in thread* Re: [PATCH v9 06/19] block, fs: Restore the per-bio/request data lifetime fields
2024-01-30 21:48 ` [PATCH v9 06/19] block, fs: Restore the per-bio/request data lifetime fields Bart Van Assche
@ 2024-01-31 14:55 ` Kanchan Joshi
0 siblings, 0 replies; 42+ messages in thread
From: Kanchan Joshi @ 2024-01-31 14:55 UTC (permalink / raw)
To: Bart Van Assche, Martin K . Petersen
Cc: linux-scsi, linux-block, linux-fsdevel, Jens Axboe,
Christoph Hellwig, Daejun Park, Alexander Viro, Christian Brauner
On 1/31/2024 3:18 AM, Bart Van Assche wrote:
> Restore support for passing data lifetime information from filesystems to
> block drivers. This patch reverts commit b179c98f7697 ("block: Remove
> request.write_hint") and commit c75e707fe1aa ("block: remove the
> per-bio/request write hint")
Reviewed-by: Kanchan Joshi <joshi.k@samsung.com>
^ permalink raw reply [flat|nested] 42+ messages in thread
* [PATCH v9 07/19] fs/f2fs: Restore the whint_mode mount option
2024-01-30 21:48 [PATCH v9 00/19] Pass data lifetime information to SCSI disk devices Bart Van Assche
` (5 preceding siblings ...)
2024-01-30 21:48 ` [PATCH v9 06/19] block, fs: Restore the per-bio/request data lifetime fields Bart Van Assche
@ 2024-01-30 21:48 ` Bart Van Assche
2024-02-01 10:27 ` Chao Yu
2024-01-30 21:48 ` [PATCH v9 08/19] fs/f2fs: Restore support for tracing data lifetimes Bart Van Assche
` (11 subsequent siblings)
18 siblings, 1 reply; 42+ messages in thread
From: Bart Van Assche @ 2024-01-30 21:48 UTC (permalink / raw)
To: Martin K . Petersen
Cc: linux-scsi, linux-block, linux-fsdevel, Jens Axboe,
Christoph Hellwig, Daejun Park, Kanchan Joshi, Bart Van Assche,
Jaegeuk Kim, Chao Yu, Jonathan Corbet
Allow users to control which write hints are passed to the block layer:
- whint_mode=off - no write hints are passed to the block layer.
- whint_mode=user-based - write hints are derived from inode->i_write_hint
and also from the system.advise xattr information (hot and cold bits).
- whint_mode=fs-based - F2FS chooses write hints.
This patch reverts commit 930e2607638d ("f2fs: remove obsolete whint_mode").
Cc: Jaegeuk Kim <jaegeuk@kernel.org>
Cc: Chao Yu <chao@kernel.org>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
---
Documentation/filesystems/f2fs.rst | 70 ++++++++++++++++++++++
fs/f2fs/data.c | 2 +
fs/f2fs/f2fs.h | 9 +++
fs/f2fs/segment.c | 95 ++++++++++++++++++++++++++++++
fs/f2fs/super.c | 32 +++++++++-
5 files changed, 207 insertions(+), 1 deletion(-)
diff --git a/Documentation/filesystems/f2fs.rst b/Documentation/filesystems/f2fs.rst
index d32c6209685d..de412ddebcc8 100644
--- a/Documentation/filesystems/f2fs.rst
+++ b/Documentation/filesystems/f2fs.rst
@@ -242,6 +242,12 @@ offgrpjquota Turn off group journalled quota.
offprjjquota Turn off project journalled quota.
quota Enable plain user disk quota accounting.
noquota Disable all plain disk quota option.
+whint_mode=%s Control which write hints are passed down to block
+ layer. This supports "off", "user-based", and
+ "fs-based". In "off" mode (default), f2fs does not pass
+ down hints. In "user-based" mode, f2fs tries to pass
+ down hints given by users. And in "fs-based" mode, f2fs
+ passes down hints with its policy.
alloc_mode=%s Adjust block allocation policy, which supports "reuse"
and "default".
fsync_mode=%s Control the policy of fsync. Currently supports "posix",
@@ -776,6 +782,70 @@ In order to identify whether the data in the victim segment are valid or not,
F2FS manages a bitmap. Each bit represents the validity of a block, and the
bitmap is composed of a bit stream covering whole blocks in main area.
+Write-hint Policy
+-----------------
+
+1) whint_mode=off. F2FS only passes down WRITE_LIFE_NOT_SET.
+
+2) whint_mode=user-based. F2FS tries to pass down hints given by
+users.
+
+===================== ======================== ===================
+User F2FS Block
+===================== ======================== ===================
+N/A META WRITE_LIFE_NOT_SET
+N/A HOT_NODE "
+N/A WARM_NODE "
+N/A COLD_NODE "
+ioctl(COLD) COLD_DATA WRITE_LIFE_EXTREME
+extension list " "
+
+-- buffered io
+WRITE_LIFE_EXTREME COLD_DATA WRITE_LIFE_EXTREME
+WRITE_LIFE_SHORT HOT_DATA WRITE_LIFE_SHORT
+WRITE_LIFE_NOT_SET WARM_DATA WRITE_LIFE_NOT_SET
+WRITE_LIFE_NONE " "
+WRITE_LIFE_MEDIUM " "
+WRITE_LIFE_LONG " "
+
+-- direct io
+WRITE_LIFE_EXTREME COLD_DATA WRITE_LIFE_EXTREME
+WRITE_LIFE_SHORT HOT_DATA WRITE_LIFE_SHORT
+WRITE_LIFE_NOT_SET WARM_DATA WRITE_LIFE_NOT_SET
+WRITE_LIFE_NONE " WRITE_LIFE_NONE
+WRITE_LIFE_MEDIUM " WRITE_LIFE_MEDIUM
+WRITE_LIFE_LONG " WRITE_LIFE_LONG
+===================== ======================== ===================
+
+3) whint_mode=fs-based. F2FS passes down hints with its policy.
+
+===================== ======================== ===================
+User F2FS Block
+===================== ======================== ===================
+N/A META WRITE_LIFE_MEDIUM
+N/A HOT_NODE WRITE_LIFE_NOT_SET
+N/A WARM_NODE "
+N/A COLD_NODE WRITE_LIFE_NONE
+ioctl(COLD) COLD_DATA WRITE_LIFE_EXTREME
+extension list " "
+
+-- buffered io
+WRITE_LIFE_EXTREME COLD_DATA WRITE_LIFE_EXTREME
+WRITE_LIFE_SHORT HOT_DATA WRITE_LIFE_SHORT
+WRITE_LIFE_NOT_SET WARM_DATA WRITE_LIFE_LONG
+WRITE_LIFE_NONE " "
+WRITE_LIFE_MEDIUM " "
+WRITE_LIFE_LONG " "
+
+-- direct io
+WRITE_LIFE_EXTREME COLD_DATA WRITE_LIFE_EXTREME
+WRITE_LIFE_SHORT HOT_DATA WRITE_LIFE_SHORT
+WRITE_LIFE_NOT_SET WARM_DATA WRITE_LIFE_NOT_SET
+WRITE_LIFE_NONE " WRITE_LIFE_NONE
+WRITE_LIFE_MEDIUM " WRITE_LIFE_MEDIUM
+WRITE_LIFE_LONG " WRITE_LIFE_LONG
+===================== ======================== ===================
+
Fallocate(2) Policy
-------------------
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 26e317696b33..b6cf540848ab 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -478,6 +478,8 @@ static struct bio *__bio_alloc(struct f2fs_io_info *fio, int npages)
} else {
bio->bi_end_io = f2fs_write_end_io;
bio->bi_private = sbi;
+ bio->bi_write_hint = f2fs_io_type_to_rw_hint(sbi,
+ fio->type, fio->temp);
}
iostat_alloc_and_bind_ctx(sbi, bio, NULL);
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 01fde6d44bf6..3600f61f2840 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -141,6 +141,12 @@ struct f2fs_rwsem {
#endif
};
+enum whint_mode {
+ WHINT_MODE_OFF, /* do not pass down write hints */
+ WHINT_MODE_USER, /* try to pass down hints given by users */
+ WHINT_MODE_FS, /* pass down hints with F2FS policy */
+};
+
struct f2fs_mount_info {
unsigned int opt;
int write_io_size_bits; /* Write IO size bits */
@@ -158,6 +164,7 @@ struct f2fs_mount_info {
int s_jquota_fmt; /* Format of quota to use */
#endif
/* For which write hints are passed down to block layer */
+ enum whint_mode whint_mode;
int alloc_mode; /* segment allocation policy */
int fsync_mode; /* fsync policy */
int fs_mode; /* fs mode: LFS or ADAPTIVE */
@@ -3738,6 +3745,8 @@ void f2fs_destroy_segment_manager(struct f2fs_sb_info *sbi);
int __init f2fs_create_segment_manager_caches(void);
void f2fs_destroy_segment_manager_caches(void);
int f2fs_rw_hint_to_seg_type(enum rw_hint hint);
+enum rw_hint f2fs_io_type_to_rw_hint(struct f2fs_sb_info *sbi,
+ enum page_type type, enum temp_type temp);
unsigned int f2fs_usable_segs_in_sec(struct f2fs_sb_info *sbi,
unsigned int segno);
unsigned int f2fs_usable_blks_in_seg(struct f2fs_sb_info *sbi,
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 4c8836ded90f..b260aee614e4 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -3291,6 +3291,101 @@ int f2fs_rw_hint_to_seg_type(enum rw_hint hint)
}
}
+/* This returns write hints for each segment type. These hints will be
+ * passed down to the block layer. The mapping tables below depend on
+ * the mount option 'whint_mode'.
+ *
+ * 1) whint_mode=off. F2FS only passes down WRITE_LIFE_NOT_SET.
+ *
+ * 2) whint_mode=user-based. F2FS tries to pass down hints given by users.
+ *
+ * User F2FS Block
+ * ---- ---- -----
+ * META WRITE_LIFE_NOT_SET
+ * HOT_NODE "
+ * WARM_NODE "
+ * COLD_NODE "
+ * ioctl(COLD) COLD_DATA WRITE_LIFE_EXTREME
+ * extension list " "
+ *
+ * -- buffered io
+ * WRITE_LIFE_EXTREME COLD_DATA WRITE_LIFE_EXTREME
+ * WRITE_LIFE_SHORT HOT_DATA WRITE_LIFE_SHORT
+ * WRITE_LIFE_NOT_SET WARM_DATA WRITE_LIFE_NOT_SET
+ * WRITE_LIFE_NONE " "
+ * WRITE_LIFE_MEDIUM " "
+ * WRITE_LIFE_LONG " "
+ *
+ * -- direct io
+ * WRITE_LIFE_EXTREME COLD_DATA WRITE_LIFE_EXTREME
+ * WRITE_LIFE_SHORT HOT_DATA WRITE_LIFE_SHORT
+ * WRITE_LIFE_NOT_SET WARM_DATA WRITE_LIFE_NOT_SET
+ * WRITE_LIFE_NONE " WRITE_LIFE_NONE
+ * WRITE_LIFE_MEDIUM " WRITE_LIFE_MEDIUM
+ * WRITE_LIFE_LONG " WRITE_LIFE_LONG
+ *
+ * 3) whint_mode=fs-based. F2FS passes down hints with its policy.
+ *
+ * User F2FS Block
+ * ---- ---- -----
+ * META WRITE_LIFE_MEDIUM
+ * HOT_NODE WRITE_LIFE_NOT_SET
+ * WARM_NODE "
+ * COLD_NODE WRITE_LIFE_NONE
+ * ioctl(COLD) COLD_DATA WRITE_LIFE_EXTREME
+ * extension list " "
+ *
+ * -- buffered io
+ * WRITE_LIFE_EXTREME COLD_DATA WRITE_LIFE_EXTREME
+ * WRITE_LIFE_SHORT HOT_DATA WRITE_LIFE_SHORT
+ * WRITE_LIFE_NOT_SET WARM_DATA WRITE_LIFE_LONG
+ * WRITE_LIFE_NONE " "
+ * WRITE_LIFE_MEDIUM " "
+ * WRITE_LIFE_LONG " "
+ *
+ * -- direct io
+ * WRITE_LIFE_EXTREME COLD_DATA WRITE_LIFE_EXTREME
+ * WRITE_LIFE_SHORT HOT_DATA WRITE_LIFE_SHORT
+ * WRITE_LIFE_NOT_SET WARM_DATA WRITE_LIFE_NOT_SET
+ * WRITE_LIFE_NONE " WRITE_LIFE_NONE
+ * WRITE_LIFE_MEDIUM " WRITE_LIFE_MEDIUM
+ * WRITE_LIFE_LONG " WRITE_LIFE_LONG
+ */
+
+enum rw_hint f2fs_io_type_to_rw_hint(struct f2fs_sb_info *sbi,
+ enum page_type type, enum temp_type temp)
+{
+ if (F2FS_OPTION(sbi).whint_mode == WHINT_MODE_USER) {
+ if (type == DATA) {
+ if (temp == WARM)
+ return WRITE_LIFE_NOT_SET;
+ else if (temp == HOT)
+ return WRITE_LIFE_SHORT;
+ else if (temp == COLD)
+ return WRITE_LIFE_EXTREME;
+ } else {
+ return WRITE_LIFE_NOT_SET;
+ }
+ } else if (F2FS_OPTION(sbi).whint_mode == WHINT_MODE_FS) {
+ if (type == DATA) {
+ if (temp == WARM)
+ return WRITE_LIFE_LONG;
+ else if (temp == HOT)
+ return WRITE_LIFE_SHORT;
+ else if (temp == COLD)
+ return WRITE_LIFE_EXTREME;
+ } else if (type == NODE) {
+ if (temp == WARM || temp == HOT)
+ return WRITE_LIFE_NOT_SET;
+ else if (temp == COLD)
+ return WRITE_LIFE_NONE;
+ } else if (type == META) {
+ return WRITE_LIFE_MEDIUM;
+ }
+ }
+ return WRITE_LIFE_NOT_SET;
+}
+
static int __get_segment_type_2(struct f2fs_io_info *fio)
{
if (fio->type == DATA)
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index d45ab0992ae5..eea7c760bdcb 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -156,6 +156,7 @@ enum {
Opt_jqfmt_vfsold,
Opt_jqfmt_vfsv0,
Opt_jqfmt_vfsv1,
+ Opt_whint,
Opt_alloc,
Opt_fsync,
Opt_test_dummy_encryption,
@@ -235,6 +236,7 @@ static match_table_t f2fs_tokens = {
{Opt_jqfmt_vfsold, "jqfmt=vfsold"},
{Opt_jqfmt_vfsv0, "jqfmt=vfsv0"},
{Opt_jqfmt_vfsv1, "jqfmt=vfsv1"},
+ {Opt_whint, "whint_mode=%s"},
{Opt_alloc, "alloc_mode=%s"},
{Opt_fsync, "fsync_mode=%s"},
{Opt_test_dummy_encryption, "test_dummy_encryption=%s"},
@@ -1026,6 +1028,22 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
f2fs_info(sbi, "quota operations not supported");
break;
#endif
+ case Opt_whint:
+ name = match_strdup(&args[0]);
+ if (!name)
+ return -ENOMEM;
+ if (!strcmp(name, "user-based")) {
+ F2FS_OPTION(sbi).whint_mode = WHINT_MODE_USER;
+ } else if (!strcmp(name, "off")) {
+ F2FS_OPTION(sbi).whint_mode = WHINT_MODE_OFF;
+ } else if (!strcmp(name, "fs-based")) {
+ F2FS_OPTION(sbi).whint_mode = WHINT_MODE_FS;
+ } else {
+ kfree(name);
+ return -EINVAL;
+ }
+ kfree(name);
+ break;
case Opt_alloc:
name = match_strdup(&args[0]);
if (!name)
@@ -1432,6 +1450,12 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
return -EINVAL;
}
+ /* Do not pass down write hints unless the number of active logs
+ * equals NR_CURSEG_PERSIST_TYPE.
+ */
+ if (F2FS_OPTION(sbi).active_logs != NR_CURSEG_PERSIST_TYPE)
+ F2FS_OPTION(sbi).whint_mode = WHINT_MODE_OFF;
+
if (f2fs_sb_has_readonly(sbi) && !f2fs_readonly(sbi->sb)) {
f2fs_err(sbi, "Allow to mount readonly mode only");
return -EROFS;
@@ -2100,6 +2124,10 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
seq_puts(seq, ",prjquota");
#endif
f2fs_show_quota_options(seq, sbi->sb);
+ if (F2FS_OPTION(sbi).whint_mode == WHINT_MODE_USER)
+ seq_printf(seq, ",whint_mode=%s", "user-based");
+ else if (F2FS_OPTION(sbi).whint_mode == WHINT_MODE_FS)
+ seq_printf(seq, ",whint_mode=%s", "fs-based");
fscrypt_show_test_dummy_encryption(seq, ',', sbi->sb);
@@ -2169,6 +2197,7 @@ static void default_options(struct f2fs_sb_info *sbi, bool remount)
F2FS_OPTION(sbi).active_logs = NR_CURSEG_PERSIST_TYPE;
F2FS_OPTION(sbi).inline_xattr_size = DEFAULT_INLINE_XATTR_ADDRS;
+ F2FS_OPTION(sbi).whint_mode = WHINT_MODE_OFF;
if (le32_to_cpu(F2FS_RAW_SUPER(sbi)->segment_count_main) <=
SMALL_VOLUME_SEGMENTS)
F2FS_OPTION(sbi).alloc_mode = ALLOC_MODE_REUSE;
@@ -2483,7 +2512,8 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
need_stop_gc = true;
}
- if (*flags & SB_RDONLY) {
+ if (*flags & SB_RDONLY ||
+ F2FS_OPTION(sbi).whint_mode != org_mount_opt.whint_mode) {
sync_inodes_sb(sb);
set_sbi_flag(sbi, SBI_IS_DIRTY);
^ permalink raw reply related [flat|nested] 42+ messages in thread* Re: [PATCH v9 07/19] fs/f2fs: Restore the whint_mode mount option
2024-01-30 21:48 ` [PATCH v9 07/19] fs/f2fs: Restore the whint_mode mount option Bart Van Assche
@ 2024-02-01 10:27 ` Chao Yu
0 siblings, 0 replies; 42+ messages in thread
From: Chao Yu @ 2024-02-01 10:27 UTC (permalink / raw)
To: Bart Van Assche, Martin K . Petersen
Cc: linux-scsi, linux-block, linux-fsdevel, Jens Axboe,
Christoph Hellwig, Daejun Park, Kanchan Joshi, Jaegeuk Kim,
Jonathan Corbet
On 2024/1/31 5:48, Bart Van Assche wrote:
> Allow users to control which write hints are passed to the block layer:
> - whint_mode=off - no write hints are passed to the block layer.
> - whint_mode=user-based - write hints are derived from inode->i_write_hint
> and also from the system.advise xattr information (hot and cold bits).
> - whint_mode=fs-based - F2FS chooses write hints.
>
> This patch reverts commit 930e2607638d ("f2fs: remove obsolete whint_mode").
>
> Cc: Jaegeuk Kim <jaegeuk@kernel.org>
> Cc: Chao Yu <chao@kernel.org>
> Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Reviewed-by: Chao Yu <chao@kernel.org>
Thanks,
^ permalink raw reply [flat|nested] 42+ messages in thread
* [PATCH v9 08/19] fs/f2fs: Restore support for tracing data lifetimes
2024-01-30 21:48 [PATCH v9 00/19] Pass data lifetime information to SCSI disk devices Bart Van Assche
` (6 preceding siblings ...)
2024-01-30 21:48 ` [PATCH v9 07/19] fs/f2fs: Restore the whint_mode mount option Bart Van Assche
@ 2024-01-30 21:48 ` Bart Van Assche
2024-02-01 10:27 ` Chao Yu
2024-01-30 21:48 ` [PATCH v9 09/19] scsi: core: Query the Block Limits Extension VPD page Bart Van Assche
` (10 subsequent siblings)
18 siblings, 1 reply; 42+ messages in thread
From: Bart Van Assche @ 2024-01-30 21:48 UTC (permalink / raw)
To: Martin K . Petersen
Cc: linux-scsi, linux-block, linux-fsdevel, Jens Axboe,
Christoph Hellwig, Daejun Park, Kanchan Joshi, Bart Van Assche,
Jaegeuk Kim, Chao Yu, Steven Rostedt, Masami Hiramatsu
This patch restores code that was removed by commit 41d36a9f3e53 ("fs:
remove kiocb.ki_hint").
Cc: Jaegeuk Kim <jaegeuk@kernel.org>
Cc: Chao Yu <chao@kernel.org>
Cc: Christoph Hellwig <hch@lst.de>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
---
include/trace/events/f2fs.h | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h
index 7ed0fc430dc6..db3596478748 100644
--- a/include/trace/events/f2fs.h
+++ b/include/trace/events/f2fs.h
@@ -1032,6 +1032,7 @@ TRACE_EVENT(f2fs_direct_IO_enter,
__field(ino_t, ino)
__field(loff_t, ki_pos)
__field(int, ki_flags)
+ __field(u16, ki_hint)
__field(u16, ki_ioprio)
__field(unsigned long, len)
__field(int, rw)
@@ -1042,16 +1043,19 @@ TRACE_EVENT(f2fs_direct_IO_enter,
__entry->ino = inode->i_ino;
__entry->ki_pos = iocb->ki_pos;
__entry->ki_flags = iocb->ki_flags;
+ __entry->ki_hint =
+ file_inode(iocb->ki_filp)->i_write_hint;
__entry->ki_ioprio = iocb->ki_ioprio;
__entry->len = len;
__entry->rw = rw;
),
- TP_printk("dev = (%d,%d), ino = %lu pos = %lld len = %lu ki_flags = %x ki_ioprio = %x rw = %d",
+ TP_printk("dev = (%d,%d), ino = %lu pos = %lld len = %lu ki_flags = %x ki_hint = %x ki_ioprio = %x rw = %d",
show_dev_ino(__entry),
__entry->ki_pos,
__entry->len,
__entry->ki_flags,
+ __entry->ki_hint,
__entry->ki_ioprio,
__entry->rw)
);
^ permalink raw reply related [flat|nested] 42+ messages in thread* Re: [PATCH v9 08/19] fs/f2fs: Restore support for tracing data lifetimes
2024-01-30 21:48 ` [PATCH v9 08/19] fs/f2fs: Restore support for tracing data lifetimes Bart Van Assche
@ 2024-02-01 10:27 ` Chao Yu
0 siblings, 0 replies; 42+ messages in thread
From: Chao Yu @ 2024-02-01 10:27 UTC (permalink / raw)
To: Bart Van Assche, Martin K . Petersen
Cc: linux-scsi, linux-block, linux-fsdevel, Jens Axboe,
Christoph Hellwig, Daejun Park, Kanchan Joshi, Jaegeuk Kim,
Steven Rostedt, Masami Hiramatsu
On 2024/1/31 5:48, Bart Van Assche wrote:
> This patch restores code that was removed by commit 41d36a9f3e53 ("fs:
> remove kiocb.ki_hint").
>
> Cc: Jaegeuk Kim <jaegeuk@kernel.org>
> Cc: Chao Yu <chao@kernel.org>
> Cc: Christoph Hellwig <hch@lst.de>
> Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Reviewed-by: Chao Yu <chao@kernel.org>
Thanks,
^ permalink raw reply [flat|nested] 42+ messages in thread
* [PATCH v9 09/19] scsi: core: Query the Block Limits Extension VPD page
2024-01-30 21:48 [PATCH v9 00/19] Pass data lifetime information to SCSI disk devices Bart Van Assche
` (7 preceding siblings ...)
2024-01-30 21:48 ` [PATCH v9 08/19] fs/f2fs: Restore support for tracing data lifetimes Bart Van Assche
@ 2024-01-30 21:48 ` Bart Van Assche
2024-01-30 21:48 ` [PATCH v9 10/19] scsi: scsi_proto: Add structures and constants related to I/O groups and streams Bart Van Assche
` (9 subsequent siblings)
18 siblings, 0 replies; 42+ messages in thread
From: Bart Van Assche @ 2024-01-30 21:48 UTC (permalink / raw)
To: Martin K . Petersen
Cc: linux-scsi, linux-block, linux-fsdevel, Jens Axboe,
Christoph Hellwig, Daejun Park, Kanchan Joshi, Bart Van Assche,
Avri Altman, James E.J. Bottomley
Parse the Reduced Stream Control Supported (RSCS) bit from the block
limits extension VPD page. The RSCS bit is defined in SBC-5 r05
(https://www.t10.org/cgi-bin/ac.pl?t=f&f=sbc5r05.pdf).
Reviewed-by: Avri Altman <avri.altman@wdc.com>
Reviewed-by: Daejun Park <daejun7.park@samsung.com>
Cc: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
---
drivers/scsi/scsi.c | 2 ++
drivers/scsi/scsi_sysfs.c | 10 ++++++++++
drivers/scsi/sd.c | 13 +++++++++++++
drivers/scsi/sd.h | 1 +
include/scsi/scsi_device.h | 1 +
5 files changed, 27 insertions(+)
diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c
index 76d369343c7a..74cc3369dd8d 100644
--- a/drivers/scsi/scsi.c
+++ b/drivers/scsi/scsi.c
@@ -499,6 +499,8 @@ void scsi_attach_vpd(struct scsi_device *sdev)
scsi_update_vpd_page(sdev, 0xb1, &sdev->vpd_pgb1);
if (vpd_buf->data[i] == 0xb2)
scsi_update_vpd_page(sdev, 0xb2, &sdev->vpd_pgb2);
+ if (vpd_buf->data[i] == 0xb7)
+ scsi_update_vpd_page(sdev, 0xb7, &sdev->vpd_pgb7);
}
kfree(vpd_buf);
}
diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c
index 24f6eefb6803..93652a786a46 100644
--- a/drivers/scsi/scsi_sysfs.c
+++ b/drivers/scsi/scsi_sysfs.c
@@ -449,6 +449,7 @@ static void scsi_device_dev_release(struct device *dev)
struct scsi_vpd *vpd_pg80 = NULL, *vpd_pg83 = NULL;
struct scsi_vpd *vpd_pg0 = NULL, *vpd_pg89 = NULL;
struct scsi_vpd *vpd_pgb0 = NULL, *vpd_pgb1 = NULL, *vpd_pgb2 = NULL;
+ struct scsi_vpd *vpd_pgb7 = NULL;
unsigned long flags;
might_sleep();
@@ -494,6 +495,8 @@ static void scsi_device_dev_release(struct device *dev)
lockdep_is_held(&sdev->inquiry_mutex));
vpd_pgb2 = rcu_replace_pointer(sdev->vpd_pgb2, vpd_pgb2,
lockdep_is_held(&sdev->inquiry_mutex));
+ vpd_pgb7 = rcu_replace_pointer(sdev->vpd_pgb7, vpd_pgb7,
+ lockdep_is_held(&sdev->inquiry_mutex));
mutex_unlock(&sdev->inquiry_mutex);
if (vpd_pg0)
@@ -510,6 +513,8 @@ static void scsi_device_dev_release(struct device *dev)
kfree_rcu(vpd_pgb1, rcu);
if (vpd_pgb2)
kfree_rcu(vpd_pgb2, rcu);
+ if (vpd_pgb7)
+ kfree_rcu(vpd_pgb7, rcu);
kfree(sdev->inquiry);
kfree(sdev);
@@ -921,6 +926,7 @@ sdev_vpd_pg_attr(pg89);
sdev_vpd_pg_attr(pgb0);
sdev_vpd_pg_attr(pgb1);
sdev_vpd_pg_attr(pgb2);
+sdev_vpd_pg_attr(pgb7);
sdev_vpd_pg_attr(pg0);
static ssize_t show_inquiry(struct file *filep, struct kobject *kobj,
@@ -1295,6 +1301,9 @@ static umode_t scsi_sdev_bin_attr_is_visible(struct kobject *kobj,
if (attr == &dev_attr_vpd_pgb2 && !sdev->vpd_pgb2)
return 0;
+ if (attr == &dev_attr_vpd_pgb7 && !sdev->vpd_pgb7)
+ return 0;
+
return S_IRUGO;
}
@@ -1347,6 +1356,7 @@ static struct bin_attribute *scsi_sdev_bin_attrs[] = {
&dev_attr_vpd_pgb0,
&dev_attr_vpd_pgb1,
&dev_attr_vpd_pgb2,
+ &dev_attr_vpd_pgb7,
&dev_attr_inquiry,
NULL
};
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index a54cd1864a92..463b201a3109 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -3166,6 +3166,18 @@ static void sd_read_block_limits(struct scsi_disk *sdkp)
rcu_read_unlock();
}
+/* Read the RSCS bit from the Block Limits Extension VPD page (0xb7) */
+static void sd_read_block_limits_ext(struct scsi_disk *sdkp)
+{
+ struct scsi_vpd *vpd;
+
+ rcu_read_lock();
+ vpd = rcu_dereference(sdkp->device->vpd_pgb7);
+ if (vpd && vpd->len >= 6) /* data[5] must lie within the page */
+ sdkp->rscs = vpd->data[5] & 1;
+ rcu_read_unlock();
+}
+
/**
* sd_read_block_characteristics - Query block dev. characteristics
* @sdkp: disk to query
@@ -3517,6 +3529,7 @@ static int sd_revalidate_disk(struct gendisk *disk)
if (scsi_device_supports_vpd(sdp)) {
sd_read_block_provisioning(sdkp);
sd_read_block_limits(sdkp);
+ sd_read_block_limits_ext(sdkp);
sd_read_block_characteristics(sdkp);
sd_zbc_read_zones(sdkp, buffer);
sd_read_cpr(sdkp);
diff --git a/drivers/scsi/sd.h b/drivers/scsi/sd.h
index 409dda5350d1..e4539122f2a2 100644
--- a/drivers/scsi/sd.h
+++ b/drivers/scsi/sd.h
@@ -151,6 +151,7 @@ struct scsi_disk {
unsigned urswrz : 1;
unsigned security : 1;
unsigned ignore_medium_access_errors : 1;
+ bool rscs : 1; /* reduced stream control support */
};
#define to_scsi_disk(obj) container_of(obj, struct scsi_disk, disk_dev)
diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h
index 4dceabb9dbe1..ea494a0114e8 100644
--- a/include/scsi/scsi_device.h
+++ b/include/scsi/scsi_device.h
@@ -153,6 +153,7 @@ struct scsi_device {
struct scsi_vpd __rcu *vpd_pgb0;
struct scsi_vpd __rcu *vpd_pgb1;
struct scsi_vpd __rcu *vpd_pgb2;
+ struct scsi_vpd __rcu *vpd_pgb7;
struct scsi_target *sdev_target;
^ permalink raw reply related [flat|nested] 42+ messages in thread* [PATCH v9 10/19] scsi: scsi_proto: Add structures and constants related to I/O groups and streams
2024-01-30 21:48 [PATCH v9 00/19] Pass data lifetime information to SCSI disk devices Bart Van Assche
` (8 preceding siblings ...)
2024-01-30 21:48 ` [PATCH v9 09/19] scsi: core: Query the Block Limits Extension VPD page Bart Van Assche
@ 2024-01-30 21:48 ` Bart Van Assche
2024-01-30 21:48 ` [PATCH v9 11/19] scsi: sd: Translate data lifetime information Bart Van Assche
` (8 subsequent siblings)
18 siblings, 0 replies; 42+ messages in thread
From: Bart Van Assche @ 2024-01-30 21:48 UTC (permalink / raw)
To: Martin K . Petersen
Cc: linux-scsi, linux-block, linux-fsdevel, Jens Axboe,
Christoph Hellwig, Daejun Park, Kanchan Joshi, Bart Van Assche,
James E.J. Bottomley
Prepare for adding code that will query the I/O advice hints group
descriptors and for adding code that will retrieve the stream status.
Cc: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
---
drivers/scsi/Kconfig | 5 +++
drivers/scsi/Makefile | 2 +
drivers/scsi/scsi_proto_test.c | 56 ++++++++++++++++++++++++
include/scsi/scsi_proto.h | 78 ++++++++++++++++++++++++++++++++++
4 files changed, 141 insertions(+)
create mode 100644 drivers/scsi/scsi_proto_test.c
diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig
index e20af95a912b..3328052c8715 100644
--- a/drivers/scsi/Kconfig
+++ b/drivers/scsi/Kconfig
@@ -241,6 +241,11 @@ config SCSI_SCAN_ASYNC
Note that this setting also affects whether resuming from
system suspend will be performed asynchronously.
+config SCSI_PROTO_TEST
+ tristate "scsi_proto.h unit tests" if !KUNIT_ALL_TESTS
+ depends on SCSI && KUNIT
+ default KUNIT_ALL_TESTS
+
menu "SCSI Transports"
depends on SCSI
diff --git a/drivers/scsi/Makefile b/drivers/scsi/Makefile
index f055bfd54a68..1313ddf2fd1a 100644
--- a/drivers/scsi/Makefile
+++ b/drivers/scsi/Makefile
@@ -24,6 +24,8 @@ obj-$(CONFIG_SCSI_COMMON) += scsi_common.o
obj-$(CONFIG_RAID_ATTRS) += raid_class.o
+obj-$(CONFIG_SCSI_PROTO_TEST) += scsi_proto_test.o
+
# --- NOTE ORDERING HERE ---
# For kernel non-modular link, transport attributes need to
# be initialised before drivers
diff --git a/drivers/scsi/scsi_proto_test.c b/drivers/scsi/scsi_proto_test.c
new file mode 100644
index 000000000000..7fa0a78a2ad1
--- /dev/null
+++ b/drivers/scsi/scsi_proto_test.c
@@ -0,0 +1,56 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2023 Google LLC
+ */
+#include <kunit/test.h>
+#include <asm-generic/unaligned.h>
+#include <scsi/scsi_proto.h>
+
+static void test_scsi_proto(struct kunit *test)
+{
+ static const union {
+ struct scsi_io_group_descriptor desc;
+ u8 arr[sizeof(struct scsi_io_group_descriptor)];
+ } d = { .arr = { 0x45, 0, 0, 0, 0xb0, 0xe4, 0xe3 } };
+ KUNIT_EXPECT_EQ(test, d.desc.io_advice_hints_mode + 0, 1);
+ KUNIT_EXPECT_EQ(test, d.desc.st_enble + 0, 1);
+ KUNIT_EXPECT_EQ(test, d.desc.cs_enble + 0, 0);
+ KUNIT_EXPECT_EQ(test, d.desc.ic_enable + 0, 1);
+ KUNIT_EXPECT_EQ(test, d.desc.acdlu + 0, 1);
+ KUNIT_EXPECT_EQ(test, d.desc.rlbsr + 0, 3);
+ KUNIT_EXPECT_EQ(test, d.desc.lbm_descriptor_type + 0, 0);
+ KUNIT_EXPECT_EQ(test, d.desc.params[0] + 0, 0xe4);
+ KUNIT_EXPECT_EQ(test, d.desc.params[1] + 0, 0xe3);
+
+ static const union {
+ struct scsi_stream_status s;
+ u8 arr[sizeof(struct scsi_stream_status)];
+ } ss = { .arr = { 0x80, 0, 0x12, 0x34, 0x3f } };
+ KUNIT_EXPECT_EQ(test, ss.s.perm + 0, 1);
+ KUNIT_EXPECT_EQ(test, get_unaligned_be16(&ss.s.stream_identifier),
+ 0x1234);
+ KUNIT_EXPECT_EQ(test, ss.s.rel_lifetime + 0, 0x3f);
+
+ static const union {
+ struct scsi_stream_status_header h;
+ u8 arr[sizeof(struct scsi_stream_status_header)];
+ } sh = { .arr = { 1, 2, 3, 4, 0, 0, 5, 6 } };
+ KUNIT_EXPECT_EQ(test, get_unaligned_be32(&sh.h.len), 0x1020304);
+ KUNIT_EXPECT_EQ(test, get_unaligned_be16(&sh.h.number_of_open_streams),
+ 0x506);
+}
+
+static struct kunit_case scsi_proto_test_cases[] = {
+ KUNIT_CASE(test_scsi_proto),
+ {}
+};
+
+static struct kunit_suite scsi_proto_test_suite = {
+ .name = "scsi_proto",
+ .test_cases = scsi_proto_test_cases,
+};
+kunit_test_suite(scsi_proto_test_suite);
+
+MODULE_DESCRIPTION("<scsi/scsi_proto.h> unit tests");
+MODULE_AUTHOR("Bart Van Assche");
+MODULE_LICENSE("GPL");
diff --git a/include/scsi/scsi_proto.h b/include/scsi/scsi_proto.h
index 07d65c1f59db..843106e1109f 100644
--- a/include/scsi/scsi_proto.h
+++ b/include/scsi/scsi_proto.h
@@ -10,6 +10,7 @@
#ifndef _SCSI_PROTO_H_
#define _SCSI_PROTO_H_
+#include <linux/build_bug.h>
#include <linux/types.h>
/*
@@ -126,6 +127,7 @@
#define SAI_READ_CAPACITY_16 0x10
#define SAI_GET_LBA_STATUS 0x12
#define SAI_REPORT_REFERRALS 0x13
+#define SAI_GET_STREAM_STATUS 0x16
/* values for maintenance in */
#define MI_REPORT_IDENTIFYING_INFORMATION 0x05
#define MI_REPORT_TARGET_PGS 0x0a
@@ -275,6 +277,82 @@ struct scsi_lun {
__u8 scsi_lun[8];
};
+/* SBC-5 IO advice hints group descriptor */
+struct scsi_io_group_descriptor {
+#if defined(__BIG_ENDIAN)
+ u8 io_advice_hints_mode: 2;
+ u8 reserved1: 3;
+ u8 st_enble: 1;
+ u8 cs_enble: 1;
+ u8 ic_enable: 1;
+#elif defined(__LITTLE_ENDIAN)
+ u8 ic_enable: 1;
+ u8 cs_enble: 1;
+ u8 st_enble: 1;
+ u8 reserved1: 3;
+ u8 io_advice_hints_mode: 2;
+#else
+#error
+#endif
+ u8 reserved2[3];
+ /* Logical block markup descriptor */
+#if defined(__BIG_ENDIAN)
+ u8 acdlu: 1;
+ u8 reserved3: 1;
+ u8 rlbsr: 2;
+ u8 lbm_descriptor_type: 4;
+#elif defined(__LITTLE_ENDIAN)
+ u8 lbm_descriptor_type: 4;
+ u8 rlbsr: 2;
+ u8 reserved3: 1;
+ u8 acdlu: 1;
+#else
+#error
+#endif
+ u8 params[2];
+ u8 reserved4;
+ u8 reserved5[8];
+};
+
+static_assert(sizeof(struct scsi_io_group_descriptor) == 16);
+
+/* SCSI stream status descriptor */
+struct scsi_stream_status {
+#if defined(__BIG_ENDIAN)
+ u8 perm: 1;
+ u8 reserved1: 7;
+#elif defined(__LITTLE_ENDIAN)
+ u8 reserved1: 7;
+ u8 perm: 1;
+#else
+#error
+#endif
+ u8 reserved2;
+ __be16 stream_identifier;
+#if defined(__BIG_ENDIAN)
+ u8 reserved3: 2;
+ u8 rel_lifetime: 6;
+#elif defined(__LITTLE_ENDIAN)
+ u8 rel_lifetime: 6;
+ u8 reserved3: 2;
+#else
+#error
+#endif
+ u8 reserved4[3];
+};
+
+static_assert(sizeof(struct scsi_stream_status) == 8);
+
+/* GET STREAM STATUS parameter data */
+struct scsi_stream_status_header {
+ __be32 len; /* length in bytes of stream_status[] array. */
+ u16 reserved;
+ __be16 number_of_open_streams;
+ DECLARE_FLEX_ARRAY(struct scsi_stream_status, stream_status);
+};
+
+static_assert(sizeof(struct scsi_stream_status_header) == 8);
+
/* SPC asymmetric access states */
#define SCSI_ACCESS_STATE_OPTIMAL 0x00
#define SCSI_ACCESS_STATE_ACTIVE 0x01
^ permalink raw reply related [flat|nested] 42+ messages in thread* [PATCH v9 11/19] scsi: sd: Translate data lifetime information
2024-01-30 21:48 [PATCH v9 00/19] Pass data lifetime information to SCSI disk devices Bart Van Assche
` (9 preceding siblings ...)
2024-01-30 21:48 ` [PATCH v9 10/19] scsi: scsi_proto: Add structures and constants related to I/O groups and streams Bart Van Assche
@ 2024-01-30 21:48 ` Bart Van Assche
2024-02-15 21:33 ` Martin K. Petersen
2024-06-11 20:57 ` Andy Shevchenko
2024-01-30 21:48 ` [PATCH v9 12/19] scsi: scsi_debug: Reduce code duplication Bart Van Assche
` (7 subsequent siblings)
18 siblings, 2 replies; 42+ messages in thread
From: Bart Van Assche @ 2024-01-30 21:48 UTC (permalink / raw)
To: Martin K . Petersen
Cc: linux-scsi, linux-block, linux-fsdevel, Jens Axboe,
Christoph Hellwig, Daejun Park, Kanchan Joshi, Bart Van Assche,
Damien Le Moal, James E.J. Bottomley
Recently T10 standardized SBC constrained streams. This mechanism allows
passing data lifetime information to SCSI devices in the group number
field. Add support for translating write hint information into a
permanent stream number in the sd driver. Use WRITE(10) instead of
WRITE(6) if data lifetime information is present because the WRITE(6)
command does not have a GROUP NUMBER field.
Cc: Martin K. Petersen <martin.petersen@oracle.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Damien Le Moal <dlemoal@kernel.org>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
---
drivers/scsi/sd.c | 98 +++++++++++++++++++++++++++++++++++++++++++++--
drivers/scsi/sd.h | 4 +-
2 files changed, 98 insertions(+), 4 deletions(-)
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 463b201a3109..997de4daa8c4 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -47,6 +47,7 @@
#include <linux/blkpg.h>
#include <linux/blk-pm.h>
#include <linux/delay.h>
+#include <linux/rw_hint.h>
#include <linux/major.h>
#include <linux/mutex.h>
#include <linux/string_helpers.h>
@@ -1080,12 +1081,38 @@ static blk_status_t sd_setup_flush_cmnd(struct scsi_cmnd *cmd)
return BLK_STS_OK;
}
+/**
+ * sd_group_number() - Compute the GROUP NUMBER field
+ * @cmd: SCSI command for which to compute the value of the six-bit GROUP NUMBER
+ * field.
+ *
+ * From SBC-5 r05 (https://www.t10.org/cgi-bin/ac.pl?t=f&f=sbc5r05.pdf):
+ * 0: no relative lifetime.
+ * 1: shortest relative lifetime.
+ * 2: second shortest relative lifetime.
+ * 3 - 0x3d: intermediate relative lifetimes.
+ * 0x3e: second longest relative lifetime.
+ * 0x3f: longest relative lifetime.
+ *
+ * Return: 0 if the rscs flag of the disk is not set; otherwise the smallest
+ * of the write hint of the request, the permanent stream count of the disk
+ * and 0x3f, so the result always fits in six bits.
+ */
+static u8 sd_group_number(struct scsi_cmnd *cmd)
+{
+ const struct request *rq = scsi_cmd_to_rq(cmd);
+ struct scsi_disk *sdkp = scsi_disk(rq->q->disk);
+
+ if (!sdkp->rscs) /* no reduced stream control support */
+ return 0;
+
+ return min3((u32)rq->write_hint, (u32)sdkp->permanent_stream_count,
+ 0x3fu); /* 0x3f: largest six-bit value */
+}
+
static blk_status_t sd_setup_rw32_cmnd(struct scsi_cmnd *cmd, bool write,
sector_t lba, unsigned int nr_blocks,
unsigned char flags, unsigned int dld)
{
cmd->cmd_len = SD_EXT_CDB_SIZE;
cmd->cmnd[0] = VARIABLE_LENGTH_CMD;
+ cmd->cmnd[6] = sd_group_number(cmd);
cmd->cmnd[7] = 0x18; /* Additional CDB len */
cmd->cmnd[9] = write ? WRITE_32 : READ_32;
cmd->cmnd[10] = flags;
@@ -1104,7 +1131,7 @@ static blk_status_t sd_setup_rw16_cmnd(struct scsi_cmnd *cmd, bool write,
cmd->cmd_len = 16;
cmd->cmnd[0] = write ? WRITE_16 : READ_16;
cmd->cmnd[1] = flags | ((dld >> 2) & 0x01);
- cmd->cmnd[14] = (dld & 0x03) << 6;
+ cmd->cmnd[14] = ((dld & 0x03) << 6) | sd_group_number(cmd);
cmd->cmnd[15] = 0;
put_unaligned_be64(lba, &cmd->cmnd[2]);
put_unaligned_be32(nr_blocks, &cmd->cmnd[10]);
@@ -1119,7 +1146,7 @@ static blk_status_t sd_setup_rw10_cmnd(struct scsi_cmnd *cmd, bool write,
cmd->cmd_len = 10;
cmd->cmnd[0] = write ? WRITE_10 : READ_10;
cmd->cmnd[1] = flags;
- cmd->cmnd[6] = 0;
+ cmd->cmnd[6] = sd_group_number(cmd);
cmd->cmnd[9] = 0;
put_unaligned_be32(lba, &cmd->cmnd[2]);
put_unaligned_be16(nr_blocks, &cmd->cmnd[7]);
@@ -1256,7 +1283,7 @@ static blk_status_t sd_setup_read_write_cmnd(struct scsi_cmnd *cmd)
ret = sd_setup_rw16_cmnd(cmd, write, lba, nr_blocks,
protect | fua, dld);
} else if ((nr_blocks > 0xff) || (lba > 0x1fffff) ||
- sdp->use_10_for_rw || protect) {
+ sdp->use_10_for_rw || protect || rq->write_hint) {
ret = sd_setup_rw10_cmnd(cmd, write, lba, nr_blocks,
protect | fua);
} else {
@@ -3059,6 +3086,70 @@ sd_read_cache_type(struct scsi_disk *sdkp, unsigned char *buffer)
sdkp->DPOFUA = 0;
}
+/**
+ * sd_is_perm_stream() - Check whether a stream is a permanent stream
+ * @sdkp: SCSI disk to query.
+ * @stream_id: Stream identifier, stored big-endian in bytes 4-5 of the CDB.
+ *
+ * Issues GET STREAM STATUS with room for the parameter data header plus a
+ * single stream status descriptor and inspects that descriptor.
+ *
+ * Return: true if the command succeeded, the reported descriptor list length
+ * covers at least one descriptor and the PERM bit of that descriptor is set;
+ * false in all other cases.
+ */
+static bool sd_is_perm_stream(struct scsi_disk *sdkp, unsigned int stream_id)
+{
+	u8 cdb[16] = { SERVICE_ACTION_IN_16, SAI_GET_STREAM_STATUS };
+	/*
+	 * Zero-initialized so that a data-in transfer shorter than the buffer
+	 * cannot leave stale stack contents in the fields examined below.
+	 */
+	struct {
+		struct scsi_stream_status_header h;
+		struct scsi_stream_status s;
+	} buf = {};
+	struct scsi_device *sdev = sdkp->device;
+	struct scsi_sense_hdr sshdr;
+	const struct scsi_exec_args exec_args = {
+		.sshdr = &sshdr,
+	};
+	int res;
+
+	put_unaligned_be16(stream_id, &cdb[4]);
+	put_unaligned_be32(sizeof(buf), &cdb[10]);
+
+	res = scsi_execute_cmd(sdev, cdb, REQ_OP_DRV_IN, &buf, sizeof(buf),
+			       SD_TIMEOUT, sdkp->max_retries, &exec_args);
+	if (res < 0)
+		return false;
+	if (scsi_status_is_check_condition(res) && scsi_sense_valid(&sshdr))
+		sd_print_sense_hdr(sdkp, &sshdr);
+	if (res)
+		return false;
+	/* The device must have reported at least one complete descriptor. */
+	if (get_unaligned_be32(&buf.h.len) < sizeof(struct scsi_stream_status))
+		return false;
+	/*
+	 * Read the descriptor through its own member instead of indexing the
+	 * flexible array at the end of the header, which has no declared
+	 * elements.
+	 */
+	return buf.s.perm;
+}
+
+/**
+ * sd_read_io_hints() - Determine the number of permanent streams
+ * @sdkp: SCSI disk whose permanent_stream_count member is updated.
+ * @buffer: Scratch buffer for the mode page; must be at least SD_BUF_SIZE
+ *	bytes, the size that is passed to scsi_mode_sense().
+ *
+ * Reads mode page 0x0a, subpage 0x05 (IO Advice Hints Grouping) and counts
+ * the leading I/O group descriptors that have st_enble set and for which
+ * sd_is_perm_stream() reports a permanent stream. Counting stops at the first
+ * descriptor that fails either test. If MODE SENSE fails,
+ * permanent_stream_count is left unchanged.
+ */
+static void sd_read_io_hints(struct scsi_disk *sdkp, unsigned char *buffer)
+{
+	struct scsi_device *sdp = sdkp->device;
+	const struct scsi_io_group_descriptor *desc, *start, *end;
+	struct scsi_sense_hdr sshdr;
+	struct scsi_mode_data data;
+	unsigned int len;
+	int res;
+
+	res = scsi_mode_sense(sdp, /*dbd=*/0x8, /*modepage=*/0x0a,
+			      /*subpage=*/0x05, buffer, SD_BUF_SIZE, SD_TIMEOUT,
+			      sdkp->max_retries, &data, &sshdr);
+	if (res < 0)
+		return;
+	/*
+	 * The lengths are reported by the device and may describe more data
+	 * than the SD_BUF_SIZE bytes requested above. Clamp them so that the
+	 * descriptor walk below never leaves the buffer.
+	 */
+	len = min_t(unsigned int, data.header_length + data.length,
+		    SD_BUF_SIZE);
+	/* The descriptors start 16 bytes after the mode parameter header. */
+	start = (void *)buffer + data.header_length + 16;
+	end = (void *)buffer + ALIGN_DOWN(len, sizeof(*end));
+	/*
+	 * From "SBC-5 Constrained Streams with Data Lifetimes": Device servers
+	 * should assign the lowest numbered stream identifiers to permanent
+	 * streams.
+	 */
+	for (desc = start; desc < end; desc++) {
+		if (!desc->st_enble || !sd_is_perm_stream(sdkp, desc - start))
+			break;
+	}
+	sdkp->permanent_stream_count = desc - start;
+	if (sdkp->rscs && sdkp->permanent_stream_count < 2)
+		sd_printk(KERN_INFO, sdkp,
+			  "Unexpected: RSCS has been set and the permanent stream count is %u\n",
+			  sdkp->permanent_stream_count);
+	else if (sdkp->permanent_stream_count)
+		sd_printk(KERN_INFO, sdkp, "permanent stream count = %d\n",
+			  sdkp->permanent_stream_count);
+}
+
/*
* The ATO bit indicates whether the DIF application tag is available
* for use by the operating system.
@@ -3539,6 +3630,7 @@ static int sd_revalidate_disk(struct gendisk *disk)
sd_read_write_protect_flag(sdkp, buffer);
sd_read_cache_type(sdkp, buffer);
+ sd_read_io_hints(sdkp, buffer);
sd_read_app_tag_own(sdkp, buffer);
sd_read_write_same(sdkp, buffer);
sd_read_security(sdkp, buffer);
diff --git a/drivers/scsi/sd.h b/drivers/scsi/sd.h
index e4539122f2a2..5c4285a582b2 100644
--- a/drivers/scsi/sd.h
+++ b/drivers/scsi/sd.h
@@ -125,6 +125,8 @@ struct scsi_disk {
unsigned int physical_block_size;
unsigned int max_medium_access_timeouts;
unsigned int medium_access_timed_out;
+ /* number of permanent streams */
+ u16 permanent_stream_count;
u8 media_present;
u8 write_prot;
u8 protection_type;/* Data Integrity Field */
@@ -151,7 +153,7 @@ struct scsi_disk {
unsigned urswrz : 1;
unsigned security : 1;
unsigned ignore_medium_access_errors : 1;
- bool rscs : 1; /* reduced stream control support */
+ unsigned rscs : 1; /* reduced stream control support */
};
#define to_scsi_disk(obj) container_of(obj, struct scsi_disk, disk_dev)
^ permalink raw reply related [flat|nested] 42+ messages in thread* Re: [PATCH v9 11/19] scsi: sd: Translate data lifetime information
2024-01-30 21:48 ` [PATCH v9 11/19] scsi: sd: Translate data lifetime information Bart Van Assche
@ 2024-02-15 21:33 ` Martin K. Petersen
2024-02-15 21:51 ` Bart Van Assche
2024-06-11 20:57 ` Andy Shevchenko
1 sibling, 1 reply; 42+ messages in thread
From: Martin K. Petersen @ 2024-02-15 21:33 UTC (permalink / raw)
To: Bart Van Assche
Cc: Martin K . Petersen, linux-scsi, linux-block, linux-fsdevel,
Jens Axboe, Christoph Hellwig, Daejun Park, Kanchan Joshi,
Damien Le Moal, James E.J. Bottomley
Hi Bart!
> @@ -1256,7 +1283,7 @@ static blk_status_t sd_setup_read_write_cmnd(struct scsi_cmnd *cmd)
> ret = sd_setup_rw16_cmnd(cmd, write, lba, nr_blocks,
> protect | fua, dld);
> } else if ((nr_blocks > 0xff) || (lba > 0x1fffff) ||
> - sdp->use_10_for_rw || protect) {
> + sdp->use_10_for_rw || protect || rq->write_hint) {
Wouldn't it be appropriate to check sdkp->permanent_stream_count here?
rq->write_hint being set doesn't help if the device uses 6-byte
commands.
--
Martin K. Petersen Oracle Linux Engineering
^ permalink raw reply [flat|nested] 42+ messages in thread* Re: [PATCH v9 11/19] scsi: sd: Translate data lifetime information
2024-02-15 21:33 ` Martin K. Petersen
@ 2024-02-15 21:51 ` Bart Van Assche
2024-02-15 22:00 ` Martin K. Petersen
0 siblings, 1 reply; 42+ messages in thread
From: Bart Van Assche @ 2024-02-15 21:51 UTC (permalink / raw)
To: Martin K. Petersen
Cc: linux-scsi, linux-block, linux-fsdevel, Jens Axboe,
Christoph Hellwig, Daejun Park, Kanchan Joshi, Damien Le Moal,
James E.J. Bottomley
On 2/15/24 13:33, Martin K. Petersen wrote:
>> @@ -1256,7 +1283,7 @@ static blk_status_t sd_setup_read_write_cmnd(struct scsi_cmnd *cmd)
>> ret = sd_setup_rw16_cmnd(cmd, write, lba, nr_blocks,
>> protect | fua, dld);
>> } else if ((nr_blocks > 0xff) || (lba > 0x1fffff) ||
>> - sdp->use_10_for_rw || protect) {
>> + sdp->use_10_for_rw || protect || rq->write_hint) {
>
> Wouldn't it be appropriate to check sdkp->permanent_stream_count here?
> rq->write_hint being set doesn't help if the device uses 6-byte
> commands.
Something like this untested change?
@@ -1256,7 +1283,8 @@ static blk_status_t sd_setup_read_write_cmnd(struct scsi_cmnd *cmd)
ret = sd_setup_rw16_cmnd(cmd, write, lba, nr_blocks,
protect | fua, dld);
} else if ((nr_blocks > 0xff) || (lba > 0x1fffff) ||
- sdp->use_10_for_rw || protect) {
+ sdp->use_10_for_rw || protect ||
+ (rq->write_hint && sdkp->permanent_stream_count)) {
ret = sd_setup_rw10_cmnd(cmd, write, lba, nr_blocks,
protect | fua);
} else {
Thanks,
Bart.
^ permalink raw reply [flat|nested] 42+ messages in thread* Re: [PATCH v9 11/19] scsi: sd: Translate data lifetime information
2024-02-15 21:51 ` Bart Van Assche
@ 2024-02-15 22:00 ` Martin K. Petersen
2024-02-16 0:22 ` Bart Van Assche
0 siblings, 1 reply; 42+ messages in thread
From: Martin K. Petersen @ 2024-02-15 22:00 UTC (permalink / raw)
To: Bart Van Assche
Cc: Martin K. Petersen, linux-scsi, linux-block, linux-fsdevel,
Jens Axboe, Christoph Hellwig, Daejun Park, Kanchan Joshi,
Damien Le Moal, James E.J. Bottomley
Bart,
>> Wouldn't it be appropriate to check sdkp->permanent_stream_count
>> here?
>> rq->write_hint being set doesn't help if the device uses 6-byte
>> commands.
>
> Something like this untested change?
>
> @@ -1256,7 +1283,8 @@ static blk_status_t sd_setup_read_write_cmnd(struct scsi_cmnd *cmd)
> ret = sd_setup_rw16_cmnd(cmd, write, lba, nr_blocks,
> protect | fua, dld);
> } else if ((nr_blocks > 0xff) || (lba > 0x1fffff) ||
> - sdp->use_10_for_rw || protect) {
> + sdp->use_10_for_rw || protect ||
> + (rq->write_hint && sdkp->permanent_stream_count)) {
> ret = sd_setup_rw10_cmnd(cmd, write, lba, nr_blocks,
> protect | fua);
> } else {
Do we even care about rq->write_hint being set? sd_group_number() will
check that later.
In my book, the device supporting permanent streams is a good heuristic
not to bother with 6-byte commands.
Another option would be to simply set use_16_for_rw when streams are
supported like Damien does for ZBC.
--
Martin K. Petersen Oracle Linux Engineering
^ permalink raw reply [flat|nested] 42+ messages in thread* Re: [PATCH v9 11/19] scsi: sd: Translate data lifetime information
2024-02-15 22:00 ` Martin K. Petersen
@ 2024-02-16 0:22 ` Bart Van Assche
2024-02-16 0:32 ` Martin K. Petersen
0 siblings, 1 reply; 42+ messages in thread
From: Bart Van Assche @ 2024-02-16 0:22 UTC (permalink / raw)
To: Martin K. Petersen
Cc: linux-scsi, linux-block, linux-fsdevel, Jens Axboe,
Christoph Hellwig, Daejun Park, Kanchan Joshi, Damien Le Moal,
James E.J. Bottomley
On 2/15/24 14:00, Martin K. Petersen wrote:
> Another option would be to simply set use_16_for_rw when streams are
> supported like Damien does for ZBC.
Setting use_10_for_rw may work better since devices that support data
lifetime information will support WRITE(10) while it is not guaranteed
that these devices support WRITE(16). As an example, WRITE(10) support
is required by the UFS standard while WRITE(16) support is optional.
Thanks,
Bart.
^ permalink raw reply [flat|nested] 42+ messages in thread
* Re: [PATCH v9 11/19] scsi: sd: Translate data lifetime information
2024-02-16 0:22 ` Bart Van Assche
@ 2024-02-16 0:32 ` Martin K. Petersen
0 siblings, 0 replies; 42+ messages in thread
From: Martin K. Petersen @ 2024-02-16 0:32 UTC (permalink / raw)
To: Bart Van Assche
Cc: Martin K. Petersen, linux-scsi, linux-block, linux-fsdevel,
Jens Axboe, Christoph Hellwig, Daejun Park, Kanchan Joshi,
Damien Le Moal, James E.J. Bottomley
Bart,
> On 2/15/24 14:00, Martin K. Petersen wrote:
>> Another option would be to simply set use_16_for_rw when streams are
>> supported like Damien does for ZBC.
>
> Setting use_10_for_rw may work better since devices that support data
> lifetime information will support WRITE(10) while it is not guaranteed
> that these devices support WRITE(16). As an example, WRITE(10) support
> is required by the UFS standard while WRITE(16) support is optional.
Sure, that's fine.
--
Martin K. Petersen Oracle Linux Engineering
^ permalink raw reply [flat|nested] 42+ messages in thread
* Re: [PATCH v9 11/19] scsi: sd: Translate data lifetime information
2024-01-30 21:48 ` [PATCH v9 11/19] scsi: sd: Translate data lifetime information Bart Van Assche
2024-02-15 21:33 ` Martin K. Petersen
@ 2024-06-11 20:57 ` Andy Shevchenko
2024-06-11 21:21 ` Christian Heusel
1 sibling, 1 reply; 42+ messages in thread
From: Andy Shevchenko @ 2024-06-11 20:57 UTC (permalink / raw)
To: Bart Van Assche
Cc: Martin K . Petersen, linux-scsi, linux-block, linux-fsdevel,
Jens Axboe, Christoph Hellwig, Daejun Park, Kanchan Joshi,
Damien Le Moal, James E.J. Bottomley
Tue, Jan 30, 2024 at 01:48:37PM -0800, Bart Van Assche kirjoitti:
> Recently T10 standardized SBC constrained streams. This mechanism allows
> to pass data lifetime information to SCSI devices in the group number
> field. Add support for translating write hint information into a
> permanent stream number in the sd driver. Use WRITE(10) instead of
> WRITE(6) if data lifetime information is present because the WRITE(6)
> command does not have a GROUP NUMBER field.
This patch broke very badly my connected Garmin FR35 sport watch. The boot time
increased by 1 minute along with broken access to USB mass storage.
On the reboot it takes ages as well.
Revert of this and one little dependency (unrelated by functional means) helps.
Details are here: https://gitlab.archlinux.org/archlinux/packaging/packages/linux/-/issues/60
P.S. Big thanks to Arch Linux team to help with bisection!
Note, from tomorrow I will be off from the HW in question for 2+ months, I can
test anything back in mid-August, but I hope you may do something about it
without my participation.
--
With Best Regards,
Andy Shevchenko
^ permalink raw reply [flat|nested] 42+ messages in thread
* Re: [PATCH v9 11/19] scsi: sd: Translate data lifetime information
2024-06-11 20:57 ` Andy Shevchenko
@ 2024-06-11 21:21 ` Christian Heusel
2024-06-11 23:08 ` Bart Van Assche
0 siblings, 1 reply; 42+ messages in thread
From: Christian Heusel @ 2024-06-11 21:21 UTC (permalink / raw)
To: Andy Shevchenko
Cc: Bart Van Assche, Martin K . Petersen, linux-scsi, linux-block,
linux-fsdevel, Jens Axboe, Christoph Hellwig, Daejun Park,
Kanchan Joshi, Damien Le Moal, James E.J. Bottomley, regressions
[-- Attachment #1: Type: text/plain, Size: 1506 bytes --]
On 24/06/11 11:57PM, Andy Shevchenko wrote:
> Tue, Jan 30, 2024 at 01:48:37PM -0800, Bart Van Assche kirjoitti:
> > Recently T10 standardized SBC constrained streams. This mechanism allows
> > to pass data lifetime information to SCSI devices in the group number
> > field. Add support for translating write hint information into a
> > permanent stream number in the sd driver. Use WRITE(10) instead of
> > WRITE(6) if data lifetime information is present because the WRITE(6)
> > command does not have a GROUP NUMBER field.
>
> This patch broke very badly my connected Garmin FR35 sport watch. The boot time
> increased by 1 minute along with broken access to USB mass storage.
>
> On the reboot it takes ages as well.
>
> Revert of this and one little dependency (unrelated by functional means) helps.
We have tested that the revert fixes the issue on top of v6.10-rc3.
Also adding the regressions list in CC and making regzbot aware of this
issue.
> Details are here: https://gitlab.archlinux.org/archlinux/packaging/packages/linux/-/issues/60
>
> P.S. Big thanks to Arch Linux team to help with bisection!
If this is fixed adding in a "Reported-by" or "Bisected-by" (depending
on what this subsystem uses) for me would be appreciated :)
Cheers,
Christian
---
#regzbot title: scsi/sd: Timeout/broken USB storage with Garmin FR35
#regzbot introduced: 4f53138fffc2 ^
#regzbot link: https://gitlab.archlinux.org/archlinux/packaging/packages/linux/-/issues/60
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]
^ permalink raw reply [flat|nested] 42+ messages in thread
* Re: [PATCH v9 11/19] scsi: sd: Translate data lifetime information
2024-06-11 21:21 ` Christian Heusel
@ 2024-06-11 23:08 ` Bart Van Assche
2024-06-12 1:48 ` Martin K. Petersen
2024-06-12 5:35 ` Andy Shevchenko
0 siblings, 2 replies; 42+ messages in thread
From: Bart Van Assche @ 2024-06-11 23:08 UTC (permalink / raw)
To: Christian Heusel, Andy Shevchenko
Cc: Martin K . Petersen, linux-scsi, linux-block, linux-fsdevel,
Jens Axboe, Christoph Hellwig, Daejun Park, Kanchan Joshi,
Damien Le Moal, James E.J. Bottomley, regressions
On 6/11/24 2:21 PM, Christian Heusel wrote:
> On 24/06/11 11:57PM, Andy Shevchenko wrote:
>> Tue, Jan 30, 2024 at 01:48:37PM -0800, Bart Van Assche kirjoitti:
>>> Recently T10 standardized SBC constrained streams. This mechanism allows
>>> to pass data lifetime information to SCSI devices in the group number
>>> field. Add support for translating write hint information into a
>>> permanent stream number in the sd driver. Use WRITE(10) instead of
>>> WRITE(6) if data lifetime information is present because the WRITE(6)
>>> command does not have a GROUP NUMBER field.
>>
>> This patch broke very badly my connected Garmin FR35 sport watch. The boot time
>> increased by 1 minute along with broken access to USB mass storage.
>>
>> On the reboot it takes ages as well.
>>
>> Revert of this and one little dependency (unrelated by functional means) helps.
>
> We have tested that the revert fixes the issue on top of v6.10-rc3.
>
> Also adding the regressions list in CC and making regzbot aware of this
> issue.
>
>> Details are here: https://gitlab.archlinux.org/archlinux/packaging/packages/linux/-/issues/60
>>
>> P.S. Big thanks to Arch Linux team to help with bisection!
>
> If this is fixed adding in a "Reported-by" or "Bisected-by" (depending
> on what this subsystem uses) for me would be appreciated :)
Thank you Christian for having gone through the painful process of
bisecting this issue.
Is the Garmin FR35 Flash device perhaps connected to a USB bus? If so,
this is the second report of a USB storage device that resets if it
receives a query for the IO Advice Hints Grouping mode page. Does the
patch below help?
Thanks,
Bart.
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 3a43e2209751..fcf3d7730466 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -63,6 +63,7 @@
#include <scsi/scsi_cmnd.h>
#include <scsi/scsi_dbg.h>
#include <scsi/scsi_device.h>
+#include <scsi/scsi_devinfo.h>
#include <scsi/scsi_driver.h>
#include <scsi/scsi_eh.h>
#include <scsi/scsi_host.h>
@@ -3117,6 +3118,9 @@ static void sd_read_io_hints(struct scsi_disk *sdkp, unsigned char *buffer)
struct scsi_mode_data data;
int res;
+ if (sdp->sdev_bflags & BLIST_SKIP_IO_HINTS)
+ return;
+
res = scsi_mode_sense(sdp, /*dbd=*/0x8, /*modepage=*/0x0a,
/*subpage=*/0x05, buffer, SD_BUF_SIZE, SD_TIMEOUT,
sdkp->max_retries, &data, &sshdr);
diff --git a/drivers/usb/storage/scsiglue.c b/drivers/usb/storage/scsiglue.c
index b31464740f6c..9a7185c68872 100644
--- a/drivers/usb/storage/scsiglue.c
+++ b/drivers/usb/storage/scsiglue.c
@@ -79,6 +79,8 @@ static int slave_alloc (struct scsi_device *sdev)
if (us->protocol == USB_PR_BULK && us->max_lun > 0)
sdev->sdev_bflags |= BLIST_FORCELUN;
+ sdev->sdev_bflags |= BLIST_SKIP_IO_HINTS;
+
return 0;
}
diff --git a/include/scsi/scsi_devinfo.h b/include/scsi/scsi_devinfo.h
index 6b548dc2c496..fa8721e49dec 100644
--- a/include/scsi/scsi_devinfo.h
+++ b/include/scsi/scsi_devinfo.h
@@ -69,8 +69,10 @@
#define BLIST_RETRY_ITF ((__force blist_flags_t)(1ULL << 32))
/* Always retry ABORTED_COMMAND with ASC 0xc1 */
#define BLIST_RETRY_ASC_C1 ((__force blist_flags_t)(1ULL << 33))
+/* Do not read the I/O hints mode page */
+#define BLIST_SKIP_IO_HINTS ((__force blist_flags_t)(1ULL << 34))
-#define __BLIST_LAST_USED BLIST_RETRY_ASC_C1
+#define __BLIST_LAST_USED BLIST_SKIP_IO_HINTS
#define __BLIST_HIGH_UNUSED (~(__BLIST_LAST_USED | \
(__force blist_flags_t) \
^ permalink raw reply related [flat|nested] 42+ messages in thread
* Re: [PATCH v9 11/19] scsi: sd: Translate data lifetime information
2024-06-11 23:08 ` Bart Van Assche
@ 2024-06-12 1:48 ` Martin K. Petersen
2024-06-12 5:35 ` Andy Shevchenko
1 sibling, 0 replies; 42+ messages in thread
From: Martin K. Petersen @ 2024-06-12 1:48 UTC (permalink / raw)
To: Bart Van Assche
Cc: Christian Heusel, Andy Shevchenko, Martin K . Petersen,
linux-scsi, linux-block, linux-fsdevel, Jens Axboe,
Christoph Hellwig, Daejun Park, Kanchan Joshi, Damien Le Moal,
James E.J. Bottomley, regressions
Hi Bart!
> diff --git a/drivers/usb/storage/scsiglue.c b/drivers/usb/storage/scsiglue.c
> index b31464740f6c..9a7185c68872 100644
> --- a/drivers/usb/storage/scsiglue.c
> +++ b/drivers/usb/storage/scsiglue.c
> @@ -79,6 +79,8 @@ static int slave_alloc (struct scsi_device *sdev)
> if (us->protocol == USB_PR_BULK && us->max_lun > 0)
> sdev->sdev_bflags |= BLIST_FORCELUN;
>
> + sdev->sdev_bflags |= BLIST_SKIP_IO_HINTS;
> +
I suspect this is the safest approach and prefer it over the individual
device quirk.
--
Martin K. Petersen Oracle Linux Engineering
^ permalink raw reply [flat|nested] 42+ messages in thread
* Re: [PATCH v9 11/19] scsi: sd: Translate data lifetime information
2024-06-11 23:08 ` Bart Van Assche
2024-06-12 1:48 ` Martin K. Petersen
@ 2024-06-12 5:35 ` Andy Shevchenko
2024-06-12 5:42 ` Andy Shevchenko
1 sibling, 1 reply; 42+ messages in thread
From: Andy Shevchenko @ 2024-06-12 5:35 UTC (permalink / raw)
To: Bart Van Assche
Cc: Christian Heusel, Martin K . Petersen, linux-scsi, linux-block,
linux-fsdevel, Jens Axboe, Christoph Hellwig, Daejun Park,
Kanchan Joshi, Damien Le Moal, James E.J. Bottomley, regressions
On Wed, Jun 12, 2024 at 2:08 AM Bart Van Assche <bvanassche@acm.org> wrote:
> On 6/11/24 2:21 PM, Christian Heusel wrote:
> > On 24/06/11 11:57PM, Andy Shevchenko wrote:
> >> Tue, Jan 30, 2024 at 01:48:37PM -0800, Bart Van Assche kirjoitti:
> >>> Recently T10 standardized SBC constrained streams. This mechanism allows
> >>> to pass data lifetime information to SCSI devices in the group number
> >>> field. Add support for translating write hint information into a
> >>> permanent stream number in the sd driver. Use WRITE(10) instead of
> >>> WRITE(6) if data lifetime information is present because the WRITE(6)
> >>> command does not have a GROUP NUMBER field.
> >>
> >> This patch broke very badly my connected Garmin FR35 sport watch. The boot time
> >> increased by 1 minute along with broken access to USB mass storage.
> >>
> >> On the reboot it takes ages as well.
> >>
> >> Revert of this and one little dependency (unrelated by functional means) helps.
> >
> > We have tested that the revert fixes the issue on top of v6.10-rc3.
> >
> > Also adding the regressions list in CC and making regzbot aware of this
> > issue.
> >
> >> Details are here: https://gitlab.archlinux.org/archlinux/packaging/packages/linux/-/issues/60
> >>
> >> P.S. Big thanks to Arch Linux team to help with bisection!
> >
> > If this is fixed adding in a "Reported-by" or "Bisected-by" (depending
> > on what this subsystem uses) for me would be appreciated :)
>
> Thank you Christian for having gone through the painful process of
> bisecting this issue.
> Is the Garmin FR35 Flash device perhaps connected to a USB bus? If so,
Yes. It is written above in my report here.
> this is the second report of a USB storage device that resets if it
> receives a query for the IO Advice Hints Grouping mode page. Does the
> patch below help?
I tested the kernel build from
https://gitlab.archlinux.org/archlinux/packaging/packages/linux/-/issues/60#note_190857
which claims to have this fix and it fixes my issue.
Reported-and-tested-by: Andy Shevchenko <andy.shevchenko@gmail.com>
P.S. And please add any suitable tag for Christian, thank you!
--
With Best Regards,
Andy Shevchenko
^ permalink raw reply [flat|nested] 42+ messages in thread
* Re: [PATCH v9 11/19] scsi: sd: Translate data lifetime information
2024-06-12 5:35 ` Andy Shevchenko
@ 2024-06-12 5:42 ` Andy Shevchenko
0 siblings, 0 replies; 42+ messages in thread
From: Andy Shevchenko @ 2024-06-12 5:42 UTC (permalink / raw)
To: Bart Van Assche
Cc: Christian Heusel, Martin K . Petersen, linux-scsi, linux-block,
linux-fsdevel, Jens Axboe, Christoph Hellwig, Daejun Park,
Kanchan Joshi, Damien Le Moal, James E.J. Bottomley, regressions
On Wed, Jun 12, 2024 at 8:35 AM Andy Shevchenko
<andy.shevchenko@gmail.com> wrote:
> On Wed, Jun 12, 2024 at 2:08 AM Bart Van Assche <bvanassche@acm.org> wrote:
> > On 6/11/24 2:21 PM, Christian Heusel wrote:
> > > On 24/06/11 11:57PM, Andy Shevchenko wrote:
...
> > >> This patch broke very badly my connected Garmin FR35 sport watch. The boot time
> > >> increased by 1 minute along with broken access to USB mass storage.
...
> > Is the Garmin FR35 Flash device perhaps connected to a USB bus? If so,
>
> Yes. It is written above in my report here.
Hmm... Rereading myself and it seems it was not obvious that it's a
USB mass storage device. Sorry for the confusion.
--
With Best Regards,
Andy Shevchenko
^ permalink raw reply [flat|nested] 42+ messages in thread
* [PATCH v9 12/19] scsi: scsi_debug: Reduce code duplication
2024-01-30 21:48 [PATCH v9 00/19] Pass data lifetime information to SCSI disk devices Bart Van Assche
` (10 preceding siblings ...)
2024-01-30 21:48 ` [PATCH v9 11/19] scsi: sd: Translate data lifetime information Bart Van Assche
@ 2024-01-30 21:48 ` Bart Van Assche
2024-01-30 21:48 ` [PATCH v9 13/19] scsi: scsi_debug: Support the block limits extension VPD page Bart Van Assche
` (6 subsequent siblings)
18 siblings, 0 replies; 42+ messages in thread
From: Bart Van Assche @ 2024-01-30 21:48 UTC (permalink / raw)
To: Martin K . Petersen
Cc: linux-scsi, linux-block, linux-fsdevel, Jens Axboe,
Christoph Hellwig, Daejun Park, Kanchan Joshi, Bart Van Assche,
Avri Altman, Douglas Gilbert, James E.J. Bottomley
All VPD pages have the page code in byte one. Reduce code duplication by
storing the VPD page code once.
Reviewed-by: Avri Altman <avri.altman@wdc.com>
Cc: Martin K. Petersen <martin.petersen@oracle.com>
Cc: Douglas Gilbert <dgilbert@interlog.com>
Tested-by: Douglas Gilbert <dgilbert@interlog.com>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
---
drivers/scsi/scsi_debug.c | 16 ++--------------
1 file changed, 2 insertions(+), 14 deletions(-)
diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c
index d03d66f11493..994a2b829b94 100644
--- a/drivers/scsi/scsi_debug.c
+++ b/drivers/scsi/scsi_debug.c
@@ -1903,7 +1903,8 @@ static int resp_inquiry(struct scsi_cmnd *scp, struct sdebug_dev_info *devip)
u32 len;
char lu_id_str[6];
int host_no = devip->sdbg_host->shost->host_no;
-
+
+ arr[1] = cmd[2];
port_group_id = (((host_no + 1) & 0x7f) << 8) +
(devip->channel & 0x7f);
if (sdebug_vpd_use_hostno == 0)
@@ -1914,7 +1915,6 @@ static int resp_inquiry(struct scsi_cmnd *scp, struct sdebug_dev_info *devip)
(devip->target * 1000) - 3;
len = scnprintf(lu_id_str, 6, "%d", lu_id_num);
if (0 == cmd[2]) { /* supported vital product data pages */
- arr[1] = cmd[2]; /*sanity */
n = 4;
arr[n++] = 0x0; /* this page */
arr[n++] = 0x80; /* unit serial number */
@@ -1935,23 +1935,18 @@ static int resp_inquiry(struct scsi_cmnd *scp, struct sdebug_dev_info *devip)
}
arr[3] = n - 4; /* number of supported VPD pages */
} else if (0x80 == cmd[2]) { /* unit serial number */
- arr[1] = cmd[2]; /*sanity */
arr[3] = len;
memcpy(&arr[4], lu_id_str, len);
} else if (0x83 == cmd[2]) { /* device identification */
- arr[1] = cmd[2]; /*sanity */
arr[3] = inquiry_vpd_83(&arr[4], port_group_id,
target_dev_id, lu_id_num,
lu_id_str, len,
&devip->lu_name);
} else if (0x84 == cmd[2]) { /* Software interface ident. */
- arr[1] = cmd[2]; /*sanity */
arr[3] = inquiry_vpd_84(&arr[4]);
} else if (0x85 == cmd[2]) { /* Management network addresses */
- arr[1] = cmd[2]; /*sanity */
arr[3] = inquiry_vpd_85(&arr[4]);
} else if (0x86 == cmd[2]) { /* extended inquiry */
- arr[1] = cmd[2]; /*sanity */
arr[3] = 0x3c; /* number of following entries */
if (sdebug_dif == T10_PI_TYPE3_PROTECTION)
arr[4] = 0x4; /* SPT: GRD_CHK:1 */
@@ -1961,30 +1956,23 @@ static int resp_inquiry(struct scsi_cmnd *scp, struct sdebug_dev_info *devip)
arr[4] = 0x0; /* no protection stuff */
arr[5] = 0x7; /* head of q, ordered + simple q's */
} else if (0x87 == cmd[2]) { /* mode page policy */
- arr[1] = cmd[2]; /*sanity */
arr[3] = 0x8; /* number of following entries */
arr[4] = 0x2; /* disconnect-reconnect mp */
arr[6] = 0x80; /* mlus, shared */
arr[8] = 0x18; /* protocol specific lu */
arr[10] = 0x82; /* mlus, per initiator port */
} else if (0x88 == cmd[2]) { /* SCSI Ports */
- arr[1] = cmd[2]; /*sanity */
arr[3] = inquiry_vpd_88(&arr[4], target_dev_id);
} else if (is_disk_zbc && 0x89 == cmd[2]) { /* ATA info */
- arr[1] = cmd[2]; /*sanity */
n = inquiry_vpd_89(&arr[4]);
put_unaligned_be16(n, arr + 2);
} else if (is_disk_zbc && 0xb0 == cmd[2]) { /* Block limits */
- arr[1] = cmd[2]; /*sanity */
arr[3] = inquiry_vpd_b0(&arr[4]);
} else if (is_disk_zbc && 0xb1 == cmd[2]) { /* Block char. */
- arr[1] = cmd[2]; /*sanity */
arr[3] = inquiry_vpd_b1(devip, &arr[4]);
} else if (is_disk && 0xb2 == cmd[2]) { /* LB Prov. */
- arr[1] = cmd[2]; /*sanity */
arr[3] = inquiry_vpd_b2(&arr[4]);
} else if (is_zbc && cmd[2] == 0xb6) { /* ZB dev. charact. */
- arr[1] = cmd[2]; /*sanity */
arr[3] = inquiry_vpd_b6(devip, &arr[4]);
} else {
mk_sense_invalid_fld(scp, SDEB_IN_CDB, 2, -1);
^ permalink raw reply related [flat|nested] 42+ messages in thread* [PATCH v9 13/19] scsi: scsi_debug: Support the block limits extension VPD page
2024-01-30 21:48 [PATCH v9 00/19] Pass data lifetime information to SCSI disk devices Bart Van Assche
` (11 preceding siblings ...)
2024-01-30 21:48 ` [PATCH v9 12/19] scsi: scsi_debug: Reduce code duplication Bart Van Assche
@ 2024-01-30 21:48 ` Bart Van Assche
2024-01-30 21:48 ` [PATCH v9 14/19] scsi: scsi_debug: Rework page code error handling Bart Van Assche
` (5 subsequent siblings)
18 siblings, 0 replies; 42+ messages in thread
From: Bart Van Assche @ 2024-01-30 21:48 UTC (permalink / raw)
To: Martin K . Petersen
Cc: linux-scsi, linux-block, linux-fsdevel, Jens Axboe,
Christoph Hellwig, Daejun Park, Kanchan Joshi, Bart Van Assche,
Douglas Gilbert, James E.J. Bottomley
From SBC-5 r05:
"Reduced stream control:
a) reduces the maximum number of streams that the device server supports;
and
b) increases the number of write commands that are able to specify a stream
to be written in any write command that contains the GROUP NUMBER field
in its CDB.
If the RSCS bit (see 6.6.5) is set to one, then the device server shall:
a) support per group stream identifier usage as described in 4.32.2;
b) support the IO Advice Hints Grouping mode page (see 6.5.7); and
c) set the MAXIMUM NUMBER OF STREAMS field (see 6.6.5) to a value that is
less than 64.
Device servers that set the RSCS bit to one may support other features
(e.g., permanent streams (see 4.32.4)).
4.32.4 Permanent streams
A permanent stream is a stream for which the device server does not allow
closing or otherwise modifying the configuration of that stream. The PERM
bit (see 5.9.2.3) indicates whether a stream is a permanent stream. If a
STREAM CONTROL command (see 5.32) specifies the closing of a permanent
stream, the device server terminates that command with CHECK CONDITION
status instead of closing the specified stream. A permanent stream is always
an open stream. Device servers should assign the lowest numbered stream
identifiers to permanent streams."
Report that reduced stream control is supported.
Cc: Martin K. Petersen <martin.petersen@oracle.com>
Cc: Douglas Gilbert <dgilbert@interlog.com>
Tested-by: Douglas Gilbert <dgilbert@interlog.com>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
---
drivers/scsi/scsi_debug.c | 16 ++++++++++++++++
1 file changed, 16 insertions(+)
diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c
index 994a2b829b94..8dba838ef983 100644
--- a/drivers/scsi/scsi_debug.c
+++ b/drivers/scsi/scsi_debug.c
@@ -1867,6 +1867,19 @@ static int inquiry_vpd_b6(struct sdebug_dev_info *devip, unsigned char *arr)
return 0x3c;
}
+#define SDEBUG_BLE_LEN_AFTER_B4 28 /* thus vpage 32 bytes long */
+
+enum { MAXIMUM_NUMBER_OF_STREAMS = 6, PERMANENT_STREAM_COUNT = 5 };
+
+/* Block limits extension VPD page (SBC-4): fills arrb4[] and returns the number of bytes filled */
+static int inquiry_vpd_b7(unsigned char *arrb4)
+{
+ memset(arrb4, 0, SDEBUG_BLE_LEN_AFTER_B4); /* every field not set below stays zero */
+ arrb4[1] = 1; /* Reduced stream control support (RSCS) */
+ put_unaligned_be16(MAXIMUM_NUMBER_OF_STREAMS, &arrb4[2]); /* stream limit, big-endian, at arrb4[2..3] */
+ return SDEBUG_BLE_LEN_AFTER_B4; /* resp_inquiry() stores this value in arr[3] */
+}
+
#define SDEBUG_LONG_INQ_SZ 96
#define SDEBUG_MAX_INQ_ARR_SZ 584
@@ -1932,6 +1945,7 @@ static int resp_inquiry(struct scsi_cmnd *scp, struct sdebug_dev_info *devip)
arr[n++] = 0xb2; /* LB Provisioning */
if (is_zbc)
arr[n++] = 0xb6; /* ZB dev. char. */
+ arr[n++] = 0xb7; /* Block limits extension */
}
arr[3] = n - 4; /* number of supported VPD pages */
} else if (0x80 == cmd[2]) { /* unit serial number */
@@ -1974,6 +1988,8 @@ static int resp_inquiry(struct scsi_cmnd *scp, struct sdebug_dev_info *devip)
arr[3] = inquiry_vpd_b2(&arr[4]);
} else if (is_zbc && cmd[2] == 0xb6) { /* ZB dev. charact. */
arr[3] = inquiry_vpd_b6(devip, &arr[4]);
+ } else if (cmd[2] == 0xb7) { /* block limits extension page */
+ arr[3] = inquiry_vpd_b7(&arr[4]);
} else {
mk_sense_invalid_fld(scp, SDEB_IN_CDB, 2, -1);
kfree(arr);
^ permalink raw reply related [flat|nested] 42+ messages in thread* [PATCH v9 14/19] scsi: scsi_debug: Rework page code error handling
2024-01-30 21:48 [PATCH v9 00/19] Pass data lifetime information to SCSI disk devices Bart Van Assche
` (12 preceding siblings ...)
2024-01-30 21:48 ` [PATCH v9 13/19] scsi: scsi_debug: Support the block limits extension VPD page Bart Van Assche
@ 2024-01-30 21:48 ` Bart Van Assche
2024-01-30 21:48 ` [PATCH v9 15/19] scsi: scsi_debug: Rework subpage " Bart Van Assche
` (4 subsequent siblings)
18 siblings, 0 replies; 42+ messages in thread
From: Bart Van Assche @ 2024-01-30 21:48 UTC (permalink / raw)
To: Martin K . Petersen
Cc: linux-scsi, linux-block, linux-fsdevel, Jens Axboe,
Christoph Hellwig, Daejun Park, Kanchan Joshi, Bart Van Assche,
Douglas Gilbert, James E.J. Bottomley
Instead of tracking whether or not the page code is valid in a boolean
variable, jump to error handling code if an unsupported page code is
encountered.
Cc: Martin K. Petersen <martin.petersen@oracle.com>
Cc: Douglas Gilbert <dgilbert@interlog.com>
Tested-by: Douglas Gilbert <dgilbert@interlog.com>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
---
drivers/scsi/scsi_debug.c | 24 ++++++++++++------------
1 file changed, 12 insertions(+), 12 deletions(-)
diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c
index 8dba838ef983..ba0a29e6a86d 100644
--- a/drivers/scsi/scsi_debug.c
+++ b/drivers/scsi/scsi_debug.c
@@ -2644,7 +2644,7 @@ static int resp_mode_sense(struct scsi_cmnd *scp,
unsigned char *ap;
unsigned char arr[SDEBUG_MAX_MSENSE_SZ];
unsigned char *cmd = scp->cmnd;
- bool dbd, llbaa, msense_6, is_disk, is_zbc, bad_pcode;
+ bool dbd, llbaa, msense_6, is_disk, is_zbc;
dbd = !!(cmd[1] & 0x8); /* disable block descriptors */
pcontrol = (cmd[2] & 0xc0) >> 6;
@@ -2708,7 +2708,6 @@ static int resp_mode_sense(struct scsi_cmnd *scp,
mk_sense_invalid_fld(scp, SDEB_IN_CDB, 3, -1);
return check_condition_result;
}
- bad_pcode = false;
switch (pcode) {
case 0x1: /* Read-Write error recovery page, direct access */
@@ -2723,15 +2722,17 @@ static int resp_mode_sense(struct scsi_cmnd *scp,
if (is_disk) {
len = resp_format_pg(ap, pcontrol, target);
offset += len;
- } else
- bad_pcode = true;
+ } else {
+ goto bad_pcode;
+ }
break;
case 0x8: /* Caching page, direct access */
if (is_disk || is_zbc) {
len = resp_caching_pg(ap, pcontrol, target);
offset += len;
- } else
- bad_pcode = true;
+ } else {
+ goto bad_pcode;
+ }
break;
case 0xa: /* Control Mode page, all devices */
len = resp_ctrl_m_pg(ap, pcontrol, target);
@@ -2784,18 +2785,17 @@ static int resp_mode_sense(struct scsi_cmnd *scp,
}
break;
default:
- bad_pcode = true;
- break;
- }
- if (bad_pcode) {
- mk_sense_invalid_fld(scp, SDEB_IN_CDB, 2, 5);
- return check_condition_result;
+ goto bad_pcode;
}
if (msense_6)
arr[0] = offset - 1;
else
put_unaligned_be16((offset - 2), arr + 0);
return fill_from_dev_buffer(scp, arr, min_t(u32, alloc_len, offset));
+
+bad_pcode:
+ mk_sense_invalid_fld(scp, SDEB_IN_CDB, 2, 5);
+ return check_condition_result;
}
#define SDEBUG_MAX_MSELECT_SZ 512
^ permalink raw reply related [flat|nested] 42+ messages in thread* [PATCH v9 15/19] scsi: scsi_debug: Rework subpage code error handling
2024-01-30 21:48 [PATCH v9 00/19] Pass data lifetime information to SCSI disk devices Bart Van Assche
` (13 preceding siblings ...)
2024-01-30 21:48 ` [PATCH v9 14/19] scsi: scsi_debug: Rework page code error handling Bart Van Assche
@ 2024-01-30 21:48 ` Bart Van Assche
2024-01-30 21:48 ` [PATCH v9 16/19] scsi: scsi_debug: Allocate the MODE SENSE response from the heap Bart Van Assche
` (3 subsequent siblings)
18 siblings, 0 replies; 42+ messages in thread
From: Bart Van Assche @ 2024-01-30 21:48 UTC (permalink / raw)
To: Martin K . Petersen
Cc: linux-scsi, linux-block, linux-fsdevel, Jens Axboe,
Christoph Hellwig, Daejun Park, Kanchan Joshi, Bart Van Assche,
Douglas Gilbert, James E.J. Bottomley
Move the subpage code checks into the switch statement to make it easier
to add support for new page code / subpage code combinations.
Cc: Martin K. Petersen <martin.petersen@oracle.com>
Cc: Douglas Gilbert <dgilbert@interlog.com>
Tested-by: Douglas Gilbert <dgilbert@interlog.com>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
---
drivers/scsi/scsi_debug.c | 70 ++++++++++++++++++++-------------------
1 file changed, 36 insertions(+), 34 deletions(-)
diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c
index ba0a29e6a86d..67a8e6243e5e 100644
--- a/drivers/scsi/scsi_debug.c
+++ b/drivers/scsi/scsi_debug.c
@@ -2703,22 +2703,22 @@ static int resp_mode_sense(struct scsi_cmnd *scp,
ap = arr + offset;
}
- if ((subpcode > 0x0) && (subpcode < 0xff) && (0x19 != pcode)) {
- /* TODO: Control Extension page */
- mk_sense_invalid_fld(scp, SDEB_IN_CDB, 3, -1);
- return check_condition_result;
- }
-
switch (pcode) {
case 0x1: /* Read-Write error recovery page, direct access */
+ if (subpcode > 0x0 && subpcode < 0xff)
+ goto bad_subpcode;
len = resp_err_recov_pg(ap, pcontrol, target);
offset += len;
break;
case 0x2: /* Disconnect-Reconnect page, all devices */
+ if (subpcode > 0x0 && subpcode < 0xff)
+ goto bad_subpcode;
len = resp_disconnect_pg(ap, pcontrol, target);
offset += len;
break;
case 0x3: /* Format device page, direct access */
+ if (subpcode > 0x0 && subpcode < 0xff)
+ goto bad_subpcode;
if (is_disk) {
len = resp_format_pg(ap, pcontrol, target);
offset += len;
@@ -2727,6 +2727,8 @@ static int resp_mode_sense(struct scsi_cmnd *scp,
}
break;
case 0x8: /* Caching page, direct access */
+ if (subpcode > 0x0 && subpcode < 0xff)
+ goto bad_subpcode;
if (is_disk || is_zbc) {
len = resp_caching_pg(ap, pcontrol, target);
offset += len;
@@ -2735,14 +2737,14 @@ static int resp_mode_sense(struct scsi_cmnd *scp,
}
break;
case 0xa: /* Control Mode page, all devices */
+ if (subpcode > 0x0 && subpcode < 0xff)
+ goto bad_subpcode;
len = resp_ctrl_m_pg(ap, pcontrol, target);
offset += len;
break;
case 0x19: /* if spc==1 then sas phy, control+discover */
- if ((subpcode > 0x2) && (subpcode < 0xff)) {
- mk_sense_invalid_fld(scp, SDEB_IN_CDB, 3, -1);
- return check_condition_result;
- }
+ if (subpcode > 0x2 && subpcode < 0xff)
+ goto bad_subpcode;
len = 0;
if ((0x0 == subpcode) || (0xff == subpcode))
len += resp_sas_sf_m_pg(ap + len, pcontrol, target);
@@ -2754,35 +2756,31 @@ static int resp_mode_sense(struct scsi_cmnd *scp,
offset += len;
break;
case 0x1c: /* Informational Exceptions Mode page, all devices */
+ if (subpcode > 0x0 && subpcode < 0xff)
+ goto bad_subpcode;
len = resp_iec_m_pg(ap, pcontrol, target);
offset += len;
break;
case 0x3f: /* Read all Mode pages */
- if ((0 == subpcode) || (0xff == subpcode)) {
- len = resp_err_recov_pg(ap, pcontrol, target);
- len += resp_disconnect_pg(ap + len, pcontrol, target);
- if (is_disk) {
- len += resp_format_pg(ap + len, pcontrol,
- target);
- len += resp_caching_pg(ap + len, pcontrol,
- target);
- } else if (is_zbc) {
- len += resp_caching_pg(ap + len, pcontrol,
- target);
- }
- len += resp_ctrl_m_pg(ap + len, pcontrol, target);
- len += resp_sas_sf_m_pg(ap + len, pcontrol, target);
- if (0xff == subpcode) {
- len += resp_sas_pcd_m_spg(ap + len, pcontrol,
- target, target_dev_id);
- len += resp_sas_sha_m_spg(ap + len, pcontrol);
- }
- len += resp_iec_m_pg(ap + len, pcontrol, target);
- offset += len;
- } else {
- mk_sense_invalid_fld(scp, SDEB_IN_CDB, 3, -1);
- return check_condition_result;
+ if (subpcode > 0x0 && subpcode < 0xff)
+ goto bad_subpcode;
+ len = resp_err_recov_pg(ap, pcontrol, target);
+ len += resp_disconnect_pg(ap + len, pcontrol, target);
+ if (is_disk) {
+ len += resp_format_pg(ap + len, pcontrol, target);
+ len += resp_caching_pg(ap + len, pcontrol, target);
+ } else if (is_zbc) {
+ len += resp_caching_pg(ap + len, pcontrol, target);
+ }
+ len += resp_ctrl_m_pg(ap + len, pcontrol, target);
+ len += resp_sas_sf_m_pg(ap + len, pcontrol, target);
+ if (0xff == subpcode) {
+ len += resp_sas_pcd_m_spg(ap + len, pcontrol, target,
+ target_dev_id);
+ len += resp_sas_sha_m_spg(ap + len, pcontrol);
}
+ len += resp_iec_m_pg(ap + len, pcontrol, target);
+ offset += len;
break;
default:
goto bad_pcode;
@@ -2796,6 +2794,10 @@ static int resp_mode_sense(struct scsi_cmnd *scp,
bad_pcode:
mk_sense_invalid_fld(scp, SDEB_IN_CDB, 2, 5);
return check_condition_result;
+
+bad_subpcode:
+ mk_sense_invalid_fld(scp, SDEB_IN_CDB, 3, -1);
+ return check_condition_result;
}
#define SDEBUG_MAX_MSELECT_SZ 512
^ permalink raw reply related [flat|nested] 42+ messages in thread* [PATCH v9 16/19] scsi: scsi_debug: Allocate the MODE SENSE response from the heap
2024-01-30 21:48 [PATCH v9 00/19] Pass data lifetime information to SCSI disk devices Bart Van Assche
` (14 preceding siblings ...)
2024-01-30 21:48 ` [PATCH v9 15/19] scsi: scsi_debug: Rework subpage " Bart Van Assche
@ 2024-01-30 21:48 ` Bart Van Assche
2024-01-30 21:48 ` [PATCH v9 17/19] scsi: scsi_debug: Implement the IO Advice Hints Grouping mode page Bart Van Assche
` (2 subsequent siblings)
18 siblings, 0 replies; 42+ messages in thread
From: Bart Van Assche @ 2024-01-30 21:48 UTC (permalink / raw)
To: Martin K . Petersen
Cc: linux-scsi, linux-block, linux-fsdevel, Jens Axboe,
Christoph Hellwig, Daejun Park, Kanchan Joshi, Bart Van Assche,
Douglas Gilbert, James E.J. Bottomley
Make the MODE SENSE response buffer larger and allocate it from the heap.
This patch prepares for adding support for the IO Advice Hints Grouping
mode page.
Suggested-by: Douglas Gilbert <dgilbert@interlog.com>
Cc: Douglas Gilbert <dgilbert@interlog.com>
Tested-by: Douglas Gilbert <dgilbert@interlog.com>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
---
drivers/scsi/scsi_debug.c | 9 +++++++--
1 file changed, 7 insertions(+), 2 deletions(-)
diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c
index 67a8e6243e5e..b544498324f6 100644
--- a/drivers/scsi/scsi_debug.c
+++ b/drivers/scsi/scsi_debug.c
@@ -43,6 +43,7 @@
#include <linux/prefetch.h>
#include <linux/debugfs.h>
#include <linux/async.h>
+#include <linux/cleanup.h>
#include <net/checksum.h>
@@ -2631,7 +2632,8 @@ static int resp_sas_sha_m_spg(unsigned char *p, int pcontrol)
return sizeof(sas_sha_m_pg);
}
-#define SDEBUG_MAX_MSENSE_SZ 256
+/* PAGE_SIZE is more than necessary but provides room for future expansion. */
+#define SDEBUG_MAX_MSENSE_SZ PAGE_SIZE
static int resp_mode_sense(struct scsi_cmnd *scp,
struct sdebug_dev_info *devip)
@@ -2642,10 +2644,13 @@ static int resp_mode_sense(struct scsi_cmnd *scp,
int target_dev_id;
int target = scp->device->id;
unsigned char *ap;
- unsigned char arr[SDEBUG_MAX_MSENSE_SZ];
+ unsigned char *arr __free(kfree);
unsigned char *cmd = scp->cmnd;
bool dbd, llbaa, msense_6, is_disk, is_zbc;
+ arr = kzalloc(SDEBUG_MAX_MSENSE_SZ, GFP_ATOMIC);
+ if (!arr)
+ return -ENOMEM;
dbd = !!(cmd[1] & 0x8); /* disable block descriptors */
pcontrol = (cmd[2] & 0xc0) >> 6;
pcode = cmd[2] & 0x3f;
^ permalink raw reply related [flat|nested] 42+ messages in thread* [PATCH v9 17/19] scsi: scsi_debug: Implement the IO Advice Hints Grouping mode page
2024-01-30 21:48 [PATCH v9 00/19] Pass data lifetime information to SCSI disk devices Bart Van Assche
` (15 preceding siblings ...)
2024-01-30 21:48 ` [PATCH v9 16/19] scsi: scsi_debug: Allocate the MODE SENSE response from the heap Bart Van Assche
@ 2024-01-30 21:48 ` Bart Van Assche
2024-01-30 21:48 ` [PATCH v9 18/19] scsi: scsi_debug: Implement GET STREAM STATUS Bart Van Assche
2024-01-30 21:48 ` [PATCH v9 19/19] scsi: scsi_debug: Maintain write statistics per group number Bart Van Assche
18 siblings, 0 replies; 42+ messages in thread
From: Bart Van Assche @ 2024-01-30 21:48 UTC (permalink / raw)
To: Martin K . Petersen
Cc: linux-scsi, linux-block, linux-fsdevel, Jens Axboe,
Christoph Hellwig, Daejun Park, Kanchan Joshi, Bart Van Assche,
Douglas Gilbert, James E.J. Bottomley
Implement an IO Advice Hints Grouping mode page with five permanent
streams. A permanent stream is a stream for which the device server does
not allow closing or otherwise modifying the configuration of that
stream. The stream identifier enable (ST_ENBLE) bit specifies whether
the stream identifier may be used in the GROUP NUMBER field of SCSI
WRITE commands.
Cc: Martin K. Petersen <martin.petersen@oracle.com>
Cc: Douglas Gilbert <dgilbert@interlog.com>
Tested-by: Douglas Gilbert <dgilbert@interlog.com>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
---
drivers/scsi/scsi_debug.c | 61 +++++++++++++++++++++++++++++++++++++--
1 file changed, 58 insertions(+), 3 deletions(-)
diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c
index b544498324f6..ccf59b3e7602 100644
--- a/drivers/scsi/scsi_debug.c
+++ b/drivers/scsi/scsi_debug.c
@@ -1969,7 +1969,11 @@ static int resp_inquiry(struct scsi_cmnd *scp, struct sdebug_dev_info *devip)
arr[4] = 0x5; /* SPT: GRD_CHK:1, REF_CHK:1 */
else
arr[4] = 0x0; /* no protection stuff */
- arr[5] = 0x7; /* head of q, ordered + simple q's */
+ /*
+ * GROUP_SUP=1; HEADSUP=1 (HEAD OF QUEUE); ORDSUP=1
+ * (ORDERED queuing); SIMPSUP=1 (SIMPLE queuing).
+ */
+ arr[5] = 0x17;
} else if (0x87 == cmd[2]) { /* mode page policy */
arr[3] = 0x8; /* number of following entries */
arr[4] = 0x2; /* disconnect-reconnect mp */
@@ -2559,6 +2563,40 @@ static int resp_ctrl_m_pg(unsigned char *p, int pcontrol, int target)
return sizeof(ctrl_m_pg);
}
+/* IO Advice Hints Grouping mode page: copies the page image to p and returns its size in bytes */
+static int resp_grouping_m_pg(unsigned char *p, int pcontrol, int target)
+{
+ /* Page layout: a 16-byte header followed by one descriptor per stream */
+ struct grouping_m_pg {
+ u8 page_code; /* OR 0x40 when subpage_code > 0 */
+ u8 subpage_code;
+ __be16 page_length; /* big-endian; excludes the first four bytes of the page */
+ u8 reserved[12];
+ struct scsi_io_group_descriptor descr[MAXIMUM_NUMBER_OF_STREAMS];
+ };
+ static const struct grouping_m_pg gr_m_pg = {
+ .page_code = 0xa | 0x40, /* page 0xa, with 0x40 set because a subpage code is used */
+ .subpage_code = 5,
+ .page_length = cpu_to_be16(sizeof(gr_m_pg) - 4),
+ .descr = { /* ST_ENBLE is set in the first five descriptors and clear in the last */
+ { .st_enble = 1 },
+ { .st_enble = 1 },
+ { .st_enble = 1 },
+ { .st_enble = 1 },
+ { .st_enble = 1 },
+ { .st_enble = 0 },
+ }
+ };
+
+ BUILD_BUG_ON(sizeof(struct grouping_m_pg) !=
+ 16 + MAXIMUM_NUMBER_OF_STREAMS * 16); /* 16-byte header + 16 bytes per descriptor */
+ memcpy(p, &gr_m_pg, sizeof(gr_m_pg));
+ if (1 == pcontrol) {
+ /* There are no changeable values so clear from byte 4 on. */
+ memset(p + 4, 0, sizeof(gr_m_pg) - 4);
+ }
+ return sizeof(gr_m_pg); /* callers add this to their running length; target is not used here */
+}
static int resp_iec_m_pg(unsigned char *p, int pcontrol, int target)
{ /* Informational Exceptions control mode page for mode_sense */
@@ -2708,6 +2746,10 @@ static int resp_mode_sense(struct scsi_cmnd *scp,
ap = arr + offset;
}
+ /*
+ * N.B. If len>0 before resp_*_pg() call, then form of that call should be:
+ * len += resp_*_pg(ap + len, pcontrol, target);
+ */
switch (pcode) {
case 0x1: /* Read-Write error recovery page, direct access */
if (subpcode > 0x0 && subpcode < 0xff)
@@ -2742,9 +2784,20 @@ static int resp_mode_sense(struct scsi_cmnd *scp,
}
break;
case 0xa: /* Control Mode page, all devices */
- if (subpcode > 0x0 && subpcode < 0xff)
+ switch (subpcode) {
+ case 0:
+ len = resp_ctrl_m_pg(ap, pcontrol, target);
+ break;
+ case 0x05:
+ len = resp_grouping_m_pg(ap, pcontrol, target);
+ break;
+ case 0xff:
+ len = resp_ctrl_m_pg(ap, pcontrol, target);
+ len += resp_grouping_m_pg(ap + len, pcontrol, target);
+ break;
+ default:
goto bad_subpcode;
- len = resp_ctrl_m_pg(ap, pcontrol, target);
+ }
offset += len;
break;
case 0x19: /* if spc==1 then sas phy, control+discover */
@@ -2778,6 +2831,8 @@ static int resp_mode_sense(struct scsi_cmnd *scp,
len += resp_caching_pg(ap + len, pcontrol, target);
}
len += resp_ctrl_m_pg(ap + len, pcontrol, target);
+ if (0xff == subpcode)
+ len += resp_grouping_m_pg(ap + len, pcontrol, target);
len += resp_sas_sf_m_pg(ap + len, pcontrol, target);
if (0xff == subpcode) {
len += resp_sas_pcd_m_spg(ap + len, pcontrol, target,
^ permalink raw reply related [flat|nested] 42+ messages in thread* [PATCH v9 18/19] scsi: scsi_debug: Implement GET STREAM STATUS
2024-01-30 21:48 [PATCH v9 00/19] Pass data lifetime information to SCSI disk devices Bart Van Assche
` (16 preceding siblings ...)
2024-01-30 21:48 ` [PATCH v9 17/19] scsi: scsi_debug: Implement the IO Advice Hints Grouping mode page Bart Van Assche
@ 2024-01-30 21:48 ` Bart Van Assche
2024-01-30 21:48 ` [PATCH v9 19/19] scsi: scsi_debug: Maintain write statistics per group number Bart Van Assche
18 siblings, 0 replies; 42+ messages in thread
From: Bart Van Assche @ 2024-01-30 21:48 UTC (permalink / raw)
To: Martin K . Petersen
Cc: linux-scsi, linux-block, linux-fsdevel, Jens Axboe,
Christoph Hellwig, Daejun Park, Kanchan Joshi, Bart Van Assche,
Douglas Gilbert, James E.J. Bottomley
Implement the GET STREAM STATUS SCSI command. Report that the first
five stream indexes correspond to permanent streams.
Cc: Martin K. Petersen <martin.petersen@oracle.com>
Cc: Douglas Gilbert <dgilbert@interlog.com>
Tested-by: Douglas Gilbert <dgilbert@interlog.com>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
---
drivers/scsi/scsi_debug.c | 50 +++++++++++++++++++++++++++++++++++++++
1 file changed, 50 insertions(+)
diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c
index ccf59b3e7602..497045e54300 100644
--- a/drivers/scsi/scsi_debug.c
+++ b/drivers/scsi/scsi_debug.c
@@ -533,6 +533,8 @@ static int resp_write_scat(struct scsi_cmnd *, struct sdebug_dev_info *);
static int resp_start_stop(struct scsi_cmnd *, struct sdebug_dev_info *);
static int resp_readcap16(struct scsi_cmnd *, struct sdebug_dev_info *);
static int resp_get_lba_status(struct scsi_cmnd *, struct sdebug_dev_info *);
+static int resp_get_stream_status(struct scsi_cmnd *scp,
+ struct sdebug_dev_info *devip);
static int resp_report_tgtpgs(struct scsi_cmnd *, struct sdebug_dev_info *);
static int resp_unmap(struct scsi_cmnd *, struct sdebug_dev_info *);
static int resp_rsup_opcodes(struct scsi_cmnd *, struct sdebug_dev_info *);
@@ -607,6 +609,9 @@ static const struct opcode_info_t sa_in_16_iarr[] = {
{0, 0x9e, 0x12, F_SA_LOW | F_D_IN, resp_get_lba_status, NULL,
{16, 0x12, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xff, 0xff, 0xff, 0, 0xc7} }, /* GET LBA STATUS(16) */
+ {0, 0x9e, 0x16, F_SA_LOW | F_D_IN, resp_get_stream_status, NULL,
+ {16, 0x16, 0, 0, 0xff, 0xff, 0, 0, 0, 0, 0xff, 0xff, 0xff, 0xff,
+ 0, 0} }, /* GET STREAM STATUS */
};
static const struct opcode_info_t vl_iarr[] = { /* VARIABLE LENGTH */
@@ -4573,6 +4578,51 @@ static int resp_get_lba_status(struct scsi_cmnd *scp,
return fill_from_dev_buffer(scp, arr, SDEBUG_GET_LBA_STATUS_LEN);
}
+static int resp_get_stream_status(struct scsi_cmnd *scp,
+ struct sdebug_dev_info *devip)
+{ /* GET STREAM STATUS: emits one 8-byte descriptor per stream id, starting at the requested id */
+ u16 starting_stream_id, stream_id;
+ const u8 *cmd = scp->cmnd;
+ u32 alloc_len, offset;
+ u8 arr[256] = {}; /* zero-filled response: header at offset 0, descriptors from offset 8 */
+ struct scsi_stream_status_header *h = (void *)arr;
+
+ starting_stream_id = get_unaligned_be16(cmd + 4); /* CDB bytes 4-5 */
+ alloc_len = get_unaligned_be32(cmd + 10); /* CDB bytes 10-13 */
+
+ if (alloc_len < 8) { /* rejected: smaller than the offset of the first descriptor */
+ mk_sense_invalid_fld(scp, SDEB_IN_CDB, 10, -1);
+ return check_condition_result;
+ }
+
+ if (starting_stream_id >= MAXIMUM_NUMBER_OF_STREAMS) { /* no stream with that id */
+ mk_sense_invalid_fld(scp, SDEB_IN_CDB, 4, -1);
+ return check_condition_result;
+ }
+
+ /*
+ * The GET STREAM STATUS command only reports status information
+ * about open streams. Treat the non-permanent stream as open.
+ */
+ put_unaligned_be16(MAXIMUM_NUMBER_OF_STREAMS,
+ &h->number_of_open_streams);
+
+ for (offset = 8, stream_id = starting_stream_id;
+ offset + 8 <= min_t(u32, alloc_len, sizeof(arr)) && /* room for one more full descriptor */
+ stream_id < MAXIMUM_NUMBER_OF_STREAMS;
+ offset += 8, stream_id++) {
+ struct scsi_stream_status *stream_status = (void *)arr + offset;
+
+ stream_status->perm = stream_id < PERMANENT_STREAM_COUNT; /* the lowest ids are the permanent streams */
+ put_unaligned_be16(stream_id,
+ &stream_status->stream_identifier);
+ stream_status->rel_lifetime = stream_id + 1;
+ }
+ put_unaligned_be32(offset - 8, &h->len); /* PARAMETER DATA LENGTH: bytes of descriptors written above */
+
+ return fill_from_dev_buffer(scp, arr, min(offset, alloc_len)); /* capped at the allocation length */
+}
+
static int resp_sync_cache(struct scsi_cmnd *scp,
struct sdebug_dev_info *devip)
{
^ permalink raw reply related [flat|nested] 42+ messages in thread* [PATCH v9 19/19] scsi: scsi_debug: Maintain write statistics per group number
2024-01-30 21:48 [PATCH v9 00/19] Pass data lifetime information to SCSI disk devices Bart Van Assche
` (17 preceding siblings ...)
2024-01-30 21:48 ` [PATCH v9 18/19] scsi: scsi_debug: Implement GET STREAM STATUS Bart Van Assche
@ 2024-01-30 21:48 ` Bart Van Assche
18 siblings, 0 replies; 42+ messages in thread
From: Bart Van Assche @ 2024-01-30 21:48 UTC (permalink / raw)
To: Martin K . Petersen
Cc: linux-scsi, linux-block, linux-fsdevel, Jens Axboe,
Christoph Hellwig, Daejun Park, Kanchan Joshi, Bart Van Assche,
Douglas Gilbert, James E.J. Bottomley
Track per GROUP NUMBER how many write commands have been processed. Make
this information available in sysfs. Reset these statistics if any data
is written into the sysfs attribute.
Note: SCSI devices should only interpret the information in the GROUP
NUMBER field as a stream identifier if the ST_ENBLE bit has been set to
one. This patch follows a simpler approach: count the number of writes
per GROUP NUMBER whether or not the group number represents a stream
identifier.
Cc: Martin K. Petersen <martin.petersen@oracle.com>
Cc: Douglas Gilbert <dgilbert@interlog.com>
Tested-by: Douglas Gilbert <dgilbert@interlog.com>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
---
drivers/scsi/scsi_debug.c | 49 +++++++++++++++++++++++++++++++++++----
1 file changed, 45 insertions(+), 4 deletions(-)
diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c
index 497045e54300..ce52d580d1aa 100644
--- a/drivers/scsi/scsi_debug.c
+++ b/drivers/scsi/scsi_debug.c
@@ -902,6 +902,8 @@ static int sdeb_zbc_nr_conv = DEF_ZBC_NR_CONV_ZONES;
static int submit_queues = DEF_SUBMIT_QUEUES; /* > 1 for multi-queue (mq) */
static int poll_queues; /* iouring iopoll interface.*/
+static atomic_long_t writes_by_group_number[64];
+
static char sdebug_proc_name[] = MY_NAME;
static const char *my_name = MY_NAME;
@@ -3377,7 +3379,8 @@ static inline struct sdeb_store_info *devip2sip(struct sdebug_dev_info *devip,
/* Returns number of bytes copied or -1 if error. */
static int do_device_access(struct sdeb_store_info *sip, struct scsi_cmnd *scp,
- u32 sg_skip, u64 lba, u32 num, bool do_write)
+ u32 sg_skip, u64 lba, u32 num, bool do_write,
+ u8 group_number)
{
int ret;
u64 block, rest = 0;
@@ -3396,6 +3399,10 @@ static int do_device_access(struct sdeb_store_info *sip, struct scsi_cmnd *scp,
return 0;
if (scp->sc_data_direction != dir)
return -1;
+
+ if (do_write && group_number < ARRAY_SIZE(writes_by_group_number))
+ atomic_long_inc(&writes_by_group_number[group_number]);
+
fsp = sip->storep;
block = do_div(lba, sdebug_store_sectors);
@@ -3769,7 +3776,7 @@ static int resp_read_dt0(struct scsi_cmnd *scp, struct sdebug_dev_info *devip)
}
}
- ret = do_device_access(sip, scp, 0, lba, num, false);
+ ret = do_device_access(sip, scp, 0, lba, num, false, 0);
sdeb_read_unlock(sip);
if (unlikely(ret == -1))
return DID_ERROR << 16;
@@ -3954,6 +3961,7 @@ static int resp_write_dt0(struct scsi_cmnd *scp, struct sdebug_dev_info *devip)
{
bool check_prot;
u32 num;
+ u8 group = 0;
u32 ei_lba;
int ret;
u64 lba;
@@ -3965,11 +3973,13 @@ static int resp_write_dt0(struct scsi_cmnd *scp, struct sdebug_dev_info *devip)
ei_lba = 0;
lba = get_unaligned_be64(cmd + 2);
num = get_unaligned_be32(cmd + 10);
+ group = cmd[14] & 0x3f;
check_prot = true;
break;
case WRITE_10:
ei_lba = 0;
lba = get_unaligned_be32(cmd + 2);
+ group = cmd[6] & 0x3f;
num = get_unaligned_be16(cmd + 7);
check_prot = true;
break;
@@ -3984,15 +3994,18 @@ static int resp_write_dt0(struct scsi_cmnd *scp, struct sdebug_dev_info *devip)
ei_lba = 0;
lba = get_unaligned_be32(cmd + 2);
num = get_unaligned_be32(cmd + 6);
+ group = cmd[6] & 0x3f;
check_prot = true;
break;
case 0x53: /* XDWRITEREAD(10) */
ei_lba = 0;
lba = get_unaligned_be32(cmd + 2);
+ group = cmd[6] & 0x1f;
num = get_unaligned_be16(cmd + 7);
check_prot = false;
break;
default: /* assume WRITE(32) */
+ group = cmd[6] & 0x3f;
lba = get_unaligned_be64(cmd + 12);
ei_lba = get_unaligned_be32(cmd + 20);
num = get_unaligned_be32(cmd + 28);
@@ -4047,7 +4060,7 @@ static int resp_write_dt0(struct scsi_cmnd *scp, struct sdebug_dev_info *devip)
}
}
- ret = do_device_access(sip, scp, 0, lba, num, true);
+ ret = do_device_access(sip, scp, 0, lba, num, true, group);
if (unlikely(scsi_debug_lbp()))
map_region(sip, lba, num);
/* If ZBC zone then bump its write pointer */
@@ -4099,12 +4112,14 @@ static int resp_write_scat(struct scsi_cmnd *scp,
u32 lb_size = sdebug_sector_size;
u32 ei_lba;
u64 lba;
+ u8 group;
int ret, res;
bool is_16;
static const u32 lrd_size = 32; /* + parameter list header size */
if (cmd[0] == VARIABLE_LENGTH_CMD) {
is_16 = false;
+ group = cmd[6] & 0x3f;
wrprotect = (cmd[10] >> 5) & 0x7;
lbdof = get_unaligned_be16(cmd + 12);
num_lrd = get_unaligned_be16(cmd + 16);
@@ -4115,6 +4130,7 @@ static int resp_write_scat(struct scsi_cmnd *scp,
lbdof = get_unaligned_be16(cmd + 4);
num_lrd = get_unaligned_be16(cmd + 8);
bt_len = get_unaligned_be32(cmd + 10);
+ group = cmd[14] & 0x3f;
if (unlikely(have_dif_prot)) {
if (sdebug_dif == T10_PI_TYPE2_PROTECTION &&
wrprotect) {
@@ -4203,7 +4219,7 @@ static int resp_write_scat(struct scsi_cmnd *scp,
}
}
- ret = do_device_access(sip, scp, sg_off, lba, num, true);
+ ret = do_device_access(sip, scp, sg_off, lba, num, true, group);
/* If ZBC zone then bump its write pointer */
if (sdebug_dev_is_zoned(devip))
zbc_inc_wp(devip, lba, num);
@@ -7298,6 +7314,30 @@ static ssize_t tur_ms_to_ready_show(struct device_driver *ddp, char *buf)
}
static DRIVER_ATTR_RO(tur_ms_to_ready);
+static ssize_t group_number_stats_show(struct device_driver *ddp, char *buf)
+{ /* sysfs show: prints one "<group number> <write count>" line per group number */
+ char *p = buf, *end = buf + PAGE_SIZE; /* output is bounded to PAGE_SIZE bytes */
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(writes_by_group_number); i++)
+ p += scnprintf(p, end - p, "%d %ld\n", i,
+ atomic_long_read(&writes_by_group_number[i]));
+
+ return p - buf; /* number of bytes placed in buf */
+}
+
+static ssize_t group_number_stats_store(struct device_driver *ddp,
+ const char *buf, size_t count)
+{ /* sysfs store: any write resets every counter; the written data itself is not examined */
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(writes_by_group_number); i++)
+ atomic_long_set(&writes_by_group_number[i], 0);
+
+ return count; /* report the whole write as consumed */
+}
+static DRIVER_ATTR_RW(group_number_stats);
+
/* Note: The following array creates attribute files in the
/sys/bus/pseudo/drivers/scsi_debug directory. The advantage of these
files (over those found in the /sys/module/scsi_debug/parameters
@@ -7344,6 +7384,7 @@ static struct attribute *sdebug_drv_attrs[] = {
&driver_attr_cdb_len.attr,
&driver_attr_tur_ms_to_ready.attr,
&driver_attr_zbc.attr,
+ &driver_attr_group_number_stats.attr,
NULL,
};
ATTRIBUTE_GROUPS(sdebug_drv);
^ permalink raw reply related [flat|nested] 42+ messages in thread