* [PATCH] scsi: sd: Fix TCG OPAL unlock on system resume
@ 2024-03-19 7:12 Damien Le Moal
2024-03-25 12:54 ` Damien Le Moal
2024-03-26 1:21 ` Martin K. Petersen
0 siblings, 2 replies; 3+ messages in thread
From: Damien Le Moal @ 2024-03-19 7:12 UTC (permalink / raw)
To: Martin K . Petersen, linux-scsi, linux-ide; +Cc: Niklas Cassel, desgua
Commit 3cc2ffe5c16d introduced the manage_system_start_stop scsi device
flag to allow libata to indicate to the scsi disk driver that nothing
should be done when resuming a disk on system resume. This change turned
the execution of sd_resume() into a no-op for ATA devices on system
resume. While this solved deadlock issues during device resume, this
change also wrongly removed the execution of opal_unlock_from_suspend().
As a result, devices with TCG OPAL locking enabled remain locked and
inaccessible after a system resume from sleep.
To fix this issue, introduce the scsi driver resume method and implement
it with the sd_resume() function calling opal_unlock_from_suspend(). The
former sd_resume() function is renamed to sd_resume_common() and
modified to call the new sd_resume() function. For non-ata devices, this
result in no functional changes.
Inorder for libata to explicitly execute sd_resume() when a device is
resumed during system re-start, the function scsi_resume_device() is
introduced. libata calls this function from the revalidation work
executed on devie resume, a state that is indicated with the new device
flag ATA_DFLAG_RESUMING. Doing so, locked TCG OPAL enabled devices are
unlocked on resume, allowing normal operation.
Fixes: 3cc2ffe5c16d ("scsi: sd: Differentiate system and runtime start/stop management")
Link: https://bugzilla.kernel.org/show_bug.cgi?id=218538
Cc: stable@vger.kernel.org
Signed-off-by: Damien Le Moal <dlemoal@kernel.org>
---
drivers/ata/libata-eh.c | 5 ++++-
drivers/ata/libata-scsi.c | 9 +++++++++
drivers/scsi/scsi_scan.c | 34 ++++++++++++++++++++++++++++++++++
drivers/scsi/sd.c | 23 +++++++++++++++++++----
include/linux/libata.h | 1 +
include/scsi/scsi_driver.h | 1 +
include/scsi/scsi_host.h | 1 +
7 files changed, 69 insertions(+), 5 deletions(-)
diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c
index b0d6e69c4a5b..214b935c2ced 100644
--- a/drivers/ata/libata-eh.c
+++ b/drivers/ata/libata-eh.c
@@ -712,8 +712,10 @@ void ata_scsi_port_error_handler(struct Scsi_Host *host, struct ata_port *ap)
ehc->saved_ncq_enabled |= 1 << devno;
/* If we are resuming, wake up the device */
- if (ap->pflags & ATA_PFLAG_RESUMING)
+ if (ap->pflags & ATA_PFLAG_RESUMING) {
+ dev->flags |= ATA_DFLAG_RESUMING;
ehc->i.dev_action[devno] |= ATA_EH_SET_ACTIVE;
+ }
}
}
@@ -3169,6 +3171,7 @@ static int ata_eh_revalidate_and_attach(struct ata_link *link,
return 0;
err:
+ dev->flags &= ~ATA_DFLAG_RESUMING;
*r_failed_dev = dev;
return rc;
}
diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
index 0a0f483124c3..2f4c58837641 100644
--- a/drivers/ata/libata-scsi.c
+++ b/drivers/ata/libata-scsi.c
@@ -4730,6 +4730,7 @@ void ata_scsi_dev_rescan(struct work_struct *work)
struct ata_link *link;
struct ata_device *dev;
unsigned long flags;
+ bool do_resume;
int ret = 0;
mutex_lock(&ap->scsi_scan_mutex);
@@ -4751,7 +4752,15 @@ void ata_scsi_dev_rescan(struct work_struct *work)
if (scsi_device_get(sdev))
continue;
+ do_resume = dev->flags & ATA_DFLAG_RESUMING;
+
spin_unlock_irqrestore(ap->lock, flags);
+ if (do_resume) {
+ ret = scsi_resume_device(sdev);
+ if (ret == -EWOULDBLOCK)
+ goto unlock;
+ dev->flags &= ~ATA_DFLAG_RESUMING;
+ }
ret = scsi_rescan_device(sdev);
scsi_device_put(sdev);
spin_lock_irqsave(ap->lock, flags);
diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c
index 8d06475de17a..ffd7e7e72933 100644
--- a/drivers/scsi/scsi_scan.c
+++ b/drivers/scsi/scsi_scan.c
@@ -1642,6 +1642,40 @@ int scsi_add_device(struct Scsi_Host *host, uint channel,
}
EXPORT_SYMBOL(scsi_add_device);
+int scsi_resume_device(struct scsi_device *sdev)
+{
+ struct device *dev = &sdev->sdev_gendev;
+ int ret = 0;
+
+ device_lock(dev);
+
+ /*
+ * Bail out if the device or its queue are not running. Otherwise,
+ * the rescan may block waiting for commands to be executed, with us
+ * holding the device lock. This can result in a potential deadlock
+ * in the power management core code when system resume is on-going.
+ */
+ if (sdev->sdev_state != SDEV_RUNNING ||
+ blk_queue_pm_only(sdev->request_queue)) {
+ ret = -EWOULDBLOCK;
+ goto unlock;
+ }
+
+ if (dev->driver && try_module_get(dev->driver->owner)) {
+ struct scsi_driver *drv = to_scsi_driver(dev->driver);
+
+ if (drv->resume)
+ ret = drv->resume(dev);
+ module_put(dev->driver->owner);
+ }
+
+unlock:
+ device_unlock(dev);
+
+ return ret;
+}
+EXPORT_SYMBOL(scsi_resume_device);
+
int scsi_rescan_device(struct scsi_device *sdev)
{
struct device *dev = &sdev->sdev_gendev;
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 2cc73c650ca6..7a5ad9b25ab7 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -4003,7 +4003,21 @@ static int sd_suspend_runtime(struct device *dev)
return sd_suspend_common(dev, true);
}
-static int sd_resume(struct device *dev, bool runtime)
+static int sd_resume(struct device *dev)
+{
+ struct scsi_disk *sdkp = dev_get_drvdata(dev);
+
+ sd_printk(KERN_NOTICE, sdkp, "Starting disk\n");
+
+ if (opal_unlock_from_suspend(sdkp->opal_dev)) {
+ sd_printk(KERN_NOTICE, sdkp, "OPAL unlock failed\n");
+ return -EIO;
+ }
+
+ return 0;
+}
+
+static int sd_resume_common(struct device *dev, bool runtime)
{
struct scsi_disk *sdkp = dev_get_drvdata(dev);
int ret;
@@ -4019,7 +4033,7 @@ static int sd_resume(struct device *dev, bool runtime)
sd_printk(KERN_NOTICE, sdkp, "Starting disk\n");
ret = sd_start_stop_device(sdkp, 1);
if (!ret) {
- opal_unlock_from_suspend(sdkp->opal_dev);
+ sd_resume(dev);
sdkp->suspended = false;
}
@@ -4038,7 +4052,7 @@ static int sd_resume_system(struct device *dev)
return 0;
}
- return sd_resume(dev, false);
+ return sd_resume_common(dev, false);
}
static int sd_resume_runtime(struct device *dev)
@@ -4065,7 +4079,7 @@ static int sd_resume_runtime(struct device *dev)
"Failed to clear sense data\n");
}
- return sd_resume(dev, true);
+ return sd_resume_common(dev, true);
}
static const struct dev_pm_ops sd_pm_ops = {
@@ -4088,6 +4102,7 @@ static struct scsi_driver sd_template = {
.pm = &sd_pm_ops,
},
.rescan = sd_rescan,
+ .resume = sd_resume,
.init_command = sd_init_command,
.uninit_command = sd_uninit_command,
.done = sd_done,
diff --git a/include/linux/libata.h b/include/linux/libata.h
index 26d68115afb8..324d792e7c78 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -107,6 +107,7 @@ enum {
ATA_DFLAG_NCQ_PRIO_ENABLED = (1 << 20), /* Priority cmds sent to dev */
ATA_DFLAG_CDL_ENABLED = (1 << 21), /* cmd duration limits is enabled */
+ ATA_DFLAG_RESUMING = (1 << 22), /* Device is resuming */
ATA_DFLAG_DETACH = (1 << 24),
ATA_DFLAG_DETACHED = (1 << 25),
ATA_DFLAG_DA = (1 << 26), /* device supports Device Attention */
diff --git a/include/scsi/scsi_driver.h b/include/scsi/scsi_driver.h
index 4ce1988b2ba0..f40915d2ecee 100644
--- a/include/scsi/scsi_driver.h
+++ b/include/scsi/scsi_driver.h
@@ -12,6 +12,7 @@ struct request;
struct scsi_driver {
struct device_driver gendrv;
+ int (*resume)(struct device *);
void (*rescan)(struct device *);
blk_status_t (*init_command)(struct scsi_cmnd *);
void (*uninit_command)(struct scsi_cmnd *);
diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h
index b259d42a1e1a..129001f600fc 100644
--- a/include/scsi/scsi_host.h
+++ b/include/scsi/scsi_host.h
@@ -767,6 +767,7 @@ scsi_template_proc_dir(const struct scsi_host_template *sht);
#define scsi_template_proc_dir(sht) NULL
#endif
extern void scsi_scan_host(struct Scsi_Host *);
+extern int scsi_resume_device(struct scsi_device *sdev);
extern int scsi_rescan_device(struct scsi_device *sdev);
extern void scsi_remove_host(struct Scsi_Host *);
extern struct Scsi_Host *scsi_host_get(struct Scsi_Host *);
--
2.44.0
^ permalink raw reply related [flat|nested] 3+ messages in thread
* Re: [PATCH] scsi: sd: Fix TCG OPAL unlock on system resume
2024-03-19 7:12 [PATCH] scsi: sd: Fix TCG OPAL unlock on system resume Damien Le Moal
@ 2024-03-25 12:54 ` Damien Le Moal
2024-03-26 1:21 ` Martin K. Petersen
1 sibling, 0 replies; 3+ messages in thread
From: Damien Le Moal @ 2024-03-25 12:54 UTC (permalink / raw)
To: Martin K . Petersen, linux-scsi, linux-ide; +Cc: Niklas Cassel, desgua
On 3/19/24 16:12, Damien Le Moal wrote:
> Commit 3cc2ffe5c16d introduced the manage_system_start_stop scsi device
> flag to allow libata to indicate to the scsi disk driver that nothing
> should be done when resuming a disk on system resume. This change turned
> the execution of sd_resume() into a no-op for ATA devices on system
> resume. While this solved deadlock issues during device resume, this
> change also wrongly removed the execution of opal_unlock_from_suspend().
> As a result, devices with TCG OPAL locking enabled remain locked and
> inaccessible after a system resume from sleep.
>
> To fix this issue, introduce the scsi driver resume method and implement
> it with the sd_resume() function calling opal_unlock_from_suspend(). The
> former sd_resume() function is renamed to sd_resume_common() and
> modified to call the new sd_resume() function. For non-ata devices, this
> result in no functional changes.
>
> Inorder for libata to explicitly execute sd_resume() when a device is
> resumed during system re-start, the function scsi_resume_device() is
> introduced. libata calls this function from the revalidation work
> executed on devie resume, a state that is indicated with the new device
> flag ATA_DFLAG_RESUMING. Doing so, locked TCG OPAL enabled devices are
> unlocked on resume, allowing normal operation.
>
> Fixes: 3cc2ffe5c16d ("scsi: sd: Differentiate system and runtime start/stop management")
> Link: https://bugzilla.kernel.org/show_bug.cgi?id=218538
> Cc: stable@vger.kernel.org
> Signed-off-by: Damien Le Moal <dlemoal@kernel.org>
Martin,
Ping ?
> ---
> drivers/ata/libata-eh.c | 5 ++++-
> drivers/ata/libata-scsi.c | 9 +++++++++
> drivers/scsi/scsi_scan.c | 34 ++++++++++++++++++++++++++++++++++
> drivers/scsi/sd.c | 23 +++++++++++++++++++----
> include/linux/libata.h | 1 +
> include/scsi/scsi_driver.h | 1 +
> include/scsi/scsi_host.h | 1 +
> 7 files changed, 69 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c
> index b0d6e69c4a5b..214b935c2ced 100644
> --- a/drivers/ata/libata-eh.c
> +++ b/drivers/ata/libata-eh.c
> @@ -712,8 +712,10 @@ void ata_scsi_port_error_handler(struct Scsi_Host *host, struct ata_port *ap)
> ehc->saved_ncq_enabled |= 1 << devno;
>
> /* If we are resuming, wake up the device */
> - if (ap->pflags & ATA_PFLAG_RESUMING)
> + if (ap->pflags & ATA_PFLAG_RESUMING) {
> + dev->flags |= ATA_DFLAG_RESUMING;
> ehc->i.dev_action[devno] |= ATA_EH_SET_ACTIVE;
> + }
> }
> }
>
> @@ -3169,6 +3171,7 @@ static int ata_eh_revalidate_and_attach(struct ata_link *link,
> return 0;
>
> err:
> + dev->flags &= ~ATA_DFLAG_RESUMING;
> *r_failed_dev = dev;
> return rc;
> }
> diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
> index 0a0f483124c3..2f4c58837641 100644
> --- a/drivers/ata/libata-scsi.c
> +++ b/drivers/ata/libata-scsi.c
> @@ -4730,6 +4730,7 @@ void ata_scsi_dev_rescan(struct work_struct *work)
> struct ata_link *link;
> struct ata_device *dev;
> unsigned long flags;
> + bool do_resume;
> int ret = 0;
>
> mutex_lock(&ap->scsi_scan_mutex);
> @@ -4751,7 +4752,15 @@ void ata_scsi_dev_rescan(struct work_struct *work)
> if (scsi_device_get(sdev))
> continue;
>
> + do_resume = dev->flags & ATA_DFLAG_RESUMING;
> +
> spin_unlock_irqrestore(ap->lock, flags);
> + if (do_resume) {
> + ret = scsi_resume_device(sdev);
> + if (ret == -EWOULDBLOCK)
> + goto unlock;
> + dev->flags &= ~ATA_DFLAG_RESUMING;
> + }
> ret = scsi_rescan_device(sdev);
> scsi_device_put(sdev);
> spin_lock_irqsave(ap->lock, flags);
> diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c
> index 8d06475de17a..ffd7e7e72933 100644
> --- a/drivers/scsi/scsi_scan.c
> +++ b/drivers/scsi/scsi_scan.c
> @@ -1642,6 +1642,40 @@ int scsi_add_device(struct Scsi_Host *host, uint channel,
> }
> EXPORT_SYMBOL(scsi_add_device);
>
> +int scsi_resume_device(struct scsi_device *sdev)
> +{
> + struct device *dev = &sdev->sdev_gendev;
> + int ret = 0;
> +
> + device_lock(dev);
> +
> + /*
> + * Bail out if the device or its queue are not running. Otherwise,
> + * the rescan may block waiting for commands to be executed, with us
> + * holding the device lock. This can result in a potential deadlock
> + * in the power management core code when system resume is on-going.
> + */
> + if (sdev->sdev_state != SDEV_RUNNING ||
> + blk_queue_pm_only(sdev->request_queue)) {
> + ret = -EWOULDBLOCK;
> + goto unlock;
> + }
> +
> + if (dev->driver && try_module_get(dev->driver->owner)) {
> + struct scsi_driver *drv = to_scsi_driver(dev->driver);
> +
> + if (drv->resume)
> + ret = drv->resume(dev);
> + module_put(dev->driver->owner);
> + }
> +
> +unlock:
> + device_unlock(dev);
> +
> + return ret;
> +}
> +EXPORT_SYMBOL(scsi_resume_device);
> +
> int scsi_rescan_device(struct scsi_device *sdev)
> {
> struct device *dev = &sdev->sdev_gendev;
> diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
> index 2cc73c650ca6..7a5ad9b25ab7 100644
> --- a/drivers/scsi/sd.c
> +++ b/drivers/scsi/sd.c
> @@ -4003,7 +4003,21 @@ static int sd_suspend_runtime(struct device *dev)
> return sd_suspend_common(dev, true);
> }
>
> -static int sd_resume(struct device *dev, bool runtime)
> +static int sd_resume(struct device *dev)
> +{
> + struct scsi_disk *sdkp = dev_get_drvdata(dev);
> +
> + sd_printk(KERN_NOTICE, sdkp, "Starting disk\n");
> +
> + if (opal_unlock_from_suspend(sdkp->opal_dev)) {
> + sd_printk(KERN_NOTICE, sdkp, "OPAL unlock failed\n");
> + return -EIO;
> + }
> +
> + return 0;
> +}
> +
> +static int sd_resume_common(struct device *dev, bool runtime)
> {
> struct scsi_disk *sdkp = dev_get_drvdata(dev);
> int ret;
> @@ -4019,7 +4033,7 @@ static int sd_resume(struct device *dev, bool runtime)
> sd_printk(KERN_NOTICE, sdkp, "Starting disk\n");
> ret = sd_start_stop_device(sdkp, 1);
> if (!ret) {
> - opal_unlock_from_suspend(sdkp->opal_dev);
> + sd_resume(dev);
> sdkp->suspended = false;
> }
>
> @@ -4038,7 +4052,7 @@ static int sd_resume_system(struct device *dev)
> return 0;
> }
>
> - return sd_resume(dev, false);
> + return sd_resume_common(dev, false);
> }
>
> static int sd_resume_runtime(struct device *dev)
> @@ -4065,7 +4079,7 @@ static int sd_resume_runtime(struct device *dev)
> "Failed to clear sense data\n");
> }
>
> - return sd_resume(dev, true);
> + return sd_resume_common(dev, true);
> }
>
> static const struct dev_pm_ops sd_pm_ops = {
> @@ -4088,6 +4102,7 @@ static struct scsi_driver sd_template = {
> .pm = &sd_pm_ops,
> },
> .rescan = sd_rescan,
> + .resume = sd_resume,
> .init_command = sd_init_command,
> .uninit_command = sd_uninit_command,
> .done = sd_done,
> diff --git a/include/linux/libata.h b/include/linux/libata.h
> index 26d68115afb8..324d792e7c78 100644
> --- a/include/linux/libata.h
> +++ b/include/linux/libata.h
> @@ -107,6 +107,7 @@ enum {
>
> ATA_DFLAG_NCQ_PRIO_ENABLED = (1 << 20), /* Priority cmds sent to dev */
> ATA_DFLAG_CDL_ENABLED = (1 << 21), /* cmd duration limits is enabled */
> + ATA_DFLAG_RESUMING = (1 << 22), /* Device is resuming */
> ATA_DFLAG_DETACH = (1 << 24),
> ATA_DFLAG_DETACHED = (1 << 25),
> ATA_DFLAG_DA = (1 << 26), /* device supports Device Attention */
> diff --git a/include/scsi/scsi_driver.h b/include/scsi/scsi_driver.h
> index 4ce1988b2ba0..f40915d2ecee 100644
> --- a/include/scsi/scsi_driver.h
> +++ b/include/scsi/scsi_driver.h
> @@ -12,6 +12,7 @@ struct request;
> struct scsi_driver {
> struct device_driver gendrv;
>
> + int (*resume)(struct device *);
> void (*rescan)(struct device *);
> blk_status_t (*init_command)(struct scsi_cmnd *);
> void (*uninit_command)(struct scsi_cmnd *);
> diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h
> index b259d42a1e1a..129001f600fc 100644
> --- a/include/scsi/scsi_host.h
> +++ b/include/scsi/scsi_host.h
> @@ -767,6 +767,7 @@ scsi_template_proc_dir(const struct scsi_host_template *sht);
> #define scsi_template_proc_dir(sht) NULL
> #endif
> extern void scsi_scan_host(struct Scsi_Host *);
> +extern int scsi_resume_device(struct scsi_device *sdev);
> extern int scsi_rescan_device(struct scsi_device *sdev);
> extern void scsi_remove_host(struct Scsi_Host *);
> extern struct Scsi_Host *scsi_host_get(struct Scsi_Host *);
--
Damien Le Moal
Western Digital Research
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [PATCH] scsi: sd: Fix TCG OPAL unlock on system resume
2024-03-19 7:12 [PATCH] scsi: sd: Fix TCG OPAL unlock on system resume Damien Le Moal
2024-03-25 12:54 ` Damien Le Moal
@ 2024-03-26 1:21 ` Martin K. Petersen
1 sibling, 0 replies; 3+ messages in thread
From: Martin K. Petersen @ 2024-03-26 1:21 UTC (permalink / raw)
To: linux-scsi, linux-ide, Damien Le Moal
Cc: Martin K . Petersen, Niklas Cassel, desgua
On Tue, 19 Mar 2024 16:12:09 +0900, Damien Le Moal wrote:
> Commit 3cc2ffe5c16d introduced the manage_system_start_stop scsi device
> flag to allow libata to indicate to the scsi disk driver that nothing
> should be done when resuming a disk on system resume. This change turned
> the execution of sd_resume() into a no-op for ATA devices on system
> resume. While this solved deadlock issues during device resume, this
> change also wrongly removed the execution of opal_unlock_from_suspend().
> As a result, devices with TCG OPAL locking enabled remain locked and
> inaccessible after a system resume from sleep.
>
> [...]
Applied to 6.9/scsi-fixes, thanks!
[1/1] scsi: sd: Fix TCG OPAL unlock on system resume
https://git.kernel.org/mkp/scsi/c/0c76106cb975
--
Martin K. Petersen Oracle Linux Engineering
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2024-03-26 1:22 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2024-03-19 7:12 [PATCH] scsi: sd: Fix TCG OPAL unlock on system resume Damien Le Moal
2024-03-25 12:54 ` Damien Le Moal
2024-03-26 1:21 ` Martin K. Petersen
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox