- * [PATCH v9 01/10] scsi: sr: support runtime pm
  2012-11-09  6:51 [PATCH v9 00/10] ZPODD Patches Aaron Lu
@ 2012-11-09  6:51 ` Aaron Lu
  2012-11-09  6:51 ` [PATCH v9 02/10] ata: zpodd: Add CONFIG_SATA_ZPODD Aaron Lu
                   ` (8 subsequent siblings)
  9 siblings, 0 replies; 85+ messages in thread
From: Aaron Lu @ 2012-11-09  6:51 UTC (permalink / raw)
  To: Jeff Garzik, James Bottomley, Rafael J. Wysocki, Alan Stern,
	Tejun Heo
  Cc: Jeff Wu, Aaron Lu, linux-ide, linux-pm, linux-scsi, linux-acpi,
	Aaron Lu
This patch adds runtime pm support for sr.
It did this by increasing the runtime usage_count of the device when:
- its block device is opened;
- the events checking is to run.
And decreasing the runtime usage_count of the device when:
- its block device is closed;
- After the events checking is done.
The idea is discussed in this mail thread:
http://thread.gmane.org/gmane.linux.acpi.devel/55243/focus=52703
Signed-off-by: Aaron Lu <aaron.lu@intel.com>
---
 drivers/scsi/sr.c | 30 +++++++++++++++++++++++++-----
 1 file changed, 25 insertions(+), 5 deletions(-)
diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c
index 5fc97d2..4d1a610 100644
--- a/drivers/scsi/sr.c
+++ b/drivers/scsi/sr.c
@@ -45,6 +45,7 @@
 #include <linux/blkdev.h>
 #include <linux/mutex.h>
 #include <linux/slab.h>
+#include <linux/pm_runtime.h>
 #include <asm/uaccess.h>
 
 #include <scsi/scsi.h>
@@ -146,7 +147,8 @@ static inline struct scsi_cd *scsi_cd_get(struct gendisk *disk)
 	kref_get(&cd->kref);
 	if (scsi_device_get(cd->device))
 		goto out_put;
-	goto out;
+	if (!scsi_autopm_get_device(cd->device))
+		goto out;
 
  out_put:
 	kref_put(&cd->kref, sr_kref_release);
@@ -162,6 +164,7 @@ static void scsi_cd_put(struct scsi_cd *cd)
 
 	mutex_lock(&sr_ref_mutex);
 	kref_put(&cd->kref, sr_kref_release);
+	scsi_autopm_put_device(sdev);
 	scsi_device_put(sdev);
 	mutex_unlock(&sr_ref_mutex);
 }
@@ -211,7 +214,7 @@ static unsigned int sr_check_events(struct cdrom_device_info *cdi,
 				    unsigned int clearing, int slot)
 {
 	struct scsi_cd *cd = cdi->handle;
-	bool last_present;
+	bool last_present = cd->media_present;
 	struct scsi_sense_hdr sshdr;
 	unsigned int events;
 	int ret;
@@ -220,6 +223,8 @@ static unsigned int sr_check_events(struct cdrom_device_info *cdi,
 	if (CDSL_CURRENT != slot)
 		return 0;
 
+	scsi_autopm_get_device(cd->device);
+
 	events = sr_get_events(cd->device);
 	cd->get_event_changed |= events & DISK_EVENT_MEDIA_CHANGE;
 
@@ -246,10 +251,9 @@ static unsigned int sr_check_events(struct cdrom_device_info *cdi,
 	}
 
 	if (!(clearing & DISK_EVENT_MEDIA_CHANGE))
-		return events;
+		goto out;
 do_tur:
 	/* let's see whether the media is there with TUR */
-	last_present = cd->media_present;
 	ret = scsi_test_unit_ready(cd->device, SR_TIMEOUT, MAX_RETRIES, &sshdr);
 
 	/*
@@ -270,7 +274,7 @@ do_tur:
 	}
 
 	if (cd->ignore_get_event)
-		return events;
+		goto out;
 
 	/* check whether GET_EVENT is reporting spurious MEDIA_CHANGE */
 	if (!cd->tur_changed) {
@@ -287,6 +291,18 @@ do_tur:
 	cd->tur_changed = false;
 	cd->get_event_changed = false;
 
+out:
+	/*
+	 * If there is no medium detected or the medium has been there
+	 * since last poll, try to suspend the device. Otherwise, keep
+	 * it active for one more poll interval so that if user space
+	 * application opens the block device, we can avoid a runtime
+	 * status change.
+	 */
+	pm_runtime_put_noidle(&cd->device->sdev_gendev);
+	if (!cd->media_present || last_present)
+		pm_runtime_suspend(&cd->device->sdev_gendev);
+
 	return events;
 }
 
@@ -718,6 +734,8 @@ static int sr_probe(struct device *dev)
 
 	sdev_printk(KERN_DEBUG, sdev,
 		    "Attached scsi CD-ROM %s\n", cd->cdi.name);
+	scsi_autopm_put_device(cd->device);
+
 	return 0;
 
 fail_put:
@@ -965,6 +983,8 @@ static int sr_remove(struct device *dev)
 {
 	struct scsi_cd *cd = dev_get_drvdata(dev);
 
+	scsi_autopm_get_device(cd->device);
+
 	blk_queue_prep_rq(cd->device->request_queue, scsi_prep_fn);
 	del_gendisk(cd->disk);
 
-- 
1.7.12.4
^ permalink raw reply related	[flat|nested] 85+ messages in thread
- * [PATCH v9 02/10] ata: zpodd: Add CONFIG_SATA_ZPODD
  2012-11-09  6:51 [PATCH v9 00/10] ZPODD Patches Aaron Lu
  2012-11-09  6:51 ` [PATCH v9 01/10] scsi: sr: support runtime pm Aaron Lu
@ 2012-11-09  6:51 ` Aaron Lu
  2012-11-09  6:51 ` [PATCH v9 03/10] ata: zpodd: identify and init ZPODD devices Aaron Lu
                   ` (7 subsequent siblings)
  9 siblings, 0 replies; 85+ messages in thread
From: Aaron Lu @ 2012-11-09  6:51 UTC (permalink / raw)
  To: Jeff Garzik, James Bottomley, Rafael J. Wysocki, Alan Stern,
	Tejun Heo
  Cc: Jeff Wu, Aaron Lu, linux-ide, linux-pm, linux-scsi, linux-acpi,
	Aaron Lu
Added a new config CONFIG_SATA_ZPODD, which is used to support
SATA based zero power ODD. It depends on ATA_ACPI, and selects
BLK_DEV_SR as the implementation of ZPODD depends on SCSI sr driver.
A new file libata-zpodd.c is added, which will be used to host ZPODD
related code. It is empty for this commit.
Signed-off-by: Aaron Lu <aaron.lu@intel.com>
---
 drivers/ata/Kconfig  | 12 ++++++++++++
 drivers/ata/Makefile |  1 +
 2 files changed, 13 insertions(+)
 create mode 100644 drivers/ata/libata-zpodd.c
diff --git a/drivers/ata/Kconfig b/drivers/ata/Kconfig
index e08d322..9bcb8fb 100644
--- a/drivers/ata/Kconfig
+++ b/drivers/ata/Kconfig
@@ -58,6 +58,18 @@ config ATA_ACPI
 	  You can disable this at kernel boot time by using the
 	  option libata.noacpi=1
 
+config SATA_ZPODD
+	bool "SATA Zero Power ODD Support"
+	depends on ATA_ACPI
+	select BLK_DEV_SR
+	default n
+	help
+	  This option adds support for SATA ZPODD. It requires both
+	  ODD and the platform support, and if enabled, will automatically
+	  power on/off the ODD when certain condition is satisfied. This
+	  does not impact user's experience of the ODD, only power is saved
+	  when ODD is not in use(i.e. no disc inside).
+
 config SATA_PMP
 	bool "SATA Port Multiplier support"
 	default y
diff --git a/drivers/ata/Makefile b/drivers/ata/Makefile
index 9329daf..85e3de4 100644
--- a/drivers/ata/Makefile
+++ b/drivers/ata/Makefile
@@ -107,3 +107,4 @@ libata-y	:= libata-core.o libata-scsi.o libata-eh.o libata-transport.o
 libata-$(CONFIG_ATA_SFF)	+= libata-sff.o
 libata-$(CONFIG_SATA_PMP)	+= libata-pmp.o
 libata-$(CONFIG_ATA_ACPI)	+= libata-acpi.o
+libata-$(CONFIG_SATA_ZPODD)	+= libata-zpodd.o
diff --git a/drivers/ata/libata-zpodd.c b/drivers/ata/libata-zpodd.c
new file mode 100644
index 0000000..e69de29
-- 
1.7.12.4
^ permalink raw reply related	[flat|nested] 85+ messages in thread
- * [PATCH v9 03/10] ata: zpodd: identify and init ZPODD devices
  2012-11-09  6:51 [PATCH v9 00/10] ZPODD Patches Aaron Lu
  2012-11-09  6:51 ` [PATCH v9 01/10] scsi: sr: support runtime pm Aaron Lu
  2012-11-09  6:51 ` [PATCH v9 02/10] ata: zpodd: Add CONFIG_SATA_ZPODD Aaron Lu
@ 2012-11-09  6:51 ` Aaron Lu
  2012-11-12 18:53   ` Tejun Heo
  2012-11-09  6:51 ` [PATCH v9 04/10] libata: acpi: move acpi notification code to zpodd Aaron Lu
                   ` (6 subsequent siblings)
  9 siblings, 1 reply; 85+ messages in thread
From: Aaron Lu @ 2012-11-09  6:51 UTC (permalink / raw)
  To: Jeff Garzik, James Bottomley, Rafael J. Wysocki, Alan Stern,
	Tejun Heo
  Cc: Jeff Wu, Aaron Lu, linux-ide, linux-pm, linux-scsi, linux-acpi,
	Aaron Lu
The ODD can be enabled for ZPODD if the following three conditions are
satisfied:
1 The ODD supports device attention;
2 The platform can runtime power off the ODD through ACPI;
3 The ODD is either slot type or drawer type.
For such ODDs, zpodd_init is called and a new structure is allocated for
it to store ZPODD related stuffs.
And the zpodd_dev_enabled function is used to test if ZPODD is currently
enabled for this ODD.
Signed-off-by: Aaron Lu <aaron.lu@intel.com>
---
 drivers/ata/libata-acpi.c  |   2 +
 drivers/ata/libata-core.c  |   4 +-
 drivers/ata/libata-zpodd.c | 124 +++++++++++++++++++++++++++++++++++++++++++++
 drivers/ata/libata.h       |  17 +++++++
 include/uapi/linux/cdrom.h |  34 +++++++++++++
 5 files changed, 180 insertions(+), 1 deletion(-)
diff --git a/drivers/ata/libata-acpi.c b/drivers/ata/libata-acpi.c
index fd9ecf7..3c61100 100644
--- a/drivers/ata/libata-acpi.c
+++ b/drivers/ata/libata-acpi.c
@@ -1059,6 +1059,8 @@ void ata_acpi_bind(struct ata_device *dev)
 
 void ata_acpi_unbind(struct ata_device *dev)
 {
+	if (zpodd_dev_enabled(dev))
+		zpodd_deinit(dev);
 	ata_acpi_remove_pm_notifier(dev);
 	ata_acpi_unregister_power_resource(dev);
 }
diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index 3cc7096..a2293b6 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -2395,8 +2395,10 @@ int ata_dev_configure(struct ata_device *dev)
 			dma_dir_string = ", DMADIR";
 		}
 
-		if (ata_id_has_da(dev->id))
+		if (ata_id_has_da(dev->id)) {
 			dev->flags |= ATA_DFLAG_DA;
+			zpodd_init(dev);
+		}
 
 		/* print device info to dmesg */
 		if (ata_msg_drv(ap) && print_info)
diff --git a/drivers/ata/libata-zpodd.c b/drivers/ata/libata-zpodd.c
index e69de29..fce6ea6 100644
--- a/drivers/ata/libata-zpodd.c
+++ b/drivers/ata/libata-zpodd.c
@@ -0,0 +1,124 @@
+#include <linux/libata.h>
+#include <linux/cdrom.h>
+
+#include "libata.h"
+
+struct zpodd {
+	bool slot:1;
+	bool drawer:1;
+
+	struct ata_device *dev;
+};
+
+static int run_atapi_cmd(struct ata_device *dev, const char *cdb,
+		unsigned short cdb_len, char *buf, unsigned int buf_len)
+{
+	struct ata_taskfile tf = {0};
+
+	tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE;
+	tf.command = ATA_CMD_PACKET;
+
+	if (buf) {
+		tf.protocol = ATAPI_PROT_PIO;
+		tf.lbam = buf_len;
+	} else {
+		tf.protocol = ATAPI_PROT_NODATA;
+	}
+
+	return ata_exec_internal(dev, &tf, cdb,
+			buf ? DMA_FROM_DEVICE : DMA_NONE, buf, buf_len, 0);
+}
+
+/*
+ * Per the spec, only slot type and drawer type ODD can be supported
+ *
+ * Return 0 for slot type, 1 for drawer, -ENODEV for other types or on error.
+ */
+static int check_loading_mechanism(struct ata_device *dev)
+{
+	char buf[16];
+	unsigned int ret;
+	struct rm_feature_desc *desc = (void *)(buf + 8);
+
+	char cdb[] = {  GPCMD_GET_CONFIGURATION,
+			2,      /* only 1 feature descriptor requested */
+			0, 3,   /* 3, removable medium feature */
+			0, 0, 0,/* reserved */
+			0, sizeof(buf),
+			0, 0, 0,
+	};
+
+	ret = run_atapi_cmd(dev, cdb, sizeof(cdb), buf, sizeof(buf));
+	if (ret)
+		return -ENODEV;
+
+	if (be16_to_cpu(desc->feature_code) != 3)
+		return -ENODEV;
+
+	if (desc->mech_type == 0 && desc->load == 0 && desc->eject == 1)
+		return 0; /* slot */
+	else if (desc->mech_type == 1 && desc->load == 0 && desc->eject == 1)
+		return 1; /* drawer */
+	else
+		return -ENODEV;
+}
+
+static bool device_can_poweroff(struct ata_device *ata_dev)
+{
+	acpi_handle handle;
+	acpi_status status;
+	struct acpi_device_power_state *states;
+	struct acpi_device *acpi_dev;
+
+	handle = ata_dev_acpi_handle(ata_dev);
+	if (!handle)
+		return false;
+
+	status = acpi_bus_get_device(handle, &acpi_dev);
+	if (ACPI_FAILURE(status))
+		return false;
+
+	/*
+	 * If firmware has _PS3 or _PR3 for this device,
+	 * it means this device can be runtime powered off
+	 */
+	states = acpi_dev->power.states;
+	if (states[ACPI_STATE_D3_HOT].flags.valid ||
+	    states[ACPI_STATE_D3_COLD].flags.explicit_set)
+		return true;
+	else
+		return false;
+}
+
+void zpodd_init(struct ata_device *dev)
+{
+	int ret;
+	struct zpodd *zpodd;
+
+	if (dev->private_data)
+		return;
+
+	if (!device_can_poweroff(dev))
+		return;
+
+	if ((ret = check_loading_mechanism(dev)) == -ENODEV)
+		return;
+
+	zpodd = kzalloc(sizeof(struct zpodd), GFP_KERNEL);
+	if (!zpodd)
+		return;
+
+	if (ret)
+		zpodd->drawer = true;
+	else
+		zpodd->slot = true;
+
+	zpodd->dev = dev;
+	dev->private_data = zpodd;
+}
+
+void zpodd_deinit(struct ata_device *dev)
+{
+	kfree(dev->private_data);
+	dev->private_data = NULL;
+}
diff --git a/drivers/ata/libata.h b/drivers/ata/libata.h
index 7148a58..55ad37e 100644
--- a/drivers/ata/libata.h
+++ b/drivers/ata/libata.h
@@ -230,4 +230,21 @@ static inline void ata_sff_exit(void)
 { }
 #endif /* CONFIG_ATA_SFF */
 
+/* libata-zpodd.c */
+#ifdef CONFIG_SATA_ZPODD
+void zpodd_init(struct ata_device *dev);
+void zpodd_deinit(struct ata_device *dev);
+static inline bool zpodd_dev_enabled(struct ata_device *dev)
+{
+	if (dev->flags & ATA_DFLAG_DA && dev->private_data)
+		return true;
+	else
+		return false;
+}
+#else /* CONFIG_SATA_ZPODD */
+static inline void zpodd_init(struct ata_device *dev) {}
+static inline void zpodd_deinit(struct ata_device *dev) {}
+static inline bool zpodd_dev_enabled(struct ata_device *dev) { return false; }
+#endif /* CONFIG_SATA_ZPODD */
+
 #endif /* __LIBATA_H__ */
diff --git a/include/uapi/linux/cdrom.h b/include/uapi/linux/cdrom.h
index 898b866..bd17ad5 100644
--- a/include/uapi/linux/cdrom.h
+++ b/include/uapi/linux/cdrom.h
@@ -908,5 +908,39 @@ struct mode_page_header {
 	__be16 desc_length;
 };
 
+/* removable medium feature descriptor */
+struct rm_feature_desc {
+	__be16 feature_code;
+#if defined(__BIG_ENDIAN_BITFIELD)
+	__u8 reserved1:2;
+	__u8 feature_version:4;
+	__u8 persistent:1;
+	__u8 curr:1;
+#elif defined(__LITTLE_ENDIAN_BITFIELD)
+	__u8 curr:1;
+	__u8 persistent:1;
+	__u8 feature_version:4;
+	__u8 reserved1:2;
+#endif
+	__u8 add_len;
+#if defined(__BIG_ENDIAN_BITFIELD)
+	__u8 mech_type:3;
+	__u8 load:1;
+	__u8 eject:1;
+	__u8 pvnt_jmpr:1;
+	__u8 dbml:1;
+	__u8 lock:1;
+#elif defined(__LITTLE_ENDIAN_BITFIELD)
+	__u8 lock:1;
+	__u8 dbml:1;
+	__u8 pvnt_jmpr:1;
+	__u8 eject:1;
+	__u8 load:1;
+	__u8 mech_type:3;
+#endif
+	__u8 reserved2;
+	__u8 reserved3;
+	__u8 reserved4;
+};
 
 #endif /* _UAPI_LINUX_CDROM_H */
-- 
1.7.12.4
^ permalink raw reply related	[flat|nested] 85+ messages in thread
- * Re: [PATCH v9 03/10] ata: zpodd: identify and init ZPODD devices
  2012-11-09  6:51 ` [PATCH v9 03/10] ata: zpodd: identify and init ZPODD devices Aaron Lu
@ 2012-11-12 18:53   ` Tejun Heo
  2012-11-14  1:32     ` Aaron Lu
  0 siblings, 1 reply; 85+ messages in thread
From: Tejun Heo @ 2012-11-12 18:53 UTC (permalink / raw)
  To: Aaron Lu
  Cc: Jeff Garzik, James Bottomley, Rafael J. Wysocki, Alan Stern,
	Jeff Wu, Aaron Lu, linux-ide, linux-pm, linux-scsi, linux-acpi
Hello,
On Fri, Nov 09, 2012 at 02:51:55PM +0800, Aaron Lu wrote:
>  void ata_acpi_unbind(struct ata_device *dev)
>  {
> +	if (zpodd_dev_enabled(dev))
> +		zpodd_deinit(dev);
Maybe zpodd_exit() instead?
> +void zpodd_init(struct ata_device *dev)
> +{
> +	int ret;
> +	struct zpodd *zpodd;
> +
> +	if (dev->private_data)
> +		return;
> +
> +	if (!device_can_poweroff(dev))
> +		return;
> +
> +	if ((ret = check_loading_mechanism(dev)) == -ENODEV)
> +		return;
> +
> +	zpodd = kzalloc(sizeof(struct zpodd), GFP_KERNEL);
> +	if (!zpodd)
> +		return;
> +
> +	if (ret)
> +		zpodd->drawer = true;
> +	else
> +		zpodd->slot = true;
> +
> +	zpodd->dev = dev;
> +	dev->private_data = zpodd;
I don't think you're supposed to use dev->private_data from libata
core layer.  Just add a new field.  Nobody cares about adding 8 more
bytes to struct ata_device and spending 8 more bytes is way better
than muddying the ownership of ->private_data.
> +/* libata-zpodd.c */
> +#ifdef CONFIG_SATA_ZPODD
> +void zpodd_init(struct ata_device *dev);
> +void zpodd_deinit(struct ata_device *dev);
> +static inline bool zpodd_dev_enabled(struct ata_device *dev)
> +{
> +	if (dev->flags & ATA_DFLAG_DA && dev->private_data)
> +		return true;
> +	else
> +		return false;
> +}
And this gets completely wrong.  What if the device supports DA and
low level driver makes use of ->private_data?
Thanks.
-- 
tejun
^ permalink raw reply	[flat|nested] 85+ messages in thread
- * Re: [PATCH v9 03/10] ata: zpodd: identify and init ZPODD devices
  2012-11-12 18:53   ` Tejun Heo
@ 2012-11-14  1:32     ` Aaron Lu
  2012-11-18 14:38       ` Tejun Heo
  0 siblings, 1 reply; 85+ messages in thread
From: Aaron Lu @ 2012-11-14  1:32 UTC (permalink / raw)
  To: Tejun Heo
  Cc: Jeff Garzik, James Bottomley, Rafael J. Wysocki, Alan Stern,
	Jeff Wu, Aaron Lu, linux-ide, linux-pm, linux-scsi, linux-acpi
On 11/13/2012 02:53 AM, Tejun Heo wrote:
> Hello,
> 
> On Fri, Nov 09, 2012 at 02:51:55PM +0800, Aaron Lu wrote:
>>  void ata_acpi_unbind(struct ata_device *dev)
>>  {
>> +	if (zpodd_dev_enabled(dev))
>> +		zpodd_deinit(dev);
> 
> Maybe zpodd_exit() instead?
OK.
> 
>> +void zpodd_init(struct ata_device *dev)
>> +{
>> +	int ret;
>> +	struct zpodd *zpodd;
>> +
>> +	if (dev->private_data)
>> +		return;
>> +
>> +	if (!device_can_poweroff(dev))
>> +		return;
>> +
>> +	if ((ret = check_loading_mechanism(dev)) == -ENODEV)
>> +		return;
>> +
>> +	zpodd = kzalloc(sizeof(struct zpodd), GFP_KERNEL);
>> +	if (!zpodd)
>> +		return;
>> +
>> +	if (ret)
>> +		zpodd->drawer = true;
>> +	else
>> +		zpodd->slot = true;
>> +
>> +	zpodd->dev = dev;
>> +	dev->private_data = zpodd;
> 
> I don't think you're supposed to use dev->private_data from libata
> core layer.  Just add a new field.  Nobody cares about adding 8 more
> bytes to struct ata_device and spending 8 more bytes is way better
> than muddying the ownership of ->private_data.
OK.
And just out of curiosity, who's supposed to use device's private_data?
I didn't find any user for ata_device's private_data in libata.
> 
>> +/* libata-zpodd.c */
>> +#ifdef CONFIG_SATA_ZPODD
>> +void zpodd_init(struct ata_device *dev);
>> +void zpodd_deinit(struct ata_device *dev);
>> +static inline bool zpodd_dev_enabled(struct ata_device *dev)
>> +{
>> +	if (dev->flags & ATA_DFLAG_DA && dev->private_data)
>> +		return true;
>> +	else
>> +		return false;
>> +}
> 
> And this gets completely wrong.  What if the device supports DA and
> low level driver makes use of ->private_data?
I didn't find any user of ata_device's private_data, so I used it for
ZPODD. But if this is dangerous, I'll use a new field.
Thanks,
Aaron
^ permalink raw reply	[flat|nested] 85+ messages in thread
- * Re: [PATCH v9 03/10] ata: zpodd: identify and init ZPODD devices
  2012-11-14  1:32     ` Aaron Lu
@ 2012-11-18 14:38       ` Tejun Heo
  2012-11-19  2:15         ` Aaron Lu
  0 siblings, 1 reply; 85+ messages in thread
From: Tejun Heo @ 2012-11-18 14:38 UTC (permalink / raw)
  To: Aaron Lu
  Cc: Jeff Garzik, James Bottomley, Rafael J. Wysocki, Alan Stern,
	Jeff Wu, Aaron Lu, linux-ide, linux-pm, linux-scsi, linux-acpi
Hello, Aaron.
On Wed, Nov 14, 2012 at 09:32:27AM +0800, Aaron Lu wrote:
> > I don't think you're supposed to use dev->private_data from libata
> > core layer.  Just add a new field.  Nobody cares about adding 8 more
> > bytes to struct ata_device and spending 8 more bytes is way better
> > than muddying the ownership of ->private_data.
> 
> OK.
> And just out of curiosity, who's supposed to use device's private_data?
> I didn't find any user for ata_device's private_data in libata.
All the ->private_data fields are to be used by low level drivers
(ahci, ata_piix, pata_via...).  Given the twisted nature of ATA
devices, it's a bit surprising that no driver yet found a need for
ata_dev->private_data.  For most SATA controllers, port to device is
one to one so maybe ap->private_data is enough.
> > And this gets completely wrong.  What if the device supports DA and
> > low level driver makes use of ->private_data?
> 
> I didn't find any user of ata_device's private_data, so I used it for
> ZPODD. But if this is dangerous, I'll use a new field.
As there currently is no other user, it won't break anything but yeah
please add a properly typed and named field.
Thanks.
-- 
tejun
^ permalink raw reply	[flat|nested] 85+ messages in thread 
- * Re: [PATCH v9 03/10] ata: zpodd: identify and init ZPODD devices
  2012-11-18 14:38       ` Tejun Heo
@ 2012-11-19  2:15         ` Aaron Lu
  0 siblings, 0 replies; 85+ messages in thread
From: Aaron Lu @ 2012-11-19  2:15 UTC (permalink / raw)
  To: Tejun Heo
  Cc: Jeff Garzik, James Bottomley, Rafael J. Wysocki, Alan Stern,
	Jeff Wu, Aaron Lu, linux-ide, linux-pm, linux-scsi, linux-acpi
On 11/18/2012 10:38 PM, Tejun Heo wrote:
> Hello, Aaron.
Hi,
> 
> On Wed, Nov 14, 2012 at 09:32:27AM +0800, Aaron Lu wrote:
>>> I don't think you're supposed to use dev->private_data from libata
>>> core layer.  Just add a new field.  Nobody cares about adding 8 more
>>> bytes to struct ata_device and spending 8 more bytes is way better
>>> than muddying the ownership of ->private_data.
>>
>> OK.
>> And just out of curiosity, who's supposed to use device's private_data?
>> I didn't find any user for ata_device's private_data in libata.
> 
> All the ->private_data fields are to be used by low level drivers
> (ahci, ata_piix, pata_via...).  Given the twisted nature of ATA
> devices, it's a bit surprising that no driver yet found a need for
> ata_dev->private_data.  For most SATA controllers, port to device is
> one to one so maybe ap->private_data is enough.
> 
>>> And this gets completely wrong.  What if the device supports DA and
>>> low level driver makes use of ->private_data?
>>
>> I didn't find any user of ata_device's private_data, so I used it for
>> ZPODD. But if this is dangerous, I'll use a new field.
> 
> As there currently is no other user, it won't break anything but yeah
> please add a properly typed and named field.
OK, and thanks for the suggestion.
-Aaron
> 
> Thanks.
> 
^ permalink raw reply	[flat|nested] 85+ messages in thread 
 
 
 
 
- * [PATCH v9 04/10] libata: acpi: move acpi notification code to zpodd
  2012-11-09  6:51 [PATCH v9 00/10] ZPODD Patches Aaron Lu
                   ` (2 preceding siblings ...)
  2012-11-09  6:51 ` [PATCH v9 03/10] ata: zpodd: identify and init ZPODD devices Aaron Lu
@ 2012-11-09  6:51 ` Aaron Lu
  2012-11-12 18:55   ` Tejun Heo
  2012-11-09  6:51 ` [PATCH v9 05/10] libata: separate ATAPI code Aaron Lu
                   ` (5 subsequent siblings)
  9 siblings, 1 reply; 85+ messages in thread
From: Aaron Lu @ 2012-11-09  6:51 UTC (permalink / raw)
  To: Jeff Garzik, James Bottomley, Rafael J. Wysocki, Alan Stern,
	Tejun Heo
  Cc: Jeff Wu, Aaron Lu, linux-ide, linux-pm, linux-scsi, linux-acpi,
	Aaron Lu
Since the ata acpi notification code introduced in commit
3bd46600a7a7e938c54df8cdbac9910668c7dfb0 is solely for ZPODD, and we
now have a dedicated place for it, move these code there.
And the add/remove_pm_notifier code is simplified a little bit that it
does not check things like if the handle is NULL and if a corresponding
acpi_device is there for the handle as they are guaranteed by the
device_can_poweroff already.
Signed-off-by: Aaron Lu <aaron.lu@intel.com>
---
 drivers/ata/libata-acpi.c  | 71 ----------------------------------------------
 drivers/ata/libata-zpodd.c | 30 ++++++++++++++++++++
 2 files changed, 30 insertions(+), 71 deletions(-)
diff --git a/drivers/ata/libata-acpi.c b/drivers/ata/libata-acpi.c
index 3c61100..6b6819c 100644
--- a/drivers/ata/libata-acpi.c
+++ b/drivers/ata/libata-acpi.c
@@ -970,57 +970,6 @@ void ata_acpi_on_disable(struct ata_device *dev)
 	ata_acpi_clear_gtf(dev);
 }
 
-static void ata_acpi_wake_dev(acpi_handle handle, u32 event, void *context)
-{
-	struct ata_device *ata_dev = context;
-
-	if (event == ACPI_NOTIFY_DEVICE_WAKE && ata_dev &&
-			pm_runtime_suspended(&ata_dev->sdev->sdev_gendev))
-		scsi_autopm_get_device(ata_dev->sdev);
-}
-
-static void ata_acpi_add_pm_notifier(struct ata_device *dev)
-{
-	struct acpi_device *acpi_dev;
-	acpi_handle handle;
-	acpi_status status;
-
-	handle = ata_dev_acpi_handle(dev);
-	if (!handle)
-		return;
-
-	status = acpi_bus_get_device(handle, &acpi_dev);
-	if (ACPI_FAILURE(status))
-		return;
-
-	if (dev->sdev->can_power_off) {
-		acpi_install_notify_handler(handle, ACPI_SYSTEM_NOTIFY,
-			ata_acpi_wake_dev, dev);
-		device_set_run_wake(&dev->sdev->sdev_gendev, true);
-	}
-}
-
-static void ata_acpi_remove_pm_notifier(struct ata_device *dev)
-{
-	struct acpi_device *acpi_dev;
-	acpi_handle handle;
-	acpi_status status;
-
-	handle = ata_dev_acpi_handle(dev);
-	if (!handle)
-		return;
-
-	status = acpi_bus_get_device(handle, &acpi_dev);
-	if (ACPI_FAILURE(status))
-		return;
-
-	if (dev->sdev->can_power_off) {
-		device_set_run_wake(&dev->sdev->sdev_gendev, false);
-		acpi_remove_notify_handler(handle, ACPI_SYSTEM_NOTIFY,
-			ata_acpi_wake_dev);
-	}
-}
-
 static void ata_acpi_register_power_resource(struct ata_device *dev)
 {
 	struct scsi_device *sdev = dev->sdev;
@@ -1053,7 +1002,6 @@ static void ata_acpi_unregister_power_resource(struct ata_device *dev)
 
 void ata_acpi_bind(struct ata_device *dev)
 {
-	ata_acpi_add_pm_notifier(dev);
 	ata_acpi_register_power_resource(dev);
 }
 
@@ -1061,7 +1009,6 @@ void ata_acpi_unbind(struct ata_device *dev)
 {
 	if (zpodd_dev_enabled(dev))
 		zpodd_deinit(dev);
-	ata_acpi_remove_pm_notifier(dev);
 	ata_acpi_unregister_power_resource(dev);
 }
 
@@ -1103,9 +1050,6 @@ static int ata_acpi_bind_device(struct ata_port *ap, struct scsi_device *sdev,
 				acpi_handle *handle)
 {
 	struct ata_device *ata_dev;
-	acpi_status status;
-	struct acpi_device *acpi_dev;
-	struct acpi_device_power_state *states;
 
 	if (ap->flags & ATA_FLAG_ACPI_SATA)
 		ata_dev = &ap->link.device[sdev->channel];
@@ -1117,21 +1061,6 @@ static int ata_acpi_bind_device(struct ata_port *ap, struct scsi_device *sdev,
 	if (!*handle)
 		return -ENODEV;
 
-	status = acpi_bus_get_device(*handle, &acpi_dev);
-	if (ACPI_FAILURE(status))
-		return 0;
-
-	/*
-	 * If firmware has _PS3 or _PR3 for this device,
-	 * and this ata ODD device support device attention,
-	 * it means this device can be powered off
-	 */
-	states = acpi_dev->power.states;
-	if ((states[ACPI_STATE_D3_HOT].flags.valid ||
-			states[ACPI_STATE_D3_COLD].flags.explicit_set) &&
-			ata_dev->flags & ATA_DFLAG_DA)
-		sdev->can_power_off = 1;
-
 	return 0;
 }
 
diff --git a/drivers/ata/libata-zpodd.c b/drivers/ata/libata-zpodd.c
index fce6ea6..ba8c985 100644
--- a/drivers/ata/libata-zpodd.c
+++ b/drivers/ata/libata-zpodd.c
@@ -1,11 +1,14 @@
 #include <linux/libata.h>
 #include <linux/cdrom.h>
+#include <linux/pm_runtime.h>
+#include <scsi/scsi_device.h>
 
 #include "libata.h"
 
 struct zpodd {
 	bool slot:1;
 	bool drawer:1;
+	bool from_notify:1;	/* resumed as a result of acpi notification */
 
 	struct ata_device *dev;
 };
@@ -90,6 +93,31 @@ static bool device_can_poweroff(struct ata_device *ata_dev)
 		return false;
 }
 
+static void zpodd_wake_dev(acpi_handle handle, u32 event, void *context)
+{
+	struct ata_device *ata_dev = context;
+	struct zpodd *zpodd = ata_dev->private_data;
+	struct device *dev = &ata_dev->sdev->sdev_gendev;
+
+	if (event == ACPI_NOTIFY_DEVICE_WAKE && pm_runtime_suspended(dev)) {
+		zpodd->from_notify = true;
+		pm_runtime_resume(dev);
+	}
+}
+
+static void acpi_add_pm_notifier(struct ata_device *dev)
+{
+	acpi_handle handle = ata_dev_acpi_handle(dev);
+	acpi_install_notify_handler(handle, ACPI_SYSTEM_NOTIFY,
+				    zpodd_wake_dev, dev);
+}
+
+static void acpi_remove_pm_notifier(struct ata_device *dev)
+{
+	acpi_handle handle = DEVICE_ACPI_HANDLE(&dev->sdev->sdev_gendev);
+	acpi_remove_notify_handler(handle, ACPI_SYSTEM_NOTIFY, zpodd_wake_dev);
+}
+
 void zpodd_init(struct ata_device *dev)
 {
 	int ret;
@@ -113,12 +141,14 @@ void zpodd_init(struct ata_device *dev)
 	else
 		zpodd->slot = true;
 
+	acpi_add_pm_notifier(dev);
 	zpodd->dev = dev;
 	dev->private_data = zpodd;
 }
 
 void zpodd_deinit(struct ata_device *dev)
 {
+	acpi_remove_pm_notifier(dev);
 	kfree(dev->private_data);
 	dev->private_data = NULL;
 }
-- 
1.7.12.4
^ permalink raw reply related	[flat|nested] 85+ messages in thread
- * Re: [PATCH v9 04/10] libata: acpi: move acpi notification code to zpodd
  2012-11-09  6:51 ` [PATCH v9 04/10] libata: acpi: move acpi notification code to zpodd Aaron Lu
@ 2012-11-12 18:55   ` Tejun Heo
  2012-11-14  1:36     ` Aaron Lu
  0 siblings, 1 reply; 85+ messages in thread
From: Tejun Heo @ 2012-11-12 18:55 UTC (permalink / raw)
  To: Aaron Lu
  Cc: Jeff Garzik, James Bottomley, Rafael J. Wysocki, Alan Stern,
	Jeff Wu, Aaron Lu, linux-ide, linux-pm, linux-scsi, linux-acpi
On Fri, Nov 09, 2012 at 02:51:56PM +0800, Aaron Lu wrote:
> Since the ata acpi notification code introduced in commit
> 3bd46600a7a7e938c54df8cdbac9910668c7dfb0 is solely for ZPODD, and we
> now have a dedicated place for it, move these code there.
> 
> And the add/remove_pm_notifier code is simplified a little bit that it
> does not check things like if the handle is NULL and if a corresponding
> acpi_device is there for the handle as they are guaranteed by the
> device_can_poweroff already.
Please don't mix code movement with actual changes.  It makes it
difficult to track what's going on.
Thanks.
-- 
tejun
^ permalink raw reply	[flat|nested] 85+ messages in thread 
- * Re: [PATCH v9 04/10] libata: acpi: move acpi notification code to zpodd
  2012-11-12 18:55   ` Tejun Heo
@ 2012-11-14  1:36     ` Aaron Lu
  0 siblings, 0 replies; 85+ messages in thread
From: Aaron Lu @ 2012-11-14  1:36 UTC (permalink / raw)
  To: Tejun Heo
  Cc: Jeff Garzik, James Bottomley, Rafael J. Wysocki, Alan Stern,
	Jeff Wu, Aaron Lu, linux-ide, linux-pm, linux-scsi, linux-acpi
On 11/13/2012 02:55 AM, Tejun Heo wrote:
> On Fri, Nov 09, 2012 at 02:51:56PM +0800, Aaron Lu wrote:
>> Since the ata acpi notification code introduced in commit
>> 3bd46600a7a7e938c54df8cdbac9910668c7dfb0 is solely for ZPODD, and we
>> now have a dedicated place for it, move these code there.
>>
>> And the add/remove_pm_notifier code is simplified a little bit that it
>> does not check things like if the handle is NULL and if a corresponding
>> acpi_device is there for the handle as they are guaranteed by the
>> device_can_poweroff already.
> 
> Please don't mix code movement with actual changes.  It makes it
> difficult to track what's going on.
Oh, yes.
But since add_pm_notifier code happens during ZPODD init time, and init
now happens during first time probe instead of after SCSI device has
been created, some changes are necessary when moving these code.
Sorry for not describing these things clear, I'll update the changelog
to reflect this next time.
Thanks,
Aaron
^ permalink raw reply	[flat|nested] 85+ messages in thread 
 
 
- * [PATCH v9 05/10] libata: separate ATAPI code
  2012-11-09  6:51 [PATCH v9 00/10] ZPODD Patches Aaron Lu
                   ` (3 preceding siblings ...)
  2012-11-09  6:51 ` [PATCH v9 04/10] libata: acpi: move acpi notification code to zpodd Aaron Lu
@ 2012-11-09  6:51 ` Aaron Lu
  2012-11-12 18:57   ` Tejun Heo
  2012-11-09  6:51 ` [PATCH v9 06/10] ata: zpodd: check zero power ready status Aaron Lu
                   ` (4 subsequent siblings)
  9 siblings, 1 reply; 85+ messages in thread
From: Aaron Lu @ 2012-11-09  6:51 UTC (permalink / raw)
  To: Jeff Garzik, James Bottomley, Rafael J. Wysocki, Alan Stern,
	Tejun Heo
  Cc: Jeff Wu, Aaron Lu, linux-ide, linux-pm, linux-scsi, linux-acpi,
	Aaron Lu
The atapi_eh_tur and atapi_eh_request_sense can be reused by ZPODD
code, so separate them out to a file named libata-atapi.c, and the
Makefile is modified accordingly. No functional changes should result
from this commit.
Signed-off-by: Aaron Lu <aaron.lu@intel.com>
---
 drivers/ata/Makefile       |  2 +-
 drivers/ata/libata-atapi.c | 88 ++++++++++++++++++++++++++++++++++++++++++++++
 drivers/ata/libata-eh.c    | 85 --------------------------------------------
 drivers/ata/libata.h       |  4 +++
 4 files changed, 93 insertions(+), 86 deletions(-)
 create mode 100644 drivers/ata/libata-atapi.c
diff --git a/drivers/ata/Makefile b/drivers/ata/Makefile
index 85e3de4..36377f9 100644
--- a/drivers/ata/Makefile
+++ b/drivers/ata/Makefile
@@ -103,7 +103,7 @@ obj-$(CONFIG_ATA_GENERIC)	+= ata_generic.o
 # Should be last libata driver
 obj-$(CONFIG_PATA_LEGACY)	+= pata_legacy.o
 
-libata-y	:= libata-core.o libata-scsi.o libata-eh.o libata-transport.o
+libata-y	:= libata-core.o libata-scsi.o libata-eh.o libata-atapi.o libata-transport.o
 libata-$(CONFIG_ATA_SFF)	+= libata-sff.o
 libata-$(CONFIG_SATA_PMP)	+= libata-pmp.o
 libata-$(CONFIG_ATA_ACPI)	+= libata-acpi.o
diff --git a/drivers/ata/libata-atapi.c b/drivers/ata/libata-atapi.c
new file mode 100644
index 0000000..28684ae
--- /dev/null
+++ b/drivers/ata/libata-atapi.c
@@ -0,0 +1,88 @@
+#include <linux/libata.h>
+#include <scsi/scsi_cmnd.h>
+#include "libata.h"
+
+/**
+ *	atapi_eh_tur - perform ATAPI TEST_UNIT_READY
+ *	@dev: target ATAPI device
+ *	@r_sense_key: out parameter for sense_key
+ *
+ *	Perform ATAPI TEST_UNIT_READY.
+ *
+ *	LOCKING:
+ *	EH context (may sleep).
+ *
+ *	RETURNS:
+ *	0 on success, AC_ERR_* mask on failure.
+ */
+unsigned int atapi_eh_tur(struct ata_device *dev, u8 *r_sense_key)
+{
+	u8 cdb[ATAPI_CDB_LEN] = { TEST_UNIT_READY, 0, 0, 0, 0, 0 };
+	struct ata_taskfile tf;
+	unsigned int err_mask;
+
+	ata_tf_init(dev, &tf);
+
+	tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE;
+	tf.command = ATA_CMD_PACKET;
+	tf.protocol = ATAPI_PROT_NODATA;
+
+	err_mask = ata_exec_internal(dev, &tf, cdb, DMA_NONE, NULL, 0, 0);
+	if (err_mask == AC_ERR_DEV)
+		*r_sense_key = tf.feature >> 4;
+	return err_mask;
+}
+
+/**
+ *	atapi_eh_request_sense - perform ATAPI REQUEST_SENSE
+ *	@dev: device to perform REQUEST_SENSE to
+ *	@sense_buf: result sense data buffer (SCSI_SENSE_BUFFERSIZE bytes long)
+ *	@dfl_sense_key: default sense key to use
+ *
+ *	Perform ATAPI REQUEST_SENSE after the device reported CHECK
+ *	SENSE.  This function is EH helper.
+ *
+ *	LOCKING:
+ *	Kernel thread context (may sleep).
+ *
+ *	RETURNS:
+ *	0 on success, AC_ERR_* mask on failure
+ */
+unsigned int atapi_eh_request_sense(struct ata_device *dev,
+				    u8 *sense_buf, u8 dfl_sense_key)
+{
+	u8 cdb[ATAPI_CDB_LEN] =	{
+		REQUEST_SENSE, 0, 0, 0, SCSI_SENSE_BUFFERSIZE, 0 };
+	struct ata_port *ap = dev->link->ap;
+	struct ata_taskfile tf;
+
+	DPRINTK("ATAPI request sense\n");
+
+	/* FIXME: is this needed? */
+	memset(sense_buf, 0, SCSI_SENSE_BUFFERSIZE);
+
+	/* initialize sense_buf with the error register,
+	 * for the case where they are -not- overwritten
+	 */
+	sense_buf[0] = 0x70;
+	sense_buf[2] = dfl_sense_key;
+
+	/* some devices time out if garbage left in tf */
+	ata_tf_init(dev, &tf);
+
+	tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE;
+	tf.command = ATA_CMD_PACKET;
+
+	/* is it pointless to prefer PIO for "safety reasons"? */
+	if (ap->flags & ATA_FLAG_PIO_DMA) {
+		tf.protocol = ATAPI_PROT_DMA;
+		tf.feature |= ATAPI_PKT_DMA;
+	} else {
+		tf.protocol = ATAPI_PROT_PIO;
+		tf.lbam = SCSI_SENSE_BUFFERSIZE;
+		tf.lbah = 0;
+	}
+
+	return ata_exec_internal(dev, &tf, cdb, DMA_FROM_DEVICE,
+				 sense_buf, SCSI_SENSE_BUFFERSIZE, 0);
+}
diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c
index e60437c..6487b88 100644
--- a/drivers/ata/libata-eh.c
+++ b/drivers/ata/libata-eh.c
@@ -1579,91 +1579,6 @@ static int ata_eh_read_log_10h(struct ata_device *dev,
 }
 
 /**
- *	atapi_eh_tur - perform ATAPI TEST_UNIT_READY
- *	@dev: target ATAPI device
- *	@r_sense_key: out parameter for sense_key
- *
- *	Perform ATAPI TEST_UNIT_READY.
- *
- *	LOCKING:
- *	EH context (may sleep).
- *
- *	RETURNS:
- *	0 on success, AC_ERR_* mask on failure.
- */
-static unsigned int atapi_eh_tur(struct ata_device *dev, u8 *r_sense_key)
-{
-	u8 cdb[ATAPI_CDB_LEN] = { TEST_UNIT_READY, 0, 0, 0, 0, 0 };
-	struct ata_taskfile tf;
-	unsigned int err_mask;
-
-	ata_tf_init(dev, &tf);
-
-	tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE;
-	tf.command = ATA_CMD_PACKET;
-	tf.protocol = ATAPI_PROT_NODATA;
-
-	err_mask = ata_exec_internal(dev, &tf, cdb, DMA_NONE, NULL, 0, 0);
-	if (err_mask == AC_ERR_DEV)
-		*r_sense_key = tf.feature >> 4;
-	return err_mask;
-}
-
-/**
- *	atapi_eh_request_sense - perform ATAPI REQUEST_SENSE
- *	@dev: device to perform REQUEST_SENSE to
- *	@sense_buf: result sense data buffer (SCSI_SENSE_BUFFERSIZE bytes long)
- *	@dfl_sense_key: default sense key to use
- *
- *	Perform ATAPI REQUEST_SENSE after the device reported CHECK
- *	SENSE.  This function is EH helper.
- *
- *	LOCKING:
- *	Kernel thread context (may sleep).
- *
- *	RETURNS:
- *	0 on success, AC_ERR_* mask on failure
- */
-static unsigned int atapi_eh_request_sense(struct ata_device *dev,
-					   u8 *sense_buf, u8 dfl_sense_key)
-{
-	u8 cdb[ATAPI_CDB_LEN] =
-		{ REQUEST_SENSE, 0, 0, 0, SCSI_SENSE_BUFFERSIZE, 0 };
-	struct ata_port *ap = dev->link->ap;
-	struct ata_taskfile tf;
-
-	DPRINTK("ATAPI request sense\n");
-
-	/* FIXME: is this needed? */
-	memset(sense_buf, 0, SCSI_SENSE_BUFFERSIZE);
-
-	/* initialize sense_buf with the error register,
-	 * for the case where they are -not- overwritten
-	 */
-	sense_buf[0] = 0x70;
-	sense_buf[2] = dfl_sense_key;
-
-	/* some devices time out if garbage left in tf */
-	ata_tf_init(dev, &tf);
-
-	tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE;
-	tf.command = ATA_CMD_PACKET;
-
-	/* is it pointless to prefer PIO for "safety reasons"? */
-	if (ap->flags & ATA_FLAG_PIO_DMA) {
-		tf.protocol = ATAPI_PROT_DMA;
-		tf.feature |= ATAPI_PKT_DMA;
-	} else {
-		tf.protocol = ATAPI_PROT_PIO;
-		tf.lbam = SCSI_SENSE_BUFFERSIZE;
-		tf.lbah = 0;
-	}
-
-	return ata_exec_internal(dev, &tf, cdb, DMA_FROM_DEVICE,
-				 sense_buf, SCSI_SENSE_BUFFERSIZE, 0);
-}
-
-/**
  *	ata_eh_analyze_serror - analyze SError for a failed port
  *	@link: ATA link to analyze SError for
  *
diff --git a/drivers/ata/libata.h b/drivers/ata/libata.h
index 55ad37e..5d68210 100644
--- a/drivers/ata/libata.h
+++ b/drivers/ata/libata.h
@@ -247,4 +247,8 @@ static inline void zpodd_deinit(struct ata_device *dev) {}
 static inline bool zpodd_dev_enabled(struct ata_device *dev) { return false; }
 #endif /* CONFIG_SATA_ZPODD */
 
+/* libata-atapi.c */
+unsigned int atapi_eh_tur(struct ata_device *dev, u8 *r_sense_key);
+unsigned int atapi_eh_request_sense(struct ata_device *dev, u8 *sense_buf, u8 dfl_sense_key);
+
 #endif /* __LIBATA_H__ */
-- 
1.7.12.4
^ permalink raw reply related	[flat|nested] 85+ messages in thread
- * Re: [PATCH v9 05/10] libata: separate ATAPI code
  2012-11-09  6:51 ` [PATCH v9 05/10] libata: separate ATAPI code Aaron Lu
@ 2012-11-12 18:57   ` Tejun Heo
  2012-11-13 12:49     ` Aaron Lu
  0 siblings, 1 reply; 85+ messages in thread
From: Tejun Heo @ 2012-11-12 18:57 UTC (permalink / raw)
  To: Aaron Lu
  Cc: Jeff Garzik, James Bottomley, Rafael J. Wysocki, Alan Stern,
	Jeff Wu, Aaron Lu, linux-ide, linux-pm, linux-scsi, linux-acpi
On Fri, Nov 09, 2012 at 02:51:57PM +0800, Aaron Lu wrote:
> The atapi_eh_tur and atapi_eh_request_sense can be reused by ZPODD
> code, so separate them out to a file named libata-atapi.c, and the
> Makefile is modified accordingly. No functional changes should result
> from this commit.
Why is this change necessary?  This doesn't seem to help anything to
me.
Thanks.
-- 
tejun
^ permalink raw reply	[flat|nested] 85+ messages in thread 
- * Re: [PATCH v9 05/10] libata: separate ATAPI code
  2012-11-12 18:57   ` Tejun Heo
@ 2012-11-13 12:49     ` Aaron Lu
  2012-11-18 15:01       ` Tejun Heo
  0 siblings, 1 reply; 85+ messages in thread
From: Aaron Lu @ 2012-11-13 12:49 UTC (permalink / raw)
  To: Tejun Heo
  Cc: Jeff Garzik, James Bottomley, Rafael J. Wysocki, Alan Stern,
	Jeff Wu, Aaron Lu, linux-ide, linux-pm, linux-scsi, linux-acpi
On Mon, Nov 12, 2012 at 10:57:10AM -0800, Tejun Heo wrote:
> On Fri, Nov 09, 2012 at 02:51:57PM +0800, Aaron Lu wrote:
> > The atapi_eh_tur and atapi_eh_request_sense can be reused by ZPODD
> > code, so separate them out to a file named libata-atapi.c, and the
> > Makefile is modified accordingly. No functional changes should result
> > from this commit.
> 
> Why is this change necessary?  This doesn't seem to help anything to
> me.
Function zpready introduced in patch 6 used these 2 functions to see if
an ODD is in a zero power ready state.
Thanks,
Aaron
> 
> Thanks.
> 
> -- 
> tejun
^ permalink raw reply	[flat|nested] 85+ messages in thread 
- * Re: [PATCH v9 05/10] libata: separate ATAPI code
  2012-11-13 12:49     ` Aaron Lu
@ 2012-11-18 15:01       ` Tejun Heo
  2012-11-19  2:21         ` Aaron Lu
  0 siblings, 1 reply; 85+ messages in thread
From: Tejun Heo @ 2012-11-18 15:01 UTC (permalink / raw)
  To: Aaron Lu
  Cc: Jeff Garzik, James Bottomley, Rafael J. Wysocki, Alan Stern,
	Jeff Wu, Aaron Lu, linux-ide, linux-pm, linux-scsi, linux-acpi
On Tue, Nov 13, 2012 at 08:49:24PM +0800, Aaron Lu wrote:
> On Mon, Nov 12, 2012 at 10:57:10AM -0800, Tejun Heo wrote:
> > On Fri, Nov 09, 2012 at 02:51:57PM +0800, Aaron Lu wrote:
> > > The atapi_eh_tur and atapi_eh_request_sense can be reused by ZPODD
> > > code, so separate them out to a file named libata-atapi.c, and the
> > > Makefile is modified accordingly. No functional changes should result
> > > from this commit.
> > 
> > Why is this change necessary?  This doesn't seem to help anything to
> > me.
> 
> Function zpready introduced in patch 6 used these 2 functions to see if
> an ODD is in a zero power ready state.
And why zpready can't use the functions if they're in a different file?
-- 
tejun
^ permalink raw reply	[flat|nested] 85+ messages in thread 
- * Re: [PATCH v9 05/10] libata: separate ATAPI code
  2012-11-18 15:01       ` Tejun Heo
@ 2012-11-19  2:21         ` Aaron Lu
  2012-11-19 14:51           ` Tejun Heo
  0 siblings, 1 reply; 85+ messages in thread
From: Aaron Lu @ 2012-11-19  2:21 UTC (permalink / raw)
  To: Tejun Heo
  Cc: Jeff Garzik, James Bottomley, Rafael J. Wysocki, Alan Stern,
	Jeff Wu, Aaron Lu, linux-ide, linux-pm, linux-scsi, linux-acpi
On 11/18/2012 11:01 PM, Tejun Heo wrote:
> On Tue, Nov 13, 2012 at 08:49:24PM +0800, Aaron Lu wrote:
>> On Mon, Nov 12, 2012 at 10:57:10AM -0800, Tejun Heo wrote:
>>> On Fri, Nov 09, 2012 at 02:51:57PM +0800, Aaron Lu wrote:
>>>> The atapi_eh_tur and atapi_eh_request_sense can be reused by ZPODD
>>>> code, so separate them out to a file named libata-atapi.c, and the
>>>> Makefile is modified accordingly. No functional changes should result
>>>> from this commit.
>>>
>>> Why is this change necessary?  This doesn't seem to help anything to
>>> me.
>>
>> Function zpready introduced in patch 6 used these 2 functions to see if
>> an ODD is in a zero power ready state.
> 
> And why zpready can't use the functions if they're in a different file?
Oh, sure, they can be used.
My thought is that, the 2 functions are for ATAPI and can be used by
libata-eh, libata-zpodd and maybe others in the future, so it deserves
a separate place. But if this is regarded as unnecessary, I will drop
this patch.
Thanks,
Aaron
^ permalink raw reply	[flat|nested] 85+ messages in thread 
- * Re: [PATCH v9 05/10] libata: separate ATAPI code
  2012-11-19  2:21         ` Aaron Lu
@ 2012-11-19 14:51           ` Tejun Heo
  0 siblings, 0 replies; 85+ messages in thread
From: Tejun Heo @ 2012-11-19 14:51 UTC (permalink / raw)
  To: Aaron Lu
  Cc: Jeff Garzik, James Bottomley, Rafael J. Wysocki, Alan Stern,
	Jeff Wu, Aaron Lu, linux-ide, linux-pm, linux-scsi, linux-acpi
On Mon, Nov 19, 2012 at 10:21:15AM +0800, Aaron Lu wrote:
> My thought is that, the 2 functions are for ATAPI and can be used by
> libata-eh, libata-zpodd and maybe others in the future, so it deserves
> a separate place. But if this is regarded as unnecessary, I will drop
> this patch.
Yeah, please drop.  There are a lot of function in libata-core and eh
which are used all over libata.  No need to move them because they're
gonna be used somewhere else.
Thanks.
-- 
tejun
^ permalink raw reply	[flat|nested] 85+ messages in thread 
 
 
 
 
 
- * [PATCH v9 06/10] ata: zpodd: check zero power ready status
  2012-11-09  6:51 [PATCH v9 00/10] ZPODD Patches Aaron Lu
                   ` (4 preceding siblings ...)
  2012-11-09  6:51 ` [PATCH v9 05/10] libata: separate ATAPI code Aaron Lu
@ 2012-11-09  6:51 ` Aaron Lu
  2012-11-12 19:13   ` Tejun Heo
  2012-11-09  6:51 ` [PATCH v9 07/10] block: add a new interface to block events Aaron Lu
                   ` (3 subsequent siblings)
  9 siblings, 1 reply; 85+ messages in thread
From: Aaron Lu @ 2012-11-09  6:51 UTC (permalink / raw)
  To: Jeff Garzik, James Bottomley, Rafael J. Wysocki, Alan Stern,
	Tejun Heo
  Cc: Jeff Wu, Aaron Lu, linux-ide, linux-pm, linux-scsi, linux-acpi,
	Aaron Lu
Per the Mount Fuji spec, the ODD is considered zero power ready when:
- For slot type ODD, no media inside;
- For tray type ODD, no media inside and tray closed.
The information can be retrieved by either the returned information of
command GET_EVENT_STATUS_NOTIFICATION(the command is used to poll for
media event) or sense code.
In this implementation, the zero power ready status is determined by
the following factors:
1 polled media status byte, and this info is recorded in status_ready
  field of zpodd structure;
2 sense code by issuing a TEST_UNIT_READY command after status_ready
  is found to be true.
The information provided by the media status byte is not accurate, it is
possible that after a new disc is just inserted, the status byte still
returns media not present. So this information can not be used as the
final deciding factor. But since SCSI ODD driver sr will always poll the
ODD every 2 seconds, this information is readily available without any
much cost. So it is used as a hint: when we find zero power ready status
in the media status byte, we will see if it is really the case with the
sense code method. This way, we can avoid sending too many
TEST_UNIT_READY commands to the ODD.
When we first sensed the ODD in the zero power ready state, the
timestamp will be recoreded. And after ODD stayed in this state for some
pre-defined period, the ODD is considered as power off ready and the
zp_ready flag will be set. The zp_ready flag serves as the deciding
factor other code will use to see if power off is OK for the ODD. The
Mount Fuji spec suggests a delay should be used here, to avoid the case
user ejects the ODD and then instantly inserts a new one again, so that
we can avoid a power transition. And some ODDs may be slow to place its
head to the home position after disc is ejected, so a delay here is
generally a good idea.
The zero power ready status check is performed in the ata port's runtime
suspend code path, when port is not frozen yet, as we need to issue some
IOs to the ODD.
Signed-off-by: Aaron Lu <aaron.lu@intel.com>
---
 drivers/ata/libata-acpi.c  |   8 +++-
 drivers/ata/libata-scsi.c  |   4 ++
 drivers/ata/libata-zpodd.c | 116 +++++++++++++++++++++++++++++++++++++++++++++
 drivers/ata/libata.h       |   4 ++
 4 files changed, 131 insertions(+), 1 deletion(-)
diff --git a/drivers/ata/libata-acpi.c b/drivers/ata/libata-acpi.c
index 6b6819c..13ee178 100644
--- a/drivers/ata/libata-acpi.c
+++ b/drivers/ata/libata-acpi.c
@@ -784,7 +784,13 @@ static int ata_acpi_push_id(struct ata_device *dev)
  */
 int ata_acpi_on_suspend(struct ata_port *ap)
 {
-	/* nada */
+	struct ata_device *dev;
+
+	ata_for_each_dev(dev, &ap->link, ENABLED) {
+		if (zpodd_dev_enabled(dev))
+			zpodd_check_zpready(dev);
+	}
+
 	return 0;
 }
 
diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
index e3bda07..6f235b9 100644
--- a/drivers/ata/libata-scsi.c
+++ b/drivers/ata/libata-scsi.c
@@ -2665,6 +2665,10 @@ static void atapi_qc_complete(struct ata_queued_cmd *qc)
 			ata_scsi_rbuf_put(cmd, true, &flags);
 		}
 
+		if (zpodd_dev_enabled(qc->dev) &&
+				scsicmd[0] == GET_EVENT_STATUS_NOTIFICATION)
+			zpodd_snoop_status(qc->dev, cmd);
+
 		cmd->result = SAM_STAT_GOOD;
 	}
 
diff --git a/drivers/ata/libata-zpodd.c b/drivers/ata/libata-zpodd.c
index ba8c985..533a39e 100644
--- a/drivers/ata/libata-zpodd.c
+++ b/drivers/ata/libata-zpodd.c
@@ -2,13 +2,21 @@
 #include <linux/cdrom.h>
 #include <linux/pm_runtime.h>
 #include <scsi/scsi_device.h>
+#include <scsi/scsi_cmnd.h>
 
 #include "libata.h"
 
+#define POWEROFF_DELAY  (30 * 1000)     /* 30 seconds for power off delay */
+
 struct zpodd {
 	bool slot:1;
 	bool drawer:1;
 	bool from_notify:1;	/* resumed as a result of acpi notification */
+	bool status_ready:1;	/* ready status derived from media event poll,
+				   it is not accurate, but serves as a hint */
+	bool zp_ready:1;	/* zero power ready state */
+
+	unsigned long last_ready; /* last zero power ready timestamp */
 
 	struct ata_device *dev;
 };
@@ -93,6 +101,114 @@ static bool device_can_poweroff(struct ata_device *ata_dev)
 		return false;
 }
 
+/*
+ * Snoop the result of GET_STATUS_NOTIFICATION_EVENT, the media
+ * status byte has information on media present/door closed.
+ *
+ * This information serves only as a hint, as it is not accurate.
+ * The sense code method will be used when deciding if the ODD is
+ * really zero power ready.
+ */
+void zpodd_snoop_status(struct ata_device *dev, struct scsi_cmnd *scmd)
+{
+	bool ready;
+	char buf[8];
+	struct event_header *eh = (void *)buf;
+	struct media_event_desc *med = (void *)(buf + 4);
+	struct sg_table *table = &scmd->sdb.table;
+	struct zpodd *zpodd = dev->private_data;
+
+	if (sg_copy_to_buffer(table->sgl, table->nents, buf, 8) != 8)
+		return;
+
+	if (be16_to_cpu(eh->data_len) < sizeof(*med))
+		return;
+
+	if (eh->nea || eh->notification_class != 0x4)
+		return;
+
+	if (zpodd->slot)
+		ready = !med->media_present;
+	else
+		ready = !(med->media_present || med->door_open);
+
+	zpodd->status_ready = ready;
+}
+
+/* Test if ODD is zero power ready by sense code */
+static bool zpready(struct ata_device *dev)
+{
+	u8 sense_key, *sense_buf;
+	unsigned int ret, asc, ascq, add_len;
+	struct zpodd *zpodd = dev->private_data;
+
+	ret = atapi_eh_tur(dev, &sense_key);
+
+	if (!ret || sense_key != NOT_READY)
+		return false;
+
+	sense_buf = dev->link->ap->sector_buf;
+	ret = atapi_eh_request_sense(dev, sense_buf, sense_key);
+	if (ret)
+		return false;
+
+	/* sense valid */
+	if ((sense_buf[0] & 0x7f) != 0x70)
+		return false;
+
+	add_len = sense_buf[7];
+	/* has asc and ascq */
+	if (add_len < 6)
+		return false;
+
+	asc = sense_buf[12];
+	ascq = sense_buf[13];
+
+	if (zpodd->slot)
+		/* no media inside */
+		return asc == 0x3a;
+	else
+		/* no media inside and door closed */
+		return asc == 0x3a && ascq == 0x01;
+}
+
+/*
+ * Check ODD's zero power ready status.
+ *
+ * This function is called during ATA port's suspend path,
+ * when the port is not frozen yet, so that we can still make
+ * some IO to the ODD to decide if it is zero power ready.
+ *
+ * The ODD is regarded as zero power ready when it is in zero
+ * power ready state for some time(defined by POWEROFF_DELAY).
+ */
+void zpodd_check_zpready(struct ata_device *dev)
+{
+	bool zp_ready;
+	unsigned long expires;
+	struct zpodd *zpodd = dev->private_data;
+
+	if (!zpodd->status_ready) {
+		zpodd->last_ready = 0;
+		return;
+	}
+
+	if (!zpodd->last_ready) {
+		zp_ready = zpready(dev);
+		if (zp_ready)
+			zpodd->last_ready = jiffies;
+		return;
+	}
+
+	expires = zpodd->last_ready + msecs_to_jiffies(POWEROFF_DELAY);
+	if (time_before(jiffies, expires))
+		return;
+
+	zpodd->zp_ready = zpready(dev);
+	if (!zpodd->zp_ready)
+		zpodd->last_ready = 0;
+}
+
 static void zpodd_wake_dev(acpi_handle handle, u32 event, void *context)
 {
 	struct ata_device *ata_dev = context;
diff --git a/drivers/ata/libata.h b/drivers/ata/libata.h
index 5d68210..2b46703 100644
--- a/drivers/ata/libata.h
+++ b/drivers/ata/libata.h
@@ -241,10 +241,14 @@ static inline bool zpodd_dev_enabled(struct ata_device *dev)
 	else
 		return false;
 }
+void zpodd_snoop_status(struct ata_device *dev, struct scsi_cmnd *scmd);
+void zpodd_check_zpready(struct ata_device *dev);
 #else /* CONFIG_SATA_ZPODD */
 static inline void zpodd_init(struct ata_device *dev) {}
 static inline void zpodd_deinit(struct ata_device *dev) {}
 static inline bool zpodd_dev_enabled(struct ata_device *dev) { return false; }
+static inline void zpodd_snoop_status(struct ata_device *dev, struct scsi_cmnd *scmd) {}
+static inline void zpodd_check_zpready(struct ata_device *dev) {}
 #endif /* CONFIG_SATA_ZPODD */
 
 /* libata-atapi.c */
-- 
1.7.12.4
^ permalink raw reply related	[flat|nested] 85+ messages in thread
- * Re: [PATCH v9 06/10] ata: zpodd: check zero power ready status
  2012-11-09  6:51 ` [PATCH v9 06/10] ata: zpodd: check zero power ready status Aaron Lu
@ 2012-11-12 19:13   ` Tejun Heo
  2012-11-13 13:20     ` Aaron Lu
  2012-11-14  2:18     ` Aaron Lu
  0 siblings, 2 replies; 85+ messages in thread
From: Tejun Heo @ 2012-11-12 19:13 UTC (permalink / raw)
  To: Aaron Lu
  Cc: Jeff Garzik, James Bottomley, Rafael J. Wysocki, Alan Stern,
	Jeff Wu, Aaron Lu, linux-ide, linux-pm, linux-scsi, linux-acpi
Hello,
On Fri, Nov 09, 2012 at 02:51:58PM +0800, Aaron Lu wrote:
> @@ -784,7 +784,13 @@ static int ata_acpi_push_id(struct ata_device *dev)
>   */
>  int ata_acpi_on_suspend(struct ata_port *ap)
>  {
> -	/* nada */
> +	struct ata_device *dev;
> +
> +	ata_for_each_dev(dev, &ap->link, ENABLED) {
> +		if (zpodd_dev_enabled(dev))
> +			zpodd_check_zpready(dev);
> +	}
> +
Why is it running off ata_acpi_on_suspend() instead of hooking
directly into EH suspend routine?
> diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
> index e3bda07..6f235b9 100644
> --- a/drivers/ata/libata-scsi.c
> +++ b/drivers/ata/libata-scsi.c
> @@ -2665,6 +2665,10 @@ static void atapi_qc_complete(struct ata_queued_cmd *qc)
>  			ata_scsi_rbuf_put(cmd, true, &flags);
>  		}
>  
> +		if (zpodd_dev_enabled(qc->dev) &&
> +				scsicmd[0] == GET_EVENT_STATUS_NOTIFICATION)
> +			zpodd_snoop_status(qc->dev, cmd);
> +
Brief comment explaining what's going on here wouldn't hurt.
> +#define POWEROFF_DELAY  (30 * 1000)     /* 30 seconds for power off delay */
> +
>  struct zpodd {
>  	bool slot:1;
>  	bool drawer:1;
>  	bool from_notify:1;	/* resumed as a result of acpi notification */
> +	bool status_ready:1;	/* ready status derived from media event poll,
> +				   it is not accurate, but serves as a hint */
> +	bool zp_ready:1;	/* zero power ready state */
> +
> +	unsigned long last_ready; /* last zero power ready timestamp */
>  
>  	struct ata_device *dev;
>  };
How are accesses to the bit fields synchronized?
> +/*
> + * Snoop the result of GET_STATUS_NOTIFICATION_EVENT, the media
> + * status byte has information on media present/door closed.
> + *
> + * This information serves only as a hint, as it is not accurate.
> + * The sense code method will be used when deciding if the ODD is
> + * really zero power ready.
> + */
It would be great if you can make the above a proper dockbook function
comment.  Also, as the snooping for hint thing isn't too obvious it
would be great if the comment contains more info which is explained in
the commit message.
> +void zpodd_snoop_status(struct ata_device *dev, struct scsi_cmnd *scmd)
> +{
> +	bool ready;
> +	char buf[8];
> +	struct event_header *eh = (void *)buf;
> +	struct media_event_desc *med = (void *)(buf + 4);
> +	struct sg_table *table = &scmd->sdb.table;
> +	struct zpodd *zpodd = dev->private_data;
Don't people usually put variables definitions w/ assignments above
the ones without?
> +/*
> + * Check ODD's zero power ready status.
> + *
> + * This function is called during ATA port's suspend path,
> + * when the port is not frozen yet, so that we can still make
> + * some IO to the ODD to decide if it is zero power ready.
> + *
> + * The ODD is regarded as zero power ready when it is in zero
> + * power ready state for some time(defined by POWEROFF_DELAY).
> + */
> +void zpodd_check_zpready(struct ata_device *dev)
> +{
> +	bool zp_ready;
> +	unsigned long expires;
> +	struct zpodd *zpodd = dev->private_data;
> +
> +	if (!zpodd->status_ready) {
> +		zpodd->last_ready = 0;
> +		return;
> +	}
> +
> +	if (!zpodd->last_ready) {
> +		zp_ready = zpready(dev);
> +		if (zp_ready)
> +			zpodd->last_ready = jiffies;
> +		return;
> +	}
> +
> +	expires = zpodd->last_ready + msecs_to_jiffies(POWEROFF_DELAY);
> +	if (time_before(jiffies, expires))
> +		return;
> +
> +	zpodd->zp_ready = zpready(dev);
> +	if (!zpodd->zp_ready)
> +		zpodd->last_ready = 0;
> +}
Hmmm... so, the "full" check only happens when autopm kicks in, right?
Is it really worth avoiding an extra TUR on autopm events?  That's not
really a hot path.  It seems a bit over-engineered to me.
Thanks.
-- 
tejun
^ permalink raw reply	[flat|nested] 85+ messages in thread
- * Re: [PATCH v9 06/10] ata: zpodd: check zero power ready status
  2012-11-12 19:13   ` Tejun Heo
@ 2012-11-13 13:20     ` Aaron Lu
  2012-11-14  2:18     ` Aaron Lu
  1 sibling, 0 replies; 85+ messages in thread
From: Aaron Lu @ 2012-11-13 13:20 UTC (permalink / raw)
  To: Tejun Heo
  Cc: Jeff Garzik, James Bottomley, Rafael J. Wysocki, Alan Stern,
	Jeff Wu, Aaron Lu, linux-ide, linux-pm, linux-scsi, linux-acpi
On Mon, Nov 12, 2012 at 11:13:03AM -0800, Tejun Heo wrote:
> Hello,
> 
> On Fri, Nov 09, 2012 at 02:51:58PM +0800, Aaron Lu wrote:
> > +/*
> > + * Check ODD's zero power ready status.
> > + *
> > + * This function is called during ATA port's suspend path,
> > + * when the port is not frozen yet, so that we can still make
> > + * some IO to the ODD to decide if it is zero power ready.
> > + *
> > + * The ODD is regarded as zero power ready when it is in zero
> > + * power ready state for some time(defined by POWEROFF_DELAY).
> > + */
> > +void zpodd_check_zpready(struct ata_device *dev)
> > +{
> > +	bool zp_ready;
> > +	unsigned long expires;
> > +	struct zpodd *zpodd = dev->private_data;
> > +
> > +	if (!zpodd->status_ready) {
> > +		zpodd->last_ready = 0;
> > +		return;
> > +	}
> > +
> > +	if (!zpodd->last_ready) {
> > +		zp_ready = zpready(dev);
> > +		if (zp_ready)
> > +			zpodd->last_ready = jiffies;
> > +		return;
> > +	}
> > +
> > +	expires = zpodd->last_ready + msecs_to_jiffies(POWEROFF_DELAY);
> > +	if (time_before(jiffies, expires))
> > +		return;
> > +
> > +	zpodd->zp_ready = zpready(dev);
> > +	if (!zpodd->zp_ready)
> > +		zpodd->last_ready = 0;
> > +}
> 
> Hmmm... so, the "full" check only happens when autopm kicks in, right?
Yes, and unless the ODD is powered off, autopm can kick in every 2
seconds(together with the events poll).
> Is it really worth avoiding an extra TUR on autopm events?  That's not
> really a hot path.  It seems a bit over-engineered to me.
I'm not sure...But if issuing TUR every 2 seconds under some condition
is acceptable, I'll be happily removing the snoop thing and use TUR only.
Thanks,
Aaron
^ permalink raw reply	[flat|nested] 85+ messages in thread
- * Re: [PATCH v9 06/10] ata: zpodd: check zero power ready status
  2012-11-12 19:13   ` Tejun Heo
  2012-11-13 13:20     ` Aaron Lu
@ 2012-11-14  2:18     ` Aaron Lu
  2012-11-18 15:00       ` Tejun Heo
  1 sibling, 1 reply; 85+ messages in thread
From: Aaron Lu @ 2012-11-14  2:18 UTC (permalink / raw)
  To: Tejun Heo
  Cc: Jeff Garzik, James Bottomley, Rafael J. Wysocki, Alan Stern,
	Jeff Wu, Aaron Lu, linux-ide, linux-pm, linux-scsi, linux-acpi
On 11/13/2012 03:13 AM, Tejun Heo wrote:
> Hello,
> 
> On Fri, Nov 09, 2012 at 02:51:58PM +0800, Aaron Lu wrote:
>> +#define POWEROFF_DELAY  (30 * 1000)     /* 30 seconds for power off delay */
>> +
>>  struct zpodd {
>>  	bool slot:1;
>>  	bool drawer:1;
>>  	bool from_notify:1;	/* resumed as a result of acpi notification */
>> +	bool status_ready:1;	/* ready status derived from media event poll,
>> +				   it is not accurate, but serves as a hint */
>> +	bool zp_ready:1;	/* zero power ready state */
>> +
>> +	unsigned long last_ready; /* last zero power ready timestamp */
>>  
>>  	struct ata_device *dev;
>>  };
> 
> How are accesses to the bit fields synchronized?
They are synchronized by PM core.
PM core ensures that no two suspend or resume callback run concurrently.
And when ODD is executing a command, it is in active state, no PM
callback will run.
> 
>> +/*
>> + * Snoop the result of GET_STATUS_NOTIFICATION_EVENT, the media
>> + * status byte has information on media present/door closed.
>> + *
>> + * This information serves only as a hint, as it is not accurate.
>> + * The sense code method will be used when deciding if the ODD is
>> + * really zero power ready.
>> + */
> 
> It would be great if you can make the above a proper dockbook function
> comment.  Also, as the snooping for hint thing isn't too obvious it
> would be great if the comment contains more info which is explained in
> the commit message.
OK.
> 
>> +/*
>> + * Check ODD's zero power ready status.
>> + *
>> + * This function is called during ATA port's suspend path,
>> + * when the port is not frozen yet, so that we can still make
>> + * some IO to the ODD to decide if it is zero power ready.
>> + *
>> + * The ODD is regarded as zero power ready when it is in zero
>> + * power ready state for some time(defined by POWEROFF_DELAY).
>> + */
>> +void zpodd_check_zpready(struct ata_device *dev)
>> +{
>> +	bool zp_ready;
>> +	unsigned long expires;
>> +	struct zpodd *zpodd = dev->private_data;
>> +
>> +	if (!zpodd->status_ready) {
>> +		zpodd->last_ready = 0;
>> +		return;
>> +	}
>> +
>> +	if (!zpodd->last_ready) {
>> +		zp_ready = zpready(dev);
>> +		if (zp_ready)
>> +			zpodd->last_ready = jiffies;
>> +		return;
>> +	}
>> +
>> +	expires = zpodd->last_ready + msecs_to_jiffies(POWEROFF_DELAY);
>> +	if (time_before(jiffies, expires))
>> +		return;
>> +
>> +	zpodd->zp_ready = zpready(dev);
>> +	if (!zpodd->zp_ready)
>> +		zpodd->last_ready = 0;
>> +}
> 
> Hmmm... so, the "full" check only happens when autopm kicks in, right?
> Is it really worth avoiding an extra TUR on autopm events?  That's not
> really a hot path.  It seems a bit over-engineered to me.
A little more information about this:
When there is disc inside and no program mounted the drive, the ODD will
be runtime suspended/resumed every 2 seconds along with the event poll.
I'm not sure if the above situation can happen often. Normal desktop
environment should automatically mount the ODD once they got uevent, and
for console users, they will probably manually mount the drive after
they have inserted a disc. The way I did it this way is to deal with the
worst possible case. But if this is deemed as not necessary, I think I
can remove the snoop hint thing and use TUR directly.
Thanks,
Aaron
^ permalink raw reply	[flat|nested] 85+ messages in thread
- * Re: [PATCH v9 06/10] ata: zpodd: check zero power ready status
  2012-11-14  2:18     ` Aaron Lu
@ 2012-11-18 15:00       ` Tejun Heo
  2012-11-19  3:09         ` Aaron Lu
  0 siblings, 1 reply; 85+ messages in thread
From: Tejun Heo @ 2012-11-18 15:00 UTC (permalink / raw)
  To: Aaron Lu
  Cc: Jeff Garzik, James Bottomley, Rafael J. Wysocki, Alan Stern,
	Jeff Wu, Aaron Lu, linux-ide, linux-pm, linux-scsi, linux-acpi
Hello, Aaron.
On Wed, Nov 14, 2012 at 10:18:23AM +0800, Aaron Lu wrote:
> On 11/13/2012 03:13 AM, Tejun Heo wrote:
> > Hello,
> > 
> > On Fri, Nov 09, 2012 at 02:51:58PM +0800, Aaron Lu wrote:
> >> +#define POWEROFF_DELAY  (30 * 1000)     /* 30 seconds for power off delay */
> >> +
> >>  struct zpodd {
> >>  	bool slot:1;
> >>  	bool drawer:1;
> >>  	bool from_notify:1;	/* resumed as a result of acpi notification */
> >> +	bool status_ready:1;	/* ready status derived from media event poll,
> >> +				   it is not accurate, but serves as a hint */
> >> +	bool zp_ready:1;	/* zero power ready state */
> >> +
> >> +	unsigned long last_ready; /* last zero power ready timestamp */
> >>  
> >>  	struct ata_device *dev;
> >>  };
> > 
> > How are accesses to the bit fields synchronized?
> 
> They are synchronized by PM core.
> PM core ensures that no two suspend or resume callback run concurrently.
> And when ODD is executing a command, it is in active state, no PM
> callback will run.
Care to add short comment for that?  Flag and bitfield updates aren't
atomic to each other, so I find it usually helpful to clearly state
the synchronization rules for them.  More so if locking is inherited
from upprer layer and not immediately obvious.
> > Hmmm... so, the "full" check only happens when autopm kicks in, right?
> > Is it really worth avoiding an extra TUR on autopm events?  That's not
> > really a hot path.  It seems a bit over-engineered to me.
> 
> A little more information about this:
> When there is disc inside and no program mounted the drive, the ODD will
> be runtime suspended/resumed every 2 seconds along with the event poll.
Is that a desirable behavior?  I haven't been following autopm and am
a bit fuzzy about how autopm works and what it does.
If there isn't any user of the device autopm kicks in.  If zpodd is
enabled and there's no media, the device goes off power.  If the user
initiates an event which may change media status, the driver is
notified via acpi and autopm backs out restoring power to the device.
Am I understanding it correctly?
What I'm confused about is what autopm does for devices w/o zpodd.
What happens then?  Is it gonna leave power on for the device and,
say, go on to suspend the controller?  But, how would that work for,
say, future devices with async notification for media events?
Also, if autopm is enabled, an optical device would go in and out of
suspend every two seconds?
> I'm not sure if the above situation can happen often. Normal desktop
> environment should automatically mount the ODD once they got uevent, and
> for console users, they will probably manually mount the drive after
> they have inserted a disc. The way I did it this way is to deal with the
> worst possible case. But if this is deemed as not necessary, I think I
> can remove the snoop hint thing and use TUR directly.
The problem with issuing TUR regularly is that some ODDs lock up after
getting hit by frequent TURs.  That's the reason why sr event check
routine is being careful with TUR and only issue
GET_EVENT_STATUS_NOTIFICATION.  Windows does about the same thing and
some vendors somehow screwed up TUR.
That said, I can't say the snooping is pretty.  It's a rather nasty
thing to do.  So, libata now wants information from the event polling
in block layer, but reaching for block_device from ata_devices is
nasty too.  Hmmm... but aren't you already doing that to block polling
on a powered down device?
Thanks.
-- 
tejun
^ permalink raw reply	[flat|nested] 85+ messages in thread
- * Re: [PATCH v9 06/10] ata: zpodd: check zero power ready status
  2012-11-18 15:00       ` Tejun Heo
@ 2012-11-19  3:09         ` Aaron Lu
  2012-11-19 14:56           ` Tejun Heo
  0 siblings, 1 reply; 85+ messages in thread
From: Aaron Lu @ 2012-11-19  3:09 UTC (permalink / raw)
  To: Tejun Heo
  Cc: Jeff Garzik, James Bottomley, Rafael J. Wysocki, Alan Stern,
	Jeff Wu, Aaron Lu, linux-ide, linux-pm, linux-scsi, linux-acpi
On 11/18/2012 11:00 PM, Tejun Heo wrote:
> Hello, Aaron.
Hi,
> 
> On Wed, Nov 14, 2012 at 10:18:23AM +0800, Aaron Lu wrote:
>> On 11/13/2012 03:13 AM, Tejun Heo wrote:
>>> Hello,
>>>
>>> On Fri, Nov 09, 2012 at 02:51:58PM +0800, Aaron Lu wrote:
>>>> +#define POWEROFF_DELAY  (30 * 1000)     /* 30 seconds for power off delay */
>>>> +
>>>>  struct zpodd {
>>>>  	bool slot:1;
>>>>  	bool drawer:1;
>>>>  	bool from_notify:1;	/* resumed as a result of acpi notification */
>>>> +	bool status_ready:1;	/* ready status derived from media event poll,
>>>> +				   it is not accurate, but serves as a hint */
>>>> +	bool zp_ready:1;	/* zero power ready state */
>>>> +
>>>> +	unsigned long last_ready; /* last zero power ready timestamp */
>>>>  
>>>>  	struct ata_device *dev;
>>>>  };
>>>
>>> How are accesses to the bit fields synchronized?
>>
>> They are synchronized by PM core.
>> PM core ensures that no two suspend or resume callback run concurrently.
>> And when ODD is executing a command, it is in active state, no PM
>> callback will run.
> 
> Care to add short comment for that?  Flag and bitfield updates aren't
> atomic to each other, so I find it usually helpful to clearly state
> the synchronization rules for them.  More so if locking is inherited
> from upprer layer and not immediately obvious.
OK.
> 
>>> Hmmm... so, the "full" check only happens when autopm kicks in, right?
>>> Is it really worth avoiding an extra TUR on autopm events?  That's not
>>> really a hot path.  It seems a bit over-engineered to me.
>>
>> A little more information about this:
>> When there is disc inside and no program mounted the drive, the ODD will
>> be runtime suspended/resumed every 2 seconds along with the event poll.
> 
> Is that a desirable behavior?  I haven't been following autopm and am
> a bit fuzzy about how autopm works and what it does.
> 
> If there isn't any user of the device autopm kicks in.  If zpodd is
> enabled and there's no media, the device goes off power.  If the user
> initiates an event which may change media status, the driver is
> notified via acpi and autopm backs out restoring power to the device.
> Am I understanding it correctly?
Yes.
> 
> What I'm confused about is what autopm does for devices w/o zpodd.
> What happens then?  Is it gonna leave power on for the device and,
> say, go on to suspend the controller?  But, how would that work for,
> say, future devices with async notification for media events?
Maybe we shouldn't allow autopm for such devices?
> 
> Also, if autopm is enabled, an optical device would go in and out of
> suspend every two seconds?
> 
>> I'm not sure if the above situation can happen often. Normal desktop
>> environment should automatically mount the ODD once they got uevent, and
>> for console users, they will probably manually mount the drive after
>> they have inserted a disc. The way I did it this way is to deal with the
>> worst possible case. But if this is deemed as not necessary, I think I
>> can remove the snoop hint thing and use TUR directly.
> 
> The problem with issuing TUR regularly is that some ODDs lock up after
> getting hit by frequent TURs.  That's the reason why sr event check
> routine is being careful with TUR and only issue
> GET_EVENT_STATUS_NOTIFICATION.  Windows does about the same thing and
> some vendors somehow screwed up TUR.
> 
> That said, I can't say the snooping is pretty.  It's a rather nasty
> thing to do.  So, libata now wants information from the event polling
> in block layer, but reaching for block_device from ata_devices is
> nasty too.  Hmmm... but aren't you already doing that to block polling
> on a powered down device?
I was feeling brain damaged by this for some time :-)
Basically, only ATA layer is aware of the power off thing, and sr knows
nothing about this(or it is not supposed to know this, at least, this is
what SCSI people think) and once powered off, I do not want the poll to
disturb the device, so I need to block the poll. I can't come up with
another way to achieve this except this nasty way.
James suggests me to keep the poll, but emulate the command. The problem
with this is, the autopm for resume will kick in on each poll, so I'll
need to decide if power up the ODD for this time's resume is needed in
port's runtime resume callback. This made things complex and it also put
too much logic in the resume callback, which is not desired. And even if
I keep the ODD in powered off state by emulating this poll command, its
ancestor devices will still be resumed, and I may need to do some trick
in their resume callback to avoid needless power/state transitions. This
doesn't feel like an elegant way to solve this either.
So yes, I'm still using this _nasty_ way to make the ODD stay in powered
off state as long as possible. But if there is other elegant ways to
solve this, I would appreciate it and happily using it. Personally, I
hope we can make sr aware of ZPODD, that would make the pain gone.
Thanks,
Aaron
^ permalink raw reply	[flat|nested] 85+ messages in thread
- * Re: [PATCH v9 06/10] ata: zpodd: check zero power ready status
  2012-11-19  3:09         ` Aaron Lu
@ 2012-11-19 14:56           ` Tejun Heo
  2012-11-19 15:06             ` James Bottomley
  2012-11-20  6:00             ` Aaron Lu
  0 siblings, 2 replies; 85+ messages in thread
From: Tejun Heo @ 2012-11-19 14:56 UTC (permalink / raw)
  To: Aaron Lu
  Cc: Jeff Garzik, James Bottomley, Rafael J. Wysocki, Alan Stern,
	Jeff Wu, Aaron Lu, linux-ide, linux-pm, linux-scsi, linux-acpi
Hey, Aaron.
On Mon, Nov 19, 2012 at 11:09:40AM +0800, Aaron Lu wrote:
> > What I'm confused about is what autopm does for devices w/o zpodd.
> > What happens then?  Is it gonna leave power on for the device and,
> > say, go on to suspend the controller?  But, how would that work for,
> > say, future devices with async notification for media events?
> 
> Maybe we shouldn't allow autopm for such devices?
Yeah, maybe.  It would be nice to be able to automatically power off
disks and ports which aren't being used tho.
> > That said, I can't say the snooping is pretty.  It's a rather nasty
> > thing to do.  So, libata now wants information from the event polling
> > in block layer, but reaching for block_device from ata_devices is
> > nasty too.  Hmmm... but aren't you already doing that to block polling
> > on a powered down device?
> 
> I was feeling brain damaged by this for some time :-)
> 
> Basically, only ATA layer is aware of the power off thing, and sr knows
> nothing about this(or it is not supposed to know this, at least, this is
> what SCSI people think) and once powered off, I do not want the poll to
> disturb the device, so I need to block the poll. I can't come up with
> another way to achieve this except this nasty way.
> 
> James suggests me to keep the poll, but emulate the command. The problem
> with this is, the autopm for resume will kick in on each poll, so I'll
> need to decide if power up the ODD for this time's resume is needed in
> port's runtime resume callback. This made things complex and it also put
> too much logic in the resume callback, which is not desired. And even if
> I keep the ODD in powered off state by emulating this poll command, its
> ancestor devices will still be resumed, and I may need to do some trick
> in their resume callback to avoid needless power/state transitions. This
> doesn't feel like an elegant way to solve this either.
> 
> So yes, I'm still using this _nasty_ way to make the ODD stay in powered
> off state as long as possible. But if there is other elegant ways to
> solve this, I would appreciate it and happily using it. Personally, I
> hope we can make sr aware of ZPODD, that would make the pain gone.
I really think we need a way for (auto)pm and event polling to talk to
each other so that autopm can tell event poll to sod off while pm is
in effect.  Trying to solve this from inside libata doesn't seem
right.  The problem, again, seems to be figuring out which hardware
device maps to which block device.  Hmmm... Any good ideas?
Thanks.
-- 
tejun
^ permalink raw reply	[flat|nested] 85+ messages in thread 
- * Re: [PATCH v9 06/10] ata: zpodd: check zero power ready status
  2012-11-19 14:56           ` Tejun Heo
@ 2012-11-19 15:06             ` James Bottomley
  2012-11-26  0:33               ` Rafael J. Wysocki
  2012-11-20  6:00             ` Aaron Lu
  1 sibling, 1 reply; 85+ messages in thread
From: James Bottomley @ 2012-11-19 15:06 UTC (permalink / raw)
  To: Tejun Heo
  Cc: Aaron Lu, Jeff Garzik, Rafael J. Wysocki, Alan Stern, Jeff Wu,
	Aaron Lu, linux-ide, linux-pm, linux-scsi, linux-acpi
On Mon, 2012-11-19 at 06:56 -0800, Tejun Heo wrote:
> Hey, Aaron.
> 
> On Mon, Nov 19, 2012 at 11:09:40AM +0800, Aaron Lu wrote:
> > > What I'm confused about is what autopm does for devices w/o zpodd.
> > > What happens then?  Is it gonna leave power on for the device and,
> > > say, go on to suspend the controller?  But, how would that work for,
> > > say, future devices with async notification for media events?
> > 
> > Maybe we shouldn't allow autopm for such devices?
> 
> Yeah, maybe.  It would be nice to be able to automatically power off
> disks and ports which aren't being used tho.
> 
> > > That said, I can't say the snooping is pretty.  It's a rather nasty
> > > thing to do.  So, libata now wants information from the event polling
> > > in block layer, but reaching for block_device from ata_devices is
> > > nasty too.  Hmmm... but aren't you already doing that to block polling
> > > on a powered down device?
> > 
> > I was feeling brain damaged by this for some time :-)
> > 
> > Basically, only ATA layer is aware of the power off thing, and sr knows
> > nothing about this(or it is not supposed to know this, at least, this is
> > what SCSI people think) and once powered off, I do not want the poll to
> > disturb the device, so I need to block the poll. I can't come up with
> > another way to achieve this except this nasty way.
> > 
> > James suggests me to keep the poll, but emulate the command. The problem
> > with this is, the autopm for resume will kick in on each poll, so I'll
> > need to decide if power up the ODD for this time's resume is needed in
> > port's runtime resume callback. This made things complex and it also put
> > too much logic in the resume callback, which is not desired. And even if
> > I keep the ODD in powered off state by emulating this poll command, its
> > ancestor devices will still be resumed, and I may need to do some trick
> > in their resume callback to avoid needless power/state transitions. This
> > doesn't feel like an elegant way to solve this either.
> > 
> > So yes, I'm still using this _nasty_ way to make the ODD stay in powered
> > off state as long as possible. But if there is other elegant ways to
> > solve this, I would appreciate it and happily using it. Personally, I
> > hope we can make sr aware of ZPODD, that would make the pain gone.
> 
> I really think we need a way for (auto)pm and event polling to talk to
> each other so that autopm can tell event poll to sod off while pm is
> in effect.  Trying to solve this from inside libata doesn't seem
> right.  The problem, again, seems to be figuring out which hardware
> device maps to which block device.  Hmmm... Any good ideas?
I've asked the PM people several times about this, because it's a real
problem for almost everything:  PM needs some type of top to bottom
stack view, which the layering isolation we have within storage really
doesn't cope with well.  No real suggestion has been forthcoming.
The reason I think it should be emulated (in the acpi layer, not libata,
but as long as it's not in SCSI, I'm not so fussed where it ends up) is
because ZPODD is the software equivalent of ZPREADY, which will be done
in hardware and will be effectively invisible to autopm in the same way
that SCSI (and ATA) power management is mostly invisible.  If we
currently do ZPREADY emulation in the low layer (i.e. ZPODD has exact
ZPREADY emulation), we won't care (except for flipping the sofware bit)
whether the device support ZPODD or ZPREADY and it will all just
work(tm).  The industry expectation is that ZPODD is just a transition
state between current power management and ZPREADY.
James
^ permalink raw reply	[flat|nested] 85+ messages in thread 
- * Re: [PATCH v9 06/10] ata: zpodd: check zero power ready status
  2012-11-19 15:06             ` James Bottomley
@ 2012-11-26  0:33               ` Rafael J. Wysocki
  2012-11-26  0:45                 ` Aaron Lu
  2012-11-26  5:03                 ` James Bottomley
  0 siblings, 2 replies; 85+ messages in thread
From: Rafael J. Wysocki @ 2012-11-26  0:33 UTC (permalink / raw)
  To: James Bottomley
  Cc: Tejun Heo, Aaron Lu, Jeff Garzik, Alan Stern, Jeff Wu, Aaron Lu,
	linux-ide, linux-pm, linux-scsi, linux-acpi
On Monday, November 19, 2012 03:06:51 PM James Bottomley wrote:
> On Mon, 2012-11-19 at 06:56 -0800, Tejun Heo wrote:
> > Hey, Aaron.
> > 
> > On Mon, Nov 19, 2012 at 11:09:40AM +0800, Aaron Lu wrote:
> > > > What I'm confused about is what autopm does for devices w/o zpodd.
> > > > What happens then?  Is it gonna leave power on for the device and,
> > > > say, go on to suspend the controller?  But, how would that work for,
> > > > say, future devices with async notification for media events?
> > > 
> > > Maybe we shouldn't allow autopm for such devices?
> > 
> > Yeah, maybe.  It would be nice to be able to automatically power off
> > disks and ports which aren't being used tho.
> > 
> > > > That said, I can't say the snooping is pretty.  It's a rather nasty
> > > > thing to do.  So, libata now wants information from the event polling
> > > > in block layer, but reaching for block_device from ata_devices is
> > > > nasty too.  Hmmm... but aren't you already doing that to block polling
> > > > on a powered down device?
> > > 
> > > I was feeling brain damaged by this for some time :-)
> > > 
> > > Basically, only ATA layer is aware of the power off thing, and sr knows
> > > nothing about this(or it is not supposed to know this, at least, this is
> > > what SCSI people think) and once powered off, I do not want the poll to
> > > disturb the device, so I need to block the poll. I can't come up with
> > > another way to achieve this except this nasty way.
> > > 
> > > James suggests me to keep the poll, but emulate the command. The problem
> > > with this is, the autopm for resume will kick in on each poll, so I'll
> > > need to decide if power up the ODD for this time's resume is needed in
> > > port's runtime resume callback. This made things complex and it also put
> > > too much logic in the resume callback, which is not desired. And even if
> > > I keep the ODD in powered off state by emulating this poll command, its
> > > ancestor devices will still be resumed, and I may need to do some trick
> > > in their resume callback to avoid needless power/state transitions. This
> > > doesn't feel like an elegant way to solve this either.
> > > 
> > > So yes, I'm still using this _nasty_ way to make the ODD stay in powered
> > > off state as long as possible. But if there is other elegant ways to
> > > solve this, I would appreciate it and happily using it. Personally, I
> > > hope we can make sr aware of ZPODD, that would make the pain gone.
> > 
> > I really think we need a way for (auto)pm and event polling to talk to
> > each other so that autopm can tell event poll to sod off while pm is
> > in effect.  Trying to solve this from inside libata doesn't seem
> > right.  The problem, again, seems to be figuring out which hardware
> > device maps to which block device.  Hmmm... Any good ideas?
> 
> I've asked the PM people several times about this, because it's a real
> problem for almost everything:  PM needs some type of top to bottom
> stack view, which the layering isolation we have within storage really
> doesn't cope with well.  No real suggestion has been forthcoming.
Actually, I think that the particular case in question is really special
and the fact that there's the pollig loop that user space is involved in
doesn't make things more stratightforward.
And PM really doesn't need to see things top to bottom, but the polling
needs to know what happens in the PM land.  We need to be able to tell it
"from now on tell user space that there are no events here".  The question
is where to put that information so that it's accessible to all parts of the
stack involved.
> The reason I think it should be emulated (in the acpi layer, not libata,
> but as long as it's not in SCSI, I'm not so fussed where it ends up) is
> because ZPODD is the software equivalent of ZPREADY, which will be done
> in hardware and will be effectively invisible to autopm in the same way
> that SCSI (and ATA) power management is mostly invisible.  If we
> currently do ZPREADY emulation in the low layer (i.e. ZPODD has exact
> ZPREADY emulation), we won't care (except for flipping the sofware bit)
> whether the device support ZPODD or ZPREADY and it will all just
> work(tm).  The industry expectation is that ZPODD is just a transition
> state between current power management and ZPREADY.
Well, if you poll a ZPREADY-capable drive, it will go off and on in cycles
transparently, but still it won't save as much energy as it can.  We'll need
to do something about the polling in that case too, it seems.
Thanks,
Rafael
-- 
I speak only for myself.
Rafael J. Wysocki, Intel Open Source Technology Center.
^ permalink raw reply	[flat|nested] 85+ messages in thread 
- * Re: [PATCH v9 06/10] ata: zpodd: check zero power ready status
  2012-11-26  0:33               ` Rafael J. Wysocki
@ 2012-11-26  0:45                 ` Aaron Lu
  2012-11-26  5:03                 ` James Bottomley
  1 sibling, 0 replies; 85+ messages in thread
From: Aaron Lu @ 2012-11-26  0:45 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: James Bottomley, Tejun Heo, Jeff Garzik, Alan Stern, Jeff Wu,
	Aaron Lu, linux-ide, linux-pm, linux-scsi, linux-acpi
On 11/26/2012 08:33 AM, Rafael J. Wysocki wrote:
> On Monday, November 19, 2012 03:06:51 PM James Bottomley wrote:
>> On Mon, 2012-11-19 at 06:56 -0800, Tejun Heo wrote:
>>> Hey, Aaron.
>>>
>>> On Mon, Nov 19, 2012 at 11:09:40AM +0800, Aaron Lu wrote:
>>>>> What I'm confused about is what autopm does for devices w/o zpodd.
>>>>> What happens then?  Is it gonna leave power on for the device and,
>>>>> say, go on to suspend the controller?  But, how would that work for,
>>>>> say, future devices with async notification for media events?
>>>>
>>>> Maybe we shouldn't allow autopm for such devices?
>>>
>>> Yeah, maybe.  It would be nice to be able to automatically power off
>>> disks and ports which aren't being used tho.
>>>
>>>>> That said, I can't say the snooping is pretty.  It's a rather nasty
>>>>> thing to do.  So, libata now wants information from the event polling
>>>>> in block layer, but reaching for block_device from ata_devices is
>>>>> nasty too.  Hmmm... but aren't you already doing that to block polling
>>>>> on a powered down device?
>>>>
>>>> I was feeling brain damaged by this for some time :-)
>>>>
>>>> Basically, only ATA layer is aware of the power off thing, and sr knows
>>>> nothing about this(or it is not supposed to know this, at least, this is
>>>> what SCSI people think) and once powered off, I do not want the poll to
>>>> disturb the device, so I need to block the poll. I can't come up with
>>>> another way to achieve this except this nasty way.
>>>>
>>>> James suggests me to keep the poll, but emulate the command. The problem
>>>> with this is, the autopm for resume will kick in on each poll, so I'll
>>>> need to decide if power up the ODD for this time's resume is needed in
>>>> port's runtime resume callback. This made things complex and it also put
>>>> too much logic in the resume callback, which is not desired. And even if
>>>> I keep the ODD in powered off state by emulating this poll command, its
>>>> ancestor devices will still be resumed, and I may need to do some trick
>>>> in their resume callback to avoid needless power/state transitions. This
>>>> doesn't feel like an elegant way to solve this either.
>>>>
>>>> So yes, I'm still using this _nasty_ way to make the ODD stay in powered
>>>> off state as long as possible. But if there is other elegant ways to
>>>> solve this, I would appreciate it and happily using it. Personally, I
>>>> hope we can make sr aware of ZPODD, that would make the pain gone.
>>>
>>> I really think we need a way for (auto)pm and event polling to talk to
>>> each other so that autopm can tell event poll to sod off while pm is
>>> in effect.  Trying to solve this from inside libata doesn't seem
>>> right.  The problem, again, seems to be figuring out which hardware
>>> device maps to which block device.  Hmmm... Any good ideas?
>>
>> I've asked the PM people several times about this, because it's a real
>> problem for almost everything:  PM needs some type of top to bottom
>> stack view, which the layering isolation we have within storage really
>> doesn't cope with well.  No real suggestion has been forthcoming.
> 
> Actually, I think that the particular case in question is really special
> and the fact that there's the pollig loop that user space is involved in
> doesn't make things more stratightforward.
> 
> And PM really doesn't need to see things top to bottom, but the polling
> needs to know what happens in the PM land.  We need to be able to tell it
> "from now on tell user space that there are no events here".  The question
Actually, in newer kernels(2.6.38 later) and user space tools(udisks
version 1.0.3 on), this is no longer the case.
udisks now no longer poll for event change, and in kernel polling has
been used to notify user space about event change with uevent. So the
polling thing can be entirely controlled in kernel space.
And please take a look at the v10 patch I've sent where I tried to solve
this by introducing PM_QOS_NO_POLL flag, see if that makes sense to you.
Thanks,
Aaron
> is where to put that information so that it's accessible to all parts of the
> stack involved.
> 
>> The reason I think it should be emulated (in the acpi layer, not libata,
>> but as long as it's not in SCSI, I'm not so fussed where it ends up) is
>> because ZPODD is the software equivalent of ZPREADY, which will be done
>> in hardware and will be effectively invisible to autopm in the same way
>> that SCSI (and ATA) power management is mostly invisible.  If we
>> currently do ZPREADY emulation in the low layer (i.e. ZPODD has exact
>> ZPREADY emulation), we won't care (except for flipping the sofware bit)
>> whether the device support ZPODD or ZPREADY and it will all just
>> work(tm).  The industry expectation is that ZPODD is just a transition
>> state between current power management and ZPREADY.
> 
> Well, if you poll a ZPREADY-capable drive, it will go off and on in cycles
> transparently, but still it won't save as much energy as it can.  We'll need
> to do something about the polling in that case too, it seems.
> 
> Thanks,
> Rafael
> 
> 
^ permalink raw reply	[flat|nested] 85+ messages in thread 
- * Re: [PATCH v9 06/10] ata: zpodd: check zero power ready status
  2012-11-26  0:33               ` Rafael J. Wysocki
  2012-11-26  0:45                 ` Aaron Lu
@ 2012-11-26  5:03                 ` James Bottomley
  2012-11-26  5:09                   ` Aaron Lu
  2012-11-28  0:51                   ` Rafael J. Wysocki
  1 sibling, 2 replies; 85+ messages in thread
From: James Bottomley @ 2012-11-26  5:03 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: Tejun Heo, Aaron Lu, Jeff Garzik, Alan Stern, Jeff Wu, Aaron Lu,
	linux-ide, linux-pm, linux-scsi, linux-acpi
On Mon, 2012-11-26 at 01:33 +0100, Rafael J. Wysocki wrote:
> On Monday, November 19, 2012 03:06:51 PM James Bottomley wrote:
> > > I really think we need a way for (auto)pm and event polling to talk to
> > > each other so that autopm can tell event poll to sod off while pm is
> > > in effect.  Trying to solve this from inside libata doesn't seem
> > > right.  The problem, again, seems to be figuring out which hardware
> > > device maps to which block device.  Hmmm... Any good ideas?
> > 
> > I've asked the PM people several times about this, because it's a real
> > problem for almost everything:  PM needs some type of top to bottom
> > stack view, which the layering isolation we have within storage really
> > doesn't cope with well.  No real suggestion has been forthcoming.
> 
> Actually, I think that the particular case in question is really special
> and the fact that there's the pollig loop that user space is involved in
> doesn't make things more stratightforward.
> 
> And PM really doesn't need to see things top to bottom, but the polling
> needs to know what happens in the PM land.  We need to be able to tell it
> "from now on tell user space that there are no events here".  The question
> is where to put that information so that it's accessible to all parts of the
> stack involved.
Right, open to suggestions ...
> > The reason I think it should be emulated (in the acpi layer, not libata,
> > but as long as it's not in SCSI, I'm not so fussed where it ends up) is
> > because ZPODD is the software equivalent of ZPREADY, which will be done
> > in hardware and will be effectively invisible to autopm in the same way
> > that SCSI (and ATA) power management is mostly invisible.  If we
> > currently do ZPREADY emulation in the low layer (i.e. ZPODD has exact
> > ZPREADY emulation), we won't care (except for flipping the sofware bit)
> > whether the device support ZPODD or ZPREADY and it will all just
> > work(tm).  The industry expectation is that ZPODD is just a transition
> > state between current power management and ZPREADY.
> 
> Well, if you poll a ZPREADY-capable drive, it will go off and on in cycles
> transparently, but still it won't save as much energy as it can.  We'll need
> to do something about the polling in that case too, it seems.
No: with ZPREADY, the device effectively lies to the poll.  The Spec
says that when it powers off the mechanical pieces, it must reply from
firmware to a certain preset emulations of SCSI commands and not wake
from power off.  These commands include TEST UNIT READY and a few
others, so the device will happily reply to polls while being off (it
replies with the original state before power was lost).  When you issue
actual medium access commands, or manually insert or remove media it
will wake up.
That's why I think ZPODD should emulate this behaviour.
James
^ permalink raw reply	[flat|nested] 85+ messages in thread 
- * Re: [PATCH v9 06/10] ata: zpodd: check zero power ready status
  2012-11-26  5:03                 ` James Bottomley
@ 2012-11-26  5:09                   ` Aaron Lu
  2012-11-26  7:32                     ` James Bottomley
  2012-11-28  0:51                   ` Rafael J. Wysocki
  1 sibling, 1 reply; 85+ messages in thread
From: Aaron Lu @ 2012-11-26  5:09 UTC (permalink / raw)
  To: James Bottomley
  Cc: Rafael J. Wysocki, Tejun Heo, Jeff Garzik, Alan Stern, Jeff Wu,
	Aaron Lu, linux-ide, linux-pm, linux-scsi, linux-acpi
On 11/26/2012 01:03 PM, James Bottomley wrote:
> On Mon, 2012-11-26 at 01:33 +0100, Rafael J. Wysocki wrote:
>> On Monday, November 19, 2012 03:06:51 PM James Bottomley wrote:
>>>> I really think we need a way for (auto)pm and event polling to talk to
>>>> each other so that autopm can tell event poll to sod off while pm is
>>>> in effect.  Trying to solve this from inside libata doesn't seem
>>>> right.  The problem, again, seems to be figuring out which hardware
>>>> device maps to which block device.  Hmmm... Any good ideas?
>>>
>>> I've asked the PM people several times about this, because it's a real
>>> problem for almost everything:  PM needs some type of top to bottom
>>> stack view, which the layering isolation we have within storage really
>>> doesn't cope with well.  No real suggestion has been forthcoming.
>>
>> Actually, I think that the particular case in question is really special
>> and the fact that there's the pollig loop that user space is involved in
>> doesn't make things more stratightforward.
>>
>> And PM really doesn't need to see things top to bottom, but the polling
>> needs to know what happens in the PM land.  We need to be able to tell it
>> "from now on tell user space that there are no events here".  The question
>> is where to put that information so that it's accessible to all parts of the
>> stack involved.
> 
> Right, open to suggestions ...
> 
>>> The reason I think it should be emulated (in the acpi layer, not libata,
>>> but as long as it's not in SCSI, I'm not so fussed where it ends up) is
>>> because ZPODD is the software equivalent of ZPREADY, which will be done
>>> in hardware and will be effectively invisible to autopm in the same way
>>> that SCSI (and ATA) power management is mostly invisible.  If we
>>> currently do ZPREADY emulation in the low layer (i.e. ZPODD has exact
>>> ZPREADY emulation), we won't care (except for flipping the sofware bit)
>>> whether the device support ZPODD or ZPREADY and it will all just
>>> work(tm).  The industry expectation is that ZPODD is just a transition
>>> state between current power management and ZPREADY.
>>
>> Well, if you poll a ZPREADY-capable drive, it will go off and on in cycles
>> transparently, but still it won't save as much energy as it can.  We'll need
>> to do something about the polling in that case too, it seems.
> 
> No: with ZPREADY, the device effectively lies to the poll.  The Spec
> says that when it powers off the mechanical pieces, it must reply from
> firmware to a certain preset emulations of SCSI commands and not wake
> from power off.  These commands include TEST UNIT READY and a few
> others, so the device will happily reply to polls while being off (it
> replies with the original state before power was lost).  When you issue
> actual medium access commands, or manually insert or remove media it
> will wake up.
> 
> That's why I think ZPODD should emulate this behaviour.
I suppose you are refering to section 15.3.5?
I don't think the SPEC says what the host software _must_ do, it's
informative. And I agree that when possible, we should emulate the
command without powering up the ODD, but if we can eliminate the noise,
wouldn't that be better?
-Aaron
> 
> James
> 
> 
^ permalink raw reply	[flat|nested] 85+ messages in thread 
- * Re: [PATCH v9 06/10] ata: zpodd: check zero power ready status
  2012-11-26  5:09                   ` Aaron Lu
@ 2012-11-26  7:32                     ` James Bottomley
  2012-11-26  8:27                       ` Aaron Lu
  0 siblings, 1 reply; 85+ messages in thread
From: James Bottomley @ 2012-11-26  7:32 UTC (permalink / raw)
  To: Aaron Lu
  Cc: Rafael J. Wysocki, Tejun Heo, Jeff Garzik, Alan Stern, Jeff Wu,
	Aaron Lu, linux-ide, linux-pm, linux-scsi, linux-acpi
On Mon, 2012-11-26 at 13:09 +0800, Aaron Lu wrote:
> On 11/26/2012 01:03 PM, James Bottomley wrote:
> > On Mon, 2012-11-26 at 01:33 +0100, Rafael J. Wysocki wrote:
> >> On Monday, November 19, 2012 03:06:51 PM James Bottomley wrote:
> >>>> I really think we need a way for (auto)pm and event polling to talk to
> >>>> each other so that autopm can tell event poll to sod off while pm is
> >>>> in effect.  Trying to solve this from inside libata doesn't seem
> >>>> right.  The problem, again, seems to be figuring out which hardware
> >>>> device maps to which block device.  Hmmm... Any good ideas?
> >>>
> >>> I've asked the PM people several times about this, because it's a real
> >>> problem for almost everything:  PM needs some type of top to bottom
> >>> stack view, which the layering isolation we have within storage really
> >>> doesn't cope with well.  No real suggestion has been forthcoming.
> >>
> >> Actually, I think that the particular case in question is really special
> >> and the fact that there's the pollig loop that user space is involved in
> >> doesn't make things more stratightforward.
> >>
> >> And PM really doesn't need to see things top to bottom, but the polling
> >> needs to know what happens in the PM land.  We need to be able to tell it
> >> "from now on tell user space that there are no events here".  The question
> >> is where to put that information so that it's accessible to all parts of the
> >> stack involved.
> > 
> > Right, open to suggestions ...
> > 
> >>> The reason I think it should be emulated (in the acpi layer, not libata,
> >>> but as long as it's not in SCSI, I'm not so fussed where it ends up) is
> >>> because ZPODD is the software equivalent of ZPREADY, which will be done
> >>> in hardware and will be effectively invisible to autopm in the same way
> >>> that SCSI (and ATA) power management is mostly invisible.  If we
> >>> currently do ZPREADY emulation in the low layer (i.e. ZPODD has exact
> >>> ZPREADY emulation), we won't care (except for flipping the sofware bit)
> >>> whether the device support ZPODD or ZPREADY and it will all just
> >>> work(tm).  The industry expectation is that ZPODD is just a transition
> >>> state between current power management and ZPREADY.
> >>
> >> Well, if you poll a ZPREADY-capable drive, it will go off and on in cycles
> >> transparently, but still it won't save as much energy as it can.  We'll need
> >> to do something about the polling in that case too, it seems.
> > 
> > No: with ZPREADY, the device effectively lies to the poll.  The Spec
> > says that when it powers off the mechanical pieces, it must reply from
> > firmware to a certain preset emulations of SCSI commands and not wake
> > from power off.  These commands include TEST UNIT READY and a few
> > others, so the device will happily reply to polls while being off (it
> > replies with the original state before power was lost).  When you issue
> > actual medium access commands, or manually insert or remove media it
> > will wake up.
> > 
> > That's why I think ZPODD should emulate this behaviour.
> 
> I suppose you are refering to section 15.3.5?
That's the recommendation for emulating ZPREADY in ZPODD devices, yes.
> I don't think the SPEC says what the host software _must_ do, it's
> informative. And I agree that when possible, we should emulate the
> command without powering up the ODD, but if we can eliminate the noise,
> wouldn't that be better?
The way I look at it is we currently have no real power management for
actual SCSI devices, we rely on the internal timers of the device to
effect power management for us.  ZPREADY fits right into this scheme (as
I think it was designed to) so it seems odd to me that we would
introduce a software PM based scheme for ZPODD, which is an interim spec
until everything supports ZPREADY, and then go back to doing nothing for
ZPREADY.
I'm amenable to a proposal that we *shouldn't* be doing nothing for SCSI
PM and perhaps it should be plumbed into software PM, but it looks odd
to me to have sofware PM for ZPODD but not for at least ZPREADY and
probably for SCSI PM as well.
If we elect to do nothing about ZPREADY and SCSI PM, then I think ZPODD
should be implemented in a way that emulates ZPREADY (i.e. as section
15.3.5 recommends).
James
^ permalink raw reply	[flat|nested] 85+ messages in thread 
- * Re: [PATCH v9 06/10] ata: zpodd: check zero power ready status
  2012-11-26  7:32                     ` James Bottomley
@ 2012-11-26  8:27                       ` Aaron Lu
  2012-11-26 13:17                         ` James Bottomley
  0 siblings, 1 reply; 85+ messages in thread
From: Aaron Lu @ 2012-11-26  8:27 UTC (permalink / raw)
  To: James Bottomley
  Cc: Rafael J. Wysocki, Tejun Heo, Jeff Garzik, Alan Stern, Jeff Wu,
	Aaron Lu, linux-ide, linux-pm, linux-scsi, linux-acpi
On 11/26/2012 03:32 PM, James Bottomley wrote:
> On Mon, 2012-11-26 at 13:09 +0800, Aaron Lu wrote:
>> On 11/26/2012 01:03 PM, James Bottomley wrote:
>>> On Mon, 2012-11-26 at 01:33 +0100, Rafael J. Wysocki wrote:
>>>> On Monday, November 19, 2012 03:06:51 PM James Bottomley wrote:
>>>>>> I really think we need a way for (auto)pm and event polling to talk to
>>>>>> each other so that autopm can tell event poll to sod off while pm is
>>>>>> in effect.  Trying to solve this from inside libata doesn't seem
>>>>>> right.  The problem, again, seems to be figuring out which hardware
>>>>>> device maps to which block device.  Hmmm... Any good ideas?
>>>>>
>>>>> I've asked the PM people several times about this, because it's a real
>>>>> problem for almost everything:  PM needs some type of top to bottom
>>>>> stack view, which the layering isolation we have within storage really
>>>>> doesn't cope with well.  No real suggestion has been forthcoming.
>>>>
>>>> Actually, I think that the particular case in question is really special
>>>> and the fact that there's the pollig loop that user space is involved in
>>>> doesn't make things more stratightforward.
>>>>
>>>> And PM really doesn't need to see things top to bottom, but the polling
>>>> needs to know what happens in the PM land.  We need to be able to tell it
>>>> "from now on tell user space that there are no events here".  The question
>>>> is where to put that information so that it's accessible to all parts of the
>>>> stack involved.
>>>
>>> Right, open to suggestions ...
>>>
>>>>> The reason I think it should be emulated (in the acpi layer, not libata,
>>>>> but as long as it's not in SCSI, I'm not so fussed where it ends up) is
>>>>> because ZPODD is the software equivalent of ZPREADY, which will be done
>>>>> in hardware and will be effectively invisible to autopm in the same way
>>>>> that SCSI (and ATA) power management is mostly invisible.  If we
>>>>> currently do ZPREADY emulation in the low layer (i.e. ZPODD has exact
>>>>> ZPREADY emulation), we won't care (except for flipping the sofware bit)
>>>>> whether the device support ZPODD or ZPREADY and it will all just
>>>>> work(tm).  The industry expectation is that ZPODD is just a transition
>>>>> state between current power management and ZPREADY.
>>>>
>>>> Well, if you poll a ZPREADY-capable drive, it will go off and on in cycles
>>>> transparently, but still it won't save as much energy as it can.  We'll need
>>>> to do something about the polling in that case too, it seems.
>>>
>>> No: with ZPREADY, the device effectively lies to the poll.  The Spec
>>> says that when it powers off the mechanical pieces, it must reply from
>>> firmware to a certain preset emulations of SCSI commands and not wake
>>> from power off.  These commands include TEST UNIT READY and a few
>>> others, so the device will happily reply to polls while being off (it
>>> replies with the original state before power was lost).  When you issue
>>> actual medium access commands, or manually insert or remove media it
>>> will wake up.
>>>
>>> That's why I think ZPODD should emulate this behaviour.
>>
>> I suppose you are refering to section 15.3.5?
> 
> That's the recommendation for emulating ZPREADY in ZPODD devices, yes.
> 
>> I don't think the SPEC says what the host software _must_ do, it's
>> informative. And I agree that when possible, we should emulate the
>> command without powering up the ODD, but if we can eliminate the noise,
>> wouldn't that be better?
> 
> The way I look at it is we currently have no real power management for
> actual SCSI devices, we rely on the internal timers of the device to
> effect power management for us.  ZPREADY fits right into this scheme (as
> I think it was designed to) so it seems odd to me that we would
> introduce a software PM based scheme for ZPODD, which is an interim spec
> until everything supports ZPREADY, and then go back to doing nothing for
> ZPREADY.
> 
> I'm amenable to a proposal that we *shouldn't* be doing nothing for SCSI
> PM and perhaps it should be plumbed into software PM, but it looks odd
> to me to have sofware PM for ZPODD but not for at least ZPREADY and
> probably for SCSI PM as well.
Well, ZPREADY is not a power state that we can program the ODD to
enter(figure 234 and table 323 of the SPEC), it servers more like an
information provided by ODD to host so that host does not need to do TUR
and then examine the sense code to decide if zero power ready status is
satisfied but simply query ODD if its current power state is ZPREADY.
So it's not that we program the device to go into ZPREADY power state
and the ODD's power will be omitted.
The benefit of a ZPREADY capable ODD is that, when we need to decide if
the ODD is in a zero power ready status, we can simply query the ODD by
issuing a GET_EVENT_STATUS_NOTIFICATION and check the returned power
class events, if it is in ZPREADY power state, then we can omit the
power. To support ZPREADY, we just need some change to the zpready
funtion, which currently uses sense code to check ZP ready status.
So this is my understanding of ZPREADY, and I don't see it as a total
different thing with ZPODD, it just changes the way how host senses the
zero power ready status. But if I was wrong, please kindly let me know,
thanks.
-Aaron
> 
> If we elect to do nothing about ZPREADY and SCSI PM, then I think ZPODD
> should be implemented in a way that emulates ZPREADY (i.e. as section
> 15.3.5 recommends).
> 
> James
> 
> 
^ permalink raw reply	[flat|nested] 85+ messages in thread 
- * Re: [PATCH v9 06/10] ata: zpodd: check zero power ready status
  2012-11-26  8:27                       ` Aaron Lu
@ 2012-11-26 13:17                         ` James Bottomley
  2012-11-26 16:21                           ` Alan Stern
  2012-11-27  1:41                           ` Aaron Lu
  0 siblings, 2 replies; 85+ messages in thread
From: James Bottomley @ 2012-11-26 13:17 UTC (permalink / raw)
  To: Aaron Lu
  Cc: Rafael J. Wysocki, Tejun Heo, Jeff Garzik, Alan Stern, Jeff Wu,
	Aaron Lu, linux-ide, linux-pm, linux-scsi, linux-acpi
On Mon, 2012-11-26 at 16:27 +0800, Aaron Lu wrote:
> Well, ZPREADY is not a power state that we can program the ODD to
> enter(figure 234 and table 323 of the SPEC), it servers more like an
> information provided by ODD to host so that host does not need to do TUR
> and then examine the sense code to decide if zero power ready status is
> satisfied but simply query ODD if its current power state is ZPREADY.
> So it's not that we program the device to go into ZPREADY power state
> and the ODD's power will be omitted.
> 
> The benefit of a ZPREADY capable ODD is that, when we need to decide if
> the ODD is in a zero power ready status, we can simply query the ODD by
> issuing a GET_EVENT_STATUS_NOTIFICATION and check the returned power
> class events, if it is in ZPREADY power state, then we can omit the
> power. To support ZPREADY, we just need some change to the zpready
> funtion, which currently uses sense code to check ZP ready status.
> 
> So this is my understanding of ZPREADY, and I don't see it as a total
> different thing with ZPODD, it just changes the way how host senses the
> zero power ready status. But if I was wrong, please kindly let me know,
> thanks.
My understanding is that a ZPREADY device may be capable of internal
power down, meaning it doesn't necessarily need the host to omit the
power.  It depends what the difference is between Sleep and Off is
(they're deliberately left as implementation defined in the standard, Ch
16, but the conditions of sleep are pretty onerous, so it sounds like
most of the mechanics are powered down).
However, if you want to work it similarly to ZPODD, then the timeouts
automatically transitions to ZPREADY, the device issues an event, we
trap the event at the low level and omit power.
I'm also curious about driving sleep from autopm, since mode page timers
don't control the sleep transition.
James
^ permalink raw reply	[flat|nested] 85+ messages in thread 
- * Re: [PATCH v9 06/10] ata: zpodd: check zero power ready status
  2012-11-26 13:17                         ` James Bottomley
@ 2012-11-26 16:21                           ` Alan Stern
  2012-11-26 19:15                             ` James Bottomley
  2012-11-27  1:41                           ` Aaron Lu
  1 sibling, 1 reply; 85+ messages in thread
From: Alan Stern @ 2012-11-26 16:21 UTC (permalink / raw)
  To: James Bottomley
  Cc: Aaron Lu, Rafael J. Wysocki, Tejun Heo, Jeff Garzik, Jeff Wu,
	Aaron Lu, linux-ide, linux-pm, linux-scsi, linux-acpi
On Mon, 26 Nov 2012, James Bottomley wrote:
> I'm also curious about driving sleep from autopm, since mode page timers
> don't control the sleep transition.
Is it feasible to do this the other way around?  That is, to drive 
runtime suspend by noticing when the device decides to put itself into 
a low-power state?
Alan Stern
^ permalink raw reply	[flat|nested] 85+ messages in thread 
- * Re: [PATCH v9 06/10] ata: zpodd: check zero power ready status
  2012-11-26 16:21                           ` Alan Stern
@ 2012-11-26 19:15                             ` James Bottomley
  0 siblings, 0 replies; 85+ messages in thread
From: James Bottomley @ 2012-11-26 19:15 UTC (permalink / raw)
  To: Alan Stern
  Cc: Aaron Lu, Rafael J. Wysocki, Tejun Heo, Jeff Garzik, Jeff Wu,
	Aaron Lu, linux-ide, linux-pm, linux-scsi, linux-acpi
On Mon, 2012-11-26 at 11:21 -0500, Alan Stern wrote:
> On Mon, 26 Nov 2012, James Bottomley wrote:
> 
> > I'm also curious about driving sleep from autopm, since mode page timers
> > don't control the sleep transition.
> 
> Is it feasible to do this the other way around?  That is, to drive 
> runtime suspend by noticing when the device decides to put itself into 
> a low-power state?
Well, yes and no.  The spec is annoyingly (and deliberately) vague about
what the states actually mean.  The two states that the devices go into
because of timers is idle and standby.  The supposedly deeper low power
state of sleep can only be reached by sending a command to the device.
The design is that software (or HBA drivers) don't really need to notice
idle and standby; the device automatically manages transitions between
them depending on command activity.  For sleep, we do have to care (if
it actually makes some meaningful difference).
James
^ permalink raw reply	[flat|nested] 85+ messages in thread 
 
- * Re: [PATCH v9 06/10] ata: zpodd: check zero power ready status
  2012-11-26 13:17                         ` James Bottomley
  2012-11-26 16:21                           ` Alan Stern
@ 2012-11-27  1:41                           ` Aaron Lu
  1 sibling, 0 replies; 85+ messages in thread
From: Aaron Lu @ 2012-11-27  1:41 UTC (permalink / raw)
  To: James Bottomley
  Cc: Rafael J. Wysocki, Tejun Heo, Jeff Garzik, Alan Stern, Jeff Wu,
	Aaron Lu, linux-ide, linux-pm, linux-scsi, linux-acpi
On 11/26/2012 09:17 PM, James Bottomley wrote:
> On Mon, 2012-11-26 at 16:27 +0800, Aaron Lu wrote:
>> Well, ZPREADY is not a power state that we can program the ODD to
>> enter(figure 234 and table 323 of the SPEC), it servers more like an
>> information provided by ODD to host so that host does not need to do TUR
>> and then examine the sense code to decide if zero power ready status is
>> satisfied but simply query ODD if its current power state is ZPREADY.
>> So it's not that we program the device to go into ZPREADY power state
>> and the ODD's power will be omitted.
>>
>> The benefit of a ZPREADY capable ODD is that, when we need to decide if
>> the ODD is in a zero power ready status, we can simply query the ODD by
>> issuing a GET_EVENT_STATUS_NOTIFICATION and check the returned power
>> class events, if it is in ZPREADY power state, then we can omit the
>> power. To support ZPREADY, we just need some change to the zpready
>> funtion, which currently uses sense code to check ZP ready status.
>>
>> So this is my understanding of ZPREADY, and I don't see it as a total
>> different thing with ZPODD, it just changes the way how host senses the
>> zero power ready status. But if I was wrong, please kindly let me know,
>> thanks.
> 
> My understanding is that a ZPREADY device may be capable of internal
> power down, meaning it doesn't necessarily need the host to omit the
> power.  It depends what the difference is between Sleep and Off is
> (they're deliberately left as implementation defined in the standard, Ch
> 16, but the conditions of sleep are pretty onerous, so it sounds like
> most of the mechanics are powered down).
I Agree that when the ODD is put to Sleep state, it may power down most
of the mechanics, good for power saving. The problem is, we have the 2
seconds poll, and since Sleep state can not process any command, we will
need to bring the ODD out of Sleep state every 2 seconds, is this
feasible? Please note that leaving Sleep state needs full initialization
of the ODD.
ZPODD system(ODD+platform) solves this problem with ACPI, when the ODD
is powered off and any event that may induce a media change event will
generate an ACPI interrupt, so we can stop the poll(though in whatever
way is still in discussion).
So I suppose we need to find a proper way to implement Sleep.
> 
> However, if you want to work it similarly to ZPODD, then the timeouts
> automatically transitions to ZPREADY, the device issues an event, we
> trap the event at the low level and omit power.
Yeah, I can do this. Except that I don't quite understand how the device
issues the event to host, by interrupt? My understanding is that, it
will issue this event to itself...and host still needs to use command
GET_EVENT_STATUS_NOTIFICATION to fetch the event, much the same way like
the media related events it emits.
> 
> I'm also curious about driving sleep from autopm, since mode page timers
> don't control the sleep transition.
I see. But we will need to work out a sensible way to put the ODD into
that power state, if at all possible.
Thanks,
Aaron
^ permalink raw reply	[flat|nested] 85+ messages in thread 
 
 
 
 
- * Re: [PATCH v9 06/10] ata: zpodd: check zero power ready status
  2012-11-26  5:03                 ` James Bottomley
  2012-11-26  5:09                   ` Aaron Lu
@ 2012-11-28  0:51                   ` Rafael J. Wysocki
  2012-11-28  1:39                     ` Tejun Heo
  1 sibling, 1 reply; 85+ messages in thread
From: Rafael J. Wysocki @ 2012-11-28  0:51 UTC (permalink / raw)
  To: James Bottomley
  Cc: linux-pm, Tejun Heo, Aaron Lu, Jeff Garzik, Alan Stern, Jeff Wu,
	Aaron Lu, linux-ide, linux-scsi, linux-acpi
On Monday, November 26, 2012 09:03:11 AM James Bottomley wrote:
> On Mon, 2012-11-26 at 01:33 +0100, Rafael J. Wysocki wrote:
> > On Monday, November 19, 2012 03:06:51 PM James Bottomley wrote:
> > > > I really think we need a way for (auto)pm and event polling to talk to
> > > > each other so that autopm can tell event poll to sod off while pm is
> > > > in effect.  Trying to solve this from inside libata doesn't seem
> > > > right.  The problem, again, seems to be figuring out which hardware
> > > > device maps to which block device.  Hmmm... Any good ideas?
> > > 
> > > I've asked the PM people several times about this, because it's a real
> > > problem for almost everything:  PM needs some type of top to bottom
> > > stack view, which the layering isolation we have within storage really
> > > doesn't cope with well.  No real suggestion has been forthcoming.
> > 
> > Actually, I think that the particular case in question is really special
> > and the fact that there's the pollig loop that user space is involved in
> > doesn't make things more stratightforward.
> > 
> > And PM really doesn't need to see things top to bottom, but the polling
> > needs to know what happens in the PM land.  We need to be able to tell it
> > "from now on tell user space that there are no events here".  The question
> > is where to put that information so that it's accessible to all parts of the
> > stack involved.
> 
> Right, open to suggestions ...
Having considered that a bit more I'm now thinking that in fact the power state
the device is in at the moment doesn't really matter, so the polling code need
not really know what PM is doing.  What it needs to know is that the device
will generate a hardware event when something interesting happens, so it is not
necessary to poll it.
In this particular case it is related to power management (apparently, hardware
events will only be generated after the device has been put into ACPI D3cold,
or so Aaron seems to be claiming), but it need not be in general, at least in
principle.
It looks like we need an "event driven" flag that the can be set in the lower
layers and read by the upper layers.  I suppose this means it would need to be
in struct device, but not necessarily in the PM-specific part of it.
Thanks,
Rafael
-- 
I speak only for myself.
Rafael J. Wysocki, Intel Open Source Technology Center.
^ permalink raw reply	[flat|nested] 85+ messages in thread 
- * Re: [PATCH v9 06/10] ata: zpodd: check zero power ready status
  2012-11-28  0:51                   ` Rafael J. Wysocki
@ 2012-11-28  1:39                     ` Tejun Heo
  2012-11-28  2:24                       ` Aaron Lu
                                         ` (2 more replies)
  0 siblings, 3 replies; 85+ messages in thread
From: Tejun Heo @ 2012-11-28  1:39 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: James Bottomley, linux-pm, Aaron Lu, Jeff Garzik, Alan Stern,
	Jeff Wu, Aaron Lu, linux-ide, linux-scsi, linux-acpi
Hey, Rafael.
On Wed, Nov 28, 2012 at 01:51:00AM +0100, Rafael J. Wysocki wrote:
> Having considered that a bit more I'm now thinking that in fact the power state
> the device is in at the moment doesn't really matter, so the polling code need
> not really know what PM is doing.  What it needs to know is that the device
> will generate a hardware event when something interesting happens, so it is not
> necessary to poll it.
> 
> In this particular case it is related to power management (apparently, hardware
> events will only be generated after the device has been put into ACPI D3cold,
> or so Aaron seems to be claiming), but it need not be in general, at least in
> principle.
> 
> It looks like we need an "event driven" flag that the can be set in the lower
> layers and read by the upper layers.  I suppose this means it would need to be
> in struct device, but not necessarily in the PM-specific part of it.
We already have that.  That's what gendisk->async_events is for (as
opposed to gendisk->events).  If all events are async_events, block
won't poll for events, but I'm not sure that's the golden bullet.
* None implements async_events yet and an API is missing -
  disk_check_events() - which is trivial to add, but it's the same
  story.  We'll need a mechanism to shoot up notification from libata
  to block layer.  It's admittedly easier to justify routing through
  SCSI tho.  So, we're mostly shifting the problem.  Given that async
  events is nice to have, so it isn't a bad idea.
* Still dunno much about zpodd but IIUC the notification from
  zero-power is via ACPI.  To advertise that the device doesn't need
  polling, it should also be able to do async notification while
  powered up, which isn't covered by zpodd but ATA async notification.
  So, ummm... that's another obstacle.  If zpodd requires the device
  to support ATA async notification, it might not be too bad tho.
Thanks.
-- 
tejun
^ permalink raw reply	[flat|nested] 85+ messages in thread
- * Re: [PATCH v9 06/10] ata: zpodd: check zero power ready status
  2012-11-28  1:39                     ` Tejun Heo
@ 2012-11-28  2:24                       ` Aaron Lu
  2012-11-28  8:56                       ` James Bottomley
  2012-11-30  8:55                       ` Aaron Lu
  2 siblings, 0 replies; 85+ messages in thread
From: Aaron Lu @ 2012-11-28  2:24 UTC (permalink / raw)
  To: Tejun Heo
  Cc: Rafael J. Wysocki, James Bottomley, linux-pm, Jeff Garzik,
	Alan Stern, Jeff Wu, Aaron Lu, linux-ide, linux-scsi, linux-acpi
On 11/28/2012 09:39 AM, Tejun Heo wrote:
> Hey, Rafael.
> 
> On Wed, Nov 28, 2012 at 01:51:00AM +0100, Rafael J. Wysocki wrote:
>> Having considered that a bit more I'm now thinking that in fact the power state
>> the device is in at the moment doesn't really matter, so the polling code need
>> not really know what PM is doing.  What it needs to know is that the device
>> will generate a hardware event when something interesting happens, so it is not
>> necessary to poll it.
>>
>> In this particular case it is related to power management (apparently, hardware
>> events will only be generated after the device has been put into ACPI D3cold,
>> or so Aaron seems to be claiming), but it need not be in general, at least in
>> principle.
>>
>> It looks like we need an "event driven" flag that the can be set in the lower
>> layers and read by the upper layers.  I suppose this means it would need to be
>> in struct device, but not necessarily in the PM-specific part of it.
> 
> We already have that.  That's what gendisk->async_events is for (as
> opposed to gendisk->events).  If all events are async_events, block
> won't poll for events, but I'm not sure that's the golden bullet.
Right. For ZPODD, the problem is, during power up time, it needs
gendisk->events to be set to get poll; and after powered off, it will
need to clear the gendisk->events field so that the poll work will stop.
I'm not sure if the gendisk->async_events should be set here, as the
interrupt only says that user pressed the eject button for the tray type
ODD but he may not insert any disc. The whole point of the interrupt is
to re-power the ODD, it is not designed to give notification of media
change. This is my understanding of ZPODD.
> 
> * None implements async_events yet and an API is missing -
That's what I'm confused when reading the sata async support code, the
SCSI sr driver will unconditionally set gendisk->events, so how the sata
async can benefit? But this is another story.
>   disk_check_events() - which is trivial to add, but it's the same
>   story.  We'll need a mechanism to shoot up notification from libata
>   to block layer.  It's admittedly easier to justify routing through
>   SCSI tho.  So, we're mostly shifting the problem.  Given that async
>   events is nice to have, so it isn't a bad idea.
> 
> * Still dunno much about zpodd but IIUC the notification from
>   zero-power is via ACPI.  To advertise that the device doesn't need
Yes, when powered off, if users press the eject button of a tray type
ODD or inserts a disc of the slot type ODD, the SATA ODD will generate a
DEVICE ATTENTION signal, which will cause ACPI to emit an interrupt.
On my test system, when I insert a disc into the slot type ODD to wake
it up, it will continue to generate DEVICE ATTENTION signal, and thus,
ACPI interrupts are coming all the time if I do not disable the ACPI GPE
that controls the interrupt. I guess the behaviour is device by device,
and the SPEC only states what ODD needs to do when in powered off state.
And I don't think a tray type ODD will generate DEVICE ATTENTION signal
when its eject button is pressed after powered up, but Jeff Wu from AMD
may be able to test this behaviour as he has some tray type ODDs if this
is meaningful to know.
>   polling, it should also be able to do async notification while
>   powered up, which isn't covered by zpodd but ATA async notification.
Agree. For powered up media change notification, that's SATA async
notification's job.
>   So, ummm... that's another obstacle.  If zpodd requires the device
>   to support ATA async notification, it might not be too bad tho.
This doesn't seem to be the case, since ZPODD is for how ODD to get
notified when it is powered off, so there is no words stating if the ODD
should also support sata async notification.
Thanks,
Aaron
^ permalink raw reply	[flat|nested] 85+ messages in thread 
- * Re: [PATCH v9 06/10] ata: zpodd: check zero power ready status
  2012-11-28  1:39                     ` Tejun Heo
  2012-11-28  2:24                       ` Aaron Lu
@ 2012-11-28  8:56                       ` James Bottomley
  2012-12-03  8:13                         ` Aaron Lu
  2012-11-30  8:55                       ` Aaron Lu
  2 siblings, 1 reply; 85+ messages in thread
From: James Bottomley @ 2012-11-28  8:56 UTC (permalink / raw)
  To: Tejun Heo
  Cc: Rafael J. Wysocki, linux-pm, Aaron Lu, Jeff Garzik, Alan Stern,
	Jeff Wu, Aaron Lu, linux-ide, linux-scsi, linux-acpi
On Tue, 2012-11-27 at 17:39 -0800, Tejun Heo wrote:
> Hey, Rafael.
> 
> On Wed, Nov 28, 2012 at 01:51:00AM +0100, Rafael J. Wysocki wrote:
> > Having considered that a bit more I'm now thinking that in fact the power state
> > the device is in at the moment doesn't really matter, so the polling code need
> > not really know what PM is doing.  What it needs to know is that the device
> > will generate a hardware event when something interesting happens, so it is not
> > necessary to poll it.
> > 
> > In this particular case it is related to power management (apparently, hardware
> > events will only be generated after the device has been put into ACPI D3cold,
> > or so Aaron seems to be claiming), but it need not be in general, at least in
> > principle.
> > 
> > It looks like we need an "event driven" flag that the can be set in the lower
> > layers and read by the upper layers.  I suppose this means it would need to be
> > in struct device, but not necessarily in the PM-specific part of it.
> 
> We already have that.  That's what gendisk->async_events is for (as
> opposed to gendisk->events).  If all events are async_events, block
> won't poll for events, but I'm not sure that's the golden bullet.
> 
> * None implements async_events yet and an API is missing -
>   disk_check_events() - which is trivial to add, but it's the same
>   story.  We'll need a mechanism to shoot up notification from libata
>   to block layer.  It's admittedly easier to justify routing through
>   SCSI tho.  So, we're mostly shifting the problem.  Given that async
>   events is nice to have, so it isn't a bad idea.
Could we drive it in the polling code?  As in, if we set a flag to say
we're event driven and please don't bother us, we could just respond to
the poll with the last known state (this would probably have to be in
SCSI somewhere since most polls are Test Unit Readys).  That way ZPODD
sets this flag when the device powers down and unsets it when it powers
up.
James
> * Still dunno much about zpodd but IIUC the notification from
>   zero-power is via ACPI.  To advertise that the device doesn't need
>   polling, it should also be able to do async notification while
>   powered up, which isn't covered by zpodd but ATA async notification.
>   So, ummm... that's another obstacle.  If zpodd requires the device
>   to support ATA async notification, it might not be too bad tho.
^ permalink raw reply	[flat|nested] 85+ messages in thread 
- * Re: [PATCH v9 06/10] ata: zpodd: check zero power ready status
  2012-11-28  8:56                       ` James Bottomley
@ 2012-12-03  8:13                         ` Aaron Lu
  2012-12-03  8:25                           ` James Bottomley
  0 siblings, 1 reply; 85+ messages in thread
From: Aaron Lu @ 2012-12-03  8:13 UTC (permalink / raw)
  To: James Bottomley
  Cc: Tejun Heo, Rafael J. Wysocki, linux-pm, Jeff Garzik, Alan Stern,
	Jeff Wu, Aaron Lu, linux-ide, linux-scsi, linux-acpi
On Wed, Nov 28, 2012 at 08:56:09AM +0000, James Bottomley wrote:
> On Tue, 2012-11-27 at 17:39 -0800, Tejun Heo wrote:
> > Hey, Rafael.
> > 
> > On Wed, Nov 28, 2012 at 01:51:00AM +0100, Rafael J. Wysocki wrote:
> > > Having considered that a bit more I'm now thinking that in fact the power state
> > > the device is in at the moment doesn't really matter, so the polling code need
> > > not really know what PM is doing.  What it needs to know is that the device
> > > will generate a hardware event when something interesting happens, so it is not
> > > necessary to poll it.
> > > 
> > > In this particular case it is related to power management (apparently, hardware
> > > events will only be generated after the device has been put into ACPI D3cold,
> > > or so Aaron seems to be claiming), but it need not be in general, at least in
> > > principle.
> > > 
> > > It looks like we need an "event driven" flag that the can be set in the lower
> > > layers and read by the upper layers.  I suppose this means it would need to be
> > > in struct device, but not necessarily in the PM-specific part of it.
> > 
> > We already have that.  That's what gendisk->async_events is for (as
> > opposed to gendisk->events).  If all events are async_events, block
> > won't poll for events, but I'm not sure that's the golden bullet.
> > 
> > * None implements async_events yet and an API is missing -
> >   disk_check_events() - which is trivial to add, but it's the same
> >   story.  We'll need a mechanism to shoot up notification from libata
> >   to block layer.  It's admittedly easier to justify routing through
> >   SCSI tho.  So, we're mostly shifting the problem.  Given that async
> >   events is nice to have, so it isn't a bad idea.
> 
> Could we drive it in the polling code?  As in, if we set a flag to say
> we're event driven and please don't bother us, we could just respond to
> the poll with the last known state (this would probably have to be in
> SCSI somewhere since most polls are Test Unit Readys).  That way ZPODD
> sets this flag when the device powers down and unsets it when it powers
> up.
Does it mean I can do something like this:
diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c
index 5fc97d2..219820c 100644
--- a/drivers/scsi/sr.c
+++ b/drivers/scsi/sr.c
@@ -578,7 +578,10 @@ static unsigned int sr_block_check_events(struct gendisk *disk,
 					  unsigned int clearing)
 {
 	struct scsi_cd *cd = scsi_cd(disk);
-	return cdrom_check_events(&cd->cdi, clearing);
+	if (!cd->device->event_driven)
+		return cdrom_check_events(&cd->cdi, clearing);
+	else
+		return 0;
 }
 
 static int sr_block_revalidate_disk(struct gendisk *disk)
diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h
index e65c62e..1756151 100644
--- a/include/scsi/scsi_device.h
+++ b/include/scsi/scsi_device.h
@@ -160,6 +160,7 @@ struct scsi_device {
 	unsigned can_power_off:1; /* Device supports runtime power off */
 	unsigned wce_default_on:1;	/* Cache is ON by default */
 	unsigned no_dif:1;	/* T10 PI (DIF) should be disabled */
+	unsigned event_driven:1; /* No need to poll the device */
 
 	DECLARE_BITMAP(supported_events, SDEV_EVT_MAXBITS); /* supported events */
 	struct list_head event_list;	/* asserted events */
Then when ZPODD is powered off, it will set this flag; and unset it when
it is powered up.
Thanks,
Aaron
> 
> James
> 
> > * Still dunno much about zpodd but IIUC the notification from
> >   zero-power is via ACPI.  To advertise that the device doesn't need
> >   polling, it should also be able to do async notification while
> >   powered up, which isn't covered by zpodd but ATA async notification.
> >   So, ummm... that's another obstacle.  If zpodd requires the device
> >   to support ATA async notification, it might not be too bad tho.
> 
> 
> 
^ permalink raw reply related	[flat|nested] 85+ messages in thread
- * Re: [PATCH v9 06/10] ata: zpodd: check zero power ready status
  2012-12-03  8:13                         ` Aaron Lu
@ 2012-12-03  8:25                           ` James Bottomley
  2012-12-03  8:59                             ` Aaron Lu
  2012-12-03 16:23                             ` Tejun Heo
  0 siblings, 2 replies; 85+ messages in thread
From: James Bottomley @ 2012-12-03  8:25 UTC (permalink / raw)
  To: Aaron Lu
  Cc: Tejun Heo, Rafael J. Wysocki, linux-pm, Jeff Garzik, Alan Stern,
	Jeff Wu, Aaron Lu, linux-ide, linux-scsi, linux-acpi
On Mon, 2012-12-03 at 16:13 +0800, Aaron Lu wrote:
> On Wed, Nov 28, 2012 at 08:56:09AM +0000, James Bottomley wrote:
> > On Tue, 2012-11-27 at 17:39 -0800, Tejun Heo wrote:
> > > Hey, Rafael.
> > > 
> > > On Wed, Nov 28, 2012 at 01:51:00AM +0100, Rafael J. Wysocki wrote:
> > > > Having considered that a bit more I'm now thinking that in fact the power state
> > > > the device is in at the moment doesn't really matter, so the polling code need
> > > > not really know what PM is doing.  What it needs to know is that the device
> > > > will generate a hardware event when something interesting happens, so it is not
> > > > necessary to poll it.
> > > > 
> > > > In this particular case it is related to power management (apparently, hardware
> > > > events will only be generated after the device has been put into ACPI D3cold,
> > > > or so Aaron seems to be claiming), but it need not be in general, at least in
> > > > principle.
> > > > 
> > > > It looks like we need an "event driven" flag that the can be set in the lower
> > > > layers and read by the upper layers.  I suppose this means it would need to be
> > > > in struct device, but not necessarily in the PM-specific part of it.
> > > 
> > > We already have that.  That's what gendisk->async_events is for (as
> > > opposed to gendisk->events).  If all events are async_events, block
> > > won't poll for events, but I'm not sure that's the golden bullet.
> > > 
> > > * None implements async_events yet and an API is missing -
> > >   disk_check_events() - which is trivial to add, but it's the same
> > >   story.  We'll need a mechanism to shoot up notification from libata
> > >   to block layer.  It's admittedly easier to justify routing through
> > >   SCSI tho.  So, we're mostly shifting the problem.  Given that async
> > >   events is nice to have, so it isn't a bad idea.
> > 
> > Could we drive it in the polling code?  As in, if we set a flag to say
> > we're event driven and please don't bother us, we could just respond to
> > the poll with the last known state (this would probably have to be in
> > SCSI somewhere since most polls are Test Unit Readys).  That way ZPODD
> > sets this flag when the device powers down and unsets it when it powers
> > up.
> 
> Does it mean I can do something like this:
> 
> diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c
> index 5fc97d2..219820c 100644
> --- a/drivers/scsi/sr.c
> +++ b/drivers/scsi/sr.c
> @@ -578,7 +578,10 @@ static unsigned int sr_block_check_events(struct gendisk *disk,
>  					  unsigned int clearing)
>  {
>  	struct scsi_cd *cd = scsi_cd(disk);
> -	return cdrom_check_events(&cd->cdi, clearing);
> +	if (!cd->device->event_driven)
> +		return cdrom_check_events(&cd->cdi, clearing);
> +	else
> +		return 0;
>  }
>  
>  static int sr_block_revalidate_disk(struct gendisk *disk)
> diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h
> index e65c62e..1756151 100644
> --- a/include/scsi/scsi_device.h
> +++ b/include/scsi/scsi_device.h
> @@ -160,6 +160,7 @@ struct scsi_device {
>  	unsigned can_power_off:1; /* Device supports runtime power off */
>  	unsigned wce_default_on:1;	/* Cache is ON by default */
>  	unsigned no_dif:1;	/* T10 PI (DIF) should be disabled */
> +	unsigned event_driven:1; /* No need to poll the device */
>  
>  	DECLARE_BITMAP(supported_events, SDEV_EVT_MAXBITS); /* supported events */
>  	struct list_head event_list;	/* asserted events */
Yes, but if we can get away with doing that, it should be in genhd
because it's completely generic.
I was imagining we'd have to fake the reply to the test unit ready or
some other commands, which is why it would need to be in sr.c
The check events code is Tejun's baby, if he's OK with it then just do
it in genhd.c
> Then when ZPODD is powered off, it will set this flag; and unset it when
> it is powered up.
Right.
James
^ permalink raw reply	[flat|nested] 85+ messages in thread
- * Re: [PATCH v9 06/10] ata: zpodd: check zero power ready status
  2012-12-03  8:25                           ` James Bottomley
@ 2012-12-03  8:59                             ` Aaron Lu
  2012-12-03 16:23                             ` Tejun Heo
  1 sibling, 0 replies; 85+ messages in thread
From: Aaron Lu @ 2012-12-03  8:59 UTC (permalink / raw)
  To: James Bottomley, Tejun Heo
  Cc: Rafael J. Wysocki, linux-pm, Jeff Garzik, Alan Stern, Jeff Wu,
	Aaron Lu, linux-ide, linux-scsi, linux-acpi
On Mon, Dec 03, 2012 at 08:25:43AM +0000, James Bottomley wrote:
> On Mon, 2012-12-03 at 16:13 +0800, Aaron Lu wrote:
> > On Wed, Nov 28, 2012 at 08:56:09AM +0000, James Bottomley wrote:
> > > On Tue, 2012-11-27 at 17:39 -0800, Tejun Heo wrote:
> > > > Hey, Rafael.
> > > > 
> > > > On Wed, Nov 28, 2012 at 01:51:00AM +0100, Rafael J. Wysocki wrote:
> > > > > Having considered that a bit more I'm now thinking that in fact the power state
> > > > > the device is in at the moment doesn't really matter, so the polling code need
> > > > > not really know what PM is doing.  What it needs to know is that the device
> > > > > will generate a hardware event when something interesting happens, so it is not
> > > > > necessary to poll it.
> > > > > 
> > > > > In this particular case it is related to power management (apparently, hardware
> > > > > events will only be generated after the device has been put into ACPI D3cold,
> > > > > or so Aaron seems to be claiming), but it need not be in general, at least in
> > > > > principle.
> > > > > 
> > > > > It looks like we need an "event driven" flag that the can be set in the lower
> > > > > layers and read by the upper layers.  I suppose this means it would need to be
> > > > > in struct device, but not necessarily in the PM-specific part of it.
> > > > 
> > > > We already have that.  That's what gendisk->async_events is for (as
> > > > opposed to gendisk->events).  If all events are async_events, block
> > > > won't poll for events, but I'm not sure that's the golden bullet.
> > > > 
> > > > * None implements async_events yet and an API is missing -
> > > >   disk_check_events() - which is trivial to add, but it's the same
> > > >   story.  We'll need a mechanism to shoot up notification from libata
> > > >   to block layer.  It's admittedly easier to justify routing through
> > > >   SCSI tho.  So, we're mostly shifting the problem.  Given that async
> > > >   events is nice to have, so it isn't a bad idea.
> > > 
> > > Could we drive it in the polling code?  As in, if we set a flag to say
> > > we're event driven and please don't bother us, we could just respond to
> > > the poll with the last known state (this would probably have to be in
> > > SCSI somewhere since most polls are Test Unit Readys).  That way ZPODD
> > > sets this flag when the device powers down and unsets it when it powers
> > > up.
> > 
> > Does it mean I can do something like this:
> > 
> > diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c
> > index 5fc97d2..219820c 100644
> > --- a/drivers/scsi/sr.c
> > +++ b/drivers/scsi/sr.c
> > @@ -578,7 +578,10 @@ static unsigned int sr_block_check_events(struct gendisk *disk,
> >  					  unsigned int clearing)
> >  {
> >  	struct scsi_cd *cd = scsi_cd(disk);
> > -	return cdrom_check_events(&cd->cdi, clearing);
> > +	if (!cd->device->event_driven)
> > +		return cdrom_check_events(&cd->cdi, clearing);
> > +	else
> > +		return 0;
> >  }
> >  
> >  static int sr_block_revalidate_disk(struct gendisk *disk)
> > diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h
> > index e65c62e..1756151 100644
> > --- a/include/scsi/scsi_device.h
> > +++ b/include/scsi/scsi_device.h
> > @@ -160,6 +160,7 @@ struct scsi_device {
> >  	unsigned can_power_off:1; /* Device supports runtime power off */
> >  	unsigned wce_default_on:1;	/* Cache is ON by default */
> >  	unsigned no_dif:1;	/* T10 PI (DIF) should be disabled */
> > +	unsigned event_driven:1; /* No need to poll the device */
> >  
> >  	DECLARE_BITMAP(supported_events, SDEV_EVT_MAXBITS); /* supported events */
> >  	struct list_head event_list;	/* asserted events */
> 
> Yes, but if we can get away with doing that, it should be in genhd
> because it's completely generic.
> 
> I was imagining we'd have to fake the reply to the test unit ready or
> some other commands, which is why it would need to be in sr.c
> 
> The check events code is Tejun's baby, if he's OK with it then just do
> it in genhd.c
I agree that it better be done in genhd, the only concern is, in that
case, this flag will need to be in struct device. I'm not sure if this
is acceptible though, since the whole events checking thing is for block
based devices only.
Ideally, it should be in struct disk_events, but I don't see a way for
libata to access that structure... so any suggestion of doing this? or
it is OK to add such a field to struct device?
Thanks,
Aaron
> 
> > Then when ZPODD is powered off, it will set this flag; and unset it when
> > it is powered up.
> 
> Right.
> 
> James
> 
> 
^ permalink raw reply	[flat|nested] 85+ messages in thread
- * Re: [PATCH v9 06/10] ata: zpodd: check zero power ready status
  2012-12-03  8:25                           ` James Bottomley
  2012-12-03  8:59                             ` Aaron Lu
@ 2012-12-03 16:23                             ` Tejun Heo
  2012-12-03 18:56                               ` Jeff Garzik
                                                 ` (2 more replies)
  1 sibling, 3 replies; 85+ messages in thread
From: Tejun Heo @ 2012-12-03 16:23 UTC (permalink / raw)
  To: James Bottomley
  Cc: Aaron Lu, Rafael J. Wysocki, linux-pm, Jeff Garzik, Alan Stern,
	Jeff Wu, Aaron Lu, linux-ide, linux-scsi, linux-acpi
Hello, James.
On Mon, Dec 03, 2012 at 08:25:43AM +0000, James Bottomley wrote:
> > diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h
> > index e65c62e..1756151 100644
> > --- a/include/scsi/scsi_device.h
> > +++ b/include/scsi/scsi_device.h
> > @@ -160,6 +160,7 @@ struct scsi_device {
> >  	unsigned can_power_off:1; /* Device supports runtime power off */
> >  	unsigned wce_default_on:1;	/* Cache is ON by default */
> >  	unsigned no_dif:1;	/* T10 PI (DIF) should be disabled */
> > +	unsigned event_driven:1; /* No need to poll the device */
> >  
> >  	DECLARE_BITMAP(supported_events, SDEV_EVT_MAXBITS); /* supported events */
> >  	struct list_head event_list;	/* asserted events */
> 
> Yes, but if we can get away with doing that, it should be in genhd
> because it's completely generic.
> 
> I was imagining we'd have to fake the reply to the test unit ready or
> some other commands, which is why it would need to be in sr.c
> 
> The check events code is Tejun's baby, if he's OK with it then just do
> it in genhd.c
The problem here is there's no easy to reach genhd from libata (or the
other way around) without going through sr.  I think we're gonna have
to have something in sr one way or the other.
Thanks.
-- 
tejun
^ permalink raw reply	[flat|nested] 85+ messages in thread
- * Re: [PATCH v9 06/10] ata: zpodd: check zero power ready status
  2012-12-03 16:23                             ` Tejun Heo
@ 2012-12-03 18:56                               ` Jeff Garzik
  2012-12-04  5:04                                 ` Aaron Lu
  2012-12-04 12:11                               ` James Bottomley
  2012-12-20  6:07                               ` Aaron Lu
  2 siblings, 1 reply; 85+ messages in thread
From: Jeff Garzik @ 2012-12-03 18:56 UTC (permalink / raw)
  To: Tejun Heo
  Cc: James Bottomley, Aaron Lu, Rafael J. Wysocki, linux-pm,
	Alan Stern, Jeff Wu, Aaron Lu, linux-ide, linux-scsi, linux-acpi
On 12/03/2012 11:23 AM, Tejun Heo wrote:
> Hello, James.
>
> On Mon, Dec 03, 2012 at 08:25:43AM +0000, James Bottomley wrote:
>>> diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h
>>> index e65c62e..1756151 100644
>>> --- a/include/scsi/scsi_device.h
>>> +++ b/include/scsi/scsi_device.h
>>> @@ -160,6 +160,7 @@ struct scsi_device {
>>>   	unsigned can_power_off:1; /* Device supports runtime power off */
>>>   	unsigned wce_default_on:1;	/* Cache is ON by default */
>>>   	unsigned no_dif:1;	/* T10 PI (DIF) should be disabled */
>>> +	unsigned event_driven:1; /* No need to poll the device */
>>>
>>>   	DECLARE_BITMAP(supported_events, SDEV_EVT_MAXBITS); /* supported events */
>>>   	struct list_head event_list;	/* asserted events */
>>
>> Yes, but if we can get away with doing that, it should be in genhd
>> because it's completely generic.
>>
>> I was imagining we'd have to fake the reply to the test unit ready or
>> some other commands, which is why it would need to be in sr.c
>>
>> The check events code is Tejun's baby, if he's OK with it then just do
>> it in genhd.c
>
> The problem here is there's no easy to reach genhd from libata (or the
> other way around) without going through sr.  I think we're gonna have
> to have something in sr one way or the other.
  ...which is precisely as I said when v1 of this ZPODD patchset appeared.
sr modifications cannot be avoided.
	Jeff
^ permalink raw reply	[flat|nested] 85+ messages in thread
- * Re: [PATCH v9 06/10] ata: zpodd: check zero power ready status
  2012-12-03 18:56                               ` Jeff Garzik
@ 2012-12-04  5:04                                 ` Aaron Lu
  0 siblings, 0 replies; 85+ messages in thread
From: Aaron Lu @ 2012-12-04  5:04 UTC (permalink / raw)
  To: Jeff Garzik, Tejun Heo, James Bottomley
  Cc: Rafael J. Wysocki, linux-pm, Alan Stern, Jeff Wu, Aaron Lu,
	linux-ide, linux-scsi, linux-acpi
On 12/04/2012 02:56 AM, Jeff Garzik wrote:
> On 12/03/2012 11:23 AM, Tejun Heo wrote:
>> Hello, James.
>>
>> On Mon, Dec 03, 2012 at 08:25:43AM +0000, James Bottomley wrote:
>>>> diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h
>>>> index e65c62e..1756151 100644
>>>> --- a/include/scsi/scsi_device.h
>>>> +++ b/include/scsi/scsi_device.h
>>>> @@ -160,6 +160,7 @@ struct scsi_device {
>>>>   	unsigned can_power_off:1; /* Device supports runtime power off */
>>>>   	unsigned wce_default_on:1;	/* Cache is ON by default */
>>>>   	unsigned no_dif:1;	/* T10 PI (DIF) should be disabled */
>>>> +	unsigned event_driven:1; /* No need to poll the device */
>>>>
>>>>   	DECLARE_BITMAP(supported_events, SDEV_EVT_MAXBITS); /* supported events */
>>>>   	struct list_head event_list;	/* asserted events */
>>>
>>> Yes, but if we can get away with doing that, it should be in genhd
>>> because it's completely generic.
>>>
>>> I was imagining we'd have to fake the reply to the test unit ready or
>>> some other commands, which is why it would need to be in sr.c
>>>
>>> The check events code is Tejun's baby, if he's OK with it then just do
>>> it in genhd.c
>>
>> The problem here is there's no easy to reach genhd from libata (or the
>> other way around) without going through sr.  I think we're gonna have
>> to have something in sr one way or the other.
> 
>   ...which is precisely as I said when v1 of this ZPODD patchset appeared.
> 
> sr modifications cannot be avoided.
So I'm gonna use the above code to silence the poll when ODD is powered
off. I suppose everybody is OK with this, right? James, please let me
know if you disagree.
Thanks,
Aaron
> 
> 	Jeff
> 
> 
> 
> 
^ permalink raw reply	[flat|nested] 85+ messages in thread
 
- * Re: [PATCH v9 06/10] ata: zpodd: check zero power ready status
  2012-12-03 16:23                             ` Tejun Heo
  2012-12-03 18:56                               ` Jeff Garzik
@ 2012-12-04 12:11                               ` James Bottomley
  2012-12-07  6:13                                 ` Aaron Lu
  2012-12-20  6:07                               ` Aaron Lu
  2 siblings, 1 reply; 85+ messages in thread
From: James Bottomley @ 2012-12-04 12:11 UTC (permalink / raw)
  To: Tejun Heo
  Cc: Aaron Lu, Rafael J. Wysocki, linux-pm, Jeff Garzik, Alan Stern,
	Jeff Wu, Aaron Lu, linux-ide, linux-scsi, linux-acpi
On Mon, 2012-12-03 at 08:23 -0800, Tejun Heo wrote:
> Hello, James.
> 
> On Mon, Dec 03, 2012 at 08:25:43AM +0000, James Bottomley wrote:
> > > diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h
> > > index e65c62e..1756151 100644
> > > --- a/include/scsi/scsi_device.h
> > > +++ b/include/scsi/scsi_device.h
> > > @@ -160,6 +160,7 @@ struct scsi_device {
> > >  	unsigned can_power_off:1; /* Device supports runtime power off */
> > >  	unsigned wce_default_on:1;	/* Cache is ON by default */
> > >  	unsigned no_dif:1;	/* T10 PI (DIF) should be disabled */
> > > +	unsigned event_driven:1; /* No need to poll the device */
> > >  
> > >  	DECLARE_BITMAP(supported_events, SDEV_EVT_MAXBITS); /* supported events */
> > >  	struct list_head event_list;	/* asserted events */
> > 
> > Yes, but if we can get away with doing that, it should be in genhd
> > because it's completely generic.
> > 
> > I was imagining we'd have to fake the reply to the test unit ready or
> > some other commands, which is why it would need to be in sr.c
> > 
> > The check events code is Tejun's baby, if he's OK with it then just do
> > it in genhd.c
> 
> The problem here is there's no easy to reach genhd from libata (or the
> other way around) without going through sr.  I think we're gonna have
> to have something in sr one way or the other.
Can't we do that via an event? It's a bit clunky because we need the
callback in the layer that sees the sdev, which is libata-scsi, we just
need an analogue of ata_scsi_media_change_notify, but ignoring and
allowing polling is essentially event driven as well, so it should all
work.  We'll need a listener in genhd, which might be trickier.
This may also work as the more generic route for stuff where we can't
connect the bottom to the top of the stack (which looks like a problem
we'll begin wrestling with a lot now).
James
^ permalink raw reply	[flat|nested] 85+ messages in thread
- * Re: [PATCH v9 06/10] ata: zpodd: check zero power ready status
  2012-12-04 12:11                               ` James Bottomley
@ 2012-12-07  6:13                                 ` Aaron Lu
  2012-12-10  3:26                                   ` Aaron Lu
  0 siblings, 1 reply; 85+ messages in thread
From: Aaron Lu @ 2012-12-07  6:13 UTC (permalink / raw)
  To: James Bottomley, Tejun Heo
  Cc: Rafael J. Wysocki, linux-pm, Jeff Garzik, Alan Stern, Jeff Wu,
	Aaron Lu, linux-ide, linux-scsi, linux-acpi
On 12/04/2012 08:11 PM, James Bottomley wrote:
> On Mon, 2012-12-03 at 08:23 -0800, Tejun Heo wrote:
>> Hello, James.
>>
>> On Mon, Dec 03, 2012 at 08:25:43AM +0000, James Bottomley wrote:
>>>> diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h
>>>> index e65c62e..1756151 100644
>>>> --- a/include/scsi/scsi_device.h
>>>> +++ b/include/scsi/scsi_device.h
>>>> @@ -160,6 +160,7 @@ struct scsi_device {
>>>>  	unsigned can_power_off:1; /* Device supports runtime power off */
>>>>  	unsigned wce_default_on:1;	/* Cache is ON by default */
>>>>  	unsigned no_dif:1;	/* T10 PI (DIF) should be disabled */
>>>> +	unsigned event_driven:1; /* No need to poll the device */
>>>>  
>>>>  	DECLARE_BITMAP(supported_events, SDEV_EVT_MAXBITS); /* supported events */
>>>>  	struct list_head event_list;	/* asserted events */
>>>
>>> Yes, but if we can get away with doing that, it should be in genhd
>>> because it's completely generic.
>>>
>>> I was imagining we'd have to fake the reply to the test unit ready or
>>> some other commands, which is why it would need to be in sr.c
>>>
>>> The check events code is Tejun's baby, if he's OK with it then just do
>>> it in genhd.c
>>
>> The problem here is there's no easy to reach genhd from libata (or the
>> other way around) without going through sr.  I think we're gonna have
>> to have something in sr one way or the other.
> 
> Can't we do that via an event? It's a bit clunky because we need the
> callback in the layer that sees the sdev, which is libata-scsi, we just
> need an analogue of ata_scsi_media_change_notify, but ignoring and
> allowing polling is essentially event driven as well, so it should all
> work.  We'll need a listener in genhd, which might be trickier.
Hi Tejun,
Do you have any comments on James' suggestion? I want to know your
opinion, thanks.
Hi James,
Do you mean we start a thread in genhd that listen to events, so that
some driver can send an event to genhd about if the device should be
polled? I'm thinking where to store such information. If store it in
struct disk_events, then we will need to know which gendisk is for
the device, but how? Maybe by loop scan all gendisk's driverfs_dev?
If store it in struct device, then we do not need to send the event but
just set a flag in sturct device in libata, and let genhd check this
flag when poll. So I don't quite understand this, can you please
elaborate? Thanks.
Thanks,
Aaron
> 
> This may also work as the more generic route for stuff where we can't
> connect the bottom to the top of the stack (which looks like a problem
> we'll begin wrestling with a lot now).
> 
> James
> 
> 
^ permalink raw reply	[flat|nested] 85+ messages in thread
- * Re: [PATCH v9 06/10] ata: zpodd: check zero power ready status
  2012-12-07  6:13                                 ` Aaron Lu
@ 2012-12-10  3:26                                   ` Aaron Lu
  2012-12-11  5:10                                     ` Tejun Heo
  0 siblings, 1 reply; 85+ messages in thread
From: Aaron Lu @ 2012-12-10  3:26 UTC (permalink / raw)
  To: Tejun Heo
  Cc: James Bottomley, Rafael J. Wysocki, linux-pm, Jeff Garzik,
	Alan Stern, Jeff Wu, linux-ide, linux-scsi, linux-acpi
On 12/07/2012 02:13 PM, Aaron Lu wrote:
> On 12/04/2012 08:11 PM, James Bottomley wrote:
>> On Mon, 2012-12-03 at 08:23 -0800, Tejun Heo wrote:
>>> Hello, James.
>>>
>>> On Mon, Dec 03, 2012 at 08:25:43AM +0000, James Bottomley wrote:
>>>>> diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h
>>>>> index e65c62e..1756151 100644
>>>>> --- a/include/scsi/scsi_device.h
>>>>> +++ b/include/scsi/scsi_device.h
>>>>> @@ -160,6 +160,7 @@ struct scsi_device {
>>>>>  	unsigned can_power_off:1; /* Device supports runtime power off */
>>>>>  	unsigned wce_default_on:1;	/* Cache is ON by default */
>>>>>  	unsigned no_dif:1;	/* T10 PI (DIF) should be disabled */
>>>>> +	unsigned event_driven:1; /* No need to poll the device */
>>>>>  
>>>>>  	DECLARE_BITMAP(supported_events, SDEV_EVT_MAXBITS); /* supported events */
>>>>>  	struct list_head event_list;	/* asserted events */
>>>>
>>>> Yes, but if we can get away with doing that, it should be in genhd
>>>> because it's completely generic.
>>>>
>>>> I was imagining we'd have to fake the reply to the test unit ready or
>>>> some other commands, which is why it would need to be in sr.c
>>>>
>>>> The check events code is Tejun's baby, if he's OK with it then just do
>>>> it in genhd.c
>>>
>>> The problem here is there's no easy to reach genhd from libata (or the
>>> other way around) without going through sr.  I think we're gonna have
>>> to have something in sr one way or the other.
>>
>> Can't we do that via an event? It's a bit clunky because we need the
>> callback in the layer that sees the sdev, which is libata-scsi, we just
>> need an analogue of ata_scsi_media_change_notify, but ignoring and
>> allowing polling is essentially event driven as well, so it should all
>> work.  We'll need a listener in genhd, which might be trickier.
> 
> Hi Tejun,
> 
> Do you have any comments on James' suggestion? I want to know your
> opinion, thanks.
Hi Tejun,
A colleague of mine reminded me that it's impolite to write something
like this, and here is my apology. I didn't mean to be rude, I'm sorry
for this if it made you feel uncomfortable.
If possible, can you please shed some light on this problem and James'
suggestion? I need your opinions to make progress, thanks.
-Aaron
> 
> Hi James,
> 
> Do you mean we start a thread in genhd that listen to events, so that
> some driver can send an event to genhd about if the device should be
> polled? I'm thinking where to store such information. If store it in
> struct disk_events, then we will need to know which gendisk is for
> the device, but how? Maybe by loop scan all gendisk's driverfs_dev?
> If store it in struct device, then we do not need to send the event but
> just set a flag in sturct device in libata, and let genhd check this
> flag when poll. So I don't quite understand this, can you please
> elaborate? Thanks.
> 
> Thanks,
> Aaron
> 
>>
>> This may also work as the more generic route for stuff where we can't
>> connect the bottom to the top of the stack (which looks like a problem
>> we'll begin wrestling with a lot now).
>>
>> James
>>
>>
> 
^ permalink raw reply	[flat|nested] 85+ messages in thread
- * Re: [PATCH v9 06/10] ata: zpodd: check zero power ready status
  2012-12-10  3:26                                   ` Aaron Lu
@ 2012-12-11  5:10                                     ` Tejun Heo
  2012-12-18  8:30                                       ` Aaron Lu
  0 siblings, 1 reply; 85+ messages in thread
From: Tejun Heo @ 2012-12-11  5:10 UTC (permalink / raw)
  To: Aaron Lu
  Cc: James Bottomley, Rafael J. Wysocki, linux-pm, Jeff Garzik,
	Alan Stern, Jeff Wu, linux-ide, linux-scsi, linux-acpi
Hello, guys.
On Mon, Dec 10, 2012 at 11:26:07AM +0800, Aaron Lu wrote:
> >>> The problem here is there's no easy to reach genhd from libata (or the
> >>> other way around) without going through sr.  I think we're gonna have
> >>> to have something in sr one way or the other.
> >>
> >> Can't we do that via an event? It's a bit clunky because we need the
> >> callback in the layer that sees the sdev, which is libata-scsi, we just
> >> need an analogue of ata_scsi_media_change_notify, but ignoring and
> >> allowing polling is essentially event driven as well, so it should all
> >> work.  We'll need a listener in genhd, which might be trickier.
I'm not really following what you mean.  Can you please elaborate?
One way or the other, doesn't the notification have to bubble up
through SCSI?
> A colleague of mine reminded me that it's impolite to write something
> like this, and here is my apology. I didn't mean to be rude, I'm sorry
> for this if it made you feel uncomfortable.
Oh, no, it's not rude at all.  I was just being distracted w/ other
stuff and lazy.  Sorry about that.
Thanks.
-- 
tejun
^ permalink raw reply	[flat|nested] 85+ messages in thread 
- * Re: [PATCH v9 06/10] ata: zpodd: check zero power ready status
  2012-12-11  5:10                                     ` Tejun Heo
@ 2012-12-18  8:30                                       ` Aaron Lu
  0 siblings, 0 replies; 85+ messages in thread
From: Aaron Lu @ 2012-12-18  8:30 UTC (permalink / raw)
  To: Tejun Heo
  Cc: James Bottomley, Rafael J. Wysocki, linux-pm, Jeff Garzik,
	Alan Stern, Jeff Wu, linux-ide, linux-scsi, linux-acpi
Hi Everyone,
Due to lack of information, I think I'll go ahead and pick up a simple
solution for this(i.e. the code I attached previously to set a flag
event_driven in scsi_device to let sr skip events poll) and send a new
series out for you to review. After all, I can't wait forever...
Please feel free to let me know if you don't think I should send a new
series with the above mentioned solution.
Thanks a lot for your(all of you) kind help and comments.
-Aaron
On 12/11/2012 01:10 PM, Tejun Heo wrote:
> Hello, guys.
> 
> On Mon, Dec 10, 2012 at 11:26:07AM +0800, Aaron Lu wrote:
>>>>> The problem here is there's no easy to reach genhd from libata (or the
>>>>> other way around) without going through sr.  I think we're gonna have
>>>>> to have something in sr one way or the other.
>>>>
>>>> Can't we do that via an event? It's a bit clunky because we need the
>>>> callback in the layer that sees the sdev, which is libata-scsi, we just
>>>> need an analogue of ata_scsi_media_change_notify, but ignoring and
>>>> allowing polling is essentially event driven as well, so it should all
>>>> work.  We'll need a listener in genhd, which might be trickier.
> 
> I'm not really following what you mean.  Can you please elaborate?
> One way or the other, doesn't the notification have to bubble up
> through SCSI?
> 
>> A colleague of mine reminded me that it's impolite to write something
>> like this, and here is my apology. I didn't mean to be rude, I'm sorry
>> for this if it made you feel uncomfortable.
> 
> Oh, no, it's not rude at all. I was just being distracted w/ other
> stuff and lazy.  Sorry about that.
> 
> Thanks.
> 
^ permalink raw reply	[flat|nested] 85+ messages in thread 
 
 
 
 
- * Re: [PATCH v9 06/10] ata: zpodd: check zero power ready status
  2012-12-03 16:23                             ` Tejun Heo
  2012-12-03 18:56                               ` Jeff Garzik
  2012-12-04 12:11                               ` James Bottomley
@ 2012-12-20  6:07                               ` Aaron Lu
  2012-12-25 17:17                                 ` Tejun Heo
  2 siblings, 1 reply; 85+ messages in thread
From: Aaron Lu @ 2012-12-20  6:07 UTC (permalink / raw)
  To: Tejun Heo
  Cc: James Bottomley, Rafael J. Wysocki, linux-pm, Jeff Garzik,
	Alan Stern, Jeff Wu, Aaron Lu, linux-ide, linux-scsi, linux-acpi
Hi Tejun,
On 12/04/2012 12:23 AM, Tejun Heo wrote:
> Hello, James.
> 
> On Mon, Dec 03, 2012 at 08:25:43AM +0000, James Bottomley wrote:
>>> diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h
>>> index e65c62e..1756151 100644
>>> --- a/include/scsi/scsi_device.h
>>> +++ b/include/scsi/scsi_device.h
>>> @@ -160,6 +160,7 @@ struct scsi_device {
>>>  	unsigned can_power_off:1; /* Device supports runtime power off */
>>>  	unsigned wce_default_on:1;	/* Cache is ON by default */
>>>  	unsigned no_dif:1;	/* T10 PI (DIF) should be disabled */
>>> +	unsigned event_driven:1; /* No need to poll the device */
>>>  
>>>  	DECLARE_BITMAP(supported_events, SDEV_EVT_MAXBITS); /* supported events */
>>>  	struct list_head event_list;	/* asserted events */
>>
>> Yes, but if we can get away with doing that, it should be in genhd
>> because it's completely generic.
>>
>> I was imagining we'd have to fake the reply to the test unit ready or
>> some other commands, which is why it would need to be in sr.c
>>
>> The check events code is Tejun's baby, if he's OK with it then just do
>> it in genhd.c
> 
> The problem here is there's no easy to reach genhd from libata (or the
> other way around) without going through sr.  I think we're gonna have
> to have something in sr one way or the other.
Do you think we can do something like the following to get the gendisk
pointer in libata? - We have ata_dev->sdev->sdev_gendev, and the
gendisk->part0.__dev is a child of it, so we can loop scan sdev_gendev's
children to see which one is of block_class and disk_type. Assuming one
device will alloc only one disk(correct?), we can get the gendisk from
libata layer. Code like this:
diff --git a/block/genhd.c b/block/genhd.c
index 9a289d7..448b201 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -1619,6 +1619,38 @@ static void disk_events_workfn(struct work_struct *work)
 		kobject_uevent_env(&disk_to_dev(disk)->kobj, KOBJ_CHANGE, envp);
 }
 
+static int is_gendisk_part0(struct device *dev, void *data)
+{
+	struct device **child = data;
+
+	if (dev->class == &block_class && dev->type == &disk_type) {
+		*child = dev;
+		return 1;
+	} else
+		return 0;
+}
+
+/**
+ * disk_from_device - Get the gendisk pointer for this device.
+ * @dev: the device this gendisk is created for, i.e. gendisk->driverfs_dev
+ *
+ * LLD sometimes need to play with the gendisk without HLD's aware,
+ * this routine gives LLD the required access to gendisk.
+ *
+ * CONTEXT:
+ * Don't care.
+ */
+struct gendisk *disk_from_device(struct device *dev)
+{
+	struct device *child;
+
+	if (device_for_each_child(dev, &child, is_gendisk_part0))
+		return dev_to_disk(child);
+	else
+		return NULL;
+}
+EXPORT_SYMBOL(disk_from_device);
+
 /*
  * A disk events enabled device has the following sysfs nodes under
  * its /sys/block/X/ directory.
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 79b8bba..e8002c0 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -427,6 +427,7 @@ extern void disk_block_events(struct gendisk *disk);
 extern void disk_unblock_events(struct gendisk *disk);
 extern void disk_flush_events(struct gendisk *disk, unsigned int mask);
 extern unsigned int disk_clear_events(struct gendisk *disk, unsigned int mask);
+extern struct gendisk *disk_from_device(struct device *dev);
 
 /* drivers/char/random.c */
 extern void add_disk_randomness(struct gendisk *disk);
Then together with disk_try_block_events and disk_unblock_events, we can
avoid touching SCSI layer to let ODD stay in zero power state.
So what do you think?
Thanks,
Aaron
^ permalink raw reply related	[flat|nested] 85+ messages in thread
- * Re: [PATCH v9 06/10] ata: zpodd: check zero power ready status
  2012-12-20  6:07                               ` Aaron Lu
@ 2012-12-25 17:17                                 ` Tejun Heo
  2012-12-26  1:42                                   ` Aaron Lu
  0 siblings, 1 reply; 85+ messages in thread
From: Tejun Heo @ 2012-12-25 17:17 UTC (permalink / raw)
  To: Aaron Lu
  Cc: James Bottomley, Rafael J. Wysocki, linux-pm, Jeff Garzik,
	Alan Stern, Jeff Wu, Aaron Lu, linux-ide, linux-scsi, linux-acpi
Hello, Aaron.
On Thu, Dec 20, 2012 at 02:07:25PM +0800, Aaron Lu wrote:
> +static int is_gendisk_part0(struct device *dev, void *data)
> +{
> +	struct device **child = data;
> +
> +	if (dev->class == &block_class && dev->type == &disk_type) {
> +		*child = dev;
> +		return 1;
> +	} else
> +		return 0;
> +}
> +
> +/**
> + * disk_from_device - Get the gendisk pointer for this device.
> + * @dev: the device this gendisk is created for, i.e. gendisk->driverfs_dev
> + *
> + * LLD sometimes need to play with the gendisk without HLD's aware,
> + * this routine gives LLD the required access to gendisk.
> + *
> + * CONTEXT:
> + * Don't care.
> + */
> +struct gendisk *disk_from_device(struct device *dev)
> +{
> +	struct device *child;
> +
> +	if (device_for_each_child(dev, &child, is_gendisk_part0))
> +		return dev_to_disk(child);
> +	else
> +		return NULL;
> +}
> +EXPORT_SYMBOL(disk_from_device);
This is really a round-about way to find out the matching device and
it wouldn't work if the disk device nests deeper.  Doesn't really look
like a good idea to me.
> Then together with disk_try_block_events and disk_unblock_events, we can
> avoid touching SCSI layer to let ODD stay in zero power state.
Also, I'd much prefer something along the line of
block_events_nowait() instead of try_block.
Thanks.
-- 
tejun
^ permalink raw reply	[flat|nested] 85+ messages in thread
- * Re: [PATCH v9 06/10] ata: zpodd: check zero power ready status
  2012-12-25 17:17                                 ` Tejun Heo
@ 2012-12-26  1:42                                   ` Aaron Lu
  2012-12-28 21:16                                     ` Tejun Heo
  0 siblings, 1 reply; 85+ messages in thread
From: Aaron Lu @ 2012-12-26  1:42 UTC (permalink / raw)
  To: Tejun Heo
  Cc: James Bottomley, Rafael J. Wysocki, linux-pm, Jeff Garzik,
	Alan Stern, Jeff Wu, Aaron Lu, linux-ide, linux-scsi, linux-acpi
On 12/26/2012 01:17 AM, Tejun Heo wrote:
> Hello, Aaron.
> 
> On Thu, Dec 20, 2012 at 02:07:25PM +0800, Aaron Lu wrote:
>> +static int is_gendisk_part0(struct device *dev, void *data)
>> +{
>> +	struct device **child = data;
>> +
>> +	if (dev->class == &block_class && dev->type == &disk_type) {
>> +		*child = dev;
>> +		return 1;
>> +	} else
>> +		return 0;
>> +}
>> +
>> +/**
>> + * disk_from_device - Get the gendisk pointer for this device.
>> + * @dev: the device this gendisk is created for, i.e. gendisk->driverfs_dev
>> + *
>> + * LLD sometimes need to play with the gendisk without HLD's aware,
>> + * this routine gives LLD the required access to gendisk.
>> + *
>> + * CONTEXT:
>> + * Don't care.
>> + */
>> +struct gendisk *disk_from_device(struct device *dev)
>> +{
>> +	struct device *child;
>> +
>> +	if (device_for_each_child(dev, &child, is_gendisk_part0))
>> +		return dev_to_disk(child);
>> +	else
>> +		return NULL;
>> +}
>> +EXPORT_SYMBOL(disk_from_device);
> 
> This is really a round-about way to find out the matching device and
> it wouldn't work if the disk device nests deeper.  Doesn't really look
> like a good idea to me.
I don't quite understand the 'disk device nests deeper' case, can you
please elaborate? My understanding is, as long as the disk's part0
device has a parent, this function should work. For LLDs want to take
advantage of this function, it should pass the device that is the parent
of part0 as the param, this function itself doesn't try to dig further.
The only problem I can see is when there are multiple gendisks created
for a single device, which I don't know if there is such a case?
> 
>> Then together with disk_try_block_events and disk_unblock_events, we can
>> avoid touching SCSI layer to let ODD stay in zero power state.
> 
> Also, I'd much prefer something along the line of
> block_events_nowait() instead of try_block.
Sure, no problem.
Thanks,
Aaron
^ permalink raw reply	[flat|nested] 85+ messages in thread
- * Re: [PATCH v9 06/10] ata: zpodd: check zero power ready status
  2012-12-26  1:42                                   ` Aaron Lu
@ 2012-12-28 21:16                                     ` Tejun Heo
  2013-01-04  1:04                                       ` Aaron Lu
  0 siblings, 1 reply; 85+ messages in thread
From: Tejun Heo @ 2012-12-28 21:16 UTC (permalink / raw)
  To: Aaron Lu
  Cc: James Bottomley, Rafael J. Wysocki, linux-pm, Jeff Garzik,
	Alan Stern, Jeff Wu, Aaron Lu, linux-ide, linux-scsi, linux-acpi
Hello,
On Wed, Dec 26, 2012 at 09:42:14AM +0800, Aaron Lu wrote:
> > This is really a round-about way to find out the matching device and
> > it wouldn't work if the disk device nests deeper.  Doesn't really look
> > like a good idea to me.
> 
> I don't quite understand the 'disk device nests deeper' case, can you
> please elaborate? My understanding is, as long as the disk's part0
> device has a parent, this function should work. For LLDs want to take
Hmmm, maybe I misread but it looked like it wouldn't work if there are
intermediate nodes between the parent device and part0.  It might not
happen for sata but I don't think it's a good idea to assume that the
part0 and hardware device are connected directly.
In general, it's a quite roundabout way to do it.  Let's just push it
through SCSI.  That's how everything else is routed after all.  It's
confusing to do this one differently.
Thanks.
-- 
tejun
^ permalink raw reply	[flat|nested] 85+ messages in thread 
- * Re: [PATCH v9 06/10] ata: zpodd: check zero power ready status
  2012-12-28 21:16                                     ` Tejun Heo
@ 2013-01-04  1:04                                       ` Aaron Lu
  0 siblings, 0 replies; 85+ messages in thread
From: Aaron Lu @ 2013-01-04  1:04 UTC (permalink / raw)
  To: Tejun Heo
  Cc: James Bottomley, Rafael J. Wysocki, linux-pm, Jeff Garzik,
	Alan Stern, Jeff Wu, Aaron Lu, linux-ide, linux-scsi, linux-acpi
On 12/29/2012 05:16 AM, Tejun Heo wrote:
> Hello,
> 
> On Wed, Dec 26, 2012 at 09:42:14AM +0800, Aaron Lu wrote:
>>> This is really a round-about way to find out the matching device and
>>> it wouldn't work if the disk device nests deeper.  Doesn't really look
>>> like a good idea to me.
>>
>> I don't quite understand the 'disk device nests deeper' case, can you
>> please elaborate? My understanding is, as long as the disk's part0
>> device has a parent, this function should work. For LLDs want to take
> 
> Hmmm, maybe I misread but it looked like it wouldn't work if there are
> intermediate nodes between the parent device and part0.  It might not
> happen for sata but I don't think it's a good idea to assume that the
> part0 and hardware device are connected directly.
> 
> In general, it's a quite roundabout way to do it.  Let's just push it
> through SCSI.  That's how everything else is routed after all.  It's
> confusing to do this one differently.
OK, thanks for the suggestion.
I'll prepare v11 using the previous demonstrated way(adding the
event_driven flag to scsi_device) to silence the poll.
Best regards,
Aaron
^ permalink raw reply	[flat|nested] 85+ messages in thread 
 
 
 
 
 
 
 
 
- * Re: [PATCH v9 06/10] ata: zpodd: check zero power ready status
  2012-11-28  1:39                     ` Tejun Heo
  2012-11-28  2:24                       ` Aaron Lu
  2012-11-28  8:56                       ` James Bottomley
@ 2012-11-30  8:55                       ` Aaron Lu
  2012-11-30 11:15                         ` Rafael J. Wysocki
  2 siblings, 1 reply; 85+ messages in thread
From: Aaron Lu @ 2012-11-30  8:55 UTC (permalink / raw)
  To: Tejun Heo
  Cc: Rafael J. Wysocki, James Bottomley, linux-pm, Jeff Garzik,
	Alan Stern, Jeff Wu, Aaron Lu, linux-ide, linux-scsi, linux-acpi
On 11/28/2012 09:39 AM, Tejun Heo wrote:
> Hey, Rafael.
> 
> On Wed, Nov 28, 2012 at 01:51:00AM +0100, Rafael J. Wysocki wrote:
>> Having considered that a bit more I'm now thinking that in fact the power state
>> the device is in at the moment doesn't really matter, so the polling code need
>> not really know what PM is doing.  What it needs to know is that the device
>> will generate a hardware event when something interesting happens, so it is not
>> necessary to poll it.
>>
>> In this particular case it is related to power management (apparently, hardware
>> events will only be generated after the device has been put into ACPI D3cold,
>> or so Aaron seems to be claiming), but it need not be in general, at least in
>> principle.
>>
>> It looks like we need an "event driven" flag that the can be set in the lower
>> layers and read by the upper layers.  I suppose this means it would need to be
>> in struct device, but not necessarily in the PM-specific part of it.
> 
> We already have that.  That's what gendisk->async_events is for (as
> opposed to gendisk->events).  If all events are async_events, block
> won't poll for events, but I'm not sure that's the golden bullet.
> 
> * None implements async_events yet and an API is missing -
>   disk_check_events() - which is trivial to add, but it's the same
>   story.  We'll need a mechanism to shoot up notification from libata
>   to block layer.  It's admittedly easier to justify routing through
I don't see a way to do this, since libata has no chance of accessing
the gendisk pointer. Or we can add a new field to struct device,
something like no_poll, but I don't think it is the right thing to do,
as not all devices are block ones.
Any other suggestions/ideas please?
Thanks,
Aaron
>   SCSI tho.  So, we're mostly shifting the problem.  Given that async
>   events is nice to have, so it isn't a bad idea.
> 
> * Still dunno much about zpodd but IIUC the notification from
>   zero-power is via ACPI.  To advertise that the device doesn't need
>   polling, it should also be able to do async notification while
>   powered up, which isn't covered by zpodd but ATA async notification.
>   So, ummm... that's another obstacle.  If zpodd requires the device
>   to support ATA async notification, it might not be too bad tho.
> 
> Thanks.
> 
^ permalink raw reply	[flat|nested] 85+ messages in thread 
- * Re: [PATCH v9 06/10] ata: zpodd: check zero power ready status
  2012-11-30  8:55                       ` Aaron Lu
@ 2012-11-30 11:15                         ` Rafael J. Wysocki
  0 siblings, 0 replies; 85+ messages in thread
From: Rafael J. Wysocki @ 2012-11-30 11:15 UTC (permalink / raw)
  To: linux-acpi
  Cc: Aaron Lu, Tejun Heo, James Bottomley, linux-pm, Jeff Garzik,
	Alan Stern, Jeff Wu, Aaron Lu, linux-ide, linux-scsi
On Friday, November 30, 2012 04:55:56 PM Aaron Lu wrote:
> On 11/28/2012 09:39 AM, Tejun Heo wrote:
> > Hey, Rafael.
> > 
> > On Wed, Nov 28, 2012 at 01:51:00AM +0100, Rafael J. Wysocki wrote:
> >> Having considered that a bit more I'm now thinking that in fact the power state
> >> the device is in at the moment doesn't really matter, so the polling code need
> >> not really know what PM is doing.  What it needs to know is that the device
> >> will generate a hardware event when something interesting happens, so it is not
> >> necessary to poll it.
> >>
> >> In this particular case it is related to power management (apparently, hardware
> >> events will only be generated after the device has been put into ACPI D3cold,
> >> or so Aaron seems to be claiming), but it need not be in general, at least in
> >> principle.
> >>
> >> It looks like we need an "event driven" flag that the can be set in the lower
> >> layers and read by the upper layers.  I suppose this means it would need to be
> >> in struct device, but not necessarily in the PM-specific part of it.
> > 
> > We already have that.  That's what gendisk->async_events is for (as
> > opposed to gendisk->events).  If all events are async_events, block
> > won't poll for events, but I'm not sure that's the golden bullet.
> > 
> > * None implements async_events yet and an API is missing -
> >   disk_check_events() - which is trivial to add, but it's the same
> >   story.  We'll need a mechanism to shoot up notification from libata
> >   to block layer.  It's admittedly easier to justify routing through
> 
> I don't see a way to do this, since libata has no chance of accessing
> the gendisk pointer. Or we can add a new field to struct device,
> something like no_poll, but I don't think it is the right thing to do,
> as not all devices are block ones.
> 
> Any other suggestions/ideas please?
I think you can follow the James' suggestion:
http://www.spinics.net/lists/linux-acpi/msg40257.html
and see how that goes.
Thanks,
Rafael
-- 
I speak only for myself.
Rafael J. Wysocki, Intel Open Source Technology Center.
^ permalink raw reply	[flat|nested] 85+ messages in thread 
 
 
 
 
 
 
- * Re: [PATCH v9 06/10] ata: zpodd: check zero power ready status
  2012-11-19 14:56           ` Tejun Heo
  2012-11-19 15:06             ` James Bottomley
@ 2012-11-20  6:00             ` Aaron Lu
  2012-11-20  8:59               ` Aaron Lu
  1 sibling, 1 reply; 85+ messages in thread
From: Aaron Lu @ 2012-11-20  6:00 UTC (permalink / raw)
  To: Tejun Heo, Rafael J. Wysocki, Alan Stern
  Cc: Jeff Garzik, James Bottomley, Jeff Wu, Aaron Lu, linux-ide,
	linux-pm, linux-scsi, linux-acpi
On 11/19/2012 10:56 PM, Tejun Heo wrote:
> Hey, Aaron.
> 
> On Mon, Nov 19, 2012 at 11:09:40AM +0800, Aaron Lu wrote:
>>> What I'm confused about is what autopm does for devices w/o zpodd.
>>> What happens then?  Is it gonna leave power on for the device and,
>>> say, go on to suspend the controller?  But, how would that work for,
>>> say, future devices with async notification for media events?
>>
>> Maybe we shouldn't allow autopm for such devices?
> 
> Yeah, maybe.  It would be nice to be able to automatically power off
> disks and ports which aren't being used tho.
Yes, we can do this.
I'm just saying, if an ODD is using async notification, we probably
shouldn't enable autopm for it at the moment.
> 
>>> That said, I can't say the snooping is pretty.  It's a rather nasty
>>> thing to do.  So, libata now wants information from the event polling
>>> in block layer, but reaching for block_device from ata_devices is
>>> nasty too.  Hmmm... but aren't you already doing that to block polling
>>> on a powered down device?
>>
>> I was feeling brain damaged by this for some time :-)
>>
>> Basically, only ATA layer is aware of the power off thing, and sr knows
>> nothing about this(or it is not supposed to know this, at least, this is
>> what SCSI people think) and once powered off, I do not want the poll to
>> disturb the device, so I need to block the poll. I can't come up with
>> another way to achieve this except this nasty way.
>>
>> James suggests me to keep the poll, but emulate the command. The problem
>> with this is, the autopm for resume will kick in on each poll, so I'll
>> need to decide if power up the ODD for this time's resume is needed in
>> port's runtime resume callback. This made things complex and it also put
>> too much logic in the resume callback, which is not desired. And even if
>> I keep the ODD in powered off state by emulating this poll command, its
>> ancestor devices will still be resumed, and I may need to do some trick
>> in their resume callback to avoid needless power/state transitions. This
>> doesn't feel like an elegant way to solve this either.
>>
>> So yes, I'm still using this _nasty_ way to make the ODD stay in powered
>> off state as long as possible. But if there is other elegant ways to
>> solve this, I would appreciate it and happily using it. Personally, I
>> hope we can make sr aware of ZPODD, that would make the pain gone.
> 
> I really think we need a way for (auto)pm and event polling to talk to
> each other so that autopm can tell event poll to sod off while pm is
> in effect.  Trying to solve this from inside libata doesn't seem
> right.  The problem, again, seems to be figuring out which hardware
> device maps to which block device.  Hmmm... Any good ideas?
A possible way of doing this is using pm qos.
We currently have 2 pm qos flags, NO_POWER_OFF and REMOTE_WAKEUP, and we
can add another one: NO_POLL, use it like the following:
1 Set the NO_POLL pm qos flag when the underlying driver thinks it is no
  longer necessary. In the ZPODD's case, it should be set when the
  device is to be powered off;
2 Clear it when poll is necessary again. In the ZPODD's case, when power
  is re-gained, this flag will be cleared.
3 In the disk_events_workfn, check if this flag is set, if so, simply
  return.
The disk->driverfs_dev can be used to host the pm qos flag, ATA layer
can access it through ata_device->sdev->sdev_gendev.
Is this OK?
Thanks,
Aaron
^ permalink raw reply	[flat|nested] 85+ messages in thread 
- * Re: [PATCH v9 06/10] ata: zpodd: check zero power ready status
  2012-11-20  6:00             ` Aaron Lu
@ 2012-11-20  8:59               ` Aaron Lu
  2012-11-26  0:50                 ` Rafael J. Wysocki
  0 siblings, 1 reply; 85+ messages in thread
From: Aaron Lu @ 2012-11-20  8:59 UTC (permalink / raw)
  To: Tejun Heo, Rafael J. Wysocki, Alan Stern
  Cc: Jeff Garzik, James Bottomley, Jeff Wu, linux-ide, linux-pm,
	linux-scsi, linux-acpi
On 11/20/2012 02:00 PM, Aaron Lu wrote:
> On 11/19/2012 10:56 PM, Tejun Heo wrote:
>> I really think we need a way for (auto)pm and event polling to talk to
>> each other so that autopm can tell event poll to sod off while pm is
>> in effect.  Trying to solve this from inside libata doesn't seem
>> right.  The problem, again, seems to be figuring out which hardware
>> device maps to which block device.  Hmmm... Any good ideas?
> 
> A possible way of doing this is using pm qos.
> 
> We currently have 2 pm qos flags, NO_POWER_OFF and REMOTE_WAKEUP, and we
> can add another one: NO_POLL, use it like the following:
> 1 Set the NO_POLL pm qos flag when the underlying driver thinks it is no
>   longer necessary. In the ZPODD's case, it should be set when the
>   device is to be powered off;
> 2 Clear it when poll is necessary again. In the ZPODD's case, when power
>   is re-gained, this flag will be cleared.
> 3 In the disk_events_workfn, check if this flag is set, if so, simply
>   return.
It should be, skip calling disk->fops->check_events, but still queue the
work for next time's poll.
-Aaron
> 
> The disk->driverfs_dev can be used to host the pm qos flag, ATA layer
> can access it through ata_device->sdev->sdev_gendev.
> 
> Is this OK?
> 
> Thanks,
> Aaron
> 
^ permalink raw reply	[flat|nested] 85+ messages in thread 
- * Re: [PATCH v9 06/10] ata: zpodd: check zero power ready status
  2012-11-20  8:59               ` Aaron Lu
@ 2012-11-26  0:50                 ` Rafael J. Wysocki
  2012-11-26  0:48                   ` Aaron Lu
  0 siblings, 1 reply; 85+ messages in thread
From: Rafael J. Wysocki @ 2012-11-26  0:50 UTC (permalink / raw)
  To: Aaron Lu
  Cc: Tejun Heo, Alan Stern, Jeff Garzik, James Bottomley, Jeff Wu,
	linux-ide, linux-pm, linux-scsi, linux-acpi
On Tuesday, November 20, 2012 04:59:57 PM Aaron Lu wrote:
> On 11/20/2012 02:00 PM, Aaron Lu wrote:
> > On 11/19/2012 10:56 PM, Tejun Heo wrote:
> >> I really think we need a way for (auto)pm and event polling to talk to
> >> each other so that autopm can tell event poll to sod off while pm is
> >> in effect.  Trying to solve this from inside libata doesn't seem
> >> right.  The problem, again, seems to be figuring out which hardware
> >> device maps to which block device.  Hmmm... Any good ideas?
> > 
> > A possible way of doing this is using pm qos.
> > 
> > We currently have 2 pm qos flags, NO_POWER_OFF and REMOTE_WAKEUP, and we
> > can add another one: NO_POLL, use it like the following:
> > 1 Set the NO_POLL pm qos flag when the underlying driver thinks it is no
> >   longer necessary. In the ZPODD's case, it should be set when the
> >   device is to be powered off;
> > 2 Clear it when poll is necessary again. In the ZPODD's case, when power
> >   is re-gained, this flag will be cleared.
> 
> 
> > 3 In the disk_events_workfn, check if this flag is set, if so, simply
> >   return.
> 
> It should be, skip calling disk->fops->check_events, but still queue the
> work for next time's poll.
> 
> -Aaron
> 
> > 
> > The disk->driverfs_dev can be used to host the pm qos flag, ATA layer
> > can access it through ata_device->sdev->sdev_gendev.
> > 
> > Is this OK?
No, I don't think so.  PM QoS is about telling the layer that will put the
device into low-power states what states are to be taken into consideration.
In this case, however, we need to tell someone else that the device has been
turned off.  Clearly, we need a way to do that, but not through PM QoS.
Did you consider using pm_runtime_suspended() to check the device status?
Rafael
-- 
I speak only for myself.
Rafael J. Wysocki, Intel Open Source Technology Center.
^ permalink raw reply	[flat|nested] 85+ messages in thread 
- * Re: [PATCH v9 06/10] ata: zpodd: check zero power ready status
  2012-11-26  0:50                 ` Rafael J. Wysocki
@ 2012-11-26  0:48                   ` Aaron Lu
  2012-11-26  1:03                     ` Rafael J. Wysocki
  0 siblings, 1 reply; 85+ messages in thread
From: Aaron Lu @ 2012-11-26  0:48 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: Tejun Heo, Alan Stern, Jeff Garzik, James Bottomley, Jeff Wu,
	linux-ide, linux-pm, linux-scsi, linux-acpi
On 11/26/2012 08:50 AM, Rafael J. Wysocki wrote:
> On Tuesday, November 20, 2012 04:59:57 PM Aaron Lu wrote:
>> On 11/20/2012 02:00 PM, Aaron Lu wrote:
>>> On 11/19/2012 10:56 PM, Tejun Heo wrote:
>>>> I really think we need a way for (auto)pm and event polling to talk to
>>>> each other so that autopm can tell event poll to sod off while pm is
>>>> in effect.  Trying to solve this from inside libata doesn't seem
>>>> right.  The problem, again, seems to be figuring out which hardware
>>>> device maps to which block device.  Hmmm... Any good ideas?
>>>
>>> A possible way of doing this is using pm qos.
>>>
>>> We currently have 2 pm qos flags, NO_POWER_OFF and REMOTE_WAKEUP, and we
>>> can add another one: NO_POLL, use it like the following:
>>> 1 Set the NO_POLL pm qos flag when the underlying driver thinks it is no
>>>   longer necessary. In the ZPODD's case, it should be set when the
>>>   device is to be powered off;
>>> 2 Clear it when poll is necessary again. In the ZPODD's case, when power
>>>   is re-gained, this flag will be cleared.
>>
>>
>>> 3 In the disk_events_workfn, check if this flag is set, if so, simply
>>>   return.
>>
>> It should be, skip calling disk->fops->check_events, but still queue the
>> work for next time's poll.
>>
>> -Aaron
>>
>>>
>>> The disk->driverfs_dev can be used to host the pm qos flag, ATA layer
>>> can access it through ata_device->sdev->sdev_gendev.
>>>
>>> Is this OK?
> 
> No, I don't think so.  PM QoS is about telling the layer that will put the
> device into low-power states what states are to be taken into consideration.
> In this case, however, we need to tell someone else that the device has been
> turned off.  Clearly, we need a way to do that, but not through PM QoS.
> 
> Did you consider using pm_runtime_suspended() to check the device status?
The problem is, a device can be in runtime suspended state while still
needs to be polled...
Thanks,
Aaron
> 
> Rafael
> 
> 
^ permalink raw reply	[flat|nested] 85+ messages in thread 
- * Re: [PATCH v9 06/10] ata: zpodd: check zero power ready status
  2012-11-26  0:48                   ` Aaron Lu
@ 2012-11-26  1:03                     ` Rafael J. Wysocki
  2012-11-26  1:05                       ` Aaron Lu
  0 siblings, 1 reply; 85+ messages in thread
From: Rafael J. Wysocki @ 2012-11-26  1:03 UTC (permalink / raw)
  To: Aaron Lu
  Cc: Tejun Heo, Alan Stern, Jeff Garzik, James Bottomley, Jeff Wu,
	linux-ide, linux-pm, linux-scsi, linux-acpi
On Monday, November 26, 2012 08:48:51 AM Aaron Lu wrote:
> On 11/26/2012 08:50 AM, Rafael J. Wysocki wrote:
> > On Tuesday, November 20, 2012 04:59:57 PM Aaron Lu wrote:
> >> On 11/20/2012 02:00 PM, Aaron Lu wrote:
> >>> On 11/19/2012 10:56 PM, Tejun Heo wrote:
> >>>> I really think we need a way for (auto)pm and event polling to talk to
> >>>> each other so that autopm can tell event poll to sod off while pm is
> >>>> in effect.  Trying to solve this from inside libata doesn't seem
> >>>> right.  The problem, again, seems to be figuring out which hardware
> >>>> device maps to which block device.  Hmmm... Any good ideas?
> >>>
> >>> A possible way of doing this is using pm qos.
> >>>
> >>> We currently have 2 pm qos flags, NO_POWER_OFF and REMOTE_WAKEUP, and we
> >>> can add another one: NO_POLL, use it like the following:
> >>> 1 Set the NO_POLL pm qos flag when the underlying driver thinks it is no
> >>>   longer necessary. In the ZPODD's case, it should be set when the
> >>>   device is to be powered off;
> >>> 2 Clear it when poll is necessary again. In the ZPODD's case, when power
> >>>   is re-gained, this flag will be cleared.
> >>
> >>
> >>> 3 In the disk_events_workfn, check if this flag is set, if so, simply
> >>>   return.
> >>
> >> It should be, skip calling disk->fops->check_events, but still queue the
> >> work for next time's poll.
> >>
> >> -Aaron
> >>
> >>>
> >>> The disk->driverfs_dev can be used to host the pm qos flag, ATA layer
> >>> can access it through ata_device->sdev->sdev_gendev.
> >>>
> >>> Is this OK?
> > 
> > No, I don't think so.  PM QoS is about telling the layer that will put the
> > device into low-power states what states are to be taken into consideration.
> > In this case, however, we need to tell someone else that the device has been
> > turned off.  Clearly, we need a way to do that, but not through PM QoS.
> > 
> > Did you consider using pm_runtime_suspended() to check the device status?
> 
> The problem is, a device can be in runtime suspended state while still
> needs to be polled...
Well, maybe this is the problem, then?  Why does it need to be polled when
suspended?
Rafael
-- 
I speak only for myself.
Rafael J. Wysocki, Intel Open Source Technology Center.
^ permalink raw reply	[flat|nested] 85+ messages in thread 
- * Re: [PATCH v9 06/10] ata: zpodd: check zero power ready status
  2012-11-26  1:03                     ` Rafael J. Wysocki
@ 2012-11-26  1:05                       ` Aaron Lu
  2012-11-26  1:11                         ` Rafael J. Wysocki
  0 siblings, 1 reply; 85+ messages in thread
From: Aaron Lu @ 2012-11-26  1:05 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: Tejun Heo, Alan Stern, Jeff Garzik, James Bottomley, Jeff Wu,
	linux-ide, linux-pm, linux-scsi, linux-acpi
On 11/26/2012 09:03 AM, Rafael J. Wysocki wrote:
> On Monday, November 26, 2012 08:48:51 AM Aaron Lu wrote:
>> On 11/26/2012 08:50 AM, Rafael J. Wysocki wrote:
>>> On Tuesday, November 20, 2012 04:59:57 PM Aaron Lu wrote:
>>>> On 11/20/2012 02:00 PM, Aaron Lu wrote:
>>>>> On 11/19/2012 10:56 PM, Tejun Heo wrote:
>>>>>> I really think we need a way for (auto)pm and event polling to talk to
>>>>>> each other so that autopm can tell event poll to sod off while pm is
>>>>>> in effect.  Trying to solve this from inside libata doesn't seem
>>>>>> right.  The problem, again, seems to be figuring out which hardware
>>>>>> device maps to which block device.  Hmmm... Any good ideas?
>>>>>
>>>>> A possible way of doing this is using pm qos.
>>>>>
>>>>> We currently have 2 pm qos flags, NO_POWER_OFF and REMOTE_WAKEUP, and we
>>>>> can add another one: NO_POLL, use it like the following:
>>>>> 1 Set the NO_POLL pm qos flag when the underlying driver thinks it is no
>>>>>   longer necessary. In the ZPODD's case, it should be set when the
>>>>>   device is to be powered off;
>>>>> 2 Clear it when poll is necessary again. In the ZPODD's case, when power
>>>>>   is re-gained, this flag will be cleared.
>>>>
>>>>
>>>>> 3 In the disk_events_workfn, check if this flag is set, if so, simply
>>>>>   return.
>>>>
>>>> It should be, skip calling disk->fops->check_events, but still queue the
>>>> work for next time's poll.
>>>>
>>>> -Aaron
>>>>
>>>>>
>>>>> The disk->driverfs_dev can be used to host the pm qos flag, ATA layer
>>>>> can access it through ata_device->sdev->sdev_gendev.
>>>>>
>>>>> Is this OK?
>>>
>>> No, I don't think so.  PM QoS is about telling the layer that will put the
>>> device into low-power states what states are to be taken into consideration.
>>> In this case, however, we need to tell someone else that the device has been
>>> turned off.  Clearly, we need a way to do that, but not through PM QoS.
>>>
>>> Did you consider using pm_runtime_suspended() to check the device status?
>>
>> The problem is, a device can be in runtime suspended state while still
>> needs to be polled...
> 
> Well, maybe this is the problem, then?  Why does it need to be polled when
> suspended?
For ODDs, poll is not necessary only when ZP capable ODD is powered off.
For other ODDs, poll still needs to go on.
ZP capable ODDs:
  - runtime suspended, power remained(due to NO_POWER_OFF qos flag)
    poll is needed
  -- runtime suspended, power removed
    poll is not needed
Non ZP capable ODDs:
  -- runtime suspended, power remained (power will never be removed)
    poll is needed
If we do not poll for the powered on case, we will lose media change
event.
Thanks,
Aaron
> 
> Rafael
> 
> 
^ permalink raw reply	[flat|nested] 85+ messages in thread
- * Re: [PATCH v9 06/10] ata: zpodd: check zero power ready status
  2012-11-26  1:05                       ` Aaron Lu
@ 2012-11-26  1:11                         ` Rafael J. Wysocki
  2012-11-26  1:09                           ` Aaron Lu
  2012-11-26  1:17                           ` Aaron Lu
  0 siblings, 2 replies; 85+ messages in thread
From: Rafael J. Wysocki @ 2012-11-26  1:11 UTC (permalink / raw)
  To: Aaron Lu
  Cc: Tejun Heo, Alan Stern, Jeff Garzik, James Bottomley, Jeff Wu,
	linux-ide, linux-pm, linux-scsi, linux-acpi
On Monday, November 26, 2012 09:05:58 AM Aaron Lu wrote:
> On 11/26/2012 09:03 AM, Rafael J. Wysocki wrote:
> > On Monday, November 26, 2012 08:48:51 AM Aaron Lu wrote:
> >> On 11/26/2012 08:50 AM, Rafael J. Wysocki wrote:
> >>> On Tuesday, November 20, 2012 04:59:57 PM Aaron Lu wrote:
> >>>> On 11/20/2012 02:00 PM, Aaron Lu wrote:
> >>>>> On 11/19/2012 10:56 PM, Tejun Heo wrote:
> >>>>>> I really think we need a way for (auto)pm and event polling to talk to
> >>>>>> each other so that autopm can tell event poll to sod off while pm is
> >>>>>> in effect.  Trying to solve this from inside libata doesn't seem
> >>>>>> right.  The problem, again, seems to be figuring out which hardware
> >>>>>> device maps to which block device.  Hmmm... Any good ideas?
> >>>>>
> >>>>> A possible way of doing this is using pm qos.
> >>>>>
> >>>>> We currently have 2 pm qos flags, NO_POWER_OFF and REMOTE_WAKEUP, and we
> >>>>> can add another one: NO_POLL, use it like the following:
> >>>>> 1 Set the NO_POLL pm qos flag when the underlying driver thinks it is no
> >>>>>   longer necessary. In the ZPODD's case, it should be set when the
> >>>>>   device is to be powered off;
> >>>>> 2 Clear it when poll is necessary again. In the ZPODD's case, when power
> >>>>>   is re-gained, this flag will be cleared.
> >>>>
> >>>>
> >>>>> 3 In the disk_events_workfn, check if this flag is set, if so, simply
> >>>>>   return.
> >>>>
> >>>> It should be, skip calling disk->fops->check_events, but still queue the
> >>>> work for next time's poll.
> >>>>
> >>>> -Aaron
> >>>>
> >>>>>
> >>>>> The disk->driverfs_dev can be used to host the pm qos flag, ATA layer
> >>>>> can access it through ata_device->sdev->sdev_gendev.
> >>>>>
> >>>>> Is this OK?
> >>>
> >>> No, I don't think so.  PM QoS is about telling the layer that will put the
> >>> device into low-power states what states are to be taken into consideration.
> >>> In this case, however, we need to tell someone else that the device has been
> >>> turned off.  Clearly, we need a way to do that, but not through PM QoS.
> >>>
> >>> Did you consider using pm_runtime_suspended() to check the device status?
> >>
> >> The problem is, a device can be in runtime suspended state while still
> >> needs to be polled...
> > 
> > Well, maybe this is the problem, then?  Why does it need to be polled when
> > suspended?
> 
> For ODDs, poll is not necessary only when ZP capable ODD is powered off.
> For other ODDs, poll still needs to go on.
> 
> ZP capable ODDs:
>   - runtime suspended, power remained(due to NO_POWER_OFF qos flag)
>     poll is needed
>   -- runtime suspended, power removed
>     poll is not needed
> 
> Non ZP capable ODDs:
>   -- runtime suspended, power remained (power will never be removed)
>     poll is needed
> 
> If we do not poll for the powered on case, we will lose media change
> event.
But the media change event should change the status from suspended to active,
shouldn't it?
Rafael
-- 
I speak only for myself.
Rafael J. Wysocki, Intel Open Source Technology Center.
^ permalink raw reply	[flat|nested] 85+ messages in thread 
- * Re: [PATCH v9 06/10] ata: zpodd: check zero power ready status
  2012-11-26  1:11                         ` Rafael J. Wysocki
@ 2012-11-26  1:09                           ` Aaron Lu
  2012-11-26  1:22                             ` Rafael J. Wysocki
  2012-11-26  1:17                           ` Aaron Lu
  1 sibling, 1 reply; 85+ messages in thread
From: Aaron Lu @ 2012-11-26  1:09 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: Tejun Heo, Alan Stern, Jeff Garzik, James Bottomley, Jeff Wu,
	linux-ide, linux-pm, linux-scsi, linux-acpi
On 11/26/2012 09:11 AM, Rafael J. Wysocki wrote:
> On Monday, November 26, 2012 09:05:58 AM Aaron Lu wrote:
>> On 11/26/2012 09:03 AM, Rafael J. Wysocki wrote:
>>> On Monday, November 26, 2012 08:48:51 AM Aaron Lu wrote:
>>>> On 11/26/2012 08:50 AM, Rafael J. Wysocki wrote:
>>>>> On Tuesday, November 20, 2012 04:59:57 PM Aaron Lu wrote:
>>>>>> On 11/20/2012 02:00 PM, Aaron Lu wrote:
>>>>>>> On 11/19/2012 10:56 PM, Tejun Heo wrote:
>>>>>>>> I really think we need a way for (auto)pm and event polling to talk to
>>>>>>>> each other so that autopm can tell event poll to sod off while pm is
>>>>>>>> in effect.  Trying to solve this from inside libata doesn't seem
>>>>>>>> right.  The problem, again, seems to be figuring out which hardware
>>>>>>>> device maps to which block device.  Hmmm... Any good ideas?
>>>>>>>
>>>>>>> A possible way of doing this is using pm qos.
>>>>>>>
>>>>>>> We currently have 2 pm qos flags, NO_POWER_OFF and REMOTE_WAKEUP, and we
>>>>>>> can add another one: NO_POLL, use it like the following:
>>>>>>> 1 Set the NO_POLL pm qos flag when the underlying driver thinks it is no
>>>>>>>   longer necessary. In the ZPODD's case, it should be set when the
>>>>>>>   device is to be powered off;
>>>>>>> 2 Clear it when poll is necessary again. In the ZPODD's case, when power
>>>>>>>   is re-gained, this flag will be cleared.
>>>>>>
>>>>>>
>>>>>>> 3 In the disk_events_workfn, check if this flag is set, if so, simply
>>>>>>>   return.
>>>>>>
>>>>>> It should be, skip calling disk->fops->check_events, but still queue the
>>>>>> work for next time's poll.
>>>>>>
>>>>>> -Aaron
>>>>>>
>>>>>>>
>>>>>>> The disk->driverfs_dev can be used to host the pm qos flag, ATA layer
>>>>>>> can access it through ata_device->sdev->sdev_gendev.
>>>>>>>
>>>>>>> Is this OK?
>>>>>
>>>>> No, I don't think so.  PM QoS is about telling the layer that will put the
>>>>> device into low-power states what states are to be taken into consideration.
>>>>> In this case, however, we need to tell someone else that the device has been
>>>>> turned off.  Clearly, we need a way to do that, but not through PM QoS.
>>>>>
>>>>> Did you consider using pm_runtime_suspended() to check the device status?
>>>>
>>>> The problem is, a device can be in runtime suspended state while still
>>>> needs to be polled...
>>>
>>> Well, maybe this is the problem, then?  Why does it need to be polled when
>>> suspended?
>>
>> For ODDs, poll is not necessary only when ZP capable ODD is powered off.
>> For other ODDs, poll still needs to go on.
>>
>> ZP capable ODDs:
>>   - runtime suspended, power remained(due to NO_POWER_OFF qos flag)
>>     poll is needed
>>   -- runtime suspended, power removed
>>     poll is not needed
>>
>> Non ZP capable ODDs:
>>   -- runtime suspended, power remained (power will never be removed)
>>     poll is needed
>>
>> If we do not poll for the powered on case, we will lose media change
>> event.
> 
> But the media change event should change the status from suspended to active,
> shouldn't it?
The media change event is derived from the poll, if we stop the poll, how
can we know the event in the first place?
Thanks,
Aaron
> 
> Rafael
> 
> 
^ permalink raw reply	[flat|nested] 85+ messages in thread 
- * Re: [PATCH v9 06/10] ata: zpodd: check zero power ready status
  2012-11-26  1:09                           ` Aaron Lu
@ 2012-11-26  1:22                             ` Rafael J. Wysocki
  2012-11-26  1:22                               ` Aaron Lu
  0 siblings, 1 reply; 85+ messages in thread
From: Rafael J. Wysocki @ 2012-11-26  1:22 UTC (permalink / raw)
  To: Aaron Lu
  Cc: Tejun Heo, Alan Stern, Jeff Garzik, James Bottomley, Jeff Wu,
	linux-ide, linux-pm, linux-scsi, linux-acpi
On Monday, November 26, 2012 09:09:36 AM Aaron Lu wrote:
> On 11/26/2012 09:11 AM, Rafael J. Wysocki wrote:
> > On Monday, November 26, 2012 09:05:58 AM Aaron Lu wrote:
> >> On 11/26/2012 09:03 AM, Rafael J. Wysocki wrote:
> >>> On Monday, November 26, 2012 08:48:51 AM Aaron Lu wrote:
> >>>> On 11/26/2012 08:50 AM, Rafael J. Wysocki wrote:
> >>>>> On Tuesday, November 20, 2012 04:59:57 PM Aaron Lu wrote:
> >>>>>> On 11/20/2012 02:00 PM, Aaron Lu wrote:
> >>>>>>> On 11/19/2012 10:56 PM, Tejun Heo wrote:
> >>>>>>>> I really think we need a way for (auto)pm and event polling to talk to
> >>>>>>>> each other so that autopm can tell event poll to sod off while pm is
> >>>>>>>> in effect.  Trying to solve this from inside libata doesn't seem
> >>>>>>>> right.  The problem, again, seems to be figuring out which hardware
> >>>>>>>> device maps to which block device.  Hmmm... Any good ideas?
> >>>>>>>
> >>>>>>> A possible way of doing this is using pm qos.
> >>>>>>>
> >>>>>>> We currently have 2 pm qos flags, NO_POWER_OFF and REMOTE_WAKEUP, and we
> >>>>>>> can add another one: NO_POLL, use it like the following:
> >>>>>>> 1 Set the NO_POLL pm qos flag when the underlying driver thinks it is no
> >>>>>>>   longer necessary. In the ZPODD's case, it should be set when the
> >>>>>>>   device is to be powered off;
> >>>>>>> 2 Clear it when poll is necessary again. In the ZPODD's case, when power
> >>>>>>>   is re-gained, this flag will be cleared.
> >>>>>>
> >>>>>>
> >>>>>>> 3 In the disk_events_workfn, check if this flag is set, if so, simply
> >>>>>>>   return.
> >>>>>>
> >>>>>> It should be, skip calling disk->fops->check_events, but still queue the
> >>>>>> work for next time's poll.
> >>>>>>
> >>>>>> -Aaron
> >>>>>>
> >>>>>>>
> >>>>>>> The disk->driverfs_dev can be used to host the pm qos flag, ATA layer
> >>>>>>> can access it through ata_device->sdev->sdev_gendev.
> >>>>>>>
> >>>>>>> Is this OK?
> >>>>>
> >>>>> No, I don't think so.  PM QoS is about telling the layer that will put the
> >>>>> device into low-power states what states are to be taken into consideration.
> >>>>> In this case, however, we need to tell someone else that the device has been
> >>>>> turned off.  Clearly, we need a way to do that, but not through PM QoS.
> >>>>>
> >>>>> Did you consider using pm_runtime_suspended() to check the device status?
> >>>>
> >>>> The problem is, a device can be in runtime suspended state while still
> >>>> needs to be polled...
> >>>
> >>> Well, maybe this is the problem, then?  Why does it need to be polled when
> >>> suspended?
> >>
> >> For ODDs, poll is not necessary only when ZP capable ODD is powered off.
> >> For other ODDs, poll still needs to go on.
> >>
> >> ZP capable ODDs:
> >>   - runtime suspended, power remained(due to NO_POWER_OFF qos flag)
> >>     poll is needed
> >>   -- runtime suspended, power removed
> >>     poll is not needed
> >>
> >> Non ZP capable ODDs:
> >>   -- runtime suspended, power remained (power will never be removed)
> >>     poll is needed
> >>
> >> If we do not poll for the powered on case, we will lose media change
> >> event.
> > 
> > But the media change event should change the status from suspended to active,
> > shouldn't it?
> 
> The media change event is derived from the poll, if we stop the poll, how
> can we know the event in the first place?
OK, so what you're trying to say is that if the device is not turned off
and the user opens the tray and inserts a media in there, we won't know that
that happened without polling.  Is that correct?
If so, can you please remind me why we want to pretend that the device is
suspended if we want to poll it?
Rafael
-- 
I speak only for myself.
Rafael J. Wysocki, Intel Open Source Technology Center.
^ permalink raw reply	[flat|nested] 85+ messages in thread 
- * Re: [PATCH v9 06/10] ata: zpodd: check zero power ready status
  2012-11-26  1:22                             ` Rafael J. Wysocki
@ 2012-11-26  1:22                               ` Aaron Lu
  0 siblings, 0 replies; 85+ messages in thread
From: Aaron Lu @ 2012-11-26  1:22 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: Tejun Heo, Alan Stern, Jeff Garzik, James Bottomley, Jeff Wu,
	linux-ide, linux-pm, linux-scsi, linux-acpi
On 11/26/2012 09:22 AM, Rafael J. Wysocki wrote:
> On Monday, November 26, 2012 09:09:36 AM Aaron Lu wrote:
>> On 11/26/2012 09:11 AM, Rafael J. Wysocki wrote:
>>> On Monday, November 26, 2012 09:05:58 AM Aaron Lu wrote:
>>>> On 11/26/2012 09:03 AM, Rafael J. Wysocki wrote:
>>>>> On Monday, November 26, 2012 08:48:51 AM Aaron Lu wrote:
>>>>>> On 11/26/2012 08:50 AM, Rafael J. Wysocki wrote:
>>>>>>> On Tuesday, November 20, 2012 04:59:57 PM Aaron Lu wrote:
>>>>>>>> On 11/20/2012 02:00 PM, Aaron Lu wrote:
>>>>>>>>> On 11/19/2012 10:56 PM, Tejun Heo wrote:
>>>>>>>>>> I really think we need a way for (auto)pm and event polling to talk to
>>>>>>>>>> each other so that autopm can tell event poll to sod off while pm is
>>>>>>>>>> in effect.  Trying to solve this from inside libata doesn't seem
>>>>>>>>>> right.  The problem, again, seems to be figuring out which hardware
>>>>>>>>>> device maps to which block device.  Hmmm... Any good ideas?
>>>>>>>>>
>>>>>>>>> A possible way of doing this is using pm qos.
>>>>>>>>>
>>>>>>>>> We currently have 2 pm qos flags, NO_POWER_OFF and REMOTE_WAKEUP, and we
>>>>>>>>> can add another one: NO_POLL, use it like the following:
>>>>>>>>> 1 Set the NO_POLL pm qos flag when the underlying driver thinks it is no
>>>>>>>>>   longer necessary. In the ZPODD's case, it should be set when the
>>>>>>>>>   device is to be powered off;
>>>>>>>>> 2 Clear it when poll is necessary again. In the ZPODD's case, when power
>>>>>>>>>   is re-gained, this flag will be cleared.
>>>>>>>>
>>>>>>>>
>>>>>>>>> 3 In the disk_events_workfn, check if this flag is set, if so, simply
>>>>>>>>>   return.
>>>>>>>>
>>>>>>>> It should be, skip calling disk->fops->check_events, but still queue the
>>>>>>>> work for next time's poll.
>>>>>>>>
>>>>>>>> -Aaron
>>>>>>>>
>>>>>>>>>
>>>>>>>>> The disk->driverfs_dev can be used to host the pm qos flag, ATA layer
>>>>>>>>> can access it through ata_device->sdev->sdev_gendev.
>>>>>>>>>
>>>>>>>>> Is this OK?
>>>>>>>
>>>>>>> No, I don't think so.  PM QoS is about telling the layer that will put the
>>>>>>> device into low-power states what states are to be taken into consideration.
>>>>>>> In this case, however, we need to tell someone else that the device has been
>>>>>>> turned off.  Clearly, we need a way to do that, but not through PM QoS.
>>>>>>>
>>>>>>> Did you consider using pm_runtime_suspended() to check the device status?
>>>>>>
>>>>>> The problem is, a device can be in runtime suspended state while still
>>>>>> needs to be polled...
>>>>>
>>>>> Well, maybe this is the problem, then?  Why does it need to be polled when
>>>>> suspended?
>>>>
>>>> For ODDs, poll is not necessary only when ZP capable ODD is powered off.
>>>> For other ODDs, poll still needs to go on.
>>>>
>>>> ZP capable ODDs:
>>>>   - runtime suspended, power remained(due to NO_POWER_OFF qos flag)
>>>>     poll is needed
>>>>   -- runtime suspended, power removed
>>>>     poll is not needed
>>>>
>>>> Non ZP capable ODDs:
>>>>   -- runtime suspended, power remained (power will never be removed)
>>>>     poll is needed
>>>>
>>>> If we do not poll for the powered on case, we will lose media change
>>>> event.
>>>
>>> But the media change event should change the status from suspended to active,
>>> shouldn't it?
>>
>> The media change event is derived from the poll, if we stop the poll, how
>> can we know the event in the first place?
> 
> OK, so what you're trying to say is that if the device is not turned off
> and the user opens the tray and inserts a media in there, we won't know that
> that happened without polling.  Is that correct?
Yes.
> 
> If so, can you please remind me why we want to pretend that the device is
> suspended if we want to poll it?
We do not pretend the device is suspended, it is :-)
Even though we want to poll it, we are not polling it all the time, we
still have the poll interval, where the device and its ancestor devices
can enter runtime suspended state.
The timing to idle the device is decided by SCSI sr driver, and why it
is done this way is discussed here:
http://thread.gmane.org/gmane.linux.acpi.devel/55243/focus=52703
Thanks,
Aaron
> 
> Rafael
> 
> 
^ permalink raw reply	[flat|nested] 85+ messages in thread 
 
 
- * Re: [PATCH v9 06/10] ata: zpodd: check zero power ready status
  2012-11-26  1:11                         ` Rafael J. Wysocki
  2012-11-26  1:09                           ` Aaron Lu
@ 2012-11-26  1:17                           ` Aaron Lu
  1 sibling, 0 replies; 85+ messages in thread
From: Aaron Lu @ 2012-11-26  1:17 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: Tejun Heo, Alan Stern, Jeff Garzik, James Bottomley, Jeff Wu,
	linux-ide, linux-pm, linux-scsi, linux-acpi
On 11/26/2012 09:11 AM, Rafael J. Wysocki wrote:
> On Monday, November 26, 2012 09:05:58 AM Aaron Lu wrote:
>> On 11/26/2012 09:03 AM, Rafael J. Wysocki wrote:
>>> On Monday, November 26, 2012 08:48:51 AM Aaron Lu wrote:
>>>> On 11/26/2012 08:50 AM, Rafael J. Wysocki wrote:
>>>>> On Tuesday, November 20, 2012 04:59:57 PM Aaron Lu wrote:
>>>>>> On 11/20/2012 02:00 PM, Aaron Lu wrote:
>>>>>>> On 11/19/2012 10:56 PM, Tejun Heo wrote:
>>>>>>>> I really think we need a way for (auto)pm and event polling to talk to
>>>>>>>> each other so that autopm can tell event poll to sod off while pm is
>>>>>>>> in effect.  Trying to solve this from inside libata doesn't seem
>>>>>>>> right.  The problem, again, seems to be figuring out which hardware
>>>>>>>> device maps to which block device.  Hmmm... Any good ideas?
>>>>>>>
>>>>>>> A possible way of doing this is using pm qos.
>>>>>>>
>>>>>>> We currently have 2 pm qos flags, NO_POWER_OFF and REMOTE_WAKEUP, and we
>>>>>>> can add another one: NO_POLL, use it like the following:
>>>>>>> 1 Set the NO_POLL pm qos flag when the underlying driver thinks it is no
>>>>>>>   longer necessary. In the ZPODD's case, it should be set when the
>>>>>>>   device is to be powered off;
>>>>>>> 2 Clear it when poll is necessary again. In the ZPODD's case, when power
>>>>>>>   is re-gained, this flag will be cleared.
>>>>>>
>>>>>>
>>>>>>> 3 In the disk_events_workfn, check if this flag is set, if so, simply
>>>>>>>   return.
>>>>>>
>>>>>> It should be, skip calling disk->fops->check_events, but still queue the
>>>>>> work for next time's poll.
>>>>>>
>>>>>> -Aaron
>>>>>>
>>>>>>>
>>>>>>> The disk->driverfs_dev can be used to host the pm qos flag, ATA layer
>>>>>>> can access it through ata_device->sdev->sdev_gendev.
>>>>>>>
>>>>>>> Is this OK?
>>>>>
>>>>> No, I don't think so.  PM QoS is about telling the layer that will put the
>>>>> device into low-power states what states are to be taken into consideration.
>>>>> In this case, however, we need to tell someone else that the device has been
>>>>> turned off.  Clearly, we need a way to do that, but not through PM QoS.
>>>>>
>>>>> Did you consider using pm_runtime_suspended() to check the device status?
>>>>
>>>> The problem is, a device can be in runtime suspended state while still
>>>> needs to be polled...
>>>
>>> Well, maybe this is the problem, then?  Why does it need to be polled when
>>> suspended?
>>
>> For ODDs, poll is not necessary only when ZP capable ODD is powered off.
>> For other ODDs, poll still needs to go on.
>>
>> ZP capable ODDs:
>>   - runtime suspended, power remained(due to NO_POWER_OFF qos flag)
>>     poll is needed
>>   -- runtime suspended, power removed
>>     poll is not needed
>>
>> Non ZP capable ODDs:
>>   -- runtime suspended, power remained (power will never be removed)
>>     poll is needed
>>
>> If we do not poll for the powered on case, we will lose media change
>> event.
> 
> But the media change event should change the status from suspended to active,
> shouldn't it?
We need a way PM code can tell block layer that it is not necessary to
poll the device now, and runtime suspended is not enough, so we need
another way.
Thanks,
Aaron
> 
> Rafael
> 
> 
^ permalink raw reply	[flat|nested] 85+ messages in thread 
 
 
 
 
 
 
 
 
 
 
 
 
 
- * [PATCH v9 07/10] block: add a new interface to block events
  2012-11-09  6:51 [PATCH v9 00/10] ZPODD Patches Aaron Lu
                   ` (5 preceding siblings ...)
  2012-11-09  6:51 ` [PATCH v9 06/10] ata: zpodd: check zero power ready status Aaron Lu
@ 2012-11-09  6:51 ` Aaron Lu
  2012-11-12 19:14   ` Tejun Heo
  2012-11-09  6:52 ` [PATCH v9 08/10] scsi: sr: support (un)block events Aaron Lu
                   ` (2 subsequent siblings)
  9 siblings, 1 reply; 85+ messages in thread
From: Aaron Lu @ 2012-11-09  6:51 UTC (permalink / raw)
  To: Jeff Garzik, James Bottomley, Rafael J. Wysocki, Alan Stern,
	Tejun Heo
  Cc: Jeff Wu, Aaron Lu, linux-ide, linux-pm, linux-scsi, linux-acpi,
	Aaron Lu
A new interface to block disk events is added, this interface is
meant to eliminate a race between PM runtime callback and disk events
checking.
Suppose the following device tree:
device_sata_port  (the parent)
  device_ODD      (the child)
When ODD is runtime suspended, sata port will have a chance to enter
runtime suspended state. And in sata port's runtime suspend callback,
it will check if it is OK to omit the power of the ODD. And if yes, the
periodically running events checking work will be stopped, as the ODD
will be waken up by that check and cancel it can make the ODD stay in
zero power state much longer(no worry about how the ODD gets media
change event in ZPODD's case).
I used disk_block_events to do the events blocking, but there is a race
and can lead to a deadlock: when I call disk_block_events in sata port's
runtime suspend callback, the events checking work may already be running
and it will resume the ODD synchronously, and PM core will try to resume
its parent, the sata port first. The PM core makes sure that runtime
resume callback does not run concurrently with runtime suspend callback,
and if the runtime suspend callback is running, the runtime resume
callback will wait for it done. So the events checking work will wait
for sata port's runtime suspend callback, while the sata port's runtime
suspend callback is waiting for the disk events work to finish. Deadlock.
ODD_suspend                        disk_events_workfn
  ata_port_suspend                   check_events
    disk_block_events                  resume ODD
      cancel_delayed_work_sync           resume parent
      (waiting for disk_events_workfn)   (waiting for suspend callback)
So a new function disk_try_block_events is added, it will try to
cancel the delayed work if it is pending. If succeed, disk_block_events
will be called and we are done; if failed, false is returned without
doing anything. In this way, the race can be avoided.
The newly added interface and the disk_unblock_events are exported, as
sr driver will need to use them to block/unblock disk events.
Signed-off-by: Aaron Lu <aaron.lu@intel.com>
---
 block/genhd.c         | 26 ++++++++++++++++++++++++++
 include/linux/genhd.h |  1 +
 2 files changed, 27 insertions(+)
diff --git a/block/genhd.c b/block/genhd.c
index 6cace66..8632fd3 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -1469,6 +1469,31 @@ void disk_block_events(struct gendisk *disk)
 	mutex_unlock(&ev->block_mutex);
 }
 
+/*
+ * Under some circumstances, there is a race between the calling thread
+ * of disk_block_events and the events checking function. To avoid such a race,
+ * this function will check if the delayed work is pending. If not, it means
+ * the work is either not queued or is already running, false is returned.
+ * And if yes, try to cancel the delayed work. If succedded, disk_block_events
+ * will be called and there is no worry that cancel_delayed_work_sync will
+ * deadlock the events checking function. And if failed, false is returned.
+ */
+bool disk_try_block_events(struct gendisk *disk)
+{
+	struct disk_events *ev = disk->ev;
+
+	if (!ev)
+		return false;
+
+	if (cancel_delayed_work(&disk->ev->dwork)) {
+		disk_block_events(disk);
+		return true;
+	}
+
+	return false;
+}
+EXPORT_SYMBOL(disk_try_block_events);
+
 static void __disk_unblock_events(struct gendisk *disk, bool check_now)
 {
 	struct disk_events *ev = disk->ev;
@@ -1512,6 +1537,7 @@ void disk_unblock_events(struct gendisk *disk)
 	if (disk->ev)
 		__disk_unblock_events(disk, false);
 }
+EXPORT_SYMBOL(disk_unblock_events);
 
 /**
  * disk_flush_events - schedule immediate event checking and flushing
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 4f440b3..b67247f 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -420,6 +420,7 @@ static inline int get_disk_ro(struct gendisk *disk)
 }
 
 extern void disk_block_events(struct gendisk *disk);
+extern bool disk_try_block_events(struct gendisk *disk);
 extern void disk_unblock_events(struct gendisk *disk);
 extern void disk_flush_events(struct gendisk *disk, unsigned int mask);
 extern unsigned int disk_clear_events(struct gendisk *disk, unsigned int mask);
-- 
1.7.12.4
^ permalink raw reply related	[flat|nested] 85+ messages in thread
- * Re: [PATCH v9 07/10] block: add a new interface to block events
  2012-11-09  6:51 ` [PATCH v9 07/10] block: add a new interface to block events Aaron Lu
@ 2012-11-12 19:14   ` Tejun Heo
  2012-11-12 19:18     ` Alan Stern
  0 siblings, 1 reply; 85+ messages in thread
From: Tejun Heo @ 2012-11-12 19:14 UTC (permalink / raw)
  To: Aaron Lu
  Cc: Jeff Garzik, James Bottomley, Rafael J. Wysocki, Alan Stern,
	Jeff Wu, Aaron Lu, linux-ide, linux-pm, linux-scsi, linux-acpi
On Fri, Nov 09, 2012 at 02:51:59PM +0800, Aaron Lu wrote:
> A new interface to block disk events is added, this interface is
> meant to eliminate a race between PM runtime callback and disk events
> checking.
> 
> Suppose the following device tree:
> device_sata_port  (the parent)
>   device_ODD      (the child)
Weren't you gonna do something different about this?  I mean, if sr
knows that autopm kicked in, it can simply tell the block layer that
nothing is going on.  Wouldn't that be simpler?
Thanks.
-- 
tejun
^ permalink raw reply	[flat|nested] 85+ messages in thread 
- * Re: [PATCH v9 07/10] block: add a new interface to block events
  2012-11-12 19:14   ` Tejun Heo
@ 2012-11-12 19:18     ` Alan Stern
  2012-11-12 19:21       ` Tejun Heo
  0 siblings, 1 reply; 85+ messages in thread
From: Alan Stern @ 2012-11-12 19:18 UTC (permalink / raw)
  To: Tejun Heo
  Cc: Aaron Lu, Jeff Garzik, James Bottomley, Rafael J. Wysocki,
	Jeff Wu, Aaron Lu, linux-ide, linux-pm, linux-scsi, linux-acpi
On Mon, 12 Nov 2012, Tejun Heo wrote:
> On Fri, Nov 09, 2012 at 02:51:59PM +0800, Aaron Lu wrote:
> > A new interface to block disk events is added, this interface is
> > meant to eliminate a race between PM runtime callback and disk events
> > checking.
> > 
> > Suppose the following device tree:
> > device_sata_port  (the parent)
> >   device_ODD      (the child)
> 
> Weren't you gonna do something different about this?  I mean, if sr
> knows that autopm kicked in, it can simply tell the block layer that
> nothing is going on.  Wouldn't that be simpler?
It wouldn't work for non-ZP drives.  They do need to be polled, even 
when suspended.
Alan Stern
^ permalink raw reply	[flat|nested] 85+ messages in thread 
- * Re: [PATCH v9 07/10] block: add a new interface to block events
  2012-11-12 19:18     ` Alan Stern
@ 2012-11-12 19:21       ` Tejun Heo
  2012-11-12 19:34         ` Alan Stern
  0 siblings, 1 reply; 85+ messages in thread
From: Tejun Heo @ 2012-11-12 19:21 UTC (permalink / raw)
  To: Alan Stern
  Cc: Aaron Lu, Jeff Garzik, James Bottomley, Rafael J. Wysocki,
	Jeff Wu, Aaron Lu, linux-ide, linux-pm, linux-scsi, linux-acpi
Hello, Alan.
On Mon, Nov 12, 2012 at 02:18:11PM -0500, Alan Stern wrote:
> > Weren't you gonna do something different about this?  I mean, if sr
> > knows that autopm kicked in, it can simply tell the block layer that
> > nothing is going on.  Wouldn't that be simpler?
> 
> It wouldn't work for non-ZP drives.  They do need to be polled, even 
> when suspended.
Hmmm... a bit confused, what would autopm do for those non-ZP devices?
Would it make any difference?
Thanks.
-- 
tejun
^ permalink raw reply	[flat|nested] 85+ messages in thread 
- * Re: [PATCH v9 07/10] block: add a new interface to block events
  2012-11-12 19:21       ` Tejun Heo
@ 2012-11-12 19:34         ` Alan Stern
  2012-11-18 15:05           ` Tejun Heo
  0 siblings, 1 reply; 85+ messages in thread
From: Alan Stern @ 2012-11-12 19:34 UTC (permalink / raw)
  To: Tejun Heo
  Cc: Aaron Lu, Jeff Garzik, James Bottomley, Rafael J. Wysocki,
	Jeff Wu, Aaron Lu, linux-ide, linux-pm, linux-scsi, linux-acpi
On Mon, 12 Nov 2012, Tejun Heo wrote:
> Hello, Alan.
> 
> On Mon, Nov 12, 2012 at 02:18:11PM -0500, Alan Stern wrote:
> > > Weren't you gonna do something different about this?  I mean, if sr
> > > knows that autopm kicked in, it can simply tell the block layer that
> > > nothing is going on.  Wouldn't that be simpler?
> > 
> > It wouldn't work for non-ZP drives.  They do need to be polled, even 
> > when suspended.
> 
> Hmmm... a bit confused, what would autopm do for those non-ZP devices?
> Would it make any difference?
It wouldn't do anything for the device, but it would allow the device's
parent to go to low power in between polls.  That's why Aaron at one
point suggested lengthening the polling interval.
Alan Stern
^ permalink raw reply	[flat|nested] 85+ messages in thread 
- * Re: [PATCH v9 07/10] block: add a new interface to block events
  2012-11-12 19:34         ` Alan Stern
@ 2012-11-18 15:05           ` Tejun Heo
  2012-11-18 17:41             ` Alan Stern
  0 siblings, 1 reply; 85+ messages in thread
From: Tejun Heo @ 2012-11-18 15:05 UTC (permalink / raw)
  To: Alan Stern
  Cc: Aaron Lu, Jeff Garzik, James Bottomley, Rafael J. Wysocki,
	Jeff Wu, Aaron Lu, linux-ide, linux-pm, linux-scsi, linux-acpi
Hello, Alan.
On Mon, Nov 12, 2012 at 02:34:01PM -0500, Alan Stern wrote:
> It wouldn't do anything for the device, but it would allow the device's
> parent to go to low power in between polls.  That's why Aaron at one
> point suggested lengthening the polling interval.
Ah, okay.  Entering and leaving suspend even every two seconds may
save noticeable power for some controllers.  Does that, tho, mean that
the port would get reset every two seconds?  If so (I can't see how
else it would behave), it may not really work.  I mean, you would be
spending about ten seconds reinitiaizing everything and then enter
sleep and do it all over again two seconds later, most likely wasting
power in the process.  Sounds weird to me.  What am I missing?
Thanks.
-- 
tejun
^ permalink raw reply	[flat|nested] 85+ messages in thread 
- * Re: [PATCH v9 07/10] block: add a new interface to block events
  2012-11-18 15:05           ` Tejun Heo
@ 2012-11-18 17:41             ` Alan Stern
  2012-11-18 21:56               ` Tejun Heo
  0 siblings, 1 reply; 85+ messages in thread
From: Alan Stern @ 2012-11-18 17:41 UTC (permalink / raw)
  To: Tejun Heo
  Cc: Aaron Lu, Jeff Garzik, James Bottomley, Rafael J. Wysocki,
	Jeff Wu, Aaron Lu, linux-ide, linux-pm, linux-scsi, linux-acpi
On Sun, 18 Nov 2012, Tejun Heo wrote:
> Hello, Alan.
Hi.
> On Mon, Nov 12, 2012 at 02:34:01PM -0500, Alan Stern wrote:
> > It wouldn't do anything for the device, but it would allow the device's
> > parent to go to low power in between polls.  That's why Aaron at one
> > point suggested lengthening the polling interval.
> 
> Ah, okay.  Entering and leaving suspend even every two seconds may
> save noticeable power for some controllers.  Does that, tho, mean that
> the port would get reset every two seconds?
I don't know how suspend works for (S)ATA ports.  For USB mass storage, 
suspend/resume does not involve any resets unless something goes wrong.
>  If so (I can't see how
> else it would behave), it may not really work.  I mean, you would be
> spending about ten seconds reinitiaizing everything and then enter
> sleep and do it all over again two seconds later, most likely wasting
> power in the process.  Sounds weird to me.  What am I missing?
Does it really take 10 seconds to recover from an ATA suspend?  That 
sounds awfully long.
The two second value is merely the default interval for media polling.  
Devices with no async notification mechanism require some sort of
polling if anybody wants to know when media is inserted or removed,
and the polling can't be carried out if the device's port is suspended.
You asked about autopm.  "Autopm" is a rather vague and ugly term I
made up; it means that the kernel automatically suspends the device
after some specified idle time.  In the case of optical drives, "idle"  
currently means the device file is not open (which implies the drive
doesn't contain a mounted filesystem).  As far as I know, the kernel
has no direct way to affect the power level of an optical drive that
isn't ZP.  For these devices "suspended" is merely a logical state,
meaning that the device isn't in use and therefore its parent can be
put into a low-power mode.
Regardless, an optical drive can't remain suspended when polling
occurs.  So unless there's some other mechanism for getting the
information about media changes, suspended drives (and their parents)  
will have to be resumed periodically for polling.
Hence there's a tradeoff.  How can we use the minimum amount of energy
while still polling the drive acceptably often?  In general, the kernel
doesn't know.  That's why these things can be controlled from
userspace.  And the answer may be different for ATA drives vs.  
USB-connected drives.
Does this answer your questions?
Alan Stern
^ permalink raw reply	[flat|nested] 85+ messages in thread 
- * Re: [PATCH v9 07/10] block: add a new interface to block events
  2012-11-18 17:41             ` Alan Stern
@ 2012-11-18 21:56               ` Tejun Heo
  2012-11-18 21:58                 ` Tejun Heo
  2012-11-18 23:28                 ` Alan Stern
  0 siblings, 2 replies; 85+ messages in thread
From: Tejun Heo @ 2012-11-18 21:56 UTC (permalink / raw)
  To: Alan Stern
  Cc: Aaron Lu, Jeff Garzik, James Bottomley, Rafael J. Wysocki,
	Jeff Wu, Aaron Lu, linux-ide, linux-pm, linux-scsi, linux-acpi
Hello, Alan.
On Sun, Nov 18, 2012 at 12:41:06PM -0500, Alan Stern wrote:
> >  If so (I can't see how
> > else it would behave), it may not really work.  I mean, you would be
> > spending about ten seconds reinitiaizing everything and then enter
> > sleep and do it all over again two seconds later, most likely wasting
> > power in the process.  Sounds weird to me.  What am I missing?
> 
> Does it really take 10 seconds to recover from an ATA suspend?  That 
> sounds awfully long.
If it's using the same ->suspend ops as the regular system suspend,
10secs isn't a crazy number.  If the controller goes offline, the PHY
would go offline too and the only way to come back from that is
performing full probing sequence.  From SATA procotol POV, it isn't
too bad.  It's just link initiazliation followed by IDENTIFY and some
config commands.
The problem is that SATA devices aren't really designed to be used
like that.  If you reset a ODD w/ a media in it, it'll probably spin
it up and try to re-establish media state during probe sequence.  It
isn't designed that way and has never been used like that.  SATA has
its own dynamic link power management (DIPM/HIPM) tho.
So, this whole autopm thing doesn't sound like a good idea to me.
> The two second value is merely the default interval for media polling.  
> Devices with no async notification mechanism require some sort of
> polling if anybody wants to know when media is inserted or removed,
> and the polling can't be carried out if the device's port is suspended.
Yeah, I know.  I wrote that thing. :)
> You asked about autopm.  "Autopm" is a rather vague and ugly term I
> made up; it means that the kernel automatically suspends the device
> after some specified idle time.  In the case of optical drives, "idle"  
> currently means the device file is not open (which implies the drive
> doesn't contain a mounted filesystem).  As far as I know, the kernel
> has no direct way to affect the power level of an optical drive that
> isn't ZP.  For these devices "suspended" is merely a logical state,
> meaning that the device isn't in use and therefore its parent can be
> put into a low-power mode.
>
> Regardless, an optical drive can't remain suspended when polling
> occurs.  So unless there's some other mechanism for getting the
> information about media changes, suspended drives (and their parents)  
> will have to be resumed periodically for polling.
> 
> Hence there's a tradeoff.  How can we use the minimum amount of energy
> while still polling the drive acceptably often?  In general, the kernel
> doesn't know.  That's why these things can be controlled from
> userspace.  And the answer may be different for ATA drives vs.  
> USB-connected drives.
> 
> Does this answer your questions?
I think the only reason autopm doesn't blow up for SATA devices is
that userland usually automatically mounts them thus effectively
disabling autopm.  I *think* the only sane thing we can do is doing
autopm iff zpodd is available && no media.
Thanks.
-- 
tejun
^ permalink raw reply	[flat|nested] 85+ messages in thread 
- * Re: [PATCH v9 07/10] block: add a new interface to block events
  2012-11-18 21:56               ` Tejun Heo
@ 2012-11-18 21:58                 ` Tejun Heo
  2012-11-18 23:28                 ` Alan Stern
  1 sibling, 0 replies; 85+ messages in thread
From: Tejun Heo @ 2012-11-18 21:58 UTC (permalink / raw)
  To: Alan Stern
  Cc: Aaron Lu, Jeff Garzik, James Bottomley, Rafael J. Wysocki,
	Jeff Wu, Aaron Lu, linux-ide, linux-pm, linux-scsi, linux-acpi
On Sun, Nov 18, 2012 at 01:56:57PM -0800, Tejun Heo wrote:
> I think the only reason autopm doesn't blow up for SATA devices is
> that userland usually automatically mounts them thus effectively
> disabling autopm.  I *think* the only sane thing we can do is doing
> autopm iff zpodd is available && no media.
That is, at least for devices which get polled.  If devices are to
leave unused for some time, suspending is fine but auto-suspending
inbetween event polling wouldn't work.
Thanks.
-- 
tejun
^ permalink raw reply	[flat|nested] 85+ messages in thread 
- * Re: [PATCH v9 07/10] block: add a new interface to block events
  2012-11-18 21:56               ` Tejun Heo
  2012-11-18 21:58                 ` Tejun Heo
@ 2012-11-18 23:28                 ` Alan Stern
  2012-11-18 23:35                   ` Tejun Heo
  1 sibling, 1 reply; 85+ messages in thread
From: Alan Stern @ 2012-11-18 23:28 UTC (permalink / raw)
  To: Tejun Heo
  Cc: Aaron Lu, Jeff Garzik, James Bottomley, Rafael J. Wysocki,
	Jeff Wu, Aaron Lu, linux-ide, linux-pm, linux-scsi, linux-acpi
On Sun, 18 Nov 2012, Tejun Heo wrote:
> > Does it really take 10 seconds to recover from an ATA suspend?  That 
> > sounds awfully long.
> 
> If it's using the same ->suspend ops as the regular system suspend,
> 10secs isn't a crazy number.  If the controller goes offline, the PHY
> would go offline too and the only way to come back from that is
> performing full probing sequence.  From SATA procotol POV, it isn't
> too bad.  It's just link initiazliation followed by IDENTIFY and some
> config commands.
> 
> The problem is that SATA devices aren't really designed to be used
> like that.  If you reset a ODD w/ a media in it, it'll probably spin
> it up and try to re-establish media state during probe sequence.  It
> isn't designed that way and has never been used like that.  SATA has
> its own dynamic link power management (DIPM/HIPM) tho.
Is it possible to use those to implement runtime suspend?  Or are they 
handled autonomously by the hardware?
> So, this whole autopm thing doesn't sound like a good idea to me.
No doubt it's better suited to some devices than to others.
> > Hence there's a tradeoff.  How can we use the minimum amount of energy
> > while still polling the drive acceptably often?  In general, the kernel
> > doesn't know.  That's why these things can be controlled from
> > userspace.  And the answer may be different for ATA drives vs.  
> > USB-connected drives.
> > 
> > Does this answer your questions?
> 
> I think the only reason autopm doesn't blow up for SATA devices is
> that userland usually automatically mounts them thus effectively
> disabling autopm.
Either that or else because it hasn't been fully implemented yet.  :-)
>  I *think* the only sane thing we can do is doing
> autopm iff zpodd is available && no media.
That may be true for SATA.  For USB optical drives, it does make sense
to power-down the host controller when the drive isn't in use.  USB
suspend/resume takes on the order of 50-100 ms or so.
Alan Stern
^ permalink raw reply	[flat|nested] 85+ messages in thread 
- * Re: [PATCH v9 07/10] block: add a new interface to block events
  2012-11-18 23:28                 ` Alan Stern
@ 2012-11-18 23:35                   ` Tejun Heo
  2012-11-19  2:07                     ` Alan Stern
  0 siblings, 1 reply; 85+ messages in thread
From: Tejun Heo @ 2012-11-18 23:35 UTC (permalink / raw)
  To: Alan Stern
  Cc: Aaron Lu, Jeff Garzik, James Bottomley, Rafael J. Wysocki,
	Jeff Wu, Aaron Lu, linux-ide, linux-pm, linux-scsi, linux-acpi
Hey, Alan.
On Sun, Nov 18, 2012 at 06:28:24PM -0500, Alan Stern wrote:
> > The problem is that SATA devices aren't really designed to be used
> > like that.  If you reset a ODD w/ a media in it, it'll probably spin
> > it up and try to re-establish media state during probe sequence.  It
> > isn't designed that way and has never been used like that.  SATA has
> > its own dynamic link power management (DIPM/HIPM) tho.
> 
> Is it possible to use those to implement runtime suspend?  Or are they 
> handled autonomously by the hardware?
It's controlled by /sys/class/scsi_host/hostX/link_power_management.
Once enabled, the power saving is completedly handled by hardware.  If
link stays idle longer than certain duration, the hardware initiates
low power state and leaves it when something needs to happen.
> > So, this whole autopm thing doesn't sound like a good idea to me.
> 
> No doubt it's better suited to some devices than to others.
Yeah, SATA seems to need a different approach than USB.
> > I think the only reason autopm doesn't blow up for SATA devices is
> > that userland usually automatically mounts them thus effectively
> > disabling autopm.
> 
> Either that or else because it hasn't been fully implemented yet.  :-)
Ah.. that makes sense. :)
> >  I *think* the only sane thing we can do is doing
> > autopm iff zpodd is available && no media.
> 
> That may be true for SATA.  For USB optical drives, it does make sense
> to power-down the host controller when the drive isn't in use.  USB
> suspend/resume takes on the order of 50-100 ms or so.
I see.  For SATA too, the controller / link bring up doesn't take too
long.  The crappy part is what the attached, especially ATAPI, devices
would do after such events as those events will be visible to them
basically as random link resets.
So, at least for SATA, I think what autopm can do is...
* Trigger zpodd if possible.
* Trigger suspend iff polling isn't happening on the device.
Thanks.
-- 
tejun
^ permalink raw reply	[flat|nested] 85+ messages in thread 
- * Re: [PATCH v9 07/10] block: add a new interface to block events
  2012-11-18 23:35                   ` Tejun Heo
@ 2012-11-19  2:07                     ` Alan Stern
  2012-11-19  3:21                       ` Aaron Lu
  2012-11-19 14:50                       ` Tejun Heo
  0 siblings, 2 replies; 85+ messages in thread
From: Alan Stern @ 2012-11-19  2:07 UTC (permalink / raw)
  To: Tejun Heo
  Cc: Aaron Lu, Jeff Garzik, James Bottomley, Rafael J. Wysocki,
	Jeff Wu, Aaron Lu, linux-ide, linux-pm, linux-scsi, linux-acpi
On Sun, 18 Nov 2012, Tejun Heo wrote:
> Hey, Alan.
Hey...
> It's controlled by /sys/class/scsi_host/hostX/link_power_management.
> Once enabled, the power saving is completedly handled by hardware.  If
> link stays idle longer than certain duration, the hardware initiates
> low power state and leaves it when something needs to happen.
> 
> > > So, this whole autopm thing doesn't sound like a good idea to me.
> > 
> > No doubt it's better suited to some devices than to others.
> 
> Yeah, SATA seems to need a different approach than USB.
Based on your description, I agree.
> > That may be true for SATA.  For USB optical drives, it does make sense
> > to power-down the host controller when the drive isn't in use.  USB
> > suspend/resume takes on the order of 50-100 ms or so.
> 
> I see.  For SATA too, the controller / link bring up doesn't take too
> long.  The crappy part is what the attached, especially ATAPI, devices
> would do after such events as those events will be visible to them
> basically as random link resets.
> 
> So, at least for SATA, I think what autopm can do is...
> 
> * Trigger zpodd if possible.
> 
> * Trigger suspend iff polling isn't happening on the device.
That sums it up nicely.  Of course, the PM core is unaware of details
such as media polling.  What we can do is have the SATA runtime-idle
method return -EBUSY if the device isn't a ZPODD and if polling is
enabled.  Unfortunately I don't think there's any way currently to
trigger autopm when the user turns off polling.  Maybe something could
be added to the appropriate sysfs handler.
Alan Stern
^ permalink raw reply	[flat|nested] 85+ messages in thread 
- * Re: [PATCH v9 07/10] block: add a new interface to block events
  2012-11-19  2:07                     ` Alan Stern
@ 2012-11-19  3:21                       ` Aaron Lu
  2012-11-19 14:50                       ` Tejun Heo
  1 sibling, 0 replies; 85+ messages in thread
From: Aaron Lu @ 2012-11-19  3:21 UTC (permalink / raw)
  To: Alan Stern, Tejun Heo
  Cc: Jeff Garzik, James Bottomley, Rafael J. Wysocki, Jeff Wu,
	Aaron Lu, linux-ide, linux-pm, linux-scsi, linux-acpi
On 11/19/2012 10:07 AM, Alan Stern wrote:
> On Sun, 18 Nov 2012, Tejun Heo wrote:
> 
>> Hey, Alan.
> 
> Hey...
> 
>> It's controlled by /sys/class/scsi_host/hostX/link_power_management.
>> Once enabled, the power saving is completedly handled by hardware.  If
>> link stays idle longer than certain duration, the hardware initiates
>> low power state and leaves it when something needs to happen.
>>
>>>> So, this whole autopm thing doesn't sound like a good idea to me.
>>>
>>> No doubt it's better suited to some devices than to others.
>>
>> Yeah, SATA seems to need a different approach than USB.
> 
> Based on your description, I agree.
> 
>>> That may be true for SATA.  For USB optical drives, it does make sense
>>> to power-down the host controller when the drive isn't in use.  USB
>>> suspend/resume takes on the order of 50-100 ms or so.
>>
>> I see.  For SATA too, the controller / link bring up doesn't take too
>> long.  The crappy part is what the attached, especially ATAPI, devices
>> would do after such events as those events will be visible to them
>> basically as random link resets.
>>
>> So, at least for SATA, I think what autopm can do is...
>>
>> * Trigger zpodd if possible.
>>
>> * Trigger suspend iff polling isn't happening on the device.
> 
> That sums it up nicely.  Of course, the PM core is unaware of details
> such as media polling.  What we can do is have the SATA runtime-idle
> method return -EBUSY if the device isn't a ZPODD and if polling is
> enabled.  Unfortunately I don't think there's any way currently to
> trigger autopm when the user turns off polling.  Maybe something could
> be added to the appropriate sysfs handler.
OK, thanks for your(both of you) suggestions.
But probably for ZPODD devices first, as it has a clear use case and
should already have productions. For async notification capable ODDs,
I'll leave it for someone else or maybe at a later time if I can get
such a device to test on.
To conclude, the ata port's runtime idle callback will return 0 when:
1 It attached a ZPODD capable ODD;
2 Or it attached a hard disk.
For all other cases, it will return -EBUSY.
Does this look correct?
Thanks,
Aaron
^ permalink raw reply	[flat|nested] 85+ messages in thread 
- * Re: [PATCH v9 07/10] block: add a new interface to block events
  2012-11-19  2:07                     ` Alan Stern
  2012-11-19  3:21                       ` Aaron Lu
@ 2012-11-19 14:50                       ` Tejun Heo
  1 sibling, 0 replies; 85+ messages in thread
From: Tejun Heo @ 2012-11-19 14:50 UTC (permalink / raw)
  To: Alan Stern
  Cc: Aaron Lu, Jeff Garzik, James Bottomley, Rafael J. Wysocki,
	Jeff Wu, Aaron Lu, linux-ide, linux-pm, linux-scsi, linux-acpi
On Sun, Nov 18, 2012 at 09:07:22PM -0500, Alan Stern wrote:
> Hey...
Hey, again. :P
> > So, at least for SATA, I think what autopm can do is...
> > 
> > * Trigger zpodd if possible.
> > 
> > * Trigger suspend iff polling isn't happening on the device.
> 
> That sums it up nicely.  Of course, the PM core is unaware of details
> such as media polling.  What we can do is have the SATA runtime-idle
> method return -EBUSY if the device isn't a ZPODD and if polling is
> enabled.  Unfortunately I don't think there's any way currently to
> trigger autopm when the user turns off polling.  Maybe something could
> be added to the appropriate sysfs handler.
Given that genhd event polling and PM are likely to interact with each
other, I think it would be a good idea to implement a proper way for
the two to communicate.  Dunno what it should be tho.  The tricky part
would be mapping the block device to the underlying hardware one.
Thanks.
-- 
tejun
^ permalink raw reply	[flat|nested] 85+ messages in thread 
 
 
 
 
 
 
 
 
 
 
 
- * [PATCH v9 08/10] scsi: sr: support (un)block events
  2012-11-09  6:51 [PATCH v9 00/10] ZPODD Patches Aaron Lu
                   ` (6 preceding siblings ...)
  2012-11-09  6:51 ` [PATCH v9 07/10] block: add a new interface to block events Aaron Lu
@ 2012-11-09  6:52 ` Aaron Lu
  2012-11-09  6:52 ` [PATCH v9 09/10] ata: zpodd: handle power transition of ODD Aaron Lu
  2012-11-09  6:52 ` [PATCH v9 10/10] ata: expose pm qos flags to user space for ata device Aaron Lu
  9 siblings, 0 replies; 85+ messages in thread
From: Aaron Lu @ 2012-11-09  6:52 UTC (permalink / raw)
  To: Jeff Garzik, James Bottomley, Rafael J. Wysocki, Alan Stern,
	Tejun Heo
  Cc: Jeff Wu, Aaron Lu, linux-ide, linux-pm, linux-scsi, linux-acpi,
	Aaron Lu
2 interfaces are added to block/unblock events for the disk sr manages.
This is used by SATA ZPODD, when ODD is runtime powered off, the events
poll is no longer needed so better be blocked. And once powered on,
events poll will be unblocked.
These 2 interfaces are needed here because SATA layer does not have
access to the gendisk structure sr manages.
Signed-off-by: Aaron Lu <aaron.lu@intel.com>
---
 drivers/scsi/Makefile   |  1 +
 drivers/scsi/sr_zpodd.c | 21 +++++++++++++++++++++
 drivers/scsi/sr_zpodd.h |  9 +++++++++
 3 files changed, 31 insertions(+)
 create mode 100644 drivers/scsi/sr_zpodd.c
 create mode 100644 drivers/scsi/sr_zpodd.h
diff --git a/drivers/scsi/Makefile b/drivers/scsi/Makefile
index 888f73a..474efe2 100644
--- a/drivers/scsi/Makefile
+++ b/drivers/scsi/Makefile
@@ -177,6 +177,7 @@ sd_mod-objs	:= sd.o
 sd_mod-$(CONFIG_BLK_DEV_INTEGRITY) += sd_dif.o
 
 sr_mod-objs	:= sr.o sr_ioctl.o sr_vendor.o
+sr_mod-$(CONFIG_SATA_ZPODD) += sr_zpodd.o
 ncr53c8xx-flags-$(CONFIG_SCSI_ZALON) \
 		:= -DCONFIG_NCR53C8XX_PREFETCH -DSCSI_NCR_BIG_ENDIAN \
 			-DCONFIG_SCSI_NCR53C8XX_NO_WORD_TRANSFERS
diff --git a/drivers/scsi/sr_zpodd.c b/drivers/scsi/sr_zpodd.c
new file mode 100644
index 0000000..0686e8c
--- /dev/null
+++ b/drivers/scsi/sr_zpodd.c
@@ -0,0 +1,21 @@
+#include <linux/module.h>
+#include <linux/genhd.h>
+#include <linux/cdrom.h>
+#include "sr.h"
+
+bool sr_block_events(struct device *dev)
+{
+	struct scsi_cd *cd = dev_get_drvdata(dev);
+	if (cd)
+		return disk_try_block_events(cd->disk);
+	return false;
+}
+EXPORT_SYMBOL(sr_block_events);
+
+void sr_unblock_events(struct device *dev)
+{
+	struct scsi_cd *cd = dev_get_drvdata(dev);
+	if (cd)
+		disk_unblock_events(cd->disk);
+}
+EXPORT_SYMBOL(sr_unblock_events);
diff --git a/drivers/scsi/sr_zpodd.h b/drivers/scsi/sr_zpodd.h
new file mode 100644
index 0000000..49c6a55
--- /dev/null
+++ b/drivers/scsi/sr_zpodd.h
@@ -0,0 +1,9 @@
+#ifndef __SR_ZPODD_H__
+#define __SR_ZPODD_H__
+
+#include <linux/device.h>
+
+extern bool sr_block_events(struct device *dev);
+extern void sr_unblock_events(struct device *dev);
+
+#endif
-- 
1.7.12.4
^ permalink raw reply related	[flat|nested] 85+ messages in thread
- * [PATCH v9 09/10] ata: zpodd: handle power transition of ODD
  2012-11-09  6:51 [PATCH v9 00/10] ZPODD Patches Aaron Lu
                   ` (7 preceding siblings ...)
  2012-11-09  6:52 ` [PATCH v9 08/10] scsi: sr: support (un)block events Aaron Lu
@ 2012-11-09  6:52 ` Aaron Lu
  2012-11-09  6:52 ` [PATCH v9 10/10] ata: expose pm qos flags to user space for ata device Aaron Lu
  9 siblings, 0 replies; 85+ messages in thread
From: Aaron Lu @ 2012-11-09  6:52 UTC (permalink / raw)
  To: Jeff Garzik, James Bottomley, Rafael J. Wysocki, Alan Stern,
	Tejun Heo
  Cc: Jeff Wu, Aaron Lu, linux-ide, linux-pm, linux-scsi, linux-acpi,
	Aaron Lu
When ata port is runtime suspended, it will check if the ODD attched to
it is in zero power ready state by checking the zp_ready field. And if
this field indicates it is not ready to be powered off, NO_POWEROFF qos
flag will be set to avoid choosing ACPI_STATE_D3_COLD for it.
Once powered off, disk events will be blocked to avoid waking it up
every two seconds.
And on resume, it will re-gain power and go through the recovery
process. When reset for the ata port is done, the ODD is considered
functional, and post processing like eject tray if the ODD is drawer
type is done there. And disk events is unblocked here.
For normal ODDs that do not support zero power state, NO_POWEROFF qos
flag will always be set.
Signed-off-by: Aaron Lu <aaron.lu@intel.com>
---
 drivers/ata/libata-acpi.c  | 40 ++++++++++++++++++++++-------
 drivers/ata/libata-eh.c    |  2 ++
 drivers/ata/libata-zpodd.c | 63 ++++++++++++++++++++++++++++++++++++++++++++++
 drivers/ata/libata.h       |  8 ++++++
 include/linux/libata.h     |  2 ++
 5 files changed, 106 insertions(+), 9 deletions(-)
diff --git a/drivers/ata/libata-acpi.c b/drivers/ata/libata-acpi.c
index 13ee178..91f3405 100644
--- a/drivers/ata/libata-acpi.c
+++ b/drivers/ata/libata-acpi.c
@@ -838,6 +838,24 @@ void ata_acpi_on_resume(struct ata_port *ap)
 	}
 }
 
+static int ata_acpi_choose_state(struct ata_device *dev)
+{
+	/* Always choose D3 for PATA devices */
+	if (!(dev->link->ap->flags & ATA_FLAG_ACPI_SATA))
+		return ACPI_STATE_D3;
+
+	if (zpodd_dev_enabled(dev)) {
+		if (zpodd_poweroff_ready(dev))
+			dev_pm_qos_update_request(&dev->poweroff_req, 0);
+		else
+			dev_pm_qos_update_request(&dev->poweroff_req,
+						  PM_QOS_FLAG_NO_POWER_OFF);
+	}
+
+	return acpi_pm_device_sleep_state(&dev->sdev->sdev_gendev,
+					  NULL, ACPI_STATE_D3_COLD);
+}
+
 /**
  * ata_acpi_set_state - set the port power state
  * @ap: target ATA port
@@ -864,17 +882,16 @@ void ata_acpi_set_state(struct ata_port *ap, pm_message_t state)
 			continue;
 
 		if (state.event != PM_EVENT_ON) {
-			acpi_state = acpi_pm_device_sleep_state(
-				&dev->sdev->sdev_gendev, NULL, ACPI_STATE_D3);
-			if (acpi_state > 0)
+			acpi_state = ata_acpi_choose_state(dev);
+			if (acpi_state > 0) {
 				acpi_bus_set_power(handle, acpi_state);
-			/* TBD: need to check if it's runtime pm request */
-			acpi_pm_device_run_wake(
-				&dev->sdev->sdev_gendev, true);
+				if (zpodd_dev_enabled(dev) &&
+				    acpi_state == ACPI_STATE_D3_COLD)
+					zpodd_post_poweroff(dev);
+			}
 		} else {
-			/* Ditto */
-			acpi_pm_device_run_wake(
-				&dev->sdev->sdev_gendev, false);
+			if (zpodd_dev_enabled(dev))
+				zpodd_pre_poweron(dev);
 			acpi_bus_set_power(handle, ACPI_STATE_D0);
 		}
 	}
@@ -1008,7 +1025,12 @@ static void ata_acpi_unregister_power_resource(struct ata_device *dev)
 
 void ata_acpi_bind(struct ata_device *dev)
 {
+	/* ODD can't be put to D3 cold state, unless it is zero power capable */
+	s32 value = dev->class == ATA_DEV_ATAPI ? PM_QOS_FLAG_NO_POWER_OFF : 0;
+
 	ata_acpi_register_power_resource(dev);
+	dev_pm_qos_add_request(&dev->sdev->sdev_gendev, &dev->poweroff_req,
+				DEV_PM_QOS_FLAGS, value);
 }
 
 void ata_acpi_unbind(struct ata_device *dev)
diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c
index 6487b88..1348e7c 100644
--- a/drivers/ata/libata-eh.c
+++ b/drivers/ata/libata-eh.c
@@ -3771,6 +3771,8 @@ int ata_eh_recover(struct ata_port *ap, ata_prereset_fn_t prereset,
 				rc = atapi_eh_clear_ua(dev);
 				if (rc)
 					goto rest_fail;
+				if (zpodd_dev_enabled(dev))
+					zpodd_post_resume(dev);
 			}
 		}
 
diff --git a/drivers/ata/libata-zpodd.c b/drivers/ata/libata-zpodd.c
index 533a39e..777f9c7 100644
--- a/drivers/ata/libata-zpodd.c
+++ b/drivers/ata/libata-zpodd.c
@@ -5,6 +5,7 @@
 #include <scsi/scsi_cmnd.h>
 
 #include "libata.h"
+#include "../scsi/sr_zpodd.h"
 
 #define POWEROFF_DELAY  (30 * 1000)     /* 30 seconds for power off delay */
 
@@ -15,6 +16,7 @@ struct zpodd {
 	bool status_ready:1;	/* ready status derived from media event poll,
 				   it is not accurate, but serves as a hint */
 	bool zp_ready:1;	/* zero power ready state */
+	bool powered_off:1;	/* ODD is powered off */
 
 	unsigned long last_ready; /* last zero power ready timestamp */
 
@@ -40,6 +42,17 @@ static int run_atapi_cmd(struct ata_device *dev, const char *cdb,
 			buf ? DMA_FROM_DEVICE : DMA_NONE, buf, buf_len, 0);
 }
 
+static int eject_tray(struct ata_device *dev)
+{
+	const char cdb[] = {  GPCMD_START_STOP_UNIT,
+			      0, 0, 0,
+			      0x02,     /* LoEj */
+			      0, 0, 0, 0, 0, 0, 0,
+	};
+
+	return run_atapi_cmd(dev, cdb, sizeof(cdb), NULL, 0);
+}
+
 /*
  * Per the spec, only slot type and drawer type ODD can be supported
  *
@@ -209,6 +222,56 @@ void zpodd_check_zpready(struct ata_device *dev)
 		zpodd->last_ready = 0;
 }
 
+/*
+ * Test if ODD is ready to be powered off.
+ * Determined by zp_ready and if events is successfully blocked
+ */
+bool zpodd_poweroff_ready(struct ata_device *dev)
+{
+	struct zpodd *zpodd = dev->private_data;
+	return zpodd->zp_ready;
+}
+
+void zpodd_post_poweroff(struct ata_device *dev)
+{
+	struct zpodd *zpodd = dev->private_data;
+
+	sr_block_events(&dev->sdev->sdev_gendev);
+
+	zpodd->powered_off = true;
+	device_set_run_wake(&dev->sdev->sdev_gendev, true);
+	acpi_pm_device_run_wake(&dev->sdev->sdev_gendev, true);
+}
+
+void zpodd_pre_poweron(struct ata_device *dev)
+{
+	struct zpodd *zpodd = dev->private_data;
+	if (zpodd->powered_off) {
+		acpi_pm_device_run_wake(&dev->sdev->sdev_gendev, false);
+		device_set_run_wake(&dev->sdev->sdev_gendev, false);
+	}
+}
+
+void zpodd_post_resume(struct ata_device *dev)
+{
+	struct zpodd *zpodd = dev->private_data;
+
+	if (!zpodd->powered_off)
+		return;
+
+	zpodd->powered_off = false;
+
+	if (zpodd->from_notify) {
+		zpodd->from_notify = false;
+		if (zpodd->drawer)
+			eject_tray(dev);
+	}
+
+	zpodd->last_ready = 0;
+	zpodd->zp_ready = false;
+	sr_unblock_events(&dev->sdev->sdev_gendev);
+}
+
 static void zpodd_wake_dev(acpi_handle handle, u32 event, void *context)
 {
 	struct ata_device *ata_dev = context;
diff --git a/drivers/ata/libata.h b/drivers/ata/libata.h
index 2b46703..5e4baf9 100644
--- a/drivers/ata/libata.h
+++ b/drivers/ata/libata.h
@@ -243,12 +243,20 @@ static inline bool zpodd_dev_enabled(struct ata_device *dev)
 }
 void zpodd_snoop_status(struct ata_device *dev, struct scsi_cmnd *scmd);
 void zpodd_check_zpready(struct ata_device *dev);
+bool zpodd_poweroff_ready(struct ata_device *dev);
+void zpodd_post_poweroff(struct ata_device *dev);
+void zpodd_pre_poweron(struct ata_device *dev);
+void zpodd_post_resume(struct ata_device *dev);
 #else /* CONFIG_SATA_ZPODD */
 static inline void zpodd_init(struct ata_device *dev) {}
 static inline void zpodd_deinit(struct ata_device *dev) {}
 static inline bool zpodd_dev_enabled(struct ata_device *dev) { return false; }
 static inline void zpodd_snoop_status(struct ata_device *dev, struct scsi_cmnd *scmd) {}
 static inline void zpodd_check_zpready(struct ata_device *dev) {}
+static inline bool zpodd_poweroff_ready(struct ata_device *dev) { return false; }
+static inline void zpodd_post_poweroff(struct ata_device *dev) {}
+static inline void zpodd_pre_poweron(struct ata_device *dev) {}
+static inline void zpodd_post_resume(struct ata_device *dev) {}
 #endif /* CONFIG_SATA_ZPODD */
 
 /* libata-atapi.c */
diff --git a/include/linux/libata.h b/include/linux/libata.h
index 77eeeda..dc98912 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -38,6 +38,7 @@
 #include <linux/acpi.h>
 #include <linux/cdrom.h>
 #include <linux/sched.h>
+#include <linux/pm_qos.h>
 
 /*
  * Define if arch has non-standard setup.  This is a _PCI_ standard
@@ -618,6 +619,7 @@ struct ata_device {
 #ifdef CONFIG_ATA_ACPI
 	union acpi_object	*gtf_cache;
 	unsigned int		gtf_filter;
+	struct dev_pm_qos_request poweroff_req;
 #endif
 	struct device		tdev;
 	/* n_sector is CLEAR_BEGIN, read comment above CLEAR_BEGIN */
-- 
1.7.12.4
^ permalink raw reply related	[flat|nested] 85+ messages in thread
- * [PATCH v9 10/10] ata: expose pm qos flags to user space for ata device
  2012-11-09  6:51 [PATCH v9 00/10] ZPODD Patches Aaron Lu
                   ` (8 preceding siblings ...)
  2012-11-09  6:52 ` [PATCH v9 09/10] ata: zpodd: handle power transition of ODD Aaron Lu
@ 2012-11-09  6:52 ` Aaron Lu
  9 siblings, 0 replies; 85+ messages in thread
From: Aaron Lu @ 2012-11-09  6:52 UTC (permalink / raw)
  To: Jeff Garzik, James Bottomley, Rafael J. Wysocki, Alan Stern,
	Tejun Heo
  Cc: Jeff Wu, Aaron Lu, linux-ide, linux-pm, linux-scsi, linux-acpi,
	Aaron Lu
Expose pm qos flags to user space so that user has a chance to disable
pm features like power off, if he/she has a broken platform or devices
or simply does not like this pm feature.
This flag is exposed to user space only for ata device or atapi device
that is zero power capable. For normal atapi device, it will never be
powered off.
Signed-off-by: Aaron Lu <aaron.lu@intel.com>
---
 drivers/ata/libata-acpi.c | 2 ++
 1 file changed, 2 insertions(+)
diff --git a/drivers/ata/libata-acpi.c b/drivers/ata/libata-acpi.c
index 91f3405..3984290 100644
--- a/drivers/ata/libata-acpi.c
+++ b/drivers/ata/libata-acpi.c
@@ -1031,6 +1031,8 @@ void ata_acpi_bind(struct ata_device *dev)
 	ata_acpi_register_power_resource(dev);
 	dev_pm_qos_add_request(&dev->sdev->sdev_gendev, &dev->poweroff_req,
 				DEV_PM_QOS_FLAGS, value);
+	if (dev->class == ATA_DEV_ATA || zpodd_dev_enabled(dev))
+		dev_pm_qos_expose_flags(&dev->sdev->sdev_gendev, 0);
 }
 
 void ata_acpi_unbind(struct ata_device *dev)
-- 
1.7.12.4
^ permalink raw reply related	[flat|nested] 85+ messages in thread