linux-ide.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] libata: device suspend/resume
@ 2005-05-23 20:15 Jeff Garzik
  2005-05-23 20:41 ` James Bottomley
  2005-05-24 16:04 ` Mark Lord
  0 siblings, 2 replies; 44+ messages in thread
From: Jeff Garzik @ 2005-05-23 20:15 UTC (permalink / raw)
  To: linux-scsi, linux-ide; +Cc: James Bottomley


Patch 2/2 for review by the linux-scsi crowd.

Please do not apply.



Patch is via Mark Lord, 

It's not ready for upstream, but I am posting it for two reasons:

1) People might find the feature useful (warning warning do not use in
production, just a test feature, might cook your hard drive and all your
data, standard disclaimer...)

2) Get comments from linux-scsi folks about the SCSI changes.




Index: drivers/scsi/ahci.c
===================================================================
--- b023d524fb0a3b71aa0b957ce7c5540611497370/drivers/scsi/ahci.c  (mode:100644 sha1:da5bd33d982d7e521b6b0603adcb0911395b184d)
+++ uncommitted/drivers/scsi/ahci.c  (mode:100644)
@@ -201,6 +201,8 @@
 	.slave_configure	= ata_scsi_slave_config,
 	.bios_param		= ata_std_bios_param,
 	.ordered_flush		= 1,
+	.resume			= ata_scsi_device_resume,
+	.suspend		= ata_scsi_device_suspend,
 };
 
 static struct ata_port_operations ahci_ops = {
@@ -272,6 +274,8 @@
 	.id_table		= ahci_pci_tbl,
 	.probe			= ahci_init_one,
 	.remove			= ata_pci_remove_one,
+	.suspend		= ata_pci_device_suspend,
+	.resume			= ata_pci_device_resume,
 };
 
 
Index: drivers/scsi/ata_piix.c
===================================================================
--- b023d524fb0a3b71aa0b957ce7c5540611497370/drivers/scsi/ata_piix.c  (mode:100644 sha1:3867f91ef8c7bebe3d0edf0bd2e3b326a1d4061b)
+++ uncommitted/drivers/scsi/ata_piix.c  (mode:100644)
@@ -104,6 +104,8 @@
 	.id_table		= piix_pci_tbl,
 	.probe			= piix_init_one,
 	.remove			= ata_pci_remove_one,
+	.suspend		= ata_pci_device_suspend,
+	.resume			= ata_pci_device_resume,
 };
 
 static Scsi_Host_Template piix_sht = {
@@ -124,6 +126,8 @@
 	.slave_configure	= ata_scsi_slave_config,
 	.bios_param		= ata_std_bios_param,
 	.ordered_flush		= 1,
+	.resume			= ata_scsi_device_resume,
+	.suspend		= ata_scsi_device_suspend,
 };
 
 static struct ata_port_operations piix_pata_ops = {
Index: drivers/scsi/libata-core.c
===================================================================
--- b023d524fb0a3b71aa0b957ce7c5540611497370/drivers/scsi/libata-core.c  (mode:100644 sha1:0b5d3a5b7edab5ce17889a0e698805ce346b4d1a)
+++ uncommitted/drivers/scsi/libata-core.c  (mode:100644)
@@ -3299,6 +3299,104 @@
 	ata_qc_complete(qc, ATA_ERR);
 }
 
+/*
+ * Execute a 'simple' command, that only consists of the opcode 'cmd' itself,
+ * without filling any other registers
+ */
+static int ata_do_simple_cmd(struct ata_port *ap, struct ata_device *dev,
+			     u8 cmd)
+{
+	DECLARE_COMPLETION(wait);
+	struct ata_queued_cmd *qc;
+	unsigned long flags;
+	int rc;
+
+	qc = ata_qc_new_init(ap, dev);
+	BUG_ON(qc == NULL);
+
+	qc->tf.command = cmd;
+	qc->tf.flags |= ATA_TFLAG_DEVICE;
+	qc->tf.protocol = ATA_PROT_NODATA;
+
+	qc->waiting = &wait;
+	qc->complete_fn = ata_qc_complete_noop;
+
+	spin_lock_irqsave(&ap->host_set->lock, flags);
+	rc = ata_qc_issue(qc);
+	spin_unlock_irqrestore(&ap->host_set->lock, flags);
+
+	if (!rc)
+		wait_for_completion(&wait);
+
+	return rc;
+}
+
+static int ata_flush_cache(struct ata_port *ap, struct ata_device *dev)
+{
+	u8 cmd;
+
+	if (!ata_try_flush_cache(dev))
+		return 0;
+
+	if (ata_id_has_flush_ext(dev->id))
+		cmd = ATA_CMD_FLUSH_EXT;
+	else
+		cmd = ATA_CMD_FLUSH;
+
+	return ata_do_simple_cmd(ap, dev, cmd);
+}
+
+static int ata_standby_drive(struct ata_port *ap, struct ata_device *dev)
+{
+	return ata_do_simple_cmd(ap, dev, ATA_CMD_STANDBYNOW1);
+}
+
+static int ata_start_drive(struct ata_port *ap, struct ata_device *dev)
+{
+	return ata_do_simple_cmd(ap, dev, ATA_CMD_IDLEIMMEDIATE);
+}
+
+/**
+ *	ata_device_resume - wakeup a previously suspended devices
+ *
+ *	Kick the drive back into action, by sending it an idle immediate
+ *	command and making sure its transfer mode matches between drive
+ *	and host.
+ *
+ */
+int ata_device_resume(struct ata_port *ap, struct ata_device *dev)
+{
+	if (ap->flags & ATA_FLAG_SUSPENDED) {
+		ap->flags &= ~ATA_FLAG_SUSPENDED;
+		ata_set_mode(ap);
+	}
+	if (!ata_dev_present(dev))
+		return 0;
+	if (dev->class == ATA_DEV_ATA)
+		ata_start_drive(ap, dev);
+
+	return 0;
+}
+
+/**
+ *	ata_device_suspend - prepare a device for suspend
+ *
+ *	Flush the cache on the drive, if appropriate, then issue a
+ *	standbynow command.
+ *
+ */
+int ata_device_suspend(struct ata_port *ap, struct ata_device *dev)
+{
+	if (!ata_dev_present(dev))
+		return 0;
+	if (dev->class == ATA_DEV_ATA)
+		ata_flush_cache(ap, dev);
+
+	ata_standby_drive(ap, dev);
+	ap->flags |= ATA_FLAG_SUSPENDED;
+	return 0;
+}
+
 int ata_port_start (struct ata_port *ap)
 {
 	struct device *dev = ap->host_set->dev;
@@ -3937,6 +4035,23 @@
 
 	return (tmp == bits->val) ? 1 : 0;
 }
+
+int ata_pci_device_suspend(struct pci_dev *pdev, pm_message_t state)
+{
+	pci_save_state(pdev);
+	pci_disable_device(pdev);
+	pci_set_power_state(pdev, PCI_D3hot);
+	return 0;
+}
+
+int ata_pci_device_resume(struct pci_dev *pdev)
+{
+	pci_set_power_state(pdev, PCI_D0);
+	pci_restore_state(pdev);
+	pci_enable_device(pdev);
+	pci_set_master(pdev);
+	return 0;
+}
 #endif /* CONFIG_PCI */
 
 
@@ -4021,4 +4136,11 @@
 EXPORT_SYMBOL_GPL(ata_pci_init_native_mode);
 EXPORT_SYMBOL_GPL(ata_pci_init_one);
 EXPORT_SYMBOL_GPL(ata_pci_remove_one);
+EXPORT_SYMBOL_GPL(ata_pci_device_suspend);
+EXPORT_SYMBOL_GPL(ata_pci_device_resume);
 #endif /* CONFIG_PCI */
+
+EXPORT_SYMBOL_GPL(ata_device_suspend);
+EXPORT_SYMBOL_GPL(ata_device_resume);
+EXPORT_SYMBOL_GPL(ata_scsi_device_suspend);
+EXPORT_SYMBOL_GPL(ata_scsi_device_resume);
Index: drivers/scsi/libata-scsi.c
===================================================================
--- b023d524fb0a3b71aa0b957ce7c5540611497370/drivers/scsi/libata-scsi.c  (mode:100644 sha1:4c96df060c3bad9af44ab4c4664d8cba38c63030)
+++ uncommitted/drivers/scsi/libata-scsi.c  (mode:100644)
@@ -387,6 +387,22 @@
 	return 0;
 }
 
+int ata_scsi_device_resume(struct scsi_device *sdev)
+{
+	struct ata_port *ap = (struct ata_port *) &sdev->host->hostdata[0];
+	struct ata_device *dev = &ap->device[sdev->id];
+
+	return ata_device_resume(ap, dev);
+}
+
+int ata_scsi_device_suspend(struct scsi_device *sdev)
+{
+	struct ata_port *ap = (struct ata_port *) &sdev->host->hostdata[0];
+	struct ata_device *dev = &ap->device[sdev->id];
+
+	return ata_device_suspend(ap, dev);
+}
+
 /**
  *	ata_scsi_flush_xlat - Translate SCSI SYNCHRONIZE CACHE command
  *	@qc: Storage for translated ATA taskfile
Index: drivers/scsi/sata_nv.c
===================================================================
--- b023d524fb0a3b71aa0b957ce7c5540611497370/drivers/scsi/sata_nv.c  (mode:100644 sha1:69009f853a4917f54a88aa0c2cf3e6fb0428c8f7)
+++ uncommitted/drivers/scsi/sata_nv.c  (mode:100644)
@@ -187,6 +187,8 @@
 	.id_table		= nv_pci_tbl,
 	.probe			= nv_init_one,
 	.remove			= ata_pci_remove_one,
+	.suspend		= ata_pci_device_suspend,
+	.resume			= ata_pci_device_resume,
 };
 
 static Scsi_Host_Template nv_sht = {
@@ -207,6 +209,8 @@
 	.slave_configure	= ata_scsi_slave_config,
 	.bios_param		= ata_std_bios_param,
 	.ordered_flush		= 1,
+	.resume			= ata_scsi_device_resume,
+	.suspend		= ata_scsi_device_suspend,
 };
 
 static struct ata_port_operations nv_ops = {
Index: drivers/scsi/sata_promise.c
===================================================================
--- b023d524fb0a3b71aa0b957ce7c5540611497370/drivers/scsi/sata_promise.c  (mode:100644 sha1:19a13e3590f4eca5fab24675ffec12aaa56dd57f)
+++ uncommitted/drivers/scsi/sata_promise.c  (mode:100644)
@@ -103,6 +103,8 @@
 	.slave_configure	= ata_scsi_slave_config,
 	.bios_param		= ata_std_bios_param,
 	.ordered_flush		= 1,
+	.resume			= ata_scsi_device_resume,
+	.suspend		= ata_scsi_device_suspend,
 };
 
 static struct ata_port_operations pdc_ata_ops = {
@@ -178,6 +180,8 @@
 	.id_table		= pdc_ata_pci_tbl,
 	.probe			= pdc_ata_init_one,
 	.remove			= ata_pci_remove_one,
+	.suspend		= ata_pci_device_suspend,
+	.resume			= ata_pci_device_resume,
 };
 
 
Index: drivers/scsi/sata_qstor.c
===================================================================
--- b023d524fb0a3b71aa0b957ce7c5540611497370/drivers/scsi/sata_qstor.c  (mode:100644 sha1:dfd3621047171dc748ba9a11c82a418f89faaed6)
+++ uncommitted/drivers/scsi/sata_qstor.c  (mode:100644)
@@ -140,6 +140,8 @@
 	.dma_boundary		= QS_DMA_BOUNDARY,
 	.slave_configure	= ata_scsi_slave_config,
 	.bios_param		= ata_std_bios_param,
+	.resume			= ata_scsi_device_resume,
+	.suspend		= ata_scsi_device_suspend,
 };
 
 static struct ata_port_operations qs_ata_ops = {
@@ -191,6 +193,8 @@
 	.id_table		= qs_ata_pci_tbl,
 	.probe			= qs_ata_init_one,
 	.remove			= ata_pci_remove_one,
+	.suspend		= ata_pci_device_suspend,
+	.resume			= ata_pci_device_resume,
 };
 
 static int qs_check_atapi_dma(struct ata_queued_cmd *qc)
Index: drivers/scsi/sata_sil.c
===================================================================
--- b023d524fb0a3b71aa0b957ce7c5540611497370/drivers/scsi/sata_sil.c  (mode:100644 sha1:f0489dc302a03054d6461ba54c9705f30190a8f4)
+++ uncommitted/drivers/scsi/sata_sil.c  (mode:100644)
@@ -115,6 +115,8 @@
 	.id_table		= sil_pci_tbl,
 	.probe			= sil_init_one,
 	.remove			= ata_pci_remove_one,
+	.suspend		= ata_pci_device_suspend,
+	.resume			= ata_pci_device_resume,
 };
 
 static Scsi_Host_Template sil_sht = {
@@ -135,6 +137,8 @@
 	.slave_configure	= ata_scsi_slave_config,
 	.bios_param		= ata_std_bios_param,
 	.ordered_flush		= 1,
+	.resume			= ata_scsi_device_resume,
+	.suspend		= ata_scsi_device_suspend,
 };
 
 static struct ata_port_operations sil_ops = {
Index: drivers/scsi/sata_sis.c
===================================================================
--- b023d524fb0a3b71aa0b957ce7c5540611497370/drivers/scsi/sata_sis.c  (mode:100644 sha1:5105ddd08447807df3710492f70c31bf6f0d8767)
+++ uncommitted/drivers/scsi/sata_sis.c  (mode:100644)
@@ -71,6 +71,8 @@
 	.id_table		= sis_pci_tbl,
 	.probe			= sis_init_one,
 	.remove			= ata_pci_remove_one,
+	.suspend		= ata_pci_device_suspend,
+	.resume			= ata_pci_device_resume,
 };
 
 static Scsi_Host_Template sis_sht = {
@@ -91,6 +93,8 @@
 	.slave_configure	= ata_scsi_slave_config,
 	.bios_param		= ata_std_bios_param,
 	.ordered_flush		= 1,
+	.resume			= ata_scsi_device_resume,
+	.suspend		= ata_scsi_device_suspend,
 };
 
 static struct ata_port_operations sis_ops = {
Index: drivers/scsi/sata_svw.c
===================================================================
--- b023d524fb0a3b71aa0b957ce7c5540611497370/drivers/scsi/sata_svw.c  (mode:100644 sha1:8d1a5d25c05364e41e7f98198b94335ed30d0046)
+++ uncommitted/drivers/scsi/sata_svw.c  (mode:100644)
@@ -289,6 +289,8 @@
 #endif
 	.bios_param		= ata_std_bios_param,
 	.ordered_flush		= 1,
+	.resume			= ata_scsi_device_resume,
+	.suspend		= ata_scsi_device_suspend,
 };
 
 
@@ -458,6 +460,8 @@
 	.id_table		= k2_sata_pci_tbl,
 	.probe			= k2_sata_init_one,
 	.remove			= ata_pci_remove_one,
+	.suspend		= ata_pci_device_suspend,
+	.resume			= ata_pci_device_resume,
 };
 
 
Index: drivers/scsi/sata_sx4.c
===================================================================
--- b023d524fb0a3b71aa0b957ce7c5540611497370/drivers/scsi/sata_sx4.c  (mode:100644 sha1:70118650c461e9d5d10ff0c3053ae5b49c6755a0)
+++ uncommitted/drivers/scsi/sata_sx4.c  (mode:100644)
@@ -189,6 +189,8 @@
 	.slave_configure	= ata_scsi_slave_config,
 	.bios_param		= ata_std_bios_param,
 	.ordered_flush		= 1,
+	.resume			= ata_scsi_device_resume,
+	.suspend		= ata_scsi_device_suspend,
 };
 
 static struct ata_port_operations pdc_20621_ops = {
@@ -235,6 +237,8 @@
 	.id_table		= pdc_sata_pci_tbl,
 	.probe			= pdc_sata_init_one,
 	.remove			= ata_pci_remove_one,
+	.suspend		= ata_pci_device_suspend,
+	.resume			= ata_pci_device_resume,
 };
 
 
Index: drivers/scsi/sata_uli.c
===================================================================
--- b023d524fb0a3b71aa0b957ce7c5540611497370/drivers/scsi/sata_uli.c  (mode:100644 sha1:0bff4f475f262c5c40254aecf85bd6a7137bd04e)
+++ uncommitted/drivers/scsi/sata_uli.c  (mode:100644)
@@ -63,6 +63,8 @@
 	.id_table		= uli_pci_tbl,
 	.probe			= uli_init_one,
 	.remove			= ata_pci_remove_one,
+	.suspend		= ata_pci_device_suspend,
+	.resume			= ata_pci_device_resume,
 };
 
 static Scsi_Host_Template uli_sht = {
@@ -83,6 +85,8 @@
 	.slave_configure	= ata_scsi_slave_config,
 	.bios_param		= ata_std_bios_param,
 	.ordered_flush		= 1,
+	.resume			= ata_scsi_device_resume,
+	.suspend		= ata_scsi_device_suspend,
 };
 
 static struct ata_port_operations uli_ops = {
Index: drivers/scsi/sata_via.c
===================================================================
--- b023d524fb0a3b71aa0b957ce7c5540611497370/drivers/scsi/sata_via.c  (mode:100644 sha1:3a7830667277f35f71a83db5705b8cb2d376bafc)
+++ uncommitted/drivers/scsi/sata_via.c  (mode:100644)
@@ -83,6 +83,8 @@
 	.id_table		= svia_pci_tbl,
 	.probe			= svia_init_one,
 	.remove			= ata_pci_remove_one,
+	.suspend		= ata_pci_device_suspend,
+	.resume			= ata_pci_device_resume,
 };
 
 static Scsi_Host_Template svia_sht = {
@@ -103,6 +105,8 @@
 	.slave_configure	= ata_scsi_slave_config,
 	.bios_param		= ata_std_bios_param,
 	.ordered_flush		= 1,
+	.resume			= ata_scsi_device_resume,
+	.suspend		= ata_scsi_device_suspend,
 };
 
 static struct ata_port_operations svia_sata_ops = {
Index: drivers/scsi/sata_vsc.c
===================================================================
--- b023d524fb0a3b71aa0b957ce7c5540611497370/drivers/scsi/sata_vsc.c  (mode:100644 sha1:2c28f0ad73c204420aa4b419a60341a8b801d7fa)
+++ uncommitted/drivers/scsi/sata_vsc.c  (mode:100644)
@@ -206,6 +206,8 @@
 	.slave_configure	= ata_scsi_slave_config,
 	.bios_param		= ata_std_bios_param,
 	.ordered_flush		= 1,
+	.resume			= ata_scsi_device_resume,
+	.suspend		= ata_scsi_device_suspend,
 };
 
 
@@ -382,6 +384,8 @@
 	.id_table		= vsc_sata_pci_tbl,
 	.probe			= vsc_sata_init_one,
 	.remove			= ata_pci_remove_one,
+	.suspend		= ata_pci_device_suspend,
+	.resume			= ata_pci_device_resume,
 };
 
 
Index: drivers/scsi/scsi_lib.c
===================================================================
--- b023d524fb0a3b71aa0b957ce7c5540611497370/drivers/scsi/scsi_lib.c  (mode:100644 sha1:d18da21c9c57e0ef590369ea815b7b1adba4042b)
+++ uncommitted/drivers/scsi/scsi_lib.c  (mode:100644)
@@ -1855,8 +1855,9 @@
 void
 scsi_device_resume(struct scsi_device *sdev)
 {
-	if(scsi_device_set_state(sdev, SDEV_RUNNING))
+	if (scsi_device_set_state(sdev, SDEV_RUNNING))
 		return;
+
 	scsi_run_queue(sdev->request_queue);
 }
 EXPORT_SYMBOL(scsi_device_resume);
Index: drivers/scsi/scsi_sysfs.c
===================================================================
--- b023d524fb0a3b71aa0b957ce7c5540611497370/drivers/scsi/scsi_sysfs.c  (mode:100644 sha1:e75ee4671ee3a0a6dcb608a73d6bd2f4007eff09)
+++ uncommitted/drivers/scsi/scsi_sysfs.c  (mode:100644)
@@ -199,9 +199,40 @@
 	return (sdp->inq_periph_qual == SCSI_INQ_PQ_CON)? 1: 0;
 }
 
+static int scsi_bus_suspend(struct device * dev, pm_message_t state)
+{
+	struct scsi_device *sdev = to_scsi_device(dev);
+	struct scsi_host_template *sht = sdev->host->hostt;
+	int err;
+
+	err = scsi_device_quiesce(sdev);
+	if (err)
+		return err;
+
+	if (sht->suspend)
+		err = sht->suspend(sdev);
+
+	return err;
+}
+
+static int scsi_bus_resume(struct device * dev)
+{
+	struct scsi_device *sdev = to_scsi_device(dev);
+	struct scsi_host_template *sht = sdev->host->hostt;
+	int err = 0;
+
+	if (sht->resume)
+		err = sht->resume(sdev);
+
+	scsi_device_resume(sdev);
+	return err;
+}
+
 struct bus_type scsi_bus_type = {
         .name		= "scsi",
         .match		= scsi_bus_match,
+	.suspend	= scsi_bus_suspend,
+	.resume		= scsi_bus_resume,
 };
 
 int scsi_sysfs_register(void)
Index: include/linux/ata.h
===================================================================
--- b023d524fb0a3b71aa0b957ce7c5540611497370/include/linux/ata.h  (mode:100644 sha1:f178894edd04f954a4f650661175b5fe009011c6)
+++ uncommitted/include/linux/ata.h  (mode:100644)
@@ -125,6 +125,8 @@
 	ATA_CMD_PACKET		= 0xA0,
 	ATA_CMD_VERIFY		= 0x40,
 	ATA_CMD_VERIFY_EXT	= 0x42,
+	ATA_CMD_STANDBYNOW1	= 0xE0,
+	ATA_CMD_IDLEIMMEDIATE	= 0xE1,
 
 	/* SETFEATURES stuff */
 	SETFEATURES_XFER	= 0x03,
Index: include/linux/libata.h
===================================================================
--- b023d524fb0a3b71aa0b957ce7c5540611497370/include/linux/libata.h  (mode:100644 sha1:505160ab472b0198ec5393380ce7c21b2fad85d1)
+++ uncommitted/include/linux/libata.h  (mode:100644)
@@ -113,6 +113,7 @@
 	ATA_FLAG_MMIO		= (1 << 6), /* use MMIO, not PIO */
 	ATA_FLAG_SATA_RESET	= (1 << 7), /* use COMRESET */
 	ATA_FLAG_PIO_DMA	= (1 << 8), /* PIO cmds via DMA */
+	ATA_FLAG_SUSPENDED	= (1 << 9), /* port is suspended */
 
 	ATA_QCFLAG_ACTIVE	= (1 << 1), /* cmd not yet ack'd to scsi lyer */
 	ATA_QCFLAG_SG		= (1 << 3), /* have s/g table? */
@@ -387,6 +388,8 @@
 extern int ata_pci_init_one (struct pci_dev *pdev, struct ata_port_info **port_info,
 			     unsigned int n_ports);
 extern void ata_pci_remove_one (struct pci_dev *pdev);
+extern int ata_pci_device_suspend(struct pci_dev *pdev, pm_message_t state);
+extern int ata_pci_device_resume(struct pci_dev *pdev);
 #endif /* CONFIG_PCI */
 extern int ata_device_add(struct ata_probe_ent *ent);
 extern int ata_scsi_detect(Scsi_Host_Template *sht);
@@ -395,6 +398,10 @@
 extern int ata_scsi_error(struct Scsi_Host *host);
 extern int ata_scsi_release(struct Scsi_Host *host);
 extern unsigned int ata_host_intr(struct ata_port *ap, struct ata_queued_cmd *qc);
+extern int ata_scsi_device_resume(struct scsi_device *);
+extern int ata_scsi_device_suspend(struct scsi_device *);
+extern int ata_device_resume(struct ata_port *, struct ata_device *);
+extern int ata_device_suspend(struct ata_port *, struct ata_device *);
 /*
  * Default driver ops implementations
  */
Index: include/scsi/scsi_host.h
===================================================================
--- b023d524fb0a3b71aa0b957ce7c5540611497370/include/scsi/scsi_host.h  (mode:100644 sha1:1cee1e100943dafe00c14d5329b73659062a4d1a)
+++ uncommitted/include/scsi/scsi_host.h  (mode:100644)
@@ -270,6 +270,12 @@
 	int (*proc_info)(struct Scsi_Host *, char *, char **, off_t, int, int);
 
 	/*
+	 * suspend support
+	 */
+	int (*resume)(struct scsi_device *);
+	int (*suspend)(struct scsi_device *);
+
+	/*
 	 * Name of proc directory
 	 */
 	char *proc_name;


^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH] libata: device suspend/resume
  2005-05-23 20:15 [PATCH] libata: device suspend/resume Jeff Garzik
@ 2005-05-23 20:41 ` James Bottomley
  2005-05-23 20:45   ` Jeff Garzik
  2005-05-24 16:27   ` Mark Lord
  2005-05-24 16:04 ` Mark Lord
  1 sibling, 2 replies; 44+ messages in thread
From: James Bottomley @ 2005-05-23 20:41 UTC (permalink / raw)
  To: Jeff Garzik; +Cc: SCSI Mailing List, linux-ide

On Mon, 2005-05-23 at 16:15 -0400, Jeff Garzik wrote:
> Patch is via Mark Lord, 

Um, it is? it looks identical to this one from Jens (down to the
gratuitous style corrections)

http://marc.theaimsgroup.com/?t=111409327100001

However, before anything happens I'd like to get the SCSI pieces
correct.  The thread petered out before a resolution, however, I think
at least disk devices should be doing a synchronize cache on suspend.

James



^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH] libata: device suspend/resume
  2005-05-23 20:41 ` James Bottomley
@ 2005-05-23 20:45   ` Jeff Garzik
  2005-05-23 22:10     ` James Bottomley
  2005-05-24 16:27   ` Mark Lord
  1 sibling, 1 reply; 44+ messages in thread
From: Jeff Garzik @ 2005-05-23 20:45 UTC (permalink / raw)
  To: James Bottomley; +Cc: SCSI Mailing List, linux-ide

On Mon, May 23, 2005 at 03:41:15PM -0500, James Bottomley wrote:
> On Mon, 2005-05-23 at 16:15 -0400, Jeff Garzik wrote:
> > Patch is via Mark Lord, 
> 
> Um, it is? it looks identical to this one from Jens (down to the
> gratuitous style corrections)
> 
> http://marc.theaimsgroup.com/?t=111409327100001

Got it "via" Mark Lord; I didn't know the proper attribution.


> However, before anything happens I'd like to get the SCSI pieces
> correct.

The "anything" I want to happen here is getting the SCSI pieces correct ;-)



> The thread petered out before a resolution, however, I think
> at least disk devices should be doing a synchronize cache on suspend.

Agreed.

	Jeff




^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH] libata: device suspend/resume
  2005-05-23 20:45   ` Jeff Garzik
@ 2005-05-23 22:10     ` James Bottomley
  2005-05-24  6:21       ` Jens Axboe
  2005-05-27  2:54       ` Jeff Garzik
  0 siblings, 2 replies; 44+ messages in thread
From: James Bottomley @ 2005-05-23 22:10 UTC (permalink / raw)
  To: Jeff Garzik; +Cc: SCSI Mailing List, linux-ide, Jens Axboe

On Mon, 2005-05-23 at 16:45 -0400, Jeff Garzik wrote:
> On Mon, May 23, 2005 at 03:41:15PM -0500, James Bottomley wrote:
> > The thread petered out before a resolution, however, I think
> > at least disk devices should be doing a synchronize cache on suspend.
> 
> Agreed.

OK, try this as a straw horse.  It plumbs our scsi bus model into
suspend and resume via the driver (which would be the ULD) function.
The only ULD which has an extra function is sd, and all it does is
synchronize the cache if it was writeback.  I believe you translate SYNC
CACHE in libata, so this patch should be sufficient (unless we still
have a problem with ATA devices that lie about their cache types?)

I think this is sufficient, because if the LLD also wants this
information, it can get it from the model of the bus it's attached to.

James

--- a/drivers/scsi/scsi_sysfs.c
+++ b/drivers/scsi/scsi_sysfs.c
@@ -199,10 +200,37 @@ static int scsi_bus_match(struct device 
 	return (sdp->inq_periph_qual == SCSI_INQ_PQ_CON)? 1: 0;
 }
 
+static int scsi_bus_suspend(struct device *dev, pm_message_t state)
+{
+	struct scsi_device *sdev = to_scsi_device(dev);
+	int err = scsi_device_quiesce(sdev);
+
+	if (err)
+		return err;
+
+	if(dev->driver && dev->driver->suspend)
+		return dev->driver->suspend(dev, state, 0);
+	return 0;
+}
+
+static int scsi_bus_resume(struct device *dev)
+{
+	struct scsi_device *sdev = to_scsi_device(dev);
+	int err = 0;
+
+	if(dev->driver && dev->driver->resume)
+		err = dev->driver->resume(dev, 0);
+
+	scsi_device_resume(sdev);
+	return err;
+}
+
 struct bus_type scsi_bus_type = {
         .name		= "scsi",
         .match		= scsi_bus_match,
+	.suspend	= scsi_bus_suspend,
+	.resume		= scsi_bus_resume
 };
 
 int scsi_sysfs_register(void)
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -119,6 +119,7 @@ static void sd_rw_intr(struct scsi_cmnd 
 static int sd_probe(struct device *);
 static int sd_remove(struct device *);
 static void sd_shutdown(struct device *dev);
+static int sd_suspend(struct device *dev, pm_message_t state, u32 level);
 static void sd_rescan(struct device *);
 static int sd_init_command(struct scsi_cmnd *);
 static int sd_issue_flush(struct device *, sector_t *);
@@ -134,6 +135,7 @@ static struct scsi_driver sd_template = 
 		.probe		= sd_probe,
 		.remove		= sd_remove,
 		.shutdown	= sd_shutdown,
+		.suspend	= sd_suspend,
 	},
 	.rescan			= sd_rescan,
 	.init_command		= sd_init_command,
@@ -1710,7 +1712,28 @@ static void sd_shutdown(struct device *d
 	printk(KERN_NOTICE "Synchronizing SCSI cache for disk %s: \n",
 			sdkp->disk->disk_name);
 	sd_sync_cache(sdp);
-}	
+}
+
+/* Just quietly quiesce the device and SYNCHRONIZE CACHE for suspend too */
+static int sd_suspend(struct device *dev, pm_message_t state, u32 level)
+{
+	struct scsi_device *sdp = to_scsi_device(dev);
+	struct scsi_disk *sdkp = dev_get_drvdata(dev);
+
+	if (!sdkp)
+		return 0;         /* this can happen */
+
+	if (!sdkp->WCE)
+		return 0;
+
+	/* don't try to sync an offline device ... it will only error */
+	if (!scsi_device_online(sdp))
+		return 0;
+
+	if (sd_sync_cache(sdp))
+		return -EIO;
+	return 0;
+}
 
 /**
  *	init_sd - entry point for this driver (both when built in or when



^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH] libata: device suspend/resume
  2005-05-23 22:10     ` James Bottomley
@ 2005-05-24  6:21       ` Jens Axboe
  2005-05-24  6:53         ` Jeff Garzik
  2005-05-24 13:05         ` Luben Tuikov
  2005-05-27  2:54       ` Jeff Garzik
  1 sibling, 2 replies; 44+ messages in thread
From: Jens Axboe @ 2005-05-24  6:21 UTC (permalink / raw)
  To: James Bottomley; +Cc: Jeff Garzik, SCSI Mailing List, linux-ide

On Mon, May 23 2005, James Bottomley wrote:
> On Mon, 2005-05-23 at 16:45 -0400, Jeff Garzik wrote:
> > On Mon, May 23, 2005 at 03:41:15PM -0500, James Bottomley wrote:
> > > The thread petered out before a resolution, however, I think
> > > at least disk devices should be doing a synchronize cache on suspend.
> > 
> > Agreed.
> 
> OK, try this as a straw horse.  It plumbs our scsi bus model into
> suspend and resume via the driver (which would be the ULD) function.
> The only ULD which has an extra function is sd, and all it does is
> synchronize the cache if it was writeback.  I believe you translate SYNC
> CACHE in libata, so this patch should be sufficient (unless we still
> have a problem with ATA devices that lie about their cache types?)
> 
> I think this is sufficient, because if the LLD also wants this
> information, it can get it from the model of the bus it's attached to.

suspend/resume is a lot more complicated than just flushing a cache, the
below will probably get you safe asleep but you will never get devices
alive again after power-up on suspend-to-ram.

I also greatly prefer issuing a standby command to the drive after the
flush, so that we don't risk using the emergency parks of the drive. If
a drive happens to lie about wrt the flush command, it gets an extra
chance to flush the cache as it now knows that power will be gone very
soon. So I think the ->suspend/->resume hooks should belong to the LLD,
not the ULD as the ULD has no idea how to suspend all devices types.


-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH] libata: device suspend/resume
  2005-05-24  6:21       ` Jens Axboe
@ 2005-05-24  6:53         ` Jeff Garzik
  2005-05-24  7:06           ` Hannes Reinecke
                             ` (3 more replies)
  2005-05-24 13:05         ` Luben Tuikov
  1 sibling, 4 replies; 44+ messages in thread
From: Jeff Garzik @ 2005-05-24  6:53 UTC (permalink / raw)
  To: Jens Axboe; +Cc: James Bottomley, SCSI Mailing List, linux-ide

Jens Axboe wrote:
> suspend/resume is a lot more complicated than just flushing a cache, the
> below will probably get you safe asleep but you will never get devices
> alive again after power-up on suspend-to-ram.
> 
> I also greatly prefer issuing a standby command to the drive after the
> flush, so that we don't risk using the emergency parks of the drive. If
> a drive happens to lie about wrt the flush command, it gets an extra
> chance to flush the cache as it now knows that power will be gone very
> soon. So I think the ->suspend/->resume hooks should belong to the LLD,
> not the ULD as the ULD has no idea how to suspend all devices types.


You are right about issuing the standby command after flush.  I don't 
think an LLD hook is the proper way to accomplish it, however.

The SCSI layer needs to issue the START STOP UNIT command in response to 
a suspend event, and libata-scsi will (per SAT spec) translate that into 
the ATA standby command.  Merely following the relevant SCSI+SAT+ATA 
standards gets us there.

Longer term, SATA PM through SCSI will have three facets:

* Device PM.  This is best handled by the device class driver (sd/sr/st).

* Bus PM.  This is best handled by the transport class driver (need to 
write for SATA and SAS).

* Host PM.  This is handled in the obvious manner, using existing PM 
driver hooks.  PCI D0/D3, etc.

I can describe how this will look when libata is divorced from SCSI, if 
you would like, too...

	Jeff



^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH] libata: device suspend/resume
  2005-05-24  6:53         ` Jeff Garzik
@ 2005-05-24  7:06           ` Hannes Reinecke
  2005-05-24  7:08             ` Jens Axboe
  2005-05-24  7:16             ` Jeff Garzik
  2005-05-24  7:07           ` Jens Axboe
                             ` (2 subsequent siblings)
  3 siblings, 2 replies; 44+ messages in thread
From: Hannes Reinecke @ 2005-05-24  7:06 UTC (permalink / raw)
  To: Jeff Garzik; +Cc: Jens Axboe, James Bottomley, SCSI Mailing List, linux-ide

Jeff Garzik wrote:
[ .. ]
> 
> Longer term, SATA PM through SCSI will have three facets:
> 
> * Device PM.  This is best handled by the device class driver (sd/sr/st).
> 
> * Bus PM.  This is best handled by the transport class driver (need to
> write for SATA and SAS).
> 
> * Host PM.  This is handled in the obvious manner, using existing PM
> driver hooks.  PCI D0/D3, etc.
> 
> I can describe how this will look when libata is divorced from SCSI, if
> you would like, too...

Divorced from SCSI? Care to elaborate?

Cheers,

Hannes
-- 
Dr. Hannes Reinecke			hare@suse.de
SuSE Linux AG				S390 & zSeries
Maxfeldstraße 5				+49 911 74053 688
90409 Nürnberg				http://www.suse.de
-
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH] libata: device suspend/resume
  2005-05-24  6:53         ` Jeff Garzik
  2005-05-24  7:06           ` Hannes Reinecke
@ 2005-05-24  7:07           ` Jens Axboe
  2005-05-24  7:10             ` Jeff Garzik
  2005-05-24  7:59           ` Jens Axboe
  2005-05-24 13:48           ` Luben Tuikov
  3 siblings, 1 reply; 44+ messages in thread
From: Jens Axboe @ 2005-05-24  7:07 UTC (permalink / raw)
  To: Jeff Garzik; +Cc: James Bottomley, SCSI Mailing List, linux-ide

On Tue, May 24 2005, Jeff Garzik wrote:
> Jens Axboe wrote:
> >suspend/resume is a lot more complicated than just flushing a cache, the
> >below will probably get you safe asleep but you will never get devices
> >alive again after power-up on suspend-to-ram.
> >
> >I also greatly prefer issuing a standby command to the drive after the
> >flush, so that we don't risk using the emergency parks of the drive. If
> >a drive happens to lie about wrt the flush command, it gets an extra
> >chance to flush the cache as it now knows that power will be gone very
> >soon. So I think the ->suspend/->resume hooks should belong to the LLD,
> >not the ULD as the ULD has no idea how to suspend all devices types.
> 
> 
> You are right about issuing the standby command after flush.  I don't 
> think an LLD hook is the proper way to accomplish it, however.
> 
> The SCSI layer needs to issue the START STOP UNIT command in response to 
> a suspend event, and libata-scsi will (per SAT spec) translate that into 
> the ATA standby command.  Merely following the relevant SCSI+SAT+ATA 
> standards gets us there.

That sounds fine!

> Longer term, SATA PM through SCSI will have three facets:
> 
> * Device PM.  This is best handled by the device class driver (sd/sr/st).
> 
> * Bus PM.  This is best handled by the transport class driver (need to 
> write for SATA and SAS).
> 
> * Host PM.  This is handled in the obvious manner, using existing PM 
> driver hooks.  PCI D0/D3, etc.
> 
> I can describe how this will look when libata is divorced from SCSI, if 
> you would like, too...

I was beginning to dispair you had given up that plan...

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH] libata: device suspend/resume
  2005-05-24  7:06           ` Hannes Reinecke
@ 2005-05-24  7:08             ` Jens Axboe
  2005-05-24  7:16             ` Jeff Garzik
  1 sibling, 0 replies; 44+ messages in thread
From: Jens Axboe @ 2005-05-24  7:08 UTC (permalink / raw)
  To: Hannes Reinecke
  Cc: Jeff Garzik, James Bottomley, SCSI Mailing List, linux-ide

On Tue, May 24 2005, Hannes Reinecke wrote:
> Jeff Garzik wrote:
> [ .. ]
> > 
> > Longer term, SATA PM through SCSI will have three facets:
> > 
> > * Device PM.  This is best handled by the device class driver (sd/sr/st).
> > 
> > * Bus PM.  This is best handled by the transport class driver (need to
> > write for SATA and SAS).
> > 
> > * Host PM.  This is handled in the obvious manner, using existing PM
> > driver hooks.  PCI D0/D3, etc.
> > 
> > I can describe how this will look when libata is divorced from SCSI, if
> > you would like, too...
> 
> Divorced from SCSI? Care to elaborate?

libata being a scsi citizen was just a temporary solution, Jeff has
promised to turn it into a real block driver for years :)

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH] libata: device suspend/resume
  2005-05-24  7:07           ` Jens Axboe
@ 2005-05-24  7:10             ` Jeff Garzik
  2005-05-24  7:13               ` Jens Axboe
  2005-05-24  7:14               ` [PATCH] libata: device suspend/resume Hannes Reinecke
  0 siblings, 2 replies; 44+ messages in thread
From: Jeff Garzik @ 2005-05-24  7:10 UTC (permalink / raw)
  To: Jens Axboe; +Cc: James Bottomley, SCSI Mailing List, linux-ide

Jens Axboe wrote:
> On Tue, May 24 2005, Jeff Garzik wrote:
>>I can describe how this will look when libata is divorced from SCSI, if 
>>you would like, too...
> 
> I was beginning to dispair you had given up that plan...

hehe, nope.  I promised Linus, and I plan to keep my promise :)

I know how to do it.  Internally things have been kept as separate as 
possible from the SCSI layer.

Bart even hinted at possibly using libata-without-SCSI for future 
/dev/hdX support.  It's certainly doable.

	Jeff




^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH] libata: device suspend/resume
  2005-05-24  7:10             ` Jeff Garzik
@ 2005-05-24  7:13               ` Jens Axboe
  2005-05-27  2:49                 ` libata, SCSI and storage drivers Jeff Garzik
  2005-05-24  7:14               ` [PATCH] libata: device suspend/resume Hannes Reinecke
  1 sibling, 1 reply; 44+ messages in thread
From: Jens Axboe @ 2005-05-24  7:13 UTC (permalink / raw)
  To: Jeff Garzik; +Cc: James Bottomley, SCSI Mailing List, linux-ide

On Tue, May 24 2005, Jeff Garzik wrote:
> Jens Axboe wrote:
> >On Tue, May 24 2005, Jeff Garzik wrote:
> >>I can describe how this will look when libata is divorced from SCSI, if 
> >>you would like, too...
> >
> >I was beginning to dispair you had given up that plan...
> 
> hehe, nope.  I promised Linus, and I plan to keep my promise :)

You promised me, too :)

> I know how to do it.  Internally things have been kept as separate as 
> possible from the SCSI layer.

We should start a list of items that could potentially be moved to the
block layer that libata currently uses.
> 
> Bart even hinted at possibly using libata-without-SCSI for future 
> /dev/hdX support.  It's certainly doable.

>From my point of view, I think it would be a good way to kick off
/dev/disk device agnostic mappings.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH] libata: device suspend/resume
  2005-05-24  7:10             ` Jeff Garzik
  2005-05-24  7:13               ` Jens Axboe
@ 2005-05-24  7:14               ` Hannes Reinecke
  2005-05-24  7:15                 ` Jens Axboe
  2005-05-24  7:18                 ` Jeff Garzik
  1 sibling, 2 replies; 44+ messages in thread
From: Hannes Reinecke @ 2005-05-24  7:14 UTC (permalink / raw)
  To: Jeff Garzik; +Cc: Jens Axboe, James Bottomley, SCSI Mailing List, linux-ide

Jeff Garzik wrote:
> Jens Axboe wrote:
>> On Tue, May 24 2005, Jeff Garzik wrote:
>>> I can describe how this will look when libata is divorced from SCSI,
>>> if you would like, too...
>>
>> I was beginning to dispair you had given up that plan...
> 
> hehe, nope.  I promised Linus, and I plan to keep my promise :)
> 
> I know how to do it.  Internally things have been kept as separate as
> possible from the SCSI layer.
> 
> Bart even hinted at possibly using libata-without-SCSI for future
> /dev/hdX support.  It's certainly doable.
> 
AAArgh! No, not again!

We've finally succeeded in convincing everyone that having '/dev/sdX'
instead of '/dev/hdX' was a neccessary step forward.
I think we're getting shot if we now tell them to reverse this _again_.
Can we at least have it optional?

Or are you talking about linux-3.X ?

Cheers,

Hannes
-- 
Dr. Hannes Reinecke			hare@suse.de
SuSE Linux AG				S390 & zSeries
Maxfeldstraße 5				+49 911 74053 688
90409 Nürnberg				http://www.suse.de

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH] libata: device suspend/resume
  2005-05-24  7:14               ` [PATCH] libata: device suspend/resume Hannes Reinecke
@ 2005-05-24  7:15                 ` Jens Axboe
  2005-05-24  7:18                 ` Jeff Garzik
  1 sibling, 0 replies; 44+ messages in thread
From: Jens Axboe @ 2005-05-24  7:15 UTC (permalink / raw)
  To: Hannes Reinecke
  Cc: Jeff Garzik, James Bottomley, SCSI Mailing List, linux-ide

On Tue, May 24 2005, Hannes Reinecke wrote:
> Jeff Garzik wrote:
> > Jens Axboe wrote:
> >> On Tue, May 24 2005, Jeff Garzik wrote:
> >>> I can describe how this will look when libata is divorced from SCSI,
> >>> if you would like, too...
> >>
> >> I was beginning to dispair you had given up that plan...
> > 
> > hehe, nope.  I promised Linus, and I plan to keep my promise :)
> > 
> > I know how to do it.  Internally things have been kept as separate as
> > possible from the SCSI layer.
> > 
> > Bart even hinted at possibly using libata-without-SCSI for future
> > /dev/hdX support.  It's certainly doable.
> > 
> AAArgh! No, not again!
> 
> We've finally succeeded in convincing everyone that having '/dev/sdX'
> instead of '/dev/hdX' was a neccessary step forward.
> I think we're getting shot if we now tell them to reverse this _again_.
> Can we at least have it optional?

Moving to /dev/sdX was never a step forward, it's was merely a necessary
side ways step because libata used the scsi layer. A step forward would
be device agnostic /dev/disk mappings instead.

> Or are you talking about linux-3.X ?

Hopefully before that :-)

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH] libata: device suspend/resume
  2005-05-24  7:06           ` Hannes Reinecke
  2005-05-24  7:08             ` Jens Axboe
@ 2005-05-24  7:16             ` Jeff Garzik
  1 sibling, 0 replies; 44+ messages in thread
From: Jeff Garzik @ 2005-05-24  7:16 UTC (permalink / raw)
  To: Hannes Reinecke; +Cc: Jens Axboe, James Bottomley, SCSI Mailing List, linux-ide

Hannes Reinecke wrote:
> Jeff Garzik wrote:
> [ .. ]
> 
>>Longer term, SATA PM through SCSI will have three facets:
>>
>>* Device PM.  This is best handled by the device class driver (sd/sr/st).
>>
>>* Bus PM.  This is best handled by the transport class driver (need to
>>write for SATA and SAS).
>>
>>* Host PM.  This is handled in the obvious manner, using existing PM
>>driver hooks.  PCI D0/D3, etc.
>>
>>I can describe how this will look when libata is divorced from SCSI, if
>>you would like, too...
> 
> 
> Divorced from SCSI? Care to elaborate?


General Linux thesis:  translation and emulation are lame.  Adds 
needless overhead.  One should write a "bare metal" driver.

libata should be able to export a native block driver for ATA devices, 
making libata-scsi.c simply an optional "T10 SAT implementation" that 
users can retain for compatibility and certain features, or excise for 
lower overhead.

As ATAPI is converging with SCSI MMC -- the two committes are actively 
aligning the standards -- I would prefer to handle ATAPI devices through 
the SCSI layer.

	Jeff



^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH] libata: device suspend/resume
  2005-05-24  7:14               ` [PATCH] libata: device suspend/resume Hannes Reinecke
  2005-05-24  7:15                 ` Jens Axboe
@ 2005-05-24  7:18                 ` Jeff Garzik
  2005-05-24 10:17                   ` Douglas Gilbert
  1 sibling, 1 reply; 44+ messages in thread
From: Jeff Garzik @ 2005-05-24  7:18 UTC (permalink / raw)
  To: Hannes Reinecke; +Cc: Jens Axboe, James Bottomley, SCSI Mailing List, linux-ide

Hannes Reinecke wrote:
> We've finally succeeded in convincing everyone that having '/dev/sdX'
> instead of '/dev/hdX' was a neccessary step forward.
> I think we're getting shot if we now tell them to reverse this _again_.
> Can we at least have it optional?

I will not break compatibility with /dev/sdX, don't worry :)

It will always be there as an optional piece, as I described in my other 
reply to you.

	Jeff



^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH] libata: device suspend/resume
  2005-05-24  6:53         ` Jeff Garzik
  2005-05-24  7:06           ` Hannes Reinecke
  2005-05-24  7:07           ` Jens Axboe
@ 2005-05-24  7:59           ` Jens Axboe
  2005-05-24  8:21             ` Jeff Garzik
  2005-05-24 13:48           ` Luben Tuikov
  3 siblings, 1 reply; 44+ messages in thread
From: Jens Axboe @ 2005-05-24  7:59 UTC (permalink / raw)
  To: Jeff Garzik; +Cc: James Bottomley, SCSI Mailing List, linux-ide

On Tue, May 24 2005, Jeff Garzik wrote:
> The SCSI layer needs to issue the START STOP UNIT command in response to 
> a suspend event, and libata-scsi will (per SAT spec) translate that into 
> the ATA standby command.  Merely following the relevant SCSI+SAT+ATA 
> standards gets us there.

BTW, you also need to set host/device modes on resume. Where do you
propose to do that currently, without the LLD hook?

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH] libata: device suspend/resume
  2005-05-24  7:59           ` Jens Axboe
@ 2005-05-24  8:21             ` Jeff Garzik
  2005-05-24  8:51               ` Jens Axboe
  0 siblings, 1 reply; 44+ messages in thread
From: Jeff Garzik @ 2005-05-24  8:21 UTC (permalink / raw)
  To: Jens Axboe; +Cc: James Bottomley, SCSI Mailing List, linux-ide

Jens Axboe wrote:
> On Tue, May 24 2005, Jeff Garzik wrote:
> 
>>The SCSI layer needs to issue the START STOP UNIT command in response to 
>>a suspend event, and libata-scsi will (per SAT spec) translate that into 
>>the ATA standby command.  Merely following the relevant SCSI+SAT+ATA 
>>standards gets us there.
> 
> 
> BTW, you also need to set host/device modes on resume. Where do you
> propose to do that currently, without the LLD hook?

In the START STOP UNIT emulation.  SSU is basically a high level "do PM 
command".



As a tangent, I would like to try and convince you to stop thinking so 
much about hooks, and start thinking about adding new commands at the 
struct request level.  Sending things down the request_queue is much 
nicer because commands can be sent using the normal rq mechanisms. 
Often, commands can be translated directly into ATA or SCSI commands in 
the prep function.

Two commands I have been thinking about, to add alongside READ, WRITE, 
READA, READ_SYNC, etc. are:  SYNC_CACHE and PM_EVENT.

This would be quite useful for DM and md RAID, especially.  It becomes 
trivial to clone a SYNC_CACHE command, and send it to 5 underlying ATA 
and SCSI devices in a RAID array.  If the PM_EVENT or SYNC_CACHE command 
needs special processing, one can easily add code for that without 
needing any hooks -- just call your new code from the request_queue 
function that handles READ/WRITE/READA/...

This applies to the current thread (PM_EVENT), and also fits right into 
your barrier work, too (SYNC_CACHE).

	Jeff



^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH] libata: device suspend/resume
  2005-05-24  8:21             ` Jeff Garzik
@ 2005-05-24  8:51               ` Jens Axboe
  2005-05-24 16:37                 ` Jeff Garzik
  0 siblings, 1 reply; 44+ messages in thread
From: Jens Axboe @ 2005-05-24  8:51 UTC (permalink / raw)
  To: Jeff Garzik; +Cc: James Bottomley, SCSI Mailing List, linux-ide

On Tue, May 24 2005, Jeff Garzik wrote:
> Jens Axboe wrote:
> >On Tue, May 24 2005, Jeff Garzik wrote:
> >
> >>The SCSI layer needs to issue the START STOP UNIT command in response to 
> >>a suspend event, and libata-scsi will (per SAT spec) translate that into 
> >>the ATA standby command.  Merely following the relevant SCSI+SAT+ATA 
> >>standards gets us there.
> >
> >
> >BTW, you also need to set host/device modes on resume. Where do you
> >propose to do that currently, without the LLD hook?
> 
> In the START STOP UNIT emulation.  SSU is basically a high level "do PM 
> command".

Ok, works for me.

> As a tangent, I would like to try and convince you to stop thinking so 
> much about hooks, and start thinking about adding new commands at the 
> struct request level.  Sending things down the request_queue is much 
> nicer because commands can be sent using the normal rq mechanisms. 
> Often, commands can be translated directly into ATA or SCSI commands in 
> the prep function.
> 
> Two commands I have been thinking about, to add alongside READ, WRITE, 
> READA, READ_SYNC, etc. are:  SYNC_CACHE and PM_EVENT.
> 
> This would be quite useful for DM and md RAID, especially.  It becomes 
> trivial to clone a SYNC_CACHE command, and send it to 5 underlying ATA 
> and SCSI devices in a RAID array.  If the PM_EVENT or SYNC_CACHE command 
> needs special processing, one can easily add code for that without 
> needing any hooks -- just call your new code from the request_queue 
> function that handles READ/WRITE/READA/...
> 
> This applies to the current thread (PM_EVENT), and also fits right into 
> your barrier work, too (SYNC_CACHE).

I agree, it's a cleaner approach, with the rq being a container for
generel messages as well not just SCSI commands. The one missing piece
for that was the rq->end_io() callback so everything doesn't have to go
down sync, but that is in now as well.

I'll try and cook something up.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH] libata: device suspend/resume
  2005-05-24  7:18                 ` Jeff Garzik
@ 2005-05-24 10:17                   ` Douglas Gilbert
  2005-05-24 17:10                     ` Jeff Garzik
  0 siblings, 1 reply; 44+ messages in thread
From: Douglas Gilbert @ 2005-05-24 10:17 UTC (permalink / raw)
  To: Jeff Garzik
  Cc: Hannes Reinecke, Jens Axboe, James Bottomley, SCSI Mailing List,
	linux-ide

Jeff Garzik wrote:
> Hannes Reinecke wrote:
> 
>> We've finally succeeded in convincing everyone that having '/dev/sdX'
>> instead of '/dev/hdX' was a neccessary step forward.
>> I think we're getting shot if we now tell them to reverse this _again_.
>> Can we at least have it optional?
> 
> 
> I will not break compatibility with /dev/sdX, don't worry :)
> 
> It will always be there as an optional piece, as I described in my other 
> reply to you.

Jeff,
At the risk of boring you but informing others, Serial
Attached SCSI (SAS) is going to muddy things.

The SCSI ATA Translation (SAT
http://www.t10.org/ftp/t10/drafts/sat/sat-r04.pdf )
draft hopefully will find application any many areas
other than libata (e.g. USB mass storage and 1394's
SBP-2 drivers). The Scope section (section 1) of that
draft makes interesting reading: in point a) the author
sees SAT as presenting a unified command interface for
block type devices across PATA, SATA and ATAPI!

However the primary reason that SAT started is the
fact that SAS infrastructure can include SATA disks.
SAS defines the Serial ATA Tunneled Protocol (STP)
which in linux will first appear in a SAS LLD (i.e.
a scsi subsystem Low Level Driver, something like
aic7xxx). Connecting a SATA disk (transported via SAS)
to the IDE subsystem and a /dev/hd? device node seems
like an unnatural act (a bit like ide-scsi in reverse).
A solution involving libata seems appropriate in this case.

Even more likely is that a SAT layer will be implemented
in SAS enclosures just inches away from lots of SATA
disks. From the point of view of a linux machine
such SATA disks talk the SCSI command set and are
connected via a SAS transport (Serial SCSI Protocol (SSP)).
The only things that will probably care about the
difference between such SATA disks and SAS disks are
utilities like smartmontools (and that gap is being
partially closed as well).

Even without SAS we already have many fibre channel
enclosures that bridge to SATA disks. Hopefully a
SAT layer will be retro-fitted into some of those.

Perhaps libata could be renamed libsat.

Doug Gilbert


^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH] libata: device suspend/resume
  2005-05-24  6:21       ` Jens Axboe
  2005-05-24  6:53         ` Jeff Garzik
@ 2005-05-24 13:05         ` Luben Tuikov
  1 sibling, 0 replies; 44+ messages in thread
From: Luben Tuikov @ 2005-05-24 13:05 UTC (permalink / raw)
  To: Jens Axboe; +Cc: James Bottomley, Jeff Garzik, SCSI Mailing List, linux-ide

On 05/24/05 02:21, Jens Axboe wrote:
> suspend/resume is a lot more complicated than just flushing a cache, the
> below will probably get you safe asleep but you will never get devices
> alive again after power-up on suspend-to-ram.
> 
> I also greatly prefer issuing a standby command to the drive after the
> flush, so that we don't risk using the emergency parks of the drive. If
> a drive happens to lie about wrt the flush command, it gets an extra
> chance to flush the cache as it now knows that power will be gone very
> soon. So I think the ->suspend/->resume hooks should belong to the LLD,
> not the ULD as the ULD has no idea how to suspend all devices types.

Very, very true and very, very correct!

	Luben


^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH] libata: device suspend/resume
  2005-05-24  6:53         ` Jeff Garzik
                             ` (2 preceding siblings ...)
  2005-05-24  7:59           ` Jens Axboe
@ 2005-05-24 13:48           ` Luben Tuikov
  2005-05-24 17:29             ` Jeff Garzik
  3 siblings, 1 reply; 44+ messages in thread
From: Luben Tuikov @ 2005-05-24 13:48 UTC (permalink / raw)
  To: Jeff Garzik; +Cc: Jens Axboe, James Bottomley, SCSI Mailing List, linux-ide

On 05/24/05 02:53, Jeff Garzik wrote:
> You are right about issuing the standby command after flush.  I don't 
> think an LLD hook is the proper way to accomplish it, however.
> 
> The SCSI layer needs to issue the START STOP UNIT command in response to 
> a suspend event, and libata-scsi will (per SAT spec) translate that into 
> the ATA standby command.  Merely following the relevant SCSI+SAT+ATA 
> standards gets us there.

An SSU command may not be viable as the device may be shared
on the domain (i.e. accessed by another initiator).
 
> Longer term, SATA PM through SCSI will have three facets:
> 
> * Device PM.  This is best handled by the device class driver (sd/sr/st).

In terms of cache syncronization yes.  But we may not be the only initiator.
 
> * Bus PM.  This is best handled by the transport class driver (need to 
> write for SATA and SAS).

This is tricky.  Kudos to Doug's answer.  One thing to remember is that
this is intricately entangled with host PM.  That is, if a device
is unplugged from the domain -- would one want to computer to come
out of its current power saving mode?  Probably not.
 
> * Host PM.  This is handled in the obvious manner, using existing PM 
> driver hooks.  PCI D0/D3, etc.

Then D3->D0 would be equivalent to reinintialization of the PCI device,
doing domain discovery as usual, etc.
 
> I can describe how this will look when libata is divorced from SCSI, if 
> you would like, too...

Please Jeff.

A concern is SATA devices behind SAS HAs (host adapters).  A straightforward
translation provided by libata would be a goal.

	Luben



^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH] libata: device suspend/resume
  2005-05-23 20:15 [PATCH] libata: device suspend/resume Jeff Garzik
  2005-05-23 20:41 ` James Bottomley
@ 2005-05-24 16:04 ` Mark Lord
  1 sibling, 0 replies; 44+ messages in thread
From: Mark Lord @ 2005-05-24 16:04 UTC (permalink / raw)
  To: Jeff Garzik; +Cc: linux-scsi, linux-ide, James Bottomley, axboe

Jeff Garzik wrote:
> Patch 2/2 for review by the linux-scsi crowd.
> 
> Please do not apply.
> 
> 
> 
> Patch is via Mark Lord, 
 >

The original patch was from Jens Axboe (now copied).

Cheers
--
Mark Lord

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH] libata: device suspend/resume
  2005-05-23 20:41 ` James Bottomley
  2005-05-23 20:45   ` Jeff Garzik
@ 2005-05-24 16:27   ` Mark Lord
  2005-05-24 16:33     ` Mark Lord
  1 sibling, 1 reply; 44+ messages in thread
From: Mark Lord @ 2005-05-24 16:27 UTC (permalink / raw)
  To: James Bottomley; +Cc: Jeff Garzik, SCSI Mailing List, linux-ide

 >> Patch is via Mark Lord,
 >
 >Um, it is? it looks identical to this one from Jens
...

As Jeff said, he got the patch "via" me.
Here are the first few lines from the file I sent to Jeff (below),
just to make it clear I never tried to claim it as my own.

Cheers
--
Mark Lord


From: Hannes Reinecke <hare@suse.de>
Subject: Fix sata atapi error handling
References: 70918

SCSI commands which end up on the error handler need special attention;
we have to make sure that eh_cmd_q is properly emptied or scsi_eh will
try to forever finalize the command.

With this patch eh_cmd_q is explicitely emptied if not done so in the
strategy handler and a proper abort sequence is executed for each
command if required.
We rely on the strategy handler to fill out proper sense information for
us as SATA is 'special' when it comes to command sense gathering.

Signed-off-by: Kurt Garloff <garloff@suse.de>
Signed-off-by: Jens Axboe <axboe@suse.de>
Acked-by: Andreas Gruenbacher <agruen@suse.de>
...


^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH] libata: device suspend/resume
  2005-05-24 16:27   ` Mark Lord
@ 2005-05-24 16:33     ` Mark Lord
  0 siblings, 0 replies; 44+ messages in thread
From: Mark Lord @ 2005-05-24 16:33 UTC (permalink / raw)
  To: Mark Lord; +Cc: James Bottomley, Jeff Garzik, SCSI Mailing List, linux-ide

Oh crap, wrong attachment..
It appears I did at some point trim off Jens's header.
Sorry about that, Jens!

Cheers

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH] libata: device suspend/resume
  2005-05-24  8:51               ` Jens Axboe
@ 2005-05-24 16:37                 ` Jeff Garzik
  2005-05-25  9:29                   ` Jens Axboe
  0 siblings, 1 reply; 44+ messages in thread
From: Jeff Garzik @ 2005-05-24 16:37 UTC (permalink / raw)
  To: Jens Axboe; +Cc: James Bottomley, SCSI Mailing List, linux-ide

Jens Axboe wrote:
> I agree, it's a cleaner approach, with the rq being a container for
> generel messages as well not just SCSI commands. The one missing piece
> for that was the rq->end_io() callback so everything doesn't have to go
> down sync, but that is in now as well.
> 
> I'll try and cook something up.

Very cool ;)

	Jeff




^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH] libata: device suspend/resume
  2005-05-24 10:17                   ` Douglas Gilbert
@ 2005-05-24 17:10                     ` Jeff Garzik
  0 siblings, 0 replies; 44+ messages in thread
From: Jeff Garzik @ 2005-05-24 17:10 UTC (permalink / raw)
  To: dougg, Jens Axboe, luben_tuikov
  Cc: Hannes Reinecke, James Bottomley, SCSI Mailing List, linux-ide

Douglas Gilbert wrote:
> At the risk of boring you but informing others, Serial
> Attached SCSI (SAS) is going to muddy things.

Not necessarily...  ;-)

But yes, I am keeping track of SAS, don't worry.


> The SCSI ATA Translation (SAT
> http://www.t10.org/ftp/t10/drafts/sat/sat-r04.pdf )
> draft hopefully will find application any many areas
> other than libata (e.g. USB mass storage and 1394's
> SBP-2 drivers). The Scope section (section 1) of that
> draft makes interesting reading: in point a) the author
> sees SAT as presenting a unified command interface for
> block type devices across PATA, SATA and ATAPI!

In Linux's case, libata's SCSI<->ATA translation is merely a convenience 
so that I didn't have to bother writing all that storage infrastructure 
at the block layer.  And SAT was merely a convenient standard I could 
follow during the exercise.


> However the primary reason that SAT started is the
> fact that SAS infrastructure can include SATA disks.
> SAS defines the Serial ATA Tunneled Protocol (STP)
> which in linux will first appear in a SAS LLD (i.e.
> a scsi subsystem Low Level Driver, something like
> aic7xxx). Connecting a SATA disk (transported via SAS)
> to the IDE subsystem and a /dev/hd? device node seems
> like an unnatural act (a bit like ide-scsi in reverse).
> A solution involving libata seems appropriate in this case.

Certainly there will be hardware that attaches to both SATA and SAS. 
I've been planning for this.  I even have an Adaptec card (and docs).

It depends highly on the LLD how this is implemented.  If a SCSI 
interface to SATA is forced upon us (i.e. SAT is implemented in a 
firmware somewhere), then there's not much you can do.

If the LLD provides direct access to either SCSI CDBs or ATA taskfiles 
(FIS's), then the OS should directly work with the device's native 
command set.  Far from adding value, translation actually hinders 
progress, as users must either (a) find a way around the SAT layer to 
access an ATA feature, or (b) wait until the SAT layer is updated to 
support a translation for the desired feature.

One of the biggest requested features for libata is "ATA passthru", the 
ability to directly submit ATA commands to the device.  That should tell 
you something about what -users- want.

SCSI and ATA are competing and evolving command sets, which implies that 
there is -increased- interest in -not- hiding ATA devices behind an 
emulation layer.

A SATA+SAS LLD would ideally register an instance of the SAS transport 
class with the SCSI layer, and an instance of the SATA transport class 
with the block layer.  That's all it needs to do.

Everything else -- /dev/hdX versus /dev/sdX, emulation, sd/sr/st, etc. 
-- are all clients to either a SATA or SCSI transport class.  Remember, 
to many users, the fact that SATA right now uses /dev/sdX is confusing 
and annoying: they would prefer /dev/hdX as IDE has been presenting for 
over a decadce.

With the right design, there is little value OR NEED in hiding ATA 
devices behind emulation.


> Even more likely is that a SAT layer will be implemented
> in SAS enclosures just inches away from lots of SATA
> disks. From the point of view of a linux machine
> such SATA disks talk the SCSI command set and are
> connected via a SAS transport (Serial SCSI Protocol (SSP)).
> The only things that will probably care about the
> difference between such SATA disks and SAS disks are
> utilities like smartmontools (and that gap is being
> partially closed as well).

Linux will always want to talk to a device in its native command set, 
unless a firmware _forces_ us to do otherwise.

If the user considers a SAT layer added value, then they can certainly 
use such at their site.


> Perhaps libata could be renamed libsat.

s/libata/libata-scsi/

libata core is a SCSI-independent ATA transport, approaching the "ATA 
transport class" that I described above.

	Jeff



^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH] libata: device suspend/resume
  2005-05-24 13:48           ` Luben Tuikov
@ 2005-05-24 17:29             ` Jeff Garzik
  0 siblings, 0 replies; 44+ messages in thread
From: Jeff Garzik @ 2005-05-24 17:29 UTC (permalink / raw)
  To: Luben Tuikov, James Bottomley; +Cc: Jens Axboe, SCSI Mailing List, linux-ide

Luben Tuikov wrote:
> On 05/24/05 02:53, Jeff Garzik wrote:
> 
>>You are right about issuing the standby command after flush.  I don't 
>>think an LLD hook is the proper way to accomplish it, however.
>>
>>The SCSI layer needs to issue the START STOP UNIT command in response to 
>>a suspend event, and libata-scsi will (per SAT spec) translate that into 
>>the ATA standby command.  Merely following the relevant SCSI+SAT+ATA 
>>standards gets us there.
> 
> 
> An SSU command may not be viable as the device may be shared
> on the domain (i.e. accessed by another initiator).

Yes.  Don't worry, both James and I are quite aware of this.


>>* Bus PM.  This is best handled by the transport class driver (need to 
>>write for SATA and SAS).
> 
> 
> This is tricky.  Kudos to Doug's answer.  One thing to remember is that
> this is intricately entangled with host PM.  That is, if a device
> is unplugged from the domain -- would one want to computer to come
> out of its current power saving mode?  Probably not.

In general, in a class-based system, one can easily see a situation 
where a host driver would provide "adaptec_sas_transport" class, which 
does a few host-specific things, then falls back to a generic 
"sas_transport" class.

That is how to do host-specific stuff, with transport classes.

However, with regards to your specific example:  to come out of 
power-saving mode based on a certain event, the motherboard needs 
special circuitry.  Example of this is wake-on-LAN or wake-on-serial.


>>* Host PM.  This is handled in the obvious manner, using existing PM 
>>driver hooks.  PCI D0/D3, etc.
> 
> 
> Then D3->D0 would be equivalent to reinintialization of the PCI device,

yes


> doing domain discovery as usual, etc.

Eventually, yes.  At the moment, suspend is rather dumb, and things 
mostly assume that the devices present at suspend will be present at resume.


>>I can describe how this will look when libata is divorced from SCSI, if 
>>you would like, too...
> 
> 
> Please Jeff.
> 
> A concern is SATA devices behind SAS HAs (host adapters).  A straightforward
> translation provided by libata would be a goal.

No need for translation.  The LLD driver would either use the high level 
libata interface (for SATA devices), or use the high level SCSI 
interface (for SAS devices).

Even though libata doesn't have an explicit notion of transport classes 
(it _is_ a transport class, really), this is doable today.

	Jeff



^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH] libata: device suspend/resume
  2005-05-24 16:37                 ` Jeff Garzik
@ 2005-05-25  9:29                   ` Jens Axboe
  2005-05-25 23:40                     ` Guennadi Liakhovetski
                                       ` (3 more replies)
  0 siblings, 4 replies; 44+ messages in thread
From: Jens Axboe @ 2005-05-25  9:29 UTC (permalink / raw)
  To: Jeff Garzik; +Cc: James Bottomley, SCSI Mailing List, linux-ide

On Tue, May 24 2005, Jeff Garzik wrote:
> Jens Axboe wrote:
> >I agree, it's a cleaner approach, with the rq being a container for
> >generel messages as well not just SCSI commands. The one missing piece
> >for that was the rq->end_io() callback so everything doesn't have to go
> >down sync, but that is in now as well.
> >
> >I'll try and cook something up.
> 
> Very cool ;)

This is the base for it. It splits request->flags into two variables:

- cmd_type. this is not a bitmask, but a value indicating what type of
  request this is.

- cmd_flags. various command modified flags.

The idea is to add a REQ_TYPE_LINUX_BLOCK request type, where we define
a set of command opcodes that signify an upper level defined function
(such as flush) that is implemented differently at the hardware/driver
level. Basically a way to pass down messages or commands generically.

I like this better than using scsi opcodes always, it's a cleaner
abstraction. For sending generic commands to a device, we could add a
function ala:

blk_send_message(queue, unsigned char cmd, completion_callback);

that would allocate the request, set it up, add to block layer queue,
and call the completion_callback when it is done. So for a flush, a
driver could do

        blk_send_message(q, REQ_LB_OP_FLUSH, flush_done);

to a target queue.

What do you think?

Index: drivers/block/as-iosched.c
===================================================================
--- 137318b273db26b889675101fbd02d2e84cae5e3/drivers/block/as-iosched.c  (mode:100644)
+++ uncommitted/drivers/block/as-iosched.c  (mode:100644)
@@ -1476,7 +1476,7 @@
 		}
 	} else
 		WARN_ON(blk_fs_request(rq)
-			&& (!(rq->flags & (REQ_HARDBARRIER|REQ_SOFTBARRIER))) );
+			&& (!(rq->cmd_flags & (REQ_HARDBARRIER|REQ_SOFTBARRIER))) );
 
 	/* Stop anticipating - let this request get through */
 	as_antic_stop(ad);
@@ -1522,7 +1522,7 @@
 	}
 
 	/* barriers must flush the reorder queue */
-	if (unlikely(rq->flags & (REQ_SOFTBARRIER | REQ_HARDBARRIER)
+	if (unlikely(rq->cmd_flags & (REQ_SOFTBARRIER | REQ_HARDBARRIER)
 			&& where == ELEVATOR_INSERT_SORT)) {
 		WARN_ON(1);
 		where = ELEVATOR_INSERT_BACK;
Index: drivers/block/deadline-iosched.c
===================================================================
--- 137318b273db26b889675101fbd02d2e84cae5e3/drivers/block/deadline-iosched.c  (mode:100644)
+++ uncommitted/drivers/block/deadline-iosched.c  (mode:100644)
@@ -628,7 +628,7 @@
 	struct deadline_data *dd = q->elevator->elevator_data;
 
 	/* barriers must flush the reorder queue */
-	if (unlikely(rq->flags & (REQ_SOFTBARRIER | REQ_HARDBARRIER)
+	if (unlikely(rq->cmd_flags & (REQ_SOFTBARRIER | REQ_HARDBARRIER)
 			&& where == ELEVATOR_INSERT_SORT))
 		where = ELEVATOR_INSERT_BACK;
 
Index: drivers/block/elevator.c
===================================================================
--- 137318b273db26b889675101fbd02d2e84cae5e3/drivers/block/elevator.c  (mode:100644)
+++ uncommitted/drivers/block/elevator.c  (mode:100644)
@@ -272,7 +272,7 @@
 	if (blk_account_rq(rq))
 		q->in_flight--;
 
-	rq->flags &= ~REQ_STARTED;
+	rq->cmd_flags &= ~REQ_STARTED;
 
 	if (e->ops->elevator_deactivate_req_fn)
 		e->ops->elevator_deactivate_req_fn(q, rq);
@@ -285,7 +285,7 @@
 	/*
 	 * if this is the flush, requeue the original instead and drop the flush
 	 */
-	if (rq->flags & REQ_BAR_FLUSH) {
+	if (rq->cmd_type == REQ_TYPE_FLUSH) {
 		clear_bit(QUEUE_FLAG_FLUSH, &q->queue_flags);
 		rq = rq->end_io_data;
 	}
@@ -306,7 +306,7 @@
 	/*
 	 * barriers implicitly indicate back insertion
 	 */
-	if (rq->flags & (REQ_SOFTBARRIER | REQ_HARDBARRIER) &&
+	if (rq->cmd_flags & (REQ_SOFTBARRIER | REQ_HARDBARRIER) &&
 	    where == ELEVATOR_INSERT_SORT)
 		where = ELEVATOR_INSERT_BACK;
 
@@ -374,12 +374,12 @@
 		 * that has been delayed should not be passed by new incoming
 		 * requests
 		 */
-		rq->flags |= REQ_STARTED;
+		rq->cmd_flags |= REQ_STARTED;
 
 		if (rq == q->last_merge)
 			q->last_merge = NULL;
 
-		if ((rq->flags & REQ_DONTPREP) || !q->prep_rq_fn)
+		if ((rq->cmd_flags & REQ_DONTPREP) || !q->prep_rq_fn)
 			break;
 
 		ret = q->prep_rq_fn(q, rq);
@@ -395,7 +395,7 @@
 				nr_bytes = rq->data_len;
 
 			blkdev_dequeue_request(rq);
-			rq->flags |= REQ_QUIET;
+			rq->cmd_flags |= REQ_QUIET;
 			end_that_request_chunk(rq, 0, nr_bytes);
 			end_that_request_last(rq);
 		} else {
Index: drivers/block/floppy.c
===================================================================
--- 137318b273db26b889675101fbd02d2e84cae5e3/drivers/block/floppy.c  (mode:100644)
+++ uncommitted/drivers/block/floppy.c  (mode:100644)
@@ -3001,8 +3001,8 @@
 	if (usage_count == 0) {
 		printk("warning: usage count=0, current_req=%p exiting\n",
 		       current_req);
-		printk("sect=%ld flags=%lx\n", (long)current_req->sector,
-		       current_req->flags);
+		printk("sect=%ld type=%x flags=%x\n", (long)current_req->sector,
+		       current_req->cmd_type, current_req->cmd_flags);
 		return;
 	}
 	if (test_bit(0, &fdc_busy)) {
Index: drivers/block/ll_rw_blk.c
===================================================================
--- 137318b273db26b889675101fbd02d2e84cae5e3/drivers/block/ll_rw_blk.c  (mode:100644)
+++ uncommitted/drivers/block/ll_rw_blk.c  (mode:100644)
@@ -349,7 +349,7 @@
 	struct request *rq = flush_rq->end_io_data;
 	request_queue_t *q = rq->q;
 
-	rq->flags |= REQ_BAR_PREFLUSH;
+	rq->cmd_flags |= REQ_BAR_PREFLUSH;
 
 	if (!flush_rq->errors)
 		elv_requeue_request(q, rq);
@@ -365,7 +365,7 @@
 	struct request *rq = flush_rq->end_io_data;
 	request_queue_t *q = rq->q;
 
-	rq->flags |= REQ_BAR_POSTFLUSH;
+	rq->cmd_flags |= REQ_BAR_POSTFLUSH;
 
 	q->end_flush_fn(q, flush_rq);
 	clear_bit(QUEUE_FLAG_FLUSH, &q->queue_flags);
@@ -383,7 +383,7 @@
 
 	rq_init(q, flush_rq);
 	flush_rq->elevator_private = NULL;
-	flush_rq->flags = REQ_BAR_FLUSH;
+	flush_rq->cmd_type = REQ_TYPE_FLUSH;
 	flush_rq->rq_disk = rq->rq_disk;
 	flush_rq->rl = NULL;
 
@@ -392,7 +392,7 @@
 	 * pre and post flush as done in that case
 	 */
 	if (!q->prepare_flush_fn(q, flush_rq)) {
-		rq->flags |= REQ_BAR_PREFLUSH | REQ_BAR_POSTFLUSH;
+		rq->cmd_flags |= REQ_BAR_PREFLUSH | REQ_BAR_POSTFLUSH;
 		clear_bit(QUEUE_FLAG_FLUSH, &q->queue_flags);
 		return rq;
 	}
@@ -421,7 +421,7 @@
 
 	rq_init(q, flush_rq);
 	flush_rq->elevator_private = NULL;
-	flush_rq->flags = REQ_BAR_FLUSH;
+	flush_rq->cmd_type = REQ_TYPE_FLUSH;
 	flush_rq->rq_disk = rq->rq_disk;
 	flush_rq->rl = NULL;
 
@@ -934,7 +934,7 @@
 	}
 
 	list_del_init(&rq->queuelist);
-	rq->flags &= ~REQ_QUEUED;
+	rq->cmd_flags &= ~REQ_QUEUED;
 	rq->tag = -1;
 
 	if (unlikely(bqt->tag_index[tag] == NULL))
@@ -970,7 +970,7 @@
 	unsigned long *map = bqt->tag_map;
 	int tag = 0;
 
-	if (unlikely((rq->flags & REQ_QUEUED))) {
+	if (unlikely((rq->cmd_flags & REQ_QUEUED))) {
 		printk(KERN_ERR 
 		       "request %p for device [%s] already tagged %d",
 		       rq, rq->rq_disk ? rq->rq_disk->disk_name : "?", rq->tag);
@@ -987,7 +987,7 @@
 	tag += ffz(*map);
 	__set_bit(tag, bqt->tag_map);
 
-	rq->flags |= REQ_QUEUED;
+	rq->cmd_flags |= REQ_QUEUED;
 	rq->tag = tag;
 	bqt->tag_index[tag] = rq;
 	blkdev_dequeue_request(rq);
@@ -1022,61 +1022,31 @@
 		if (rq->tag == -1) {
 			printk("bad tag found on list\n");
 			list_del_init(&rq->queuelist);
-			rq->flags &= ~REQ_QUEUED;
+			rq->cmd_flags &= ~REQ_QUEUED;
 		} else
 			blk_queue_end_tag(q, rq);
 
-		rq->flags &= ~REQ_STARTED;
+		rq->cmd_flags &= ~REQ_STARTED;
 		__elv_add_request(q, rq, ELEVATOR_INSERT_BACK, 0);
 	}
 }
 
 EXPORT_SYMBOL(blk_queue_invalidate_tags);
 
-static char *rq_flags[] = {
-	"REQ_RW",
-	"REQ_FAILFAST",
-	"REQ_SOFTBARRIER",
-	"REQ_HARDBARRIER",
-	"REQ_CMD",
-	"REQ_NOMERGE",
-	"REQ_STARTED",
-	"REQ_DONTPREP",
-	"REQ_QUEUED",
-	"REQ_PC",
-	"REQ_BLOCK_PC",
-	"REQ_SENSE",
-	"REQ_FAILED",
-	"REQ_QUIET",
-	"REQ_SPECIAL",
-	"REQ_DRIVE_CMD",
-	"REQ_DRIVE_TASK",
-	"REQ_DRIVE_TASKFILE",
-	"REQ_PREEMPT",
-	"REQ_PM_SUSPEND",
-	"REQ_PM_RESUME",
-	"REQ_PM_SHUTDOWN",
-};
-
 void blk_dump_rq_flags(struct request *rq, char *msg)
 {
 	int bit;
 
-	printk("%s: dev %s: flags = ", msg,
-		rq->rq_disk ? rq->rq_disk->disk_name : "?");
-	bit = 0;
-	do {
-		if (rq->flags & (1 << bit))
-			printk("%s ", rq_flags[bit]);
-		bit++;
-	} while (bit < __REQ_NR_BITS);
+	printk("%s: dev %s: type=%x, flags=%x\n", msg,
+		rq->rq_disk ? rq->rq_disk->disk_name : "?", rq->cmd_type,
+		rq->cmd_flags);
 
 	printk("\nsector %llu, nr/cnr %lu/%u\n", (unsigned long long)rq->sector,
 						       rq->nr_sectors,
 						       rq->current_nr_sectors);
 	printk("bio %p, biotail %p, buffer %p, data %p, len %u\n", rq->bio, rq->biotail, rq->buffer, rq->data, rq->data_len);
 
-	if (rq->flags & (REQ_BLOCK_PC | REQ_PC)) {
+	if (blk_pc_request(rq)) {
 		printk("cdb: ");
 		for (bit = 0; bit < sizeof(rq->cmd); bit++)
 			printk("%02x ", rq->cmd[bit]);
@@ -1253,7 +1223,7 @@
 	int nr_phys_segs = bio_phys_segments(q, bio);
 
 	if (req->nr_phys_segments + nr_phys_segs > q->max_phys_segments) {
-		req->flags |= REQ_NOMERGE;
+		req->cmd_flags |= REQ_NOMERGE;
 		if (req == q->last_merge)
 			q->last_merge = NULL;
 		return 0;
@@ -1276,7 +1246,7 @@
 
 	if (req->nr_hw_segments + nr_hw_segs > q->max_hw_segments
 	    || req->nr_phys_segments + nr_phys_segs > q->max_phys_segments) {
-		req->flags |= REQ_NOMERGE;
+		req->cmd_flags |= REQ_NOMERGE;
 		if (req == q->last_merge)
 			q->last_merge = NULL;
 		return 0;
@@ -1297,7 +1267,7 @@
 	int len;
 
 	if (req->nr_sectors + bio_sectors(bio) > q->max_sectors) {
-		req->flags |= REQ_NOMERGE;
+		req->cmd_flags |= REQ_NOMERGE;
 		if (req == q->last_merge)
 			q->last_merge = NULL;
 		return 0;
@@ -1329,7 +1299,7 @@
 	int len;
 
 	if (req->nr_sectors + bio_sectors(bio) > q->max_sectors) {
-		req->flags |= REQ_NOMERGE;
+		req->cmd_flags |= REQ_NOMERGE;
 		if (req == q->last_merge)
 			q->last_merge = NULL;
 		return 0;
@@ -1784,10 +1754,10 @@
 		return NULL;
 
 	/*
-	 * first three bits are identical in rq->flags and bio->bi_rw,
+	 * first three bits are identical in rq->cmd_flags and bio->bi_rw,
 	 * see bio.h and blkdev.h
 	 */
-	rq->flags = rw;
+	rq->cmd_flags = rw;
 
 	if (!elv_set_request(q, rq, gfp_mask))
 		return rq;
@@ -2044,8 +2014,8 @@
  *    Many block devices need to execute commands asynchronously, so they don't
  *    block the whole kernel from preemption during request execution.  This is
  *    accomplished normally by inserting aritficial requests tagged as
- *    REQ_SPECIAL in to the corresponding request queue, and letting them be
- *    scheduled for actual execution by the request queue.
+ *    REQ_TYPE_SPECIAL in to the corresponding request queue, and letting them
+ *    be scheduled for actual execution by the request queue.
  *
  *    We have the option of inserting the head or the tail of the queue.
  *    Typically we use the tail for new ioctls and so forth.  We use the head
@@ -2062,7 +2032,8 @@
 	 * must not attempt merges on this) and that it acts as a soft
 	 * barrier
 	 */
-	rq->flags |= REQ_SPECIAL | REQ_SOFTBARRIER;
+	rq->cmd_type = REQ_TYPE_SPECIAL;
+	rq->cmd_flags |= REQ_SOFTBARRIER;
 
 	rq->special = data;
 
@@ -2215,7 +2186,7 @@
 		rq->sense_len = 0;
 	}
 
-	rq->flags |= REQ_NOMERGE;
+	rq->cmd_flags |= REQ_NOMERGE;
 	rq->waiting = &wait;
 	rq->end_io = blk_end_sync_rq;
 	elv_add_request(q, rq, ELEVATOR_INSERT_BACK, 1);
@@ -2277,7 +2248,8 @@
 	struct request *rq = blk_get_request(q, WRITE, __GFP_WAIT);
 	int ret;
 
-	rq->flags |= REQ_BLOCK_PC | REQ_SOFTBARRIER;
+	rq->cmd_type = REQ_TYPE_BLOCK_PC;
+	rq->cmd_flags |= REQ_SOFTBARRIER;
 	rq->sector = 0;
 	memset(rq->cmd, 0, sizeof(rq->cmd));
 	rq->cmd[0] = 0x35;
@@ -2679,19 +2651,19 @@
 		goto again;
 	}
 
-	req->flags |= REQ_CMD;
+	req->cmd_type = REQ_TYPE_FS;
 
 	/*
 	 * inherit FAILFAST from bio (for read-ahead, and explicit FAILFAST)
 	 */
 	if (bio_rw_ahead(bio) || bio_failfast(bio))
-		req->flags |= REQ_FAILFAST;
+		req->cmd_flags |= REQ_FAILFAST;
 
 	/*
 	 * REQ_BARRIER implies no merging, but lets make it explicit
 	 */
 	if (barrier)
-		req->flags |= (REQ_HARDBARRIER | REQ_NOMERGE);
+		req->cmd_flags |= (REQ_HARDBARRIER | REQ_NOMERGE);
 
 	req->errors = 0;
 	req->hard_sector = req->sector = sector;
@@ -3068,7 +3040,7 @@
 		req->errors = 0;
 
 	if (!uptodate) {
-		if (blk_fs_request(req) && !(req->flags & REQ_QUIET))
+		if (blk_fs_request(req) && !(req->cmd_flags & REQ_QUIET))
 			printk("end_request: I/O error, dev %s, sector %llu\n",
 				req->rq_disk ? req->rq_disk->disk_name : "?",
 				(unsigned long long)req->sector);
@@ -3236,8 +3208,8 @@
 
 void blk_rq_bio_prep(request_queue_t *q, struct request *rq, struct bio *bio)
 {
-	/* first three bits are identical in rq->flags and bio->bi_rw */
-	rq->flags |= (bio->bi_rw & 7);
+	/* first three bits are identical in rq->cmd_flags and bio->bi_rw */
+	rq->cmd_flags |= (bio->bi_rw & 7);
 
 	rq->nr_phys_segments = bio_phys_segments(q, bio);
 	rq->nr_hw_segments = bio_hw_segments(q, bio);
Index: drivers/block/nbd.c
===================================================================
--- 137318b273db26b889675101fbd02d2e84cae5e3/drivers/block/nbd.c  (mode:100644)
+++ uncommitted/drivers/block/nbd.c  (mode:100644)
@@ -428,7 +428,7 @@
 		dprintk(DBG_BLKDEV, "%s: request %p: dequeued (flags=%lx)\n",
 				req->rq_disk->disk_name, req, req->flags);
 
-		if (!(req->flags & REQ_CMD))
+		if (!blk_fs_request(req))
 			goto error_out;
 
 		lo = req->rq_disk->private_data;
@@ -510,7 +510,7 @@
 	switch (cmd) {
 	case NBD_DISCONNECT:
 	        printk(KERN_INFO "%s: NBD_DISCONNECT\n", lo->disk->disk_name);
-		sreq.flags = REQ_SPECIAL;
+		sreq.cmd_type = REQ_TYPE_SPECIAL;
 		nbd_cmd(&sreq) = NBD_CMD_DISC;
 		/*
 		 * Set these to sane values in case server implementation
Index: drivers/block/noop-iosched.c
===================================================================
--- 137318b273db26b889675101fbd02d2e84cae5e3/drivers/block/noop-iosched.c  (mode:100644)
+++ uncommitted/drivers/block/noop-iosched.c  (mode:100644)
@@ -39,7 +39,7 @@
 	/*
 	 * new merges must not precede this barrier
 	 */
-	if (rq->flags & REQ_HARDBARRIER)
+	if (rq->cmd_flags & REQ_HARDBARRIER)
 		q->last_merge = NULL;
 	else if (!q->last_merge)
 		q->last_merge = rq;
Index: drivers/block/paride/pd.c
===================================================================
--- 137318b273db26b889675101fbd02d2e84cae5e3/drivers/block/paride/pd.c  (mode:100644)
+++ uncommitted/drivers/block/paride/pd.c  (mode:100644)
@@ -436,7 +436,7 @@
 
 static enum action do_pd_io_start(void)
 {
-	if (pd_req->flags & REQ_SPECIAL) {
+	if (blk_special_request(pd_req)) {
 		phase = pd_special;
 		return pd_special();
 	}
Index: drivers/block/pktcdvd.c
===================================================================
--- 137318b273db26b889675101fbd02d2e84cae5e3/drivers/block/pktcdvd.c  (mode:100644)
+++ uncommitted/drivers/block/pktcdvd.c  (mode:100644)
@@ -365,15 +365,15 @@
 	rq->sense = sense;
 	memset(sense, 0, sizeof(sense));
 	rq->sense_len = 0;
-	rq->flags |= REQ_BLOCK_PC | REQ_HARDBARRIER;
+	rq->cmd_type = REQ_TYPE_BLOCK_PC;
+	rq->cmd_flags |= (REQ_HARDBARRIER | REQ_NOMERGE);
 	if (cgc->quiet)
-		rq->flags |= REQ_QUIET;
+		rq->cmd_flags |= REQ_QUIET;
 	memcpy(rq->cmd, cgc->cmd, CDROM_PACKET_SIZE);
 	if (sizeof(rq->cmd) > CDROM_PACKET_SIZE)
 		memset(rq->cmd + CDROM_PACKET_SIZE, 0, sizeof(rq->cmd) - CDROM_PACKET_SIZE);
 
 	rq->ref_count++;
-	rq->flags |= REQ_NOMERGE;
 	rq->waiting = &wait;
 	rq->end_io = blk_end_sync_rq;
 	elv_add_request(q, rq, ELEVATOR_INSERT_BACK, 1);
Index: drivers/block/scsi_ioctl.c
===================================================================
--- 137318b273db26b889675101fbd02d2e84cae5e3/drivers/block/scsi_ioctl.c  (mode:100644)
+++ uncommitted/drivers/block/scsi_ioctl.c  (mode:100644)
@@ -276,7 +276,7 @@
 	rq->sense = sense;
 	rq->sense_len = 0;
 
-	rq->flags |= REQ_BLOCK_PC;
+	rq->cmd_type = REQ_TYPE_BLOCK_PC;
 	bio = rq->bio;
 
 	/*
@@ -406,7 +406,7 @@
 
 	rq->data = buffer;
 	rq->data_len = bytes;
-	rq->flags |= REQ_BLOCK_PC;
+	rq->cmd_type = REQ_TYPE_BLOCK_PC;
 
 	blk_execute_rq(q, bd_disk, rq);
 	err = rq->errors & 0xff;	/* only 8 bit SCSI status */
@@ -553,7 +553,7 @@
 			close = 1;
 		case CDROMEJECT:
 			rq = blk_get_request(q, WRITE, __GFP_WAIT);
-			rq->flags |= REQ_BLOCK_PC;
+			rq->cmd_type = REQ_TYPE_BLOCK_PC;
 			rq->data = NULL;
 			rq->data_len = 0;
 			rq->timeout = BLK_DEFAULT_TIMEOUT;
Index: drivers/block/xd.c
===================================================================
--- 137318b273db26b889675101fbd02d2e84cae5e3/drivers/block/xd.c  (mode:100644)
+++ uncommitted/drivers/block/xd.c  (mode:100644)
@@ -310,7 +310,7 @@
 		int res = 0;
 		int retry;
 
-		if (!(req->flags & REQ_CMD)) {
+		if (!blk_fs_request(req)) {
 			end_request(req, 0);
 			continue;
 		}
Index: drivers/cdrom/cdrom.c
===================================================================
--- 137318b273db26b889675101fbd02d2e84cae5e3/drivers/cdrom/cdrom.c  (mode:100644)
+++ uncommitted/drivers/cdrom/cdrom.c  (mode:100644)
@@ -2125,7 +2125,7 @@
 		rq->cmd[9] = 0xf8;
 
 		rq->cmd_len = 12;
-		rq->flags |= REQ_BLOCK_PC;
+		rq->cmd_type = REQ_TYPE_BLOCK_PC;
 		rq->timeout = 60 * HZ;
 		bio = rq->bio;
 
Index: drivers/cdrom/cdu31a.c
===================================================================
--- 137318b273db26b889675101fbd02d2e84cae5e3/drivers/cdrom/cdu31a.c  (mode:100644)
+++ uncommitted/drivers/cdrom/cdu31a.c  (mode:100644)
@@ -1339,8 +1339,10 @@
 		}
 
 		/* WTF??? */
-		if (!(req->flags & REQ_CMD))
+		if (!blk_fs_request(req)) {
+			end_request(req, 0);
 			continue;
+		}
 		if (rq_data_dir(req) == WRITE) {
 			end_request(req, 0);
 			continue;
Index: drivers/ide/ide-cd.c
===================================================================
--- 137318b273db26b889675101fbd02d2e84cae5e3/drivers/ide/ide-cd.c  (mode:100644)
+++ uncommitted/drivers/ide/ide-cd.c  (mode:100644)
@@ -372,7 +372,7 @@
 {
 	int log = 0;
 
-	if (!sense || !rq || (rq->flags & REQ_QUIET))
+	if (!sense || !rq || (rq->cmd_flags & REQ_QUIET))
 		return 0;
 
 	switch (sense->sense_key) {
@@ -562,7 +562,7 @@
 	struct cdrom_info *cd = drive->driver_data;
 
 	ide_init_drive_cmd(rq);
-	rq->flags = REQ_PC;
+	rq->cmd_type = REQ_TYPE_BLOCK_PC;
 	rq->rq_disk = cd->disk;
 }
 
@@ -582,7 +582,7 @@
 	rq->cmd[0] = GPCMD_REQUEST_SENSE;
 	rq->cmd[4] = rq->data_len = 18;
 
-	rq->flags = REQ_SENSE;
+	rq->cmd_type = REQ_TYPE_SENSE;
 
 	/* NOTE! Save the failed command in "rq->buffer" */
 	rq->buffer = (void *) failed_command;
@@ -595,10 +595,10 @@
 	struct request *rq = HWGROUP(drive)->rq;
 	int nsectors = rq->hard_cur_sectors;
 
-	if ((rq->flags & REQ_SENSE) && uptodate) {
+	if (blk_sense_request(rq) && uptodate) {
 		/*
-		 * For REQ_SENSE, "rq->buffer" points to the original failed
-		 * request
+		 * For REQ_TYPE_SENSE, "rq->buffer" points to the original
+		 * failed request
 		 */
 		struct request *failed = (struct request *) rq->buffer;
 		struct cdrom_info *info = drive->driver_data;
@@ -658,17 +658,17 @@
 		return 1;
 	}
 
-	if (rq->flags & REQ_SENSE) {
+	if (blk_sense_request(rq)) {
 		/* We got an error trying to get sense info
 		   from the drive (probably while trying
 		   to recover from a former error).  Just give up. */
 
-		rq->flags |= REQ_FAILED;
+		rq->cmd_flags |= REQ_FAILED;
 		cdrom_end_request(drive, 0);
 		ide_error(drive, "request sense failure", stat);
 		return 1;
 
-	} else if (rq->flags & (REQ_PC | REQ_BLOCK_PC)) {
+	} else if (blk_pc_request(rq)) {
 		/* All other functions, except for READ. */
 		unsigned long flags;
 
@@ -676,7 +676,7 @@
 		 * if we have an error, pass back CHECK_CONDITION as the
 		 * scsi status byte
 		 */
-		if ((rq->flags & REQ_BLOCK_PC) && !rq->errors)
+		if (!rq->errors)
 			rq->errors = SAM_STAT_CHECK_CONDITION;
 
 		/* Check for tray open. */
@@ -687,12 +687,12 @@
 			cdrom_saw_media_change (drive);
 			/*printk("%s: media changed\n",drive->name);*/
 			return 0;
-		} else if (!(rq->flags & REQ_QUIET)) {
+		} else if (!(rq->cmd_flags & REQ_QUIET)) {
 			/* Otherwise, print an error. */
 			ide_dump_status(drive, "packet command error", stat);
 		}
 		
-		rq->flags |= REQ_FAILED;
+		rq->cmd_flags |= REQ_FAILED;
 
 		/*
 		 * instead of playing games with moving completions around,
@@ -819,7 +819,7 @@
 			wait = ATAPI_WAIT_PC;
 			break;
 		default:
-			if (!(rq->flags & REQ_QUIET))
+			if (!(rq->cmd_flags & REQ_QUIET))
 				printk(KERN_INFO "ide-cd: cmd 0x%x timed out\n", rq->cmd[0]);
 			wait = 0;
 			break;
@@ -1062,7 +1062,7 @@
 		if (rq->current_nr_sectors > 0) {
 			printk (KERN_ERR "%s: cdrom_read_intr: data underrun (%d blocks)\n",
 				drive->name, rq->current_nr_sectors);
-			rq->flags |= REQ_FAILED;
+			rq->cmd_flags |= REQ_FAILED;
 			cdrom_end_request(drive, 0);
 		} else
 			cdrom_end_request(drive, 1);
@@ -1399,7 +1399,7 @@
 			printk ("%s: cdrom_pc_intr: data underrun %d\n",
 				drive->name, pc->buflen);
 			*/
-			rq->flags |= REQ_FAILED;
+			rq->cmd_flags |= REQ_FAILED;
 			cdrom_end_request(drive, 0);
 		}
 		return ide_stopped;
@@ -1452,14 +1452,14 @@
 		rq->data += thislen;
 		rq->data_len -= thislen;
 
-		if (rq->flags & REQ_SENSE)
+		if (blk_sense_request(rq))
 			rq->sense_len += thislen;
 	} else {
 confused:
 		printk (KERN_ERR "%s: cdrom_pc_intr: The drive "
 			"appears confused (ireason = 0x%02x)\n",
 			drive->name, ireason);
-		rq->flags |= REQ_FAILED;
+		rq->cmd_flags |= REQ_FAILED;
 	}
 
 	/* Now we wait for another interrupt. */
@@ -1487,7 +1487,7 @@
 
 	info->dma = 0;
 	info->cmd = 0;
-	rq->flags &= ~REQ_FAILED;
+	rq->cmd_flags &= ~REQ_FAILED;
 	len = rq->data_len;
 
 	/* Start sending the command to the drive. */
@@ -1500,7 +1500,7 @@
 {
 	struct request_sense sense;
 	int retries = 10;
-	unsigned int flags = rq->flags;
+	unsigned int flags = rq->cmd_flags;
 
 	if (rq->sense == NULL)
 		rq->sense = &sense;
@@ -1509,14 +1509,14 @@
 	do {
 		int error;
 		unsigned long time = jiffies;
-		rq->flags = flags;
+		rq->cmd_flags = flags;
 
 		error = ide_do_drive_cmd(drive, rq, ide_wait);
 		time = jiffies - time;
 
 		/* FIXME: we should probably abort/retry or something 
 		 * in case of failure */
-		if (rq->flags & REQ_FAILED) {
+		if (rq->cmd_flags & REQ_FAILED) {
 			/* The request failed.  Retry if it was due to a unit
 			   attention status
 			   (usually means media was changed). */
@@ -1538,10 +1538,10 @@
 		}
 
 		/* End of retry loop. */
-	} while ((rq->flags & REQ_FAILED) && retries >= 0);
+	} while ((rq->cmd_flags & REQ_FAILED) && retries >= 0);
 
 	/* Return an error if the command failed. */
-	return (rq->flags & REQ_FAILED) ? -EIO : 0;
+	return (rq->cmd_flags & REQ_FAILED) ? -EIO : 0;
 }
 
 /*
@@ -1915,7 +1915,7 @@
 {
 	struct cdrom_info *info = drive->driver_data;
 
-	rq->flags |= REQ_QUIET;
+	rq->cmd_flags |= REQ_QUIET;
 
 	info->dma = 0;
 	info->cmd = 0;
@@ -1974,11 +1974,11 @@
 		}
 		info->last_block = block;
 		return action;
-	} else if (rq->flags & (REQ_PC | REQ_SENSE)) {
+	} else if (rq->cmd_type == REQ_TYPE_SENSE) {
 		return cdrom_do_packet_command(drive);
-	} else if (rq->flags & REQ_BLOCK_PC) {
+	} else if (blk_pc_request(rq)) {
 		return cdrom_do_block_pc(drive, rq);
-	} else if (rq->flags & REQ_SPECIAL) {
+	} else if (blk_special_request(rq)) {
 		/*
 		 * right now this can only be a reset...
 		 */
@@ -2056,7 +2056,7 @@
 
 	req.sense = sense;
 	req.cmd[0] = GPCMD_TEST_UNIT_READY;
-	req.flags |= REQ_QUIET;
+	req.cmd_flags |= REQ_QUIET;
 
 #if ! STANDARD_ATAPI
         /* the Sanyo 3 CD changer uses byte 7 of TEST_UNIT_READY to 
@@ -2180,7 +2180,7 @@
 	req.sense = sense;
 	req.data =  buf;
 	req.data_len = buflen;
-	req.flags |= REQ_QUIET;
+	req.cmd_flags |= REQ_QUIET;
 	req.cmd[0] = GPCMD_READ_TOC_PMA_ATIP;
 	req.cmd[6] = trackno;
 	req.cmd[7] = (buflen >> 8);
@@ -2476,7 +2476,7 @@
 	req.timeout = cgc->timeout;
 
 	if (cgc->quiet)
-		req.flags |= REQ_QUIET;
+		req.cmd_flags |= REQ_QUIET;
 
 	req.sense = cgc->sense;
 	cgc->stat = cdrom_queue_packet_command(drive, &req);
@@ -2618,7 +2618,8 @@
 	int ret;
 
 	cdrom_prepare_request(drive, &req);
-	req.flags = REQ_SPECIAL | REQ_QUIET;
+	req.cmd_type = REQ_TYPE_SPECIAL;
+	req.cmd_flags = REQ_QUIET;
 	ret = ide_do_drive_cmd(drive, &req, ide_wait);
 
 	/*
@@ -3097,9 +3098,9 @@
 
 static int ide_cdrom_prep_fn(request_queue_t *q, struct request *rq)
 {
-	if (rq->flags & REQ_CMD)
+	if (blk_fs_request(rq))
 		return ide_cdrom_prep_fs(q, rq);
-	else if (rq->flags & REQ_BLOCK_PC)
+	else if (blk_pc_request(rq))
 		return ide_cdrom_prep_pc(rq);
 
 	return 0;
Index: drivers/ide/ide-disk.c
===================================================================
--- 137318b273db26b889675101fbd02d2e84cae5e3/drivers/ide/ide-disk.c  (mode:100644)
+++ uncommitted/drivers/ide/ide-disk.c  (mode:100644)
@@ -731,7 +731,8 @@
 		rq->cmd[0] = WIN_FLUSH_CACHE;
 
 
-	rq->flags |= REQ_DRIVE_TASK | REQ_SOFTBARRIER;
+	rq->cmd_type = REQ_TYPE_ATA_TASK;
+	rq->cmd_flags |= REQ_SOFTBARRIER;
 	rq->buffer = rq->cmd;
 	return 1;
 }
Index: drivers/ide/ide-dma.c
===================================================================
--- 137318b273db26b889675101fbd02d2e84cae5e3/drivers/ide/ide-dma.c  (mode:100644)
+++ uncommitted/drivers/ide/ide-dma.c  (mode:100644)
@@ -210,7 +210,7 @@
 	ide_hwif_t *hwif = HWIF(drive);
 	struct scatterlist *sg = hwif->sg_table;
 
-	if ((rq->flags & REQ_DRIVE_TASKFILE) && rq->nr_sectors > 256)
+	if ((rq->cmd_type == REQ_TYPE_ATA_TASKFILE) && rq->nr_sectors > 256)
 		BUG();
 
 	ide_map_sg(drive, rq);
Index: drivers/ide/ide-floppy.c
===================================================================
--- 137318b273db26b889675101fbd02d2e84cae5e3/drivers/ide/ide-floppy.c  (mode:100644)
+++ uncommitted/drivers/ide/ide-floppy.c  (mode:100644)
@@ -588,7 +588,7 @@
 	/* Why does this happen? */
 	if (!rq)
 		return 0;
-	if (!(rq->flags & REQ_SPECIAL)) { //if (!IDEFLOPPY_RQ_CMD (rq->cmd)) {
+	if (!blk_special_request(rq)) {
 		/* our real local end request function */
 		ide_end_request(drive, uptodate, nsecs);
 		return 0;
@@ -687,7 +687,7 @@
 
 	ide_init_drive_cmd(rq);
 	rq->buffer = (char *) pc;
-	rq->flags = REQ_SPECIAL;	//rq->cmd = IDEFLOPPY_PC_RQ;
+	rq->cmd_type = REQ_TYPE_SPECIAL;
 	rq->rq_disk = floppy->disk;
 	(void) ide_do_drive_cmd(drive, rq, ide_preempt);
 }
@@ -1301,7 +1301,7 @@
 		idefloppy_do_end_request(drive, 0, 0);
 		return ide_stopped;
 	}
-	if (rq->flags & REQ_CMD) {
+	if (blk_fs_request(rq)) {
 		if (((long)rq->sector % floppy->bs_factor) ||
 		    (rq->nr_sectors % floppy->bs_factor)) {
 			printk("%s: unsupported r/w request size\n",
@@ -1311,9 +1311,9 @@
 		}
 		pc = idefloppy_next_pc_storage(drive);
 		idefloppy_create_rw_cmd(floppy, pc, rq, block);
-	} else if (rq->flags & REQ_SPECIAL) {
+	} else if (blk_special_request(rq))
 		pc = (idefloppy_pc_t *) rq->buffer;
-	} else if (rq->flags & REQ_BLOCK_PC) {
+	} else if (blk_fs_request(rq)) {
 		pc = idefloppy_next_pc_storage(drive);
 		if (idefloppy_blockpc_cmd(floppy, pc, rq)) {
 			idefloppy_do_end_request(drive, 0, 0);
@@ -1341,7 +1341,7 @@
 
 	ide_init_drive_cmd (&rq);
 	rq.buffer = (char *) pc;
-	rq.flags = REQ_SPECIAL;		//	rq.cmd = IDEFLOPPY_PC_RQ;
+	rq.cmd_type = REQ_TYPE_SPECIAL;
 	rq.rq_disk = floppy->disk;
 
 	return ide_do_drive_cmd(drive, &rq, ide_wait);
Index: drivers/ide/ide-io.c
===================================================================
--- 137318b273db26b889675101fbd02d2e84cae5e3/drivers/ide/ide-io.c  (mode:100644)
+++ uncommitted/drivers/ide/ide-io.c  (mode:100644)
@@ -60,7 +60,7 @@
 {
 	int ret = 1;
 
-	BUG_ON(!(rq->flags & REQ_STARTED));
+	BUG_ON(!blk_rq_started(rq));
 
 	/*
 	 * if failfast is set on a request, override number of sectors and
@@ -310,7 +310,7 @@
 	rq = HWGROUP(drive)->rq;
 	spin_unlock_irqrestore(&ide_lock, flags);
 
-	if (rq->flags & REQ_DRIVE_CMD) {
+	if (rq->cmd_type == REQ_TYPE_ATA_CMD) {
 		u8 *args = (u8 *) rq->buffer;
 		if (rq->errors == 0)
 			rq->errors = !OK_STAT(stat,READY_STAT,BAD_STAT);
@@ -320,7 +320,7 @@
 			args[1] = err;
 			args[2] = hwif->INB(IDE_NSECTOR_REG);
 		}
-	} else if (rq->flags & REQ_DRIVE_TASK) {
+	} else if (rq->cmd_type == REQ_TYPE_ATA_TASK) {
 		u8 *args = (u8 *) rq->buffer;
 		if (rq->errors == 0)
 			rq->errors = !OK_STAT(stat,READY_STAT,BAD_STAT);
@@ -334,7 +334,7 @@
 			args[5] = hwif->INB(IDE_HCYL_REG);
 			args[6] = hwif->INB(IDE_SELECT_REG);
 		}
-	} else if (rq->flags & REQ_DRIVE_TASKFILE) {
+	} else if (rq->cmd_type & REQ_TYPE_ATA_TASKFILE) {
 		ide_task_t *args = (ide_task_t *) rq->special;
 		if (rq->errors == 0)
 			rq->errors = !OK_STAT(stat,READY_STAT,BAD_STAT);
@@ -530,7 +530,7 @@
 		return ide_stopped;
 
 	/* retry only "normal" I/O: */
-	if (rq->flags & (REQ_DRIVE_CMD | REQ_DRIVE_TASK | REQ_DRIVE_TASKFILE)) {
+	if (!blk_fs_request(rq)) {
 		rq->errors = 1;
 		ide_end_drive_cmd(drive, stat, err);
 		return ide_stopped;
@@ -581,7 +581,7 @@
 		return ide_stopped;
 
 	/* retry only "normal" I/O: */
-	if (rq->flags & (REQ_DRIVE_CMD | REQ_DRIVE_TASK | REQ_DRIVE_TASKFILE)) {
+	if (!blk_fs_request(rq)) {
 		rq->errors = 1;
 		ide_end_drive_cmd(drive, BUSY_STAT, 0);
 		return ide_stopped;
@@ -751,7 +751,7 @@
 	if (hwif->sg_mapped)	/* needed by ide-scsi */
 		return;
 
-	if ((rq->flags & REQ_DRIVE_TASKFILE) == 0) {
+	if (rq->cmd_type != REQ_TYPE_ATA_TASKFILE) {
 		hwif->sg_nents = blk_rq_map_sg(drive->queue, rq, sg);
 	} else {
 		sg_init_one(sg, rq->buffer, rq->nr_sectors * SECTOR_SIZE);
@@ -787,7 +787,7 @@
 		struct request *rq)
 {
 	ide_hwif_t *hwif = HWIF(drive);
-	if (rq->flags & REQ_DRIVE_TASKFILE) {
+	if (rq->cmd_type == REQ_TYPE_ATA_TASKFILE) {
  		ide_task_t *args = rq->special;
  
 		if (!args)
@@ -809,7 +809,7 @@
 		if (args->tf_out_flags.all != 0) 
 			return flagged_taskfile(drive, args);
 		return do_rw_taskfile(drive, args);
-	} else if (rq->flags & REQ_DRIVE_TASK) {
+	} else if (rq->cmd_type == REQ_TYPE_ATA_TASK) {
 		u8 *args = rq->buffer;
 		u8 sel;
  
@@ -835,7 +835,7 @@
  		hwif->OUTB(sel, IDE_SELECT_REG);
  		ide_cmd(drive, args[0], args[2], &drive_cmd_intr);
  		return ide_started;
- 	} else if (rq->flags & REQ_DRIVE_CMD) {
+ 	} else if (rq->cmd_type == REQ_TYPE_ATA_CMD) {
  		u8 *args = rq->buffer;
 
 		if (!args)
@@ -890,7 +890,7 @@
 	ide_startstop_t startstop;
 	sector_t block;
 
-	BUG_ON(!(rq->flags & REQ_STARTED));
+	BUG_ON(!blk_rq_started(rq));
 
 #ifdef DEBUG
 	printk("%s: start_request: current=0x%08lx\n",
@@ -948,9 +948,10 @@
 	if (!drive->special.all) {
 		ide_driver_t *drv;
 
-		if (rq->flags & (REQ_DRIVE_CMD | REQ_DRIVE_TASK))
+		if (rq->cmd_type == REQ_TYPE_ATA_CMD ||
+		    rq->cmd_type == REQ_TYPE_ATA_TASK)
 			return execute_drive_cmd(drive, rq);
-		else if (rq->flags & REQ_DRIVE_TASKFILE)
+		else if (rq->cmd_type == REQ_TYPE_ATA_TASKFILE)
 			return execute_drive_cmd(drive, rq);
 		else if (blk_pm_request(rq)) {
 #ifdef DEBUG_PM
@@ -1193,7 +1194,8 @@
 		 * unless the subdriver triggers such a thing in its own PM
 		 * state machine.
 		 */
-		if (drive->blocked && !blk_pm_request(rq) && !(rq->flags & REQ_PREEMPT)) {
+		if (drive->blocked && !blk_pm_request(rq) &&
+		    !(rq->cmd_flags & REQ_PREEMPT)) {
 			/* We clear busy, there should be no pending ATA command at this point. */
 			hwgroup->busy = 0;
 			break;
@@ -1596,7 +1598,7 @@
 void ide_init_drive_cmd (struct request *rq)
 {
 	memset(rq, 0, sizeof(*rq));
-	rq->flags = REQ_DRIVE_CMD;
+	rq->cmd_type = REQ_TYPE_ATA_CMD;
 	rq->ref_count = 1;
 }
 
@@ -1659,7 +1661,7 @@
 		hwgroup->rq = NULL;
 	if (action == ide_preempt || action == ide_head_wait) {
 		where = ELEVATOR_INSERT_FRONT;
-		rq->flags |= REQ_PREEMPT;
+		rq->cmd_flags |= REQ_PREEMPT;
 	}
 	__elv_add_request(drive->queue, rq, where, 0);
 	ide_do_request(hwgroup, IDE_NO_IRQ);
Index: drivers/ide/ide-lib.c
===================================================================
--- 137318b273db26b889675101fbd02d2e84cae5e3/drivers/ide/ide-lib.c  (mode:100644)
+++ uncommitted/drivers/ide/ide-lib.c  (mode:100644)
@@ -458,13 +458,14 @@
 	spin_unlock(&ide_lock);
 	if (!rq)
 		return;
-	if (rq->flags & (REQ_DRIVE_CMD | REQ_DRIVE_TASK)) {
+	if (rq->cmd_type == REQ_TYPE_ATA_CMD ||
+	    rq->cmd_type == REQ_TYPE_ATA_TASK) {
 		char *args = rq->buffer;
 		if (args) {
 			opcode = args[0];
 			found = 1;
 		}
-	} else if (rq->flags & REQ_DRIVE_TASKFILE) {
+	} else if (rq->cmd_type & REQ_TYPE_ATA_TASKFILE) {
 		ide_task_t *args = rq->special;
 		if (args) {
 			task_struct_t *tf = (task_struct_t *) args->tfRegister;
Index: drivers/ide/ide-tape.c
===================================================================
--- 137318b273db26b889675101fbd02d2e84cae5e3/drivers/ide/ide-tape.c  (mode:100644)
+++ uncommitted/drivers/ide/ide-tape.c  (mode:100644)
@@ -1774,7 +1774,7 @@
 static void idetape_init_rq(struct request *rq, u8 cmd)
 {
 	memset(rq, 0, sizeof(*rq));
-	rq->flags = REQ_SPECIAL;
+	rq->cmd_type = REQ_TYPE_SPECIAL;
 	rq->cmd[0] = cmd;
 }
 
@@ -2431,7 +2431,7 @@
 			rq->sector, rq->nr_sectors, rq->current_nr_sectors);
 #endif /* IDETAPE_DEBUG_LOG */
 
-	if ((rq->flags & REQ_SPECIAL) == 0) {
+	if (!blk_special_request(rq)) {
 		/*
 		 * We do not support buffer cache originated requests.
 		 */
@@ -2760,7 +2760,7 @@
 	idetape_tape_t *tape = drive->driver_data;
 
 #if IDETAPE_DEBUG_BUGS
-	if (rq == NULL || (rq->flags & REQ_SPECIAL) == 0) {
+	if (rq == NULL || !blk_special_request(rq)) {
 		printk (KERN_ERR "ide-tape: bug: Trying to sleep on non-valid request\n");
 		return;
 	}
Index: drivers/ide/ide-taskfile.c
===================================================================
--- 137318b273db26b889675101fbd02d2e84cae5e3/drivers/ide/ide-taskfile.c  (mode:100644)
+++ uncommitted/drivers/ide/ide-taskfile.c  (mode:100644)
@@ -366,7 +366,7 @@
 
 static void task_end_request(ide_drive_t *drive, struct request *rq, u8 stat)
 {
-	if (rq->flags & REQ_DRIVE_TASKFILE) {
+	if (rq->cmd_type == REQ_TYPE_ATA_TASKFILE) {
 		ide_task_t *task = rq->special;
 
 		if (task->tf_out_flags.all) {
@@ -471,7 +471,7 @@
 	struct request rq;
 
 	memset(&rq, 0, sizeof(rq));
-	rq.flags = REQ_DRIVE_TASKFILE;
+	rq.cmd_type = REQ_TYPE_ATA_TASKFILE;
 	rq.buffer = buf;
 
 	/*
@@ -496,7 +496,7 @@
 		rq.hard_cur_sectors = rq.current_nr_sectors = rq.nr_sectors;
 
 		if (args->command_type == IDE_DRIVE_TASK_RAW_WRITE)
-			rq.flags |= REQ_RW;
+			rq.cmd_flags |= REQ_RW;
 	}
 
 	rq.special = args;
@@ -739,7 +739,7 @@
 	struct request rq;
 
 	ide_init_drive_cmd(&rq);
-	rq.flags = REQ_DRIVE_TASK;
+	rq.cmd_type = REQ_TYPE_ATA_TASK;
 	rq.buffer = buf;
 	return ide_do_drive_cmd(drive, &rq, ide_wait);
 }
Index: drivers/ide/ide.c
===================================================================
--- 137318b273db26b889675101fbd02d2e84cae5e3/drivers/ide/ide.c  (mode:100644)
+++ uncommitted/drivers/ide/ide.c  (mode:100644)
@@ -1381,7 +1381,7 @@
 	memset(&rq, 0, sizeof(rq));
 	memset(&rqpm, 0, sizeof(rqpm));
 	memset(&args, 0, sizeof(args));
-	rq.flags = REQ_PM_SUSPEND;
+	rq.cmd_type = REQ_TYPE_PM_SUSPEND;
 	rq.special = &args;
 	rq.pm = &rqpm;
 	rqpm.pm_step = ide_pm_state_start_suspend;
@@ -1400,7 +1400,7 @@
 	memset(&rq, 0, sizeof(rq));
 	memset(&rqpm, 0, sizeof(rqpm));
 	memset(&args, 0, sizeof(args));
-	rq.flags = REQ_PM_RESUME;
+	rq.cmd_type = REQ_TYPE_PM_RESUME;
 	rq.special = &args;
 	rq.pm = &rqpm;
 	rqpm.pm_step = ide_pm_state_start_resume;
Index: drivers/ide/legacy/hd.c
===================================================================
--- 137318b273db26b889675101fbd02d2e84cae5e3/drivers/ide/legacy/hd.c  (mode:100644)
+++ uncommitted/drivers/ide/legacy/hd.c  (mode:100644)
@@ -624,7 +624,7 @@
 		req->rq_disk->disk_name, (req->cmd == READ)?"read":"writ",
 		cyl, head, sec, nsect, req->buffer);
 #endif
-	if (req->flags & REQ_CMD) {
+	if (blk_fs_request(req)) {
 		switch (rq_data_dir(req)) {
 		case READ:
 			hd_out(disk,nsect,sec,head,cyl,WIN_READ,&read_intr);
Index: drivers/message/i2o/i2o_block.c
===================================================================
--- 137318b273db26b889675101fbd02d2e84cae5e3/drivers/message/i2o/i2o_block.c  (mode:100644)
+++ uncommitted/drivers/message/i2o/i2o_block.c  (mode:100644)
@@ -352,9 +352,9 @@
 	struct i2o_block_request *ireq;
 
 	/* request is already processed by us, so return */
-	if (req->flags & REQ_SPECIAL) {
+	if (blk_special_request(req)) {
 		osm_debug("REQ_SPECIAL already set!\n");
-		req->flags |= REQ_DONTPREP;
+		req->cmd_flags |= REQ_DONTPREP;
 		return BLKPREP_OK;
 	}
 
@@ -373,7 +373,8 @@
 		ireq = req->special;
 
 	/* do not come back here */
-	req->flags |= REQ_DONTPREP | REQ_SPECIAL;
+	req->cmd_type = REQ_TYPE_SPECIAL;
+	req->cmd_flags |= REQ_DONTPREP;
 
 	return BLKPREP_OK;
 };
Index: drivers/mmc/mmc_queue.c
===================================================================
--- 137318b273db26b889675101fbd02d2e84cae5e3/drivers/mmc/mmc_queue.c  (mode:100644)
+++ uncommitted/drivers/mmc/mmc_queue.c  (mode:100644)
@@ -28,7 +28,7 @@
 	struct mmc_queue *mq = q->queuedata;
 	int ret = BLKPREP_KILL;
 
-	if (req->flags & REQ_SPECIAL) {
+	if (blk_special_request(req)) {
 		/*
 		 * Special commands already have the command
 		 * blocks already setup in req->special.
@@ -36,7 +36,7 @@
 		BUG_ON(!req->special);
 
 		ret = BLKPREP_OK;
-	} else if (req->flags & (REQ_CMD | REQ_BLOCK_PC)) {
+	} else if (blk_fs_request(req) || blk_pc_request(req)) {
 		/*
 		 * Block I/O requests need translating according
 		 * to the protocol.
Index: drivers/mtd/mtd_blkdevs.c
===================================================================
--- 137318b273db26b889675101fbd02d2e84cae5e3/drivers/mtd/mtd_blkdevs.c  (mode:100644)
+++ uncommitted/drivers/mtd/mtd_blkdevs.c  (mode:100644)
@@ -47,7 +47,7 @@
 	nsect = req->current_nr_sectors;
 	buf = req->buffer;
 
-	if (!(req->flags & REQ_CMD))
+	if (!blk_fs_request(req))
 		return 0;
 
 	if (block + nsect > get_capacity(req->rq_disk))
Index: drivers/scsi/ide-scsi.c
===================================================================
--- 137318b273db26b889675101fbd02d2e84cae5e3/drivers/scsi/ide-scsi.c  (mode:100644)
+++ uncommitted/drivers/scsi/ide-scsi.c  (mode:100644)
@@ -317,7 +317,7 @@
 	pc->buffer = buf;
 	pc->c[0] = REQUEST_SENSE;
 	pc->c[4] = pc->request_transfer = pc->buffer_size = SCSI_SENSE_BUFFERSIZE;
-	rq->flags = REQ_SENSE;
+	rq->cmd_type = REQ_TYPE_SENSE;
 	pc->timeout = jiffies + WAIT_READY;
 	/* NOTE! Save the failed packet command in "rq->buffer" */
 	rq->buffer = (void *) failed_command->special;
@@ -370,12 +370,12 @@
 	u8 *scsi_buf;
 	unsigned long flags;
 
-	if (!(rq->flags & (REQ_SPECIAL|REQ_SENSE))) {
+	if (!blk_special_request(rq) || !blk_sense_request(rq)) {
 		ide_end_request(drive, uptodate, nrsecs);
 		return 0;
 	}
 	ide_end_drive_cmd (drive, 0, 0);
-	if (rq->flags & REQ_SENSE) {
+	if (blk_sense_request(rq)) {
 		idescsi_pc_t *opc = (idescsi_pc_t *) rq->buffer;
 		if (log) {
 			printk ("ide-scsi: %s: wrap up check %lu, rst = ", drive->name, opc->scsi_cmd->serial_number);
@@ -686,7 +686,7 @@
 	printk (KERN_INFO "sector: %ld, nr_sectors: %ld, current_nr_sectors: %d\n",rq->sector,rq->nr_sectors,rq->current_nr_sectors);
 #endif /* IDESCSI_DEBUG_LOG */
 
-	if (rq->flags & (REQ_SPECIAL|REQ_SENSE)) {
+	if (blk_sense_request(rq) || blk_special_request(rq)) {
 		return idescsi_issue_pc (drive, (idescsi_pc_t *) rq->special);
 	}
 	blk_dump_rq_flags(rq, "ide-scsi: unsup command");
@@ -921,7 +921,7 @@
 
 	ide_init_drive_cmd (rq);
 	rq->special = (char *) pc;
-	rq->flags = REQ_SPECIAL;
+	rq->cmd_type = REQ_TYPE_SPECIAL;
 	spin_unlock_irq(host->host_lock);
 	rq->rq_disk = scsi->disk;
 	(void) ide_do_drive_cmd (drive, rq, ide_end);
@@ -975,7 +975,7 @@
 		 */
 		printk (KERN_ERR "ide-scsi: cmd aborted!\n");
 
-		if (scsi->pc->rq->flags & REQ_SENSE)
+		if (blk_sense_request(scsi->pc->rq))
 			kfree(scsi->pc->buffer);
 		kfree(scsi->pc->rq);
 		kfree(scsi->pc);
@@ -1023,7 +1023,7 @@
 	/* kill current request */
 	blkdev_dequeue_request(req);
 	end_that_request_last(req);
-	if (req->flags & REQ_SENSE)
+	if (blk_sense_request(req))
 		kfree(scsi->pc->buffer);
 	kfree(scsi->pc);
 	scsi->pc = NULL;
Index: drivers/scsi/scsi_lib.c
===================================================================
--- 137318b273db26b889675101fbd02d2e84cae5e3/drivers/scsi/scsi_lib.c  (mode:100644)
+++ uncommitted/drivers/scsi/scsi_lib.c  (mode:100644)
@@ -90,7 +90,7 @@
 	 * Because users of this function are apt to reuse requests with no
 	 * modification, we have to sanitise the request flags here
 	 */
-	sreq->sr_request->flags &= ~REQ_DONTPREP;
+	sreq->sr_request->cmd_flags &= ~REQ_DONTPREP;
 	blk_insert_request(sreq->sr_device->request_queue, sreq->sr_request,
 		       	   at_head, sreq, 0);
 	return 0;
@@ -485,7 +485,7 @@
  */
 static void scsi_requeue_command(struct request_queue *q, struct scsi_cmnd *cmd)
 {
-	cmd->request->flags &= ~REQ_DONTPREP;
+	cmd->request->cmd_flags &= ~REQ_DONTPREP;
 	blk_insert_request(q, cmd->request, 1, cmd, 1);
 
 	scsi_run_queue(q);
@@ -922,7 +922,7 @@
 	/*
 	 * if this is a rq->data based REQ_BLOCK_PC, setup for a non-sg xfer
 	 */
-	if ((req->flags & REQ_BLOCK_PC) && !req->bio) {
+	if (blk_pc_request(req) && req->bio) {
 		cmd->request_bufflen = req->data_len;
 		cmd->request_buffer = req->data;
 		req->buffer = req->data;
@@ -942,7 +942,7 @@
 	 */
 	sgpnt = scsi_alloc_sgtable(cmd, GFP_ATOMIC);
 	if (unlikely(!sgpnt)) {
-		req->flags |= REQ_SPECIAL;
+		req->cmd_type = REQ_TYPE_SPECIAL;
 		return BLKPREP_DEFER;
 	}
 
@@ -1066,7 +1066,7 @@
 	 * these two cases differently.  We differentiate by looking
 	 * at request->cmd, as this tells us the real story.
 	 */
-	if (req->flags & REQ_SPECIAL) {
+	if (req->cmd_type == REQ_TYPE_SPECIAL) {
 		struct scsi_request *sreq = req->special;
 
 		if (sreq->sr_magic == SCSI_REQ_MAGIC) {
@@ -1076,7 +1076,7 @@
 			scsi_init_cmd_from_req(cmd, sreq);
 		} else
 			cmd = req->special;
-	} else if (req->flags & (REQ_CMD | REQ_BLOCK_PC)) {
+	} else if (blk_fs_request(req) || blk_pc_request(req)) {
 
 		if(unlikely(specials_only)) {
 			if(specials_only == SDEV_QUIESCE ||
@@ -1119,7 +1119,7 @@
 	 * lock.  We hope REQ_STARTED prevents anything untoward from
 	 * happening now.
 	 */
-	if (req->flags & (REQ_CMD | REQ_BLOCK_PC)) {
+	if (blk_fs_request(req) || blk_pc_request(req)) {
 		struct scsi_driver *drv;
 		int ret;
 
@@ -1158,7 +1158,7 @@
 	/*
 	 * The request is now prepped, no need to come back here
 	 */
-	req->flags |= REQ_DONTPREP;
+	req->cmd_flags |= REQ_DONTPREP;
 	return BLKPREP_OK;
 
  defer:
@@ -1250,7 +1250,7 @@
 
 	while ((req = elv_next_request(q)) != NULL) {
 		blkdev_dequeue_request(req);
-		req->flags |= REQ_QUIET;
+		req->cmd_flags |= REQ_QUIET;
 		while (end_that_request_first(req, 0, req->nr_sectors))
 			;
 		end_that_request_last(req);
@@ -1305,7 +1305,7 @@
 			printk(KERN_ERR "scsi%d (%d:%d): rejecting I/O to offline device\n",
 			       sdev->host->host_no, sdev->id, sdev->lun);
 			blkdev_dequeue_request(req);
-			req->flags |= REQ_QUIET;
+			req->cmd_flags |= REQ_QUIET;
 			while (end_that_request_first(req, 0, req->nr_sectors))
 				;
 			end_that_request_last(req);
Index: drivers/scsi/sd.c
===================================================================
--- 137318b273db26b889675101fbd02d2e84cae5e3/drivers/scsi/sd.c  (mode:100644)
+++ uncommitted/drivers/scsi/sd.c  (mode:100644)
@@ -329,13 +329,9 @@
 		}
 		SCpnt->cmnd[0] = WRITE_6;
 		SCpnt->sc_data_direction = DMA_TO_DEVICE;
-	} else if (rq_data_dir(rq) == READ) {
+	} else {
 		SCpnt->cmnd[0] = READ_6;
 		SCpnt->sc_data_direction = DMA_FROM_DEVICE;
-	} else {
-		printk(KERN_ERR "sd: Unknown command %lx\n", rq->flags);
-/* overkill 	panic("Unknown sd command %lx\n", rq->flags); */
-		return 0;
 	}
 
 	SCSI_LOG_HLQUEUE(2, printk("%s : %s %d/%ld 512 byte blocks.\n", 
@@ -769,7 +765,8 @@
 
 	if (sdkp->WCE) {
 		memset(rq->cmd, 0, sizeof(rq->cmd));
-		rq->flags |= REQ_BLOCK_PC | REQ_SOFTBARRIER;
+		rq->cmd_type = REQ_TYPE_BLOCK_PC;
+		rq->cmd_flags |= REQ_SOFTBARRIER;
 		rq->timeout = SD_TIMEOUT;
 		rq->cmd[0] = SYNCHRONIZE_CACHE;
 		return 1;
Index: drivers/scsi/sr.c
===================================================================
--- 137318b273db26b889675101fbd02d2e84cae5e3/drivers/scsi/sr.c  (mode:100644)
+++ uncommitted/drivers/scsi/sr.c  (mode:100644)
@@ -349,7 +349,7 @@
 		goto queue;
 	}
 
-	if (!(SCpnt->request->flags & REQ_CMD)) {
+	if (!blk_fs_request(SCpnt->request)) {
 		blk_dump_rq_flags(SCpnt->request, "sr unsup command");
 		return 0;
 	}
Index: drivers/scsi/sun3_NCR5380.c
===================================================================
--- 137318b273db26b889675101fbd02d2e84cae5e3/drivers/scsi/sun3_NCR5380.c  (mode:100644)
+++ uncommitted/drivers/scsi/sun3_NCR5380.c  (mode:100644)
@@ -2017,7 +2017,7 @@
 		if((count > SUN3_DMA_MINSIZE) && (sun3_dma_setup_done
 						  != cmd))
 		{
-			if(cmd->request->flags & REQ_CMD) {
+			if(blk_fs_request(cmd->request)) {
 				sun3scsi_dma_setup(d, count,
 						   rq_data_dir(cmd->request));
 				sun3_dma_setup_done = cmd;
Index: drivers/scsi/sun3_scsi.c
===================================================================
--- 137318b273db26b889675101fbd02d2e84cae5e3/drivers/scsi/sun3_scsi.c  (mode:100644)
+++ uncommitted/drivers/scsi/sun3_scsi.c  (mode:100644)
@@ -524,7 +524,7 @@
 static inline unsigned long sun3scsi_dma_xfer_len(unsigned long wanted, Scsi_Cmnd *cmd,
 				    int write_flag)
 {
-	if(cmd->request->flags & REQ_CMD)
+	if(blk_fs_request(cmd->request))
  		return wanted;
 	else
 		return 0;
Index: drivers/scsi/sun3_scsi_vme.c
===================================================================
--- 137318b273db26b889675101fbd02d2e84cae5e3/drivers/scsi/sun3_scsi_vme.c  (mode:100644)
+++ uncommitted/drivers/scsi/sun3_scsi_vme.c  (mode:100644)
@@ -458,7 +458,7 @@
 static inline unsigned long sun3scsi_dma_xfer_len(unsigned long wanted, Scsi_Cmnd *cmd,
 				    int write_flag)
 {
-	if(cmd->request->flags & REQ_CMD)
+	if(blk_fs_request(cmd->request))
  		return wanted;
 	else
 		return 0;
Index: include/linux/blkdev.h
===================================================================
--- 137318b273db26b889675101fbd02d2e84cae5e3/include/linux/blkdev.h  (mode:100644)
+++ uncommitted/include/linux/blkdev.h  (mode:100644)
@@ -113,7 +113,8 @@
 	struct list_head queuelist; /* looking for ->queue? you must _not_
 				     * access it directly, use
 				     * blkdev_dequeue_request! */
-	unsigned long flags;		/* see REQ_ bits below */
+	unsigned int cmd_flags;
+	unsigned char cmd_type;
 
 	/* Maintain bio traversal state for part by part I/O submission.
 	 * hard_* are block layer internals, no driver should touch them!
@@ -188,38 +189,63 @@
 };
 
 /*
- * first three bits match BIO_RW* bits, important
+ * request command types
  */
-enum rq_flag_bits {
+enum rq_cmd_type_bits {
+	REQ_TYPE_FS		= 1,	/* fs request */
+	REQ_TYPE_BLOCK_PC,		/* scsi command */
+	REQ_TYPE_SENSE,			/* sense request */
+	REQ_TYPE_PM_SUSPEND,		/* suspend request */
+	REQ_TYPE_PM_RESUME,		/* resume request */
+	REQ_TYPE_PM_SHUTDOWN,		/* shutdown request */
+	REQ_TYPE_FLUSH,			/* flush request */
+	REQ_TYPE_SPECIAL,		/* driver defined type */
+	REQ_TYPE_LINUX_BLOCK,		/* generic block layer message */
+	/*
+	 * for ATA/ATAPI devices. this really doesn't belong here, ide should
+	 * use REQ_TYPE_SPECIAL and use rq->cmd[0] to differentiate what type
+	 * of request this is.
+	 */
+	REQ_TYPE_ATA_CMD,
+	REQ_TYPE_ATA_TASK,
+	REQ_TYPE_ATA_TASKFILE,
+};
+
+/*
+ * For request of type REQ_TYPE_LINUX_BLOCK, rq->cmd[0] is the opcode being
+ * sent down (similar to how REQ_TYPE_BLOCK_PC means that ->cmd[] holds a
+ * SCSI cdb.
+ *
+ * 0x00 -> 0x3f are driver private, to be used for whatever purpose they need,
+ * typically to differentiate REQ_TYPE_SPECIAL requests.
+ *
+ */
+enum {
+	/*
+	 * just examples for now
+	 */
+	REQ_LB_OP_EJECT	= 0x40,		/* eject request */
+	REQ_LB_OP_FLUSH = 0x41,		/* flush device */
+};
+
+/*
+ * request type modified bits. first three bits match BIO_RW* bits, important
+ */
+enum rq_cmd_flag_bits {
 	__REQ_RW,		/* not set, read. set, write */
 	__REQ_FAILFAST,		/* no low level driver retries */
 	__REQ_SOFTBARRIER,	/* may not be passed by ioscheduler */
 	__REQ_HARDBARRIER,	/* may not be passed by drive either */
-	__REQ_CMD,		/* is a regular fs rw request */
 	__REQ_NOMERGE,		/* don't touch this for merging */
 	__REQ_STARTED,		/* drive already may have started this one */
 	__REQ_DONTPREP,		/* don't call prep for this one */
 	__REQ_QUEUED,		/* uses queueing */
-	/*
-	 * for ATA/ATAPI devices
-	 */
-	__REQ_PC,		/* packet command (special) */
-	__REQ_BLOCK_PC,		/* queued down pc from block layer */
-	__REQ_SENSE,		/* sense retrival */
-
 	__REQ_FAILED,		/* set if the request failed */
 	__REQ_QUIET,		/* don't worry about errors */
-	__REQ_SPECIAL,		/* driver suplied command */
-	__REQ_DRIVE_CMD,
-	__REQ_DRIVE_TASK,
-	__REQ_DRIVE_TASKFILE,
 	__REQ_PREEMPT,		/* set for "ide_preempt" requests */
-	__REQ_PM_SUSPEND,	/* suspend request */
-	__REQ_PM_RESUME,	/* resume request */
-	__REQ_PM_SHUTDOWN,	/* shutdown request */
 	__REQ_BAR_PREFLUSH,	/* barrier pre-flush done */
 	__REQ_BAR_POSTFLUSH,	/* barrier post-flush */
-	__REQ_BAR_FLUSH,	/* rq is the flush request */
+
 	__REQ_NR_BITS,		/* stops here */
 };
 
@@ -227,30 +253,18 @@
 #define REQ_FAILFAST	(1 << __REQ_FAILFAST)
 #define REQ_SOFTBARRIER	(1 << __REQ_SOFTBARRIER)
 #define REQ_HARDBARRIER	(1 << __REQ_HARDBARRIER)
-#define REQ_CMD		(1 << __REQ_CMD)
 #define REQ_NOMERGE	(1 << __REQ_NOMERGE)
 #define REQ_STARTED	(1 << __REQ_STARTED)
 #define REQ_DONTPREP	(1 << __REQ_DONTPREP)
 #define REQ_QUEUED	(1 << __REQ_QUEUED)
-#define REQ_PC		(1 << __REQ_PC)
-#define REQ_BLOCK_PC	(1 << __REQ_BLOCK_PC)
-#define REQ_SENSE	(1 << __REQ_SENSE)
 #define REQ_FAILED	(1 << __REQ_FAILED)
 #define REQ_QUIET	(1 << __REQ_QUIET)
-#define REQ_SPECIAL	(1 << __REQ_SPECIAL)
-#define REQ_DRIVE_CMD	(1 << __REQ_DRIVE_CMD)
-#define REQ_DRIVE_TASK	(1 << __REQ_DRIVE_TASK)
-#define REQ_DRIVE_TASKFILE	(1 << __REQ_DRIVE_TASKFILE)
 #define REQ_PREEMPT	(1 << __REQ_PREEMPT)
-#define REQ_PM_SUSPEND	(1 << __REQ_PM_SUSPEND)
-#define REQ_PM_RESUME	(1 << __REQ_PM_RESUME)
-#define REQ_PM_SHUTDOWN	(1 << __REQ_PM_SHUTDOWN)
 #define REQ_BAR_PREFLUSH	(1 << __REQ_BAR_PREFLUSH)
 #define REQ_BAR_POSTFLUSH	(1 << __REQ_BAR_POSTFLUSH)
-#define REQ_BAR_FLUSH	(1 << __REQ_BAR_FLUSH)
 
 /*
- * State information carried for REQ_PM_SUSPEND and REQ_PM_RESUME
+ * State information carried for REQ_TYPE_PM_SUSPEND and REQ_TYPE_PM_RESUME
  * requests. Some step values could eventually be made generic.
  */
 struct request_pm_state
@@ -434,25 +448,28 @@
 #define blk_queue_stopped(q)	test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags)
 #define blk_queue_flushing(q)	test_bit(QUEUE_FLAG_FLUSH, &(q)->queue_flags)
 
-#define blk_fs_request(rq)	((rq)->flags & REQ_CMD)
-#define blk_pc_request(rq)	((rq)->flags & REQ_BLOCK_PC)
-#define blk_noretry_request(rq)	((rq)->flags & REQ_FAILFAST)
-#define blk_rq_started(rq)	((rq)->flags & REQ_STARTED)
+#define blk_fs_request(rq)	((rq)->cmd_type == REQ_TYPE_FS)
+#define blk_pc_request(rq)	((rq)->cmd_type == REQ_TYPE_BLOCK_PC)
+#define blk_special_request(rq)	((rq)->cmd_type == REQ_TYPE_SPECIAL)
+#define blk_sense_request(rq)	((rq)->cmd_type == REQ_TYPE_SENSE)
+
+#define blk_noretry_request(rq)	((rq)->cmd_flags & REQ_FAILFAST)
+#define blk_rq_started(rq)	((rq)->cmd_flags & REQ_STARTED)
 
 #define blk_account_rq(rq)	(blk_rq_started(rq) && blk_fs_request(rq))
 
-#define blk_pm_suspend_request(rq)	((rq)->flags & REQ_PM_SUSPEND)
-#define blk_pm_resume_request(rq)	((rq)->flags & REQ_PM_RESUME)
+#define blk_pm_suspend_request(rq)	((rq)->cmd_type == REQ_TYPE_PM_SUSPEND)
+#define blk_pm_resume_request(rq)	((rq)->cmd_type == REQ_TYPE_PM_RESUME)
 #define blk_pm_request(rq)	\
-	((rq)->flags & (REQ_PM_SUSPEND | REQ_PM_RESUME))
+	(blk_pm_suspend_request(rq) || blk_pm_resume_request(rq))
 
-#define blk_barrier_rq(rq)	((rq)->flags & REQ_HARDBARRIER)
-#define blk_barrier_preflush(rq)	((rq)->flags & REQ_BAR_PREFLUSH)
-#define blk_barrier_postflush(rq)	((rq)->flags & REQ_BAR_POSTFLUSH)
+#define blk_barrier_rq(rq)	((rq)->cmd_flags & REQ_HARDBARRIER)
+#define blk_barrier_preflush(rq)	((rq)->cmd_flags & REQ_BAR_PREFLUSH)
+#define blk_barrier_postflush(rq)	((rq)->cmd_flags & REQ_BAR_POSTFLUSH)
 
 #define list_entry_rq(ptr)	list_entry((ptr), struct request, queuelist)
 
-#define rq_data_dir(rq)		((rq)->flags & 1)
+#define rq_data_dir(rq)		((rq)->cmd_flags & 1)
 
 static inline int blk_queue_full(struct request_queue *q, int rw)
 {
@@ -485,7 +502,7 @@
 #define RQ_NOMERGE_FLAGS	\
 	(REQ_NOMERGE | REQ_STARTED | REQ_HARDBARRIER | REQ_SOFTBARRIER)
 #define rq_mergeable(rq)	\
-	(!((rq)->flags & RQ_NOMERGE_FLAGS) && blk_fs_request((rq)))
+	(!((rq)->cmd_flags & RQ_NOMERGE_FLAGS) && blk_fs_request((rq)))
 
 /*
  * noop, requests are automagically marked as active/inactive by I/O
@@ -654,7 +671,7 @@
  */
 #define blk_queue_tag_depth(q)		((q)->queue_tags->busy)
 #define blk_queue_tag_queue(q)		((q)->queue_tags->busy < (q)->queue_tags->max_depth)
-#define blk_rq_tagged(rq)		((rq)->flags & REQ_QUEUED)
+#define blk_rq_tagged(rq)		((rq)->cmd_flags & REQ_QUEUED)
 extern int blk_queue_start_tag(request_queue_t *, struct request *);
 extern struct request *blk_queue_find_tag(request_queue_t *, int);
 extern void blk_queue_end_tag(request_queue_t *, struct request *);
Index: include/scsi/scsi_tcq.h
===================================================================
--- 137318b273db26b889675101fbd02d2e84cae5e3/include/scsi/scsi_tcq.h  (mode:100644)
+++ uncommitted/include/scsi/scsi_tcq.h  (mode:100644)
@@ -98,7 +98,7 @@
 	struct scsi_device *sdev = cmd->device;
 
         if (blk_rq_tagged(req)) {
-		if (sdev->ordered_tags && req->flags & REQ_HARDBARRIER)
+		if (sdev->ordered_tags && req->cmd_flags & REQ_HARDBARRIER)
         	        *msg++ = MSG_ORDERED_TAG;
         	else
         	        *msg++ = MSG_SIMPLE_TAG;

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH] libata: device suspend/resume
  2005-05-25  9:29                   ` Jens Axboe
@ 2005-05-25 23:40                     ` Guennadi Liakhovetski
  2005-05-26  1:05                     ` Jeff Garzik
                                       ` (2 subsequent siblings)
  3 siblings, 0 replies; 44+ messages in thread
From: Guennadi Liakhovetski @ 2005-05-25 23:40 UTC (permalink / raw)
  To: Jens Axboe; +Cc: Jeff Garzik, James Bottomley, SCSI Mailing List, linux-ide

On Wed, 25 May 2005, Jens Axboe wrote:

> On Tue, May 24 2005, Jeff Garzik wrote:
> > Jens Axboe wrote:
> > >I agree, it's a cleaner approach, with the rq being a container for
> > >generel messages as well not just SCSI commands. The one missing piece
> > >for that was the rq->end_io() callback so everything doesn't have to go
> > >down sync, but that is in now as well.
> > >
> > >I'll try and cook something up.
> > 
> > Very cool ;)
> 
> This is the base for it. It splits request->flags into two variables:
> 
> - cmd_type. this is not a bitmask, but a value indicating what type of
>   request this is.
> 
> - cmd_flags. various command modified flags.
> 
> The idea is to add a REQ_TYPE_LINUX_BLOCK request type, where we define
> a set of command opcodes that signify an upper level defined function
> (such as flush) that is implemented differently at the hardware/driver
> level. Basically a way to pass down messages or commands generically.
> 
> I like this better than using scsi opcodes always, it's a cleaner
> abstraction. For sending generic commands to a device, we could add a
> function ala:
> 
> blk_send_message(queue, unsigned char cmd, completion_callback);
> 
> that would allocate the request, set it up, add to block layer queue,
> and call the completion_callback when it is done. So for a flush, a
> driver could do
> 
>         blk_send_message(q, REQ_LB_OP_FLUSH, flush_done);
> 
> to a target queue.
> 
> What do you think?

Heh, very nice! Just what's needed for nbd!:-) NBD has several "threads":

1. the one running in the original nbd-client context, receiving answers 
from the server and completing requests,

2. the block queue

3. (my addition) block-device operations open, close, ioctl...

1. and 2. are already working in the block queue context, to simplify 3. I 
also faked those operations with block requests. I didn't go as far as 
really submitting them to the block layer and completing them 
respectively, I just used the same request object and inserted it directly 
into the internal list to process replies in 1. With the current patch 
from Jens (with some minor extensions) this could be done even nicer.

(see also my another post today to linux-scsi)

Thanks
Guennadi
---
Guennadi Liakhovetski


^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH] libata: device suspend/resume
  2005-05-25  9:29                   ` Jens Axboe
  2005-05-25 23:40                     ` Guennadi Liakhovetski
@ 2005-05-26  1:05                     ` Jeff Garzik
  2005-05-26  5:57                       ` Jens Axboe
  2005-05-26 22:56                     ` Bartlomiej Zolnierkiewicz
  2005-05-27  2:01                     ` Benjamin Herrenschmidt
  3 siblings, 1 reply; 44+ messages in thread
From: Jeff Garzik @ 2005-05-26  1:05 UTC (permalink / raw)
  To: Jens Axboe; +Cc: James Bottomley, SCSI Mailing List, linux-ide

[-- Attachment #1: Type: text/plain, Size: 1938 bytes --]

Jens Axboe wrote:
> On Tue, May 24 2005, Jeff Garzik wrote:
> 
>>Jens Axboe wrote:
>>
>>>I agree, it's a cleaner approach, with the rq being a container for
>>>generel messages as well not just SCSI commands. The one missing piece
>>>for that was the rq->end_io() callback so everything doesn't have to go
>>>down sync, but that is in now as well.
>>>
>>>I'll try and cook something up.
>>
>>Very cool ;)
> 
> 
> This is the base for it. It splits request->flags into two variables:
> 
> - cmd_type. this is not a bitmask, but a value indicating what type of
>   request this is.
> 
> - cmd_flags. various command modified flags.
> 
> The idea is to add a REQ_TYPE_LINUX_BLOCK request type, where we define
> a set of command opcodes that signify an upper level defined function
> (such as flush) that is implemented differently at the hardware/driver
> level. Basically a way to pass down messages or commands generically.
> 
> I like this better than using scsi opcodes always, it's a cleaner
> abstraction. For sending generic commands to a device, we could add a
> function ala:

cmd_type just adds a needless layer of switch{} statements in block 
drivers, and its information that can be trivially derived from the 
command opcode itself.

	cmd_type = cmd_types[opcode];

HOWEVER, don't fall in love with SCSI opcodes.

We want _Linux_ commands, not SCSI commands.  Just think of a 
request_queue as having its own command protocol, one that you can 
_change at will_.

Yes, often request_queue commands may map seamlessly to SCSI (or ATA or 
I2O) commands.  And that's good.  But don't let yourself be locked into 
SCSI.  SCSI is not generic enough, nor mutable enough for all our needs.

Just the other day I was thinking about the simpler approach, like the 
attached :)  It
* adds RQ_ to avoid namespace conflicts
* adds RQ_FLUSH, RQ_PM_EVENT

So, overall, I would say "think Linux opcodes" as the preferred direction.

	Jeff



[-- Attachment #2: patch --]
[-- Type: text/plain, Size: 1210 bytes --]

diff --git a/include/linux/fs.h b/include/linux/fs.h
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -66,13 +66,24 @@ extern int dir_notify_enable;
 
 #define RW_MASK		1
 #define RWA_MASK	2
-#define READ 0
-#define WRITE 1
-#define READA 2		/* read-ahead  - don't block if no resources */
-#define SPECIAL 4	/* For non-blockdevice requests in request queue */
-#define READ_SYNC	(READ | (1 << BIO_RW_SYNC))
-#define WRITE_SYNC	(WRITE | (1 << BIO_RW_SYNC))
-#define WRITE_BARRIER	((1 << BIO_RW) | (1 << BIO_RW_BARRIER))
+#define RQ_READ		0
+#define RQ_WRITE	1
+#define RQ_READA	2	/* read-ahead  - don't block if no resources */
+#define RQ_SPECIAL	4	/* For non-blockdevice requests in request queue */
+#define RQ_READ_SYNC		(READ | (1 << BIO_RW_SYNC))
+#define RQ_WRITE_SYNC		(WRITE | (1 << BIO_RW_SYNC))
+#define RQ_WRITE_BARRIER	((1 << BIO_RW) | (1 << BIO_RW_BARRIER))
+#define RQ_FLUSH	7
+#define RQ_PM_EVENT	8
+
+/* compatibility with existing code */
+#define READ		RQ_READ
+#define WRITE		RQ_WRITE
+#define READA		RQ_READA
+#define SPECIAL		RQ_SPECIAL
+#define READ_SYNC	RQ_READ_SYNC
+#define WRITE_SYNC	RQ_WRITE_SYNC
+#define WRITE_BARRIER	RQ_WRITE_BARRIER
 
 #define SEL_IN		1
 #define SEL_OUT		2

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH] libata: device suspend/resume
  2005-05-26  1:05                     ` Jeff Garzik
@ 2005-05-26  5:57                       ` Jens Axboe
  0 siblings, 0 replies; 44+ messages in thread
From: Jens Axboe @ 2005-05-26  5:57 UTC (permalink / raw)
  To: Jeff Garzik; +Cc: James Bottomley, SCSI Mailing List, linux-ide

On Wed, May 25 2005, Jeff Garzik wrote:
> Jens Axboe wrote:
> >On Tue, May 24 2005, Jeff Garzik wrote:
> >
> >>Jens Axboe wrote:
> >>
> >>>I agree, it's a cleaner approach, with the rq being a container for
> >>>generel messages as well not just SCSI commands. The one missing piece
> >>>for that was the rq->end_io() callback so everything doesn't have to go
> >>>down sync, but that is in now as well.
> >>>
> >>>I'll try and cook something up.
> >>
> >>Very cool ;)
> >
> >
> >This is the base for it. It splits request->flags into two variables:
> >
> >- cmd_type. this is not a bitmask, but a value indicating what type of
> >  request this is.
> >
> >- cmd_flags. various command modified flags.
> >
> >The idea is to add a REQ_TYPE_LINUX_BLOCK request type, where we define
> >a set of command opcodes that signify an upper level defined function
> >(such as flush) that is implemented differently at the hardware/driver
> >level. Basically a way to pass down messages or commands generically.
> >
> >I like this better than using scsi opcodes always, it's a cleaner
> >abstraction. For sending generic commands to a device, we could add a
> >function ala:
> 
> cmd_type just adds a needless layer of switch{} statements in block 
> drivers, and its information that can be trivially derived from the 
> command opcode itself.
> 
> 	cmd_type = cmd_types[opcode];
> 
> HOWEVER, don't fall in love with SCSI opcodes.

Eh, I don't follow. The opcode is only valid for cmd_type
REQ_TYPE_LINUX_MSG, not for anything else. Or for REQ_TYPE_BLOCK_PC of
course, like it is right now as well.

> We want _Linux_ commands, not SCSI commands.  Just think of a 
> request_queue as having its own command protocol, one that you can 
> _change at will_.
> 
> Yes, often request_queue commands may map seamlessly to SCSI (or ATA or 
> I2O) commands.  And that's good.  But don't let yourself be locked into 
> SCSI.  SCSI is not generic enough, nor mutable enough for all our needs.
> 
> Just the other day I was thinking about the simpler approach, like the 
> attached :)  It
> * adds RQ_ to avoid namespace conflicts
> * adds RQ_FLUSH, RQ_PM_EVENT
> 
> So, overall, I would say "think Linux opcodes" as the preferred direction.

And that's exactly what I proposed, the REQ_LB_OP_xxx are the linux
specific opcodes that can be defined at will. I'm not locking myself
into SCSI at all, I'm just putting ->cmd[] to use for a new command
type.

Don't confuse/mixup fs.h and the block layer, I don't see your proposal
adding anything of use. The fs.h READ/WRITE/etc were divorced from the
block layer in 2.5.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH] libata: device suspend/resume
  2005-05-25  9:29                   ` Jens Axboe
  2005-05-25 23:40                     ` Guennadi Liakhovetski
  2005-05-26  1:05                     ` Jeff Garzik
@ 2005-05-26 22:56                     ` Bartlomiej Zolnierkiewicz
  2005-05-27  6:54                       ` Jens Axboe
  2005-05-27  2:01                     ` Benjamin Herrenschmidt
  3 siblings, 1 reply; 44+ messages in thread
From: Bartlomiej Zolnierkiewicz @ 2005-05-26 22:56 UTC (permalink / raw)
  To: Jens Axboe; +Cc: Jeff Garzik, James Bottomley, SCSI Mailing List, linux-ide

Nice work Jens, some feedback below:

On 5/25/05, Jens Axboe <axboe@suse.de> wrote:

> Index: drivers/ide/ide-floppy.c
> ===================================================================
> --- 137318b273db26b889675101fbd02d2e84cae5e3/drivers/ide/ide-floppy.c  (mode:100644)
> +++ uncommitted/drivers/ide/ide-floppy.c  (mode:100644)

> @@ -1311,9 +1311,9 @@
>                 }
>                 pc = idefloppy_next_pc_storage(drive);
>                 idefloppy_create_rw_cmd(floppy, pc, rq, block);
> -       } else if (rq->flags & REQ_SPECIAL) {
> +       } else if (blk_special_request(rq))
>                 pc = (idefloppy_pc_t *) rq->buffer;
> -       } else if (rq->flags & REQ_BLOCK_PC) {
> +       } else if (blk_fs_request(rq)) {
>                 pc = idefloppy_next_pc_storage(drive);
>                 if (idefloppy_blockpc_cmd(floppy, pc, rq)) {
>                         idefloppy_do_end_request(drive, 0, 0);

blk_pc_request(rq)

> Index: drivers/ide/ide-io.c
> ===================================================================
> --- 137318b273db26b889675101fbd02d2e84cae5e3/drivers/ide/ide-io.c  (mode:100644)
> +++ uncommitted/drivers/ide/ide-io.c  (mode:100644)

> @@ -948,9 +948,10 @@
>         if (!drive->special.all) {
>                 ide_driver_t *drv;
> 
> -               if (rq->flags & (REQ_DRIVE_CMD | REQ_DRIVE_TASK))
> +               if (rq->cmd_type == REQ_TYPE_ATA_CMD ||
> +                   rq->cmd_type == REQ_TYPE_ATA_TASK)
>                         return execute_drive_cmd(drive, rq);
> -               else if (rq->flags & REQ_DRIVE_TASKFILE)
> +               else if (rq->cmd_type == REQ_TYPE_ATA_TASKFILE)
>                         return execute_drive_cmd(drive, rq);
>                 else if (blk_pm_request(rq)) {
>  #ifdef DEBUG_PM

|| rq->cmd_type == REQ_TYPE_ATA_TASKFILE while at it

> Index: drivers/scsi/ide-scsi.c
> ===================================================================
> --- 137318b273db26b889675101fbd02d2e84cae5e3/drivers/scsi/ide-scsi.c  (mode:100644)
> +++ uncommitted/drivers/scsi/ide-scsi.c  (mode:100644)

> @@ -370,12 +370,12 @@
>         u8 *scsi_buf;
>         unsigned long flags;
> 
> -       if (!(rq->flags & (REQ_SPECIAL|REQ_SENSE))) {
> +       if (!blk_special_request(rq) || !blk_sense_request(rq)) {
>                 ide_end_request(drive, uptodate, nrsecs);
>                 return 0;
>         }

!blk_special_request(rq) && !blk_sense_request(rq)

> Index: drivers/scsi/scsi_lib.c
> ===================================================================
> --- 137318b273db26b889675101fbd02d2e84cae5e3/drivers/scsi/scsi_lib.c  (mode:100644)
> +++ uncommitted/drivers/scsi/scsi_lib.c  (mode:100644)

> @@ -922,7 +922,7 @@
>         /*
>          * if this is a rq->data based REQ_BLOCK_PC, setup for a non-sg xfer
>          */
> -       if ((req->flags & REQ_BLOCK_PC) && !req->bio) {
> +       if (blk_pc_request(req) && req->bio) {
>                 cmd->request_bufflen = req->data_len;
>                 cmd->request_buffer = req->data;
>                 req->buffer = req->data;

!req->bio


I assume they you've verified that there are no odd cases possible like
rq->flags = REQ_DRIVE_TASKFILE | REQ_BAR_FLUSH or rq->flags = REQ_PC
(without REQ_BLOCK_PC) which would be obviously broken by this patch?

Bartlomiej

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH] libata: device suspend/resume
  2005-05-25  9:29                   ` Jens Axboe
                                       ` (2 preceding siblings ...)
  2005-05-26 22:56                     ` Bartlomiej Zolnierkiewicz
@ 2005-05-27  2:01                     ` Benjamin Herrenschmidt
  2005-05-27  6:55                       ` Jens Axboe
  3 siblings, 1 reply; 44+ messages in thread
From: Benjamin Herrenschmidt @ 2005-05-27  2:01 UTC (permalink / raw)
  To: Jens Axboe; +Cc: Jeff Garzik, James Bottomley, SCSI Mailing List, linux-ide

On Wed, 2005-05-25 at 11:29 +0200, Jens Axboe wrote:
> On Tue, May 24 2005, Jeff Garzik wrote:
> > Jens Axboe wrote:
> > >I agree, it's a cleaner approach, with the rq being a container for
> > >generel messages as well not just SCSI commands. The one missing piece
> > >for that was the rq->end_io() callback so everything doesn't have to go
> > >down sync, but that is in now as well.
> > >
> > >I'll try and cook something up.
> > 
> > Very cool ;)
> 
> This is the base for it. It splits request->flags into two variables:
> 
> - cmd_type. this is not a bitmask, but a value indicating what type of
>   request this is.
> 
> - cmd_flags. various command modified flags.

I like it too. It's a long overdue cleanup :)

Another thing is we should/could probably move some of the other fields
in the struct request into a union of structs. Some of them are never
present simultaneously ...

Ben.



^ permalink raw reply	[flat|nested] 44+ messages in thread

* libata, SCSI and storage drivers
  2005-05-24  7:13               ` Jens Axboe
@ 2005-05-27  2:49                 ` Jeff Garzik
  2005-05-27  6:45                   ` Douglas Gilbert
  0 siblings, 1 reply; 44+ messages in thread
From: Jeff Garzik @ 2005-05-27  2:49 UTC (permalink / raw)
  To: Jens Axboe; +Cc: James Bottomley, SCSI Mailing List, linux-ide

Jens Axboe wrote:
> On Tue, May 24 2005, Jeff Garzik wrote:
> 
>>Jens Axboe wrote:
>>
>>>On Tue, May 24 2005, Jeff Garzik wrote:
>>>
>>>>I can describe how this will look when libata is divorced from SCSI, if 
>>>>you would like, too...
>>>
>>>I was beginning to dispair you had given up that plan...
>>
>>hehe, nope.  I promised Linus, and I plan to keep my promise :)
> 
> 
> You promised me, too :)
> 
> 
>>I know how to do it.  Internally things have been kept as separate as 
>>possible from the SCSI layer.
> 
> 
> We should start a list of items that could potentially be moved to the
> block layer that libata currently uses.


Here are the two broad categories of things that immediately come to mind.


1.  Hardware in pre-production right now can do SAS or SATA on the same 
card.  So, real soon, a driver will need to do both SCSI and ATA 
depending on runtime conditions.

The SCSI transport class is a very nice way to connect low-level drivers 
and the class drivers (disk/cdrom/tape/...).  It works well with the 
device model, and is modular in just the right location.

I would like to develop ATA transport class(es).  In order to work well 
with SATA/SAS hardware, there will need to be at least one.

And as I hoped you have guessed.....  the ATA transport class should be 
a child of the block layer, not the SCSI layer.


2.  driver API.  Linux SCSI layer provides several services which are 
generalized to any "packet transport":
* mapping of devices to protocol buses (domain topology, etc.)
* command queueing
* error handling

For hardware like ATA or I2O, this "send-command" type of API is the 
most natural way to implement a low-level driver, particularly if the 
core code provides all the necessary queueing/mapping/EH services as well.

This infrastructure is -not- specific to SCSI at all.  And it is this 
infrastructure that allowed me to bring up libata so rapidly.

	Jeff



^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH] libata: device suspend/resume
  2005-05-23 22:10     ` James Bottomley
  2005-05-24  6:21       ` Jens Axboe
@ 2005-05-27  2:54       ` Jeff Garzik
  1 sibling, 0 replies; 44+ messages in thread
From: Jeff Garzik @ 2005-05-27  2:54 UTC (permalink / raw)
  To: James Bottomley; +Cc: SCSI Mailing List, linux-ide, Jens Axboe

James Bottomley wrote:
> +/* Just quietly quiesce the device and SYNCHRONIZE CACHE for suspend too */
> +static int sd_suspend(struct device *dev, pm_message_t state, u32 level)
> +{
> +	struct scsi_device *sdp = to_scsi_device(dev);
> +	struct scsi_disk *sdkp = dev_get_drvdata(dev);
> +
> +	if (!sdkp)
> +		return 0;         /* this can happen */
> +
> +	if (!sdkp->WCE)
> +		return 0;
> +
> +	/* don't try to sync an offline device ... it will only error */
> +	if (!scsi_device_online(sdp))
> +		return 0;
> +
> +	if (sd_sync_cache(sdp))
> +		return -EIO;


Add

	if (I_can_PM_this_disk && sd_start_stop_unit(sdp))
		return -EIO;

and libata is happy.

	Jeff



^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: libata, SCSI and storage drivers
  2005-05-27  2:49                 ` libata, SCSI and storage drivers Jeff Garzik
@ 2005-05-27  6:45                   ` Douglas Gilbert
  2005-05-27 14:41                     ` Luben Tuikov
  0 siblings, 1 reply; 44+ messages in thread
From: Douglas Gilbert @ 2005-05-27  6:45 UTC (permalink / raw)
  To: Jeff Garzik; +Cc: Jens Axboe, James Bottomley, SCSI Mailing List, linux-ide

Jeff Garzik wrote:
> Jens Axboe wrote:
> 
>> On Tue, May 24 2005, Jeff Garzik wrote:
>>
>>> Jens Axboe wrote:
>>>
>>>> On Tue, May 24 2005, Jeff Garzik wrote:
>>>>
>>>>> I can describe how this will look when libata is divorced from 
>>>>> SCSI, if you would like, too...
>>>>
>>>>
>>>> I was beginning to dispair you had given up that plan...
>>>
>>>
>>> hehe, nope.  I promised Linus, and I plan to keep my promise :)
>>
>>
>>
>> You promised me, too :)
>>
>>
>>> I know how to do it.  Internally things have been kept as separate as 
>>> possible from the SCSI layer.
>>
>>
>>
>> We should start a list of items that could potentially be moved to the
>> block layer that libata currently uses.
> 
> 
> 
> Here are the two broad categories of things that immediately come to mind.
> 
> 
> 1.  Hardware in pre-production right now can do SAS or SATA on the same 
> card.  So, real soon, a driver will need to do both SCSI and ATA 
> depending on runtime conditions.
> 
> The SCSI transport class is a very nice way to connect low-level drivers 
> and the class drivers (disk/cdrom/tape/...).  It works well with the 
> device model, and is modular in just the right location.
> 
> I would like to develop ATA transport class(es).  In order to work well 
> with SATA/SAS hardware, there will need to be at least one.
> 
> And as I hoped you have guessed.....  the ATA transport class should be 
> a child of the block layer, not the SCSI layer.

Jeff,
Yes, the SAS HBAs that I'm aware of can connect to
SATA I/II disks "directly" on their phys.

So here are two scenarios for connecting SATA disks:

a)
  SAS HBA < -------------------------> SATA disk

b)
  SAS HBA <------>SAS_expander<------> SATA disk

In a) the interconnect is SATA. Still it is hard
to believe that the SAS HBA LLD would belong to
anything other than the SCSI subsystem (since
SAS HBAs come with 4 or 8 phys, others of which could
be in scenario b) and/or connected to SAS disks).
Hence the initiator_ports/phys/target_ports on and
seen by that SAS HBA should (also) have SAS transport
classes.

When a SAS HBA phy is not connected to anything is it
a member of a SAS transport class or a ATA transport
class (or both)?

In scenario b) the left interconnect is SAS and the
right interconnect is SATA. The SAS_expander contains
the STP<->SATA bridging function (and, for sake of
argument, no SCSI-ATA Translation (SAT) layer).
Would scenario b) also have a ATA transport
class? I'll assume it does. To be discovered it also
needs a SAS transport class. Larger enclosures are
likely to be amplifications of scenario b). The presence
of the SATA disk in scenario b) will be discovered via
the SAS SMP (i.e. talking to the SAS_expander) or via
the SES protocol (i.e. a SCSI Enclosure Services target
running near the SAS_expander). Either way if there are
a lot of SATA disks then they are likely to be held
in their initialization sequence to stop them spinning
up all at once. SAS transport intervention may be
required (staggered timers are another possibility).

Now I may be wrong but I think that one of the SAS HBAs
that I have read about that looks (externally) like
scenario a) but is actually scenario b). In other words
the SAS_expander is silicon on the HBA and it is not
controlled via the PCI bus, but rather by SMP.

This suggests to me we would need an ordered sequence of
transport classes. I really wonder about trying to model
this level of complexity in sysfs, plus the nightmare of
keeping state data of (topologically) distant nodes up
to date. At some point one needs to supply a management
pass through and hand the problem over to the user
space. The question is, at what point.


There are already FCP enclosures filled with SATA disks
on the market so this problem in not unique to SAS.
However they have (I presume) a SAT layer in their
FC enclosures so the OS thinks it is taking to SCSI
disks.

Doug Gilbert




^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH] libata: device suspend/resume
  2005-05-26 22:56                     ` Bartlomiej Zolnierkiewicz
@ 2005-05-27  6:54                       ` Jens Axboe
  0 siblings, 0 replies; 44+ messages in thread
From: Jens Axboe @ 2005-05-27  6:54 UTC (permalink / raw)
  To: Bartlomiej Zolnierkiewicz
  Cc: Jeff Garzik, James Bottomley, SCSI Mailing List, linux-ide

On Fri, May 27 2005, Bartlomiej Zolnierkiewicz wrote:
> Nice work Jens, some feedback below:
> 
> On 5/25/05, Jens Axboe <axboe@suse.de> wrote:
> 
> > Index: drivers/ide/ide-floppy.c
> > ===================================================================
> > --- 137318b273db26b889675101fbd02d2e84cae5e3/drivers/ide/ide-floppy.c  (mode:100644)
> > +++ uncommitted/drivers/ide/ide-floppy.c  (mode:100644)
> 
> > @@ -1311,9 +1311,9 @@
> >                 }
> >                 pc = idefloppy_next_pc_storage(drive);
> >                 idefloppy_create_rw_cmd(floppy, pc, rq, block);
> > -       } else if (rq->flags & REQ_SPECIAL) {
> > +       } else if (blk_special_request(rq))
> >                 pc = (idefloppy_pc_t *) rq->buffer;
> > -       } else if (rq->flags & REQ_BLOCK_PC) {
> > +       } else if (blk_fs_request(rq)) {
> >                 pc = idefloppy_next_pc_storage(drive);
> >                 if (idefloppy_blockpc_cmd(floppy, pc, rq)) {
> >                         idefloppy_do_end_request(drive, 0, 0);
> 
> blk_pc_request(rq)

Indeed, thanks!

> > Index: drivers/ide/ide-io.c
> > ===================================================================
> > --- 137318b273db26b889675101fbd02d2e84cae5e3/drivers/ide/ide-io.c  (mode:100644)
> > +++ uncommitted/drivers/ide/ide-io.c  (mode:100644)
> 
> > @@ -948,9 +948,10 @@
> >         if (!drive->special.all) {
> >                 ide_driver_t *drv;
> > 
> > -               if (rq->flags & (REQ_DRIVE_CMD | REQ_DRIVE_TASK))
> > +               if (rq->cmd_type == REQ_TYPE_ATA_CMD ||
> > +                   rq->cmd_type == REQ_TYPE_ATA_TASK)
> >                         return execute_drive_cmd(drive, rq);
> > -               else if (rq->flags & REQ_DRIVE_TASKFILE)
> > +               else if (rq->cmd_type == REQ_TYPE_ATA_TASKFILE)
> >                         return execute_drive_cmd(drive, rq);
> >                 else if (blk_pm_request(rq)) {
> >  #ifdef DEBUG_PM
> 
> || rq->cmd_type == REQ_TYPE_ATA_TASKFILE while at it

Didn't even notice that, fixed.

> > Index: drivers/scsi/ide-scsi.c
> > ===================================================================
> > --- 137318b273db26b889675101fbd02d2e84cae5e3/drivers/scsi/ide-scsi.c  (mode:100644)
> > +++ uncommitted/drivers/scsi/ide-scsi.c  (mode:100644)
> 
> > @@ -370,12 +370,12 @@
> >         u8 *scsi_buf;
> >         unsigned long flags;
> > 
> > -       if (!(rq->flags & (REQ_SPECIAL|REQ_SENSE))) {
> > +       if (!blk_special_request(rq) || !blk_sense_request(rq)) {
> >                 ide_end_request(drive, uptodate, nrsecs);
> >                 return 0;
> >         }
> 
> !blk_special_request(rq) && !blk_sense_request(rq)

Thanks.

> > Index: drivers/scsi/scsi_lib.c
> > ===================================================================
> > --- 137318b273db26b889675101fbd02d2e84cae5e3/drivers/scsi/scsi_lib.c  (mode:100644)
> > +++ uncommitted/drivers/scsi/scsi_lib.c  (mode:100644)
> 
> > @@ -922,7 +922,7 @@
> >         /*
> >          * if this is a rq->data based REQ_BLOCK_PC, setup for a non-sg xfer
> >          */
> > -       if ((req->flags & REQ_BLOCK_PC) && !req->bio) {
> > +       if (blk_pc_request(req) && req->bio) {
> >                 cmd->request_bufflen = req->data_len;
> >                 cmd->request_buffer = req->data;
> >                 req->buffer = req->data;
> 
> !req->bio

Yeah, this one I already found while trying to boot the patch, SG_IO
barfed quickly without it.

> I assume they you've verified that there are no odd cases possible like
> rq->flags = REQ_DRIVE_TASKFILE | REQ_BAR_FLUSH or rq->flags = REQ_PC
> (without REQ_BLOCK_PC) which would be obviously broken by this patch?

Not fully verified yet, there's some work to be done there. REQ_PC
never had REQ_BLOCK_PC set, I want to unify those two in ide-cd though.
Should be pretty trivial, I hope to even kill of the old pc_interrupt
handling and just use newpc_interrupt at the same time.

But that really should be on top of this patch, done afterwards!

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH] libata: device suspend/resume
  2005-05-27  2:01                     ` Benjamin Herrenschmidt
@ 2005-05-27  6:55                       ` Jens Axboe
  0 siblings, 0 replies; 44+ messages in thread
From: Jens Axboe @ 2005-05-27  6:55 UTC (permalink / raw)
  To: Benjamin Herrenschmidt
  Cc: Jeff Garzik, James Bottomley, SCSI Mailing List, linux-ide

On Fri, May 27 2005, Benjamin Herrenschmidt wrote:
> On Wed, 2005-05-25 at 11:29 +0200, Jens Axboe wrote:
> > On Tue, May 24 2005, Jeff Garzik wrote:
> > > Jens Axboe wrote:
> > > >I agree, it's a cleaner approach, with the rq being a container for
> > > >generel messages as well not just SCSI commands. The one missing piece
> > > >for that was the rq->end_io() callback so everything doesn't have to go
> > > >down sync, but that is in now as well.
> > > >
> > > >I'll try and cook something up.
> > > 
> > > Very cool ;)
> > 
> > This is the base for it. It splits request->flags into two variables:
> > 
> > - cmd_type. this is not a bitmask, but a value indicating what type of
> >   request this is.
> > 
> > - cmd_flags. various command modified flags.
> 
> I like it too. It's a long overdue cleanup :)

Indeed, it is something I wish I had done originally when moving away
from the 2.4 rq->cmd setup. Better late than never :)

> Another thing is we should/could probably move some of the other fields
> in the struct request into a union of structs. Some of them are never
> present simultaneously ...

Yes, that would be a nice way to save some space.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 44+ messages in thread

* RE: libata, SCSI and storage drivers
@ 2005-05-27 12:42 James.Smart
  0 siblings, 0 replies; 44+ messages in thread
From: James.Smart @ 2005-05-27 12:42 UTC (permalink / raw)
  To: dougg, jgarzik; +Cc: axboe, James.Bottomley, linux-scsi, linux-ide

FYI -  There is currently a standards proposal in T11 for the
following scenario:

  c) FC HBA  <--------> FC fabric/loop <------>  SATA disk

In this case, the SAT layer could be in the FC HBA firmware or
in the FC driver (or whatever linux transport exists).  A demo for
this scenario was shown recently at SNW.

Also - I expect to see more and more implementations with the
SAS Expander tightly integrated to the SAS HBA/phys.

-- james s


> Jeff,
> Yes, the SAS HBAs that I'm aware of can connect to
> SATA I/II disks "directly" on their phys.
> 
> So here are two scenarios for connecting SATA disks:
> 
> a)
>   SAS HBA < -------------------------> SATA disk
> 
> b)
>   SAS HBA <------>SAS_expander<------> SATA disk
> 
> In a) the interconnect is SATA. Still it is hard
> to believe that the SAS HBA LLD would belong to
> anything other than the SCSI subsystem (since
> SAS HBAs come with 4 or 8 phys, others of which could
> be in scenario b) and/or connected to SAS disks).
> Hence the initiator_ports/phys/target_ports on and
> seen by that SAS HBA should (also) have SAS transport
> classes.
> 
> When a SAS HBA phy is not connected to anything is it
> a member of a SAS transport class or a ATA transport
> class (or both)?
> 
> In scenario b) the left interconnect is SAS and the
> right interconnect is SATA. The SAS_expander contains
> the STP<->SATA bridging function (and, for sake of
> argument, no SCSI-ATA Translation (SAT) layer).
> Would scenario b) also have a ATA transport
> class? I'll assume it does. To be discovered it also
> needs a SAS transport class. Larger enclosures are
> likely to be amplifications of scenario b). The presence
> of the SATA disk in scenario b) will be discovered via
> the SAS SMP (i.e. talking to the SAS_expander) or via
> the SES protocol (i.e. a SCSI Enclosure Services target
> running near the SAS_expander). Either way if there are
> a lot of SATA disks then they are likely to be held
> in their initialization sequence to stop them spinning
> up all at once. SAS transport intervention may be
> required (staggered timers are another possibility).
> 
> Now I may be wrong but I think that one of the SAS HBAs
> that I have read about that looks (externally) like
> scenario a) but is actually scenario b). In other words
> the SAS_expander is silicon on the HBA and it is not
> controlled via the PCI bus, but rather by SMP.
> 
> This suggests to me we would need an ordered sequence of
> transport classes. I really wonder about trying to model
> this level of complexity in sysfs, plus the nightmare of
> keeping state data of (topologically) distant nodes up
> to date. At some point one needs to supply a management
> pass through and hand the problem over to the user
> space. The question is, at what point.
> 
> 
> There are already FCP enclosures filled with SATA disks
> on the market so this problem in not unique to SAS.
> However they have (I presume) a SAT layer in their
> FC enclosures so the OS thinks it is taking to SCSI
> disks.
> 
> Doug Gilbert
> 
> 
> 
> -
> To unsubscribe from this list: send the line "unsubscribe 
> linux-scsi" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 

^ permalink raw reply	[flat|nested] 44+ messages in thread

* RE: libata, SCSI and storage drivers
@ 2005-05-27 14:26 James.Smart
  0 siblings, 0 replies; 44+ messages in thread
From: James.Smart @ 2005-05-27 14:26 UTC (permalink / raw)
  To: James.Smart, dougg, jgarzik; +Cc: axboe, James.Bottomley, linux-scsi, linux-ide

Also, I should note that plain ATA (no SAT) is valid in this
configuration as well.

-- james s

> -----Original Message-----
> From: linux-scsi-owner@vger.kernel.org
> [mailto:linux-scsi-owner@vger.kernel.org]On Behalf Of Smart, James
> Sent: Friday, May 27, 2005 8:43 AM
> To: dougg@torque.net; jgarzik@pobox.com
> Cc: axboe@suse.de; James.Bottomley@SteelEye.com;
> linux-scsi@vger.kernel.org; linux-ide@vger.kernel.org
> Subject: RE: libata, SCSI and storage drivers
> 
> 
> FYI -  There is currently a standards proposal in T11 for the
> following scenario:
> 
>   c) FC HBA  <--------> FC fabric/loop <------>  SATA disk
> 
> In this case, the SAT layer could be in the FC HBA firmware or
> in the FC driver (or whatever linux transport exists).  A demo for
> this scenario was shown recently at SNW.
> 
> Also - I expect to see more and more implementations with the
> SAS Expander tightly integrated to the SAS HBA/phys.
> 
> -- james s
> 
> 
> > Jeff,
> > Yes, the SAS HBAs that I'm aware of can connect to
> > SATA I/II disks "directly" on their phys.
> > 
> > So here are two scenarios for connecting SATA disks:
> > 
> > a)
> >   SAS HBA < -------------------------> SATA disk
> > 
> > b)
> >   SAS HBA <------>SAS_expander<------> SATA disk
> > 
> > In a) the interconnect is SATA. Still it is hard
> > to believe that the SAS HBA LLD would belong to
> > anything other than the SCSI subsystem (since
> > SAS HBAs come with 4 or 8 phys, others of which could
> > be in scenario b) and/or connected to SAS disks).
> > Hence the initiator_ports/phys/target_ports on and
> > seen by that SAS HBA should (also) have SAS transport
> > classes.
> > 
> > When a SAS HBA phy is not connected to anything is it
> > a member of a SAS transport class or a ATA transport
> > class (or both)?
> > 
> > In scenario b) the left interconnect is SAS and the
> > right interconnect is SATA. The SAS_expander contains
> > the STP<->SATA bridging function (and, for sake of
> > argument, no SCSI-ATA Translation (SAT) layer).
> > Would scenario b) also have a ATA transport
> > class? I'll assume it does. To be discovered it also
> > needs a SAS transport class. Larger enclosures are
> > likely to be amplifications of scenario b). The presence
> > of the SATA disk in scenario b) will be discovered via
> > the SAS SMP (i.e. talking to the SAS_expander) or via
> > the SES protocol (i.e. a SCSI Enclosure Services target
> > running near the SAS_expander). Either way if there are
> > a lot of SATA disks then they are likely to be held
> > in their initialization sequence to stop them spinning
> > up all at once. SAS transport intervention may be
> > required (staggered timers are another possibility).
> > 
> > Now I may be wrong but I think that one of the SAS HBAs
> > that I have read about that looks (externally) like
> > scenario a) but is actually scenario b). In other words
> > the SAS_expander is silicon on the HBA and it is not
> > controlled via the PCI bus, but rather by SMP.
> > 
> > This suggests to me we would need an ordered sequence of
> > transport classes. I really wonder about trying to model
> > this level of complexity in sysfs, plus the nightmare of
> > keeping state data of (topologically) distant nodes up
> > to date. At some point one needs to supply a management
> > pass through and hand the problem over to the user
> > space. The question is, at what point.
> > 
> > 
> > There are already FCP enclosures filled with SATA disks
> > on the market so this problem in not unique to SAS.
> > However they have (I presume) a SAT layer in their
> > FC enclosures so the OS thinks it is taking to SCSI
> > disks.
> > 
> > Doug Gilbert
> > 
> > 
> > 
> > -
> > To unsubscribe from this list: send the line "unsubscribe 
> > linux-scsi" in
> > the body of a message to majordomo@vger.kernel.org
> > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> > 
> -
> To unsubscribe from this list: send the line "unsubscribe 
> linux-scsi" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: libata, SCSI and storage drivers
  2005-05-27  6:45                   ` Douglas Gilbert
@ 2005-05-27 14:41                     ` Luben Tuikov
  0 siblings, 0 replies; 44+ messages in thread
From: Luben Tuikov @ 2005-05-27 14:41 UTC (permalink / raw)
  To: dougg
  Cc: Jeff Garzik, Jens Axboe, James Bottomley, SCSI Mailing List,
	linux-ide

On 05/27/05 02:45, Douglas Gilbert wrote:
> In a) the interconnect is SATA. Still it is hard
> to believe that the SAS HBA LLD would belong to
> anything other than the SCSI subsystem (since
> SAS HBAs come with 4 or 8 phys, others of which could
> be in scenario b) and/or connected to SAS disks).
> Hence the initiator_ports/phys/target_ports on and
> seen by that SAS HBA should (also) have SAS transport
> classes.

It should register with the SAS transport class, where
the SAS discover "function" is.  The Discover function
would find out what is out on the domain and "register"
it with the proper "transport" (a more correct way to
put it there is).

For SSP devices that would be a "scsi disk", for
SATA devices that would be a "libata disk" or something
like this.  The LLDD would either get a CDB for SSP,
or taskfile for STP, and pass it along to the hardware.
 
> When a SAS HBA phy is not connected to anything is it
> a member of a SAS transport class or a ATA transport
> class (or both)?

Quite possibly both: SAS transport class, since it is
a super class in meaning that the Discover function
is in there.
 
> In scenario b) the left interconnect is SAS and the
> right interconnect is SATA. The SAS_expander contains
> the STP<->SATA bridging function (and, for sake of
> argument, no SCSI-ATA Translation (SAT) layer).
> Would scenario b) also have a ATA transport
> class? I'll assume it does. To be discovered it also

Yes it would.  As far as the class is concerned it
is a SATA device -- how you get to it (direct or through
an expander) is the LLDD/firmware/Host Adapter concern.

> needs a SAS transport class. Larger enclosures are
> likely to be amplifications of scenario b). The presence
> of the SATA disk in scenario b) will be discovered via
> the SAS SMP (i.e. talking to the SAS_expander) or via
> the SES protocol (i.e. a SCSI Enclosure Services target
> running near the SAS_expander). Either way if there are
> a lot of SATA disks then they are likely to be held
> in their initialization sequence to stop them spinning
> up all at once. SAS transport intervention may be
> required (staggered timers are another possibility).

Yes, spinup-hold states would be managed by the Discover
function, or by the FW on the expander/ses device.

BTW, very nice to mention SES devices.  We'll have to
bring that up more as they'd be very much en vogue
with expanders.

> Now I may be wrong but I think that one of the SAS HBAs
> that I have read about that looks (externally) like
> scenario a) but is actually scenario b). In other words
> the SAS_expander is silicon on the HBA and it is not
> controlled via the PCI bus, but rather by SMP.

I know of such a SAS HA (Host Adapter), and from what it
seems the role of the expander-on-chip is to simulate
a larger number of phys than actually/physically supported
by the HA.  By design, this should be transparent to the
Discover function, and if it is not, I believe the Discover
fn. would do the right thing (i.e. treat is as an expander).

> This suggests to me we would need an ordered sequence of
> transport classes. I really wonder about trying to model
> this level of complexity in sysfs, plus the nightmare of
> keeping state data of (topologically) distant nodes up
> to date. At some point one needs to supply a management

When a SSP talking device is found it is registered
as a "scsi device".  When a STP talking device is
discovered (directly attached or elsewhere) it would
be registered as a "libata device".

> pass through and hand the problem over to the user
> space. The question is, at what point.

I'm not 100% sure about user space.  The Discover function
is pretty straightforward, and simple.  Plus the root/bootable
device could be a SATA device on an expander or even deeper
in the domain.

Thanks,
	Luben

^ permalink raw reply	[flat|nested] 44+ messages in thread

* RE: libata, SCSI and storage drivers
@ 2005-05-27 17:45 James.Smart
  2005-05-27 18:05 ` Jeff Garzik
  2005-05-27 19:04 ` James Bottomley
  0 siblings, 2 replies; 44+ messages in thread
From: James.Smart @ 2005-05-27 17:45 UTC (permalink / raw)
  To: luben_tuikov, dougg
  Cc: jgarzik, axboe, James.Bottomley, linux-scsi, linux-ide

Folks,

This discussion brings up some latent questions...

The transport can be a subsystem on it's own and is perhaps independent
of SCSI altogether. In this case, SCSI just happens to be a personality
of something on the transport. This is at odds with the current design
in which the transport is something under SCSI and inherently bound to
the SCSI "host".

I understand how we got to where we are, but shouldn't we consider making
some transports independent subsystems ? If the only protocol that
can be run on the transport is SCSI (ex: SPI), then the transport can be
under SCSI. However, if the transport can support multiple protocols (FC
can support SCSI, IP, (or ATA)), shouldn't it be structured more like an io
bus like pci ? 

It does mess up the device tree heirarchy. In general, you want the
device tree to continue along the transport specific elements until it finds
remote endpoints (things your going to use), at which point the protocol
specific elements can kick in. For example (using FC):
 /sys/devices/<pci>/fcport5/rport-5:3/target10:0:0/10:0:0:0  - the SCSI lun

What this leaves out is : where is the scsi host device ? It doesn't make
sense to insert it in-between the transport elements. It likely just becomes
a leaf entity. Continuing the example:
 /sys/devices/<pci>/fcport5/host10  - scsi host interface
 /sys/devices/<pci>/fcport5/eth3    - network interface

Food for thought...   Is this out in left field ?

-- james s


> -----Original Message-----
> From: linux-scsi-owner@vger.kernel.org
> [mailto:linux-scsi-owner@vger.kernel.org]On Behalf Of Luben Tuikov
> Sent: Friday, May 27, 2005 10:42 AM
> To: dougg@torque.net
> Cc: Jeff Garzik; Jens Axboe; James Bottomley; SCSI Mailing List;
> linux-ide@vger.kernel.org
> Subject: Re: libata, SCSI and storage drivers
> 
> 
> On 05/27/05 02:45, Douglas Gilbert wrote:
> > In a) the interconnect is SATA. Still it is hard
> > to believe that the SAS HBA LLD would belong to
> > anything other than the SCSI subsystem (since
> > SAS HBAs come with 4 or 8 phys, others of which could
> > be in scenario b) and/or connected to SAS disks).
> > Hence the initiator_ports/phys/target_ports on and
> > seen by that SAS HBA should (also) have SAS transport
> > classes.
> 
> It should register with the SAS transport class, where
> the SAS discover "function" is.  The Discover function
> would find out what is out on the domain and "register"
> it with the proper "transport" (a more correct way to
> put it there is).
> 
> For SSP devices that would be a "scsi disk", for
> SATA devices that would be a "libata disk" or something
> like this.  The LLDD would either get a CDB for SSP,
> or taskfile for STP, and pass it along to the hardware.
>  
> > When a SAS HBA phy is not connected to anything is it
> > a member of a SAS transport class or a ATA transport
> > class (or both)?
> 
> Quite possibly both: SAS transport class, since it is
> a super class in meaning that the Discover function
> is in there.
>  
> > In scenario b) the left interconnect is SAS and the
> > right interconnect is SATA. The SAS_expander contains
> > the STP<->SATA bridging function (and, for sake of
> > argument, no SCSI-ATA Translation (SAT) layer).
> > Would scenario b) also have a ATA transport
> > class? I'll assume it does. To be discovered it also
> 
> Yes it would.  As far as the class is concerned it
> is a SATA device -- how you get to it (direct or through
> an expander) is the LLDD/firmware/Host Adapter concern.
> 
> > needs a SAS transport class. Larger enclosures are
> > likely to be amplifications of scenario b). The presence
> > of the SATA disk in scenario b) will be discovered via
> > the SAS SMP (i.e. talking to the SAS_expander) or via
> > the SES protocol (i.e. a SCSI Enclosure Services target
> > running near the SAS_expander). Either way if there are
> > a lot of SATA disks then they are likely to be held
> > in their initialization sequence to stop them spinning
> > up all at once. SAS transport intervention may be
> > required (staggered timers are another possibility).
> 
> Yes, spinup-hold states would be managed by the Discover
> function, or by the FW on the expander/ses device.
> 
> BTW, very nice to mention SES devices.  We'll have to
> bring that up more as they'd be very much en vogue
> with expanders.
> 
> > Now I may be wrong but I think that one of the SAS HBAs
> > that I have read about that looks (externally) like
> > scenario a) but is actually scenario b). In other words
> > the SAS_expander is silicon on the HBA and it is not
> > controlled via the PCI bus, but rather by SMP.
> 
> I know of such a SAS HA (Host Adapter), and from what it
> seems the role of the expander-on-chip is to simulate
> a larger number of phys than actually/physically supported
> by the HA.  By design, this should be transparent to the
> Discover function, and if it is not, I believe the Discover
> fn. would do the right thing (i.e. treat is as an expander).
> 
> > This suggests to me we would need an ordered sequence of
> > transport classes. I really wonder about trying to model
> > this level of complexity in sysfs, plus the nightmare of
> > keeping state data of (topologically) distant nodes up
> > to date. At some point one needs to supply a management
> 
> When a SSP talking device is found it is registered
> as a "scsi device".  When a STP talking device is
> discovered (directly attached or elsewhere) it would
> be registered as a "libata device".
> 
> > pass through and hand the problem over to the user
> > space. The question is, at what point.
> 
> I'm not 100% sure about user space.  The Discover function
> is pretty straightforward, and simple.  Plus the root/bootable
> device could be a SATA device on an expander or even deeper
> in the domain.
> 
> Thanks,
> 	Luben
> -
> To unsubscribe from this list: send the line "unsubscribe 
> linux-scsi" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: libata, SCSI and storage drivers
  2005-05-27 17:45 James.Smart
@ 2005-05-27 18:05 ` Jeff Garzik
  2005-05-27 19:04 ` James Bottomley
  1 sibling, 0 replies; 44+ messages in thread
From: Jeff Garzik @ 2005-05-27 18:05 UTC (permalink / raw)
  To: James.Smart
  Cc: luben_tuikov, dougg, axboe, James.Bottomley, linux-scsi,
	linux-ide

James.Smart@Emulex.Com wrote:
> Folks,
> 
> This discussion brings up some latent questions...
> 
> The transport can be a subsystem on it's own and is perhaps independent
> of SCSI altogether. In this case, SCSI just happens to be a personality
> of something on the transport. This is at odds with the current design
> in which the transport is something under SCSI and inherently bound to
> the SCSI "host".

Yes.  This is something I really want to change, for libata.


> I understand how we got to where we are, but shouldn't we consider making
> some transports independent subsystems ? If the only protocol that
> can be run on the transport is SCSI (ex: SPI), then the transport can be
> under SCSI. However, if the transport can support multiple protocols (FC
> can support SCSI, IP, (or ATA)), shouldn't it be structured more like an io
> bus like pci ? 

Unfortunately this is an open-ended question that Linux is rather poor 
at answering, since the answer could range from "no" to "show me the 
code" to "you're an absolute visionary!"  :)

My own opinion:

I consciously avoid thinking too much in that direction.  Linux 
development is a lot like a biological process.  The evolution of the 
kernel code over time will give us the best answer.

Perhaps we will merge request_queue and network stack systems into a 
single "packet transport" system.  Perhaps net stack and request_queue 
systems will stay separate, and request_queue will evolve into a 
generalized system for RPC message transport.

With the device model, both IDE [as of yesterday] and SCSI -already- 
export bus topology in a standardized manner, just like PCI.

	Jeff



^ permalink raw reply	[flat|nested] 44+ messages in thread

* RE: libata, SCSI and storage drivers
  2005-05-27 17:45 James.Smart
  2005-05-27 18:05 ` Jeff Garzik
@ 2005-05-27 19:04 ` James Bottomley
  1 sibling, 0 replies; 44+ messages in thread
From: James Bottomley @ 2005-05-27 19:04 UTC (permalink / raw)
  To: James.Smart
  Cc: luben_tuikov, Douglas Gilbert, Jeff Garzik, Jens Axboe,
	SCSI Mailing List, linux-ide

On Fri, 2005-05-27 at 13:45 -0400, James.Smart@Emulex.Com wrote:
> The transport can be a subsystem on it's own and is perhaps independent
> of SCSI altogether. In this case, SCSI just happens to be a personality
> of something on the transport. This is at odds with the current design
> in which the transport is something under SCSI and inherently bound to
> the SCSI "host".

Actually, no that's no longer true.  Initially I did it this way, but
now the SCSI transport classes are build on top of the generic transport
classes (and these are independent of SCSI).  I anticipate that soon
we'll get a PHY transport class that will attach both to SAS and SATA
devices (and won't care which subsystem they're under).

> I understand how we got to where we are, but shouldn't we consider making
> some transports independent subsystems ? If the only protocol that
> can be run on the transport is SCSI (ex: SPI), then the transport can be
> under SCSI. However, if the transport can support multiple protocols (FC
> can support SCSI, IP, (or ATA)), shouldn't it be structured more like an io
> bus like pci ? 
> 
> It does mess up the device tree heirarchy. In general, you want the
> device tree to continue along the transport specific elements until it finds
> remote endpoints (things your going to use), at which point the protocol
> specific elements can kick in. For example (using FC):
>  /sys/devices/<pci>/fcport5/rport-5:3/target10:0:0/10:0:0:0  - the SCSI lun
> 
> What this leaves out is : where is the scsi host device ? It doesn't make
> sense to insert it in-between the transport elements. It likely just becomes
> a leaf entity. Continuing the example:
>  /sys/devices/<pci>/fcport5/host10  - scsi host interface
>  /sys/devices/<pci>/fcport5/eth3    - network interface
> 
> Food for thought...   Is this out in left field ?

Well, that's why it's a class.  All the devices appear under 

/class/<transport class name>

and these devices are simply the names of the actual generic devices, so
there's no reason target0:3:0 can't co-exist happily with ata3:0 or
something here.  The idea being (I think) that the class infrastructure
actually flattens the tree.  So there's always a device link that points
into the true device tree, but all the class properties are available in
flattened form.

James



^ permalink raw reply	[flat|nested] 44+ messages in thread

end of thread, other threads:[~2005-05-27 19:04 UTC | newest]

Thread overview: 44+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2005-05-23 20:15 [PATCH] libata: device suspend/resume Jeff Garzik
2005-05-23 20:41 ` James Bottomley
2005-05-23 20:45   ` Jeff Garzik
2005-05-23 22:10     ` James Bottomley
2005-05-24  6:21       ` Jens Axboe
2005-05-24  6:53         ` Jeff Garzik
2005-05-24  7:06           ` Hannes Reinecke
2005-05-24  7:08             ` Jens Axboe
2005-05-24  7:16             ` Jeff Garzik
2005-05-24  7:07           ` Jens Axboe
2005-05-24  7:10             ` Jeff Garzik
2005-05-24  7:13               ` Jens Axboe
2005-05-27  2:49                 ` libata, SCSI and storage drivers Jeff Garzik
2005-05-27  6:45                   ` Douglas Gilbert
2005-05-27 14:41                     ` Luben Tuikov
2005-05-24  7:14               ` [PATCH] libata: device suspend/resume Hannes Reinecke
2005-05-24  7:15                 ` Jens Axboe
2005-05-24  7:18                 ` Jeff Garzik
2005-05-24 10:17                   ` Douglas Gilbert
2005-05-24 17:10                     ` Jeff Garzik
2005-05-24  7:59           ` Jens Axboe
2005-05-24  8:21             ` Jeff Garzik
2005-05-24  8:51               ` Jens Axboe
2005-05-24 16:37                 ` Jeff Garzik
2005-05-25  9:29                   ` Jens Axboe
2005-05-25 23:40                     ` Guennadi Liakhovetski
2005-05-26  1:05                     ` Jeff Garzik
2005-05-26  5:57                       ` Jens Axboe
2005-05-26 22:56                     ` Bartlomiej Zolnierkiewicz
2005-05-27  6:54                       ` Jens Axboe
2005-05-27  2:01                     ` Benjamin Herrenschmidt
2005-05-27  6:55                       ` Jens Axboe
2005-05-24 13:48           ` Luben Tuikov
2005-05-24 17:29             ` Jeff Garzik
2005-05-24 13:05         ` Luben Tuikov
2005-05-27  2:54       ` Jeff Garzik
2005-05-24 16:27   ` Mark Lord
2005-05-24 16:33     ` Mark Lord
2005-05-24 16:04 ` Mark Lord
  -- strict thread matches above, loose matches on Subject: below --
2005-05-27 12:42 libata, SCSI and storage drivers James.Smart
2005-05-27 14:26 James.Smart
2005-05-27 17:45 James.Smart
2005-05-27 18:05 ` Jeff Garzik
2005-05-27 19:04 ` James Bottomley

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).