public inbox for linux-scsi@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/1] cciss: PCI power management reset for kexec
@ 2009-02-07 19:39 Mike Miller
  2009-02-10 22:55 ` Randy Dunlap
  0 siblings, 1 reply; 3+ messages in thread
From: Mike Miller @ 2009-02-07 19:39 UTC (permalink / raw)
  To: Andrew Morton, Jens Axboe; +Cc: LKML, randy.dunlap, LKML-scsi

Patch 1 of 1

This patch provides a better "kick-in-the-pants" on driver load in a
kexec'ed environment.

I've successfully sanity tested the port in my lab. Randy, please apply and
test. You seem to be able to bring out the worst in the driver. ;-)

Author: Chip Coldwell <coldwell@redhat.com>

    CCISS: Use PCI power management to reset the controller

    The kexec kernel resets the CCISS hardware in three steps:

    1. Use PCI power management states to reset the controller
       in the kexec kernel.
    2. Clear the MSI/MSI-X bits in PCI configuration space so
       that MSI initialization in the kexec kernel doesn't fail.
    3. Use the CCISS "No-op" message to determine when the
       controller firmware has recovered from the PCI PM reset.

Signed-off-by: Mike Miller <mike.miller@hp.com>

-------------------------------------------------------------------------------
diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index 01e6938..ff4a105 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -3390,6 +3390,205 @@ static void free_hba(int i)
 	kfree(p);
 }
 
+/* Send a message CDB to the firmware. */
+static __devinit int cciss_message(struct pci_dev *pdev, unsigned char opcode, unsigned char type)
+{
+	typedef struct {
+		CommandListHeader_struct CommandHeader;
+		RequestBlock_struct Request;
+		ErrDescriptor_struct ErrorDescriptor;
+	} Command;
+	static const size_t cmd_sz = sizeof(Command) + sizeof(ErrorInfo_struct);
+	Command *cmd;
+	dma_addr_t paddr64;
+	uint32_t paddr32, tag;
+	void __iomem *vaddr;
+	int i, err;
+
+	vaddr = ioremap_nocache(pci_resource_start(pdev, 0), pci_resource_len(pdev, 0));
+	if (vaddr == NULL)
+		return -ENOMEM;
+
+	/* The Inbound Post Queue only accepts 32-bit physical addresses for the
+	   CCISS commands, so they must be allocated from the lower 4GiB of
+	   memory. */
+	err = pci_set_consistent_dma_mask(pdev, DMA_32BIT_MASK);
+	if (err) {
+		iounmap(vaddr);
+		return -ENOMEM;
+	}
+
+	cmd = pci_alloc_consistent(pdev, cmd_sz, &paddr64);
+	if (cmd == NULL) {
+		iounmap(vaddr);
+		return -ENOMEM;
+	}
+
+	/* This must fit, because of the 32-bit consistent DMA mask.  Also,
+	   although there's no guarantee, we assume that the address is at
+	   least 4-byte aligned (most likely, it's page-aligned). */
+	paddr32 = paddr64;
+
+	cmd->CommandHeader.ReplyQueue = 0;
+	cmd->CommandHeader.SGList = 0;
+	cmd->CommandHeader.SGTotal = 0;
+	cmd->CommandHeader.Tag.lower = paddr32;
+	cmd->CommandHeader.Tag.upper = 0;
+	memset(&cmd->CommandHeader.LUN.LunAddrBytes, 0, 8);
+
+	cmd->Request.CDBLen = 16;
+	cmd->Request.Type.Type = TYPE_MSG;
+	cmd->Request.Type.Attribute = ATTR_HEADOFQUEUE;
+	cmd->Request.Type.Direction = XFER_NONE;
+	cmd->Request.Timeout = 0; /* Don't time out */
+	cmd->Request.CDB[0] = opcode;
+	cmd->Request.CDB[1] = type;
+	memset(&cmd->Request.CDB[2], 0, 14); /* the rest of the CDB is reserved */
+
+	cmd->ErrorDescriptor.Addr.lower = paddr32 + sizeof(Command);
+	cmd->ErrorDescriptor.Addr.upper = 0;
+	cmd->ErrorDescriptor.Len = sizeof(ErrorInfo_struct);
+
+	writel(paddr32, vaddr + SA5_REQUEST_PORT_OFFSET);
+
+	for (i = 0; i < 10; i++) {
+		tag = readl(vaddr + SA5_REPLY_PORT_OFFSET);
+		if ((tag & ~3) == paddr32)
+			break;
+		schedule_timeout_uninterruptible(HZ);
+	}
+
+	iounmap(vaddr);
+
+	/* we leak the DMA buffer here ... no choice since the controller could
+	   still complete the command. */
+	if (i == 10) {
+		printk(KERN_ERR "cciss: controller message %02x:%02x timed out\n",
+			opcode, type);
+		return -ETIMEDOUT;
+	}
+
+	pci_free_consistent(pdev, cmd_sz, cmd, paddr64);
+
+	if (tag & 2) {
+		printk(KERN_ERR "cciss: controller message %02x:%02x failed\n",
+			opcode, type);
+		return -EIO;
+	}
+
+	printk(KERN_INFO "cciss: controller message %02x:%02x succeeded\n",
+		opcode, type);
+	return 0;
+}
+
+#define cciss_soft_reset_controller(p) cciss_message(p, 1, 0)
+#define cciss_noop(p) cciss_message(p, 3, 0)
+
+static __devinit int cciss_reset_msi(struct pci_dev *pdev)
+{
+/* the #defines are stolen from drivers/pci/msi.h. */
+#define msi_control_reg(base)		(base + PCI_MSI_FLAGS)
+#define PCI_MSIX_FLAGS_ENABLE		(1 << 15)
+
+	int pos;
+	u16 control = 0;
+
+	pos = pci_find_capability(pdev, PCI_CAP_ID_MSI);
+	if (pos) {
+		pci_read_config_word(pdev, msi_control_reg(pos), &control);
+		if (control & PCI_MSI_FLAGS_ENABLE) {
+			printk(KERN_INFO "cciss: resetting MSI\n");
+			pci_write_config_word(pdev, msi_control_reg(pos), control & ~PCI_MSI_FLAGS_ENABLE);
+		}
+	}
+
+	pos = pci_find_capability(pdev, PCI_CAP_ID_MSIX);
+	if (pos) {
+		pci_read_config_word(pdev, msi_control_reg(pos), &control);
+		if (control & PCI_MSIX_FLAGS_ENABLE) {
+			printk(KERN_INFO "cciss: resetting MSI-X\n");
+			pci_write_config_word(pdev, msi_control_reg(pos), control & ~PCI_MSIX_FLAGS_ENABLE);
+		}
+	}
+
+	return 0;
+}
+
+/* This does a hard reset of the controller using PCI power management
+ * states. */
+static __devinit int cciss_hard_reset_controller(struct pci_dev *pdev)
+{
+	u16 pmcsr, saved_config_space[32];
+	int i, pos;
+
+	printk(KERN_INFO "cciss: using PCI PM to reset controller\n");
+
+	/* This is very nearly the same thing as
+
+	   pci_save_state(pci_dev);
+	   pci_set_power_state(pci_dev, PCI_D3hot);
+	   pci_set_power_state(pci_dev, PCI_D0);
+	   pci_restore_state(pci_dev);
+
+	   but we can't use these nice canned kernel routines on
+	   kexec, because they also check the MSI/MSI-X state in PCI
+	   configuration space and do the wrong thing when it is
+	   set/cleared.  Also, the pci_save/restore_state functions
+	   violate the ordering requirements for restoring the
+	   configuration space from the CCISS document (see the
+	   comment below).  So we roll our own .... */
+
+	for (i = 0; i < 32; i++)
+		pci_read_config_word(pdev, 2*i, &saved_config_space[i]);
+
+	pos = pci_find_capability(pdev, PCI_CAP_ID_PM);
+	if (pos == 0) {
+		printk(KERN_ERR "cciss_reset_controller: PCI PM not supported\n");
+		return -ENODEV;
+	}
+
+	/* Quoting from the Open CISS Specification: "The Power
+	 * Management Control/Status Register (CSR) controls the power
+	 * state of the device.  The normal operating state is D0,
+	 * CSR=00h.  The software off state is D3, CSR=03h.  To reset
+	 * the controller, place the interface device in D3 then to
+	 * D0, this causes a secondary PCI reset which will reset the
+	 * controller." */
+
+	/* enter the D3hot power management state */
+	pci_read_config_word(pdev, pos + PCI_PM_CTRL, &pmcsr);
+	pmcsr &= ~PCI_PM_CTRL_STATE_MASK;
+	pmcsr |= PCI_D3hot;
+	pci_write_config_word(pdev, pos + PCI_PM_CTRL, pmcsr);
+
+	set_current_state(TASK_UNINTERRUPTIBLE);
+	schedule_timeout(HZ >> 1);
+
+	/* enter the D0 power management state */
+	pmcsr &= ~PCI_PM_CTRL_STATE_MASK;
+	pmcsr |= PCI_D0;
+	pci_write_config_word(pdev, pos + PCI_PM_CTRL, pmcsr);
+
+	set_current_state(TASK_UNINTERRUPTIBLE);
+	schedule_timeout(HZ >> 1);
+
+	/* Restore the PCI configuration space.  The Open CISS
+	 * Specification says, "Restore the PCI Configuration
+	 * Registers, offsets 00h through 60h. It is important to
+	 * restore the command register, 16-bits at offset 04h,
+	 * last. Do not restore the configuration status register,
+	 * 16-bits at offset 06h."  Note that the offset is 2*i. */
+	for (i = 0; i < 32; i++) {
+		if (i == 2 || i == 3)
+			continue;
+		pci_write_config_word(pdev, 2*i, saved_config_space[i]);
+	}
+	wmb();
+	pci_write_config_word(pdev, 4, saved_config_space[2]);
+
+	return 0;
+}
+
 /*
  *  This is it.  Find all the controllers and register them.  I really hate
  *  stealing all these major device numbers.
@@ -3404,6 +3603,24 @@ static int __devinit cciss_init_one(struct pci_dev *pdev,
 	int dac, return_code;
 	InquiryData_struct *inq_buff = NULL;
 
+	if (reset_devices) {
+		/* Reset the controller with a PCI power-cycle */
+		if (cciss_hard_reset_controller(pdev) || cciss_reset_msi(pdev))
+			return -ENODEV;
+
+		/* Some devices (notably the HP Smart Array 5i Controller)
+		   need a little pause here */
+		schedule_timeout_uninterruptible(30*HZ);
+
+		/* Now try to get the controller to respond to a no-op */
+		for (i=0; i<12; i++) {
+			if (cciss_noop(pdev) == 0)
+				break;
+			else
+				printk("cciss: no-op failed%s\n", (i < 11 ? "; re-trying" : ""));
+		}
+	}
+
 	i = alloc_cciss_hba();
 	if (i < 0)
 		return -1;

^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [PATCH 1/1] cciss: PCI power management reset for kexec
  2009-02-07 19:39 [PATCH 1/1] cciss: PCI power management reset for kexec Mike Miller
@ 2009-02-10 22:55 ` Randy Dunlap
  2009-02-11 15:21   ` Miller, Mike (OS Dev)
  0 siblings, 1 reply; 3+ messages in thread
From: Randy Dunlap @ 2009-02-10 22:55 UTC (permalink / raw)
  To: Mike Miller; +Cc: Andrew Morton, Jens Axboe, LKML, LKML-scsi

Mike Miller wrote:
> Patch 1 of 1
> 
> This patch provides a better "kick-in-the-pants" on driver load in a
> kexec'ed environment.
> 
> I've successfully sanity tested the port in my lab. Randy, please apply and
> test. You seem to be able to bring out the worst in the driver. ;-)

Hi Mike,

I've booted this successfully (new kernel thru kexec) 4-5 times,
which doesn't prove a whole lot since the failure is intermittent.
Anyway, I'll continue to apply this patch in my daily kernel testing...

Thanks.

> Author: Chip Coldwell <coldwell@redhat.com>
> 
>     CCISS: Use PCI power management to reset the controller
> 
>     The kexec kernel resets the CCISS hardware in three steps:
> 
>     1. Use PCI power management states to reset the controller
>        in the kexec kernel.
>     2. Clear the MSI/MSI-X bits in PCI configuration space so
>        that MSI initialization in the kexec kernel doesn't fail.
>     3. Use the CCISS "No-op" message to determine when the
>        controller firmware has recovered from the PCI PM reset.
> 
> Signed-off-by: Mike Miller <mike.miller@hp.com>
> 
> -------------------------------------------------------------------------------
> diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
> index 01e6938..ff4a105 100644
> --- a/drivers/block/cciss.c
> +++ b/drivers/block/cciss.c
> @@ -3390,6 +3390,205 @@ static void free_hba(int i)
>  	kfree(p);
>  }
>  
> +/* Send a message CDB to the firmware. */
> +static __devinit int cciss_message(struct pci_dev *pdev, unsigned char opcode, unsigned char type)
> +{
> +	typedef struct {
> +		CommandListHeader_struct CommandHeader;
> +		RequestBlock_struct Request;
> +		ErrDescriptor_struct ErrorDescriptor;
> +	} Command;
> +	static const size_t cmd_sz = sizeof(Command) + sizeof(ErrorInfo_struct);
> +	Command *cmd;
> +	dma_addr_t paddr64;
> +	uint32_t paddr32, tag;
> +	void __iomem *vaddr;
> +	int i, err;
> +
> +	vaddr = ioremap_nocache(pci_resource_start(pdev, 0), pci_resource_len(pdev, 0));
> +	if (vaddr == NULL)
> +		return -ENOMEM;
> +
> +	/* The Inbound Post Queue only accepts 32-bit physical addresses for the
> +	   CCISS commands, so they must be allocated from the lower 4GiB of
> +	   memory. */
> +	err = pci_set_consistent_dma_mask(pdev, DMA_32BIT_MASK);
> +	if (err) {
> +		iounmap(vaddr);
> +		return -ENOMEM;
> +	}
> +
> +	cmd = pci_alloc_consistent(pdev, cmd_sz, &paddr64);
> +	if (cmd == NULL) {
> +		iounmap(vaddr);
> +		return -ENOMEM;
> +	}
> +
> +	/* This must fit, because of the 32-bit consistent DMA mask.  Also,
> +	   although there's no guarantee, we assume that the address is at
> +	   least 4-byte aligned (most likely, it's page-aligned). */
> +	paddr32 = paddr64;
> +
> +	cmd->CommandHeader.ReplyQueue = 0;
> +	cmd->CommandHeader.SGList = 0;
> +	cmd->CommandHeader.SGTotal = 0;
> +	cmd->CommandHeader.Tag.lower = paddr32;
> +	cmd->CommandHeader.Tag.upper = 0;
> +	memset(&cmd->CommandHeader.LUN.LunAddrBytes, 0, 8);
> +
> +	cmd->Request.CDBLen = 16;
> +	cmd->Request.Type.Type = TYPE_MSG;
> +	cmd->Request.Type.Attribute = ATTR_HEADOFQUEUE;
> +	cmd->Request.Type.Direction = XFER_NONE;
> +	cmd->Request.Timeout = 0; /* Don't time out */
> +	cmd->Request.CDB[0] = opcode;
> +	cmd->Request.CDB[1] = type;
> +	memset(&cmd->Request.CDB[2], 0, 14); /* the rest of the CDB is reserved */
> +
> +	cmd->ErrorDescriptor.Addr.lower = paddr32 + sizeof(Command);
> +	cmd->ErrorDescriptor.Addr.upper = 0;
> +	cmd->ErrorDescriptor.Len = sizeof(ErrorInfo_struct);
> +
> +	writel(paddr32, vaddr + SA5_REQUEST_PORT_OFFSET);
> +
> +	for (i = 0; i < 10; i++) {
> +		tag = readl(vaddr + SA5_REPLY_PORT_OFFSET);
> +		if ((tag & ~3) == paddr32)
> +			break;
> +		schedule_timeout_uninterruptible(HZ);
> +	}
> +
> +	iounmap(vaddr);
> +
> +	/* we leak the DMA buffer here ... no choice since the controller could
> +	   still complete the command. */
> +	if (i == 10) {
> +		printk(KERN_ERR "cciss: controller message %02x:%02x timed out\n",
> +			opcode, type);
> +		return -ETIMEDOUT;
> +	}
> +
> +	pci_free_consistent(pdev, cmd_sz, cmd, paddr64);
> +
> +	if (tag & 2) {
> +		printk(KERN_ERR "cciss: controller message %02x:%02x failed\n",
> +			opcode, type);
> +		return -EIO;
> +	}
> +
> +	printk(KERN_INFO "cciss: controller message %02x:%02x succeeded\n",
> +		opcode, type);
> +	return 0;
> +}
> +
> +#define cciss_soft_reset_controller(p) cciss_message(p, 1, 0)
> +#define cciss_noop(p) cciss_message(p, 3, 0)
> +
> +static __devinit int cciss_reset_msi(struct pci_dev *pdev)
> +{
> +/* the #defines are stolen from drivers/pci/msi.h. */
> +#define msi_control_reg(base)		(base + PCI_MSI_FLAGS)
> +#define PCI_MSIX_FLAGS_ENABLE		(1 << 15)
> +
> +	int pos;
> +	u16 control = 0;
> +
> +	pos = pci_find_capability(pdev, PCI_CAP_ID_MSI);
> +	if (pos) {
> +		pci_read_config_word(pdev, msi_control_reg(pos), &control);
> +		if (control & PCI_MSI_FLAGS_ENABLE) {
> +			printk(KERN_INFO "cciss: resetting MSI\n");
> +			pci_write_config_word(pdev, msi_control_reg(pos), control & ~PCI_MSI_FLAGS_ENABLE);
> +		}
> +	}
> +
> +	pos = pci_find_capability(pdev, PCI_CAP_ID_MSIX);
> +	if (pos) {
> +		pci_read_config_word(pdev, msi_control_reg(pos), &control);
> +		if (control & PCI_MSIX_FLAGS_ENABLE) {
> +			printk(KERN_INFO "cciss: resetting MSI-X\n");
> +			pci_write_config_word(pdev, msi_control_reg(pos), control & ~PCI_MSIX_FLAGS_ENABLE);
> +		}
> +	}
> +
> +	return 0;
> +}
> +
> +/* This does a hard reset of the controller using PCI power management
> + * states. */
> +static __devinit int cciss_hard_reset_controller(struct pci_dev *pdev)
> +{
> +	u16 pmcsr, saved_config_space[32];
> +	int i, pos;
> +
> +	printk(KERN_INFO "cciss: using PCI PM to reset controller\n");
> +
> +	/* This is very nearly the same thing as
> +
> +	   pci_save_state(pci_dev);
> +	   pci_set_power_state(pci_dev, PCI_D3hot);
> +	   pci_set_power_state(pci_dev, PCI_D0);
> +	   pci_restore_state(pci_dev);
> +
> +	   but we can't use these nice canned kernel routines on
> +	   kexec, because they also check the MSI/MSI-X state in PCI
> +	   configuration space and do the wrong thing when it is
> +	   set/cleared.  Also, the pci_save/restore_state functions
> +	   violate the ordering requirements for restoring the
> +	   configuration space from the CCISS document (see the
> +	   comment below).  So we roll our own .... */
> +
> +	for (i = 0; i < 32; i++)
> +		pci_read_config_word(pdev, 2*i, &saved_config_space[i]);
> +
> +	pos = pci_find_capability(pdev, PCI_CAP_ID_PM);
> +	if (pos == 0) {
> +		printk(KERN_ERR "cciss_reset_controller: PCI PM not supported\n");
> +		return -ENODEV;
> +	}
> +
> +	/* Quoting from the Open CISS Specification: "The Power
> +	 * Management Control/Status Register (CSR) controls the power
> +	 * state of the device.  The normal operating state is D0,
> +	 * CSR=00h.  The software off state is D3, CSR=03h.  To reset
> +	 * the controller, place the interface device in D3 then to
> +	 * D0, this causes a secondary PCI reset which will reset the
> +	 * controller." */
> +
> +	/* enter the D3hot power management state */
> +	pci_read_config_word(pdev, pos + PCI_PM_CTRL, &pmcsr);
> +	pmcsr &= ~PCI_PM_CTRL_STATE_MASK;
> +	pmcsr |= PCI_D3hot;
> +	pci_write_config_word(pdev, pos + PCI_PM_CTRL, pmcsr);
> +
> +	set_current_state(TASK_UNINTERRUPTIBLE);
> +	schedule_timeout(HZ >> 1);
> +
> +	/* enter the D0 power management state */
> +	pmcsr &= ~PCI_PM_CTRL_STATE_MASK;
> +	pmcsr |= PCI_D0;
> +	pci_write_config_word(pdev, pos + PCI_PM_CTRL, pmcsr);
> +
> +	set_current_state(TASK_UNINTERRUPTIBLE);
> +	schedule_timeout(HZ >> 1);
> +
> +	/* Restore the PCI configuration space.  The Open CISS
> +	 * Specification says, "Restore the PCI Configuration
> +	 * Registers, offsets 00h through 60h. It is important to
> +	 * restore the command register, 16-bits at offset 04h,
> +	 * last. Do not restore the configuration status register,
> +	 * 16-bits at offset 06h."  Note that the offset is 2*i. */
> +	for (i = 0; i < 32; i++) {
> +		if (i == 2 || i == 3)
> +			continue;
> +		pci_write_config_word(pdev, 2*i, saved_config_space[i]);
> +	}
> +	wmb();
> +	pci_write_config_word(pdev, 4, saved_config_space[2]);
> +
> +	return 0;
> +}
> +
>  /*
>   *  This is it.  Find all the controllers and register them.  I really hate
>   *  stealing all these major device numbers.
> @@ -3404,6 +3603,24 @@ static int __devinit cciss_init_one(struct pci_dev *pdev,
>  	int dac, return_code;
>  	InquiryData_struct *inq_buff = NULL;
>  
> +	if (reset_devices) {
> +		/* Reset the controller with a PCI power-cycle */
> +		if (cciss_hard_reset_controller(pdev) || cciss_reset_msi(pdev))
> +			return -ENODEV;
> +
> +		/* Some devices (notably the HP Smart Array 5i Controller)
> +		   need a little pause here */
> +		schedule_timeout_uninterruptible(30*HZ);
> +
> +		/* Now try to get the controller to respond to a no-op */
> +		for (i=0; i<12; i++) {
> +			if (cciss_noop(pdev) == 0)
> +				break;
> +			else
> +				printk("cciss: no-op failed%s\n", (i < 11 ? "; re-trying" : ""));
> +		}
> +	}
> +
>  	i = alloc_cciss_hba();
>  	if (i < 0)
>  		return -1;


-- 
~Randy

^ permalink raw reply	[flat|nested] 3+ messages in thread

* RE: [PATCH 1/1] cciss: PCI power management reset for kexec
  2009-02-10 22:55 ` Randy Dunlap
@ 2009-02-11 15:21   ` Miller, Mike (OS Dev)
  0 siblings, 0 replies; 3+ messages in thread
From: Miller, Mike (OS Dev) @ 2009-02-11 15:21 UTC (permalink / raw)
  To: Randy Dunlap; +Cc: Andrew Morton, Jens Axboe, LKML, LKML-scsi

Randy wrote: 

> -----Original Message-----
> From: Randy Dunlap [mailto:randy.dunlap@oracle.com] 
> Sent: Tuesday, February 10, 2009 4:55 PM
> To: Miller, Mike (OS Dev)
> Cc: Andrew Morton; Jens Axboe; LKML; LKML-scsi
> Subject: Re: [PATCH 1/1] cciss: PCI power management reset for kexec
> 
> Mike Miller wrote:
> > Patch 1 of 1
> > 
> > This patch provides a better "kick-in-the-pants" on driver load in a
> > kexec'ed environment.
> > 
> > I've successfully sanity tested the port in my lab. Randy, please 
> > apply and test. You seem to be able to bring out the worst in the 
> > driver. ;-)
> 
> Hi Mike,
> 
> I've booted this successfully (new kernel thru kexec) 4-5 times,
> which doesn't prove a whole lot since the failure is intermittent.
> Anyway, I'll continue to apply this patch in my daily kernel 
> testing...
> 
> Thanks.

Thanks for the update, Randy.


> 
> > Author: Chip Coldwell <coldwell@redhat.com>
> > 
> >     CCISS: Use PCI power management to reset the controller
> > 
> >     The kexec kernel resets the CCISS hardware in three steps:
> > 
> >     1. Use PCI power management states to reset the controller
> >        in the kexec kernel.
> >     2. Clear the MSI/MSI-X bits in PCI configuration space so
> >        that MSI initialization in the kexec kernel doesn't fail.
> >     3. Use the CCISS "No-op" message to determine when the
> >        controller firmware has recovered from the PCI PM reset.
> > 
> > Signed-off-by: Mike Miller <mike.miller@hp.com>
> > 
> > 
> ----------------------------------------------------------------------
> > --------- diff --git a/drivers/block/cciss.c 
> b/drivers/block/cciss.c 
> > index 01e6938..ff4a105 100644
> > --- a/drivers/block/cciss.c
> > +++ b/drivers/block/cciss.c
> > @@ -3390,6 +3390,205 @@ static void free_hba(int i)
> >  	kfree(p);
> >  }
> >  
> > +/* Send a message CDB to the firmware. */ static __devinit int 
> > +cciss_message(struct pci_dev *pdev, unsigned char opcode, unsigned 
> > +char type) {
> > +	typedef struct {
> > +		CommandListHeader_struct CommandHeader;
> > +		RequestBlock_struct Request;
> > +		ErrDescriptor_struct ErrorDescriptor;
> > +	} Command;
> > +	static const size_t cmd_sz = sizeof(Command) + 
> sizeof(ErrorInfo_struct);
> > +	Command *cmd;
> > +	dma_addr_t paddr64;
> > +	uint32_t paddr32, tag;
> > +	void __iomem *vaddr;
> > +	int i, err;
> > +
> > +	vaddr = ioremap_nocache(pci_resource_start(pdev, 0), 
> pci_resource_len(pdev, 0));
> > +	if (vaddr == NULL)
> > +		return -ENOMEM;
> > +
> > +	/* The Inbound Post Queue only accepts 32-bit physical 
> addresses for the
> > +	   CCISS commands, so they must be allocated from the 
> lower 4GiB of
> > +	   memory. */
> > +	err = pci_set_consistent_dma_mask(pdev, DMA_32BIT_MASK);
> > +	if (err) {
> > +		iounmap(vaddr);
> > +		return -ENOMEM;
> > +	}
> > +
> > +	cmd = pci_alloc_consistent(pdev, cmd_sz, &paddr64);
> > +	if (cmd == NULL) {
> > +		iounmap(vaddr);
> > +		return -ENOMEM;
> > +	}
> > +
> > +	/* This must fit, because of the 32-bit consistent DMA 
> mask.  Also,
> > +	   although there's no guarantee, we assume that the 
> address is at
> > +	   least 4-byte aligned (most likely, it's page-aligned). */
> > +	paddr32 = paddr64;
> > +
> > +	cmd->CommandHeader.ReplyQueue = 0;
> > +	cmd->CommandHeader.SGList = 0;
> > +	cmd->CommandHeader.SGTotal = 0;
> > +	cmd->CommandHeader.Tag.lower = paddr32;
> > +	cmd->CommandHeader.Tag.upper = 0;
> > +	memset(&cmd->CommandHeader.LUN.LunAddrBytes, 0, 8);
> > +
> > +	cmd->Request.CDBLen = 16;
> > +	cmd->Request.Type.Type = TYPE_MSG;
> > +	cmd->Request.Type.Attribute = ATTR_HEADOFQUEUE;
> > +	cmd->Request.Type.Direction = XFER_NONE;
> > +	cmd->Request.Timeout = 0; /* Don't time out */
> > +	cmd->Request.CDB[0] = opcode;
> > +	cmd->Request.CDB[1] = type;
> > +	memset(&cmd->Request.CDB[2], 0, 14); /* the rest of the CDB is 
> > +reserved */
> > +
> > +	cmd->ErrorDescriptor.Addr.lower = paddr32 + sizeof(Command);
> > +	cmd->ErrorDescriptor.Addr.upper = 0;
> > +	cmd->ErrorDescriptor.Len = sizeof(ErrorInfo_struct);
> > +
> > +	writel(paddr32, vaddr + SA5_REQUEST_PORT_OFFSET);
> > +
> > +	for (i = 0; i < 10; i++) {
> > +		tag = readl(vaddr + SA5_REPLY_PORT_OFFSET);
> > +		if ((tag & ~3) == paddr32)
> > +			break;
> > +		schedule_timeout_uninterruptible(HZ);
> > +	}
> > +
> > +	iounmap(vaddr);
> > +
> > +	/* we leak the DMA buffer here ... no choice since the 
> controller could
> > +	   still complete the command. */
> > +	if (i == 10) {
> > +		printk(KERN_ERR "cciss: controller message 
> %02x:%02x timed out\n",
> > +			opcode, type);
> > +		return -ETIMEDOUT;
> > +	}
> > +
> > +	pci_free_consistent(pdev, cmd_sz, cmd, paddr64);
> > +
> > +	if (tag & 2) {
> > +		printk(KERN_ERR "cciss: controller message 
> %02x:%02x failed\n",
> > +			opcode, type);
> > +		return -EIO;
> > +	}
> > +
> > +	printk(KERN_INFO "cciss: controller message %02x:%02x 
> succeeded\n",
> > +		opcode, type);
> > +	return 0;
> > +}
> > +
> > +#define cciss_soft_reset_controller(p) cciss_message(p, 1, 
> 0) #define 
> > +cciss_noop(p) cciss_message(p, 3, 0)
> > +
> > +static __devinit int cciss_reset_msi(struct pci_dev *pdev) {
> > +/* the #defines are stolen from drivers/pci/msi.h. */
> > +#define msi_control_reg(base)		(base + PCI_MSI_FLAGS)
> > +#define PCI_MSIX_FLAGS_ENABLE		(1 << 15)
> > +
> > +	int pos;
> > +	u16 control = 0;
> > +
> > +	pos = pci_find_capability(pdev, PCI_CAP_ID_MSI);
> > +	if (pos) {
> > +		pci_read_config_word(pdev, 
> msi_control_reg(pos), &control);
> > +		if (control & PCI_MSI_FLAGS_ENABLE) {
> > +			printk(KERN_INFO "cciss: resetting MSI\n");
> > +			pci_write_config_word(pdev, 
> msi_control_reg(pos), control & ~PCI_MSI_FLAGS_ENABLE);
> > +		}
> > +	}
> > +
> > +	pos = pci_find_capability(pdev, PCI_CAP_ID_MSIX);
> > +	if (pos) {
> > +		pci_read_config_word(pdev, 
> msi_control_reg(pos), &control);
> > +		if (control & PCI_MSIX_FLAGS_ENABLE) {
> > +			printk(KERN_INFO "cciss: resetting MSI-X\n");
> > +			pci_write_config_word(pdev, 
> msi_control_reg(pos), control & ~PCI_MSIX_FLAGS_ENABLE);
> > +		}
> > +	}
> > +
> > +	return 0;
> > +}
> > +
> > +/* This does a hard reset of the controller using PCI power 
> > +management
> > + * states. */
> > +static __devinit int cciss_hard_reset_controller(struct pci_dev 
> > +*pdev) {
> > +	u16 pmcsr, saved_config_space[32];
> > +	int i, pos;
> > +
> > +	printk(KERN_INFO "cciss: using PCI PM to reset controller\n");
> > +
> > +	/* This is very nearly the same thing as
> > +
> > +	   pci_save_state(pci_dev);
> > +	   pci_set_power_state(pci_dev, PCI_D3hot);
> > +	   pci_set_power_state(pci_dev, PCI_D0);
> > +	   pci_restore_state(pci_dev);
> > +
> > +	   but we can't use these nice canned kernel routines on
> > +	   kexec, because they also check the MSI/MSI-X state in PCI
> > +	   configuration space and do the wrong thing when it is
> > +	   set/cleared.  Also, the pci_save/restore_state functions
> > +	   violate the ordering requirements for restoring the
> > +	   configuration space from the CCISS document (see the
> > +	   comment below).  So we roll our own .... */
> > +
> > +	for (i = 0; i < 32; i++)
> > +		pci_read_config_word(pdev, 2*i, &saved_config_space[i]);
> > +
> > +	pos = pci_find_capability(pdev, PCI_CAP_ID_PM);
> > +	if (pos == 0) {
> > +		printk(KERN_ERR "cciss_reset_controller: PCI PM 
> not supported\n");
> > +		return -ENODEV;
> > +	}
> > +
> > +	/* Quoting from the Open CISS Specification: "The Power
> > +	 * Management Control/Status Register (CSR) controls the power
> > +	 * state of the device.  The normal operating state is D0,
> > +	 * CSR=00h.  The software off state is D3, CSR=03h.  To reset
> > +	 * the controller, place the interface device in D3 then to
> > +	 * D0, this causes a secondary PCI reset which will reset the
> > +	 * controller." */
> > +
> > +	/* enter the D3hot power management state */
> > +	pci_read_config_word(pdev, pos + PCI_PM_CTRL, &pmcsr);
> > +	pmcsr &= ~PCI_PM_CTRL_STATE_MASK;
> > +	pmcsr |= PCI_D3hot;
> > +	pci_write_config_word(pdev, pos + PCI_PM_CTRL, pmcsr);
> > +
> > +	set_current_state(TASK_UNINTERRUPTIBLE);
> > +	schedule_timeout(HZ >> 1);
> > +
> > +	/* enter the D0 power management state */
> > +	pmcsr &= ~PCI_PM_CTRL_STATE_MASK;
> > +	pmcsr |= PCI_D0;
> > +	pci_write_config_word(pdev, pos + PCI_PM_CTRL, pmcsr);
> > +
> > +	set_current_state(TASK_UNINTERRUPTIBLE);
> > +	schedule_timeout(HZ >> 1);
> > +
> > +	/* Restore the PCI configuration space.  The Open CISS
> > +	 * Specification says, "Restore the PCI Configuration
> > +	 * Registers, offsets 00h through 60h. It is important to
> > +	 * restore the command register, 16-bits at offset 04h,
> > +	 * last. Do not restore the configuration status register,
> > +	 * 16-bits at offset 06h."  Note that the offset is 2*i. */
> > +	for (i = 0; i < 32; i++) {
> > +		if (i == 2 || i == 3)
> > +			continue;
> > +		pci_write_config_word(pdev, 2*i, saved_config_space[i]);
> > +	}
> > +	wmb();
> > +	pci_write_config_word(pdev, 4, saved_config_space[2]);
> > +
> > +	return 0;
> > +}
> > +
> >  /*
> >   *  This is it.  Find all the controllers and register 
> them.  I really hate
> >   *  stealing all these major device numbers.
> > @@ -3404,6 +3603,24 @@ static int __devinit 
> cciss_init_one(struct pci_dev *pdev,
> >  	int dac, return_code;
> >  	InquiryData_struct *inq_buff = NULL;
> >  
> > +	if (reset_devices) {
> > +		/* Reset the controller with a PCI power-cycle */
> > +		if (cciss_hard_reset_controller(pdev) || 
> cciss_reset_msi(pdev))
> > +			return -ENODEV;
> > +
> > +		/* Some devices (notably the HP Smart Array 5i 
> Controller)
> > +		   need a little pause here */
> > +		schedule_timeout_uninterruptible(30*HZ);
> > +
> > +		/* Now try to get the controller to respond to 
> a no-op */
> > +		for (i=0; i<12; i++) {
> > +			if (cciss_noop(pdev) == 0)
> > +				break;
> > +			else
> > +				printk("cciss: no-op 
> failed%s\n", (i < 11 ? "; re-trying" : ""));
> > +		}
> > +	}
> > +
> >  	i = alloc_cciss_hba();
> >  	if (i < 0)
> >  		return -1;
> 
> 
> --
> ~Randy
> 

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2009-02-11 15:22 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz, follow: Atom feed)
-- links below jump to the message on this page --
2009-02-07 19:39 [PATCH 1/1] cciss: PCI power management reset for kexec Mike Miller
2009-02-10 22:55 ` Randy Dunlap
2009-02-11 15:21   ` Miller, Mike (OS Dev)

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox