* [PATCH 1/1] cciss: PCI power management reset for kexec
@ 2009-02-07 19:39 Mike Miller
2009-02-10 22:55 ` Randy Dunlap
0 siblings, 1 reply; 3+ messages in thread
From: Mike Miller @ 2009-02-07 19:39 UTC (permalink / raw)
To: Andrew Morton, Jens Axboe; +Cc: LKML, randy.dunlap, LKML-scsi
Patch 1 of 1
This patch provides a better "kick-in-the-pants" on driver load in a
kexec'ed environment.
I've successfully sanity tested the port in my lab. Randy, please apply and
test. You seem to be able to bring out the worst in the driver. ;-)
Author: Chip Coldwell <coldwell@redhat.com>
CCISS: Use PCI power management to reset the controller
The kexec kernel resets the CCISS hardware in three steps:
1. Use PCI power management states to reset the controller
in the kexec kernel.
2. Clear the MSI/MSI-X bits in PCI configuration space so
that MSI initialization in the kexec kernel doesn't fail.
3. Use the CCISS "No-op" message to determine when the
controller firmware has recovered from the PCI PM reset.
Signed-off-by: Mike Miller <mike.miller@hp.com>
-------------------------------------------------------------------------------
diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index 01e6938..ff4a105 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -3390,6 +3390,205 @@ static void free_hba(int i)
kfree(p);
}
+/*
+ * Send a message CDB to the firmware and poll for its completion.
+ *
+ * @pdev:   controller to talk to; PCI resource 0 is mapped for the call
+ * @opcode: value stored in CDB[0]
+ * @type:   value stored in CDB[1]
+ *
+ * The command is posted by writing its 32-bit bus address to
+ * SA5_REQUEST_PORT_OFFSET.  SA5_REPLY_PORT_OFFSET is then read up to ten
+ * times, sleeping HZ jiffies after every read that does not return our tag.
+ *
+ * Returns 0 on success, the error reported by pci_set_consistent_dma_mask()
+ * if the 32-bit mask is rejected, -ENOMEM if the register window cannot be
+ * mapped or the command buffer cannot be allocated, -ETIMEDOUT if the tag
+ * never shows up, and -EIO if the returned tag has bit 1 set.
+ */
+static __devinit int cciss_message(struct pci_dev *pdev, unsigned char opcode, unsigned char type)
+{
+	typedef struct {
+		CommandListHeader_struct CommandHeader;
+		RequestBlock_struct Request;
+		ErrDescriptor_struct ErrorDescriptor;
+	} Command;
+	static const size_t cmd_sz = sizeof(Command) + sizeof(ErrorInfo_struct);
+	Command *cmd;
+	dma_addr_t paddr64;
+	uint32_t paddr32, tag;
+	void __iomem *vaddr;
+	int i, err;
+
+	vaddr = ioremap_nocache(pci_resource_start(pdev, 0), pci_resource_len(pdev, 0));
+	if (vaddr == NULL)
+		return -ENOMEM;
+
+	/* The Inbound Post Queue only accepts 32-bit physical addresses for the
+	   CCISS commands, so they must be allocated from the lower 4GiB of
+	   memory. */
+	err = pci_set_consistent_dma_mask(pdev, DMA_32BIT_MASK);
+	if (err)
+		goto out_unmap;	/* report the real reason, not -ENOMEM */
+
+	cmd = pci_alloc_consistent(pdev, cmd_sz, &paddr64);
+	if (cmd == NULL) {
+		err = -ENOMEM;
+		goto out_unmap;
+	}
+
+	/* This must fit, because of the 32-bit consistent DMA mask.  Also,
+	   although there's no guarantee, we assume that the address is at
+	   least 4-byte aligned (most likely, it's page-aligned).
+	   NOTE(review): the completion check below masks off the low two
+	   bits of the reply tag, so a buffer that was not 4-byte aligned
+	   could never be matched and the call would always time out. */
+	paddr32 = paddr64;
+
+	cmd->CommandHeader.ReplyQueue = 0;
+	cmd->CommandHeader.SGList = 0;
+	cmd->CommandHeader.SGTotal = 0;
+	cmd->CommandHeader.Tag.lower = paddr32;
+	cmd->CommandHeader.Tag.upper = 0;
+	memset(&cmd->CommandHeader.LUN.LunAddrBytes, 0, 8);
+
+	cmd->Request.CDBLen = 16;
+	cmd->Request.Type.Type = TYPE_MSG;
+	cmd->Request.Type.Attribute = ATTR_HEADOFQUEUE;
+	cmd->Request.Type.Direction = XFER_NONE;
+	cmd->Request.Timeout = 0; /* Don't time out */
+	cmd->Request.CDB[0] = opcode;
+	cmd->Request.CDB[1] = type;
+	memset(&cmd->Request.CDB[2], 0, 14); /* the rest of the CDB is reserved */
+
+	/* The error info area lives in the same allocation, right behind
+	   the command itself (see cmd_sz). */
+	cmd->ErrorDescriptor.Addr.lower = paddr32 + sizeof(Command);
+	cmd->ErrorDescriptor.Addr.upper = 0;
+	cmd->ErrorDescriptor.Len = sizeof(ErrorInfo_struct);
+
+	writel(paddr32, vaddr + SA5_REQUEST_PORT_OFFSET);
+
+	/* Poll for our tag; the low two bits are not part of the address. */
+	for (i = 0; i < 10; i++) {
+		tag = readl(vaddr + SA5_REPLY_PORT_OFFSET);
+		if ((tag & ~3) == paddr32)
+			break;
+		schedule_timeout_uninterruptible(HZ);
+	}
+
+	if (i == 10) {
+		/* we leak the DMA buffer here ... no choice since the
+		   controller could still complete the command. */
+		printk(KERN_ERR "cciss: controller message %02x:%02x timed out\n",
+			opcode, type);
+		err = -ETIMEDOUT;
+		goto out_unmap;
+	}
+
+	pci_free_consistent(pdev, cmd_sz, cmd, paddr64);
+
+	if (tag & 2) {
+		printk(KERN_ERR "cciss: controller message %02x:%02x failed\n",
+			opcode, type);
+		err = -EIO;
+		goto out_unmap;
+	}
+
+	/* err is still 0 from the successful DMA mask call above */
+	printk(KERN_INFO "cciss: controller message %02x:%02x succeeded\n",
+		opcode, type);
+
+out_unmap:
+	iounmap(vaddr);
+	return err;
+}
+
+/* Convenience wrappers around cciss_message(): both pass type 0; the
+ * soft-reset wrapper sends message opcode 1 and the no-op wrapper sends
+ * message opcode 3. */
+#define cciss_soft_reset_controller(p) cciss_message(p, 1, 0)
+#define cciss_noop(p) cciss_message(p, 3, 0)
+
+/*
+ * Clear the MSI and MSI-X enable bits in PCI configuration space.
+ *
+ * For each of the two capabilities that the device exposes, the control
+ * word is read and, only if its enable bit is set, written back with that
+ * bit cleared.  Config-space access results are not checked.
+ *
+ * Always returns 0.
+ */
+static __devinit int cciss_reset_msi(struct pci_dev *pdev)
+{
+/* the #defines are stolen from drivers/pci/msi.h. */
+#define msi_control_reg(base)		((base) + PCI_MSI_FLAGS)
+#define PCI_MSIX_FLAGS_ENABLE		(1 << 15)
+
+	int pos;
+	u16 control = 0;
+
+	pos = pci_find_capability(pdev, PCI_CAP_ID_MSI);
+	if (pos) {
+		pci_read_config_word(pdev, msi_control_reg(pos), &control);
+		if (control & PCI_MSI_FLAGS_ENABLE) {
+			printk(KERN_INFO "cciss: resetting MSI\n");
+			pci_write_config_word(pdev, msi_control_reg(pos), control & ~PCI_MSI_FLAGS_ENABLE);
+		}
+	}
+
+	/* NOTE(review): the MSI-X control word is addressed with the MSI
+	 * offset macro; this presumes both capabilities keep their control
+	 * word at the same offset -- confirm against pci_regs.h. */
+	pos = pci_find_capability(pdev, PCI_CAP_ID_MSIX);
+	if (pos) {
+		pci_read_config_word(pdev, msi_control_reg(pos), &control);
+		if (control & PCI_MSIX_FLAGS_ENABLE) {
+			printk(KERN_INFO "cciss: resetting MSI-X\n");
+			pci_write_config_word(pdev, msi_control_reg(pos), control & ~PCI_MSIX_FLAGS_ENABLE);
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Hard-reset the controller by cycling it through the PCI power management
+ * states D3hot and D0, then restoring the saved configuration words.
+ *
+ * Returns 0 on success, or -ENODEV if the device has no PCI power
+ * management capability (in which case the device is left untouched).
+ */
+static __devinit int cciss_hard_reset_controller(struct pci_dev *pdev)
+{
+	u16 pmcsr, saved_config_space[32];
+	int i, pos;
+
+	printk(KERN_INFO "cciss: using PCI PM to reset controller\n");
+
+	/* This is very nearly the same thing as
+
+	   pci_save_state(pci_dev);
+	   pci_set_power_state(pci_dev, PCI_D3hot);
+	   pci_set_power_state(pci_dev, PCI_D0);
+	   pci_restore_state(pci_dev);
+
+	   but we can't use these nice canned kernel routines on
+	   kexec, because they also check the MSI/MSI-X state in PCI
+	   configuration space and do the wrong thing when it is
+	   set/cleared.  Also, the pci_save/restore_state functions
+	   violate the ordering requirements for restoring the
+	   configuration space from the CCISS document (see the
+	   comment below).  So we roll our own .... */
+
+	/* Bail out before touching anything if the reset cannot be done. */
+	pos = pci_find_capability(pdev, PCI_CAP_ID_PM);
+	if (pos == 0) {
+		printk(KERN_ERR "cciss_reset_controller: PCI PM not supported\n");
+		return -ENODEV;
+	}
+
+	/* Save the first 32 16-bit configuration words (byte offsets
+	 * 00h-3Fh).  NOTE(review): the specification text quoted further
+	 * down says "offsets 00h through 60h"; confirm whether the words
+	 * beyond 3Fh need to be saved and restored as well. */
+	for (i = 0; i < 32; i++)
+		pci_read_config_word(pdev, 2*i, &saved_config_space[i]);
+
+	/* Quoting from the Open CISS Specification: "The Power
+	 * Management Control/Status Register (CSR) controls the power
+	 * state of the device.  The normal operating state is D0,
+	 * CSR=00h.  The software off state is D3, CSR=03h.  To reset
+	 * the controller, place the interface device in D3 then to
+	 * D0, this causes a secondary PCI reset which will reset the
+	 * controller." */
+
+	/* enter the D3hot power management state */
+	pci_read_config_word(pdev, pos + PCI_PM_CTRL, &pmcsr);
+	pmcsr &= ~PCI_PM_CTRL_STATE_MASK;
+	pmcsr |= PCI_D3hot;
+	pci_write_config_word(pdev, pos + PCI_PM_CTRL, pmcsr);
+
+	/* stay in D3hot for half a second */
+	schedule_timeout_uninterruptible(HZ >> 1);
+
+	/* enter the D0 power management state */
+	pmcsr &= ~PCI_PM_CTRL_STATE_MASK;
+	pmcsr |= PCI_D0;
+	pci_write_config_word(pdev, pos + PCI_PM_CTRL, pmcsr);
+
+	/* give the controller half a second before restoring its config */
+	schedule_timeout_uninterruptible(HZ >> 1);
+
+	/* Restore the PCI configuration space.  The Open CISS
+	 * Specification says, "Restore the PCI Configuration
+	 * Registers, offsets 00h through 60h.  It is important to
+	 * restore the command register, 16-bits at offset 04h,
+	 * last.  Do not restore the configuration status register,
+	 * 16-bits at offset 06h."  Note that the offset is 2*i. */
+	for (i = 0; i < 32; i++) {
+		/* word 2 is the command register (written last, below);
+		 * word 3 is the status register (never restored) */
+		if (i == 2 || i == 3)
+			continue;
+		pci_write_config_word(pdev, 2*i, saved_config_space[i]);
+	}
+	wmb();
+	pci_write_config_word(pdev, 4, saved_config_space[2]);
+
+	return 0;
+}
+
/*
* This is it. Find all the controllers and register them. I really hate
* stealing all these major device numbers.
@@ -3404,6 +3603,24 @@ static int __devinit cciss_init_one(struct pci_dev *pdev,
int dac, return_code;
InquiryData_struct *inq_buff = NULL;
+ if (reset_devices) {
+ /* Reset the controller with a PCI power-cycle */
+ if (cciss_hard_reset_controller(pdev) || cciss_reset_msi(pdev))
+ return -ENODEV;
+
+ /* Some devices (notably the HP Smart Array 5i Controller)
+ need a little pause here */
+ schedule_timeout_uninterruptible(30*HZ);
+
+ /* Now try to get the controller to respond to a no-op */
+ for (i=0; i<12; i++) {
+ if (cciss_noop(pdev) == 0)
+ break;
+ else
+ printk("cciss: no-op failed%s\n", (i < 11 ? "; re-trying" : ""));
+ }
+ }
+
i = alloc_cciss_hba();
if (i < 0)
return -1;
^ permalink raw reply related [flat|nested] 3+ messages in thread
* Re: [PATCH 1/1] cciss: PCI power management reset for kexec
2009-02-07 19:39 [PATCH 1/1] cciss: PCI power management reset for kexec Mike Miller
@ 2009-02-10 22:55 ` Randy Dunlap
2009-02-11 15:21 ` Miller, Mike (OS Dev)
0 siblings, 1 reply; 3+ messages in thread
From: Randy Dunlap @ 2009-02-10 22:55 UTC (permalink / raw)
To: Mike Miller; +Cc: Andrew Morton, Jens Axboe, LKML, LKML-scsi
Mike Miller wrote:
> Patch 1 of 1
>
> This patch provides the better "kick-in-the-pants" on driver load in a
> kexec'ed environment.
>
> I've successfully sanity tested the port in my lab. Randy, please apply and
> test. You seem to be able to bring out the worst in the driver. ;-)
Hi Mike,
I've booted this successfully (new kernel thru kexec) 4-5 times,
which doesn't prove a whole lot since the failure is intermittent.
Anyway, I'll continue to apply this patch in my daily kernel testing...
Thanks.
> Author: Chip Coldwell <coldwell@redhat.com>
>
> CCISS: Use PCI power management to reset the controller
>
> The kexec kernel resets the CCISS hardware in three steps:
>
> 1. Use PCI power management states to reset the controller
> in the kexec kernel.
> 2. Clear the MSI/MSI-X bits in PCI configuration space so
> that MSI initialization in the kexec kernel doesn't fail.
> 3. Use the CCISS "No-op" message to determine when the
> controller firmware has recovered from the PCI PM reset.
>
> Signed-off-by: Mike Miller <mike.miller@hp.com>
>
> -------------------------------------------------------------------------------
> diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
> index 01e6938..ff4a105 100644
> --- a/drivers/block/cciss.c
> +++ b/drivers/block/cciss.c
> @@ -3390,6 +3390,205 @@ static void free_hba(int i)
> kfree(p);
> }
>
> +/* Send a message CDB to the firmware. */
> +static __devinit int cciss_message(struct pci_dev *pdev, unsigned char opcode, unsigned char type)
> +{
> + typedef struct {
> + CommandListHeader_struct CommandHeader;
> + RequestBlock_struct Request;
> + ErrDescriptor_struct ErrorDescriptor;
> + } Command;
> + static const size_t cmd_sz = sizeof(Command) + sizeof(ErrorInfo_struct);
> + Command *cmd;
> + dma_addr_t paddr64;
> + uint32_t paddr32, tag;
> + void __iomem *vaddr;
> + int i, err;
> +
> + vaddr = ioremap_nocache(pci_resource_start(pdev, 0), pci_resource_len(pdev, 0));
> + if (vaddr == NULL)
> + return -ENOMEM;
> +
> + /* The Inbound Post Queue only accepts 32-bit physical addresses for the
> + CCISS commands, so they must be allocated from the lower 4GiB of
> + memory. */
> + err = pci_set_consistent_dma_mask(pdev, DMA_32BIT_MASK);
> + if (err) {
> + iounmap(vaddr);
> + return -ENOMEM;
> + }
> +
> + cmd = pci_alloc_consistent(pdev, cmd_sz, &paddr64);
> + if (cmd == NULL) {
> + iounmap(vaddr);
> + return -ENOMEM;
> + }
> +
> + /* This must fit, because of the 32-bit consistent DMA mask. Also,
> + although there's no guarantee, we assume that the address is at
> + least 4-byte aligned (most likely, it's page-aligned). */
> + paddr32 = paddr64;
> +
> + cmd->CommandHeader.ReplyQueue = 0;
> + cmd->CommandHeader.SGList = 0;
> + cmd->CommandHeader.SGTotal = 0;
> + cmd->CommandHeader.Tag.lower = paddr32;
> + cmd->CommandHeader.Tag.upper = 0;
> + memset(&cmd->CommandHeader.LUN.LunAddrBytes, 0, 8);
> +
> + cmd->Request.CDBLen = 16;
> + cmd->Request.Type.Type = TYPE_MSG;
> + cmd->Request.Type.Attribute = ATTR_HEADOFQUEUE;
> + cmd->Request.Type.Direction = XFER_NONE;
> + cmd->Request.Timeout = 0; /* Don't time out */
> + cmd->Request.CDB[0] = opcode;
> + cmd->Request.CDB[1] = type;
> + memset(&cmd->Request.CDB[2], 0, 14); /* the rest of the CDB is reserved */
> +
> + cmd->ErrorDescriptor.Addr.lower = paddr32 + sizeof(Command);
> + cmd->ErrorDescriptor.Addr.upper = 0;
> + cmd->ErrorDescriptor.Len = sizeof(ErrorInfo_struct);
> +
> + writel(paddr32, vaddr + SA5_REQUEST_PORT_OFFSET);
> +
> + for (i = 0; i < 10; i++) {
> + tag = readl(vaddr + SA5_REPLY_PORT_OFFSET);
> + if ((tag & ~3) == paddr32)
> + break;
> + schedule_timeout_uninterruptible(HZ);
> + }
> +
> + iounmap(vaddr);
> +
> + /* we leak the DMA buffer here ... no choice since the controller could
> + still complete the command. */
> + if (i == 10) {
> + printk(KERN_ERR "cciss: controller message %02x:%02x timed out\n",
> + opcode, type);
> + return -ETIMEDOUT;
> + }
> +
> + pci_free_consistent(pdev, cmd_sz, cmd, paddr64);
> +
> + if (tag & 2) {
> + printk(KERN_ERR "cciss: controller message %02x:%02x failed\n",
> + opcode, type);
> + return -EIO;
> + }
> +
> + printk(KERN_INFO "cciss: controller message %02x:%02x succeeded\n",
> + opcode, type);
> + return 0;
> +}
> +
> +#define cciss_soft_reset_controller(p) cciss_message(p, 1, 0)
> +#define cciss_noop(p) cciss_message(p, 3, 0)
> +
> +static __devinit int cciss_reset_msi(struct pci_dev *pdev)
> +{
> +/* the #defines are stolen from drivers/pci/msi.h. */
> +#define msi_control_reg(base) (base + PCI_MSI_FLAGS)
> +#define PCI_MSIX_FLAGS_ENABLE (1 << 15)
> +
> + int pos;
> + u16 control = 0;
> +
> + pos = pci_find_capability(pdev, PCI_CAP_ID_MSI);
> + if (pos) {
> + pci_read_config_word(pdev, msi_control_reg(pos), &control);
> + if (control & PCI_MSI_FLAGS_ENABLE) {
> + printk(KERN_INFO "cciss: resetting MSI\n");
> + pci_write_config_word(pdev, msi_control_reg(pos), control & ~PCI_MSI_FLAGS_ENABLE);
> + }
> + }
> +
> + pos = pci_find_capability(pdev, PCI_CAP_ID_MSIX);
> + if (pos) {
> + pci_read_config_word(pdev, msi_control_reg(pos), &control);
> + if (control & PCI_MSIX_FLAGS_ENABLE) {
> + printk(KERN_INFO "cciss: resetting MSI-X\n");
> + pci_write_config_word(pdev, msi_control_reg(pos), control & ~PCI_MSIX_FLAGS_ENABLE);
> + }
> + }
> +
> + return 0;
> +}
> +
> +/* This does a hard reset of the controller using PCI power management
> + * states. */
> +static __devinit int cciss_hard_reset_controller(struct pci_dev *pdev)
> +{
> + u16 pmcsr, saved_config_space[32];
> + int i, pos;
> +
> + printk(KERN_INFO "cciss: using PCI PM to reset controller\n");
> +
> + /* This is very nearly the same thing as
> +
> + pci_save_state(pci_dev);
> + pci_set_power_state(pci_dev, PCI_D3hot);
> + pci_set_power_state(pci_dev, PCI_D0);
> + pci_restore_state(pci_dev);
> +
> + but we can't use these nice canned kernel routines on
> + kexec, because they also check the MSI/MSI-X state in PCI
> + configuration space and do the wrong thing when it is
> + set/cleared. Also, the pci_save/restore_state functions
> + violate the ordering requirements for restoring the
> + configuration space from the CCISS document (see the
> + comment below). So we roll our own .... */
> +
> + for (i = 0; i < 32; i++)
> + pci_read_config_word(pdev, 2*i, &saved_config_space[i]);
> +
> + pos = pci_find_capability(pdev, PCI_CAP_ID_PM);
> + if (pos == 0) {
> + printk(KERN_ERR "cciss_reset_controller: PCI PM not supported\n");
> + return -ENODEV;
> + }
> +
> + /* Quoting from the Open CISS Specification: "The Power
> + * Management Control/Status Register (CSR) controls the power
> + * state of the device. The normal operating state is D0,
> + * CSR=00h. The software off state is D3, CSR=03h. To reset
> + * the controller, place the interface device in D3 then to
> + * D0, this causes a secondary PCI reset which will reset the
> + * controller." */
> +
> + /* enter the D3hot power management state */
> + pci_read_config_word(pdev, pos + PCI_PM_CTRL, &pmcsr);
> + pmcsr &= ~PCI_PM_CTRL_STATE_MASK;
> + pmcsr |= PCI_D3hot;
> + pci_write_config_word(pdev, pos + PCI_PM_CTRL, pmcsr);
> +
> + set_current_state(TASK_UNINTERRUPTIBLE);
> + schedule_timeout(HZ >> 1);
> +
> + /* enter the D0 power management state */
> + pmcsr &= ~PCI_PM_CTRL_STATE_MASK;
> + pmcsr |= PCI_D0;
> + pci_write_config_word(pdev, pos + PCI_PM_CTRL, pmcsr);
> +
> + set_current_state(TASK_UNINTERRUPTIBLE);
> + schedule_timeout(HZ >> 1);
> +
> + /* Restore the PCI configuration space. The Open CISS
> + * Specification says, "Restore the PCI Configuration
> + * Registers, offsets 00h through 60h. It is important to
> + * restore the command register, 16-bits at offset 04h,
> + * last. Do not restore the configuration status register,
> + * 16-bits at offset 06h." Note that the offset is 2*i. */
> + for (i = 0; i < 32; i++) {
> + if (i == 2 || i == 3)
> + continue;
> + pci_write_config_word(pdev, 2*i, saved_config_space[i]);
> + }
> + wmb();
> + pci_write_config_word(pdev, 4, saved_config_space[2]);
> +
> + return 0;
> +}
> +
> /*
> * This is it. Find all the controllers and register them. I really hate
> * stealing all these major device numbers.
> @@ -3404,6 +3603,24 @@ static int __devinit cciss_init_one(struct pci_dev *pdev,
> int dac, return_code;
> InquiryData_struct *inq_buff = NULL;
>
> + if (reset_devices) {
> + /* Reset the controller with a PCI power-cycle */
> + if (cciss_hard_reset_controller(pdev) || cciss_reset_msi(pdev))
> + return -ENODEV;
> +
> + /* Some devices (notably the HP Smart Array 5i Controller)
> + need a little pause here */
> + schedule_timeout_uninterruptible(30*HZ);
> +
> + /* Now try to get the controller to respond to a no-op */
> + for (i=0; i<12; i++) {
> + if (cciss_noop(pdev) == 0)
> + break;
> + else
> + printk("cciss: no-op failed%s\n", (i < 11 ? "; re-trying" : ""));
> + }
> + }
> +
> i = alloc_cciss_hba();
> if (i < 0)
> return -1;
--
~Randy
^ permalink raw reply [flat|nested] 3+ messages in thread
* RE: [PATCH 1/1] cciss: PCI power management reset for kexec
2009-02-10 22:55 ` Randy Dunlap
@ 2009-02-11 15:21 ` Miller, Mike (OS Dev)
0 siblings, 0 replies; 3+ messages in thread
From: Miller, Mike (OS Dev) @ 2009-02-11 15:21 UTC (permalink / raw)
To: Randy Dunlap; +Cc: Andrew Morton, Jens Axboe, LKML, LKML-scsi
Randy wrote:
> -----Original Message-----
> From: Randy Dunlap [mailto:randy.dunlap@oracle.com]
> Sent: Tuesday, February 10, 2009 4:55 PM
> To: Miller, Mike (OS Dev)
> Cc: Andrew Morton; Jens Axboe; LKML; LKML-scsi
> Subject: Re: [PATCH 1/1] cciss: PCI power management reset for kexec
>
> Mike Miller wrote:
> > Patch 1 of 1
> >
> > This patch provides the better "kick-in-the-pants" on
> driver load in a
> > kexec'ed environment.
> >
> > I've successfully sanity tested the port in my lab. Randy, please
> > apply and test. You seem to be able to bring out the worst in the
> > driver. ;-)
>
> Hi Mike,
>
> I've booted this successfully (new kernel thru kexec) 4-5
> times successfully, which doesn't prove a whole lot since the
> failure is intermittent.
> Anyway, I'll continue to apply this patch in my daily kernel
> testing...
>
> Thanks.
Thanks for the update, Randy.
>
> > Author: Chip Coldwell <coldwell@redhat.com>
> >
> > CCISS: Use PCI power management to reset the controller
> >
> > The kexec kernel resets the CCISS hardware in three steps:
> >
> > 1. Use PCI power management states to reset the controller
> > in the kexec kernel.
> > 2. Clear the MSI/MSI-X bits in PCI configuration space so
> > that MSI initialization in the kexec kernel doesn't fail.
> > 3. Use the CCISS "No-op" message to determine when the
> > controller firmware has recovered from the PCI PM reset.
> >
> > Signed-off-by: Mike Miller <mike.miller@hp.com>
> >
> >
> ----------------------------------------------------------------------
> > --------- diff --git a/drivers/block/cciss.c
> b/drivers/block/cciss.c
> > index 01e6938..ff4a105 100644
> > --- a/drivers/block/cciss.c
> > +++ b/drivers/block/cciss.c
> > @@ -3390,6 +3390,205 @@ static void free_hba(int i)
> > kfree(p);
> > }
> >
> > +/* Send a message CDB to the firmware. */ static __devinit int
> > +cciss_message(struct pci_dev *pdev, unsigned char opcode, unsigned
> > +char type) {
> > + typedef struct {
> > + CommandListHeader_struct CommandHeader;
> > + RequestBlock_struct Request;
> > + ErrDescriptor_struct ErrorDescriptor;
> > + } Command;
> > + static const size_t cmd_sz = sizeof(Command) +
> sizeof(ErrorInfo_struct);
> > + Command *cmd;
> > + dma_addr_t paddr64;
> > + uint32_t paddr32, tag;
> > + void __iomem *vaddr;
> > + int i, err;
> > +
> > + vaddr = ioremap_nocache(pci_resource_start(pdev, 0),
> pci_resource_len(pdev, 0));
> > + if (vaddr == NULL)
> > + return -ENOMEM;
> > +
> > + /* The Inbound Post Queue only accepts 32-bit physical
> addresses for the
> > + CCISS commands, so they must be allocated from the
> lower 4GiB of
> > + memory. */
> > + err = pci_set_consistent_dma_mask(pdev, DMA_32BIT_MASK);
> > + if (err) {
> > + iounmap(vaddr);
> > + return -ENOMEM;
> > + }
> > +
> > + cmd = pci_alloc_consistent(pdev, cmd_sz, &paddr64);
> > + if (cmd == NULL) {
> > + iounmap(vaddr);
> > + return -ENOMEM;
> > + }
> > +
> > + /* This must fit, because of the 32-bit consistent DMA
> mask. Also,
> > + although there's no guarantee, we assume that the
> address is at
> > + least 4-byte aligned (most likely, it's page-aligned). */
> > + paddr32 = paddr64;
> > +
> > + cmd->CommandHeader.ReplyQueue = 0;
> > + cmd->CommandHeader.SGList = 0;
> > + cmd->CommandHeader.SGTotal = 0;
> > + cmd->CommandHeader.Tag.lower = paddr32;
> > + cmd->CommandHeader.Tag.upper = 0;
> > + memset(&cmd->CommandHeader.LUN.LunAddrBytes, 0, 8);
> > +
> > + cmd->Request.CDBLen = 16;
> > + cmd->Request.Type.Type = TYPE_MSG;
> > + cmd->Request.Type.Attribute = ATTR_HEADOFQUEUE;
> > + cmd->Request.Type.Direction = XFER_NONE;
> > + cmd->Request.Timeout = 0; /* Don't time out */
> > + cmd->Request.CDB[0] = opcode;
> > + cmd->Request.CDB[1] = type;
> > + memset(&cmd->Request.CDB[2], 0, 14); /* the rest of the CDB is
> > +reserved */
> > +
> > + cmd->ErrorDescriptor.Addr.lower = paddr32 + sizeof(Command);
> > + cmd->ErrorDescriptor.Addr.upper = 0;
> > + cmd->ErrorDescriptor.Len = sizeof(ErrorInfo_struct);
> > +
> > + writel(paddr32, vaddr + SA5_REQUEST_PORT_OFFSET);
> > +
> > + for (i = 0; i < 10; i++) {
> > + tag = readl(vaddr + SA5_REPLY_PORT_OFFSET);
> > + if ((tag & ~3) == paddr32)
> > + break;
> > + schedule_timeout_uninterruptible(HZ);
> > + }
> > +
> > + iounmap(vaddr);
> > +
> > + /* we leak the DMA buffer here ... no choice since the
> controller could
> > + still complete the command. */
> > + if (i == 10) {
> > + printk(KERN_ERR "cciss: controller message
> %02x:%02x timed out\n",
> > + opcode, type);
> > + return -ETIMEDOUT;
> > + }
> > +
> > + pci_free_consistent(pdev, cmd_sz, cmd, paddr64);
> > +
> > + if (tag & 2) {
> > + printk(KERN_ERR "cciss: controller message
> %02x:%02x failed\n",
> > + opcode, type);
> > + return -EIO;
> > + }
> > +
> > + printk(KERN_INFO "cciss: controller message %02x:%02x
> succeeded\n",
> > + opcode, type);
> > + return 0;
> > +}
> > +
> > +#define cciss_soft_reset_controller(p) cciss_message(p, 1,
> 0) #define
> > +cciss_noop(p) cciss_message(p, 3, 0)
> > +
> > +static __devinit int cciss_reset_msi(struct pci_dev *pdev) {
> > +/* the #defines are stolen from drivers/pci/msi.h. */
> > +#define msi_control_reg(base) (base + PCI_MSI_FLAGS)
> > +#define PCI_MSIX_FLAGS_ENABLE (1 << 15)
> > +
> > + int pos;
> > + u16 control = 0;
> > +
> > + pos = pci_find_capability(pdev, PCI_CAP_ID_MSI);
> > + if (pos) {
> > + pci_read_config_word(pdev,
> msi_control_reg(pos), &control);
> > + if (control & PCI_MSI_FLAGS_ENABLE) {
> > + printk(KERN_INFO "cciss: resetting MSI\n");
> > + pci_write_config_word(pdev,
> msi_control_reg(pos), control & ~PCI_MSI_FLAGS_ENABLE);
> > + }
> > + }
> > +
> > + pos = pci_find_capability(pdev, PCI_CAP_ID_MSIX);
> > + if (pos) {
> > + pci_read_config_word(pdev,
> msi_control_reg(pos), &control);
> > + if (control & PCI_MSIX_FLAGS_ENABLE) {
> > + printk(KERN_INFO "cciss: resetting MSI-X\n");
> > + pci_write_config_word(pdev,
> msi_control_reg(pos), control & ~PCI_MSIX_FLAGS_ENABLE);
> > + }
> > + }
> > +
> > + return 0;
> > +}
> > +
> > +/* This does a hard reset of the controller using PCI power
> > +management
> > + * states. */
> > +static __devinit int cciss_hard_reset_controller(struct pci_dev
> > +*pdev) {
> > + u16 pmcsr, saved_config_space[32];
> > + int i, pos;
> > +
> > + printk(KERN_INFO "cciss: using PCI PM to reset controller\n");
> > +
> > + /* This is very nearly the same thing as
> > +
> > + pci_save_state(pci_dev);
> > + pci_set_power_state(pci_dev, PCI_D3hot);
> > + pci_set_power_state(pci_dev, PCI_D0);
> > + pci_restore_state(pci_dev);
> > +
> > + but we can't use these nice canned kernel routines on
> > + kexec, because they also check the MSI/MSI-X state in PCI
> > + configuration space and do the wrong thing when it is
> > + set/cleared. Also, the pci_save/restore_state functions
> > + violate the ordering requirements for restoring the
> > + configuration space from the CCISS document (see the
> > + comment below). So we roll our own .... */
> > +
> > + for (i = 0; i < 32; i++)
> > + pci_read_config_word(pdev, 2*i, &saved_config_space[i]);
> > +
> > + pos = pci_find_capability(pdev, PCI_CAP_ID_PM);
> > + if (pos == 0) {
> > + printk(KERN_ERR "cciss_reset_controller: PCI PM
> not supported\n");
> > + return -ENODEV;
> > + }
> > +
> > + /* Quoting from the Open CISS Specification: "The Power
> > + * Management Control/Status Register (CSR) controls the power
> > + * state of the device. The normal operating state is D0,
> > + * CSR=00h. The software off state is D3, CSR=03h. To reset
> > + * the controller, place the interface device in D3 then to
> > + * D0, this causes a secondary PCI reset which will reset the
> > + * controller." */
> > +
> > + /* enter the D3hot power management state */
> > + pci_read_config_word(pdev, pos + PCI_PM_CTRL, &pmcsr);
> > + pmcsr &= ~PCI_PM_CTRL_STATE_MASK;
> > + pmcsr |= PCI_D3hot;
> > + pci_write_config_word(pdev, pos + PCI_PM_CTRL, pmcsr);
> > +
> > + set_current_state(TASK_UNINTERRUPTIBLE);
> > + schedule_timeout(HZ >> 1);
> > +
> > + /* enter the D0 power management state */
> > + pmcsr &= ~PCI_PM_CTRL_STATE_MASK;
> > + pmcsr |= PCI_D0;
> > + pci_write_config_word(pdev, pos + PCI_PM_CTRL, pmcsr);
> > +
> > + set_current_state(TASK_UNINTERRUPTIBLE);
> > + schedule_timeout(HZ >> 1);
> > +
> > + /* Restore the PCI configuration space. The Open CISS
> > + * Specification says, "Restore the PCI Configuration
> > + * Registers, offsets 00h through 60h. It is important to
> > + * restore the command register, 16-bits at offset 04h,
> > + * last. Do not restore the configuration status register,
> > + * 16-bits at offset 06h." Note that the offset is 2*i. */
> > + for (i = 0; i < 32; i++) {
> > + if (i == 2 || i == 3)
> > + continue;
> > + pci_write_config_word(pdev, 2*i, saved_config_space[i]);
> > + }
> > + wmb();
> > + pci_write_config_word(pdev, 4, saved_config_space[2]);
> > +
> > + return 0;
> > +}
> > +
> > /*
> > * This is it. Find all the controllers and register
> them. I really hate
> > * stealing all these major device numbers.
> > @@ -3404,6 +3603,24 @@ static int __devinit
> cciss_init_one(struct pci_dev *pdev,
> > int dac, return_code;
> > InquiryData_struct *inq_buff = NULL;
> >
> > + if (reset_devices) {
> > + /* Reset the controller with a PCI power-cycle */
> > + if (cciss_hard_reset_controller(pdev) ||
> cciss_reset_msi(pdev))
> > + return -ENODEV;
> > +
> > + /* Some devices (notably the HP Smart Array 5i
> Controller)
> > + need a little pause here */
> > + schedule_timeout_uninterruptible(30*HZ);
> > +
> > + /* Now try to get the controller to respond to
> a no-op */
> > + for (i=0; i<12; i++) {
> > + if (cciss_noop(pdev) == 0)
> > + break;
> > + else
> > + printk("cciss: no-op
> failed%s\n", (i < 11 ? "; re-trying" : ""));
> > + }
> > + }
> > +
> > i = alloc_cciss_hba();
> > if (i < 0)
> > return -1;
>
>
> --
> ~Randy
>
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2009-02-11 15:22 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2009-02-07 19:39 [PATCH 1/1] cciss: PCI power management reset for kexec Mike Miller
2009-02-10 22:55 ` Randy Dunlap
2009-02-11 15:21 ` Miller, Mike (OS Dev)
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox