From: Mike Miller <mike.miller@hp.com>
To: Andrew Morton <akpm@linux-foundation.org>,
Jens Axboe <jens.axboe@oracle.com>
Cc: LKML <linux-kernel@vger.kernel.org>,
randy.dunlap@oracle.com, LKML-scsi <linux-scsi@vger.kernel.org>
Subject: [PATCH 1/1] cciss: PCI power management reset for kexec
Date: Sat, 7 Feb 2009 13:39:29 -0600 [thread overview]
Message-ID: <20090207193929.GA5372@roadking.ldev.net> (raw)
Patch 1 of 1
This patch provides the better "kick-in-the-pants" on driver load in a
kexec'ed environment.
I've successfully sanity tested the port in my lab. Randy, please apply and
test. You seem to be able to bring out the worst in the driver. ;-)
Author: Chip Coldwell <coldwell@redhat.com>
CCISS: Use PCI power management to reset the controller
The kexec kernel resets the CCISS hardware in three steps:
1. Use PCI power management states to reset the controller
in the kexec kernel.
2. Clear the MSI/MSI-X bits in PCI configuration space so
that MSI initialization in the kexec kernel doesn't fail.
3. Use the CCISS "No-op" message to determine when the
controller firmware has recovered from the PCI PM reset.
Signed-off-by: Mike Miller <mike.miller@hp.com>
-------------------------------------------------------------------------------
diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index 01e6938..ff4a105 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -3390,6 +3390,205 @@ static void free_hba(int i)
kfree(p);
}
+/* Send a message CDB to the firmware. */
+static __devinit int cciss_message(struct pci_dev *pdev, unsigned char opcode, unsigned char type)
+{
+ typedef struct {
+ CommandListHeader_struct CommandHeader;
+ RequestBlock_struct Request;
+ ErrDescriptor_struct ErrorDescriptor;
+ } Command;
+ static const size_t cmd_sz = sizeof(Command) + sizeof(ErrorInfo_struct);
+ Command *cmd;
+ dma_addr_t paddr64;
+ uint32_t paddr32, tag;
+ void __iomem *vaddr;
+ int i, err;
+
+ vaddr = ioremap_nocache(pci_resource_start(pdev, 0), pci_resource_len(pdev, 0));
+ if (vaddr == NULL)
+ return -ENOMEM;
+
+ /* The Inbound Post Queue only accepts 32-bit physical addresses for the
+ CCISS commands, so they must be allocated from the lower 4GiB of
+ memory. */
+ err = pci_set_consistent_dma_mask(pdev, DMA_32BIT_MASK);
+ if (err) {
+ iounmap(vaddr);
+ return -ENOMEM;
+ }
+
+ cmd = pci_alloc_consistent(pdev, cmd_sz, &paddr64);
+ if (cmd == NULL) {
+ iounmap(vaddr);
+ return -ENOMEM;
+ }
+
+ /* This must fit, because of the 32-bit consistent DMA mask. Also,
+ although there's no guarantee, we assume that the address is at
+ least 4-byte aligned (most likely, it's page-aligned). */
+ paddr32 = paddr64;
+
+ cmd->CommandHeader.ReplyQueue = 0;
+ cmd->CommandHeader.SGList = 0;
+ cmd->CommandHeader.SGTotal = 0;
+ cmd->CommandHeader.Tag.lower = paddr32;
+ cmd->CommandHeader.Tag.upper = 0;
+ memset(&cmd->CommandHeader.LUN.LunAddrBytes, 0, 8);
+
+ cmd->Request.CDBLen = 16;
+ cmd->Request.Type.Type = TYPE_MSG;
+ cmd->Request.Type.Attribute = ATTR_HEADOFQUEUE;
+ cmd->Request.Type.Direction = XFER_NONE;
+ cmd->Request.Timeout = 0; /* Don't time out */
+ cmd->Request.CDB[0] = opcode;
+ cmd->Request.CDB[1] = type;
+ memset(&cmd->Request.CDB[2], 0, 14); /* the rest of the CDB is reserved */
+
+ cmd->ErrorDescriptor.Addr.lower = paddr32 + sizeof(Command);
+ cmd->ErrorDescriptor.Addr.upper = 0;
+ cmd->ErrorDescriptor.Len = sizeof(ErrorInfo_struct);
+
+ writel(paddr32, vaddr + SA5_REQUEST_PORT_OFFSET);
+
+ for (i = 0; i < 10; i++) {
+ tag = readl(vaddr + SA5_REPLY_PORT_OFFSET);
+ if ((tag & ~3) == paddr32)
+ break;
+ schedule_timeout_uninterruptible(HZ);
+ }
+
+ iounmap(vaddr);
+
+ /* we leak the DMA buffer here ... no choice since the controller could
+ still complete the command. */
+ if (i == 10) {
+ printk(KERN_ERR "cciss: controller message %02x:%02x timed out\n",
+ opcode, type);
+ return -ETIMEDOUT;
+ }
+
+ pci_free_consistent(pdev, cmd_sz, cmd, paddr64);
+
+ if (tag & 2) {
+ printk(KERN_ERR "cciss: controller message %02x:%02x failed\n",
+ opcode, type);
+ return -EIO;
+ }
+
+ printk(KERN_INFO "cciss: controller message %02x:%02x succeeded\n",
+ opcode, type);
+ return 0;
+}
+
+#define cciss_soft_reset_controller(p) cciss_message(p, 1, 0)
+#define cciss_noop(p) cciss_message(p, 3, 0)
+
+static __devinit int cciss_reset_msi(struct pci_dev *pdev)
+{
+/* the #defines are stolen from drivers/pci/msi.h. */
+#define msi_control_reg(base) (base + PCI_MSI_FLAGS)
+#define PCI_MSIX_FLAGS_ENABLE (1 << 15)
+
+ int pos;
+ u16 control = 0;
+
+ pos = pci_find_capability(pdev, PCI_CAP_ID_MSI);
+ if (pos) {
+ pci_read_config_word(pdev, msi_control_reg(pos), &control);
+ if (control & PCI_MSI_FLAGS_ENABLE) {
+ printk(KERN_INFO "cciss: resetting MSI\n");
+ pci_write_config_word(pdev, msi_control_reg(pos), control & ~PCI_MSI_FLAGS_ENABLE);
+ }
+ }
+
+ pos = pci_find_capability(pdev, PCI_CAP_ID_MSIX);
+ if (pos) {
+ pci_read_config_word(pdev, msi_control_reg(pos), &control);
+ if (control & PCI_MSIX_FLAGS_ENABLE) {
+ printk(KERN_INFO "cciss: resetting MSI-X\n");
+ pci_write_config_word(pdev, msi_control_reg(pos), control & ~PCI_MSIX_FLAGS_ENABLE);
+ }
+ }
+
+ return 0;
+}
+
+/* This does a hard reset of the controller using PCI power management
+ * states. */
+static __devinit int cciss_hard_reset_controller(struct pci_dev *pdev)
+{
+ u16 pmcsr, saved_config_space[32];
+ int i, pos;
+
+ printk(KERN_INFO "cciss: using PCI PM to reset controller\n");
+
+ /* This is very nearly the same thing as
+
+ pci_save_state(pci_dev);
+ pci_set_power_state(pci_dev, PCI_D3hot);
+ pci_set_power_state(pci_dev, PCI_D0);
+ pci_restore_state(pci_dev);
+
+ but we can't use these nice canned kernel routines on
+ kexec, because they also check the MSI/MSI-X state in PCI
+ configuration space and do the wrong thing when it is
+ set/cleared. Also, the pci_save/restore_state functions
+ violate the ordering requirements for restoring the
+ configuration space from the CCISS document (see the
+ comment below). So we roll our own .... */
+
+ for (i = 0; i < 32; i++)
+ pci_read_config_word(pdev, 2*i, &saved_config_space[i]);
+
+ pos = pci_find_capability(pdev, PCI_CAP_ID_PM);
+ if (pos == 0) {
+ printk(KERN_ERR "cciss_reset_controller: PCI PM not supported\n");
+ return -ENODEV;
+ }
+
+ /* Quoting from the Open CISS Specification: "The Power
+ * Management Control/Status Register (CSR) controls the power
+ * state of the device. The normal operating state is D0,
+ * CSR=00h. The software off state is D3, CSR=03h. To reset
+ * the controller, place the interface device in D3 then to
+ * D0, this causes a secondary PCI reset which will reset the
+ * controller." */
+
+ /* enter the D3hot power management state */
+ pci_read_config_word(pdev, pos + PCI_PM_CTRL, &pmcsr);
+ pmcsr &= ~PCI_PM_CTRL_STATE_MASK;
+ pmcsr |= PCI_D3hot;
+ pci_write_config_word(pdev, pos + PCI_PM_CTRL, pmcsr);
+
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ schedule_timeout(HZ >> 1);
+
+ /* enter the D0 power management state */
+ pmcsr &= ~PCI_PM_CTRL_STATE_MASK;
+ pmcsr |= PCI_D0;
+ pci_write_config_word(pdev, pos + PCI_PM_CTRL, pmcsr);
+
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ schedule_timeout(HZ >> 1);
+
+ /* Restore the PCI configuration space. The Open CISS
+ * Specification says, "Restore the PCI Configuration
+ * Registers, offsets 00h through 60h. It is important to
+ * restore the command register, 16-bits at offset 04h,
+ * last. Do not restore the configuration status register,
+ * 16-bits at offset 06h." Note that the offset is 2*i. */
+ for (i = 0; i < 32; i++) {
+ if (i == 2 || i == 3)
+ continue;
+ pci_write_config_word(pdev, 2*i, saved_config_space[i]);
+ }
+ wmb();
+ pci_write_config_word(pdev, 4, saved_config_space[2]);
+
+ return 0;
+}
+
/*
* This is it. Find all the controllers and register them. I really hate
* stealing all these major device numbers.
@@ -3404,6 +3603,24 @@ static int __devinit cciss_init_one(struct pci_dev *pdev,
int dac, return_code;
InquiryData_struct *inq_buff = NULL;
+ if (reset_devices) {
+ /* Reset the controller with a PCI power-cycle */
+ if (cciss_hard_reset_controller(pdev) || cciss_reset_msi(pdev))
+ return -ENODEV;
+
+ /* Some devices (notably the HP Smart Array 5i Controller)
+ need a little pause here */
+ schedule_timeout_uninterruptible(30*HZ);
+
+ /* Now try to get the controller to respond to a no-op */
+ for (i=0; i<12; i++) {
+ if (cciss_noop(pdev) == 0)
+ break;
+ else
+ printk("cciss: no-op failed%s\n", (i < 11 ? "; re-trying" : ""));
+ }
+ }
+
i = alloc_cciss_hba();
if (i < 0)
return -1;
next reply other threads:[~2009-02-07 19:39 UTC|newest]
Thread overview: 3+ messages / expand[flat|nested] mbox.gz Atom feed top
2009-02-07 19:39 Mike Miller [this message]
2009-02-10 22:55 ` [PATCH 1/1] cciss: PCI power management reset for kexec Randy Dunlap
2009-02-11 15:21 ` Miller, Mike (OS Dev)
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20090207193929.GA5372@roadking.ldev.net \
--to=mike.miller@hp.com \
--cc=akpm@linux-foundation.org \
--cc=jens.axboe@oracle.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-scsi@vger.kernel.org \
--cc=randy.dunlap@oracle.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.