From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-9.0 required=3.0 tests=HEADER_FROM_DIFFERENT_DOMAINS, INCLUDES_PATCH,MAILING_LIST_MULTI,SIGNED_OFF_BY,SPF_PASS,URIBL_BLOCKED, USER_AGENT_GIT autolearn=ham autolearn_force=no version=3.4.0 Received: from mail.kernel.org (mail.kernel.org [198.145.29.99]) by smtp.lore.kernel.org (Postfix) with ESMTP id F07EAC43381 for ; Fri, 22 Feb 2019 01:07:49 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by mail.kernel.org (Postfix) with ESMTP id BB5482080F for ; Fri, 22 Feb 2019 01:07:49 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1726749AbfBVBHs (ORCPT ); Thu, 21 Feb 2019 20:07:48 -0500 Received: from mga06.intel.com ([134.134.136.31]:22794 "EHLO mga06.intel.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1726067AbfBVBHr (ORCPT ); Thu, 21 Feb 2019 20:07:47 -0500 X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False Received: from fmsmga007.fm.intel.com ([10.253.24.52]) by orsmga104.jf.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384; 21 Feb 2019 17:07:47 -0800 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.58,397,1544515200"; d="scan'208";a="124288743" Received: from debian-vmd.lm.intel.com ([10.232.112.218]) by fmsmga007.fm.intel.com with ESMTP; 21 Feb 2019 17:07:46 -0800 From: Jon Derrick To: Cc: Keith Busch , Jens Axboe , Christoph Hellwig , Sagi Grimberg , Alex Gagniuc , , Jon Derrick Subject: [PATCH] nvme-pci: Prevent mmio reads if pci channel offline Date: Thu, 21 Feb 2019 18:05:02 -0700 Message-Id: <20190222010502.2434-1-jonathan.derrick@intel.com> X-Mailer: git-send-email 2.19.1 MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Sender: linux-kernel-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Some platforms don't seem to easily tolerate non-posted mmio reads on lost (hot removed) devices. This has been noted in previous modifications to other layers where an mmio read to a lost device could cause an undesired firmware intervention [1][2]. This patch reworks the nvme-pci reads to quickly check connectivity prior to reading the BAR. The intent is to prevent a non-posted mmio read which would definitely result in an error condition of some sort. There is, of course, a chance the link becomes disconnected between the check and the read. Like other similar checks, it is only intended to reduce the likelihood of encountering these issues and not fully close the gap. mmio writes are posted and in the fast path and have been left as-is. [1] https://lkml.org/lkml/2018/7/30/858 [2] https://lkml.org/lkml/2018/9/18/1546 Signed-off-by: Jon Derrick --- drivers/nvme/host/pci.c | 75 ++++++++++++++++++++++++++++++----------- 1 file changed, 56 insertions(+), 19 deletions(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index f54718b63637..e555ac2afacd 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1264,6 +1264,33 @@ static void nvme_warn_reset(struct nvme_dev *dev, u32 csts) csts, result); } +static int nvme_reg_readl(struct nvme_dev *dev, u32 off, u32 *val) +{ + if (pci_channel_offline(to_pci_dev(dev->dev))) + return -ENODEV; + + *val = readl(dev->bar + off); + return 0; +} + +static int nvme_reg_readq(struct nvme_dev *dev, u32 off, u64 *val) +{ + if (pci_channel_offline(to_pci_dev(dev->dev))) + return -ENODEV; + + *val = readq(dev->bar + off); + return 0; +} + +static int nvme_reg_lo_hi_readq(struct nvme_dev *dev, u32 off, u64 *val) +{ + if (pci_channel_offline(to_pci_dev(dev->dev))) + return -ENODEV; + + *val = lo_hi_readq(dev->bar + off); + return 0; +} + static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved) { struct nvme_iod *iod = blk_mq_rq_to_pdu(req); @@ -1271,13 +1298,13 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved) struct nvme_dev *dev = nvmeq->dev; struct request *abort_req; struct nvme_command cmd; - u32 csts = readl(dev->bar + NVME_REG_CSTS); + u32 csts; /* If PCI error recovery process is happening, we cannot reset or * the recovery mechanism will surely fail. */ mb(); - if (pci_channel_offline(to_pci_dev(dev->dev))) + if (nvme_reg_readl(dev, NVME_REG_CSTS, &csts)) return BLK_EH_RESET_TIMER; /* @@ -1692,18 +1719,24 @@ static int nvme_remap_bar(struct nvme_dev *dev, unsigned long size) static int nvme_pci_configure_admin_queue(struct nvme_dev *dev) { int result; - u32 aqa; + u32 csts, vs, aqa; struct nvme_queue *nvmeq; result = nvme_remap_bar(dev, db_bar_size(dev, 0)); if (result < 0) return result; - dev->subsystem = readl(dev->bar + NVME_REG_VS) >= NVME_VS(1, 1, 0) ? - NVME_CAP_NSSRC(dev->ctrl.cap) : 0; + result = nvme_reg_readl(dev, NVME_REG_CSTS, &csts); + if (result) + return result; + + result = nvme_reg_readl(dev, NVME_REG_VS, &vs); + if (result) + return result; - if (dev->subsystem && - (readl(dev->bar + NVME_REG_CSTS) & NVME_CSTS_NSSRO)) + dev->subsystem = (vs >= NVME_VS(1, 1, 0)) ? + NVME_CAP_NSSRC(dev->ctrl.cap) : 0; + if (dev->subsystem && (csts & NVME_CSTS_NSSRO)) writel(NVME_CSTS_NSSRO, dev->bar + NVME_REG_CSTS); result = nvme_disable_ctrl(&dev->ctrl, dev->ctrl.cap); @@ -1808,10 +1841,11 @@ static void nvme_map_cmb(struct nvme_dev *dev) if (dev->cmb_size) return; - dev->cmbsz = readl(dev->bar + NVME_REG_CMBSZ); - if (!dev->cmbsz) + if (nvme_reg_readl(dev, NVME_REG_CMBSZ, &dev->cmbsz) || !dev->cmbsz) + return; + + if (nvme_reg_readl(dev, NVME_REG_CMBLOC, &dev->cmbloc)) return; - dev->cmbloc = readl(dev->bar + NVME_REG_CMBLOC); size = nvme_cmb_size_unit(dev) * nvme_cmb_size(dev); offset = nvme_cmb_size_unit(dev) * NVME_CMB_OFST(dev->cmbloc); @@ -2357,6 +2391,7 @@ static int nvme_pci_enable(struct nvme_dev *dev) { int result = -ENOMEM; struct pci_dev *pdev = to_pci_dev(dev->dev); + u32 csts; if (pci_enable_device_mem(pdev)) return result; @@ -2367,7 +2402,7 @@ static int nvme_pci_enable(struct nvme_dev *dev) dma_set_mask_and_coherent(dev->dev, DMA_BIT_MASK(32))) goto disable; - if (readl(dev->bar + NVME_REG_CSTS) == -1) { + if (nvme_reg_readl(dev, NVME_REG_CSTS, &csts) || csts == -1) { result = -ENODEV; goto disable; } @@ -2381,7 +2416,9 @@ static int nvme_pci_enable(struct nvme_dev *dev) if (result < 0) return result; - dev->ctrl.cap = lo_hi_readq(dev->bar + NVME_REG_CAP); + result = nvme_reg_lo_hi_readq(dev, NVME_REG_CAP, &dev->ctrl.cap); + if (result) + return result; dev->q_depth = min_t(int, NVME_CAP_MQES(dev->ctrl.cap) + 1, io_queue_depth); @@ -2442,13 +2479,15 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown) mutex_lock(&dev->shutdown_lock); if (pci_is_enabled(pdev)) { - u32 csts = readl(dev->bar + NVME_REG_CSTS); + u32 csts; if (dev->ctrl.state == NVME_CTRL_LIVE || dev->ctrl.state == NVME_CTRL_RESETTING) nvme_start_freeze(&dev->ctrl); - dead = !!((csts & NVME_CSTS_CFS) || !(csts & NVME_CSTS_RDY) || - pdev->error_state != pci_channel_io_normal); + + if (nvme_reg_readl(dev, NVME_REG_CSTS, &csts) == 0) + dead = !!((csts & NVME_CSTS_CFS) || + !(csts & NVME_CSTS_RDY)); } /* @@ -2661,8 +2700,7 @@ static void nvme_remove_dead_ctrl_work(struct work_struct *work) static int nvme_pci_reg_read32(struct nvme_ctrl *ctrl, u32 off, u32 *val) { - *val = readl(to_nvme_dev(ctrl)->bar + off); - return 0; + return nvme_reg_readl(to_nvme_dev(ctrl), off, val); } static int nvme_pci_reg_write32(struct nvme_ctrl *ctrl, u32 off, u32 val) @@ -2673,8 +2711,7 @@ static int nvme_pci_reg_write32(struct nvme_ctrl *ctrl, u32 off, u32 val) static int nvme_pci_reg_read64(struct nvme_ctrl *ctrl, u32 off, u64 *val) { - *val = readq(to_nvme_dev(ctrl)->bar + off); - return 0; + return nvme_reg_readq(to_nvme_dev(ctrl), off, val); } static int nvme_pci_get_address(struct nvme_ctrl *ctrl, char *buf, int size) -- 2.19.1