[PATCH V3 1/2] NTB: Add AMD PCI-Express NTB driver

public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed

* [PATCH V3 1/2] NTB: Add AMD PCI-Express NTB driver
@ 2016-01-14 11:44 Xiangliang Yu
  2016-01-14 15:56 ` Allen Hubbe
  2016-01-18  4:04 ` Jon Mason
  0 siblings, 2 replies; 9+ messages in thread
From: Xiangliang Yu @ 2016-01-14 11:44 UTC (permalink / raw)
  To: jdmason, dave.jiang, Allen.Hubbe, linux-ntb, linux-kernel
  Cc: SPG_Linux_Kernel, Xiangliang Yu

AMD NTB support following main features:
(1) Three memory windows;
(2) Sixteen 32-bit scratch pad;
(3) Two 16-bit doorbell interrupt;
(4) Five event interrupts;
(5) One system can wake up opposite system of NTB;
(6) Flush previous request to the opposite system;
(7) There are reset and PME_TO mechanisms between two systems;

Signed-off-by: Xiangliang Yu <Xiangliang.Yu@amd.com>
---
 drivers/ntb/hw/Kconfig          |    1 +
 drivers/ntb/hw/Makefile         |    1 +
 drivers/ntb/hw/amd/Kconfig      |    7 +
 drivers/ntb/hw/amd/Makefile     |    1 +
 drivers/ntb/hw/amd/ntb_hw_amd.c | 1148 +++++++++++++++++++++++++++++++++++++++
 drivers/ntb/hw/amd/ntb_hw_amd.h |  246 +++++++++
 6 files changed, 1404 insertions(+)
 create mode 100644 drivers/ntb/hw/amd/Kconfig
 create mode 100644 drivers/ntb/hw/amd/Makefile
 create mode 100644 drivers/ntb/hw/amd/ntb_hw_amd.c
 create mode 100644 drivers/ntb/hw/amd/ntb_hw_amd.h

diff --git a/drivers/ntb/hw/Kconfig b/drivers/ntb/hw/Kconfig
index 4d5535c..0c5c2a6 100644
--- a/drivers/ntb/hw/Kconfig
+++ b/drivers/ntb/hw/Kconfig
@@ -1 +1,2 @@
 source "drivers/ntb/hw/intel/Kconfig"
+source "drivers/ntb/hw/amd/Kconfig"
diff --git a/drivers/ntb/hw/Makefile b/drivers/ntb/hw/Makefile
index 175d7c9..636be7d 100644
--- a/drivers/ntb/hw/Makefile
+++ b/drivers/ntb/hw/Makefile
@@ -1 +1,2 @@
 obj-$(CONFIG_NTB_INTEL)	+= intel/
+obj-$(CONFIG_NTB_AMD)	+= amd/
diff --git a/drivers/ntb/hw/amd/Kconfig b/drivers/ntb/hw/amd/Kconfig
new file mode 100644
index 0000000..cfe903c
--- /dev/null
+++ b/drivers/ntb/hw/amd/Kconfig
@@ -0,0 +1,7 @@
+config NTB_AMD
+	tristate "AMD Non-Transparent Bridge support"
+	depends on X86_64
+	help
+	 This driver supports AMD NTB on capable Zeppelin hardware.
+
+	 If unsure, say N.
diff --git a/drivers/ntb/hw/amd/Makefile b/drivers/ntb/hw/amd/Makefile
new file mode 100644
index 0000000..ad54da9
--- /dev/null
+++ b/drivers/ntb/hw/amd/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_NTB_AMD) += ntb_hw_amd.o
diff --git a/drivers/ntb/hw/amd/ntb_hw_amd.c b/drivers/ntb/hw/amd/ntb_hw_amd.c
new file mode 100644
index 0000000..8df6d7b
--- /dev/null
+++ b/drivers/ntb/hw/amd/ntb_hw_amd.c
@@ -0,0 +1,1148 @@
+/*
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ *   redistributing this file, you may do so under either license.
+ *
+ *   GPL LICENSE SUMMARY
+ *
+ *   Copyright (C) 2016 Advanced Micro Devices, Inc. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of version 2 of the GNU General Public License as
+ *   published by the Free Software Foundation.
+ *
+ *   BSD LICENSE
+ *
+ *   Copyright (C) 2016 Advanced Micro Devices, Inc. All Rights Reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copy
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of AMD Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * AMD PCIe NTB Linux driver
+ *
+ * Contact Information:
+ * Xiangliang Yu<Xiangliang.Yu@amd.com>
+ */
+
+#include <linux/debugfs.h>
+#include <linux/delay.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/acpi.h>
+#include <linux/pci.h>
+#include <linux/random.h>
+#include <linux/slab.h>
+#include <linux/ntb.h>
+
+#include "ntb_hw_amd.h"
+
+#define NTB_NAME	"ntb_hw_amd"
+#define NTB_DESC	"AMD(R) PCI-E Non-Transparent Bridge Driver"
+#define NTB_VER		"1.0"
+
+MODULE_DESCRIPTION(NTB_DESC);
+MODULE_VERSION(NTB_VER);
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_AUTHOR("AMD Inc.");
+
+static const struct file_operations amd_ntb_debugfs_info;
+static struct dentry *debugfs_dir;
+
+static const struct ntb_dev_ops amd_ntb_ops = {
+	.mw_count		= amd_ntb_mw_count,
+	.mw_get_range		= amd_ntb_mw_get_range,
+	.mw_set_trans		= amd_ntb_mw_set_trans,
+	.link_is_up		= amd_ntb_link_is_up,
+	.link_enable		= amd_ntb_link_enable,
+	.link_disable		= amd_ntb_link_disable,
+	.db_valid_mask		= amd_ntb_db_valid_mask,
+	.db_vector_count	= amd_ntb_db_vector_count,
+	.db_vector_mask		= amd_ntb_db_vector_mask,
+	.db_read		= amd_ntb_db_read,
+	.db_clear		= amd_ntb_db_clear,
+	.db_set_mask		= amd_ntb_db_set_mask,
+	.db_clear_mask		= amd_ntb_db_clear_mask,
+	.peer_db_addr		= amd_ntb_peer_db_addr,
+	.peer_db_set		= amd_ntb_peer_db_set,
+	.spad_count		= amd_ntb_spad_count,
+	.spad_read		= amd_ntb_spad_read,
+	.spad_write		= amd_ntb_spad_write,
+	.peer_spad_addr		= amd_ntb_peer_spad_addr,
+	.peer_spad_read		= amd_ntb_peer_spad_read,
+	.peer_spad_write	= amd_ntb_peer_spad_write,
+};
+
+static int ndev_mw_to_bar(struct amd_ntb_dev *ndev, int idx)
+{
+	if (idx < 0 || idx > ndev->mw_count)
+		return -EINVAL;
+
+	return 1 << idx;
+}
+
+static int amd_ntb_mw_count(struct ntb_dev *ntb)
+{
+	return ntb_ndev(ntb)->mw_count;
+}
+
+static int amd_ntb_mw_get_range(struct ntb_dev *ntb, int idx,
+				phys_addr_t *base,
+				resource_size_t *size,
+				resource_size_t *align,
+				resource_size_t *align_size)
+{
+	struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+	int bar;
+
+	bar = ndev_mw_to_bar(ndev, idx);
+	if (bar < 0)
+		return bar;
+
+	if (base)
+		*base = pci_resource_start(ndev->ntb.pdev, bar);
+
+	if (size)
+		*size = pci_resource_len(ndev->ntb.pdev, bar);
+
+	if (align)
+		*align = 4096;
+
+	if (align_size)
+		*align_size = 1;
+
+	return 0;
+}
+
+static int amd_ntb_mw_set_trans(struct ntb_dev *ntb, int idx,
+				dma_addr_t addr, resource_size_t size)
+{
+	struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+	unsigned long xlat_reg, limit_reg = 0;
+	resource_size_t mw_size;
+	void __iomem *mmio, *peer_mmio;
+	u64 base_addr, limit, reg_val;
+	int bar;
+
+	bar = ndev_mw_to_bar(ndev, idx);
+	if (bar < 0)
+		return bar;
+
+	mw_size = pci_resource_len(ndev->ntb.pdev, bar);
+
+	/* make sure the range fits in the usable mw size */
+	if (size > mw_size)
+		return -EINVAL;
+
+	mmio = ndev->self_mmio;
+	peer_mmio = ndev->peer_mmio;
+
+	base_addr = pci_resource_start(ndev->ntb.pdev, bar);
+	if (bar == 1) {
+		xlat_reg = AMD_BAR1XLAT_OFFSET;
+		limit_reg = AMD_BAR1LMT_OFFSET;
+	} else {
+		xlat_reg = AMD_BAR23XLAT_OFFSET + ((bar - 2) << 3);
+		limit_reg = AMD_BAR23LMT_OFFSET + ((bar - 2) << 3);
+	}
+
+	if (bar != 1) {
+		/* Set the limit if supported */
+		limit = base_addr + size;
+
+		/* set and verify setting the translation address */
+		iowrite64(addr, peer_mmio + xlat_reg);
+		reg_val = ioread64(peer_mmio + xlat_reg);
+		if (reg_val != addr) {
+			iowrite64(0, peer_mmio + xlat_reg);
+			return -EIO;
+		}
+
+		/* set and verify setting the limit */
+		iowrite64(limit, mmio + limit_reg);
+		reg_val = ioread64(mmio + limit_reg);
+		if (reg_val != limit) {
+			iowrite64(base_addr, mmio + limit_reg);
+			iowrite64(0, peer_mmio + xlat_reg);
+			return -EIO;
+		}
+	} else {
+		/* split bar addr range must all be 32 bit */
+		if (addr & (~0ull << 32))
+			return -EINVAL;
+		if ((addr + size) & (~0ull << 32))
+			return -EINVAL;
+
+		/* Set the limit if supported */
+		limit = base_addr + size;
+
+		/* set and verify setting the translation address */
+		iowrite64(addr, peer_mmio + xlat_reg);
+		reg_val = ioread64(peer_mmio + xlat_reg);
+		if (reg_val != addr) {
+			iowrite64(0, peer_mmio + xlat_reg);
+			return -EIO;
+		}
+
+		/* set and verify setting the limit */
+		writel(limit, mmio + limit_reg);
+		reg_val = readl(mmio + limit_reg);
+		if (reg_val != limit) {
+			writel(base_addr, mmio + limit_reg);
+			writel(0, peer_mmio + xlat_reg);
+			return -EIO;
+		}
+	}
+
+	return 0;
+}
+
+static int amd_link_is_up(struct amd_ntb_dev *ndev)
+{
+	/* set link down and clear flag */
+	if (ndev->peer_sta & AMD_PEER_RESET_EVENT) {
+		ndev->peer_sta &= ~AMD_PEER_RESET_EVENT;
+		return 0;
+	} else if (ndev->peer_sta & (AMD_PEER_D3_EVENT |
+				     AMD_PEER_PMETO_EVENT)) {
+		return 0;
+	} else if (ndev->peer_sta & AMD_PEER_D0_EVENT) {
+		ndev->peer_sta = 0;
+		return 0;
+	} else
+		return NTB_LNK_STA_ACTIVE(ndev->cntl_sta);
+}
+
+static int amd_ntb_link_is_up(struct ntb_dev *ntb,
+			      enum ntb_speed *speed,
+			      enum ntb_width *width)
+{
+	struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+	int ret = 0;
+
+	if (amd_link_is_up(ndev)) {
+		if (speed)
+			*speed = NTB_LNK_STA_SPEED(ndev->lnk_sta);
+		if (width)
+			*width = NTB_LNK_STA_WIDTH(ndev->lnk_sta);
+
+		dev_dbg(ndev_dev(ndev), "link is up.\n");
+
+		ret = 1;
+	} else {
+		if (speed)
+			*speed = NTB_SPEED_NONE;
+		if (width)
+			*width = NTB_WIDTH_NONE;
+
+		dev_dbg(ndev_dev(ndev), "link is down.\n");
+	}
+
+	return ret;
+}
+
+static int amd_ntb_link_enable(struct ntb_dev *ntb,
+			       enum ntb_speed max_speed,
+			       enum ntb_width max_width)
+{
+	struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+	void __iomem *mmio = ndev->self_mmio;
+	u32 ntb_ctl;
+
+	/* Enable event interrupt */
+	ndev->int_mask &= ~AMD_EVENT_INTMASK;
+	writel(ndev->int_mask, mmio + AMD_INTMASK_OFFSET);
+
+	if (ndev->ntb.topo == NTB_TOPO_SEC)
+		return -EINVAL;
+	dev_dbg(ndev_dev(ndev), "Enabling Link.\n");
+
+	ntb_ctl = readl(mmio + AMD_CNTL_OFFSET);
+	ntb_ctl |= (3 << 20);
+	writel(ntb_ctl, mmio + AMD_CNTL_OFFSET);
+
+	return 0;
+}
+
+static int amd_ntb_link_disable(struct ntb_dev *ntb)
+{
+	struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+	void __iomem *mmio = ndev->self_mmio;
+	u32 ntb_ctl;
+
+	/* Disable event interrupt */
+	ndev->int_mask |= AMD_EVENT_INTMASK;
+	writel(ndev->int_mask, mmio + AMD_INTMASK_OFFSET);
+
+	if (ndev->ntb.topo == NTB_TOPO_SEC)
+		return -EINVAL;
+	dev_dbg(ndev_dev(ndev), "Enabling Link.\n");
+
+	ntb_ctl = readl(mmio + AMD_CNTL_OFFSET);
+	ntb_ctl &= ~(3 << 16);
+	writel(ntb_ctl, mmio + AMD_CNTL_OFFSET);
+
+	return 0;
+}
+
+static u64 amd_ntb_db_valid_mask(struct ntb_dev *ntb)
+{
+	return ntb_ndev(ntb)->db_valid_mask;
+}
+
+static int amd_ntb_db_vector_count(struct ntb_dev *ntb)
+{
+	return ntb_ndev(ntb)->db_count;
+}
+
+static u64 amd_ntb_db_vector_mask(struct ntb_dev *ntb, int db_vector)
+{
+	struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+
+	if (db_vector < 0 || db_vector > ndev->db_count)
+		return 0;
+
+	return ntb_ndev(ntb)->db_valid_mask & (1 << db_vector);
+}
+
+static u64 amd_ntb_db_read(struct ntb_dev *ntb)
+{
+	struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+	void __iomem *mmio = ndev->self_mmio;
+
+	return (u64)readw(mmio + AMD_DBSTAT_OFFSET);
+}
+
+static int amd_ntb_db_clear(struct ntb_dev *ntb, u64 db_bits)
+{
+	struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+	void __iomem *mmio = ndev->self_mmio;
+
+	writew((u16)db_bits, mmio + AMD_DBSTAT_OFFSET);
+
+	return 0;
+}
+
+static int amd_ntb_db_set_mask(struct ntb_dev *ntb, u64 db_bits)
+{
+	struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+	void __iomem *mmio = ndev->self_mmio;
+	unsigned long flags;
+
+	if (db_bits & ~ndev->db_valid_mask)
+		return -EINVAL;
+
+	spin_lock_irqsave(&ndev->db_mask_lock, flags);
+	ndev->db_mask |= db_bits;
+	writew((u16)ndev->db_mask, mmio + AMD_DBMASK_OFFSET);
+	spin_unlock_irqrestore(&ndev->db_mask_lock, flags);
+
+	return 0;
+}
+
+static int amd_ntb_db_clear_mask(struct ntb_dev *ntb, u64 db_bits)
+{
+	struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+	void __iomem *mmio = ndev->self_mmio;
+	unsigned long flags;
+
+	if (db_bits & ~ndev->db_valid_mask)
+		return -EINVAL;
+
+	spin_lock_irqsave(&ndev->db_mask_lock, flags);
+	ndev->db_mask &= ~db_bits;
+	writew((u16)ndev->db_mask, mmio + AMD_DBMASK_OFFSET);
+	spin_unlock_irqrestore(&ndev->db_mask_lock, flags);
+
+	return 0;
+}
+
+static int amd_ntb_peer_db_addr(struct ntb_dev *ntb,
+		phys_addr_t *db_addr,
+		resource_size_t *db_size)
+{
+	struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+
+	if (db_addr)
+		*db_addr = (phys_addr_t)(ndev->peer_mmio + AMD_DBREQ_OFFSET);
+	if (db_size)
+		*db_size = sizeof(u32);
+
+	return 0;
+}
+
+static int amd_ntb_peer_db_set(struct ntb_dev *ntb, u64 db_bits)
+{
+	struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+	void __iomem *mmio = ndev->self_mmio;
+
+	writew((u16)db_bits, mmio + AMD_DBREQ_OFFSET);
+
+	return 0;
+}
+
+static int amd_ntb_spad_count(struct ntb_dev *ntb)
+{
+	return ntb_ndev(ntb)->spad_count;
+}
+
+static u32 amd_ntb_spad_read(struct ntb_dev *ntb, int idx)
+{
+	struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+	void __iomem *mmio = ndev->self_mmio;
+	u32 offset = 0;
+
+	if (idx < 0 || idx >= ndev->spad_count)
+		return 0;
+
+	offset = ndev->self_spad + (idx << 2);
+	return readl(mmio + AMD_SPAD_OFFSET + offset);
+}
+
+static int amd_ntb_spad_write(struct ntb_dev *ntb,
+				int idx, u32 val)
+{
+	struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+	void __iomem *mmio = ndev->self_mmio;
+	u32 offset = 0;
+
+	if (idx < 0 || idx >= ndev->spad_count)
+		return -EINVAL;
+
+	offset = ndev->self_spad + (idx << 2);
+	writel(val, mmio + AMD_SPAD_OFFSET + offset);
+
+	return 0;
+}
+
+static int amd_ntb_peer_spad_addr(struct ntb_dev *ntb, int idx,
+				    phys_addr_t *spad_addr)
+{
+	struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+
+	if (idx < 0 || idx >= ndev->spad_count)
+		return -EINVAL;
+
+	if (spad_addr)
+		*spad_addr = (phys_addr_t)(ndev->self_mmio + AMD_SPAD_OFFSET +
+				ndev->peer_spad + (idx << 2));
+	return 0;
+}
+
+static u32 amd_ntb_peer_spad_read(struct ntb_dev *ntb, int idx)
+{
+	struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+	void __iomem *mmio = ndev->self_mmio;
+	u32 offset;
+
+	if (idx < 0 || idx >= ndev->spad_count)
+		return -EINVAL;
+
+	offset = ndev->peer_spad + (idx << 2);
+	return readl(mmio + AMD_SPAD_OFFSET + offset);
+}
+
+static int amd_ntb_peer_spad_write(struct ntb_dev *ntb,
+				     int idx, u32 val)
+{
+	struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+	void __iomem *mmio = ndev->self_mmio;
+	u32 offset;
+
+	if (idx < 0 || idx >= ndev->spad_count)
+		return -EINVAL;
+
+	offset = ndev->peer_spad + (idx << 2);
+	writel(val, mmio + AMD_SPAD_OFFSET + offset);
+
+	return 0;
+}
+
+static void amd_ack_SMU(struct amd_ntb_dev *ndev, u32 bit)
+{
+	void __iomem *mmio = ndev->self_mmio;
+	int reg;
+
+	reg = readl(mmio + AMD_SMUACK_OFFSET);
+	reg |= bit;
+	writel(reg, mmio + AMD_SMUACK_OFFSET);
+
+	ndev->peer_sta |= bit;
+}
+
+static void amd_handle_event(struct amd_ntb_dev *ndev, int vec)
+{
+	void __iomem *mmio = ndev->self_mmio;
+	u32 status;
+
+	status = readl(mmio + AMD_INTSTAT_OFFSET);
+	if (!(status & AMD_EVENT_INTMASK))
+		return;
+
+	dev_dbg(ndev_dev(ndev), "status = 0x%x and vec = %d\n", status, vec);
+
+	status &= AMD_EVENT_INTMASK;
+	switch (status) {
+	case AMD_PEER_FLUSH_EVENT:
+		dev_info(ndev_dev(ndev), "Flush is done.\n");
+		break;
+	case AMD_PEER_RESET_EVENT:
+		amd_ack_SMU(ndev, AMD_PEER_RESET_EVENT);
+
+		/* link down first */
+		ntb_link_event(&ndev->ntb);
+		/* polling peer status */
+		schedule_delayed_work(&ndev->hb_timer, AMD_LINK_HB_TIMEOUT);
+
+		break;
+	case AMD_PEER_D3_EVENT:
+	case AMD_PEER_PMETO_EVENT:
+		amd_ack_SMU(ndev, status);
+
+		/* link down */
+		ntb_link_event(&ndev->ntb);
+
+		break;
+	case AMD_PEER_D0_EVENT:
+		mmio = ndev->peer_mmio;
+		status = readl(mmio + AMD_PMESTAT_OFFSET);
+		/* check if this is WAKEUP event */
+		if (status & 0x1)
+			dev_info(ndev_dev(ndev), "Wakeup is done.\n");
+
+		amd_ack_SMU(ndev, AMD_PEER_D0_EVENT);
+
+		if (amd_link_is_up(ndev))
+			ntb_link_event(&ndev->ntb);
+		else
+			schedule_delayed_work(&ndev->hb_timer,
+						AMD_LINK_HB_TIMEOUT);
+		break;
+	default:
+		dev_info(ndev_dev(ndev), "event status = 0x%x.\n", status);
+		break;
+	}
+}
+
+static irqreturn_t ndev_interrupt(struct amd_ntb_dev *ndev, int vec)
+{
+	dev_dbg(ndev_dev(ndev), "vec %d\n", vec);
+
+	if (vec > (ndev->msix_vec_count - 1)) {
+		dev_err(ndev_dev(ndev), "Invalid interrupt.\n");
+		return IRQ_HANDLED;
+	}
+
+	if (vec > 15 || (ndev->msix_vec_count == 1))
+		amd_handle_event(ndev, vec);
+
+	if (vec < 16)
+		ntb_db_event(&ndev->ntb, vec);
+
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t ndev_vec_isr(int irq, void *dev)
+{
+	struct amd_ntb_vec *nvec = dev;
+
+	return ndev_interrupt(nvec->ndev, nvec->num);
+}
+
+static irqreturn_t ndev_irq_isr(int irq, void *dev)
+{
+	struct amd_ntb_dev *ndev = dev;
+
+	return ndev_interrupt(ndev, irq - ndev_pdev(ndev)->irq);
+}
+
+static int ndev_init_isr(struct amd_ntb_dev *ndev,
+				int msix_min, int msix_max)
+{
+	struct pci_dev *pdev;
+	int rc, i, msix_count, node;
+
+	pdev = ndev_pdev(ndev);
+
+	node = dev_to_node(&pdev->dev);
+
+	ndev->db_mask = ndev->db_valid_mask;
+
+	/* Try to set up msix irq */
+	ndev->vec = kzalloc_node(msix_max * sizeof(*ndev->vec),
+				 GFP_KERNEL, node);
+	if (!ndev->vec)
+		goto err_msix_vec_alloc;
+
+	ndev->msix = kzalloc_node(msix_max * sizeof(*ndev->msix),
+				  GFP_KERNEL, node);
+	if (!ndev->msix)
+		goto err_msix_alloc;
+
+	for (i = 0; i < msix_max; ++i)
+		ndev->msix[i].entry = i;
+
+	msix_count = pci_enable_msix_range(pdev, ndev->msix,
+					   msix_min, msix_max);
+	if (msix_count < 0)
+		goto err_msix_enable;
+
+	/* Disable MSIX if msix count is less than 16 */
+	if (msix_count < msix_min) {
+		pci_disable_msix(pdev);
+		goto err_msix_enable;
+	}
+
+	for (i = 0; i < msix_count; ++i) {
+		ndev->vec[i].ndev = ndev;
+		ndev->vec[i].num = i;
+		rc = request_irq(ndev->msix[i].vector, ndev_vec_isr, 0,
+				 "ndev_vec_isr", &ndev->vec[i]);
+		if (rc)
+			goto err_msix_request;
+	}
+
+	dev_dbg(ndev_dev(ndev), "Using msix interrupts\n");
+	ndev->db_count = msix_min;
+	ndev->msix_vec_count = msix_max;
+	return 0;
+
+err_msix_request:
+	while (i-- > 0)
+		free_irq(ndev->msix[i].vector, ndev);
+	pci_disable_msix(pdev);
+err_msix_enable:
+	kfree(ndev->msix);
+err_msix_alloc:
+	kfree(ndev->vec);
+err_msix_vec_alloc:
+	ndev->msix = NULL;
+	ndev->vec = NULL;
+
+	/* Try to set up msi irq */
+	rc = pci_enable_msi(pdev);
+	if (rc)
+		goto err_msi_enable;
+
+	rc = request_irq(pdev->irq, ndev_irq_isr, 0,
+			 "ndev_irq_isr", ndev);
+	if (rc)
+		goto err_msi_request;
+
+	dev_dbg(ndev_dev(ndev), "Using msi interrupts\n");
+	ndev->db_count = 1;
+	ndev->msix_vec_count = 1;
+	return 0;
+
+err_msi_request:
+	pci_disable_msi(pdev);
+err_msi_enable:
+
+	/* Try to set up intx irq */
+	pci_intx(pdev, 1);
+
+	rc = request_irq(pdev->irq, ndev_irq_isr, IRQF_SHARED,
+			 "ndev_irq_isr", ndev);
+	if (rc)
+		goto err_intx_request;
+
+	dev_dbg(ndev_dev(ndev), "Using intx interrupts\n");
+	ndev->db_count = 1;
+	ndev->msix_vec_count = 1;
+	return 0;
+
+err_intx_request:
+	return rc;
+}
+
+static void ndev_deinit_isr(struct amd_ntb_dev *ndev)
+{
+	struct pci_dev *pdev;
+	void __iomem *mmio = ndev->self_mmio;
+	int i;
+
+	pdev = ndev_pdev(ndev);
+
+	/* Mask all doorbell interrupts */
+	ndev->db_mask = ndev->db_valid_mask;
+	writel(ndev->db_mask, mmio + AMD_DBMASK_OFFSET);
+
+	if (ndev->msix) {
+		i = ndev->msix_vec_count;
+		while (i--)
+			free_irq(ndev->msix[i].vector, &ndev->vec[i]);
+		pci_disable_msix(pdev);
+		kfree(ndev->msix);
+		kfree(ndev->vec);
+	} else {
+		free_irq(pdev->irq, ndev);
+		if (pci_dev_msi_enabled(pdev))
+			pci_disable_msi(pdev);
+		else
+			pci_intx(pdev, 0);
+	}
+}
+
+static ssize_t ndev_debugfs_read(struct file *filp, char __user *ubuf,
+				 size_t count, loff_t *offp)
+{
+	struct amd_ntb_dev *ndev;
+	void __iomem *mmio;
+	char *buf;
+	size_t buf_size;
+	ssize_t ret, off;
+	union { u64 v64; u32 v32; u16 v16; } u;
+
+	ndev = filp->private_data;
+	mmio = ndev->self_mmio;
+
+	buf_size = min(count, 0x800ul);
+
+	buf = kmalloc(buf_size, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	off = 0;
+
+	off += scnprintf(buf + off, buf_size - off,
+			 "NTB Device Information:\n");
+
+	off += scnprintf(buf + off, buf_size - off,
+			 "Connection Topology -\t%s\n",
+			 ntb_topo_string(ndev->ntb.topo));
+
+	off += scnprintf(buf + off, buf_size - off,
+			 "LNK STA -\t\t%#06x\n", ndev->lnk_sta);
+
+	if (!amd_link_is_up(ndev)) {
+		off += scnprintf(buf + off, buf_size - off,
+				 "Link Status -\t\tDown\n");
+	} else {
+		off += scnprintf(buf + off, buf_size - off,
+				 "Link Status -\t\tUp\n");
+		off += scnprintf(buf + off, buf_size - off,
+				 "Link Speed -\t\tPCI-E Gen %u\n",
+				 NTB_LNK_STA_SPEED(ndev->lnk_sta));
+		off += scnprintf(buf + off, buf_size - off,
+				 "Link Width -\t\tx%u\n",
+				 NTB_LNK_STA_WIDTH(ndev->lnk_sta));
+	}
+
+	off += scnprintf(buf + off, buf_size - off,
+			 "Memory Window Count -\t%u\n", ndev->mw_count);
+	off += scnprintf(buf + off, buf_size - off,
+			 "Scratchpad Count -\t%u\n", ndev->spad_count);
+	off += scnprintf(buf + off, buf_size - off,
+			 "Doorbell Count -\t%u\n", ndev->db_count);
+	off += scnprintf(buf + off, buf_size - off,
+			 "MSIX Vector Count -\t%u\n", ndev->msix_vec_count);
+
+	off += scnprintf(buf + off, buf_size - off,
+			 "Doorbell Valid Mask -\t%#llx\n", ndev->db_valid_mask);
+
+	u.v32 = readl(ndev->self_mmio + AMD_DBMASK_OFFSET);
+	off += scnprintf(buf + off, buf_size - off,
+			 "Doorbell Mask -\t\t\t%#06x\n", u.v32);
+
+	u.v32 = readl(mmio + AMD_DBSTAT_OFFSET);
+	off += scnprintf(buf + off, buf_size - off,
+			 "Doorbell Bell -\t\t\t%#06x\n", u.v32);
+
+	off += scnprintf(buf + off, buf_size - off,
+			 "\nNTB Incoming XLAT:\n");
+
+	u.v64 = ioread64(mmio + AMD_BAR1XLAT_OFFSET);
+	off += scnprintf(buf + off, buf_size - off,
+			 "XLAT1 -\t\t%#018llx\n", u.v64);
+
+	u.v64 = ioread64(ndev->self_mmio + AMD_BAR23XLAT_OFFSET);
+	off += scnprintf(buf + off, buf_size - off,
+				 "XLAT23 -\t\t%#018llx\n", u.v64);
+
+	u.v64 = ioread64(ndev->self_mmio + AMD_BAR45XLAT_OFFSET);
+	off += scnprintf(buf + off, buf_size - off,
+				 "XLAT45 -\t\t%#018llx\n", u.v64);
+
+	u.v32 = readl(mmio + AMD_BAR1LMT_OFFSET);
+	off += scnprintf(buf + off, buf_size - off,
+			 "LMT1 -\t\t\t%#06x\n", u.v32);
+
+	u.v64 = ioread64(ndev->self_mmio + AMD_BAR23LMT_OFFSET);
+	off += scnprintf(buf + off, buf_size - off,
+				 "LMT23 -\t\t\t%#018llx\n", u.v64);
+
+	u.v64 = ioread64(ndev->self_mmio + AMD_BAR45LMT_OFFSET);
+	off += scnprintf(buf + off, buf_size - off,
+				 "LMT45 -\t\t\t%#018llx\n", u.v64);
+
+	ret = simple_read_from_buffer(ubuf, count, offp, buf, off);
+	kfree(buf);
+	return ret;
+}
+
+static void ndev_init_debugfs(struct amd_ntb_dev *ndev)
+{
+	if (!debugfs_dir) {
+		ndev->debugfs_dir = NULL;
+		ndev->debugfs_info = NULL;
+	} else {
+		ndev->debugfs_dir =
+			debugfs_create_dir(ndev_name(ndev), debugfs_dir);
+		if (!ndev->debugfs_dir)
+			ndev->debugfs_info = NULL;
+		else
+			ndev->debugfs_info =
+				debugfs_create_file("info", S_IRUSR,
+						    ndev->debugfs_dir, ndev,
+						    &amd_ntb_debugfs_info);
+	}
+}
+
+static void ndev_deinit_debugfs(struct amd_ntb_dev *ndev)
+{
+	debugfs_remove_recursive(ndev->debugfs_dir);
+}
+
+static inline void ndev_init_struct(struct amd_ntb_dev *ndev,
+				    struct pci_dev *pdev)
+{
+	ndev->ntb.pdev = pdev;
+	ndev->ntb.topo = NTB_TOPO_NONE;
+	ndev->ntb.ops = &amd_ntb_ops;
+	ndev->int_mask = AMD_EVENT_INTMASK;
+	spin_lock_init(&ndev->db_mask_lock);
+}
+
+static int amd_poll_link(struct amd_ntb_dev *ndev)
+{
+	void __iomem *mmio = ndev->peer_mmio;
+	u32 reg, stat;
+	int rc;
+
+	reg = readl(mmio + AMD_SIDEINFO_OFFSET);
+	reg &= NTB_LIN_STA_ACTIVE_BIT;
+
+	dev_dbg(ndev_dev(ndev), "%s: reg_val = 0x%x.\n", __func__, reg);
+
+	if (reg == ndev->cntl_sta)
+		return 0;
+
+	ndev->cntl_sta = reg;
+
+	rc = pci_read_config_dword(ndev->ntb.pdev,
+			AMD_LINK_STATUS_OFFSET, &stat);
+	if (rc)
+		return 0;
+	ndev->lnk_sta = stat;
+
+	return 1;
+}
+
+static void amd_link_hb(struct work_struct *work)
+{
+	struct amd_ntb_dev *ndev = hb_ndev(work);
+
+	if (amd_poll_link(ndev))
+		ntb_link_event(&ndev->ntb);
+
+	if (!amd_link_is_up(ndev))
+		schedule_delayed_work(&ndev->hb_timer, AMD_LINK_HB_TIMEOUT);
+}
+
+static int amd_init_isr(struct amd_ntb_dev *ndev)
+{
+	return ndev_init_isr(ndev, AMD_DB_CNT, AMD_MSIX_VECTOR_CNT);
+}
+
+static void amd_init_side_info(struct amd_ntb_dev *ndev)
+{
+	void __iomem *mmio = ndev->self_mmio;
+	unsigned int reg = 0;
+
+	reg = readl(mmio + AMD_SIDEINFO_OFFSET);
+	if (!(reg & 0x2)) {
+		reg |= 0x2;
+		writel(reg, mmio + AMD_SIDEINFO_OFFSET);
+	}
+}
+
+static void amd_deinit_side_info(struct amd_ntb_dev *ndev)
+{
+	void __iomem *mmio = ndev->self_mmio;
+	unsigned int reg = 0;
+
+	reg = readl(mmio + AMD_SIDEINFO_OFFSET);
+	if (reg & 0x2) {
+		reg &= ~(0x2);
+		writel(reg, mmio + AMD_SIDEINFO_OFFSET);
+		readl(mmio + AMD_SIDEINFO_OFFSET);
+	}
+}
+
+static int amd_init_ntb(struct amd_ntb_dev *ndev)
+{
+	void __iomem *mmio = ndev->self_mmio;
+
+	ndev->mw_count = AMD_MW_CNT;
+	ndev->spad_count = AMD_SPADS_CNT;
+	ndev->db_count = AMD_DB_CNT;
+
+	switch (ndev->ntb.topo) {
+	case NTB_TOPO_PRI:
+	case NTB_TOPO_SEC:
+		ndev->spad_count >>= 1;
+		if (ndev->ntb.topo == NTB_TOPO_PRI) {
+			ndev->self_spad = 0;
+			ndev->peer_spad = 0x20;
+		} else {
+			ndev->self_spad = 0x20;
+			ndev->peer_spad = 0;
+		}
+
+		INIT_DELAYED_WORK(&ndev->hb_timer, amd_link_hb);
+		schedule_delayed_work(&ndev->hb_timer, AMD_LINK_HB_TIMEOUT);
+
+		break;
+	default:
+		dev_err(ndev_dev(ndev), "AMD NTB does not support B2B mode.\n");
+		return -EINVAL;
+	}
+
+	ndev->db_valid_mask = BIT_ULL(ndev->db_count) - 1;
+
+	/* Mask event interrupts */
+	writel(ndev->int_mask, mmio + AMD_INTMASK_OFFSET);
+
+	return 0;
+}
+
+static inline enum ntb_topo amd_get_topo(struct amd_ntb_dev *ndev)
+{
+	void __iomem *mmio = ndev->self_mmio;
+	u32 info;
+
+	info = readl(mmio + AMD_SIDEINFO_OFFSET);
+	if (info & AMD_SIDE_MASK)
+		return NTB_TOPO_SEC;
+	else
+		return NTB_TOPO_PRI;
+}
+
+static int amd_init_dev(struct amd_ntb_dev *ndev)
+{
+	struct pci_dev *pdev;
+	int rc = 0;
+
+	pdev = ndev_pdev(ndev);
+
+	ndev->ntb.topo = amd_get_topo(ndev);
+	dev_dbg(ndev_dev(ndev), "AMD NTB topo is %s\n",
+			ntb_topo_string(ndev->ntb.topo));
+
+	rc = amd_init_ntb(ndev);
+	if (rc)
+		return rc;
+
+	rc = amd_init_isr(ndev);
+	if (rc) {
+		dev_err(ndev_dev(ndev), "fail to init isr.\n");
+		return rc;
+	}
+
+	ndev->db_valid_mask = BIT_ULL(ndev->db_count) - 1;
+
+	return 0;
+}
+
+static void amd_deinit_dev(struct amd_ntb_dev *ndev)
+{
+	cancel_delayed_work_sync(&ndev->hb_timer);
+
+	ndev_deinit_isr(ndev);
+}
+
+static int amd_ntb_init_pci(struct amd_ntb_dev *ndev,
+			    struct pci_dev *pdev)
+{
+	int rc;
+
+	pci_set_drvdata(pdev, ndev);
+
+	rc = pci_enable_device(pdev);
+	if (rc)
+		goto err_pci_enable;
+
+	rc = pci_request_regions(pdev, NTB_NAME);
+	if (rc)
+		goto err_pci_regions;
+
+	pci_set_master(pdev);
+
+	rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
+	if (rc) {
+		rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
+		if (rc)
+			goto err_dma_mask;
+		dev_warn(ndev_dev(ndev), "Cannot DMA highmem\n");
+	}
+
+	rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
+	if (rc) {
+		rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
+		if (rc)
+			goto err_dma_mask;
+		dev_warn(ndev_dev(ndev), "Cannot DMA consistent highmem\n");
+	}
+
+	ndev->self_mmio = pci_iomap(pdev, 0, 0);
+	if (!ndev->self_mmio) {
+		rc = -EIO;
+		goto err_mmio;
+	}
+	ndev->peer_mmio = ndev->self_mmio + AMD_PEER_OFFSET;
+
+	return 0;
+
+err_mmio:
+err_dma_mask:
+	pci_clear_master(pdev);
+err_pci_regions:
+	pci_disable_device(pdev);
+err_pci_enable:
+	pci_set_drvdata(pdev, NULL);
+	return rc;
+}
+
+static void amd_ntb_deinit_pci(struct amd_ntb_dev *ndev)
+{
+	struct pci_dev *pdev = ndev_pdev(ndev);
+
+	pci_iounmap(pdev, ndev->self_mmio);
+
+	pci_clear_master(pdev);
+	pci_release_regions(pdev);
+	pci_disable_device(pdev);
+	pci_set_drvdata(pdev, NULL);
+}
+
+
+static int amd_ntb_pci_probe(struct pci_dev *pdev,
+			     const struct pci_device_id *id)
+{
+	struct amd_ntb_dev *ndev;
+	int rc, node;
+
+	node = dev_to_node(&pdev->dev);
+
+	ndev = kzalloc_node(sizeof(*ndev), GFP_KERNEL, node);
+	if (!ndev) {
+		rc = -ENOMEM;
+		goto err_ndev;
+	}
+
+	ndev_init_struct(ndev, pdev);
+
+	rc = amd_ntb_init_pci(ndev, pdev);
+	if (rc)
+		goto err_init_pci;
+
+	rc = amd_init_dev(ndev);
+	if (rc)
+		goto err_init_dev;
+
+	/* write side info */
+	amd_init_side_info(ndev);
+
+	amd_poll_link(ndev);
+
+	ndev_init_debugfs(ndev);
+
+	rc = ntb_register_device(&ndev->ntb);
+	if (rc)
+		goto err_register;
+
+	dev_info(&pdev->dev, "NTB device registered.\n");
+
+	return 0;
+
+err_register:
+	ndev_deinit_debugfs(ndev);
+	amd_deinit_dev(ndev);
+err_init_dev:
+	amd_ntb_deinit_pci(ndev);
+err_init_pci:
+	kfree(ndev);
+err_ndev:
+	return rc;
+}
+
+static void amd_ntb_pci_remove(struct pci_dev *pdev)
+{
+	struct amd_ntb_dev *ndev = pci_get_drvdata(pdev);
+
+	ntb_unregister_device(&ndev->ntb);
+	ndev_deinit_debugfs(ndev);
+	amd_deinit_side_info(ndev);
+	amd_deinit_dev(ndev);
+	amd_ntb_deinit_pci(ndev);
+	kfree(ndev);
+}
+
+static const struct file_operations amd_ntb_debugfs_info = {
+	.owner = THIS_MODULE,
+	.open = simple_open,
+	.read = ndev_debugfs_read,
+};
+
+static const struct pci_device_id amd_ntb_pci_tbl[] = {
+	{PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_NTB)},
+	{0}
+};
+MODULE_DEVICE_TABLE(pci, amd_ntb_pci_tbl);
+
+static struct pci_driver amd_ntb_pci_driver = {
+	.name		= KBUILD_MODNAME,
+	.id_table	= amd_ntb_pci_tbl,
+	.probe		= amd_ntb_pci_probe,
+	.remove		= amd_ntb_pci_remove,
+};
+
+static int __init amd_ntb_pci_driver_init(void)
+{
+	pr_info("%s %s\n", NTB_DESC, NTB_VER);
+
+	if (debugfs_initialized())
+		debugfs_dir = debugfs_create_dir(KBUILD_MODNAME, NULL);
+
+	return pci_register_driver(&amd_ntb_pci_driver);
+}
+module_init(amd_ntb_pci_driver_init);
+
+static void __exit amd_ntb_pci_driver_exit(void)
+{
+	pci_unregister_driver(&amd_ntb_pci_driver);
+	debugfs_remove_recursive(debugfs_dir);
+}
+module_exit(amd_ntb_pci_driver_exit);
diff --git a/drivers/ntb/hw/amd/ntb_hw_amd.h b/drivers/ntb/hw/amd/ntb_hw_amd.h
new file mode 100644
index 0000000..31021c0
--- /dev/null
+++ b/drivers/ntb/hw/amd/ntb_hw_amd.h
@@ -0,0 +1,246 @@
+/*
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ *   redistributing this file, you may do so under either license.
+ *
+ *   GPL LICENSE SUMMARY
+ *
+ *   Copyright (C) 2016 Advanced Micro Devices, Inc. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of version 2 of the GNU General Public License as
+ *   published by the Free Software Foundation.
+ *
+ *   BSD LICENSE
+ *
+ *   Copyright (C) 2016 Advanced Micro Devices, Inc. All Rights Reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copy
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of AMD Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * AMD PCIe NTB Linux driver
+ *
+ * Contact Information:
+ * Xiangliang Yu<Xiangliang.Yu@amd.com>
+ */
+
+#ifndef NTB_HW_AMD_H
+#define NTB_HW_AMD_H
+
+#include <linux/ntb.h>
+#include <linux/pci.h>
+
+#define PCI_DEVICE_ID_AMD_NTB	0x145B
+#define AMD_LINK_HB_TIMEOUT	msecs_to_jiffies(1000)
+#define AMD_LINK_STATUS_OFFSET	0x68
+#define NTB_LIN_STA_ACTIVE_BIT	0x00000002
+#define NTB_LNK_STA_SPEED_MASK	0x000F0000
+#define NTB_LNK_STA_WIDTH_MASK	0x03F00000
+#define NTB_LNK_STA_ACTIVE(x)	(!!((x) & NTB_LIN_STA_ACTIVE_BIT))
+#define NTB_LNK_STA_SPEED(x)	(((x) & NTB_LNK_STA_SPEED_MASK) >> 16)
+#define NTB_LNK_STA_WIDTH(x)	(((x) & NTB_LNK_STA_WIDTH_MASK) >> 20)
+
+#ifndef ioread64
+#ifdef readq
+#define ioread64 readq
+#else
+#define ioread64 _ioread64
+static inline u64 _ioread64(void __iomem *mmio)
+{
+	u64 low, high;
+
+	low = ioread32(mmio);
+	high = ioread32(mmio + sizeof(u32));
+	return low | (high << 32);
+}
+#endif
+#endif
+
+#ifndef iowrite64
+#ifdef writeq
+#define iowrite64 writeq
+#else
+#define iowrite64 _iowrite64
+static inline void _iowrite64(u64 val, void __iomem *mmio)
+{
+	iowrite32(val, mmio);
+	iowrite32(val >> 32, mmio + sizeof(u32));
+}
+#endif
+#endif
+
+enum {
+	/* AMD NTB Capability */
+	AMD_MW_CNT		= 3,
+	AMD_DB_CNT		= 16,
+	AMD_MSIX_VECTOR_CNT	= 24,
+	AMD_SPADS_CNT		= 16,
+
+	/*  AMD NTB register offset */
+	AMD_CNTL_OFFSET		= 0x200,
+
+	/* NTB control register bits */
+	PMM_REG_CTL		= BIT(21),
+	SMM_REG_CTL		= BIT(20),
+	SMM_REG_ACC_PATH	= BIT(18),
+	PMM_REG_ACC_PATH	= BIT(17),
+	NTB_CLK_EN		= BIT(16),
+
+	AMD_STA_OFFSET		= 0x204,
+	AMD_PGSLV_OFFSET	= 0x208,
+	AMD_SPAD_MUX_OFFSET	= 0x20C,
+	AMD_SPAD_OFFSET		= 0x210,
+	AMD_RSMU_HCID		= 0x250,
+	AMD_RSMU_SIID		= 0x254,
+	AMD_PSION_OFFSET	= 0x300,
+	AMD_SSION_OFFSET	= 0x330,
+	AMD_MMINDEX_OFFSET	= 0x400,
+	AMD_MMDATA_OFFSET	= 0x404,
+	AMD_SIDEINFO_OFFSET	= 0x408,
+
+	AMD_SIDE_MASK		= BIT(0),
+
+	/* limit register */
+	AMD_ROMBARLMT_OFFSET	= 0x410,
+	AMD_BAR1LMT_OFFSET	= 0x414,
+	AMD_BAR23LMT_OFFSET	= 0x418,
+	AMD_BAR45LMT_OFFSET	= 0x420,
+	/* xlat address */
+	AMD_POMBARXLAT_OFFSET	= 0x428,
+	AMD_BAR1XLAT_OFFSET	= 0x430,
+	AMD_BAR23XLAT_OFFSET	= 0x438,
+	AMD_BAR45XLAT_OFFSET	= 0x440,
+	/* doorbell and interrupt */
+	AMD_DBFM_OFFSET		= 0x450,
+	AMD_DBREQ_OFFSET	= 0x454,
+	AMD_MIRRDBSTAT_OFFSET	= 0x458,
+	AMD_DBMASK_OFFSET	= 0x45C,
+	AMD_DBSTAT_OFFSET	= 0x460,
+	AMD_INTMASK_OFFSET	= 0x470,
+	AMD_INTSTAT_OFFSET	= 0x474,
+
+	/* event type */
+	AMD_PEER_FLUSH_EVENT	= BIT(0),
+	AMD_PEER_RESET_EVENT	= BIT(1),
+	AMD_PEER_D3_EVENT	= BIT(2),
+	AMD_PEER_PMETO_EVENT	= BIT(3),
+	AMD_PEER_D0_EVENT	= BIT(4),
+	AMD_EVENT_INTMASK	= (AMD_PEER_FLUSH_EVENT |
+				AMD_PEER_RESET_EVENT | AMD_PEER_D3_EVENT |
+				AMD_PEER_PMETO_EVENT | AMD_PEER_D0_EVENT),
+
+	AMD_PMESTAT_OFFSET	= 0x480,
+	AMD_PMSGTRIG_OFFSET	= 0x490,
+	AMD_LTRLATENCY_OFFSET	= 0x494,
+	AMD_FLUSHTRIG_OFFSET	= 0x498,
+
+	/* SMU register*/
+	AMD_SMUACK_OFFSET	= 0x4A0,
+	AMD_SINRST_OFFSET	= 0x4A4,
+	AMD_RSPNUM_OFFSET	= 0x4A8,
+	AMD_SMU_SPADMUTEX	= 0x4B0,
+	AMD_SMU_SPADOFFSET	= 0x4B4,
+
+	AMD_PEER_OFFSET		= 0x400,
+};
+
+struct amd_ntb_dev;
+
+struct amd_ntb_vec {
+	struct amd_ntb_dev	*ndev;
+	int			num;
+};
+
+struct amd_ntb_dev {
+	struct ntb_dev ntb;
+
+	u32 ntb_side;
+	u32 lnk_sta;
+	u32 cntl_sta;
+	u32 peer_sta;
+
+	unsigned char mw_count;
+	unsigned char spad_count;
+	unsigned char db_count;
+	unsigned char msix_vec_count;
+
+	u64 db_valid_mask;
+	u64 db_mask;
+	u32 int_mask;
+
+	struct msix_entry *msix;
+	struct amd_ntb_vec *vec;
+
+	spinlock_t db_mask_lock;
+
+	void __iomem *self_mmio;
+	void __iomem *peer_mmio;
+	unsigned int self_spad;
+	unsigned int peer_spad;
+
+	struct delayed_work hb_timer;
+
+	struct dentry *debugfs_dir;
+	struct dentry *debugfs_info;
+};
+
+#define ndev_pdev(ndev) ((ndev)->ntb.pdev)
+#define ndev_name(ndev) pci_name(ndev_pdev(ndev))
+#define ndev_dev(ndev) (&ndev_pdev(ndev)->dev)
+#define ntb_ndev(__ntb) container_of(__ntb, struct amd_ntb_dev, ntb)
+#define hb_ndev(__work) container_of(__work, struct amd_ntb_dev, hb_timer.work)
+
+static int amd_ntb_mw_count(struct ntb_dev *ntb);
+static int amd_ntb_mw_get_range(struct ntb_dev *ntb, int idx,
+			phys_addr_t *base, resource_size_t *size,
+			resource_size_t *align, resource_size_t *algin_size);
+static int amd_ntb_mw_set_trans(struct ntb_dev *ndev, int idx,
+				dma_addr_t addr, resource_size_t size);
+static int amd_ntb_link_is_up(struct ntb_dev *ntb,
+				enum ntb_speed *speed,
+				enum ntb_width *width);
+static int amd_ntb_link_enable(struct ntb_dev *ntb,
+				enum ntb_speed speed,
+				enum ntb_width width);
+static int amd_ntb_link_disable(struct ntb_dev *ntb);
+static u64 amd_ntb_db_valid_mask(struct ntb_dev *ntb);
+static int amd_ntb_db_vector_count(struct ntb_dev *ntb);
+static u64 amd_ntb_db_vector_mask(struct ntb_dev *ntb, int db_vector);
+static u64 amd_ntb_db_read(struct ntb_dev *ntb);
+static int amd_ntb_db_clear(struct ntb_dev *ntb, u64 db_bits);
+static int amd_ntb_db_set_mask(struct ntb_dev *ntb, u64 db_bits);
+static int amd_ntb_db_clear_mask(struct ntb_dev *ntb, u64 db_bits);
+static int amd_ntb_peer_db_addr(struct ntb_dev *ntb,
+				phys_addr_t *db_addr,
+				resource_size_t *db_size);
+static int amd_ntb_peer_db_set(struct ntb_dev *ntb, u64 db_bits);
+static int amd_ntb_spad_count(struct ntb_dev *ntb);
+static u32 amd_ntb_spad_read(struct ntb_dev *ntb, int idx);
+static int amd_ntb_spad_write(struct ntb_dev *ntb, int idx, u32 val);
+static int amd_ntb_peer_spad_addr(struct ntb_dev *ntb, int idx,
+					phys_addr_t *spad_addr);
+static u32 amd_ntb_peer_spad_read(struct ntb_dev *ntb, int idx);
+static int amd_ntb_peer_spad_write(struct ntb_dev *ntb, int idx, u32 val);
+#endif
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* RE: [PATCH V3 1/2] NTB: Add AMD PCI-Express NTB driver
  2016-01-14 11:44 [PATCH V3 1/2] NTB: Add AMD PCI-Express NTB driver Xiangliang Yu
@ 2016-01-14 15:56 ` Allen Hubbe
  2016-01-15  7:37   ` Yu, Xiangliang
  2016-01-18  4:04 ` Jon Mason
  1 sibling, 1 reply; 9+ messages in thread
From: Allen Hubbe @ 2016-01-14 15:56 UTC (permalink / raw)
  To: 'Xiangliang Yu', jdmason, dave.jiang, linux-ntb,
	linux-kernel
  Cc: SPG_Linux_Kernel

> From: Xiangliang Yu <Xiangliang.Yu@amd.com>
> 
> AMD NTB support following main features:
> (1) Three memory windows;
> (2) Sixteen 32-bit scratch pad;
> (3) Two 16-bit doorbell interrupt;

This is interesting.  Let's submit your doorbell implementation as is, but I wonder if "multiple doorbell registers" is something we should expose in the ntb.h api.  I wonder how we should reconcile that with "vectors", unless they are equivalent.  If we see a device with more than 64 doorbell bits total, the ntb.h api will be inadequate.

> (4) Five event interrupts;
> (5) One system can wake up opposite system of NTB;

You could say: power management support will be added in a following patch.

> (6) Flush previous request to the opposite system;

...in a following patch.

> (7) There are reset and PME_TO mechanisms between two systems;

How is this different from (5 or 6)?  What is PME_TO?

> 
> Signed-off-by: Xiangliang Yu <Xiangliang.Yu@amd.com>


Overall, this driver is looking much better.  Thanks for your efforts!

With the -rc1 window likely closing this weekend, let's see what others have to say about this patch v3, or getting a v4 this week.

There are a few minor requests below.  They are nothing critical.

> +static void amd_ack_SMU(struct amd_ntb_dev *ndev, u32 bit)
> +{
> +	void __iomem *mmio = ndev->self_mmio;
> +	int reg;
> +
> +	reg = readl(mmio + AMD_SMUACK_OFFSET);
> +	reg |= bit;
> +	writel(reg, mmio + AMD_SMUACK_OFFSET);
> +
> +	ndev->peer_sta |= bit;
> +}

What is SMU?

Does this have to do with power management, or flush?  Can you say why it should be in this patch, and not one of the other ones?

Speculation: Is it something like, if the peer driver has the flush patch, and this side is not patched, this side driver still needs to respond (ack) for the peer flush to complete?  In other words, the flush is not fully implemented in hw?  I'm just guessing, so I would like to hear your explanation.

> +static void amd_handle_event(struct amd_ntb_dev *ndev, int vec)
> +{
> +	void __iomem *mmio = ndev->self_mmio;
> +	u32 status;
> +
> +	status = readl(mmio + AMD_INTSTAT_OFFSET);
> +	if (!(status & AMD_EVENT_INTMASK))
> +		return;
> +
> +	dev_dbg(ndev_dev(ndev), "status = 0x%x and vec = %d\n", status,
> vec);
> +
> +	status &= AMD_EVENT_INTMASK;
> +	switch (status) {
> +	case AMD_PEER_FLUSH_EVENT:
> +		dev_info(ndev_dev(ndev), "Flush is done.\n");
> +		break;
> +	case AMD_PEER_RESET_EVENT:
> +		amd_ack_SMU(ndev, AMD_PEER_RESET_EVENT);
> +
> +		/* link down first */
> +		ntb_link_event(&ndev->ntb);
> +		/* polling peer status */
> +		schedule_delayed_work(&ndev->hb_timer, AMD_LINK_HB_TIMEOUT);
> +
> +		break;
> +	case AMD_PEER_D3_EVENT:
> +	case AMD_PEER_PMETO_EVENT:
> +		amd_ack_SMU(ndev, status);

Here is ack SMU called.

> +
> +		/* link down */
> +		ntb_link_event(&ndev->ntb);
> +
> +		break;
> +	case AMD_PEER_D0_EVENT:
> +		mmio = ndev->peer_mmio;
> +		status = readl(mmio + AMD_PMESTAT_OFFSET);
> +		/* check if this is WAKEUP event */
> +		if (status & 0x1)
> +			dev_info(ndev_dev(ndev), "Wakeup is done.\n");
> +
> +		amd_ack_SMU(ndev, AMD_PEER_D0_EVENT);
> +
> +		if (amd_link_is_up(ndev))
> +			ntb_link_event(&ndev->ntb);
> +		else
> +			schedule_delayed_work(&ndev->hb_timer,
> +						AMD_LINK_HB_TIMEOUT);
> +		break;
> +	default:
> +		dev_info(ndev_dev(ndev), "event status = 0x%x.\n", status);
> +		break;
> +	}
> +}

> +enum {

I would prefer to see #define, but this works... There are good arguments to be made for compiler constants vs the preprocessor.  My preference just comes from what I've observed about how other drivers are written, and #define for this stuff seems to be the convention.

> +	/* AMD NTB Capability */
> +	AMD_MW_CNT		= 3,
> +	AMD_DB_CNT		= 16,
> +	AMD_MSIX_VECTOR_CNT	= 24,
> +	AMD_SPADS_CNT		= 16,
> +
> +	/*  AMD NTB register offset */
> +	AMD_CNTL_OFFSET		= 0x200,
> +
> +	/* NTB control register bits */
> +	PMM_REG_CTL		= BIT(21),
> +	SMM_REG_CTL		= BIT(20),
> +	SMM_REG_ACC_PATH	= BIT(18),
> +	PMM_REG_ACC_PATH	= BIT(17),
> +	NTB_CLK_EN		= BIT(16),
> +
> +	AMD_STA_OFFSET		= 0x204,
> +	AMD_PGSLV_OFFSET	= 0x208,
> +	AMD_SPAD_MUX_OFFSET	= 0x20C,
> +	AMD_SPAD_OFFSET		= 0x210,
> +	AMD_RSMU_HCID		= 0x250,
> +	AMD_RSMU_SIID		= 0x254,
> +	AMD_PSION_OFFSET	= 0x300,
> +	AMD_SSION_OFFSET	= 0x330,
> +	AMD_MMINDEX_OFFSET	= 0x400,
> +	AMD_MMDATA_OFFSET	= 0x404,
> +	AMD_SIDEINFO_OFFSET	= 0x408,
> +
> +	AMD_SIDE_MASK		= BIT(0),
> +
> +	/* limit register */
> +	AMD_ROMBARLMT_OFFSET	= 0x410,
> +	AMD_BAR1LMT_OFFSET	= 0x414,
> +	AMD_BAR23LMT_OFFSET	= 0x418,
> +	AMD_BAR45LMT_OFFSET	= 0x420,
> +	/* xlat address */
> +	AMD_POMBARXLAT_OFFSET	= 0x428,
> +	AMD_BAR1XLAT_OFFSET	= 0x430,
> +	AMD_BAR23XLAT_OFFSET	= 0x438,
> +	AMD_BAR45XLAT_OFFSET	= 0x440,
> +	/* doorbell and interrupt */
> +	AMD_DBFM_OFFSET		= 0x450,
> +	AMD_DBREQ_OFFSET	= 0x454,
> +	AMD_MIRRDBSTAT_OFFSET	= 0x458,
> +	AMD_DBMASK_OFFSET	= 0x45C,
> +	AMD_DBSTAT_OFFSET	= 0x460,
> +	AMD_INTMASK_OFFSET	= 0x470,
> +	AMD_INTSTAT_OFFSET	= 0x474,
> +
> +	/* event type */
> +	AMD_PEER_FLUSH_EVENT	= BIT(0),
> +	AMD_PEER_RESET_EVENT	= BIT(1),
> +	AMD_PEER_D3_EVENT	= BIT(2),
> +	AMD_PEER_PMETO_EVENT	= BIT(3),
> +	AMD_PEER_D0_EVENT	= BIT(4),
> +	AMD_EVENT_INTMASK	= (AMD_PEER_FLUSH_EVENT |
> +				AMD_PEER_RESET_EVENT | AMD_PEER_D3_EVENT |
> +				AMD_PEER_PMETO_EVENT | AMD_PEER_D0_EVENT),
> +
> +	AMD_PMESTAT_OFFSET	= 0x480,
> +	AMD_PMSGTRIG_OFFSET	= 0x490,
> +	AMD_LTRLATENCY_OFFSET	= 0x494,
> +	AMD_FLUSHTRIG_OFFSET	= 0x498,
> +
> +	/* SMU register*/
> +	AMD_SMUACK_OFFSET	= 0x4A0,
> +	AMD_SINRST_OFFSET	= 0x4A4,
> +	AMD_RSPNUM_OFFSET	= 0x4A8,
> +	AMD_SMU_SPADMUTEX	= 0x4B0,
> +	AMD_SMU_SPADOFFSET	= 0x4B4,
> +
> +	AMD_PEER_OFFSET		= 0x400,
> +};

> +struct amd_ntb_dev {
...
> +};
> +
> +#define ndev_pdev(ndev) ((ndev)->ntb.pdev)
> +#define ndev_name(ndev) pci_name(ndev_pdev(ndev))
> +#define ndev_dev(ndev) (&ndev_pdev(ndev)->dev)
> +#define ntb_ndev(__ntb) container_of(__ntb, struct amd_ntb_dev, ntb)
> +#define hb_ndev(__work) container_of(__work, struct amd_ntb_dev,
> hb_timer.work)

Jon: I think these macros are good, and I like that they are here next to the struct amd_ntb_dev.

> +static int amd_ntb_mw_count(struct ntb_dev *ntb);
> +static int amd_ntb_mw_get_range(struct ntb_dev *ntb, int idx,
> +			phys_addr_t *base, resource_size_t *size,
> +			resource_size_t *align, resource_size_t *algin_size);
> +static int amd_ntb_mw_set_trans(struct ntb_dev *ndev, int idx,
> +				dma_addr_t addr, resource_size_t size);
> +static int amd_ntb_link_is_up(struct ntb_dev *ntb,
> +				enum ntb_speed *speed,
> +				enum ntb_width *width);
> +static int amd_ntb_link_enable(struct ntb_dev *ntb,
> +				enum ntb_speed speed,
> +				enum ntb_width width);
> +static int amd_ntb_link_disable(struct ntb_dev *ntb);
> +static u64 amd_ntb_db_valid_mask(struct ntb_dev *ntb);
> +static int amd_ntb_db_vector_count(struct ntb_dev *ntb);
> +static u64 amd_ntb_db_vector_mask(struct ntb_dev *ntb, int db_vector);
> +static u64 amd_ntb_db_read(struct ntb_dev *ntb);
> +static int amd_ntb_db_clear(struct ntb_dev *ntb, u64 db_bits);
> +static int amd_ntb_db_set_mask(struct ntb_dev *ntb, u64 db_bits);
> +static int amd_ntb_db_clear_mask(struct ntb_dev *ntb, u64 db_bits);
> +static int amd_ntb_peer_db_addr(struct ntb_dev *ntb,
> +				phys_addr_t *db_addr,
> +				resource_size_t *db_size);
> +static int amd_ntb_peer_db_set(struct ntb_dev *ntb, u64 db_bits);
> +static int amd_ntb_spad_count(struct ntb_dev *ntb);
> +static u32 amd_ntb_spad_read(struct ntb_dev *ntb, int idx);
> +static int amd_ntb_spad_write(struct ntb_dev *ntb, int idx, u32 val);
> +static int amd_ntb_peer_spad_addr(struct ntb_dev *ntb, int idx,
> +					phys_addr_t *spad_addr);
> +static u32 amd_ntb_peer_spad_read(struct ntb_dev *ntb, int idx);
> +static int amd_ntb_peer_spad_write(struct ntb_dev *ntb, int idx, u32
> val);

It's unusual, sometimes dangerous, to use the static keyword in a .h file (unless static inline, or things that you really intend to be static in multiple compilation unit).  This .h is only included by a single .c file (effectively, this file is part of ntb_hw_amd.c), and these are only declarations, not function definitions, but it makes me nervous.  Do you /need/ these functions forward declared?  Could you rearrange your .c file so you don't need these forward declared?  If you need them, could you put the declarations in the .c file?

Allen

^ permalink raw reply	[flat|nested] 9+ messages in thread

* RE: [PATCH V3 1/2] NTB: Add AMD PCI-Express NTB driver
  2016-01-14 15:56 ` Allen Hubbe
@ 2016-01-15  7:37   ` Yu, Xiangliang
  2016-01-15 17:13     ` Allen Hubbe
  0 siblings, 1 reply; 9+ messages in thread
From: Yu, Xiangliang @ 2016-01-15  7:37 UTC (permalink / raw)
  To: Allen Hubbe, jdmason@kudzu.us, dave.jiang@intel.com,
	linux-ntb@googlegroups.com, linux-kernel@vger.kernel.org
  Cc: SPG_Linux_Kernel

> > AMD NTB support following main features:
> > (1) Three memory windows;
> > (2) Sixteen 32-bit scratch pad;
> > (3) Two 16-bit doorbell interrupt;
> 
> This is interesting.  Let's submit your doorbell implementation as is, but I
> wonder if "multiple doorbell registers" is something we should expose in the
> ntb.h api.  I wonder how we should reconcile that with "vectors", unless they
> are equivalent.  If we see a device with more than 64 doorbell bits total, the
> ntb.h api will be inadequate.

Sorry, let me clarify it: one 16-bit doorbell is for primary side and the other is be used
By secondary side. The #3 should be is that two 16-bit doorbell registers.

> > (4) Five event interrupts;
> > (5) One system can wake up opposite system of NTB;
> 
> You could say: power management support will be added in a following patch.

In here, i list all features AMD NTB support, not patch. Please see my word again.

> > (6) Flush previous request to the opposite system;
> 
> ...in a following patch.

Ditto.

> > (7) There are reset and PME_TO mechanisms between two systems;
> 
> How is this different from (5 or 6)?  What is PME_TO?
I don't know what you mean for the different?
#5, #6 and #7 is hardware design:
#5: one side can wake up the opposite if the opposite is sleep;
#6 : one side flush the pending request to opposite side;
#7: if one side has reset or power off event, the opposite side can get the information
>From the reset and PME_TO mechanism and execute some handler to avoid data lost.

PME_TO is PCIe message of power turn off. 

> >
> > Signed-off-by: Xiangliang Yu <Xiangliang.Yu@amd.com>
> 
> 
> Overall, this driver is looking much better.  Thanks for your efforts!
> 
> With the -rc1 window likely closing this weekend, let's see what others have
> to say about this patch v3, or getting a v4 this week.
> There are a few minor requests below.  They are nothing critical.
> 
> > +static void amd_ack_SMU(struct amd_ntb_dev *ndev, u32 bit) {
> > +	void __iomem *mmio = ndev->self_mmio;
> > +	int reg;
> > +
> > +	reg = readl(mmio + AMD_SMUACK_OFFSET);
> > +	reg |= bit;
> > +	writel(reg, mmio + AMD_SMUACK_OFFSET);
> > +
> > +	ndev->peer_sta |= bit;
> > +}
> 
> What is SMU?

System management unit, please reference AMD manual.

> Does this have to do with power management, or flush?  Can you say why it
> should be in this patch, and not one of the other ones?

This is AMD NTB hardware rules, I just following hardware design. One side can 
convey the information of event changes to the other side by SMU ACK register. 

> 
> Speculation: Is it something like, if the peer driver has the flush patch, and
> this side is not patched, this side driver still needs to respond (ack) for the
> peer flush to complete?  In other words, the flush is not fully implemented in
> hw?  I'm just guessing, so I would like to hear your explanation.
> 

I don't know what to say for your speculation, why not is software bug?
I don’t think any hardware can work fine in this case.

> > +static void amd_handle_event(struct amd_ntb_dev *ndev, int vec) {
> > +	void __iomem *mmio = ndev->self_mmio;
> > +	u32 status;
> > +
> > +	status = readl(mmio + AMD_INTSTAT_OFFSET);
> > +	if (!(status & AMD_EVENT_INTMASK))
> > +		return;
> > +
> > +	dev_dbg(ndev_dev(ndev), "status = 0x%x and vec = %d\n", status,
> > vec);
> > +
> > +	status &= AMD_EVENT_INTMASK;
> > +	switch (status) {
> > +	case AMD_PEER_FLUSH_EVENT:
> > +		dev_info(ndev_dev(ndev), "Flush is done.\n");
> > +		break;
> > +	case AMD_PEER_RESET_EVENT:
> > +		amd_ack_SMU(ndev, AMD_PEER_RESET_EVENT);
> > +
> > +		/* link down first */
> > +		ntb_link_event(&ndev->ntb);
> > +		/* polling peer status */
> > +		schedule_delayed_work(&ndev->hb_timer,
> AMD_LINK_HB_TIMEOUT);
> > +
> > +		break;
> > +	case AMD_PEER_D3_EVENT:
> > +	case AMD_PEER_PMETO_EVENT:
> > +		amd_ack_SMU(ndev, status);
> 
> Here is ack SMU called.

Yes, follow hardware design.

> > +
> > +		/* link down */
> > +		ntb_link_event(&ndev->ntb);
> > +
> > +		break;
> > +	case AMD_PEER_D0_EVENT:
> > +		mmio = ndev->peer_mmio;
> > +		status = readl(mmio + AMD_PMESTAT_OFFSET);
> > +		/* check if this is WAKEUP event */
> > +		if (status & 0x1)
> > +			dev_info(ndev_dev(ndev), "Wakeup is done.\n");
> > +
> > +		amd_ack_SMU(ndev, AMD_PEER_D0_EVENT);
> > +
> > +		if (amd_link_is_up(ndev))
> > +			ntb_link_event(&ndev->ntb);
> > +		else
> > +			schedule_delayed_work(&ndev->hb_timer,
> > +						AMD_LINK_HB_TIMEOUT);
> > +		break;
> > +	default:
> > +		dev_info(ndev_dev(ndev), "event status = 0x%x.\n", status);
> > +		break;
> > +	}
> > +}
> 
> > +enum {
> 
> I would prefer to see #define, but this works... There are good arguments to
> be made for compiler constants vs the preprocessor.  My preference just
> comes from what I've observed about how other drivers are written, and
> #define for this stuff seems to be the convention.

I have lots of SATA/Block layer experience, this following AHCI coding style.
Compiler will check enum variable, not for macro. 
Actually, there are lots of similar definition in kernel, such as block layer code.

> > +	/* AMD NTB Capability */
> > +	AMD_MW_CNT		= 3,
> > +	AMD_DB_CNT		= 16,
> > +	AMD_MSIX_VECTOR_CNT	= 24,
> > +	AMD_SPADS_CNT		= 16,
> > +
> > +	/*  AMD NTB register offset */
> > +	AMD_CNTL_OFFSET		= 0x200,
> > +
> > +	/* NTB control register bits */
> > +	PMM_REG_CTL		= BIT(21),
> > +	SMM_REG_CTL		= BIT(20),
> > +	SMM_REG_ACC_PATH	= BIT(18),
> > +	PMM_REG_ACC_PATH	= BIT(17),
> > +	NTB_CLK_EN		= BIT(16),
> > +
> > +	AMD_STA_OFFSET		= 0x204,
> > +	AMD_PGSLV_OFFSET	= 0x208,
> > +	AMD_SPAD_MUX_OFFSET	= 0x20C,
> > +	AMD_SPAD_OFFSET		= 0x210,
> > +	AMD_RSMU_HCID		= 0x250,
> > +	AMD_RSMU_SIID		= 0x254,
> > +	AMD_PSION_OFFSET	= 0x300,
> > +	AMD_SSION_OFFSET	= 0x330,
> > +	AMD_MMINDEX_OFFSET	= 0x400,
> > +	AMD_MMDATA_OFFSET	= 0x404,
> > +	AMD_SIDEINFO_OFFSET	= 0x408,
> > +
> > +	AMD_SIDE_MASK		= BIT(0),
> > +
> > +	/* limit register */
> > +	AMD_ROMBARLMT_OFFSET	= 0x410,
> > +	AMD_BAR1LMT_OFFSET	= 0x414,
> > +	AMD_BAR23LMT_OFFSET	= 0x418,
> > +	AMD_BAR45LMT_OFFSET	= 0x420,
> > +	/* xlat address */
> > +	AMD_POMBARXLAT_OFFSET	= 0x428,
> > +	AMD_BAR1XLAT_OFFSET	= 0x430,
> > +	AMD_BAR23XLAT_OFFSET	= 0x438,
> > +	AMD_BAR45XLAT_OFFSET	= 0x440,
> > +	/* doorbell and interrupt */
> > +	AMD_DBFM_OFFSET		= 0x450,
> > +	AMD_DBREQ_OFFSET	= 0x454,
> > +	AMD_MIRRDBSTAT_OFFSET	= 0x458,
> > +	AMD_DBMASK_OFFSET	= 0x45C,
> > +	AMD_DBSTAT_OFFSET	= 0x460,
> > +	AMD_INTMASK_OFFSET	= 0x470,
> > +	AMD_INTSTAT_OFFSET	= 0x474,
> > +
> > +	/* event type */
> > +	AMD_PEER_FLUSH_EVENT	= BIT(0),
> > +	AMD_PEER_RESET_EVENT	= BIT(1),
> > +	AMD_PEER_D3_EVENT	= BIT(2),
> > +	AMD_PEER_PMETO_EVENT	= BIT(3),
> > +	AMD_PEER_D0_EVENT	= BIT(4),
> > +	AMD_EVENT_INTMASK	= (AMD_PEER_FLUSH_EVENT |
> > +				AMD_PEER_RESET_EVENT |
> AMD_PEER_D3_EVENT |
> > +				AMD_PEER_PMETO_EVENT |
> AMD_PEER_D0_EVENT),
> > +
> > +	AMD_PMESTAT_OFFSET	= 0x480,
> > +	AMD_PMSGTRIG_OFFSET	= 0x490,
> > +	AMD_LTRLATENCY_OFFSET	= 0x494,
> > +	AMD_FLUSHTRIG_OFFSET	= 0x498,
> > +
> > +	/* SMU register*/
> > +	AMD_SMUACK_OFFSET	= 0x4A0,
> > +	AMD_SINRST_OFFSET	= 0x4A4,
> > +	AMD_RSPNUM_OFFSET	= 0x4A8,
> > +	AMD_SMU_SPADMUTEX	= 0x4B0,
> > +	AMD_SMU_SPADOFFSET	= 0x4B4,
> > +
> > +	AMD_PEER_OFFSET		= 0x400,
> > +};
> 
> > +struct amd_ntb_dev {
> ...
> > +};
> > +
> > +#define ndev_pdev(ndev) ((ndev)->ntb.pdev) #define ndev_name(ndev)
> > +pci_name(ndev_pdev(ndev)) #define ndev_dev(ndev)
> > +(&ndev_pdev(ndev)->dev) #define ntb_ndev(__ntb)
> container_of(__ntb,
> > +struct amd_ntb_dev, ntb) #define hb_ndev(__work)
> container_of(__work,
> > +struct amd_ntb_dev,
> > hb_timer.work)
> 
> Jon: I think these macros are good, and I like that they are here next to the
> struct amd_ntb_dev.
> 
> > +static int amd_ntb_mw_count(struct ntb_dev *ntb); static int
> > +amd_ntb_mw_get_range(struct ntb_dev *ntb, int idx,
> > +			phys_addr_t *base, resource_size_t *size,
> > +			resource_size_t *align, resource_size_t *algin_size);
> static int
> > +amd_ntb_mw_set_trans(struct ntb_dev *ndev, int idx,
> > +				dma_addr_t addr, resource_size_t size);
> static int
> > +amd_ntb_link_is_up(struct ntb_dev *ntb,
> > +				enum ntb_speed *speed,
> > +				enum ntb_width *width);
> > +static int amd_ntb_link_enable(struct ntb_dev *ntb,
> > +				enum ntb_speed speed,
> > +				enum ntb_width width);
> > +static int amd_ntb_link_disable(struct ntb_dev *ntb); static u64
> > +amd_ntb_db_valid_mask(struct ntb_dev *ntb); static int
> > +amd_ntb_db_vector_count(struct ntb_dev *ntb); static u64
> > +amd_ntb_db_vector_mask(struct ntb_dev *ntb, int db_vector); static
> > +u64 amd_ntb_db_read(struct ntb_dev *ntb); static int
> > +amd_ntb_db_clear(struct ntb_dev *ntb, u64 db_bits); static int
> > +amd_ntb_db_set_mask(struct ntb_dev *ntb, u64 db_bits); static int
> > +amd_ntb_db_clear_mask(struct ntb_dev *ntb, u64 db_bits); static int
> > +amd_ntb_peer_db_addr(struct ntb_dev *ntb,
> > +				phys_addr_t *db_addr,
> > +				resource_size_t *db_size);
> > +static int amd_ntb_peer_db_set(struct ntb_dev *ntb, u64 db_bits);
> > +static int amd_ntb_spad_count(struct ntb_dev *ntb); static u32
> > +amd_ntb_spad_read(struct ntb_dev *ntb, int idx); static int
> > +amd_ntb_spad_write(struct ntb_dev *ntb, int idx, u32 val); static int
> > +amd_ntb_peer_spad_addr(struct ntb_dev *ntb, int idx,
> > +					phys_addr_t *spad_addr);
> > +static u32 amd_ntb_peer_spad_read(struct ntb_dev *ntb, int idx);
> > +static int amd_ntb_peer_spad_write(struct ntb_dev *ntb, int idx, u32
> > val);
> 
> It's unusual, sometimes dangerous, to use the static keyword in a .h file
> (unless static inline, or things that you really intend to be static in multiple
> compilation unit).  This .h is only included by a single .c file (effectively, this
> file is part of ntb_hw_amd.c), and these are only declarations, not function
> definitions, but it makes me nervous.  Do you /need/ these functions
> forward declared?  Could you rearrange your .c file so you don't need these
> forward declared?  If you need them, could you put the declarations in the .c
> file?

Actually, I put it in .c file in first version, I think it make code clear if moving it to
.h file. Yes, I can remove all these definitions by rearranging .c file.

And please speak out all your concern in this time, you know, I had spent lots of time
 On these patches.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* RE: [PATCH V3 1/2] NTB: Add AMD PCI-Express NTB driver
  2016-01-15  7:37   ` Yu, Xiangliang
@ 2016-01-15 17:13     ` Allen Hubbe
  2016-01-18  6:47       ` Yu, Xiangliang
  0 siblings, 1 reply; 9+ messages in thread
From: Allen Hubbe @ 2016-01-15 17:13 UTC (permalink / raw)
  To: 'Yu, Xiangliang', jdmason, dave.jiang, linux-ntb,
	linux-kernel
  Cc: 'SPG_Linux_Kernel'

> From: Yu, Xiangliang <Xiangliang.Yu@amd.com>
> 
> Sorry, let me clarify it: one 16-bit doorbell is for primary side and
> the other is be used
> By secondary side. The #3 should be is that two 16-bit doorbell
> registers.

Thanks!  I think I understand now.

> In here, i list all features AMD NTB support, not patch. Please see my
> word again.

This is the commit message for this patch.  It would be confusing to have features mentioned in the commit message not implemented, and no explanation why not.

> > What is SMU?
> 
> System management unit, please reference AMD manual.

Thanks, but it is your responsibility to support this driver, and respond to questions like this.  I didn't write it, I can't test it, and I don't have a hardware manual.

If this is an ack for the system management unit, is it acknowledging a watchdog on the SMU?  Does this ack have to do with the flush or power management functions?  If it is a watchdog, what implications are there for unloading your driver - does the watchdog just time out, and then what, reset the device?  I'm speculating (guessing) again, but that's really the best I can do without your authoritative answers, or a hardware manual.

Where can the hardware manual be found?  Has it been published, or is it AMD confidential?

http://search.amd.com/en-us/Pages/results-all.aspx#k=zeppelin
http://search.amd.com/en-us/Pages/results-all.aspx#k=ntb

"Nothing here matches your search"

> > I would prefer to see #define, but this works... There are good
> arguments to
> > be made for compiler constants vs the preprocessor.  My preference
> just
> > comes from what I've observed about how other drivers are written, and
> > #define for this stuff seems to be the convention.
> 
> I have lots of SATA/Block layer experience, this following AHCI coding
> style.
> Compiler will check enum variable, not for macro.
> Actually, there are lots of similar definition in kernel, such as block
> layer code.

Thanks.  I'm ok with this.

> > It's unusual, sometimes dangerous, to use the static keyword in a .h
> file
> 
> Actually, I put it in .c file in first version, I think it make code
> clear if moving it to
> .h file. Yes, I can remove all these definitions by rearranging .c file.
> 
> And please speak out all your concern in this time, you know, I had
> spent lots of time
>  On these patches.

You're right.  I didn't notice this change from v1 to v2.  It would have been a more efficient code review had I made these comments in v2.

My comments this time around, as I said, were minor.  Each version of this patch series has been an improvement.  Maybe this version is even good enough, but there are /always/ comments to be made about code.  It's not unusual to see patches on the kernel mailing list with double digit reroll counts (I saw one at v12 today).  Sure, it can be frustrating.   Please don't take offense.  Thank you for your efforts.

The bad news:  I have two more comments.

Please run scritps/checkpatch.pl --strict with your patch.  There are some formatting issues.

Please add your name to the MAINTANERS file.  Add a section, NTB AMD DRIVER.  The checkpatch script also mentions this, but it's especially important.  You are the only one who can test this driver, with the simulator.

The good news:

I'm inclined to say yes to this patch.  It looks ok, and I'll take your word that it has been tested.  Ultimately, the patch has to be accepted by Jon and Linus.  By adding my comments, I'm just trying to help.

Do you think you can email v4 by tomorrow?  I'll be sure to respond quickly if you do, so it has the best chance to meet the deadline for the merge window.  It's getting close to the deadline, and I'll take some of the blame for that.

Allen

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH V3 1/2] NTB: Add AMD PCI-Express NTB driver
  2016-01-14 11:44 [PATCH V3 1/2] NTB: Add AMD PCI-Express NTB driver Xiangliang Yu
  2016-01-14 15:56 ` Allen Hubbe
@ 2016-01-18  4:04 ` Jon Mason
  2016-01-18  9:42   ` Yu, Xiangliang
  1 sibling, 1 reply; 9+ messages in thread
From: Jon Mason @ 2016-01-18  4:04 UTC (permalink / raw)
  To: Xiangliang Yu
  Cc: dave.jiang, Allen.Hubbe, linux-ntb, linux-kernel,
	SPG_Linux_Kernel

On Thu, Jan 14, 2016 at 07:44:07PM +0800, Xiangliang Yu wrote:
> AMD NTB support following main features:
> (1) Three memory windows;
> (2) Sixteen 32-bit scratch pad;
> (3) Two 16-bit doorbell interrupt;
> (4) Five event interrupts;
> (5) One system can wake up opposite system of NTB;
> (6) Flush previous request to the opposite system;
> (7) There are reset and PME_TO mechanisms between two systems;

Please create a much more verbose description of the patch.  This is
not acceptable in it's current form, and I'm sure I could get flamed
by Linus if he saw it in my pull request.  Even writing this out as a
paragaph instead of a numbered list would be better.

> 
> Signed-off-by: Xiangliang Yu <Xiangliang.Yu@amd.com>
> ---
>  drivers/ntb/hw/Kconfig          |    1 +
>  drivers/ntb/hw/Makefile         |    1 +
>  drivers/ntb/hw/amd/Kconfig      |    7 +
>  drivers/ntb/hw/amd/Makefile     |    1 +
>  drivers/ntb/hw/amd/ntb_hw_amd.c | 1148 +++++++++++++++++++++++++++++++++++++++
>  drivers/ntb/hw/amd/ntb_hw_amd.h |  246 +++++++++
>  6 files changed, 1404 insertions(+)
>  create mode 100644 drivers/ntb/hw/amd/Kconfig
>  create mode 100644 drivers/ntb/hw/amd/Makefile
>  create mode 100644 drivers/ntb/hw/amd/ntb_hw_amd.c
>  create mode 100644 drivers/ntb/hw/amd/ntb_hw_amd.h
> 
> diff --git a/drivers/ntb/hw/Kconfig b/drivers/ntb/hw/Kconfig
> index 4d5535c..0c5c2a6 100644
> --- a/drivers/ntb/hw/Kconfig
> +++ b/drivers/ntb/hw/Kconfig
> @@ -1 +1,2 @@
>  source "drivers/ntb/hw/intel/Kconfig"
> +source "drivers/ntb/hw/amd/Kconfig"

Being nit picky, but please make it alphabetical and put amd before
intel.  

> diff --git a/drivers/ntb/hw/Makefile b/drivers/ntb/hw/Makefile
> index 175d7c9..636be7d 100644
> --- a/drivers/ntb/hw/Makefile
> +++ b/drivers/ntb/hw/Makefile
> @@ -1 +1,2 @@
>  obj-$(CONFIG_NTB_INTEL)	+= intel/
> +obj-$(CONFIG_NTB_AMD)	+= amd/

Same nit, make it alphabetical.

> diff --git a/drivers/ntb/hw/amd/Kconfig b/drivers/ntb/hw/amd/Kconfig
> new file mode 100644
> index 0000000..cfe903c
> --- /dev/null
> +++ b/drivers/ntb/hw/amd/Kconfig
> @@ -0,0 +1,7 @@
> +config NTB_AMD
> +	tristate "AMD Non-Transparent Bridge support"
> +	depends on X86_64
> +	help
> +	 This driver supports AMD NTB on capable Zeppelin hardware.
> +
> +	 If unsure, say N.
> diff --git a/drivers/ntb/hw/amd/Makefile b/drivers/ntb/hw/amd/Makefile
> new file mode 100644
> index 0000000..ad54da9
> --- /dev/null
> +++ b/drivers/ntb/hw/amd/Makefile
> @@ -0,0 +1 @@
> +obj-$(CONFIG_NTB_AMD) += ntb_hw_amd.o
> diff --git a/drivers/ntb/hw/amd/ntb_hw_amd.c b/drivers/ntb/hw/amd/ntb_hw_amd.c
> new file mode 100644
> index 0000000..8df6d7b
> --- /dev/null
> +++ b/drivers/ntb/hw/amd/ntb_hw_amd.c
> @@ -0,0 +1,1148 @@
> +/*
> + * This file is provided under a dual BSD/GPLv2 license.  When using or
> + *   redistributing this file, you may do so under either license.
> + *
> + *   GPL LICENSE SUMMARY
> + *
> + *   Copyright (C) 2016 Advanced Micro Devices, Inc. All Rights Reserved.
> + *
> + *   This program is free software; you can redistribute it and/or modify
> + *   it under the terms of version 2 of the GNU General Public License as
> + *   published by the Free Software Foundation.
> + *
> + *   BSD LICENSE
> + *
> + *   Copyright (C) 2016 Advanced Micro Devices, Inc. All Rights Reserved.
> + *
> + *   Redistribution and use in source and binary forms, with or without
> + *   modification, are permitted provided that the following conditions
> + *   are met:
> + *
> + *     * Redistributions of source code must retain the above copyright
> + *       notice, this list of conditions and the following disclaimer.
> + *     * Redistributions in binary form must reproduce the above copy
> + *       notice, this list of conditions and the following disclaimer in
> + *       the documentation and/or other materials provided with the
> + *       distribution.
> + *     * Neither the name of AMD Corporation nor the names of its
> + *       contributors may be used to endorse or promote products derived
> + *       from this software without specific prior written permission.
> + *
> + *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> + *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> + *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> + *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> + *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> + *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> + *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> + *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> + *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> + *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> + *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> + *
> + * AMD PCIe NTB Linux driver
> + *
> + * Contact Information:
> + * Xiangliang Yu<Xiangliang.Yu@amd.com>

Nit, space needed between 'u' and '<'

> + */
> +
> +#include <linux/debugfs.h>
> +#include <linux/delay.h>
> +#include <linux/init.h>
> +#include <linux/interrupt.h>
> +#include <linux/module.h>
> +#include <linux/acpi.h>
> +#include <linux/pci.h>
> +#include <linux/random.h>
> +#include <linux/slab.h>
> +#include <linux/ntb.h>
> +
> +#include "ntb_hw_amd.h"
> +
> +#define NTB_NAME	"ntb_hw_amd"
> +#define NTB_DESC	"AMD(R) PCI-E Non-Transparent Bridge Driver"
> +#define NTB_VER		"1.0"
> +
> +MODULE_DESCRIPTION(NTB_DESC);
> +MODULE_VERSION(NTB_VER);
> +MODULE_LICENSE("Dual BSD/GPL");
> +MODULE_AUTHOR("AMD Inc.");
> +
> +static const struct file_operations amd_ntb_debugfs_info;
> +static struct dentry *debugfs_dir;
> +
> +static const struct ntb_dev_ops amd_ntb_ops = {
> +	.mw_count		= amd_ntb_mw_count,
> +	.mw_get_range		= amd_ntb_mw_get_range,
> +	.mw_set_trans		= amd_ntb_mw_set_trans,
> +	.link_is_up		= amd_ntb_link_is_up,
> +	.link_enable		= amd_ntb_link_enable,
> +	.link_disable		= amd_ntb_link_disable,
> +	.db_valid_mask		= amd_ntb_db_valid_mask,
> +	.db_vector_count	= amd_ntb_db_vector_count,
> +	.db_vector_mask		= amd_ntb_db_vector_mask,
> +	.db_read		= amd_ntb_db_read,
> +	.db_clear		= amd_ntb_db_clear,
> +	.db_set_mask		= amd_ntb_db_set_mask,
> +	.db_clear_mask		= amd_ntb_db_clear_mask,
> +	.peer_db_addr		= amd_ntb_peer_db_addr,
> +	.peer_db_set		= amd_ntb_peer_db_set,
> +	.spad_count		= amd_ntb_spad_count,
> +	.spad_read		= amd_ntb_spad_read,
> +	.spad_write		= amd_ntb_spad_write,
> +	.peer_spad_addr		= amd_ntb_peer_spad_addr,
> +	.peer_spad_read		= amd_ntb_peer_spad_read,
> +	.peer_spad_write	= amd_ntb_peer_spad_write,
> +};
> +
> +static int ndev_mw_to_bar(struct amd_ntb_dev *ndev, int idx)
> +{
> +	if (idx < 0 || idx > ndev->mw_count)
> +		return -EINVAL;
> +
> +	return 1 << idx;
> +}
> +
> +static int amd_ntb_mw_count(struct ntb_dev *ntb)
> +{
> +	return ntb_ndev(ntb)->mw_count;
> +}
> +
> +static int amd_ntb_mw_get_range(struct ntb_dev *ntb, int idx,
> +				phys_addr_t *base,
> +				resource_size_t *size,
> +				resource_size_t *align,
> +				resource_size_t *align_size)
> +{
> +	struct amd_ntb_dev *ndev = ntb_ndev(ntb);
> +	int bar;
> +
> +	bar = ndev_mw_to_bar(ndev, idx);
> +	if (bar < 0)
> +		return bar;
> +
> +	if (base)
> +		*base = pci_resource_start(ndev->ntb.pdev, bar);
> +
> +	if (size)
> +		*size = pci_resource_len(ndev->ntb.pdev, bar);
> +
> +	if (align)
> +		*align = 4096;

Please use SZ_4K from sizes.h

> +
> +	if (align_size)
> +		*align_size = 1;
> +
> +	return 0;
> +}
> +
> +static int amd_ntb_mw_set_trans(struct ntb_dev *ntb, int idx,
> +				dma_addr_t addr, resource_size_t size)
> +{
> +	struct amd_ntb_dev *ndev = ntb_ndev(ntb);
> +	unsigned long xlat_reg, limit_reg = 0;
> +	resource_size_t mw_size;
> +	void __iomem *mmio, *peer_mmio;
> +	u64 base_addr, limit, reg_val;
> +	int bar;
> +
> +	bar = ndev_mw_to_bar(ndev, idx);
> +	if (bar < 0)
> +		return bar;
> +
> +	mw_size = pci_resource_len(ndev->ntb.pdev, bar);
> +
> +	/* make sure the range fits in the usable mw size */
> +	if (size > mw_size)
> +		return -EINVAL;
> +
> +	mmio = ndev->self_mmio;
> +	peer_mmio = ndev->peer_mmio;
> +
> +	base_addr = pci_resource_start(ndev->ntb.pdev, bar);
> +	if (bar == 1) {
> +		xlat_reg = AMD_BAR1XLAT_OFFSET;
> +		limit_reg = AMD_BAR1LMT_OFFSET;
> +	} else {
> +		xlat_reg = AMD_BAR23XLAT_OFFSET + ((bar - 2) << 3);
> +		limit_reg = AMD_BAR23LMT_OFFSET + ((bar - 2) << 3);
> +	}
> +
> +	if (bar != 1) {
> +		/* Set the limit if supported */
> +		limit = base_addr + size;
> +
> +		/* set and verify setting the translation address */
> +		iowrite64(addr, peer_mmio + xlat_reg);
> +		reg_val = ioread64(peer_mmio + xlat_reg);
> +		if (reg_val != addr) {
> +			iowrite64(0, peer_mmio + xlat_reg);
> +			return -EIO;
> +		}
> +
> +		/* set and verify setting the limit */
> +		iowrite64(limit, mmio + limit_reg);
> +		reg_val = ioread64(mmio + limit_reg);
> +		if (reg_val != limit) {
> +			iowrite64(base_addr, mmio + limit_reg);
> +			iowrite64(0, peer_mmio + xlat_reg);
> +			return -EIO;
> +		}
> +	} else {
> +		/* split bar addr range must all be 32 bit */
> +		if (addr & (~0ull << 32))
> +			return -EINVAL;
> +		if ((addr + size) & (~0ull << 32))
> +			return -EINVAL;
> +
> +		/* Set the limit if supported */
> +		limit = base_addr + size;
> +
> +		/* set and verify setting the translation address */
> +		iowrite64(addr, peer_mmio + xlat_reg);
> +		reg_val = ioread64(peer_mmio + xlat_reg);
> +		if (reg_val != addr) {
> +			iowrite64(0, peer_mmio + xlat_reg);
> +			return -EIO;
> +		}
> +
> +		/* set and verify setting the limit */
> +		writel(limit, mmio + limit_reg);
> +		reg_val = readl(mmio + limit_reg);
> +		if (reg_val != limit) {
> +			writel(base_addr, mmio + limit_reg);
> +			writel(0, peer_mmio + xlat_reg);
> +			return -EIO;
> +		}
> +	}

Being nit picky here again, but why is it necessary to make 2
consecutive checks for the same 'bar' variable.  Please combine them.

> +
> +	return 0;
> +}
> +
> +static int amd_link_is_up(struct amd_ntb_dev *ndev)
> +{
> +	/* set link down and clear flag */
> +	if (ndev->peer_sta & AMD_PEER_RESET_EVENT) {
> +		ndev->peer_sta &= ~AMD_PEER_RESET_EVENT;
> +		return 0;
> +	} else if (ndev->peer_sta & (AMD_PEER_D3_EVENT |
> +				     AMD_PEER_PMETO_EVENT)) {
> +		return 0;
> +	} else if (ndev->peer_sta & AMD_PEER_D0_EVENT) {
> +		ndev->peer_sta = 0;
> +		return 0;
> +	} else
> +		return NTB_LNK_STA_ACTIVE(ndev->cntl_sta);
> +}
> +
> +static int amd_ntb_link_is_up(struct ntb_dev *ntb,
> +			      enum ntb_speed *speed,
> +			      enum ntb_width *width)
> +{
> +	struct amd_ntb_dev *ndev = ntb_ndev(ntb);
> +	int ret = 0;
> +
> +	if (amd_link_is_up(ndev)) {
> +		if (speed)
> +			*speed = NTB_LNK_STA_SPEED(ndev->lnk_sta);
> +		if (width)
> +			*width = NTB_LNK_STA_WIDTH(ndev->lnk_sta);
> +
> +		dev_dbg(ndev_dev(ndev), "link is up.\n");
> +
> +		ret = 1;
> +	} else {
> +		if (speed)
> +			*speed = NTB_SPEED_NONE;
> +		if (width)
> +			*width = NTB_WIDTH_NONE;
> +
> +		dev_dbg(ndev_dev(ndev), "link is down.\n");
> +	}
> +
> +	return ret;
> +}
> +
> +static int amd_ntb_link_enable(struct ntb_dev *ntb,
> +			       enum ntb_speed max_speed,
> +			       enum ntb_width max_width)
> +{
> +	struct amd_ntb_dev *ndev = ntb_ndev(ntb);
> +	void __iomem *mmio = ndev->self_mmio;
> +	u32 ntb_ctl;
> +
> +	/* Enable event interrupt */
> +	ndev->int_mask &= ~AMD_EVENT_INTMASK;
> +	writel(ndev->int_mask, mmio + AMD_INTMASK_OFFSET);
> +
> +	if (ndev->ntb.topo == NTB_TOPO_SEC)
> +		return -EINVAL;
> +	dev_dbg(ndev_dev(ndev), "Enabling Link.\n");
> +
> +	ntb_ctl = readl(mmio + AMD_CNTL_OFFSET);
> +	ntb_ctl |= (3 << 20);

This "20" is a bit magical to me.  Please use a #define

> +	writel(ntb_ctl, mmio + AMD_CNTL_OFFSET);
> +
> +	return 0;
> +}
> +
> +static int amd_ntb_link_disable(struct ntb_dev *ntb)
> +{
> +	struct amd_ntb_dev *ndev = ntb_ndev(ntb);
> +	void __iomem *mmio = ndev->self_mmio;
> +	u32 ntb_ctl;
> +
> +	/* Disable event interrupt */
> +	ndev->int_mask |= AMD_EVENT_INTMASK;
> +	writel(ndev->int_mask, mmio + AMD_INTMASK_OFFSET);
> +
> +	if (ndev->ntb.topo == NTB_TOPO_SEC)
> +		return -EINVAL;
> +	dev_dbg(ndev_dev(ndev), "Enabling Link.\n");
> +
> +	ntb_ctl = readl(mmio + AMD_CNTL_OFFSET);
> +	ntb_ctl &= ~(3 << 16);

Should this be "20" or "16"?  Seems odd that you would use a different
offset to enable than disable.  Either way, a #define here would be
nice too.

> +	writel(ntb_ctl, mmio + AMD_CNTL_OFFSET);
> +
> +	return 0;
> +}
> +
> +static u64 amd_ntb_db_valid_mask(struct ntb_dev *ntb)
> +{
> +	return ntb_ndev(ntb)->db_valid_mask;
> +}
> +
> +static int amd_ntb_db_vector_count(struct ntb_dev *ntb)
> +{
> +	return ntb_ndev(ntb)->db_count;
> +}
> +
> +static u64 amd_ntb_db_vector_mask(struct ntb_dev *ntb, int db_vector)
> +{
> +	struct amd_ntb_dev *ndev = ntb_ndev(ntb);
> +
> +	if (db_vector < 0 || db_vector > ndev->db_count)
> +		return 0;
> +
> +	return ntb_ndev(ntb)->db_valid_mask & (1 << db_vector);
> +}
> +
> +static u64 amd_ntb_db_read(struct ntb_dev *ntb)
> +{
> +	struct amd_ntb_dev *ndev = ntb_ndev(ntb);
> +	void __iomem *mmio = ndev->self_mmio;
> +
> +	return (u64)readw(mmio + AMD_DBSTAT_OFFSET);

Is this cast necessary?

> +}
> +
> +static int amd_ntb_db_clear(struct ntb_dev *ntb, u64 db_bits)
> +{
> +	struct amd_ntb_dev *ndev = ntb_ndev(ntb);
> +	void __iomem *mmio = ndev->self_mmio;
> +
> +	writew((u16)db_bits, mmio + AMD_DBSTAT_OFFSET);
> +
> +	return 0;
> +}
> +
> +static int amd_ntb_db_set_mask(struct ntb_dev *ntb, u64 db_bits)
> +{
> +	struct amd_ntb_dev *ndev = ntb_ndev(ntb);
> +	void __iomem *mmio = ndev->self_mmio;
> +	unsigned long flags;
> +
> +	if (db_bits & ~ndev->db_valid_mask)
> +		return -EINVAL;
> +
> +	spin_lock_irqsave(&ndev->db_mask_lock, flags);
> +	ndev->db_mask |= db_bits;
> +	writew((u16)ndev->db_mask, mmio + AMD_DBMASK_OFFSET);
> +	spin_unlock_irqrestore(&ndev->db_mask_lock, flags);
> +
> +	return 0;
> +}
> +
> +static int amd_ntb_db_clear_mask(struct ntb_dev *ntb, u64 db_bits)
> +{
> +	struct amd_ntb_dev *ndev = ntb_ndev(ntb);
> +	void __iomem *mmio = ndev->self_mmio;
> +	unsigned long flags;
> +
> +	if (db_bits & ~ndev->db_valid_mask)
> +		return -EINVAL;
> +
> +	spin_lock_irqsave(&ndev->db_mask_lock, flags);
> +	ndev->db_mask &= ~db_bits;
> +	writew((u16)ndev->db_mask, mmio + AMD_DBMASK_OFFSET);
> +	spin_unlock_irqrestore(&ndev->db_mask_lock, flags);
> +
> +	return 0;
> +}
> +
> +static int amd_ntb_peer_db_addr(struct ntb_dev *ntb,
> +		phys_addr_t *db_addr,
> +		resource_size_t *db_size)

Nit, this doesn't seem like it would be aligned properly.

> +{
> +	struct amd_ntb_dev *ndev = ntb_ndev(ntb);
> +
> +	if (db_addr)
> +		*db_addr = (phys_addr_t)(ndev->peer_mmio + AMD_DBREQ_OFFSET);
> +	if (db_size)
> +		*db_size = sizeof(u32);
> +
> +	return 0;
> +}
> +
> +static int amd_ntb_peer_db_set(struct ntb_dev *ntb, u64 db_bits)
> +{
> +	struct amd_ntb_dev *ndev = ntb_ndev(ntb);
> +	void __iomem *mmio = ndev->self_mmio;
> +
> +	writew((u16)db_bits, mmio + AMD_DBREQ_OFFSET);
> +
> +	return 0;
> +}
> +
> +static int amd_ntb_spad_count(struct ntb_dev *ntb)
> +{
> +	return ntb_ndev(ntb)->spad_count;
> +}
> +
> +static u32 amd_ntb_spad_read(struct ntb_dev *ntb, int idx)
> +{
> +	struct amd_ntb_dev *ndev = ntb_ndev(ntb);
> +	void __iomem *mmio = ndev->self_mmio;
> +	u32 offset = 0;

Unnecessary init of this variable

> +
> +	if (idx < 0 || idx >= ndev->spad_count)
> +		return 0;
> +
> +	offset = ndev->self_spad + (idx << 2);
> +	return readl(mmio + AMD_SPAD_OFFSET + offset);
> +}
> +
> +static int amd_ntb_spad_write(struct ntb_dev *ntb,
> +				int idx, u32 val)
> +{
> +	struct amd_ntb_dev *ndev = ntb_ndev(ntb);
> +	void __iomem *mmio = ndev->self_mmio;
> +	u32 offset = 0;

Unnecessary init of this variable

> +
> +	if (idx < 0 || idx >= ndev->spad_count)
> +		return -EINVAL;
> +
> +	offset = ndev->self_spad + (idx << 2);
> +	writel(val, mmio + AMD_SPAD_OFFSET + offset);
> +
> +	return 0;
> +}
> +
> +static int amd_ntb_peer_spad_addr(struct ntb_dev *ntb, int idx,
> +				    phys_addr_t *spad_addr)
> +{
> +	struct amd_ntb_dev *ndev = ntb_ndev(ntb);
> +
> +	if (idx < 0 || idx >= ndev->spad_count)
> +		return -EINVAL;
> +
> +	if (spad_addr)
> +		*spad_addr = (phys_addr_t)(ndev->self_mmio + AMD_SPAD_OFFSET +
> +				ndev->peer_spad + (idx << 2));

This doesn't seem aligned properly either.

> +	return 0;
> +}
> +
> +static u32 amd_ntb_peer_spad_read(struct ntb_dev *ntb, int idx)
> +{
> +	struct amd_ntb_dev *ndev = ntb_ndev(ntb);
> +	void __iomem *mmio = ndev->self_mmio;
> +	u32 offset;
> +
> +	if (idx < 0 || idx >= ndev->spad_count)
> +		return -EINVAL;
> +
> +	offset = ndev->peer_spad + (idx << 2);
> +	return readl(mmio + AMD_SPAD_OFFSET + offset);
> +}
> +
> +static int amd_ntb_peer_spad_write(struct ntb_dev *ntb,
> +				     int idx, u32 val)
> +{
> +	struct amd_ntb_dev *ndev = ntb_ndev(ntb);
> +	void __iomem *mmio = ndev->self_mmio;
> +	u32 offset;
> +
> +	if (idx < 0 || idx >= ndev->spad_count)
> +		return -EINVAL;
> +
> +	offset = ndev->peer_spad + (idx << 2);
> +	writel(val, mmio + AMD_SPAD_OFFSET + offset);
> +
> +	return 0;
> +}
> +
> +static void amd_ack_SMU(struct amd_ntb_dev *ndev, u32 bit)

Nit, Please make this SMU lower case.  Also, it is a bit odd to see
SMUACK and ack_smu.  It is not a big deal, but since there has to be
another version to address Allen's comments (and mine), then this
won't add to much additional work to address.

> +{
> +	void __iomem *mmio = ndev->self_mmio;
> +	int reg;
> +
> +	reg = readl(mmio + AMD_SMUACK_OFFSET);
> +	reg |= bit;
> +	writel(reg, mmio + AMD_SMUACK_OFFSET);
> +
> +	ndev->peer_sta |= bit;
> +}
> +
> +static void amd_handle_event(struct amd_ntb_dev *ndev, int vec)
> +{
> +	void __iomem *mmio = ndev->self_mmio;
> +	u32 status;
> +
> +	status = readl(mmio + AMD_INTSTAT_OFFSET);
> +	if (!(status & AMD_EVENT_INTMASK))
> +		return;
> +
> +	dev_dbg(ndev_dev(ndev), "status = 0x%x and vec = %d\n", status, vec);
> +
> +	status &= AMD_EVENT_INTMASK;
> +	switch (status) {
> +	case AMD_PEER_FLUSH_EVENT:
> +		dev_info(ndev_dev(ndev), "Flush is done.\n");
> +		break;
> +	case AMD_PEER_RESET_EVENT:
> +		amd_ack_SMU(ndev, AMD_PEER_RESET_EVENT);
> +
> +		/* link down first */
> +		ntb_link_event(&ndev->ntb);
> +		/* polling peer status */
> +		schedule_delayed_work(&ndev->hb_timer, AMD_LINK_HB_TIMEOUT);
> +
> +		break;
> +	case AMD_PEER_D3_EVENT:
> +	case AMD_PEER_PMETO_EVENT:
> +		amd_ack_SMU(ndev, status);
> +
> +		/* link down */
> +		ntb_link_event(&ndev->ntb);
> +
> +		break;
> +	case AMD_PEER_D0_EVENT:
> +		mmio = ndev->peer_mmio;
> +		status = readl(mmio + AMD_PMESTAT_OFFSET);
> +		/* check if this is WAKEUP event */
> +		if (status & 0x1)
> +			dev_info(ndev_dev(ndev), "Wakeup is done.\n");
> +
> +		amd_ack_SMU(ndev, AMD_PEER_D0_EVENT);
> +
> +		if (amd_link_is_up(ndev))
> +			ntb_link_event(&ndev->ntb);
> +		else
> +			schedule_delayed_work(&ndev->hb_timer,
> +						AMD_LINK_HB_TIMEOUT);
> +		break;
> +	default:
> +		dev_info(ndev_dev(ndev), "event status = 0x%x.\n", status);
> +		break;
> +	}
> +}
> +
> +static irqreturn_t ndev_interrupt(struct amd_ntb_dev *ndev, int vec)
> +{
> +	dev_dbg(ndev_dev(ndev), "vec %d\n", vec);
> +
> +	if (vec > (ndev->msix_vec_count - 1)) {
> +		dev_err(ndev_dev(ndev), "Invalid interrupt.\n");
> +		return IRQ_HANDLED;
> +	}

Unless this actually happens, you might not want the unnecessary
branch in your fast path.

> +
> +	if (vec > 15 || (ndev->msix_vec_count == 1))
> +		amd_handle_event(ndev, vec);
> +
> +	if (vec < 16)

I'd prefer to see a #define here for 16

> +		ntb_db_event(&ndev->ntb, vec);
> +
> +	return IRQ_HANDLED;
> +}
> +
> +static irqreturn_t ndev_vec_isr(int irq, void *dev)
> +{
> +	struct amd_ntb_vec *nvec = dev;
> +
> +	return ndev_interrupt(nvec->ndev, nvec->num);
> +}
> +
> +static irqreturn_t ndev_irq_isr(int irq, void *dev)
> +{
> +	struct amd_ntb_dev *ndev = dev;
> +
> +	return ndev_interrupt(ndev, irq - ndev_pdev(ndev)->irq);
> +}
> +
> +static int ndev_init_isr(struct amd_ntb_dev *ndev,
> +				int msix_min, int msix_max)
> +{
> +	struct pci_dev *pdev;
> +	int rc, i, msix_count, node;
> +
> +	pdev = ndev_pdev(ndev);
> +
> +	node = dev_to_node(&pdev->dev);
> +
> +	ndev->db_mask = ndev->db_valid_mask;
> +
> +	/* Try to set up msix irq */
> +	ndev->vec = kzalloc_node(msix_max * sizeof(*ndev->vec),
> +				 GFP_KERNEL, node);
> +	if (!ndev->vec)
> +		goto err_msix_vec_alloc;
> +
> +	ndev->msix = kzalloc_node(msix_max * sizeof(*ndev->msix),
> +				  GFP_KERNEL, node);
> +	if (!ndev->msix)
> +		goto err_msix_alloc;
> +
> +	for (i = 0; i < msix_max; ++i)
> +		ndev->msix[i].entry = i;
> +
> +	msix_count = pci_enable_msix_range(pdev, ndev->msix,
> +					   msix_min, msix_max);
> +	if (msix_count < 0)
> +		goto err_msix_enable;
> +
> +	/* Disable MSIX if msix count is less than 16 */

Why?  Is there a HW errata or limitation (if so, write so in this
comment)?  If not, MSIX has many benefits over legacy interrupts, and
it doesn't make sense to walk away from that.

> +	if (msix_count < msix_min) {
> +		pci_disable_msix(pdev);
> +		goto err_msix_enable;
> +	}
> +
> +	for (i = 0; i < msix_count; ++i) {
> +		ndev->vec[i].ndev = ndev;
> +		ndev->vec[i].num = i;
> +		rc = request_irq(ndev->msix[i].vector, ndev_vec_isr, 0,
> +				 "ndev_vec_isr", &ndev->vec[i]);
> +		if (rc)
> +			goto err_msix_request;
> +	}
> +
> +	dev_dbg(ndev_dev(ndev), "Using msix interrupts\n");
> +	ndev->db_count = msix_min;
> +	ndev->msix_vec_count = msix_max;
> +	return 0;
> +
> +err_msix_request:
> +	while (i-- > 0)
> +		free_irq(ndev->msix[i].vector, ndev);
> +	pci_disable_msix(pdev);
> +err_msix_enable:
> +	kfree(ndev->msix);
> +err_msix_alloc:
> +	kfree(ndev->vec);
> +err_msix_vec_alloc:
> +	ndev->msix = NULL;
> +	ndev->vec = NULL;
> +
> +	/* Try to set up msi irq */
> +	rc = pci_enable_msi(pdev);
> +	if (rc)
> +		goto err_msi_enable;
> +
> +	rc = request_irq(pdev->irq, ndev_irq_isr, 0,
> +			 "ndev_irq_isr", ndev);
> +	if (rc)
> +		goto err_msi_request;
> +

I have problems with the msix/msi/intx selection being done this way,
but I see it being done in the Intel driver.  So....I'll fix it there
first.

> +	dev_dbg(ndev_dev(ndev), "Using msi interrupts\n");
> +	ndev->db_count = 1;
> +	ndev->msix_vec_count = 1;
> +	return 0;
> +
> +err_msi_request:
> +	pci_disable_msi(pdev);
> +err_msi_enable:
> +
> +	/* Try to set up intx irq */
> +	pci_intx(pdev, 1);
> +
> +	rc = request_irq(pdev->irq, ndev_irq_isr, IRQF_SHARED,
> +			 "ndev_irq_isr", ndev);
> +	if (rc)
> +		goto err_intx_request;
> +
> +	dev_dbg(ndev_dev(ndev), "Using intx interrupts\n");
> +	ndev->db_count = 1;
> +	ndev->msix_vec_count = 1;
> +	return 0;
> +
> +err_intx_request:
> +	return rc;
> +}
> +
> +static void ndev_deinit_isr(struct amd_ntb_dev *ndev)
> +{
> +	struct pci_dev *pdev;
> +	void __iomem *mmio = ndev->self_mmio;
> +	int i;
> +
> +	pdev = ndev_pdev(ndev);
> +
> +	/* Mask all doorbell interrupts */
> +	ndev->db_mask = ndev->db_valid_mask;
> +	writel(ndev->db_mask, mmio + AMD_DBMASK_OFFSET);
> +
> +	if (ndev->msix) {
> +		i = ndev->msix_vec_count;
> +		while (i--)
> +			free_irq(ndev->msix[i].vector, &ndev->vec[i]);
> +		pci_disable_msix(pdev);
> +		kfree(ndev->msix);
> +		kfree(ndev->vec);
> +	} else {
> +		free_irq(pdev->irq, ndev);
> +		if (pci_dev_msi_enabled(pdev))
> +			pci_disable_msi(pdev);
> +		else
> +			pci_intx(pdev, 0);
> +	}
> +}
> +
> +static ssize_t ndev_debugfs_read(struct file *filp, char __user *ubuf,
> +				 size_t count, loff_t *offp)
> +{
> +	struct amd_ntb_dev *ndev;
> +	void __iomem *mmio;
> +	char *buf;
> +	size_t buf_size;
> +	ssize_t ret, off;
> +	union { u64 v64; u32 v32; u16 v16; } u;
> +
> +	ndev = filp->private_data;
> +	mmio = ndev->self_mmio;
> +
> +	buf_size = min(count, 0x800ul);
> +
> +	buf = kmalloc(buf_size, GFP_KERNEL);
> +	if (!buf)
> +		return -ENOMEM;
> +
> +	off = 0;
> +
> +	off += scnprintf(buf + off, buf_size - off,
> +			 "NTB Device Information:\n");
> +
> +	off += scnprintf(buf + off, buf_size - off,
> +			 "Connection Topology -\t%s\n",
> +			 ntb_topo_string(ndev->ntb.topo));
> +
> +	off += scnprintf(buf + off, buf_size - off,
> +			 "LNK STA -\t\t%#06x\n", ndev->lnk_sta);
> +
> +	if (!amd_link_is_up(ndev)) {
> +		off += scnprintf(buf + off, buf_size - off,
> +				 "Link Status -\t\tDown\n");
> +	} else {
> +		off += scnprintf(buf + off, buf_size - off,
> +				 "Link Status -\t\tUp\n");
> +		off += scnprintf(buf + off, buf_size - off,
> +				 "Link Speed -\t\tPCI-E Gen %u\n",
> +				 NTB_LNK_STA_SPEED(ndev->lnk_sta));
> +		off += scnprintf(buf + off, buf_size - off,
> +				 "Link Width -\t\tx%u\n",
> +				 NTB_LNK_STA_WIDTH(ndev->lnk_sta));
> +	}
> +
> +	off += scnprintf(buf + off, buf_size - off,
> +			 "Memory Window Count -\t%u\n", ndev->mw_count);
> +	off += scnprintf(buf + off, buf_size - off,
> +			 "Scratchpad Count -\t%u\n", ndev->spad_count);
> +	off += scnprintf(buf + off, buf_size - off,
> +			 "Doorbell Count -\t%u\n", ndev->db_count);
> +	off += scnprintf(buf + off, buf_size - off,
> +			 "MSIX Vector Count -\t%u\n", ndev->msix_vec_count);
> +
> +	off += scnprintf(buf + off, buf_size - off,
> +			 "Doorbell Valid Mask -\t%#llx\n", ndev->db_valid_mask);
> +
> +	u.v32 = readl(ndev->self_mmio + AMD_DBMASK_OFFSET);
> +	off += scnprintf(buf + off, buf_size - off,
> +			 "Doorbell Mask -\t\t\t%#06x\n", u.v32);
> +
> +	u.v32 = readl(mmio + AMD_DBSTAT_OFFSET);
> +	off += scnprintf(buf + off, buf_size - off,
> +			 "Doorbell Bell -\t\t\t%#06x\n", u.v32);
> +
> +	off += scnprintf(buf + off, buf_size - off,
> +			 "\nNTB Incoming XLAT:\n");
> +
> +	u.v64 = ioread64(mmio + AMD_BAR1XLAT_OFFSET);
> +	off += scnprintf(buf + off, buf_size - off,
> +			 "XLAT1 -\t\t%#018llx\n", u.v64);
> +
> +	u.v64 = ioread64(ndev->self_mmio + AMD_BAR23XLAT_OFFSET);
> +	off += scnprintf(buf + off, buf_size - off,
> +				 "XLAT23 -\t\t%#018llx\n", u.v64);
> +
> +	u.v64 = ioread64(ndev->self_mmio + AMD_BAR45XLAT_OFFSET);
> +	off += scnprintf(buf + off, buf_size - off,
> +				 "XLAT45 -\t\t%#018llx\n", u.v64);
> +
> +	u.v32 = readl(mmio + AMD_BAR1LMT_OFFSET);
> +	off += scnprintf(buf + off, buf_size - off,
> +			 "LMT1 -\t\t\t%#06x\n", u.v32);
> +
> +	u.v64 = ioread64(ndev->self_mmio + AMD_BAR23LMT_OFFSET);
> +	off += scnprintf(buf + off, buf_size - off,
> +				 "LMT23 -\t\t\t%#018llx\n", u.v64);
> +
> +	u.v64 = ioread64(ndev->self_mmio + AMD_BAR45LMT_OFFSET);
> +	off += scnprintf(buf + off, buf_size - off,
> +				 "LMT45 -\t\t\t%#018llx\n", u.v64);
> +
> +	ret = simple_read_from_buffer(ubuf, count, offp, buf, off);
> +	kfree(buf);
> +	return ret;
> +}
> +
> +static void ndev_init_debugfs(struct amd_ntb_dev *ndev)
> +{
> +	if (!debugfs_dir) {
> +		ndev->debugfs_dir = NULL;
> +		ndev->debugfs_info = NULL;
> +	} else {
> +		ndev->debugfs_dir =
> +			debugfs_create_dir(ndev_name(ndev), debugfs_dir);
> +		if (!ndev->debugfs_dir)
> +			ndev->debugfs_info = NULL;
> +		else
> +			ndev->debugfs_info =
> +				debugfs_create_file("info", S_IRUSR,
> +						    ndev->debugfs_dir, ndev,
> +						    &amd_ntb_debugfs_info);
> +	}
> +}
> +
> +static void ndev_deinit_debugfs(struct amd_ntb_dev *ndev)
> +{
> +	debugfs_remove_recursive(ndev->debugfs_dir);
> +}
> +
> +static inline void ndev_init_struct(struct amd_ntb_dev *ndev,
> +				    struct pci_dev *pdev)
> +{
> +	ndev->ntb.pdev = pdev;
> +	ndev->ntb.topo = NTB_TOPO_NONE;
> +	ndev->ntb.ops = &amd_ntb_ops;
> +	ndev->int_mask = AMD_EVENT_INTMASK;
> +	spin_lock_init(&ndev->db_mask_lock);
> +}
> +
> +static int amd_poll_link(struct amd_ntb_dev *ndev)
> +{
> +	void __iomem *mmio = ndev->peer_mmio;
> +	u32 reg, stat;
> +	int rc;
> +
> +	reg = readl(mmio + AMD_SIDEINFO_OFFSET);
> +	reg &= NTB_LIN_STA_ACTIVE_BIT;
> +
> +	dev_dbg(ndev_dev(ndev), "%s: reg_val = 0x%x.\n", __func__, reg);
> +
> +	if (reg == ndev->cntl_sta)
> +		return 0;
> +
> +	ndev->cntl_sta = reg;
> +
> +	rc = pci_read_config_dword(ndev->ntb.pdev,
> +			AMD_LINK_STATUS_OFFSET, &stat);
> +	if (rc)
> +		return 0;
> +	ndev->lnk_sta = stat;
> +
> +	return 1;
> +}
> +
> +static void amd_link_hb(struct work_struct *work)
> +{
> +	struct amd_ntb_dev *ndev = hb_ndev(work);
> +
> +	if (amd_poll_link(ndev))
> +		ntb_link_event(&ndev->ntb);
> +
> +	if (!amd_link_is_up(ndev))
> +		schedule_delayed_work(&ndev->hb_timer, AMD_LINK_HB_TIMEOUT);
> +}
> +
> +static int amd_init_isr(struct amd_ntb_dev *ndev)
> +{
> +	return ndev_init_isr(ndev, AMD_DB_CNT, AMD_MSIX_VECTOR_CNT);
> +}
> +
> +static void amd_init_side_info(struct amd_ntb_dev *ndev)
> +{
> +	void __iomem *mmio = ndev->self_mmio;
> +	unsigned int reg = 0;

variable initialized unnecessarily

> +
> +	reg = readl(mmio + AMD_SIDEINFO_OFFSET);
> +	if (!(reg & 0x2)) {
> +		reg |= 0x2;

I'd like a #define for the "2" above, as you are using it multiple
times.

> +		writel(reg, mmio + AMD_SIDEINFO_OFFSET);
> +	}
> +}
> +
> +static void amd_deinit_side_info(struct amd_ntb_dev *ndev)
> +{
> +	void __iomem *mmio = ndev->self_mmio;
> +	unsigned int reg = 0;

variable initialized unnecessarily

> +
> +	reg = readl(mmio + AMD_SIDEINFO_OFFSET);
> +	if (reg & 0x2) {
> +		reg &= ~(0x2);
> +		writel(reg, mmio + AMD_SIDEINFO_OFFSET);
> +		readl(mmio + AMD_SIDEINFO_OFFSET);
> +	}
> +}
> +
> +static int amd_init_ntb(struct amd_ntb_dev *ndev)
> +{
> +	void __iomem *mmio = ndev->self_mmio;
> +
> +	ndev->mw_count = AMD_MW_CNT;
> +	ndev->spad_count = AMD_SPADS_CNT;
> +	ndev->db_count = AMD_DB_CNT;
> +
> +	switch (ndev->ntb.topo) {
> +	case NTB_TOPO_PRI:
> +	case NTB_TOPO_SEC:
> +		ndev->spad_count >>= 1;
> +		if (ndev->ntb.topo == NTB_TOPO_PRI) {
> +			ndev->self_spad = 0;
> +			ndev->peer_spad = 0x20;
> +		} else {
> +			ndev->self_spad = 0x20;
> +			ndev->peer_spad = 0;
> +		}
> +
> +		INIT_DELAYED_WORK(&ndev->hb_timer, amd_link_hb);
> +		schedule_delayed_work(&ndev->hb_timer, AMD_LINK_HB_TIMEOUT);
> +
> +		break;
> +	default:
> +		dev_err(ndev_dev(ndev), "AMD NTB does not support B2B mode.\n");
> +		return -EINVAL;
> +	}
> +
> +	ndev->db_valid_mask = BIT_ULL(ndev->db_count) - 1;
> +
> +	/* Mask event interrupts */
> +	writel(ndev->int_mask, mmio + AMD_INTMASK_OFFSET);
> +
> +	return 0;
> +}
> +
> +static inline enum ntb_topo amd_get_topo(struct amd_ntb_dev *ndev)

It is usually better to let the compiler determine what to inline and
what not to inline..

> +{
> +	void __iomem *mmio = ndev->self_mmio;
> +	u32 info;
> +
> +	info = readl(mmio + AMD_SIDEINFO_OFFSET);
> +	if (info & AMD_SIDE_MASK)
> +		return NTB_TOPO_SEC;
> +	else
> +		return NTB_TOPO_PRI;
> +}
> +
> +static int amd_init_dev(struct amd_ntb_dev *ndev)
> +{
> +	struct pci_dev *pdev;
> +	int rc = 0;
> +
> +	pdev = ndev_pdev(ndev);
> +
> +	ndev->ntb.topo = amd_get_topo(ndev);
> +	dev_dbg(ndev_dev(ndev), "AMD NTB topo is %s\n",
> +			ntb_topo_string(ndev->ntb.topo));
> +
> +	rc = amd_init_ntb(ndev);
> +	if (rc)
> +		return rc;
> +
> +	rc = amd_init_isr(ndev);
> +	if (rc) {
> +		dev_err(ndev_dev(ndev), "fail to init isr.\n");
> +		return rc;
> +	}
> +
> +	ndev->db_valid_mask = BIT_ULL(ndev->db_count) - 1;
> +
> +	return 0;
> +}
> +
> +static void amd_deinit_dev(struct amd_ntb_dev *ndev)
> +{
> +	cancel_delayed_work_sync(&ndev->hb_timer);
> +
> +	ndev_deinit_isr(ndev);
> +}
> +
> +static int amd_ntb_init_pci(struct amd_ntb_dev *ndev,
> +			    struct pci_dev *pdev)
> +{
> +	int rc;
> +
> +	pci_set_drvdata(pdev, ndev);
> +
> +	rc = pci_enable_device(pdev);
> +	if (rc)
> +		goto err_pci_enable;
> +
> +	rc = pci_request_regions(pdev, NTB_NAME);
> +	if (rc)
> +		goto err_pci_regions;
> +
> +	pci_set_master(pdev);
> +
> +	rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
> +	if (rc) {
> +		rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
> +		if (rc)
> +			goto err_dma_mask;
> +		dev_warn(ndev_dev(ndev), "Cannot DMA highmem\n");
> +	}
> +
> +	rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
> +	if (rc) {
> +		rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
> +		if (rc)
> +			goto err_dma_mask;
> +		dev_warn(ndev_dev(ndev), "Cannot DMA consistent highmem\n");
> +	}
> +
> +	ndev->self_mmio = pci_iomap(pdev, 0, 0);
> +	if (!ndev->self_mmio) {
> +		rc = -EIO;
> +		goto err_mmio;
> +	}
> +	ndev->peer_mmio = ndev->self_mmio + AMD_PEER_OFFSET;
> +
> +	return 0;
> +
> +err_mmio:

seems like an unnecessary goto destination here

> +err_dma_mask:
> +	pci_clear_master(pdev);
> +err_pci_regions:
> +	pci_disable_device(pdev);
> +err_pci_enable:
> +	pci_set_drvdata(pdev, NULL);
> +	return rc;
> +}
> +
> +static void amd_ntb_deinit_pci(struct amd_ntb_dev *ndev)
> +{
> +	struct pci_dev *pdev = ndev_pdev(ndev);
> +
> +	pci_iounmap(pdev, ndev->self_mmio);
> +
> +	pci_clear_master(pdev);
> +	pci_release_regions(pdev);
> +	pci_disable_device(pdev);
> +	pci_set_drvdata(pdev, NULL);
> +}
> +
> +
> +static int amd_ntb_pci_probe(struct pci_dev *pdev,
> +			     const struct pci_device_id *id)
> +{
> +	struct amd_ntb_dev *ndev;
> +	int rc, node;
> +
> +	node = dev_to_node(&pdev->dev);
> +
> +	ndev = kzalloc_node(sizeof(*ndev), GFP_KERNEL, node);
> +	if (!ndev) {
> +		rc = -ENOMEM;
> +		goto err_ndev;
> +	}
> +
> +	ndev_init_struct(ndev, pdev);
> +
> +	rc = amd_ntb_init_pci(ndev, pdev);
> +	if (rc)
> +		goto err_init_pci;
> +
> +	rc = amd_init_dev(ndev);
> +	if (rc)
> +		goto err_init_dev;
> +
> +	/* write side info */
> +	amd_init_side_info(ndev);
> +
> +	amd_poll_link(ndev);
> +
> +	ndev_init_debugfs(ndev);
> +
> +	rc = ntb_register_device(&ndev->ntb);
> +	if (rc)
> +		goto err_register;
> +
> +	dev_info(&pdev->dev, "NTB device registered.\n");
> +
> +	return 0;
> +
> +err_register:
> +	ndev_deinit_debugfs(ndev);
> +	amd_deinit_dev(ndev);
> +err_init_dev:
> +	amd_ntb_deinit_pci(ndev);
> +err_init_pci:
> +	kfree(ndev);
> +err_ndev:
> +	return rc;
> +}
> +
> +static void amd_ntb_pci_remove(struct pci_dev *pdev)
> +{
> +	struct amd_ntb_dev *ndev = pci_get_drvdata(pdev);
> +
> +	ntb_unregister_device(&ndev->ntb);
> +	ndev_deinit_debugfs(ndev);
> +	amd_deinit_side_info(ndev);
> +	amd_deinit_dev(ndev);
> +	amd_ntb_deinit_pci(ndev);
> +	kfree(ndev);
> +}
> +
> +static const struct file_operations amd_ntb_debugfs_info = {
> +	.owner = THIS_MODULE,
> +	.open = simple_open,
> +	.read = ndev_debugfs_read,
> +};
> +
> +static const struct pci_device_id amd_ntb_pci_tbl[] = {
> +	{PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_NTB)},
> +	{0}
> +};
> +MODULE_DEVICE_TABLE(pci, amd_ntb_pci_tbl);
> +
> +static struct pci_driver amd_ntb_pci_driver = {
> +	.name		= KBUILD_MODNAME,
> +	.id_table	= amd_ntb_pci_tbl,
> +	.probe		= amd_ntb_pci_probe,
> +	.remove		= amd_ntb_pci_remove,
> +};
> +
> +static int __init amd_ntb_pci_driver_init(void)
> +{
> +	pr_info("%s %s\n", NTB_DESC, NTB_VER);
> +
> +	if (debugfs_initialized())
> +		debugfs_dir = debugfs_create_dir(KBUILD_MODNAME, NULL);
> +
> +	return pci_register_driver(&amd_ntb_pci_driver);
> +}
> +module_init(amd_ntb_pci_driver_init);
> +
> +static void __exit amd_ntb_pci_driver_exit(void)
> +{
> +	pci_unregister_driver(&amd_ntb_pci_driver);
> +	debugfs_remove_recursive(debugfs_dir);
> +}
> +module_exit(amd_ntb_pci_driver_exit);
> diff --git a/drivers/ntb/hw/amd/ntb_hw_amd.h b/drivers/ntb/hw/amd/ntb_hw_amd.h
> new file mode 100644
> index 0000000..31021c0
> --- /dev/null
> +++ b/drivers/ntb/hw/amd/ntb_hw_amd.h
> @@ -0,0 +1,246 @@
> +/*
> + * This file is provided under a dual BSD/GPLv2 license.  When using or
> + *   redistributing this file, you may do so under either license.
> + *
> + *   GPL LICENSE SUMMARY
> + *
> + *   Copyright (C) 2016 Advanced Micro Devices, Inc. All Rights Reserved.
> + *
> + *   This program is free software; you can redistribute it and/or modify
> + *   it under the terms of version 2 of the GNU General Public License as
> + *   published by the Free Software Foundation.
> + *
> + *   BSD LICENSE
> + *
> + *   Copyright (C) 2016 Advanced Micro Devices, Inc. All Rights Reserved.
> + *
> + *   Redistribution and use in source and binary forms, with or without
> + *   modification, are permitted provided that the following conditions
> + *   are met:
> + *
> + *     * Redistributions of source code must retain the above copyright
> + *       notice, this list of conditions and the following disclaimer.
> + *     * Redistributions in binary form must reproduce the above copy
> + *       notice, this list of conditions and the following disclaimer in
> + *       the documentation and/or other materials provided with the
> + *       distribution.
> + *     * Neither the name of AMD Corporation nor the names of its
> + *       contributors may be used to endorse or promote products derived
> + *       from this software without specific prior written permission.
> + *
> + *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> + *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> + *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> + *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> + *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> + *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> + *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> + *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> + *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> + *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> + *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> + *
> + * AMD PCIe NTB Linux driver
> + *
> + * Contact Information:
> + * Xiangliang Yu<Xiangliang.Yu@amd.com>

Same comment as above about the space needed here.

> + */
> +
> +#ifndef NTB_HW_AMD_H
> +#define NTB_HW_AMD_H
> +
> +#include <linux/ntb.h>
> +#include <linux/pci.h>
> +
> +#define PCI_DEVICE_ID_AMD_NTB	0x145B
> +#define AMD_LINK_HB_TIMEOUT	msecs_to_jiffies(1000)

Tab align the second half of these

> +#define AMD_LINK_STATUS_OFFSET	0x68
> +#define NTB_LIN_STA_ACTIVE_BIT	0x00000002
> +#define NTB_LNK_STA_SPEED_MASK	0x000F0000
> +#define NTB_LNK_STA_WIDTH_MASK	0x03F00000
> +#define NTB_LNK_STA_ACTIVE(x)	(!!((x) & NTB_LIN_STA_ACTIVE_BIT))
> +#define NTB_LNK_STA_SPEED(x)	(((x) & NTB_LNK_STA_SPEED_MASK) >> 16)
> +#define NTB_LNK_STA_WIDTH(x)	(((x) & NTB_LNK_STA_WIDTH_MASK) >> 20)
> +
> +#ifndef ioread64
> +#ifdef readq
> +#define ioread64 readq
> +#else
> +#define ioread64 _ioread64
> +static inline u64 _ioread64(void __iomem *mmio)
> +{
> +	u64 low, high;
> +
> +	low = ioread32(mmio);
> +	high = ioread32(mmio + sizeof(u32));
> +	return low | (high << 32);
> +}
> +#endif
> +#endif
> +
> +#ifndef iowrite64
> +#ifdef writeq
> +#define iowrite64 writeq
> +#else
> +#define iowrite64 _iowrite64
> +static inline void _iowrite64(u64 val, void __iomem *mmio)
> +{
> +	iowrite32(val, mmio);
> +	iowrite32(val >> 32, mmio + sizeof(u32));
> +}
> +#endif
> +#endif
> +
> +enum {
> +	/* AMD NTB Capability */
> +	AMD_MW_CNT		= 3,
> +	AMD_DB_CNT		= 16,
> +	AMD_MSIX_VECTOR_CNT	= 24,
> +	AMD_SPADS_CNT		= 16,
> +
> +	/*  AMD NTB register offset */
> +	AMD_CNTL_OFFSET		= 0x200,
> +
> +	/* NTB control register bits */
> +	PMM_REG_CTL		= BIT(21),
> +	SMM_REG_CTL		= BIT(20),
> +	SMM_REG_ACC_PATH	= BIT(18),
> +	PMM_REG_ACC_PATH	= BIT(17),
> +	NTB_CLK_EN		= BIT(16),
> +
> +	AMD_STA_OFFSET		= 0x204,
> +	AMD_PGSLV_OFFSET	= 0x208,
> +	AMD_SPAD_MUX_OFFSET	= 0x20C,
> +	AMD_SPAD_OFFSET		= 0x210,
> +	AMD_RSMU_HCID		= 0x250,
> +	AMD_RSMU_SIID		= 0x254,
> +	AMD_PSION_OFFSET	= 0x300,
> +	AMD_SSION_OFFSET	= 0x330,
> +	AMD_MMINDEX_OFFSET	= 0x400,
> +	AMD_MMDATA_OFFSET	= 0x404,
> +	AMD_SIDEINFO_OFFSET	= 0x408,
> +
> +	AMD_SIDE_MASK		= BIT(0),
> +
> +	/* limit register */
> +	AMD_ROMBARLMT_OFFSET	= 0x410,
> +	AMD_BAR1LMT_OFFSET	= 0x414,
> +	AMD_BAR23LMT_OFFSET	= 0x418,
> +	AMD_BAR45LMT_OFFSET	= 0x420,
> +	/* xlat address */
> +	AMD_POMBARXLAT_OFFSET	= 0x428,
> +	AMD_BAR1XLAT_OFFSET	= 0x430,
> +	AMD_BAR23XLAT_OFFSET	= 0x438,
> +	AMD_BAR45XLAT_OFFSET	= 0x440,
> +	/* doorbell and interrupt */
> +	AMD_DBFM_OFFSET		= 0x450,
> +	AMD_DBREQ_OFFSET	= 0x454,
> +	AMD_MIRRDBSTAT_OFFSET	= 0x458,
> +	AMD_DBMASK_OFFSET	= 0x45C,
> +	AMD_DBSTAT_OFFSET	= 0x460,
> +	AMD_INTMASK_OFFSET	= 0x470,
> +	AMD_INTSTAT_OFFSET	= 0x474,
> +
> +	/* event type */
> +	AMD_PEER_FLUSH_EVENT	= BIT(0),
> +	AMD_PEER_RESET_EVENT	= BIT(1),
> +	AMD_PEER_D3_EVENT	= BIT(2),
> +	AMD_PEER_PMETO_EVENT	= BIT(3),
> +	AMD_PEER_D0_EVENT	= BIT(4),
> +	AMD_EVENT_INTMASK	= (AMD_PEER_FLUSH_EVENT |
> +				AMD_PEER_RESET_EVENT | AMD_PEER_D3_EVENT |
> +				AMD_PEER_PMETO_EVENT | AMD_PEER_D0_EVENT),
> +
> +	AMD_PMESTAT_OFFSET	= 0x480,
> +	AMD_PMSGTRIG_OFFSET	= 0x490,
> +	AMD_LTRLATENCY_OFFSET	= 0x494,
> +	AMD_FLUSHTRIG_OFFSET	= 0x498,
> +
> +	/* SMU register*/
> +	AMD_SMUACK_OFFSET	= 0x4A0,
> +	AMD_SINRST_OFFSET	= 0x4A4,
> +	AMD_RSPNUM_OFFSET	= 0x4A8,
> +	AMD_SMU_SPADMUTEX	= 0x4B0,
> +	AMD_SMU_SPADOFFSET	= 0x4B4,
> +
> +	AMD_PEER_OFFSET		= 0x400,
> +};

It seems extremely odd to mix and match different kinds of things in
the same enumerated type.  I'd prefer to have these as #defines or
separated enum types.

> +
> +struct amd_ntb_dev;
> +
> +struct amd_ntb_vec {
> +	struct amd_ntb_dev	*ndev;
> +	int			num;
> +};
> +
> +struct amd_ntb_dev {
> +	struct ntb_dev ntb;
> +
> +	u32 ntb_side;
> +	u32 lnk_sta;
> +	u32 cntl_sta;
> +	u32 peer_sta;
> +
> +	unsigned char mw_count;
> +	unsigned char spad_count;
> +	unsigned char db_count;
> +	unsigned char msix_vec_count;
> +
> +	u64 db_valid_mask;
> +	u64 db_mask;
> +	u32 int_mask;
> +
> +	struct msix_entry *msix;
> +	struct amd_ntb_vec *vec;
> +
> +	spinlock_t db_mask_lock;
> +
> +	void __iomem *self_mmio;
> +	void __iomem *peer_mmio;
> +	unsigned int self_spad;
> +	unsigned int peer_spad;
> +
> +	struct delayed_work hb_timer;
> +
> +	struct dentry *debugfs_dir;
> +	struct dentry *debugfs_info;
> +};
> +
> +#define ndev_pdev(ndev) ((ndev)->ntb.pdev)
> +#define ndev_name(ndev) pci_name(ndev_pdev(ndev))
> +#define ndev_dev(ndev) (&ndev_pdev(ndev)->dev)
> +#define ntb_ndev(__ntb) container_of(__ntb, struct amd_ntb_dev, ntb)
> +#define hb_ndev(__work) container_of(__work, struct amd_ntb_dev, hb_timer.work)
> +

This should probably be moved into the device driver C file (as they
are not called anywhere else).

> +static int amd_ntb_mw_count(struct ntb_dev *ntb);
> +static int amd_ntb_mw_get_range(struct ntb_dev *ntb, int idx,
> +			phys_addr_t *base, resource_size_t *size,
> +			resource_size_t *align, resource_size_t *algin_size);
> +static int amd_ntb_mw_set_trans(struct ntb_dev *ndev, int idx,
> +				dma_addr_t addr, resource_size_t size);
> +static int amd_ntb_link_is_up(struct ntb_dev *ntb,
> +				enum ntb_speed *speed,
> +				enum ntb_width *width);
> +static int amd_ntb_link_enable(struct ntb_dev *ntb,
> +				enum ntb_speed speed,
> +				enum ntb_width width);
> +static int amd_ntb_link_disable(struct ntb_dev *ntb);
> +static u64 amd_ntb_db_valid_mask(struct ntb_dev *ntb);
> +static int amd_ntb_db_vector_count(struct ntb_dev *ntb);
> +static u64 amd_ntb_db_vector_mask(struct ntb_dev *ntb, int db_vector);
> +static u64 amd_ntb_db_read(struct ntb_dev *ntb);
> +static int amd_ntb_db_clear(struct ntb_dev *ntb, u64 db_bits);
> +static int amd_ntb_db_set_mask(struct ntb_dev *ntb, u64 db_bits);
> +static int amd_ntb_db_clear_mask(struct ntb_dev *ntb, u64 db_bits);
> +static int amd_ntb_peer_db_addr(struct ntb_dev *ntb,
> +				phys_addr_t *db_addr,
> +				resource_size_t *db_size);
> +static int amd_ntb_peer_db_set(struct ntb_dev *ntb, u64 db_bits);
> +static int amd_ntb_spad_count(struct ntb_dev *ntb);
> +static u32 amd_ntb_spad_read(struct ntb_dev *ntb, int idx);
> +static int amd_ntb_spad_write(struct ntb_dev *ntb, int idx, u32 val);
> +static int amd_ntb_peer_spad_addr(struct ntb_dev *ntb, int idx,
> +					phys_addr_t *spad_addr);
> +static u32 amd_ntb_peer_spad_read(struct ntb_dev *ntb, int idx);
> +static int amd_ntb_peer_spad_write(struct ntb_dev *ntb, int idx, u32 val);

I believe all of these function declarations should be removed. 

Thanks,
Jon

> +#endif
> -- 
> 1.9.1
> 

^ permalink raw reply	[flat|nested] 9+ messages in thread

* RE: [PATCH V3 1/2] NTB: Add AMD PCI-Express NTB driver
  2016-01-15 17:13     ` Allen Hubbe
@ 2016-01-18  6:47       ` Yu, Xiangliang
  0 siblings, 0 replies; 9+ messages in thread
From: Yu, Xiangliang @ 2016-01-18  6:47 UTC (permalink / raw)
  To: Allen Hubbe, jdmason@kudzu.us, dave.jiang@intel.com,
	linux-ntb@googlegroups.com, linux-kernel@vger.kernel.org
  Cc: SPG_Linux_Kernel


> > In here, i list all features AMD NTB support, not patch. Please see my
> > word again.
> 
> This is the commit message for this patch.  It would be confusing to have
> features mentioned in the commit message not implemented, and no
> explanation why not.

Ok, I'll change it.

> > > What is SMU?
> >
> > System management unit, please reference AMD manual.
> 
> Thanks, but it is your responsibility to support this driver, and respond to
> questions like this.  I didn't write it, I can't test it, and I don't have a hardware
> manual.
> 
> If this is an ack for the system management unit, is it acknowledging a
> watchdog on the SMU?  Does this ack have to do with the flush or power
> management functions?  If it is a watchdog, what implications are there for
> unloading your driver - does the watchdog just time out, and then what,
> reset the device?  I'm speculating (guessing) again, but that's really the best I
> can do without your authoritative answers, or a hardware manual.
> 
> Where can the hardware manual be found?  Has it been published, or is it
> AMD confidential?
> 
> http://search.amd.com/en-us/Pages/results-all.aspx#k=zeppelin
> http://search.amd.com/en-us/Pages/results-all.aspx#k=ntb
> 
> "Nothing here matches your search"

SMU is a big system, which including hardware and firmware. I think it is
Transport to NTB, just use its interface it expose and following NTB hardware
Specification. Right now, the specification hasn't been public released. I think
You need to request NDA( Non Disclosure Agreeement) license to review it.

> > > I would prefer to see #define, but this works... There are good
> > arguments to
> > > be made for compiler constants vs the preprocessor.  My preference
> > just
> > > comes from what I've observed about how other drivers are written,
> > > and #define for this stuff seems to be the convention.
> >
> > I have lots of SATA/Block layer experience, this following AHCI coding
> > style.
> > Compiler will check enum variable, not for macro.
> > Actually, there are lots of similar definition in kernel, such as
> > block layer code.
> 
> Thanks.  I'm ok with this.
> 
> > > It's unusual, sometimes dangerous, to use the static keyword in a .h
> > file
> >
> > Actually, I put it in .c file in first version, I think it make code
> > clear if moving it to .h file. Yes, I can remove all these definitions
> > by rearranging .c file.
> >
> > And please speak out all your concern in this time, you know, I had
> > spent lots of time  On these patches.
> 
> You're right.  I didn't notice this change from v1 to v2.  It would have been a
> more efficient code review had I made these comments in v2.
> 
> My comments this time around, as I said, were minor.  Each version of this
> patch series has been an improvement.  Maybe this version is even good
> enough, but there are /always/ comments to be made about code.  It's not
> unusual to see patches on the kernel mailing list with double digit reroll
> counts (I saw one at v12 today).  Sure, it can be frustrating.   Please don't take
> offense.  Thank you for your efforts.
> 
> The bad news:  I have two more comments.
> 
> Please run scritps/checkpatch.pl --strict with your patch.  There are some
> formatting issues.
> Please add your name to the MAINTANERS file.  Add a section, NTB AMD
> DRIVER.  The checkpatch script also mentions this, but it's especially
> important.  You are the only one who can test this driver, with the simulator.

Ok, I'll do it. 

> The good news:
> 
> I'm inclined to say yes to this patch.  It looks ok, and I'll take your word that it
> has been tested.  Ultimately, the patch has to be accepted by Jon and Linus.
> By adding my comments, I'm just trying to help.
> 
> Do you think you can email v4 by tomorrow?  I'll be sure to respond quickly if
> you do, so it has the best chance to meet the deadline for the merge window.
> It's getting close to the deadline, and I'll take some of the blame for that.

Because I was busying other things at weekend, I'll change it right now.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* RE: [PATCH V3 1/2] NTB: Add AMD PCI-Express NTB driver
  2016-01-18  4:04 ` Jon Mason
@ 2016-01-18  9:42   ` Yu, Xiangliang
  2016-01-18 15:07     ` Jon Mason
  0 siblings, 1 reply; 9+ messages in thread
From: Yu, Xiangliang @ 2016-01-18  9:42 UTC (permalink / raw)
  To: Jon Mason
  Cc: dave.jiang@intel.com, Allen.Hubbe@emc.com,
	linux-ntb@googlegroups.com, linux-kernel@vger.kernel.org,
	SPG_Linux_Kernel

> On Thu, Jan 14, 2016 at 07:44:07PM +0800, Xiangliang Yu wrote:
> > AMD NTB support following main features:
> > (1) Three memory windows;
> > (2) Sixteen 32-bit scratch pad;
> > (3) Two 16-bit doorbell interrupt;
> > (4) Five event interrupts;
> > (5) One system can wake up opposite system of NTB;
> > (6) Flush previous request to the opposite system;
> > (7) There are reset and PME_TO mechanisms between two systems;
> 
> Please create a much more verbose description of the patch.  This is not
> acceptable in it's current form, and I'm sure I could get flamed by Linus if he
> saw it in my pull request.  Even writing this out as a paragaph instead of a
> numbered list would be better.

Ok, I'll change it.

> >
> > Signed-off-by: Xiangliang Yu <Xiangliang.Yu@amd.com>
> > ---
> >  drivers/ntb/hw/Kconfig          |    1 +
> >  drivers/ntb/hw/Makefile         |    1 +
> >  drivers/ntb/hw/amd/Kconfig      |    7 +
> >  drivers/ntb/hw/amd/Makefile     |    1 +
> >  drivers/ntb/hw/amd/ntb_hw_amd.c | 1148
> > +++++++++++++++++++++++++++++++++++++++
> >  drivers/ntb/hw/amd/ntb_hw_amd.h |  246 +++++++++
> >  6 files changed, 1404 insertions(+)
> >  create mode 100644 drivers/ntb/hw/amd/Kconfig  create mode 100644
> > drivers/ntb/hw/amd/Makefile  create mode 100644
> > drivers/ntb/hw/amd/ntb_hw_amd.c  create mode 100644
> > drivers/ntb/hw/amd/ntb_hw_amd.h
> >
> > diff --git a/drivers/ntb/hw/Kconfig b/drivers/ntb/hw/Kconfig index
> > 4d5535c..0c5c2a6 100644
> > --- a/drivers/ntb/hw/Kconfig
> > +++ b/drivers/ntb/hw/Kconfig
> > @@ -1 +1,2 @@
> >  source "drivers/ntb/hw/intel/Kconfig"
> > +source "drivers/ntb/hw/amd/Kconfig"
> 
> Being nit picky, but please make it alphabetical and put amd before intel.

Ok.

> > diff --git a/drivers/ntb/hw/Makefile b/drivers/ntb/hw/Makefile index
> > 175d7c9..636be7d 100644
> > --- a/drivers/ntb/hw/Makefile
> > +++ b/drivers/ntb/hw/Makefile
> > @@ -1 +1,2 @@
> >  obj-$(CONFIG_NTB_INTEL)	+= intel/
> > +obj-$(CONFIG_NTB_AMD)	+= amd/
> 
> Same nit, make it alphabetical.

Ok.

> > diff --git a/drivers/ntb/hw/amd/Kconfig b/drivers/ntb/hw/amd/Kconfig
> > new file mode 100644 index 0000000..cfe903c
> > --- /dev/null
> > +++ b/drivers/ntb/hw/amd/Kconfig
> > @@ -0,0 +1,7 @@
> > +config NTB_AMD
> > +	tristate "AMD Non-Transparent Bridge support"
> > +	depends on X86_64
> > +	help
> > +	 This driver supports AMD NTB on capable Zeppelin hardware.
> > +
> > +	 If unsure, say N.
> > diff --git a/drivers/ntb/hw/amd/Makefile b/drivers/ntb/hw/amd/Makefile
> > new file mode 100644 index 0000000..ad54da9
> > --- /dev/null
> > +++ b/drivers/ntb/hw/amd/Makefile
> > @@ -0,0 +1 @@
> > +obj-$(CONFIG_NTB_AMD) += ntb_hw_amd.o
> > diff --git a/drivers/ntb/hw/amd/ntb_hw_amd.c
> > b/drivers/ntb/hw/amd/ntb_hw_amd.c new file mode 100644 index
> > 0000000..8df6d7b
> > --- /dev/null
> > +++ b/drivers/ntb/hw/amd/ntb_hw_amd.c
> > @@ -0,0 +1,1148 @@
> > +/*
> > + * This file is provided under a dual BSD/GPLv2 license.  When using or
> > + *   redistributing this file, you may do so under either license.
> > + *
> > + *   GPL LICENSE SUMMARY
> > + *
> > + *   Copyright (C) 2016 Advanced Micro Devices, Inc. All Rights Reserved.
> > + *
> > + *   This program is free software; you can redistribute it and/or modify
> > + *   it under the terms of version 2 of the GNU General Public License as
> > + *   published by the Free Software Foundation.
> > + *
> > + *   BSD LICENSE
> > + *
> > + *   Copyright (C) 2016 Advanced Micro Devices, Inc. All Rights Reserved.
> > + *
> > + *   Redistribution and use in source and binary forms, with or without
> > + *   modification, are permitted provided that the following conditions
> > + *   are met:
> > + *
> > + *     * Redistributions of source code must retain the above copyright
> > + *       notice, this list of conditions and the following disclaimer.
> > + *     * Redistributions in binary form must reproduce the above copy
> > + *       notice, this list of conditions and the following disclaimer in
> > + *       the documentation and/or other materials provided with the
> > + *       distribution.
> > + *     * Neither the name of AMD Corporation nor the names of its
> > + *       contributors may be used to endorse or promote products derived
> > + *       from this software without specific prior written permission.
> > + *
> > + *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
> CONTRIBUTORS
> > + *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT
> NOT
> > + *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
> FITNESS FOR
> > + *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
> COPYRIGHT
> > + *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
> INCIDENTAL,
> > + *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
> BUT NOT
> > + *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
> LOSS OF USE,
> > + *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
> AND ON ANY
> > + *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
> TORT
> > + *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
> OF THE USE
> > + *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
> DAMAGE.
> > + *
> > + * AMD PCIe NTB Linux driver
> > + *
> > + * Contact Information:
> > + * Xiangliang Yu<Xiangliang.Yu@amd.com>
> 
> Nit, space needed between 'u' and '<'

Ok

> > + */
> > +
> > +#include <linux/debugfs.h>
> > +#include <linux/delay.h>
> > +#include <linux/init.h>
> > +#include <linux/interrupt.h>
> > +#include <linux/module.h>
> > +#include <linux/acpi.h>
> > +#include <linux/pci.h>
> > +#include <linux/random.h>
> > +#include <linux/slab.h>
> > +#include <linux/ntb.h>
> > +
> > +#include "ntb_hw_amd.h"
> > +
> > +#define NTB_NAME	"ntb_hw_amd"
> > +#define NTB_DESC	"AMD(R) PCI-E Non-Transparent Bridge Driver"
> > +#define NTB_VER		"1.0"
> > +
> > +MODULE_DESCRIPTION(NTB_DESC);
> > +MODULE_VERSION(NTB_VER);
> > +MODULE_LICENSE("Dual BSD/GPL");
> > +MODULE_AUTHOR("AMD Inc.");
> > +
> > +static const struct file_operations amd_ntb_debugfs_info; static
> > +struct dentry *debugfs_dir;
> > +
> > +static const struct ntb_dev_ops amd_ntb_ops = {
> > +	.mw_count		= amd_ntb_mw_count,
> > +	.mw_get_range		= amd_ntb_mw_get_range,
> > +	.mw_set_trans		= amd_ntb_mw_set_trans,
> > +	.link_is_up		= amd_ntb_link_is_up,
> > +	.link_enable		= amd_ntb_link_enable,
> > +	.link_disable		= amd_ntb_link_disable,
> > +	.db_valid_mask		= amd_ntb_db_valid_mask,
> > +	.db_vector_count	= amd_ntb_db_vector_count,
> > +	.db_vector_mask		= amd_ntb_db_vector_mask,
> > +	.db_read		= amd_ntb_db_read,
> > +	.db_clear		= amd_ntb_db_clear,
> > +	.db_set_mask		= amd_ntb_db_set_mask,
> > +	.db_clear_mask		= amd_ntb_db_clear_mask,
> > +	.peer_db_addr		= amd_ntb_peer_db_addr,
> > +	.peer_db_set		= amd_ntb_peer_db_set,
> > +	.spad_count		= amd_ntb_spad_count,
> > +	.spad_read		= amd_ntb_spad_read,
> > +	.spad_write		= amd_ntb_spad_write,
> > +	.peer_spad_addr		= amd_ntb_peer_spad_addr,
> > +	.peer_spad_read		= amd_ntb_peer_spad_read,
> > +	.peer_spad_write	= amd_ntb_peer_spad_write,
> > +};
> > +
> > +static int ndev_mw_to_bar(struct amd_ntb_dev *ndev, int idx) {
> > +	if (idx < 0 || idx > ndev->mw_count)
> > +		return -EINVAL;
> > +
> > +	return 1 << idx;
> > +}
> > +
> > +static int amd_ntb_mw_count(struct ntb_dev *ntb) {
> > +	return ntb_ndev(ntb)->mw_count;
> > +}
> > +
> > +static int amd_ntb_mw_get_range(struct ntb_dev *ntb, int idx,
> > +				phys_addr_t *base,
> > +				resource_size_t *size,
> > +				resource_size_t *align,
> > +				resource_size_t *align_size)
> > +{
> > +	struct amd_ntb_dev *ndev = ntb_ndev(ntb);
> > +	int bar;
> > +
> > +	bar = ndev_mw_to_bar(ndev, idx);
> > +	if (bar < 0)
> > +		return bar;
> > +
> > +	if (base)
> > +		*base = pci_resource_start(ndev->ntb.pdev, bar);
> > +
> > +	if (size)
> > +		*size = pci_resource_len(ndev->ntb.pdev, bar);
> > +
> > +	if (align)
> > +		*align = 4096;
> 
> Please use SZ_4K from sizes.h

Ok

> > +
> > +	if (align_size)
> > +		*align_size = 1;
> > +
> > +	return 0;
> > +}
> > +
> > +static int amd_ntb_mw_set_trans(struct ntb_dev *ntb, int idx,
> > +				dma_addr_t addr, resource_size_t size) {
> > +	struct amd_ntb_dev *ndev = ntb_ndev(ntb);
> > +	unsigned long xlat_reg, limit_reg = 0;
> > +	resource_size_t mw_size;
> > +	void __iomem *mmio, *peer_mmio;
> > +	u64 base_addr, limit, reg_val;
> > +	int bar;
> > +
> > +	bar = ndev_mw_to_bar(ndev, idx);
> > +	if (bar < 0)
> > +		return bar;
> > +
> > +	mw_size = pci_resource_len(ndev->ntb.pdev, bar);
> > +
> > +	/* make sure the range fits in the usable mw size */
> > +	if (size > mw_size)
> > +		return -EINVAL;
> > +
> > +	mmio = ndev->self_mmio;
> > +	peer_mmio = ndev->peer_mmio;
> > +
> > +	base_addr = pci_resource_start(ndev->ntb.pdev, bar);
> > +	if (bar == 1) {
> > +		xlat_reg = AMD_BAR1XLAT_OFFSET;
> > +		limit_reg = AMD_BAR1LMT_OFFSET;
> > +	} else {
> > +		xlat_reg = AMD_BAR23XLAT_OFFSET + ((bar - 2) << 3);
> > +		limit_reg = AMD_BAR23LMT_OFFSET + ((bar - 2) << 3);
> > +	}
> > +
> > +	if (bar != 1) {
> > +		/* Set the limit if supported */
> > +		limit = base_addr + size;
> > +
> > +		/* set and verify setting the translation address */
> > +		iowrite64(addr, peer_mmio + xlat_reg);
> > +		reg_val = ioread64(peer_mmio + xlat_reg);
> > +		if (reg_val != addr) {
> > +			iowrite64(0, peer_mmio + xlat_reg);
> > +			return -EIO;
> > +		}
> > +
> > +		/* set and verify setting the limit */
> > +		iowrite64(limit, mmio + limit_reg);
> > +		reg_val = ioread64(mmio + limit_reg);
> > +		if (reg_val != limit) {
> > +			iowrite64(base_addr, mmio + limit_reg);
> > +			iowrite64(0, peer_mmio + xlat_reg);
> > +			return -EIO;
> > +		}
> > +	} else {
> > +		/* split bar addr range must all be 32 bit */
> > +		if (addr & (~0ull << 32))
> > +			return -EINVAL;
> > +		if ((addr + size) & (~0ull << 32))
> > +			return -EINVAL;
> > +
> > +		/* Set the limit if supported */
> > +		limit = base_addr + size;
> > +
> > +		/* set and verify setting the translation address */
> > +		iowrite64(addr, peer_mmio + xlat_reg);
> > +		reg_val = ioread64(peer_mmio + xlat_reg);
> > +		if (reg_val != addr) {
> > +			iowrite64(0, peer_mmio + xlat_reg);
> > +			return -EIO;
> > +		}
> > +
> > +		/* set and verify setting the limit */
> > +		writel(limit, mmio + limit_reg);
> > +		reg_val = readl(mmio + limit_reg);
> > +		if (reg_val != limit) {
> > +			writel(base_addr, mmio + limit_reg);
> > +			writel(0, peer_mmio + xlat_reg);
> > +			return -EIO;
> > +		}
> > +	}
> 
> Being nit picky here again, but why is it necessary to make 2 consecutive
> checks for the same 'bar' variable.  Please combine them.

Ok.

> > +
> > +	return 0;
> > +}
> > +
> > +static int amd_link_is_up(struct amd_ntb_dev *ndev) {
> > +	/* set link down and clear flag */
> > +	if (ndev->peer_sta & AMD_PEER_RESET_EVENT) {
> > +		ndev->peer_sta &= ~AMD_PEER_RESET_EVENT;
> > +		return 0;
> > +	} else if (ndev->peer_sta & (AMD_PEER_D3_EVENT |
> > +				     AMD_PEER_PMETO_EVENT)) {
> > +		return 0;
> > +	} else if (ndev->peer_sta & AMD_PEER_D0_EVENT) {
> > +		ndev->peer_sta = 0;
> > +		return 0;
> > +	} else
> > +		return NTB_LNK_STA_ACTIVE(ndev->cntl_sta);
> > +}
> > +
> > +static int amd_ntb_link_is_up(struct ntb_dev *ntb,
> > +			      enum ntb_speed *speed,
> > +			      enum ntb_width *width)
> > +{
> > +	struct amd_ntb_dev *ndev = ntb_ndev(ntb);
> > +	int ret = 0;
> > +
> > +	if (amd_link_is_up(ndev)) {
> > +		if (speed)
> > +			*speed = NTB_LNK_STA_SPEED(ndev->lnk_sta);
> > +		if (width)
> > +			*width = NTB_LNK_STA_WIDTH(ndev->lnk_sta);
> > +
> > +		dev_dbg(ndev_dev(ndev), "link is up.\n");
> > +
> > +		ret = 1;
> > +	} else {
> > +		if (speed)
> > +			*speed = NTB_SPEED_NONE;
> > +		if (width)
> > +			*width = NTB_WIDTH_NONE;
> > +
> > +		dev_dbg(ndev_dev(ndev), "link is down.\n");
> > +	}
> > +
> > +	return ret;
> > +}
> > +
> > +static int amd_ntb_link_enable(struct ntb_dev *ntb,
> > +			       enum ntb_speed max_speed,
> > +			       enum ntb_width max_width)
> > +{
> > +	struct amd_ntb_dev *ndev = ntb_ndev(ntb);
> > +	void __iomem *mmio = ndev->self_mmio;
> > +	u32 ntb_ctl;
> > +
> > +	/* Enable event interrupt */
> > +	ndev->int_mask &= ~AMD_EVENT_INTMASK;
> > +	writel(ndev->int_mask, mmio + AMD_INTMASK_OFFSET);
> > +
> > +	if (ndev->ntb.topo == NTB_TOPO_SEC)
> > +		return -EINVAL;
> > +	dev_dbg(ndev_dev(ndev), "Enabling Link.\n");
> > +
> > +	ntb_ctl = readl(mmio + AMD_CNTL_OFFSET);
> > +	ntb_ctl |= (3 << 20);
> 
> This "20" is a bit magical to me.  Please use a #define

Yes. 

> > +	writel(ntb_ctl, mmio + AMD_CNTL_OFFSET);
> > +
> > +	return 0;
> > +}
> > +
> > +static int amd_ntb_link_disable(struct ntb_dev *ntb) {
> > +	struct amd_ntb_dev *ndev = ntb_ndev(ntb);
> > +	void __iomem *mmio = ndev->self_mmio;
> > +	u32 ntb_ctl;
> > +
> > +	/* Disable event interrupt */
> > +	ndev->int_mask |= AMD_EVENT_INTMASK;
> > +	writel(ndev->int_mask, mmio + AMD_INTMASK_OFFSET);
> > +
> > +	if (ndev->ntb.topo == NTB_TOPO_SEC)
> > +		return -EINVAL;
> > +	dev_dbg(ndev_dev(ndev), "Enabling Link.\n");
> > +
> > +	ntb_ctl = readl(mmio + AMD_CNTL_OFFSET);
> > +	ntb_ctl &= ~(3 << 16);
> 
> Should this be "20" or "16"?  Seems odd that you would use a different offset
> to enable than disable.  Either way, a #define here would be nice too.

Got it.

> > +	writel(ntb_ctl, mmio + AMD_CNTL_OFFSET);
> > +
> > +	return 0;
> > +}
> > +
> > +static u64 amd_ntb_db_valid_mask(struct ntb_dev *ntb) {
> > +	return ntb_ndev(ntb)->db_valid_mask; }
> > +
> > +static int amd_ntb_db_vector_count(struct ntb_dev *ntb) {
> > +	return ntb_ndev(ntb)->db_count;
> > +}
> > +
> > +static u64 amd_ntb_db_vector_mask(struct ntb_dev *ntb, int db_vector)
> > +{
> > +	struct amd_ntb_dev *ndev = ntb_ndev(ntb);
> > +
> > +	if (db_vector < 0 || db_vector > ndev->db_count)
> > +		return 0;
> > +
> > +	return ntb_ndev(ntb)->db_valid_mask & (1 << db_vector); }
> > +
> > +static u64 amd_ntb_db_read(struct ntb_dev *ntb) {
> > +	struct amd_ntb_dev *ndev = ntb_ndev(ntb);
> > +	void __iomem *mmio = ndev->self_mmio;
> > +
> > +	return (u64)readw(mmio + AMD_DBSTAT_OFFSET);
> 
> Is this cast necessary?

Just following Intel ntb driver, keep consistent in Intel code.

> 
> > +}
> > +
> > +static int amd_ntb_db_clear(struct ntb_dev *ntb, u64 db_bits) {
> > +	struct amd_ntb_dev *ndev = ntb_ndev(ntb);
> > +	void __iomem *mmio = ndev->self_mmio;
> > +
> > +	writew((u16)db_bits, mmio + AMD_DBSTAT_OFFSET);
> > +
> > +	return 0;
> > +}
> > +
> > +static int amd_ntb_db_set_mask(struct ntb_dev *ntb, u64 db_bits) {
> > +	struct amd_ntb_dev *ndev = ntb_ndev(ntb);
> > +	void __iomem *mmio = ndev->self_mmio;
> > +	unsigned long flags;
> > +
> > +	if (db_bits & ~ndev->db_valid_mask)
> > +		return -EINVAL;
> > +
> > +	spin_lock_irqsave(&ndev->db_mask_lock, flags);
> > +	ndev->db_mask |= db_bits;
> > +	writew((u16)ndev->db_mask, mmio + AMD_DBMASK_OFFSET);
> > +	spin_unlock_irqrestore(&ndev->db_mask_lock, flags);
> > +
> > +	return 0;
> > +}
> > +
> > +static int amd_ntb_db_clear_mask(struct ntb_dev *ntb, u64 db_bits) {
> > +	struct amd_ntb_dev *ndev = ntb_ndev(ntb);
> > +	void __iomem *mmio = ndev->self_mmio;
> > +	unsigned long flags;
> > +
> > +	if (db_bits & ~ndev->db_valid_mask)
> > +		return -EINVAL;
> > +
> > +	spin_lock_irqsave(&ndev->db_mask_lock, flags);
> > +	ndev->db_mask &= ~db_bits;
> > +	writew((u16)ndev->db_mask, mmio + AMD_DBMASK_OFFSET);
> > +	spin_unlock_irqrestore(&ndev->db_mask_lock, flags);
> > +
> > +	return 0;
> > +}
> > +
> > +static int amd_ntb_peer_db_addr(struct ntb_dev *ntb,
> > +		phys_addr_t *db_addr,
> > +		resource_size_t *db_size)
> 
> Nit, this doesn't seem like it would be aligned properly.

Got it.

> > +{
> > +	struct amd_ntb_dev *ndev = ntb_ndev(ntb);
> > +
> > +	if (db_addr)
> > +		*db_addr = (phys_addr_t)(ndev->peer_mmio +
> AMD_DBREQ_OFFSET);
> > +	if (db_size)
> > +		*db_size = sizeof(u32);
> > +
> > +	return 0;
> > +}
> > +
> > +static int amd_ntb_peer_db_set(struct ntb_dev *ntb, u64 db_bits) {
> > +	struct amd_ntb_dev *ndev = ntb_ndev(ntb);
> > +	void __iomem *mmio = ndev->self_mmio;
> > +
> > +	writew((u16)db_bits, mmio + AMD_DBREQ_OFFSET);
> > +
> > +	return 0;
> > +}
> > +
> > +static int amd_ntb_spad_count(struct ntb_dev *ntb) {
> > +	return ntb_ndev(ntb)->spad_count;
> > +}
> > +
> > +static u32 amd_ntb_spad_read(struct ntb_dev *ntb, int idx) {
> > +	struct amd_ntb_dev *ndev = ntb_ndev(ntb);
> > +	void __iomem *mmio = ndev->self_mmio;
> > +	u32 offset = 0;
> 
> Unnecessary init of this variable

Ok.

> > +
> > +	if (idx < 0 || idx >= ndev->spad_count)
> > +		return 0;
> > +
> > +	offset = ndev->self_spad + (idx << 2);
> > +	return readl(mmio + AMD_SPAD_OFFSET + offset); }
> > +
> > +static int amd_ntb_spad_write(struct ntb_dev *ntb,
> > +				int idx, u32 val)
> > +{
> > +	struct amd_ntb_dev *ndev = ntb_ndev(ntb);
> > +	void __iomem *mmio = ndev->self_mmio;
> > +	u32 offset = 0;
> 
> Unnecessary init of this variable

Ok.

> > +
> > +	if (idx < 0 || idx >= ndev->spad_count)
> > +		return -EINVAL;
> > +
> > +	offset = ndev->self_spad + (idx << 2);
> > +	writel(val, mmio + AMD_SPAD_OFFSET + offset);
> > +
> > +	return 0;
> > +}
> > +
> > +static int amd_ntb_peer_spad_addr(struct ntb_dev *ntb, int idx,
> > +				    phys_addr_t *spad_addr)
> > +{
> > +	struct amd_ntb_dev *ndev = ntb_ndev(ntb);
> > +
> > +	if (idx < 0 || idx >= ndev->spad_count)
> > +		return -EINVAL;
> > +
> > +	if (spad_addr)
> > +		*spad_addr = (phys_addr_t)(ndev->self_mmio +
> AMD_SPAD_OFFSET +
> > +				ndev->peer_spad + (idx << 2));
> 
> This doesn't seem aligned properly either.

Ok

> > +	return 0;
> > +}
> > +
> > +static u32 amd_ntb_peer_spad_read(struct ntb_dev *ntb, int idx) {
> > +	struct amd_ntb_dev *ndev = ntb_ndev(ntb);
> > +	void __iomem *mmio = ndev->self_mmio;
> > +	u32 offset;
> > +
> > +	if (idx < 0 || idx >= ndev->spad_count)
> > +		return -EINVAL;
> > +
> > +	offset = ndev->peer_spad + (idx << 2);
> > +	return readl(mmio + AMD_SPAD_OFFSET + offset); }
> > +
> > +static int amd_ntb_peer_spad_write(struct ntb_dev *ntb,
> > +				     int idx, u32 val)
> > +{
> > +	struct amd_ntb_dev *ndev = ntb_ndev(ntb);
> > +	void __iomem *mmio = ndev->self_mmio;
> > +	u32 offset;
> > +
> > +	if (idx < 0 || idx >= ndev->spad_count)
> > +		return -EINVAL;
> > +
> > +	offset = ndev->peer_spad + (idx << 2);
> > +	writel(val, mmio + AMD_SPAD_OFFSET + offset);
> > +
> > +	return 0;
> > +}
> > +
> > +static void amd_ack_SMU(struct amd_ntb_dev *ndev, u32 bit)
> 
> Nit, Please make this SMU lower case.  Also, it is a bit odd to see SMUACK
> and ack_smu.  It is not a big deal, but since there has to be another version to
> address Allen's comments (and mine), then this won't add to much additional
> work to address.

Ok.

> > +{
> > +	void __iomem *mmio = ndev->self_mmio;
> > +	int reg;
> > +
> > +	reg = readl(mmio + AMD_SMUACK_OFFSET);
> > +	reg |= bit;
> > +	writel(reg, mmio + AMD_SMUACK_OFFSET);
> > +
> > +	ndev->peer_sta |= bit;
> > +}
> > +
> > +static void amd_handle_event(struct amd_ntb_dev *ndev, int vec) {
> > +	void __iomem *mmio = ndev->self_mmio;
> > +	u32 status;
> > +
> > +	status = readl(mmio + AMD_INTSTAT_OFFSET);
> > +	if (!(status & AMD_EVENT_INTMASK))
> > +		return;
> > +
> > +	dev_dbg(ndev_dev(ndev), "status = 0x%x and vec = %d\n", status,
> > +vec);
> > +
> > +	status &= AMD_EVENT_INTMASK;
> > +	switch (status) {
> > +	case AMD_PEER_FLUSH_EVENT:
> > +		dev_info(ndev_dev(ndev), "Flush is done.\n");
> > +		break;
> > +	case AMD_PEER_RESET_EVENT:
> > +		amd_ack_SMU(ndev, AMD_PEER_RESET_EVENT);
> > +
> > +		/* link down first */
> > +		ntb_link_event(&ndev->ntb);
> > +		/* polling peer status */
> > +		schedule_delayed_work(&ndev->hb_timer,
> AMD_LINK_HB_TIMEOUT);
> > +
> > +		break;
> > +	case AMD_PEER_D3_EVENT:
> > +	case AMD_PEER_PMETO_EVENT:
> > +		amd_ack_SMU(ndev, status);
> > +
> > +		/* link down */
> > +		ntb_link_event(&ndev->ntb);
> > +
> > +		break;
> > +	case AMD_PEER_D0_EVENT:
> > +		mmio = ndev->peer_mmio;
> > +		status = readl(mmio + AMD_PMESTAT_OFFSET);
> > +		/* check if this is WAKEUP event */
> > +		if (status & 0x1)
> > +			dev_info(ndev_dev(ndev), "Wakeup is done.\n");
> > +
> > +		amd_ack_SMU(ndev, AMD_PEER_D0_EVENT);
> > +
> > +		if (amd_link_is_up(ndev))
> > +			ntb_link_event(&ndev->ntb);
> > +		else
> > +			schedule_delayed_work(&ndev->hb_timer,
> > +						AMD_LINK_HB_TIMEOUT);
> > +		break;
> > +	default:
> > +		dev_info(ndev_dev(ndev), "event status = 0x%x.\n", status);
> > +		break;
> > +	}
> > +}
> > +
> > +static irqreturn_t ndev_interrupt(struct amd_ntb_dev *ndev, int vec)
> > +{
> > +	dev_dbg(ndev_dev(ndev), "vec %d\n", vec);
> > +
> > +	if (vec > (ndev->msix_vec_count - 1)) {
> > +		dev_err(ndev_dev(ndev), "Invalid interrupt.\n");
> > +		return IRQ_HANDLED;
> > +	}
> 
> Unless this actually happens, you might not want the unnecessary branch in
> your fast path.

Ok, I'll remove it.

> > +
> > +	if (vec > 15 || (ndev->msix_vec_count == 1))
> > +		amd_handle_event(ndev, vec);
> > +
> > +	if (vec < 16)
> 
> I'd prefer to see a #define here for 16

Ok.

> 
> > +		ntb_db_event(&ndev->ntb, vec);
> > +
> > +	return IRQ_HANDLED;
> > +}
> > +
> > +static irqreturn_t ndev_vec_isr(int irq, void *dev) {
> > +	struct amd_ntb_vec *nvec = dev;
> > +
> > +	return ndev_interrupt(nvec->ndev, nvec->num); }
> > +
> > +static irqreturn_t ndev_irq_isr(int irq, void *dev) {
> > +	struct amd_ntb_dev *ndev = dev;
> > +
> > +	return ndev_interrupt(ndev, irq - ndev_pdev(ndev)->irq); }
> > +
> > +static int ndev_init_isr(struct amd_ntb_dev *ndev,
> > +				int msix_min, int msix_max)
> > +{
> > +	struct pci_dev *pdev;
> > +	int rc, i, msix_count, node;
> > +
> > +	pdev = ndev_pdev(ndev);
> > +
> > +	node = dev_to_node(&pdev->dev);
> > +
> > +	ndev->db_mask = ndev->db_valid_mask;
> > +
> > +	/* Try to set up msix irq */
> > +	ndev->vec = kzalloc_node(msix_max * sizeof(*ndev->vec),
> > +				 GFP_KERNEL, node);
> > +	if (!ndev->vec)
> > +		goto err_msix_vec_alloc;
> > +
> > +	ndev->msix = kzalloc_node(msix_max * sizeof(*ndev->msix),
> > +				  GFP_KERNEL, node);
> > +	if (!ndev->msix)
> > +		goto err_msix_alloc;
> > +
> > +	for (i = 0; i < msix_max; ++i)
> > +		ndev->msix[i].entry = i;
> > +
> > +	msix_count = pci_enable_msix_range(pdev, ndev->msix,
> > +					   msix_min, msix_max);
> > +	if (msix_count < 0)
> > +		goto err_msix_enable;
> > +
> > +	/* Disable MSIX if msix count is less than 16 */
> 
> Why?  Is there a HW errata or limitation (if so, write so in this comment)?  If
> not, MSIX has many benefits over legacy interrupts, and it doesn't make
> sense to walk away from that.

Yes, this is hardware limitation, i'll add "hardware limitation" into the comment.

> 
> > +	if (msix_count < msix_min) {
> > +		pci_disable_msix(pdev);
> > +		goto err_msix_enable;
> > +	}
> > +
> > +	for (i = 0; i < msix_count; ++i) {
> > +		ndev->vec[i].ndev = ndev;
> > +		ndev->vec[i].num = i;
> > +		rc = request_irq(ndev->msix[i].vector, ndev_vec_isr, 0,
> > +				 "ndev_vec_isr", &ndev->vec[i]);
> > +		if (rc)
> > +			goto err_msix_request;
> > +	}
> > +
> > +	dev_dbg(ndev_dev(ndev), "Using msix interrupts\n");
> > +	ndev->db_count = msix_min;
> > +	ndev->msix_vec_count = msix_max;
> > +	return 0;
> > +
> > +err_msix_request:
> > +	while (i-- > 0)
> > +		free_irq(ndev->msix[i].vector, ndev);
> > +	pci_disable_msix(pdev);
> > +err_msix_enable:
> > +	kfree(ndev->msix);
> > +err_msix_alloc:
> > +	kfree(ndev->vec);
> > +err_msix_vec_alloc:
> > +	ndev->msix = NULL;
> > +	ndev->vec = NULL;
> > +
> > +	/* Try to set up msi irq */
> > +	rc = pci_enable_msi(pdev);
> > +	if (rc)
> > +		goto err_msi_enable;
> > +
> > +	rc = request_irq(pdev->irq, ndev_irq_isr, 0,
> > +			 "ndev_irq_isr", ndev);
> > +	if (rc)
> > +		goto err_msi_request;
> > +
> 
> I have problems with the msix/msi/intx selection being done this way, but I
> see it being done in the Intel driver.  So....I'll fix it there first.

Ok, I'll change it according to your patch.

> 
> > +	dev_dbg(ndev_dev(ndev), "Using msi interrupts\n");
> > +	ndev->db_count = 1;
> > +	ndev->msix_vec_count = 1;
> > +	return 0;
> > +
> > +err_msi_request:
> > +	pci_disable_msi(pdev);
> > +err_msi_enable:
> > +
> > +	/* Try to set up intx irq */
> > +	pci_intx(pdev, 1);
> > +
> > +	rc = request_irq(pdev->irq, ndev_irq_isr, IRQF_SHARED,
> > +			 "ndev_irq_isr", ndev);
> > +	if (rc)
> > +		goto err_intx_request;
> > +
> > +	dev_dbg(ndev_dev(ndev), "Using intx interrupts\n");
> > +	ndev->db_count = 1;
> > +	ndev->msix_vec_count = 1;
> > +	return 0;
> > +
> > +err_intx_request:
> > +	return rc;
> > +}
> > +
> > +static void ndev_deinit_isr(struct amd_ntb_dev *ndev) {
> > +	struct pci_dev *pdev;
> > +	void __iomem *mmio = ndev->self_mmio;
> > +	int i;
> > +
> > +	pdev = ndev_pdev(ndev);
> > +
> > +	/* Mask all doorbell interrupts */
> > +	ndev->db_mask = ndev->db_valid_mask;
> > +	writel(ndev->db_mask, mmio + AMD_DBMASK_OFFSET);
> > +
> > +	if (ndev->msix) {
> > +		i = ndev->msix_vec_count;
> > +		while (i--)
> > +			free_irq(ndev->msix[i].vector, &ndev->vec[i]);
> > +		pci_disable_msix(pdev);
> > +		kfree(ndev->msix);
> > +		kfree(ndev->vec);
> > +	} else {
> > +		free_irq(pdev->irq, ndev);
> > +		if (pci_dev_msi_enabled(pdev))
> > +			pci_disable_msi(pdev);
> > +		else
> > +			pci_intx(pdev, 0);
> > +	}
> > +}
> > +
> > +static ssize_t ndev_debugfs_read(struct file *filp, char __user *ubuf,
> > +				 size_t count, loff_t *offp)
> > +{
> > +	struct amd_ntb_dev *ndev;
> > +	void __iomem *mmio;
> > +	char *buf;
> > +	size_t buf_size;
> > +	ssize_t ret, off;
> > +	union { u64 v64; u32 v32; u16 v16; } u;
> > +
> > +	ndev = filp->private_data;
> > +	mmio = ndev->self_mmio;
> > +
> > +	buf_size = min(count, 0x800ul);
> > +
> > +	buf = kmalloc(buf_size, GFP_KERNEL);
> > +	if (!buf)
> > +		return -ENOMEM;
> > +
> > +	off = 0;
> > +
> > +	off += scnprintf(buf + off, buf_size - off,
> > +			 "NTB Device Information:\n");
> > +
> > +	off += scnprintf(buf + off, buf_size - off,
> > +			 "Connection Topology -\t%s\n",
> > +			 ntb_topo_string(ndev->ntb.topo));
> > +
> > +	off += scnprintf(buf + off, buf_size - off,
> > +			 "LNK STA -\t\t%#06x\n", ndev->lnk_sta);
> > +
> > +	if (!amd_link_is_up(ndev)) {
> > +		off += scnprintf(buf + off, buf_size - off,
> > +				 "Link Status -\t\tDown\n");
> > +	} else {
> > +		off += scnprintf(buf + off, buf_size - off,
> > +				 "Link Status -\t\tUp\n");
> > +		off += scnprintf(buf + off, buf_size - off,
> > +				 "Link Speed -\t\tPCI-E Gen %u\n",
> > +				 NTB_LNK_STA_SPEED(ndev->lnk_sta));
> > +		off += scnprintf(buf + off, buf_size - off,
> > +				 "Link Width -\t\tx%u\n",
> > +				 NTB_LNK_STA_WIDTH(ndev->lnk_sta));
> > +	}
> > +
> > +	off += scnprintf(buf + off, buf_size - off,
> > +			 "Memory Window Count -\t%u\n", ndev-
> >mw_count);
> > +	off += scnprintf(buf + off, buf_size - off,
> > +			 "Scratchpad Count -\t%u\n", ndev->spad_count);
> > +	off += scnprintf(buf + off, buf_size - off,
> > +			 "Doorbell Count -\t%u\n", ndev->db_count);
> > +	off += scnprintf(buf + off, buf_size - off,
> > +			 "MSIX Vector Count -\t%u\n", ndev-
> >msix_vec_count);
> > +
> > +	off += scnprintf(buf + off, buf_size - off,
> > +			 "Doorbell Valid Mask -\t%#llx\n", ndev-
> >db_valid_mask);
> > +
> > +	u.v32 = readl(ndev->self_mmio + AMD_DBMASK_OFFSET);
> > +	off += scnprintf(buf + off, buf_size - off,
> > +			 "Doorbell Mask -\t\t\t%#06x\n", u.v32);
> > +
> > +	u.v32 = readl(mmio + AMD_DBSTAT_OFFSET);
> > +	off += scnprintf(buf + off, buf_size - off,
> > +			 "Doorbell Bell -\t\t\t%#06x\n", u.v32);
> > +
> > +	off += scnprintf(buf + off, buf_size - off,
> > +			 "\nNTB Incoming XLAT:\n");
> > +
> > +	u.v64 = ioread64(mmio + AMD_BAR1XLAT_OFFSET);
> > +	off += scnprintf(buf + off, buf_size - off,
> > +			 "XLAT1 -\t\t%#018llx\n", u.v64);
> > +
> > +	u.v64 = ioread64(ndev->self_mmio + AMD_BAR23XLAT_OFFSET);
> > +	off += scnprintf(buf + off, buf_size - off,
> > +				 "XLAT23 -\t\t%#018llx\n", u.v64);
> > +
> > +	u.v64 = ioread64(ndev->self_mmio + AMD_BAR45XLAT_OFFSET);
> > +	off += scnprintf(buf + off, buf_size - off,
> > +				 "XLAT45 -\t\t%#018llx\n", u.v64);
> > +
> > +	u.v32 = readl(mmio + AMD_BAR1LMT_OFFSET);
> > +	off += scnprintf(buf + off, buf_size - off,
> > +			 "LMT1 -\t\t\t%#06x\n", u.v32);
> > +
> > +	u.v64 = ioread64(ndev->self_mmio + AMD_BAR23LMT_OFFSET);
> > +	off += scnprintf(buf + off, buf_size - off,
> > +				 "LMT23 -\t\t\t%#018llx\n", u.v64);
> > +
> > +	u.v64 = ioread64(ndev->self_mmio + AMD_BAR45LMT_OFFSET);
> > +	off += scnprintf(buf + off, buf_size - off,
> > +				 "LMT45 -\t\t\t%#018llx\n", u.v64);
> > +
> > +	ret = simple_read_from_buffer(ubuf, count, offp, buf, off);
> > +	kfree(buf);
> > +	return ret;
> > +}
> > +
> > +static void ndev_init_debugfs(struct amd_ntb_dev *ndev) {
> > +	if (!debugfs_dir) {
> > +		ndev->debugfs_dir = NULL;
> > +		ndev->debugfs_info = NULL;
> > +	} else {
> > +		ndev->debugfs_dir =
> > +			debugfs_create_dir(ndev_name(ndev),
> debugfs_dir);
> > +		if (!ndev->debugfs_dir)
> > +			ndev->debugfs_info = NULL;
> > +		else
> > +			ndev->debugfs_info =
> > +				debugfs_create_file("info", S_IRUSR,
> > +						    ndev->debugfs_dir, ndev,
> > +						    &amd_ntb_debugfs_info);
> > +	}
> > +}
> > +
> > +static void ndev_deinit_debugfs(struct amd_ntb_dev *ndev) {
> > +	debugfs_remove_recursive(ndev->debugfs_dir);
> > +}
> > +
> > +static inline void ndev_init_struct(struct amd_ntb_dev *ndev,
> > +				    struct pci_dev *pdev)
> > +{
> > +	ndev->ntb.pdev = pdev;
> > +	ndev->ntb.topo = NTB_TOPO_NONE;
> > +	ndev->ntb.ops = &amd_ntb_ops;
> > +	ndev->int_mask = AMD_EVENT_INTMASK;
> > +	spin_lock_init(&ndev->db_mask_lock);
> > +}
> > +
> > +static int amd_poll_link(struct amd_ntb_dev *ndev) {
> > +	void __iomem *mmio = ndev->peer_mmio;
> > +	u32 reg, stat;
> > +	int rc;
> > +
> > +	reg = readl(mmio + AMD_SIDEINFO_OFFSET);
> > +	reg &= NTB_LIN_STA_ACTIVE_BIT;
> > +
> > +	dev_dbg(ndev_dev(ndev), "%s: reg_val = 0x%x.\n", __func__, reg);
> > +
> > +	if (reg == ndev->cntl_sta)
> > +		return 0;
> > +
> > +	ndev->cntl_sta = reg;
> > +
> > +	rc = pci_read_config_dword(ndev->ntb.pdev,
> > +			AMD_LINK_STATUS_OFFSET, &stat);
> > +	if (rc)
> > +		return 0;
> > +	ndev->lnk_sta = stat;
> > +
> > +	return 1;
> > +}
> > +
> > +static void amd_link_hb(struct work_struct *work) {
> > +	struct amd_ntb_dev *ndev = hb_ndev(work);
> > +
> > +	if (amd_poll_link(ndev))
> > +		ntb_link_event(&ndev->ntb);
> > +
> > +	if (!amd_link_is_up(ndev))
> > +		schedule_delayed_work(&ndev->hb_timer,
> AMD_LINK_HB_TIMEOUT); }
> > +
> > +static int amd_init_isr(struct amd_ntb_dev *ndev) {
> > +	return ndev_init_isr(ndev, AMD_DB_CNT,
> AMD_MSIX_VECTOR_CNT); }
> > +
> > +static void amd_init_side_info(struct amd_ntb_dev *ndev) {
> > +	void __iomem *mmio = ndev->self_mmio;
> > +	unsigned int reg = 0;
> 
> variable initialized unnecessarily

Ok.

> > +
> > +	reg = readl(mmio + AMD_SIDEINFO_OFFSET);
> > +	if (!(reg & 0x2)) {
> > +		reg |= 0x2;
> 
> I'd like a #define for the "2" above, as you are using it multiple times.

Ok.

> 
> > +		writel(reg, mmio + AMD_SIDEINFO_OFFSET);
> > +	}
> > +}
> > +
> > +static void amd_deinit_side_info(struct amd_ntb_dev *ndev) {
> > +	void __iomem *mmio = ndev->self_mmio;
> > +	unsigned int reg = 0;
> 
> variable initialized unnecessarily

Ok.

> > +
> > +	reg = readl(mmio + AMD_SIDEINFO_OFFSET);
> > +	if (reg & 0x2) {
> > +		reg &= ~(0x2);
> > +		writel(reg, mmio + AMD_SIDEINFO_OFFSET);
> > +		readl(mmio + AMD_SIDEINFO_OFFSET);
> > +	}
> > +}
> > +
> > +static int amd_init_ntb(struct amd_ntb_dev *ndev) {
> > +	void __iomem *mmio = ndev->self_mmio;
> > +
> > +	ndev->mw_count = AMD_MW_CNT;
> > +	ndev->spad_count = AMD_SPADS_CNT;
> > +	ndev->db_count = AMD_DB_CNT;
> > +
> > +	switch (ndev->ntb.topo) {
> > +	case NTB_TOPO_PRI:
> > +	case NTB_TOPO_SEC:
> > +		ndev->spad_count >>= 1;
> > +		if (ndev->ntb.topo == NTB_TOPO_PRI) {
> > +			ndev->self_spad = 0;
> > +			ndev->peer_spad = 0x20;
> > +		} else {
> > +			ndev->self_spad = 0x20;
> > +			ndev->peer_spad = 0;
> > +		}
> > +
> > +		INIT_DELAYED_WORK(&ndev->hb_timer, amd_link_hb);
> > +		schedule_delayed_work(&ndev->hb_timer,
> AMD_LINK_HB_TIMEOUT);
> > +
> > +		break;
> > +	default:
> > +		dev_err(ndev_dev(ndev), "AMD NTB does not support B2B
> mode.\n");
> > +		return -EINVAL;
> > +	}
> > +
> > +	ndev->db_valid_mask = BIT_ULL(ndev->db_count) - 1;
> > +
> > +	/* Mask event interrupts */
> > +	writel(ndev->int_mask, mmio + AMD_INTMASK_OFFSET);
> > +
> > +	return 0;
> > +}
> > +
> > +static inline enum ntb_topo amd_get_topo(struct amd_ntb_dev *ndev)
> 
> It is usually better to let the compiler determine what to inline and what not
> to inline..

Do you mean removing the key word of inline?

> > +{
> > +	void __iomem *mmio = ndev->self_mmio;
> > +	u32 info;
> > +
> > +	info = readl(mmio + AMD_SIDEINFO_OFFSET);
> > +	if (info & AMD_SIDE_MASK)
> > +		return NTB_TOPO_SEC;
> > +	else
> > +		return NTB_TOPO_PRI;
> > +}
> > +
> > +static int amd_init_dev(struct amd_ntb_dev *ndev) {
> > +	struct pci_dev *pdev;
> > +	int rc = 0;
> > +
> > +	pdev = ndev_pdev(ndev);
> > +
> > +	ndev->ntb.topo = amd_get_topo(ndev);
> > +	dev_dbg(ndev_dev(ndev), "AMD NTB topo is %s\n",
> > +			ntb_topo_string(ndev->ntb.topo));
> > +
> > +	rc = amd_init_ntb(ndev);
> > +	if (rc)
> > +		return rc;
> > +
> > +	rc = amd_init_isr(ndev);
> > +	if (rc) {
> > +		dev_err(ndev_dev(ndev), "fail to init isr.\n");
> > +		return rc;
> > +	}
> > +
> > +	ndev->db_valid_mask = BIT_ULL(ndev->db_count) - 1;
> > +
> > +	return 0;
> > +}
> > +
> > +static void amd_deinit_dev(struct amd_ntb_dev *ndev) {
> > +	cancel_delayed_work_sync(&ndev->hb_timer);
> > +
> > +	ndev_deinit_isr(ndev);
> > +}
> > +
> > +static int amd_ntb_init_pci(struct amd_ntb_dev *ndev,
> > +			    struct pci_dev *pdev)
> > +{
> > +	int rc;
> > +
> > +	pci_set_drvdata(pdev, ndev);
> > +
> > +	rc = pci_enable_device(pdev);
> > +	if (rc)
> > +		goto err_pci_enable;
> > +
> > +	rc = pci_request_regions(pdev, NTB_NAME);
> > +	if (rc)
> > +		goto err_pci_regions;
> > +
> > +	pci_set_master(pdev);
> > +
> > +	rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
> > +	if (rc) {
> > +		rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
> > +		if (rc)
> > +			goto err_dma_mask;
> > +		dev_warn(ndev_dev(ndev), "Cannot DMA highmem\n");
> > +	}
> > +
> > +	rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
> > +	if (rc) {
> > +		rc = pci_set_consistent_dma_mask(pdev,
> DMA_BIT_MASK(32));
> > +		if (rc)
> > +			goto err_dma_mask;
> > +		dev_warn(ndev_dev(ndev), "Cannot DMA consistent
> highmem\n");
> > +	}
> > +
> > +	ndev->self_mmio = pci_iomap(pdev, 0, 0);
> > +	if (!ndev->self_mmio) {
> > +		rc = -EIO;
> > +		goto err_mmio;
> > +	}
> > +	ndev->peer_mmio = ndev->self_mmio + AMD_PEER_OFFSET;
> > +
> > +	return 0;
> > +
> > +err_mmio:
> 
> seems like an unnecessary goto destination here

Will entry the goto destination if pci_iomap failed. The code is following
Intel NTB and I see many drivers in kernel will check the return value 
of pci_iomap.

> 
> > +err_dma_mask:
> > +	pci_clear_master(pdev);
> > +err_pci_regions:
> > +	pci_disable_device(pdev);
> > +err_pci_enable:
> > +	pci_set_drvdata(pdev, NULL);
> > +	return rc;
> > +}
> > +
> > +static void amd_ntb_deinit_pci(struct amd_ntb_dev *ndev) {
> > +	struct pci_dev *pdev = ndev_pdev(ndev);
> > +
> > +	pci_iounmap(pdev, ndev->self_mmio);
> > +
> > +	pci_clear_master(pdev);
> > +	pci_release_regions(pdev);
> > +	pci_disable_device(pdev);
> > +	pci_set_drvdata(pdev, NULL);
> > +}
> > +
> > +
> > +static int amd_ntb_pci_probe(struct pci_dev *pdev,
> > +			     const struct pci_device_id *id) {
> > +	struct amd_ntb_dev *ndev;
> > +	int rc, node;
> > +
> > +	node = dev_to_node(&pdev->dev);
> > +
> > +	ndev = kzalloc_node(sizeof(*ndev), GFP_KERNEL, node);
> > +	if (!ndev) {
> > +		rc = -ENOMEM;
> > +		goto err_ndev;
> > +	}
> > +
> > +	ndev_init_struct(ndev, pdev);
> > +
> > +	rc = amd_ntb_init_pci(ndev, pdev);
> > +	if (rc)
> > +		goto err_init_pci;
> > +
> > +	rc = amd_init_dev(ndev);
> > +	if (rc)
> > +		goto err_init_dev;
> > +
> > +	/* write side info */
> > +	amd_init_side_info(ndev);
> > +
> > +	amd_poll_link(ndev);
> > +
> > +	ndev_init_debugfs(ndev);
> > +
> > +	rc = ntb_register_device(&ndev->ntb);
> > +	if (rc)
> > +		goto err_register;
> > +
> > +	dev_info(&pdev->dev, "NTB device registered.\n");
> > +
> > +	return 0;
> > +
> > +err_register:
> > +	ndev_deinit_debugfs(ndev);
> > +	amd_deinit_dev(ndev);
> > +err_init_dev:
> > +	amd_ntb_deinit_pci(ndev);
> > +err_init_pci:
> > +	kfree(ndev);
> > +err_ndev:
> > +	return rc;
> > +}
> > +
> > +static void amd_ntb_pci_remove(struct pci_dev *pdev) {
> > +	struct amd_ntb_dev *ndev = pci_get_drvdata(pdev);
> > +
> > +	ntb_unregister_device(&ndev->ntb);
> > +	ndev_deinit_debugfs(ndev);
> > +	amd_deinit_side_info(ndev);
> > +	amd_deinit_dev(ndev);
> > +	amd_ntb_deinit_pci(ndev);
> > +	kfree(ndev);
> > +}
> > +
> > +static const struct file_operations amd_ntb_debugfs_info = {
> > +	.owner = THIS_MODULE,
> > +	.open = simple_open,
> > +	.read = ndev_debugfs_read,
> > +};
> > +
> > +static const struct pci_device_id amd_ntb_pci_tbl[] = {
> > +	{PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_NTB)},
> > +	{0}
> > +};
> > +MODULE_DEVICE_TABLE(pci, amd_ntb_pci_tbl);
> > +
> > +static struct pci_driver amd_ntb_pci_driver = {
> > +	.name		= KBUILD_MODNAME,
> > +	.id_table	= amd_ntb_pci_tbl,
> > +	.probe		= amd_ntb_pci_probe,
> > +	.remove		= amd_ntb_pci_remove,
> > +};
> > +
> > +static int __init amd_ntb_pci_driver_init(void) {
> > +	pr_info("%s %s\n", NTB_DESC, NTB_VER);
> > +
> > +	if (debugfs_initialized())
> > +		debugfs_dir = debugfs_create_dir(KBUILD_MODNAME,
> NULL);
> > +
> > +	return pci_register_driver(&amd_ntb_pci_driver);
> > +}
> > +module_init(amd_ntb_pci_driver_init);
> > +
> > +static void __exit amd_ntb_pci_driver_exit(void) {
> > +	pci_unregister_driver(&amd_ntb_pci_driver);
> > +	debugfs_remove_recursive(debugfs_dir);
> > +}
> > +module_exit(amd_ntb_pci_driver_exit);
> > diff --git a/drivers/ntb/hw/amd/ntb_hw_amd.h
> > b/drivers/ntb/hw/amd/ntb_hw_amd.h new file mode 100644 index
> > 0000000..31021c0
> > --- /dev/null
> > +++ b/drivers/ntb/hw/amd/ntb_hw_amd.h
> > @@ -0,0 +1,246 @@
> > +/*
> > + * This file is provided under a dual BSD/GPLv2 license.  When using or
> > + *   redistributing this file, you may do so under either license.
> > + *
> > + *   GPL LICENSE SUMMARY
> > + *
> > + *   Copyright (C) 2016 Advanced Micro Devices, Inc. All Rights Reserved.
> > + *
> > + *   This program is free software; you can redistribute it and/or modify
> > + *   it under the terms of version 2 of the GNU General Public License as
> > + *   published by the Free Software Foundation.
> > + *
> > + *   BSD LICENSE
> > + *
> > + *   Copyright (C) 2016 Advanced Micro Devices, Inc. All Rights Reserved.
> > + *
> > + *   Redistribution and use in source and binary forms, with or without
> > + *   modification, are permitted provided that the following conditions
> > + *   are met:
> > + *
> > + *     * Redistributions of source code must retain the above copyright
> > + *       notice, this list of conditions and the following disclaimer.
> > + *     * Redistributions in binary form must reproduce the above copy
> > + *       notice, this list of conditions and the following disclaimer in
> > + *       the documentation and/or other materials provided with the
> > + *       distribution.
> > + *     * Neither the name of AMD Corporation nor the names of its
> > + *       contributors may be used to endorse or promote products derived
> > + *       from this software without specific prior written permission.
> > + *
> > + *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
> CONTRIBUTORS
> > + *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT
> NOT
> > + *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
> FITNESS FOR
> > + *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
> COPYRIGHT
> > + *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
> INCIDENTAL,
> > + *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
> BUT NOT
> > + *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
> LOSS OF USE,
> > + *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
> AND ON ANY
> > + *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
> TORT
> > + *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
> OF THE USE
> > + *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
> DAMAGE.
> > + *
> > + * AMD PCIe NTB Linux driver
> > + *
> > + * Contact Information:
> > + * Xiangliang Yu<Xiangliang.Yu@amd.com>
> 
> Same comment as above about the space needed here.

Ok

> > + */
> > +
> > +#ifndef NTB_HW_AMD_H
> > +#define NTB_HW_AMD_H
> > +
> > +#include <linux/ntb.h>
> > +#include <linux/pci.h>
> > +
> > +#define PCI_DEVICE_ID_AMD_NTB	0x145B
> > +#define AMD_LINK_HB_TIMEOUT	msecs_to_jiffies(1000)
> 
> Tab align the second half of these

I'll check it.

> 
> > +#define AMD_LINK_STATUS_OFFSET	0x68
> > +#define NTB_LIN_STA_ACTIVE_BIT	0x00000002
> > +#define NTB_LNK_STA_SPEED_MASK	0x000F0000
> > +#define NTB_LNK_STA_WIDTH_MASK	0x03F00000
> > +#define NTB_LNK_STA_ACTIVE(x)	(!!((x) & NTB_LIN_STA_ACTIVE_BIT))
> > +#define NTB_LNK_STA_SPEED(x)	(((x) &
> NTB_LNK_STA_SPEED_MASK) >> 16)
> > +#define NTB_LNK_STA_WIDTH(x)	(((x) &
> NTB_LNK_STA_WIDTH_MASK) >> 20)
> > +
> > +#ifndef ioread64
> > +#ifdef readq
> > +#define ioread64 readq
> > +#else
> > +#define ioread64 _ioread64
> > +static inline u64 _ioread64(void __iomem *mmio) {
> > +	u64 low, high;
> > +
> > +	low = ioread32(mmio);
> > +	high = ioread32(mmio + sizeof(u32));
> > +	return low | (high << 32);
> > +}
> > +#endif
> > +#endif
> > +
> > +#ifndef iowrite64
> > +#ifdef writeq
> > +#define iowrite64 writeq
> > +#else
> > +#define iowrite64 _iowrite64
> > +static inline void _iowrite64(u64 val, void __iomem *mmio) {
> > +	iowrite32(val, mmio);
> > +	iowrite32(val >> 32, mmio + sizeof(u32)); } #endif #endif
> > +
> > +enum {
> > +	/* AMD NTB Capability */
> > +	AMD_MW_CNT		= 3,
> > +	AMD_DB_CNT		= 16,
> > +	AMD_MSIX_VECTOR_CNT	= 24,
> > +	AMD_SPADS_CNT		= 16,
> > +
> > +	/*  AMD NTB register offset */
> > +	AMD_CNTL_OFFSET		= 0x200,
> > +
> > +	/* NTB control register bits */
> > +	PMM_REG_CTL		= BIT(21),
> > +	SMM_REG_CTL		= BIT(20),
> > +	SMM_REG_ACC_PATH	= BIT(18),
> > +	PMM_REG_ACC_PATH	= BIT(17),
> > +	NTB_CLK_EN		= BIT(16),
> > +
> > +	AMD_STA_OFFSET		= 0x204,
> > +	AMD_PGSLV_OFFSET	= 0x208,
> > +	AMD_SPAD_MUX_OFFSET	= 0x20C,
> > +	AMD_SPAD_OFFSET		= 0x210,
> > +	AMD_RSMU_HCID		= 0x250,
> > +	AMD_RSMU_SIID		= 0x254,
> > +	AMD_PSION_OFFSET	= 0x300,
> > +	AMD_SSION_OFFSET	= 0x330,
> > +	AMD_MMINDEX_OFFSET	= 0x400,
> > +	AMD_MMDATA_OFFSET	= 0x404,
> > +	AMD_SIDEINFO_OFFSET	= 0x408,
> > +
> > +	AMD_SIDE_MASK		= BIT(0),
> > +
> > +	/* limit register */
> > +	AMD_ROMBARLMT_OFFSET	= 0x410,
> > +	AMD_BAR1LMT_OFFSET	= 0x414,
> > +	AMD_BAR23LMT_OFFSET	= 0x418,
> > +	AMD_BAR45LMT_OFFSET	= 0x420,
> > +	/* xlat address */
> > +	AMD_POMBARXLAT_OFFSET	= 0x428,
> > +	AMD_BAR1XLAT_OFFSET	= 0x430,
> > +	AMD_BAR23XLAT_OFFSET	= 0x438,
> > +	AMD_BAR45XLAT_OFFSET	= 0x440,
> > +	/* doorbell and interrupt */
> > +	AMD_DBFM_OFFSET		= 0x450,
> > +	AMD_DBREQ_OFFSET	= 0x454,
> > +	AMD_MIRRDBSTAT_OFFSET	= 0x458,
> > +	AMD_DBMASK_OFFSET	= 0x45C,
> > +	AMD_DBSTAT_OFFSET	= 0x460,
> > +	AMD_INTMASK_OFFSET	= 0x470,
> > +	AMD_INTSTAT_OFFSET	= 0x474,
> > +
> > +	/* event type */
> > +	AMD_PEER_FLUSH_EVENT	= BIT(0),
> > +	AMD_PEER_RESET_EVENT	= BIT(1),
> > +	AMD_PEER_D3_EVENT	= BIT(2),
> > +	AMD_PEER_PMETO_EVENT	= BIT(3),
> > +	AMD_PEER_D0_EVENT	= BIT(4),
> > +	AMD_EVENT_INTMASK	= (AMD_PEER_FLUSH_EVENT |
> > +				AMD_PEER_RESET_EVENT |
> AMD_PEER_D3_EVENT |
> > +				AMD_PEER_PMETO_EVENT |
> AMD_PEER_D0_EVENT),
> > +
> > +	AMD_PMESTAT_OFFSET	= 0x480,
> > +	AMD_PMSGTRIG_OFFSET	= 0x490,
> > +	AMD_LTRLATENCY_OFFSET	= 0x494,
> > +	AMD_FLUSHTRIG_OFFSET	= 0x498,
> > +
> > +	/* SMU register*/
> > +	AMD_SMUACK_OFFSET	= 0x4A0,
> > +	AMD_SINRST_OFFSET	= 0x4A4,
> > +	AMD_RSPNUM_OFFSET	= 0x4A8,
> > +	AMD_SMU_SPADMUTEX	= 0x4B0,
> > +	AMD_SMU_SPADOFFSET	= 0x4B4,
> > +
> > +	AMD_PEER_OFFSET		= 0x400,
> > +};
> 
> It seems extremely odd to mix and match different kinds of things in the
> same enumerated type.  I'd prefer to have these as #defines or separated
> enum types.

The code following AHCI.h style, it put field definition of register close to register
Marco definition seems appropriate to me. Actually, I have seen lot of the style in kernel.

> > +
> > +struct amd_ntb_dev;
> > +
> > +struct amd_ntb_vec {
> > +	struct amd_ntb_dev	*ndev;
> > +	int			num;
> > +};
> > +
> > +struct amd_ntb_dev {
> > +	struct ntb_dev ntb;
> > +
> > +	u32 ntb_side;
> > +	u32 lnk_sta;
> > +	u32 cntl_sta;
> > +	u32 peer_sta;
> > +
> > +	unsigned char mw_count;
> > +	unsigned char spad_count;
> > +	unsigned char db_count;
> > +	unsigned char msix_vec_count;
> > +
> > +	u64 db_valid_mask;
> > +	u64 db_mask;
> > +	u32 int_mask;
> > +
> > +	struct msix_entry *msix;
> > +	struct amd_ntb_vec *vec;
> > +
> > +	spinlock_t db_mask_lock;
> > +
> > +	void __iomem *self_mmio;
> > +	void __iomem *peer_mmio;
> > +	unsigned int self_spad;
> > +	unsigned int peer_spad;
> > +
> > +	struct delayed_work hb_timer;
> > +
> > +	struct dentry *debugfs_dir;
> > +	struct dentry *debugfs_info;
> > +};
> > +
> > +#define ndev_pdev(ndev) ((ndev)->ntb.pdev) #define ndev_name(ndev)
> > +pci_name(ndev_pdev(ndev)) #define ndev_dev(ndev)
> > +(&ndev_pdev(ndev)->dev) #define ntb_ndev(__ntb)
> container_of(__ntb,
> > +struct amd_ntb_dev, ntb) #define hb_ndev(__work)
> container_of(__work,
> > +struct amd_ntb_dev, hb_timer.work)
> > +
> 
> This should probably be moved into the device driver C file (as they are not
> called anywhere else).

I just following intel ntb driver, I'll also change it if you change intel driver.

> 
> > +static int amd_ntb_mw_count(struct ntb_dev *ntb); static int
> > +amd_ntb_mw_get_range(struct ntb_dev *ntb, int idx,
> > +			phys_addr_t *base, resource_size_t *size,
> > +			resource_size_t *align, resource_size_t *algin_size);
> static int
> > +amd_ntb_mw_set_trans(struct ntb_dev *ndev, int idx,
> > +				dma_addr_t addr, resource_size_t size);
> static int
> > +amd_ntb_link_is_up(struct ntb_dev *ntb,
> > +				enum ntb_speed *speed,
> > +				enum ntb_width *width);
> > +static int amd_ntb_link_enable(struct ntb_dev *ntb,
> > +				enum ntb_speed speed,
> > +				enum ntb_width width);
> > +static int amd_ntb_link_disable(struct ntb_dev *ntb); static u64
> > +amd_ntb_db_valid_mask(struct ntb_dev *ntb); static int
> > +amd_ntb_db_vector_count(struct ntb_dev *ntb); static u64
> > +amd_ntb_db_vector_mask(struct ntb_dev *ntb, int db_vector); static
> > +u64 amd_ntb_db_read(struct ntb_dev *ntb); static int
> > +amd_ntb_db_clear(struct ntb_dev *ntb, u64 db_bits); static int
> > +amd_ntb_db_set_mask(struct ntb_dev *ntb, u64 db_bits); static int
> > +amd_ntb_db_clear_mask(struct ntb_dev *ntb, u64 db_bits); static int
> > +amd_ntb_peer_db_addr(struct ntb_dev *ntb,
> > +				phys_addr_t *db_addr,
> > +				resource_size_t *db_size);
> > +static int amd_ntb_peer_db_set(struct ntb_dev *ntb, u64 db_bits);
> > +static int amd_ntb_spad_count(struct ntb_dev *ntb); static u32
> > +amd_ntb_spad_read(struct ntb_dev *ntb, int idx); static int
> > +amd_ntb_spad_write(struct ntb_dev *ntb, int idx, u32 val); static int
> > +amd_ntb_peer_spad_addr(struct ntb_dev *ntb, int idx,
> > +					phys_addr_t *spad_addr);
> > +static u32 amd_ntb_peer_spad_read(struct ntb_dev *ntb, int idx);
> > +static int amd_ntb_peer_spad_write(struct ntb_dev *ntb, int idx, u32
> > +val);
> 
> I believe all of these function declarations should be removed.
Ok.

And please see my comments, I'll submit v4 patch immediately when getting your feedback.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH V3 1/2] NTB: Add AMD PCI-Express NTB driver
  2016-01-18  9:42   ` Yu, Xiangliang
@ 2016-01-18 15:07     ` Jon Mason
  2016-01-18 15:26       ` Yu, Xiangliang
  0 siblings, 1 reply; 9+ messages in thread
From: Jon Mason @ 2016-01-18 15:07 UTC (permalink / raw)
  To: Yu, Xiangliang
  Cc: dave.jiang@intel.com, Allen.Hubbe@emc.com,
	linux-ntb@googlegroups.com, linux-kernel@vger.kernel.org,
	SPG_Linux_Kernel

On Mon, Jan 18, 2016 at 4:42 AM, Yu, Xiangliang <Xiangliang.Yu@amd.com> wrote:
>> On Thu, Jan 14, 2016 at 07:44:07PM +0800, Xiangliang Yu wrote:
>> > AMD NTB support following main features:
>> > (1) Three memory windows;
>> > (2) Sixteen 32-bit scratch pad;
>> > (3) Two 16-bit doorbell interrupt;
>> > (4) Five event interrupts;
>> > (5) One system can wake up opposite system of NTB;
>> > (6) Flush previous request to the opposite system;
>> > (7) There are reset and PME_TO mechanisms between two systems;
>>
>> Please create a much more verbose description of the patch.  This is not
>> acceptable in it's current form, and I'm sure I could get flamed by Linus if he
>> saw it in my pull request.  Even writing this out as a paragaph instead of a
>> numbered list would be better.
>
> Ok, I'll change it.
>
>> >
>> > Signed-off-by: Xiangliang Yu <Xiangliang.Yu@amd.com>
>> > ---
>> >  drivers/ntb/hw/Kconfig          |    1 +
>> >  drivers/ntb/hw/Makefile         |    1 +
>> >  drivers/ntb/hw/amd/Kconfig      |    7 +
>> >  drivers/ntb/hw/amd/Makefile     |    1 +
>> >  drivers/ntb/hw/amd/ntb_hw_amd.c | 1148
>> > +++++++++++++++++++++++++++++++++++++++
>> >  drivers/ntb/hw/amd/ntb_hw_amd.h |  246 +++++++++
>> >  6 files changed, 1404 insertions(+)
>> >  create mode 100644 drivers/ntb/hw/amd/Kconfig  create mode 100644
>> > drivers/ntb/hw/amd/Makefile  create mode 100644
>> > drivers/ntb/hw/amd/ntb_hw_amd.c  create mode 100644
>> > drivers/ntb/hw/amd/ntb_hw_amd.h
>> >
>> > diff --git a/drivers/ntb/hw/Kconfig b/drivers/ntb/hw/Kconfig index
>> > 4d5535c..0c5c2a6 100644
>> > --- a/drivers/ntb/hw/Kconfig
>> > +++ b/drivers/ntb/hw/Kconfig
>> > @@ -1 +1,2 @@
>> >  source "drivers/ntb/hw/intel/Kconfig"
>> > +source "drivers/ntb/hw/amd/Kconfig"
>>
>> Being nit picky, but please make it alphabetical and put amd before intel.
>
> Ok.
>
>> > diff --git a/drivers/ntb/hw/Makefile b/drivers/ntb/hw/Makefile index
>> > 175d7c9..636be7d 100644
>> > --- a/drivers/ntb/hw/Makefile
>> > +++ b/drivers/ntb/hw/Makefile
>> > @@ -1 +1,2 @@
>> >  obj-$(CONFIG_NTB_INTEL)    += intel/
>> > +obj-$(CONFIG_NTB_AMD)      += amd/
>>
>> Same nit, make it alphabetical.
>
> Ok.
>
>> > diff --git a/drivers/ntb/hw/amd/Kconfig b/drivers/ntb/hw/amd/Kconfig
>> > new file mode 100644 index 0000000..cfe903c
>> > --- /dev/null
>> > +++ b/drivers/ntb/hw/amd/Kconfig
>> > @@ -0,0 +1,7 @@
>> > +config NTB_AMD
>> > +   tristate "AMD Non-Transparent Bridge support"
>> > +   depends on X86_64
>> > +   help
>> > +    This driver supports AMD NTB on capable Zeppelin hardware.
>> > +
>> > +    If unsure, say N.
>> > diff --git a/drivers/ntb/hw/amd/Makefile b/drivers/ntb/hw/amd/Makefile
>> > new file mode 100644 index 0000000..ad54da9
>> > --- /dev/null
>> > +++ b/drivers/ntb/hw/amd/Makefile
>> > @@ -0,0 +1 @@
>> > +obj-$(CONFIG_NTB_AMD) += ntb_hw_amd.o
>> > diff --git a/drivers/ntb/hw/amd/ntb_hw_amd.c
>> > b/drivers/ntb/hw/amd/ntb_hw_amd.c new file mode 100644 index
>> > 0000000..8df6d7b
>> > --- /dev/null
>> > +++ b/drivers/ntb/hw/amd/ntb_hw_amd.c
>> > @@ -0,0 +1,1148 @@
>> > +/*
>> > + * This file is provided under a dual BSD/GPLv2 license.  When using or
>> > + *   redistributing this file, you may do so under either license.
>> > + *
>> > + *   GPL LICENSE SUMMARY
>> > + *
>> > + *   Copyright (C) 2016 Advanced Micro Devices, Inc. All Rights Reserved.
>> > + *
>> > + *   This program is free software; you can redistribute it and/or modify
>> > + *   it under the terms of version 2 of the GNU General Public License as
>> > + *   published by the Free Software Foundation.
>> > + *
>> > + *   BSD LICENSE
>> > + *
>> > + *   Copyright (C) 2016 Advanced Micro Devices, Inc. All Rights Reserved.
>> > + *
>> > + *   Redistribution and use in source and binary forms, with or without
>> > + *   modification, are permitted provided that the following conditions
>> > + *   are met:
>> > + *
>> > + *     * Redistributions of source code must retain the above copyright
>> > + *       notice, this list of conditions and the following disclaimer.
>> > + *     * Redistributions in binary form must reproduce the above copy
>> > + *       notice, this list of conditions and the following disclaimer in
>> > + *       the documentation and/or other materials provided with the
>> > + *       distribution.
>> > + *     * Neither the name of AMD Corporation nor the names of its
>> > + *       contributors may be used to endorse or promote products derived
>> > + *       from this software without specific prior written permission.
>> > + *
>> > + *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
>> CONTRIBUTORS
>> > + *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT
>> NOT
>> > + *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
>> FITNESS FOR
>> > + *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
>> COPYRIGHT
>> > + *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
>> INCIDENTAL,
>> > + *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
>> BUT NOT
>> > + *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
>> LOSS OF USE,
>> > + *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
>> AND ON ANY
>> > + *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
>> TORT
>> > + *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
>> OF THE USE
>> > + *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
>> DAMAGE.
>> > + *
>> > + * AMD PCIe NTB Linux driver
>> > + *
>> > + * Contact Information:
>> > + * Xiangliang Yu<Xiangliang.Yu@amd.com>
>>
>> Nit, space needed between 'u' and '<'
>
> Ok
>
>> > + */
>> > +
>> > +#include <linux/debugfs.h>
>> > +#include <linux/delay.h>
>> > +#include <linux/init.h>
>> > +#include <linux/interrupt.h>
>> > +#include <linux/module.h>
>> > +#include <linux/acpi.h>
>> > +#include <linux/pci.h>
>> > +#include <linux/random.h>
>> > +#include <linux/slab.h>
>> > +#include <linux/ntb.h>
>> > +
>> > +#include "ntb_hw_amd.h"
>> > +
>> > +#define NTB_NAME   "ntb_hw_amd"
>> > +#define NTB_DESC   "AMD(R) PCI-E Non-Transparent Bridge Driver"
>> > +#define NTB_VER            "1.0"
>> > +
>> > +MODULE_DESCRIPTION(NTB_DESC);
>> > +MODULE_VERSION(NTB_VER);
>> > +MODULE_LICENSE("Dual BSD/GPL");
>> > +MODULE_AUTHOR("AMD Inc.");
>> > +
>> > +static const struct file_operations amd_ntb_debugfs_info; static
>> > +struct dentry *debugfs_dir;
>> > +
>> > +static const struct ntb_dev_ops amd_ntb_ops = {
>> > +   .mw_count               = amd_ntb_mw_count,
>> > +   .mw_get_range           = amd_ntb_mw_get_range,
>> > +   .mw_set_trans           = amd_ntb_mw_set_trans,
>> > +   .link_is_up             = amd_ntb_link_is_up,
>> > +   .link_enable            = amd_ntb_link_enable,
>> > +   .link_disable           = amd_ntb_link_disable,
>> > +   .db_valid_mask          = amd_ntb_db_valid_mask,
>> > +   .db_vector_count        = amd_ntb_db_vector_count,
>> > +   .db_vector_mask         = amd_ntb_db_vector_mask,
>> > +   .db_read                = amd_ntb_db_read,
>> > +   .db_clear               = amd_ntb_db_clear,
>> > +   .db_set_mask            = amd_ntb_db_set_mask,
>> > +   .db_clear_mask          = amd_ntb_db_clear_mask,
>> > +   .peer_db_addr           = amd_ntb_peer_db_addr,
>> > +   .peer_db_set            = amd_ntb_peer_db_set,
>> > +   .spad_count             = amd_ntb_spad_count,
>> > +   .spad_read              = amd_ntb_spad_read,
>> > +   .spad_write             = amd_ntb_spad_write,
>> > +   .peer_spad_addr         = amd_ntb_peer_spad_addr,
>> > +   .peer_spad_read         = amd_ntb_peer_spad_read,
>> > +   .peer_spad_write        = amd_ntb_peer_spad_write,
>> > +};
>> > +
>> > +static int ndev_mw_to_bar(struct amd_ntb_dev *ndev, int idx) {
>> > +   if (idx < 0 || idx > ndev->mw_count)
>> > +           return -EINVAL;
>> > +
>> > +   return 1 << idx;
>> > +}
>> > +
>> > +static int amd_ntb_mw_count(struct ntb_dev *ntb) {
>> > +   return ntb_ndev(ntb)->mw_count;
>> > +}
>> > +
>> > +static int amd_ntb_mw_get_range(struct ntb_dev *ntb, int idx,
>> > +                           phys_addr_t *base,
>> > +                           resource_size_t *size,
>> > +                           resource_size_t *align,
>> > +                           resource_size_t *align_size)
>> > +{
>> > +   struct amd_ntb_dev *ndev = ntb_ndev(ntb);
>> > +   int bar;
>> > +
>> > +   bar = ndev_mw_to_bar(ndev, idx);
>> > +   if (bar < 0)
>> > +           return bar;
>> > +
>> > +   if (base)
>> > +           *base = pci_resource_start(ndev->ntb.pdev, bar);
>> > +
>> > +   if (size)
>> > +           *size = pci_resource_len(ndev->ntb.pdev, bar);
>> > +
>> > +   if (align)
>> > +           *align = 4096;
>>
>> Please use SZ_4K from sizes.h
>
> Ok
>
>> > +
>> > +   if (align_size)
>> > +           *align_size = 1;
>> > +
>> > +   return 0;
>> > +}
>> > +
>> > +static int amd_ntb_mw_set_trans(struct ntb_dev *ntb, int idx,
>> > +                           dma_addr_t addr, resource_size_t size) {
>> > +   struct amd_ntb_dev *ndev = ntb_ndev(ntb);
>> > +   unsigned long xlat_reg, limit_reg = 0;
>> > +   resource_size_t mw_size;
>> > +   void __iomem *mmio, *peer_mmio;
>> > +   u64 base_addr, limit, reg_val;
>> > +   int bar;
>> > +
>> > +   bar = ndev_mw_to_bar(ndev, idx);
>> > +   if (bar < 0)
>> > +           return bar;
>> > +
>> > +   mw_size = pci_resource_len(ndev->ntb.pdev, bar);
>> > +
>> > +   /* make sure the range fits in the usable mw size */
>> > +   if (size > mw_size)
>> > +           return -EINVAL;
>> > +
>> > +   mmio = ndev->self_mmio;
>> > +   peer_mmio = ndev->peer_mmio;
>> > +
>> > +   base_addr = pci_resource_start(ndev->ntb.pdev, bar);
>> > +   if (bar == 1) {
>> > +           xlat_reg = AMD_BAR1XLAT_OFFSET;
>> > +           limit_reg = AMD_BAR1LMT_OFFSET;
>> > +   } else {
>> > +           xlat_reg = AMD_BAR23XLAT_OFFSET + ((bar - 2) << 3);
>> > +           limit_reg = AMD_BAR23LMT_OFFSET + ((bar - 2) << 3);
>> > +   }
>> > +
>> > +   if (bar != 1) {
>> > +           /* Set the limit if supported */
>> > +           limit = base_addr + size;
>> > +
>> > +           /* set and verify setting the translation address */
>> > +           iowrite64(addr, peer_mmio + xlat_reg);
>> > +           reg_val = ioread64(peer_mmio + xlat_reg);
>> > +           if (reg_val != addr) {
>> > +                   iowrite64(0, peer_mmio + xlat_reg);
>> > +                   return -EIO;
>> > +           }
>> > +
>> > +           /* set and verify setting the limit */
>> > +           iowrite64(limit, mmio + limit_reg);
>> > +           reg_val = ioread64(mmio + limit_reg);
>> > +           if (reg_val != limit) {
>> > +                   iowrite64(base_addr, mmio + limit_reg);
>> > +                   iowrite64(0, peer_mmio + xlat_reg);
>> > +                   return -EIO;
>> > +           }
>> > +   } else {
>> > +           /* split bar addr range must all be 32 bit */
>> > +           if (addr & (~0ull << 32))
>> > +                   return -EINVAL;
>> > +           if ((addr + size) & (~0ull << 32))
>> > +                   return -EINVAL;
>> > +
>> > +           /* Set the limit if supported */
>> > +           limit = base_addr + size;
>> > +
>> > +           /* set and verify setting the translation address */
>> > +           iowrite64(addr, peer_mmio + xlat_reg);
>> > +           reg_val = ioread64(peer_mmio + xlat_reg);
>> > +           if (reg_val != addr) {
>> > +                   iowrite64(0, peer_mmio + xlat_reg);
>> > +                   return -EIO;
>> > +           }
>> > +
>> > +           /* set and verify setting the limit */
>> > +           writel(limit, mmio + limit_reg);
>> > +           reg_val = readl(mmio + limit_reg);
>> > +           if (reg_val != limit) {
>> > +                   writel(base_addr, mmio + limit_reg);
>> > +                   writel(0, peer_mmio + xlat_reg);
>> > +                   return -EIO;
>> > +           }
>> > +   }
>>
>> Being nit picky here again, but why is it necessary to make 2 consecutive
>> checks for the same 'bar' variable.  Please combine them.
>
> Ok.
>
>> > +
>> > +   return 0;
>> > +}
>> > +
>> > +static int amd_link_is_up(struct amd_ntb_dev *ndev) {
>> > +   /* set link down and clear flag */
>> > +   if (ndev->peer_sta & AMD_PEER_RESET_EVENT) {
>> > +           ndev->peer_sta &= ~AMD_PEER_RESET_EVENT;
>> > +           return 0;
>> > +   } else if (ndev->peer_sta & (AMD_PEER_D3_EVENT |
>> > +                                AMD_PEER_PMETO_EVENT)) {
>> > +           return 0;
>> > +   } else if (ndev->peer_sta & AMD_PEER_D0_EVENT) {
>> > +           ndev->peer_sta = 0;
>> > +           return 0;
>> > +   } else
>> > +           return NTB_LNK_STA_ACTIVE(ndev->cntl_sta);
>> > +}
>> > +
>> > +static int amd_ntb_link_is_up(struct ntb_dev *ntb,
>> > +                         enum ntb_speed *speed,
>> > +                         enum ntb_width *width)
>> > +{
>> > +   struct amd_ntb_dev *ndev = ntb_ndev(ntb);
>> > +   int ret = 0;
>> > +
>> > +   if (amd_link_is_up(ndev)) {
>> > +           if (speed)
>> > +                   *speed = NTB_LNK_STA_SPEED(ndev->lnk_sta);
>> > +           if (width)
>> > +                   *width = NTB_LNK_STA_WIDTH(ndev->lnk_sta);
>> > +
>> > +           dev_dbg(ndev_dev(ndev), "link is up.\n");
>> > +
>> > +           ret = 1;
>> > +   } else {
>> > +           if (speed)
>> > +                   *speed = NTB_SPEED_NONE;
>> > +           if (width)
>> > +                   *width = NTB_WIDTH_NONE;
>> > +
>> > +           dev_dbg(ndev_dev(ndev), "link is down.\n");
>> > +   }
>> > +
>> > +   return ret;
>> > +}
>> > +
>> > +static int amd_ntb_link_enable(struct ntb_dev *ntb,
>> > +                          enum ntb_speed max_speed,
>> > +                          enum ntb_width max_width)
>> > +{
>> > +   struct amd_ntb_dev *ndev = ntb_ndev(ntb);
>> > +   void __iomem *mmio = ndev->self_mmio;
>> > +   u32 ntb_ctl;
>> > +
>> > +   /* Enable event interrupt */
>> > +   ndev->int_mask &= ~AMD_EVENT_INTMASK;
>> > +   writel(ndev->int_mask, mmio + AMD_INTMASK_OFFSET);
>> > +
>> > +   if (ndev->ntb.topo == NTB_TOPO_SEC)
>> > +           return -EINVAL;
>> > +   dev_dbg(ndev_dev(ndev), "Enabling Link.\n");
>> > +
>> > +   ntb_ctl = readl(mmio + AMD_CNTL_OFFSET);
>> > +   ntb_ctl |= (3 << 20);
>>
>> This "20" is a bit magical to me.  Please use a #define
>
> Yes.
>
>> > +   writel(ntb_ctl, mmio + AMD_CNTL_OFFSET);
>> > +
>> > +   return 0;
>> > +}
>> > +
>> > +static int amd_ntb_link_disable(struct ntb_dev *ntb) {
>> > +   struct amd_ntb_dev *ndev = ntb_ndev(ntb);
>> > +   void __iomem *mmio = ndev->self_mmio;
>> > +   u32 ntb_ctl;
>> > +
>> > +   /* Disable event interrupt */
>> > +   ndev->int_mask |= AMD_EVENT_INTMASK;
>> > +   writel(ndev->int_mask, mmio + AMD_INTMASK_OFFSET);
>> > +
>> > +   if (ndev->ntb.topo == NTB_TOPO_SEC)
>> > +           return -EINVAL;
>> > +   dev_dbg(ndev_dev(ndev), "Enabling Link.\n");
>> > +
>> > +   ntb_ctl = readl(mmio + AMD_CNTL_OFFSET);
>> > +   ntb_ctl &= ~(3 << 16);
>>
>> Should this be "20" or "16"?  Seems odd that you would use a different offset
>> to enable than disable.  Either way, a #define here would be nice too.
>
> Got it.
>
>> > +   writel(ntb_ctl, mmio + AMD_CNTL_OFFSET);
>> > +
>> > +   return 0;
>> > +}
>> > +
>> > +static u64 amd_ntb_db_valid_mask(struct ntb_dev *ntb) {
>> > +   return ntb_ndev(ntb)->db_valid_mask; }
>> > +
>> > +static int amd_ntb_db_vector_count(struct ntb_dev *ntb) {
>> > +   return ntb_ndev(ntb)->db_count;
>> > +}
>> > +
>> > +static u64 amd_ntb_db_vector_mask(struct ntb_dev *ntb, int db_vector)
>> > +{
>> > +   struct amd_ntb_dev *ndev = ntb_ndev(ntb);
>> > +
>> > +   if (db_vector < 0 || db_vector > ndev->db_count)
>> > +           return 0;
>> > +
>> > +   return ntb_ndev(ntb)->db_valid_mask & (1 << db_vector); }
>> > +
>> > +static u64 amd_ntb_db_read(struct ntb_dev *ntb) {
>> > +   struct amd_ntb_dev *ndev = ntb_ndev(ntb);
>> > +   void __iomem *mmio = ndev->self_mmio;
>> > +
>> > +   return (u64)readw(mmio + AMD_DBSTAT_OFFSET);
>>
>> Is this cast necessary?
>
> Just following Intel ntb driver, keep consistent in Intel code.

Okay, another thing for me to fix in the Intel driver :)
Leave it this way, and I'll "correct" both at the same time.

>>
>> > +}
>> > +
>> > +static int amd_ntb_db_clear(struct ntb_dev *ntb, u64 db_bits) {
>> > +   struct amd_ntb_dev *ndev = ntb_ndev(ntb);
>> > +   void __iomem *mmio = ndev->self_mmio;
>> > +
>> > +   writew((u16)db_bits, mmio + AMD_DBSTAT_OFFSET);
>> > +
>> > +   return 0;
>> > +}
>> > +
>> > +static int amd_ntb_db_set_mask(struct ntb_dev *ntb, u64 db_bits) {
>> > +   struct amd_ntb_dev *ndev = ntb_ndev(ntb);
>> > +   void __iomem *mmio = ndev->self_mmio;
>> > +   unsigned long flags;
>> > +
>> > +   if (db_bits & ~ndev->db_valid_mask)
>> > +           return -EINVAL;
>> > +
>> > +   spin_lock_irqsave(&ndev->db_mask_lock, flags);
>> > +   ndev->db_mask |= db_bits;
>> > +   writew((u16)ndev->db_mask, mmio + AMD_DBMASK_OFFSET);
>> > +   spin_unlock_irqrestore(&ndev->db_mask_lock, flags);
>> > +
>> > +   return 0;
>> > +}
>> > +
>> > +static int amd_ntb_db_clear_mask(struct ntb_dev *ntb, u64 db_bits) {
>> > +   struct amd_ntb_dev *ndev = ntb_ndev(ntb);
>> > +   void __iomem *mmio = ndev->self_mmio;
>> > +   unsigned long flags;
>> > +
>> > +   if (db_bits & ~ndev->db_valid_mask)
>> > +           return -EINVAL;
>> > +
>> > +   spin_lock_irqsave(&ndev->db_mask_lock, flags);
>> > +   ndev->db_mask &= ~db_bits;
>> > +   writew((u16)ndev->db_mask, mmio + AMD_DBMASK_OFFSET);
>> > +   spin_unlock_irqrestore(&ndev->db_mask_lock, flags);
>> > +
>> > +   return 0;
>> > +}
>> > +
>> > +static int amd_ntb_peer_db_addr(struct ntb_dev *ntb,
>> > +           phys_addr_t *db_addr,
>> > +           resource_size_t *db_size)
>>
>> Nit, this doesn't seem like it would be aligned properly.
>
> Got it.
>
>> > +{
>> > +   struct amd_ntb_dev *ndev = ntb_ndev(ntb);
>> > +
>> > +   if (db_addr)
>> > +           *db_addr = (phys_addr_t)(ndev->peer_mmio +
>> AMD_DBREQ_OFFSET);
>> > +   if (db_size)
>> > +           *db_size = sizeof(u32);
>> > +
>> > +   return 0;
>> > +}
>> > +
>> > +static int amd_ntb_peer_db_set(struct ntb_dev *ntb, u64 db_bits) {
>> > +   struct amd_ntb_dev *ndev = ntb_ndev(ntb);
>> > +   void __iomem *mmio = ndev->self_mmio;
>> > +
>> > +   writew((u16)db_bits, mmio + AMD_DBREQ_OFFSET);
>> > +
>> > +   return 0;
>> > +}
>> > +
>> > +static int amd_ntb_spad_count(struct ntb_dev *ntb) {
>> > +   return ntb_ndev(ntb)->spad_count;
>> > +}
>> > +
>> > +static u32 amd_ntb_spad_read(struct ntb_dev *ntb, int idx) {
>> > +   struct amd_ntb_dev *ndev = ntb_ndev(ntb);
>> > +   void __iomem *mmio = ndev->self_mmio;
>> > +   u32 offset = 0;
>>
>> Unnecessary init of this variable
>
> Ok.
>
>> > +
>> > +   if (idx < 0 || idx >= ndev->spad_count)
>> > +           return 0;
>> > +
>> > +   offset = ndev->self_spad + (idx << 2);
>> > +   return readl(mmio + AMD_SPAD_OFFSET + offset); }
>> > +
>> > +static int amd_ntb_spad_write(struct ntb_dev *ntb,
>> > +                           int idx, u32 val)
>> > +{
>> > +   struct amd_ntb_dev *ndev = ntb_ndev(ntb);
>> > +   void __iomem *mmio = ndev->self_mmio;
>> > +   u32 offset = 0;
>>
>> Unnecessary init of this variable
>
> Ok.
>
>> > +
>> > +   if (idx < 0 || idx >= ndev->spad_count)
>> > +           return -EINVAL;
>> > +
>> > +   offset = ndev->self_spad + (idx << 2);
>> > +   writel(val, mmio + AMD_SPAD_OFFSET + offset);
>> > +
>> > +   return 0;
>> > +}
>> > +
>> > +static int amd_ntb_peer_spad_addr(struct ntb_dev *ntb, int idx,
>> > +                               phys_addr_t *spad_addr)
>> > +{
>> > +   struct amd_ntb_dev *ndev = ntb_ndev(ntb);
>> > +
>> > +   if (idx < 0 || idx >= ndev->spad_count)
>> > +           return -EINVAL;
>> > +
>> > +   if (spad_addr)
>> > +           *spad_addr = (phys_addr_t)(ndev->self_mmio +
>> AMD_SPAD_OFFSET +
>> > +                           ndev->peer_spad + (idx << 2));
>>
>> This doesn't seem aligned properly either.
>
> Ok
>
>> > +   return 0;
>> > +}
>> > +
>> > +static u32 amd_ntb_peer_spad_read(struct ntb_dev *ntb, int idx) {
>> > +   struct amd_ntb_dev *ndev = ntb_ndev(ntb);
>> > +   void __iomem *mmio = ndev->self_mmio;
>> > +   u32 offset;
>> > +
>> > +   if (idx < 0 || idx >= ndev->spad_count)
>> > +           return -EINVAL;
>> > +
>> > +   offset = ndev->peer_spad + (idx << 2);
>> > +   return readl(mmio + AMD_SPAD_OFFSET + offset); }
>> > +
>> > +static int amd_ntb_peer_spad_write(struct ntb_dev *ntb,
>> > +                                int idx, u32 val)
>> > +{
>> > +   struct amd_ntb_dev *ndev = ntb_ndev(ntb);
>> > +   void __iomem *mmio = ndev->self_mmio;
>> > +   u32 offset;
>> > +
>> > +   if (idx < 0 || idx >= ndev->spad_count)
>> > +           return -EINVAL;
>> > +
>> > +   offset = ndev->peer_spad + (idx << 2);
>> > +   writel(val, mmio + AMD_SPAD_OFFSET + offset);
>> > +
>> > +   return 0;
>> > +}
>> > +
>> > +static void amd_ack_SMU(struct amd_ntb_dev *ndev, u32 bit)
>>
>> Nit, Please make this SMU lower case.  Also, it is a bit odd to see SMUACK
>> and ack_smu.  It is not a big deal, but since there has to be another version to
>> address Allen's comments (and mine), then this won't add to much additional
>> work to address.
>
> Ok.
>
>> > +{
>> > +   void __iomem *mmio = ndev->self_mmio;
>> > +   int reg;
>> > +
>> > +   reg = readl(mmio + AMD_SMUACK_OFFSET);
>> > +   reg |= bit;
>> > +   writel(reg, mmio + AMD_SMUACK_OFFSET);
>> > +
>> > +   ndev->peer_sta |= bit;
>> > +}
>> > +
>> > +static void amd_handle_event(struct amd_ntb_dev *ndev, int vec) {
>> > +   void __iomem *mmio = ndev->self_mmio;
>> > +   u32 status;
>> > +
>> > +   status = readl(mmio + AMD_INTSTAT_OFFSET);
>> > +   if (!(status & AMD_EVENT_INTMASK))
>> > +           return;
>> > +
>> > +   dev_dbg(ndev_dev(ndev), "status = 0x%x and vec = %d\n", status,
>> > +vec);
>> > +
>> > +   status &= AMD_EVENT_INTMASK;
>> > +   switch (status) {
>> > +   case AMD_PEER_FLUSH_EVENT:
>> > +           dev_info(ndev_dev(ndev), "Flush is done.\n");
>> > +           break;
>> > +   case AMD_PEER_RESET_EVENT:
>> > +           amd_ack_SMU(ndev, AMD_PEER_RESET_EVENT);
>> > +
>> > +           /* link down first */
>> > +           ntb_link_event(&ndev->ntb);
>> > +           /* polling peer status */
>> > +           schedule_delayed_work(&ndev->hb_timer,
>> AMD_LINK_HB_TIMEOUT);
>> > +
>> > +           break;
>> > +   case AMD_PEER_D3_EVENT:
>> > +   case AMD_PEER_PMETO_EVENT:
>> > +           amd_ack_SMU(ndev, status);
>> > +
>> > +           /* link down */
>> > +           ntb_link_event(&ndev->ntb);
>> > +
>> > +           break;
>> > +   case AMD_PEER_D0_EVENT:
>> > +           mmio = ndev->peer_mmio;
>> > +           status = readl(mmio + AMD_PMESTAT_OFFSET);
>> > +           /* check if this is WAKEUP event */
>> > +           if (status & 0x1)
>> > +                   dev_info(ndev_dev(ndev), "Wakeup is done.\n");
>> > +
>> > +           amd_ack_SMU(ndev, AMD_PEER_D0_EVENT);
>> > +
>> > +           if (amd_link_is_up(ndev))
>> > +                   ntb_link_event(&ndev->ntb);
>> > +           else
>> > +                   schedule_delayed_work(&ndev->hb_timer,
>> > +                                           AMD_LINK_HB_TIMEOUT);
>> > +           break;
>> > +   default:
>> > +           dev_info(ndev_dev(ndev), "event status = 0x%x.\n", status);
>> > +           break;
>> > +   }
>> > +}
>> > +
>> > +static irqreturn_t ndev_interrupt(struct amd_ntb_dev *ndev, int vec)
>> > +{
>> > +   dev_dbg(ndev_dev(ndev), "vec %d\n", vec);
>> > +
>> > +   if (vec > (ndev->msix_vec_count - 1)) {
>> > +           dev_err(ndev_dev(ndev), "Invalid interrupt.\n");
>> > +           return IRQ_HANDLED;
>> > +   }
>>
>> Unless this actually happens, you might not want the unnecessary branch in
>> your fast path.
>
> Ok, I'll remove it.
>
>> > +
>> > +   if (vec > 15 || (ndev->msix_vec_count == 1))
>> > +           amd_handle_event(ndev, vec);
>> > +
>> > +   if (vec < 16)
>>
>> I'd prefer to see a #define here for 16
>
> Ok.
>
>>
>> > +           ntb_db_event(&ndev->ntb, vec);
>> > +
>> > +   return IRQ_HANDLED;
>> > +}
>> > +
>> > +static irqreturn_t ndev_vec_isr(int irq, void *dev) {
>> > +   struct amd_ntb_vec *nvec = dev;
>> > +
>> > +   return ndev_interrupt(nvec->ndev, nvec->num); }
>> > +
>> > +static irqreturn_t ndev_irq_isr(int irq, void *dev) {
>> > +   struct amd_ntb_dev *ndev = dev;
>> > +
>> > +   return ndev_interrupt(ndev, irq - ndev_pdev(ndev)->irq); }
>> > +
>> > +static int ndev_init_isr(struct amd_ntb_dev *ndev,
>> > +                           int msix_min, int msix_max)
>> > +{
>> > +   struct pci_dev *pdev;
>> > +   int rc, i, msix_count, node;
>> > +
>> > +   pdev = ndev_pdev(ndev);
>> > +
>> > +   node = dev_to_node(&pdev->dev);
>> > +
>> > +   ndev->db_mask = ndev->db_valid_mask;
>> > +
>> > +   /* Try to set up msix irq */
>> > +   ndev->vec = kzalloc_node(msix_max * sizeof(*ndev->vec),
>> > +                            GFP_KERNEL, node);
>> > +   if (!ndev->vec)
>> > +           goto err_msix_vec_alloc;
>> > +
>> > +   ndev->msix = kzalloc_node(msix_max * sizeof(*ndev->msix),
>> > +                             GFP_KERNEL, node);
>> > +   if (!ndev->msix)
>> > +           goto err_msix_alloc;
>> > +
>> > +   for (i = 0; i < msix_max; ++i)
>> > +           ndev->msix[i].entry = i;
>> > +
>> > +   msix_count = pci_enable_msix_range(pdev, ndev->msix,
>> > +                                      msix_min, msix_max);
>> > +   if (msix_count < 0)
>> > +           goto err_msix_enable;
>> > +
>> > +   /* Disable MSIX if msix count is less than 16 */
>>
>> Why?  Is there a HW errata or limitation (if so, write so in this comment)?  If
>> not, MSIX has many benefits over legacy interrupts, and it doesn't make
>> sense to walk away from that.
>
> Yes, this is hardware limitation, i'll add "hardware limitation" into the comment.
>
>>
>> > +   if (msix_count < msix_min) {
>> > +           pci_disable_msix(pdev);
>> > +           goto err_msix_enable;
>> > +   }
>> > +
>> > +   for (i = 0; i < msix_count; ++i) {
>> > +           ndev->vec[i].ndev = ndev;
>> > +           ndev->vec[i].num = i;
>> > +           rc = request_irq(ndev->msix[i].vector, ndev_vec_isr, 0,
>> > +                            "ndev_vec_isr", &ndev->vec[i]);
>> > +           if (rc)
>> > +                   goto err_msix_request;
>> > +   }
>> > +
>> > +   dev_dbg(ndev_dev(ndev), "Using msix interrupts\n");
>> > +   ndev->db_count = msix_min;
>> > +   ndev->msix_vec_count = msix_max;
>> > +   return 0;
>> > +
>> > +err_msix_request:
>> > +   while (i-- > 0)
>> > +           free_irq(ndev->msix[i].vector, ndev);
>> > +   pci_disable_msix(pdev);
>> > +err_msix_enable:
>> > +   kfree(ndev->msix);
>> > +err_msix_alloc:
>> > +   kfree(ndev->vec);
>> > +err_msix_vec_alloc:
>> > +   ndev->msix = NULL;
>> > +   ndev->vec = NULL;
>> > +
>> > +   /* Try to set up msi irq */
>> > +   rc = pci_enable_msi(pdev);
>> > +   if (rc)
>> > +           goto err_msi_enable;
>> > +
>> > +   rc = request_irq(pdev->irq, ndev_irq_isr, 0,
>> > +                    "ndev_irq_isr", ndev);
>> > +   if (rc)
>> > +           goto err_msi_request;
>> > +
>>
>> I have problems with the msix/msi/intx selection being done this way, but I
>> see it being done in the Intel driver.  So....I'll fix it there first.
>
> Ok, I'll change it according to your patch.

Don't worry about it.  I'll make a patch to fix both after I accept
your patch :)

>>
>> > +   dev_dbg(ndev_dev(ndev), "Using msi interrupts\n");
>> > +   ndev->db_count = 1;
>> > +   ndev->msix_vec_count = 1;
>> > +   return 0;
>> > +
>> > +err_msi_request:
>> > +   pci_disable_msi(pdev);
>> > +err_msi_enable:
>> > +
>> > +   /* Try to set up intx irq */
>> > +   pci_intx(pdev, 1);
>> > +
>> > +   rc = request_irq(pdev->irq, ndev_irq_isr, IRQF_SHARED,
>> > +                    "ndev_irq_isr", ndev);
>> > +   if (rc)
>> > +           goto err_intx_request;
>> > +
>> > +   dev_dbg(ndev_dev(ndev), "Using intx interrupts\n");
>> > +   ndev->db_count = 1;
>> > +   ndev->msix_vec_count = 1;
>> > +   return 0;
>> > +
>> > +err_intx_request:
>> > +   return rc;
>> > +}
>> > +
>> > +static void ndev_deinit_isr(struct amd_ntb_dev *ndev) {
>> > +   struct pci_dev *pdev;
>> > +   void __iomem *mmio = ndev->self_mmio;
>> > +   int i;
>> > +
>> > +   pdev = ndev_pdev(ndev);
>> > +
>> > +   /* Mask all doorbell interrupts */
>> > +   ndev->db_mask = ndev->db_valid_mask;
>> > +   writel(ndev->db_mask, mmio + AMD_DBMASK_OFFSET);
>> > +
>> > +   if (ndev->msix) {
>> > +           i = ndev->msix_vec_count;
>> > +           while (i--)
>> > +                   free_irq(ndev->msix[i].vector, &ndev->vec[i]);
>> > +           pci_disable_msix(pdev);
>> > +           kfree(ndev->msix);
>> > +           kfree(ndev->vec);
>> > +   } else {
>> > +           free_irq(pdev->irq, ndev);
>> > +           if (pci_dev_msi_enabled(pdev))
>> > +                   pci_disable_msi(pdev);
>> > +           else
>> > +                   pci_intx(pdev, 0);
>> > +   }
>> > +}
>> > +
>> > +static ssize_t ndev_debugfs_read(struct file *filp, char __user *ubuf,
>> > +                            size_t count, loff_t *offp)
>> > +{
>> > +   struct amd_ntb_dev *ndev;
>> > +   void __iomem *mmio;
>> > +   char *buf;
>> > +   size_t buf_size;
>> > +   ssize_t ret, off;
>> > +   union { u64 v64; u32 v32; u16 v16; } u;
>> > +
>> > +   ndev = filp->private_data;
>> > +   mmio = ndev->self_mmio;
>> > +
>> > +   buf_size = min(count, 0x800ul);
>> > +
>> > +   buf = kmalloc(buf_size, GFP_KERNEL);
>> > +   if (!buf)
>> > +           return -ENOMEM;
>> > +
>> > +   off = 0;
>> > +
>> > +   off += scnprintf(buf + off, buf_size - off,
>> > +                    "NTB Device Information:\n");
>> > +
>> > +   off += scnprintf(buf + off, buf_size - off,
>> > +                    "Connection Topology -\t%s\n",
>> > +                    ntb_topo_string(ndev->ntb.topo));
>> > +
>> > +   off += scnprintf(buf + off, buf_size - off,
>> > +                    "LNK STA -\t\t%#06x\n", ndev->lnk_sta);
>> > +
>> > +   if (!amd_link_is_up(ndev)) {
>> > +           off += scnprintf(buf + off, buf_size - off,
>> > +                            "Link Status -\t\tDown\n");
>> > +   } else {
>> > +           off += scnprintf(buf + off, buf_size - off,
>> > +                            "Link Status -\t\tUp\n");
>> > +           off += scnprintf(buf + off, buf_size - off,
>> > +                            "Link Speed -\t\tPCI-E Gen %u\n",
>> > +                            NTB_LNK_STA_SPEED(ndev->lnk_sta));
>> > +           off += scnprintf(buf + off, buf_size - off,
>> > +                            "Link Width -\t\tx%u\n",
>> > +                            NTB_LNK_STA_WIDTH(ndev->lnk_sta));
>> > +   }
>> > +
>> > +   off += scnprintf(buf + off, buf_size - off,
>> > +                    "Memory Window Count -\t%u\n", ndev-
>> >mw_count);
>> > +   off += scnprintf(buf + off, buf_size - off,
>> > +                    "Scratchpad Count -\t%u\n", ndev->spad_count);
>> > +   off += scnprintf(buf + off, buf_size - off,
>> > +                    "Doorbell Count -\t%u\n", ndev->db_count);
>> > +   off += scnprintf(buf + off, buf_size - off,
>> > +                    "MSIX Vector Count -\t%u\n", ndev-
>> >msix_vec_count);
>> > +
>> > +   off += scnprintf(buf + off, buf_size - off,
>> > +                    "Doorbell Valid Mask -\t%#llx\n", ndev-
>> >db_valid_mask);
>> > +
>> > +   u.v32 = readl(ndev->self_mmio + AMD_DBMASK_OFFSET);
>> > +   off += scnprintf(buf + off, buf_size - off,
>> > +                    "Doorbell Mask -\t\t\t%#06x\n", u.v32);
>> > +
>> > +   u.v32 = readl(mmio + AMD_DBSTAT_OFFSET);
>> > +   off += scnprintf(buf + off, buf_size - off,
>> > +                    "Doorbell Bell -\t\t\t%#06x\n", u.v32);
>> > +
>> > +   off += scnprintf(buf + off, buf_size - off,
>> > +                    "\nNTB Incoming XLAT:\n");
>> > +
>> > +   u.v64 = ioread64(mmio + AMD_BAR1XLAT_OFFSET);
>> > +   off += scnprintf(buf + off, buf_size - off,
>> > +                    "XLAT1 -\t\t%#018llx\n", u.v64);
>> > +
>> > +   u.v64 = ioread64(ndev->self_mmio + AMD_BAR23XLAT_OFFSET);
>> > +   off += scnprintf(buf + off, buf_size - off,
>> > +                            "XLAT23 -\t\t%#018llx\n", u.v64);
>> > +
>> > +   u.v64 = ioread64(ndev->self_mmio + AMD_BAR45XLAT_OFFSET);
>> > +   off += scnprintf(buf + off, buf_size - off,
>> > +                            "XLAT45 -\t\t%#018llx\n", u.v64);
>> > +
>> > +   u.v32 = readl(mmio + AMD_BAR1LMT_OFFSET);
>> > +   off += scnprintf(buf + off, buf_size - off,
>> > +                    "LMT1 -\t\t\t%#06x\n", u.v32);
>> > +
>> > +   u.v64 = ioread64(ndev->self_mmio + AMD_BAR23LMT_OFFSET);
>> > +   off += scnprintf(buf + off, buf_size - off,
>> > +                            "LMT23 -\t\t\t%#018llx\n", u.v64);
>> > +
>> > +   u.v64 = ioread64(ndev->self_mmio + AMD_BAR45LMT_OFFSET);
>> > +   off += scnprintf(buf + off, buf_size - off,
>> > +                            "LMT45 -\t\t\t%#018llx\n", u.v64);
>> > +
>> > +   ret = simple_read_from_buffer(ubuf, count, offp, buf, off);
>> > +   kfree(buf);
>> > +   return ret;
>> > +}
>> > +
>> > +static void ndev_init_debugfs(struct amd_ntb_dev *ndev) {
>> > +   if (!debugfs_dir) {
>> > +           ndev->debugfs_dir = NULL;
>> > +           ndev->debugfs_info = NULL;
>> > +   } else {
>> > +           ndev->debugfs_dir =
>> > +                   debugfs_create_dir(ndev_name(ndev),
>> debugfs_dir);
>> > +           if (!ndev->debugfs_dir)
>> > +                   ndev->debugfs_info = NULL;
>> > +           else
>> > +                   ndev->debugfs_info =
>> > +                           debugfs_create_file("info", S_IRUSR,
>> > +                                               ndev->debugfs_dir, ndev,
>> > +                                               &amd_ntb_debugfs_info);
>> > +   }
>> > +}
>> > +
>> > +static void ndev_deinit_debugfs(struct amd_ntb_dev *ndev) {
>> > +   debugfs_remove_recursive(ndev->debugfs_dir);
>> > +}
>> > +
>> > +static inline void ndev_init_struct(struct amd_ntb_dev *ndev,
>> > +                               struct pci_dev *pdev)
>> > +{
>> > +   ndev->ntb.pdev = pdev;
>> > +   ndev->ntb.topo = NTB_TOPO_NONE;
>> > +   ndev->ntb.ops = &amd_ntb_ops;
>> > +   ndev->int_mask = AMD_EVENT_INTMASK;
>> > +   spin_lock_init(&ndev->db_mask_lock);
>> > +}
>> > +
>> > +static int amd_poll_link(struct amd_ntb_dev *ndev) {
>> > +   void __iomem *mmio = ndev->peer_mmio;
>> > +   u32 reg, stat;
>> > +   int rc;
>> > +
>> > +   reg = readl(mmio + AMD_SIDEINFO_OFFSET);
>> > +   reg &= NTB_LIN_STA_ACTIVE_BIT;
>> > +
>> > +   dev_dbg(ndev_dev(ndev), "%s: reg_val = 0x%x.\n", __func__, reg);
>> > +
>> > +   if (reg == ndev->cntl_sta)
>> > +           return 0;
>> > +
>> > +   ndev->cntl_sta = reg;
>> > +
>> > +   rc = pci_read_config_dword(ndev->ntb.pdev,
>> > +                   AMD_LINK_STATUS_OFFSET, &stat);
>> > +   if (rc)
>> > +           return 0;
>> > +   ndev->lnk_sta = stat;
>> > +
>> > +   return 1;
>> > +}
>> > +
>> > +static void amd_link_hb(struct work_struct *work) {
>> > +   struct amd_ntb_dev *ndev = hb_ndev(work);
>> > +
>> > +   if (amd_poll_link(ndev))
>> > +           ntb_link_event(&ndev->ntb);
>> > +
>> > +   if (!amd_link_is_up(ndev))
>> > +           schedule_delayed_work(&ndev->hb_timer,
>> AMD_LINK_HB_TIMEOUT); }
>> > +
>> > +static int amd_init_isr(struct amd_ntb_dev *ndev) {
>> > +   return ndev_init_isr(ndev, AMD_DB_CNT,
>> AMD_MSIX_VECTOR_CNT); }
>> > +
>> > +static void amd_init_side_info(struct amd_ntb_dev *ndev) {
>> > +   void __iomem *mmio = ndev->self_mmio;
>> > +   unsigned int reg = 0;
>>
>> variable initialized unnecessarily
>
> Ok.
>
>> > +
>> > +   reg = readl(mmio + AMD_SIDEINFO_OFFSET);
>> > +   if (!(reg & 0x2)) {
>> > +           reg |= 0x2;
>>
>> I'd like a #define for the "2" above, as you are using it multiple times.
>
> Ok.
>
>>
>> > +           writel(reg, mmio + AMD_SIDEINFO_OFFSET);
>> > +   }
>> > +}
>> > +
>> > +static void amd_deinit_side_info(struct amd_ntb_dev *ndev) {
>> > +   void __iomem *mmio = ndev->self_mmio;
>> > +   unsigned int reg = 0;
>>
>> variable initialized unnecessarily
>
> Ok.
>
>> > +
>> > +   reg = readl(mmio + AMD_SIDEINFO_OFFSET);
>> > +   if (reg & 0x2) {
>> > +           reg &= ~(0x2);
>> > +           writel(reg, mmio + AMD_SIDEINFO_OFFSET);
>> > +           readl(mmio + AMD_SIDEINFO_OFFSET);
>> > +   }
>> > +}
>> > +
>> > +static int amd_init_ntb(struct amd_ntb_dev *ndev) {
>> > +   void __iomem *mmio = ndev->self_mmio;
>> > +
>> > +   ndev->mw_count = AMD_MW_CNT;
>> > +   ndev->spad_count = AMD_SPADS_CNT;
>> > +   ndev->db_count = AMD_DB_CNT;
>> > +
>> > +   switch (ndev->ntb.topo) {
>> > +   case NTB_TOPO_PRI:
>> > +   case NTB_TOPO_SEC:
>> > +           ndev->spad_count >>= 1;
>> > +           if (ndev->ntb.topo == NTB_TOPO_PRI) {
>> > +                   ndev->self_spad = 0;
>> > +                   ndev->peer_spad = 0x20;
>> > +           } else {
>> > +                   ndev->self_spad = 0x20;
>> > +                   ndev->peer_spad = 0;
>> > +           }
>> > +
>> > +           INIT_DELAYED_WORK(&ndev->hb_timer, amd_link_hb);
>> > +           schedule_delayed_work(&ndev->hb_timer,
>> AMD_LINK_HB_TIMEOUT);
>> > +
>> > +           break;
>> > +   default:
>> > +           dev_err(ndev_dev(ndev), "AMD NTB does not support B2B
>> mode.\n");
>> > +           return -EINVAL;
>> > +   }
>> > +
>> > +   ndev->db_valid_mask = BIT_ULL(ndev->db_count) - 1;
>> > +
>> > +   /* Mask event interrupts */
>> > +   writel(ndev->int_mask, mmio + AMD_INTMASK_OFFSET);
>> > +
>> > +   return 0;
>> > +}
>> > +
>> > +static inline enum ntb_topo amd_get_topo(struct amd_ntb_dev *ndev)
>>
>> It is usually better to let the compiler determine what to inline and what not
>> to inline..
>
> Do you mean removing the key word of inline?

Yes.  "inline" will force the compiler to inline the function.  If you
do not specify, then the compiler is free to inline it if it makes
more optimal code (and won't inline it if the inline makes less
optimal code).    So, please remove "inline" unless you know for
certain there is a good reason to inline it.

>> > +{
>> > +   void __iomem *mmio = ndev->self_mmio;
>> > +   u32 info;
>> > +
>> > +   info = readl(mmio + AMD_SIDEINFO_OFFSET);
>> > +   if (info & AMD_SIDE_MASK)
>> > +           return NTB_TOPO_SEC;
>> > +   else
>> > +           return NTB_TOPO_PRI;
>> > +}
>> > +
>> > +static int amd_init_dev(struct amd_ntb_dev *ndev) {
>> > +   struct pci_dev *pdev;
>> > +   int rc = 0;
>> > +
>> > +   pdev = ndev_pdev(ndev);
>> > +
>> > +   ndev->ntb.topo = amd_get_topo(ndev);
>> > +   dev_dbg(ndev_dev(ndev), "AMD NTB topo is %s\n",
>> > +                   ntb_topo_string(ndev->ntb.topo));
>> > +
>> > +   rc = amd_init_ntb(ndev);
>> > +   if (rc)
>> > +           return rc;
>> > +
>> > +   rc = amd_init_isr(ndev);
>> > +   if (rc) {
>> > +           dev_err(ndev_dev(ndev), "fail to init isr.\n");
>> > +           return rc;
>> > +   }
>> > +
>> > +   ndev->db_valid_mask = BIT_ULL(ndev->db_count) - 1;
>> > +
>> > +   return 0;
>> > +}
>> > +
>> > +static void amd_deinit_dev(struct amd_ntb_dev *ndev) {
>> > +   cancel_delayed_work_sync(&ndev->hb_timer);
>> > +
>> > +   ndev_deinit_isr(ndev);
>> > +}
>> > +
>> > +static int amd_ntb_init_pci(struct amd_ntb_dev *ndev,
>> > +                       struct pci_dev *pdev)
>> > +{
>> > +   int rc;
>> > +
>> > +   pci_set_drvdata(pdev, ndev);
>> > +
>> > +   rc = pci_enable_device(pdev);
>> > +   if (rc)
>> > +           goto err_pci_enable;
>> > +
>> > +   rc = pci_request_regions(pdev, NTB_NAME);
>> > +   if (rc)
>> > +           goto err_pci_regions;
>> > +
>> > +   pci_set_master(pdev);
>> > +
>> > +   rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
>> > +   if (rc) {
>> > +           rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
>> > +           if (rc)
>> > +                   goto err_dma_mask;
>> > +           dev_warn(ndev_dev(ndev), "Cannot DMA highmem\n");
>> > +   }
>> > +
>> > +   rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
>> > +   if (rc) {
>> > +           rc = pci_set_consistent_dma_mask(pdev,
>> DMA_BIT_MASK(32));
>> > +           if (rc)
>> > +                   goto err_dma_mask;
>> > +           dev_warn(ndev_dev(ndev), "Cannot DMA consistent
>> highmem\n");
>> > +   }
>> > +
>> > +   ndev->self_mmio = pci_iomap(pdev, 0, 0);
>> > +   if (!ndev->self_mmio) {
>> > +           rc = -EIO;
>> > +           goto err_mmio;
>> > +   }
>> > +   ndev->peer_mmio = ndev->self_mmio + AMD_PEER_OFFSET;
>> > +
>> > +   return 0;
>> > +
>> > +err_mmio:
>>
>> seems like an unnecessary goto destination here
>
> Will entry the goto destination if pci_iomap failed. The code is following
> Intel NTB and I see many drivers in kernel will check the return value
> of pci_iomap.

If you just used err_dma_mask in the err_mmio case, you have the same
result and won't have an unnecessary goto destination label.

>> > +err_dma_mask:
>> > +   pci_clear_master(pdev);
>> > +err_pci_regions:
>> > +   pci_disable_device(pdev);
>> > +err_pci_enable:
>> > +   pci_set_drvdata(pdev, NULL);
>> > +   return rc;
>> > +}
>> > +
>> > +static void amd_ntb_deinit_pci(struct amd_ntb_dev *ndev) {
>> > +   struct pci_dev *pdev = ndev_pdev(ndev);
>> > +
>> > +   pci_iounmap(pdev, ndev->self_mmio);
>> > +
>> > +   pci_clear_master(pdev);
>> > +   pci_release_regions(pdev);
>> > +   pci_disable_device(pdev);
>> > +   pci_set_drvdata(pdev, NULL);
>> > +}
>> > +
>> > +
>> > +static int amd_ntb_pci_probe(struct pci_dev *pdev,
>> > +                        const struct pci_device_id *id) {
>> > +   struct amd_ntb_dev *ndev;
>> > +   int rc, node;
>> > +
>> > +   node = dev_to_node(&pdev->dev);
>> > +
>> > +   ndev = kzalloc_node(sizeof(*ndev), GFP_KERNEL, node);
>> > +   if (!ndev) {
>> > +           rc = -ENOMEM;
>> > +           goto err_ndev;
>> > +   }
>> > +
>> > +   ndev_init_struct(ndev, pdev);
>> > +
>> > +   rc = amd_ntb_init_pci(ndev, pdev);
>> > +   if (rc)
>> > +           goto err_init_pci;
>> > +
>> > +   rc = amd_init_dev(ndev);
>> > +   if (rc)
>> > +           goto err_init_dev;
>> > +
>> > +   /* write side info */
>> > +   amd_init_side_info(ndev);
>> > +
>> > +   amd_poll_link(ndev);
>> > +
>> > +   ndev_init_debugfs(ndev);
>> > +
>> > +   rc = ntb_register_device(&ndev->ntb);
>> > +   if (rc)
>> > +           goto err_register;
>> > +
>> > +   dev_info(&pdev->dev, "NTB device registered.\n");
>> > +
>> > +   return 0;
>> > +
>> > +err_register:
>> > +   ndev_deinit_debugfs(ndev);
>> > +   amd_deinit_dev(ndev);
>> > +err_init_dev:
>> > +   amd_ntb_deinit_pci(ndev);
>> > +err_init_pci:
>> > +   kfree(ndev);
>> > +err_ndev:
>> > +   return rc;
>> > +}
>> > +
>> > +static void amd_ntb_pci_remove(struct pci_dev *pdev) {
>> > +   struct amd_ntb_dev *ndev = pci_get_drvdata(pdev);
>> > +
>> > +   ntb_unregister_device(&ndev->ntb);
>> > +   ndev_deinit_debugfs(ndev);
>> > +   amd_deinit_side_info(ndev);
>> > +   amd_deinit_dev(ndev);
>> > +   amd_ntb_deinit_pci(ndev);
>> > +   kfree(ndev);
>> > +}
>> > +
>> > +static const struct file_operations amd_ntb_debugfs_info = {
>> > +   .owner = THIS_MODULE,
>> > +   .open = simple_open,
>> > +   .read = ndev_debugfs_read,
>> > +};
>> > +
>> > +static const struct pci_device_id amd_ntb_pci_tbl[] = {
>> > +   {PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_NTB)},
>> > +   {0}
>> > +};
>> > +MODULE_DEVICE_TABLE(pci, amd_ntb_pci_tbl);
>> > +
>> > +static struct pci_driver amd_ntb_pci_driver = {
>> > +   .name           = KBUILD_MODNAME,
>> > +   .id_table       = amd_ntb_pci_tbl,
>> > +   .probe          = amd_ntb_pci_probe,
>> > +   .remove         = amd_ntb_pci_remove,
>> > +};
>> > +
>> > +static int __init amd_ntb_pci_driver_init(void) {
>> > +   pr_info("%s %s\n", NTB_DESC, NTB_VER);
>> > +
>> > +   if (debugfs_initialized())
>> > +           debugfs_dir = debugfs_create_dir(KBUILD_MODNAME,
>> NULL);
>> > +
>> > +   return pci_register_driver(&amd_ntb_pci_driver);
>> > +}
>> > +module_init(amd_ntb_pci_driver_init);
>> > +
>> > +static void __exit amd_ntb_pci_driver_exit(void) {
>> > +   pci_unregister_driver(&amd_ntb_pci_driver);
>> > +   debugfs_remove_recursive(debugfs_dir);
>> > +}
>> > +module_exit(amd_ntb_pci_driver_exit);
>> > diff --git a/drivers/ntb/hw/amd/ntb_hw_amd.h
>> > b/drivers/ntb/hw/amd/ntb_hw_amd.h new file mode 100644 index
>> > 0000000..31021c0
>> > --- /dev/null
>> > +++ b/drivers/ntb/hw/amd/ntb_hw_amd.h
>> > @@ -0,0 +1,246 @@
>> > +/*
>> > + * This file is provided under a dual BSD/GPLv2 license.  When using or
>> > + *   redistributing this file, you may do so under either license.
>> > + *
>> > + *   GPL LICENSE SUMMARY
>> > + *
>> > + *   Copyright (C) 2016 Advanced Micro Devices, Inc. All Rights Reserved.
>> > + *
>> > + *   This program is free software; you can redistribute it and/or modify
>> > + *   it under the terms of version 2 of the GNU General Public License as
>> > + *   published by the Free Software Foundation.
>> > + *
>> > + *   BSD LICENSE
>> > + *
>> > + *   Copyright (C) 2016 Advanced Micro Devices, Inc. All Rights Reserved.
>> > + *
>> > + *   Redistribution and use in source and binary forms, with or without
>> > + *   modification, are permitted provided that the following conditions
>> > + *   are met:
>> > + *
>> > + *     * Redistributions of source code must retain the above copyright
>> > + *       notice, this list of conditions and the following disclaimer.
>> > + *     * Redistributions in binary form must reproduce the above copy
>> > + *       notice, this list of conditions and the following disclaimer in
>> > + *       the documentation and/or other materials provided with the
>> > + *       distribution.
>> > + *     * Neither the name of AMD Corporation nor the names of its
>> > + *       contributors may be used to endorse or promote products derived
>> > + *       from this software without specific prior written permission.
>> > + *
>> > + *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
>> CONTRIBUTORS
>> > + *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT
>> NOT
>> > + *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
>> FITNESS FOR
>> > + *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
>> COPYRIGHT
>> > + *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
>> INCIDENTAL,
>> > + *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
>> BUT NOT
>> > + *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
>> LOSS OF USE,
>> > + *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
>> AND ON ANY
>> > + *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
>> TORT
>> > + *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
>> OF THE USE
>> > + *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
>> DAMAGE.
>> > + *
>> > + * AMD PCIe NTB Linux driver
>> > + *
>> > + * Contact Information:
>> > + * Xiangliang Yu<Xiangliang.Yu@amd.com>
>>
>> Same comment as above about the space needed here.
>
> Ok
>
>> > + */
>> > +
>> > +#ifndef NTB_HW_AMD_H
>> > +#define NTB_HW_AMD_H
>> > +
>> > +#include <linux/ntb.h>
>> > +#include <linux/pci.h>
>> > +
>> > +#define PCI_DEVICE_ID_AMD_NTB      0x145B
>> > +#define AMD_LINK_HB_TIMEOUT        msecs_to_jiffies(1000)
>>
>> Tab align the second half of these
>
> I'll check it.
>
>>
>> > +#define AMD_LINK_STATUS_OFFSET     0x68
>> > +#define NTB_LIN_STA_ACTIVE_BIT     0x00000002
>> > +#define NTB_LNK_STA_SPEED_MASK     0x000F0000
>> > +#define NTB_LNK_STA_WIDTH_MASK     0x03F00000
>> > +#define NTB_LNK_STA_ACTIVE(x)      (!!((x) & NTB_LIN_STA_ACTIVE_BIT))
>> > +#define NTB_LNK_STA_SPEED(x)       (((x) &
>> NTB_LNK_STA_SPEED_MASK) >> 16)
>> > +#define NTB_LNK_STA_WIDTH(x)       (((x) &
>> NTB_LNK_STA_WIDTH_MASK) >> 20)
>> > +
>> > +#ifndef ioread64
>> > +#ifdef readq
>> > +#define ioread64 readq
>> > +#else
>> > +#define ioread64 _ioread64
>> > +static inline u64 _ioread64(void __iomem *mmio) {
>> > +   u64 low, high;
>> > +
>> > +   low = ioread32(mmio);
>> > +   high = ioread32(mmio + sizeof(u32));
>> > +   return low | (high << 32);
>> > +}
>> > +#endif
>> > +#endif
>> > +
>> > +#ifndef iowrite64
>> > +#ifdef writeq
>> > +#define iowrite64 writeq
>> > +#else
>> > +#define iowrite64 _iowrite64
>> > +static inline void _iowrite64(u64 val, void __iomem *mmio) {
>> > +   iowrite32(val, mmio);
>> > +   iowrite32(val >> 32, mmio + sizeof(u32)); } #endif #endif
>> > +
>> > +enum {
>> > +   /* AMD NTB Capability */
>> > +   AMD_MW_CNT              = 3,
>> > +   AMD_DB_CNT              = 16,
>> > +   AMD_MSIX_VECTOR_CNT     = 24,
>> > +   AMD_SPADS_CNT           = 16,
>> > +
>> > +   /*  AMD NTB register offset */
>> > +   AMD_CNTL_OFFSET         = 0x200,
>> > +
>> > +   /* NTB control register bits */
>> > +   PMM_REG_CTL             = BIT(21),
>> > +   SMM_REG_CTL             = BIT(20),
>> > +   SMM_REG_ACC_PATH        = BIT(18),
>> > +   PMM_REG_ACC_PATH        = BIT(17),
>> > +   NTB_CLK_EN              = BIT(16),
>> > +
>> > +   AMD_STA_OFFSET          = 0x204,
>> > +   AMD_PGSLV_OFFSET        = 0x208,
>> > +   AMD_SPAD_MUX_OFFSET     = 0x20C,
>> > +   AMD_SPAD_OFFSET         = 0x210,
>> > +   AMD_RSMU_HCID           = 0x250,
>> > +   AMD_RSMU_SIID           = 0x254,
>> > +   AMD_PSION_OFFSET        = 0x300,
>> > +   AMD_SSION_OFFSET        = 0x330,
>> > +   AMD_MMINDEX_OFFSET      = 0x400,
>> > +   AMD_MMDATA_OFFSET       = 0x404,
>> > +   AMD_SIDEINFO_OFFSET     = 0x408,
>> > +
>> > +   AMD_SIDE_MASK           = BIT(0),
>> > +
>> > +   /* limit register */
>> > +   AMD_ROMBARLMT_OFFSET    = 0x410,
>> > +   AMD_BAR1LMT_OFFSET      = 0x414,
>> > +   AMD_BAR23LMT_OFFSET     = 0x418,
>> > +   AMD_BAR45LMT_OFFSET     = 0x420,
>> > +   /* xlat address */
>> > +   AMD_POMBARXLAT_OFFSET   = 0x428,
>> > +   AMD_BAR1XLAT_OFFSET     = 0x430,
>> > +   AMD_BAR23XLAT_OFFSET    = 0x438,
>> > +   AMD_BAR45XLAT_OFFSET    = 0x440,
>> > +   /* doorbell and interrupt */
>> > +   AMD_DBFM_OFFSET         = 0x450,
>> > +   AMD_DBREQ_OFFSET        = 0x454,
>> > +   AMD_MIRRDBSTAT_OFFSET   = 0x458,
>> > +   AMD_DBMASK_OFFSET       = 0x45C,
>> > +   AMD_DBSTAT_OFFSET       = 0x460,
>> > +   AMD_INTMASK_OFFSET      = 0x470,
>> > +   AMD_INTSTAT_OFFSET      = 0x474,
>> > +
>> > +   /* event type */
>> > +   AMD_PEER_FLUSH_EVENT    = BIT(0),
>> > +   AMD_PEER_RESET_EVENT    = BIT(1),
>> > +   AMD_PEER_D3_EVENT       = BIT(2),
>> > +   AMD_PEER_PMETO_EVENT    = BIT(3),
>> > +   AMD_PEER_D0_EVENT       = BIT(4),
>> > +   AMD_EVENT_INTMASK       = (AMD_PEER_FLUSH_EVENT |
>> > +                           AMD_PEER_RESET_EVENT |
>> AMD_PEER_D3_EVENT |
>> > +                           AMD_PEER_PMETO_EVENT |
>> AMD_PEER_D0_EVENT),
>> > +
>> > +   AMD_PMESTAT_OFFSET      = 0x480,
>> > +   AMD_PMSGTRIG_OFFSET     = 0x490,
>> > +   AMD_LTRLATENCY_OFFSET   = 0x494,
>> > +   AMD_FLUSHTRIG_OFFSET    = 0x498,
>> > +
>> > +   /* SMU register*/
>> > +   AMD_SMUACK_OFFSET       = 0x4A0,
>> > +   AMD_SINRST_OFFSET       = 0x4A4,
>> > +   AMD_RSPNUM_OFFSET       = 0x4A8,
>> > +   AMD_SMU_SPADMUTEX       = 0x4B0,
>> > +   AMD_SMU_SPADOFFSET      = 0x4B4,
>> > +
>> > +   AMD_PEER_OFFSET         = 0x400,
>> > +};
>>
>> It seems extremely odd to mix and match different kinds of things in the
>> same enumerated type.  I'd prefer to have these as #defines or separated
>> enum types.
>
> The code following AHCI.h style, it put field definition of register close to register
> Marco definition seems appropriate to me. Actually, I have seen lot of the style in kernel.

It's ugly, but as long as you support it then I'm okay with it :)

>> > +
>> > +struct amd_ntb_dev;
>> > +
>> > +struct amd_ntb_vec {
>> > +   struct amd_ntb_dev      *ndev;
>> > +   int                     num;
>> > +};
>> > +
>> > +struct amd_ntb_dev {
>> > +   struct ntb_dev ntb;
>> > +
>> > +   u32 ntb_side;
>> > +   u32 lnk_sta;
>> > +   u32 cntl_sta;
>> > +   u32 peer_sta;
>> > +
>> > +   unsigned char mw_count;
>> > +   unsigned char spad_count;
>> > +   unsigned char db_count;
>> > +   unsigned char msix_vec_count;
>> > +
>> > +   u64 db_valid_mask;
>> > +   u64 db_mask;
>> > +   u32 int_mask;
>> > +
>> > +   struct msix_entry *msix;
>> > +   struct amd_ntb_vec *vec;
>> > +
>> > +   spinlock_t db_mask_lock;
>> > +
>> > +   void __iomem *self_mmio;
>> > +   void __iomem *peer_mmio;
>> > +   unsigned int self_spad;
>> > +   unsigned int peer_spad;
>> > +
>> > +   struct delayed_work hb_timer;
>> > +
>> > +   struct dentry *debugfs_dir;
>> > +   struct dentry *debugfs_info;
>> > +};
>> > +
>> > +#define ndev_pdev(ndev) ((ndev)->ntb.pdev) #define ndev_name(ndev)
>> > +pci_name(ndev_pdev(ndev)) #define ndev_dev(ndev)
>> > +(&ndev_pdev(ndev)->dev) #define ntb_ndev(__ntb)
>> container_of(__ntb,
>> > +struct amd_ntb_dev, ntb) #define hb_ndev(__work)
>> container_of(__work,
>> > +struct amd_ntb_dev, hb_timer.work)
>> > +
>>
>> This should probably be moved into the device driver C file (as they are not
>> called anywhere else).
>
> I just following intel ntb driver, I'll also change it if you change intel driver.
>
>>
>> > +static int amd_ntb_mw_count(struct ntb_dev *ntb); static int
>> > +amd_ntb_mw_get_range(struct ntb_dev *ntb, int idx,
>> > +                   phys_addr_t *base, resource_size_t *size,
>> > +                   resource_size_t *align, resource_size_t *algin_size);
>> static int
>> > +amd_ntb_mw_set_trans(struct ntb_dev *ndev, int idx,
>> > +                           dma_addr_t addr, resource_size_t size);
>> static int
>> > +amd_ntb_link_is_up(struct ntb_dev *ntb,
>> > +                           enum ntb_speed *speed,
>> > +                           enum ntb_width *width);
>> > +static int amd_ntb_link_enable(struct ntb_dev *ntb,
>> > +                           enum ntb_speed speed,
>> > +                           enum ntb_width width);
>> > +static int amd_ntb_link_disable(struct ntb_dev *ntb); static u64
>> > +amd_ntb_db_valid_mask(struct ntb_dev *ntb); static int
>> > +amd_ntb_db_vector_count(struct ntb_dev *ntb); static u64
>> > +amd_ntb_db_vector_mask(struct ntb_dev *ntb, int db_vector); static
>> > +u64 amd_ntb_db_read(struct ntb_dev *ntb); static int
>> > +amd_ntb_db_clear(struct ntb_dev *ntb, u64 db_bits); static int
>> > +amd_ntb_db_set_mask(struct ntb_dev *ntb, u64 db_bits); static int
>> > +amd_ntb_db_clear_mask(struct ntb_dev *ntb, u64 db_bits); static int
>> > +amd_ntb_peer_db_addr(struct ntb_dev *ntb,
>> > +                           phys_addr_t *db_addr,
>> > +                           resource_size_t *db_size);
>> > +static int amd_ntb_peer_db_set(struct ntb_dev *ntb, u64 db_bits);
>> > +static int amd_ntb_spad_count(struct ntb_dev *ntb); static u32
>> > +amd_ntb_spad_read(struct ntb_dev *ntb, int idx); static int
>> > +amd_ntb_spad_write(struct ntb_dev *ntb, int idx, u32 val); static int
>> > +amd_ntb_peer_spad_addr(struct ntb_dev *ntb, int idx,
>> > +                                   phys_addr_t *spad_addr);
>> > +static u32 amd_ntb_peer_spad_read(struct ntb_dev *ntb, int idx);
>> > +static int amd_ntb_peer_spad_write(struct ntb_dev *ntb, int idx, u32
>> > +val);
>>
>> I believe all of these function declarations should be removed.
> Ok.
>
> And please see my comments, I'll submit v4 patch immediately when getting your feedback.

Thanks again.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* RE: [PATCH V3 1/2] NTB: Add AMD PCI-Express NTB driver
  2016-01-18 15:07     ` Jon Mason
@ 2016-01-18 15:26       ` Yu, Xiangliang
  0 siblings, 0 replies; 9+ messages in thread
From: Yu, Xiangliang @ 2016-01-18 15:26 UTC (permalink / raw)
  To: Jon Mason
  Cc: dave.jiang@intel.com, Allen.Hubbe@emc.com,
	linux-ntb@googlegroups.com, linux-kernel@vger.kernel.org,
	SPG_Linux_Kernel

> >
> >> > diff --git a/drivers/ntb/hw/amd/Kconfig b/drivers/ntb/hw/amd/Kconfig
> >> > new file mode 100644 index 0000000..cfe903c
> >> > --- /dev/null
> >> > +++ b/drivers/ntb/hw/amd/Kconfig
> >> > @@ -0,0 +1,7 @@
> >> > +config NTB_AMD
> >> > +   tristate "AMD Non-Transparent Bridge support"
> >> > +   depends on X86_64
> >> > +   help
> >> > +    This driver supports AMD NTB on capable Zeppelin hardware.
> >> > +
> >> > +    If unsure, say N.
> >> > diff --git a/drivers/ntb/hw/amd/Makefile
> b/drivers/ntb/hw/amd/Makefile
> >> > new file mode 100644 index 0000000..ad54da9
> >> > --- /dev/null
> >> > +++ b/drivers/ntb/hw/amd/Makefile
> >> > @@ -0,0 +1 @@
> >> > +obj-$(CONFIG_NTB_AMD) += ntb_hw_amd.o
> >> > diff --git a/drivers/ntb/hw/amd/ntb_hw_amd.c
> >> > b/drivers/ntb/hw/amd/ntb_hw_amd.c new file mode 100644 index
> >> > 0000000..8df6d7b
> >> > --- /dev/null
> >> > +++ b/drivers/ntb/hw/amd/ntb_hw_amd.c
> >> > @@ -0,0 +1,1148 @@
> >> > +/*
> >> > + * This file is provided under a dual BSD/GPLv2 license.  When using or
> >> > + *   redistributing this file, you may do so under either license.
> >> > + *
> >> > + *   GPL LICENSE SUMMARY
> >> > + *
> >> > + *   Copyright (C) 2016 Advanced Micro Devices, Inc. All Rights Reserved.
> >> > + *
> >> > + *   This program is free software; you can redistribute it and/or modify
> >> > + *   it under the terms of version 2 of the GNU General Public License
> as
> >> > + *   published by the Free Software Foundation.
> >> > + *
> >> > + *   BSD LICENSE
> >> > + *
> >> > + *   Copyright (C) 2016 Advanced Micro Devices, Inc. All Rights Reserved.
> >> > + *
> >> > + *   Redistribution and use in source and binary forms, with or without
> >> > + *   modification, are permitted provided that the following conditions
> >> > + *   are met:
> >> > + *
> >> > + *     * Redistributions of source code must retain the above copyright
> >> > + *       notice, this list of conditions and the following disclaimer.
> >> > + *     * Redistributions in binary form must reproduce the above copy
> >> > + *       notice, this list of conditions and the following disclaimer in
> >> > + *       the documentation and/or other materials provided with the
> >> > + *       distribution.
> >> > + *     * Neither the name of AMD Corporation nor the names of its
> >> > + *       contributors may be used to endorse or promote products
> derived
> >> > + *       from this software without specific prior written permission.
> >> > + *
> >> > + *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
> >> CONTRIBUTORS
> >> > + *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING,
> BUT
> >> NOT
> >> > + *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
> >> FITNESS FOR
> >> > + *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
> >> COPYRIGHT
> >> > + *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
> >> INCIDENTAL,
> >> > + *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
> >> BUT NOT
> >> > + *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
> >> LOSS OF USE,
> >> > + *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
> CAUSED
> >> AND ON ANY
> >> > + *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
> >> TORT
> >> > + *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
> OUT
> >> OF THE USE
> >> > + *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
> >> DAMAGE.
> >> > + *
> >> > + * AMD PCIe NTB Linux driver
> >> > + *
> >> > + * Contact Information:
> >> > + * Xiangliang Yu<Xiangliang.Yu@amd.com>
> >>
> >> Nit, space needed between 'u' and '<'
> >
> > Ok
> >
> >> > + */
> >> > +
> >> > +#include <linux/debugfs.h>
> >> > +#include <linux/delay.h>
> >> > +#include <linux/init.h>
> >> > +#include <linux/interrupt.h>
> >> > +#include <linux/module.h>
> >> > +#include <linux/acpi.h>
> >> > +#include <linux/pci.h>
> >> > +#include <linux/random.h>
> >> > +#include <linux/slab.h>
> >> > +#include <linux/ntb.h>
> >> > +
> >> > +#include "ntb_hw_amd.h"
> >> > +
> >> > +#define NTB_NAME   "ntb_hw_amd"
> >> > +#define NTB_DESC   "AMD(R) PCI-E Non-Transparent Bridge Driver"
> >> > +#define NTB_VER            "1.0"
> >> > +
> >> > +MODULE_DESCRIPTION(NTB_DESC);
> >> > +MODULE_VERSION(NTB_VER);
> >> > +MODULE_LICENSE("Dual BSD/GPL");
> >> > +MODULE_AUTHOR("AMD Inc.");
> >> > +
> >> > +static const struct file_operations amd_ntb_debugfs_info; static
> >> > +struct dentry *debugfs_dir;
> >> > +
> >> > +static const struct ntb_dev_ops amd_ntb_ops = {
> >> > +   .mw_count               = amd_ntb_mw_count,
> >> > +   .mw_get_range           = amd_ntb_mw_get_range,
> >> > +   .mw_set_trans           = amd_ntb_mw_set_trans,
> >> > +   .link_is_up             = amd_ntb_link_is_up,
> >> > +   .link_enable            = amd_ntb_link_enable,
> >> > +   .link_disable           = amd_ntb_link_disable,
> >> > +   .db_valid_mask          = amd_ntb_db_valid_mask,
> >> > +   .db_vector_count        = amd_ntb_db_vector_count,
> >> > +   .db_vector_mask         = amd_ntb_db_vector_mask,
> >> > +   .db_read                = amd_ntb_db_read,
> >> > +   .db_clear               = amd_ntb_db_clear,
> >> > +   .db_set_mask            = amd_ntb_db_set_mask,
> >> > +   .db_clear_mask          = amd_ntb_db_clear_mask,
> >> > +   .peer_db_addr           = amd_ntb_peer_db_addr,
> >> > +   .peer_db_set            = amd_ntb_peer_db_set,
> >> > +   .spad_count             = amd_ntb_spad_count,
> >> > +   .spad_read              = amd_ntb_spad_read,
> >> > +   .spad_write             = amd_ntb_spad_write,
> >> > +   .peer_spad_addr         = amd_ntb_peer_spad_addr,
> >> > +   .peer_spad_read         = amd_ntb_peer_spad_read,
> >> > +   .peer_spad_write        = amd_ntb_peer_spad_write,
> >> > +};
> >> > +
> >> > +static int ndev_mw_to_bar(struct amd_ntb_dev *ndev, int idx) {
> >> > +   if (idx < 0 || idx > ndev->mw_count)
> >> > +           return -EINVAL;
> >> > +
> >> > +   return 1 << idx;
> >> > +}
> >> > +
> >> > +static int amd_ntb_mw_count(struct ntb_dev *ntb) {
> >> > +   return ntb_ndev(ntb)->mw_count;
> >> > +}
> >> > +
> >> > +static int amd_ntb_mw_get_range(struct ntb_dev *ntb, int idx,
> >> > +                           phys_addr_t *base,
> >> > +                           resource_size_t *size,
> >> > +                           resource_size_t *align,
> >> > +                           resource_size_t *align_size)
> >> > +{
> >> > +   struct amd_ntb_dev *ndev = ntb_ndev(ntb);
> >> > +   int bar;
> >> > +
> >> > +   bar = ndev_mw_to_bar(ndev, idx);
> >> > +   if (bar < 0)
> >> > +           return bar;
> >> > +
> >> > +   if (base)
> >> > +           *base = pci_resource_start(ndev->ntb.pdev, bar);
> >> > +
> >> > +   if (size)
> >> > +           *size = pci_resource_len(ndev->ntb.pdev, bar);
> >> > +
> >> > +   if (align)
> >> > +           *align = 4096;
> >>
> >> Please use SZ_4K from sizes.h
> >
> > Ok
> >
> >> > +
> >> > +   if (align_size)
> >> > +           *align_size = 1;
> >> > +
> >> > +   return 0;
> >> > +}
> >> > +
> >> > +static int amd_ntb_mw_set_trans(struct ntb_dev *ntb, int idx,
> >> > +                           dma_addr_t addr, resource_size_t size) {
> >> > +   struct amd_ntb_dev *ndev = ntb_ndev(ntb);
> >> > +   unsigned long xlat_reg, limit_reg = 0;
> >> > +   resource_size_t mw_size;
> >> > +   void __iomem *mmio, *peer_mmio;
> >> > +   u64 base_addr, limit, reg_val;
> >> > +   int bar;
> >> > +
> >> > +   bar = ndev_mw_to_bar(ndev, idx);
> >> > +   if (bar < 0)
> >> > +           return bar;
> >> > +
> >> > +   mw_size = pci_resource_len(ndev->ntb.pdev, bar);
> >> > +
> >> > +   /* make sure the range fits in the usable mw size */
> >> > +   if (size > mw_size)
> >> > +           return -EINVAL;
> >> > +
> >> > +   mmio = ndev->self_mmio;
> >> > +   peer_mmio = ndev->peer_mmio;
> >> > +
> >> > +   base_addr = pci_resource_start(ndev->ntb.pdev, bar);
> >> > +   if (bar == 1) {
> >> > +           xlat_reg = AMD_BAR1XLAT_OFFSET;
> >> > +           limit_reg = AMD_BAR1LMT_OFFSET;
> >> > +   } else {
> >> > +           xlat_reg = AMD_BAR23XLAT_OFFSET + ((bar - 2) << 3);
> >> > +           limit_reg = AMD_BAR23LMT_OFFSET + ((bar - 2) << 3);
> >> > +   }
> >> > +
> >> > +   if (bar != 1) {
> >> > +           /* Set the limit if supported */
> >> > +           limit = base_addr + size;
> >> > +
> >> > +           /* set and verify setting the translation address */
> >> > +           iowrite64(addr, peer_mmio + xlat_reg);
> >> > +           reg_val = ioread64(peer_mmio + xlat_reg);
> >> > +           if (reg_val != addr) {
> >> > +                   iowrite64(0, peer_mmio + xlat_reg);
> >> > +                   return -EIO;
> >> > +           }
> >> > +
> >> > +           /* set and verify setting the limit */
> >> > +           iowrite64(limit, mmio + limit_reg);
> >> > +           reg_val = ioread64(mmio + limit_reg);
> >> > +           if (reg_val != limit) {
> >> > +                   iowrite64(base_addr, mmio + limit_reg);
> >> > +                   iowrite64(0, peer_mmio + xlat_reg);
> >> > +                   return -EIO;
> >> > +           }
> >> > +   } else {
> >> > +           /* split bar addr range must all be 32 bit */
> >> > +           if (addr & (~0ull << 32))
> >> > +                   return -EINVAL;
> >> > +           if ((addr + size) & (~0ull << 32))
> >> > +                   return -EINVAL;
> >> > +
> >> > +           /* Set the limit if supported */
> >> > +           limit = base_addr + size;
> >> > +
> >> > +           /* set and verify setting the translation address */
> >> > +           iowrite64(addr, peer_mmio + xlat_reg);
> >> > +           reg_val = ioread64(peer_mmio + xlat_reg);
> >> > +           if (reg_val != addr) {
> >> > +                   iowrite64(0, peer_mmio + xlat_reg);
> >> > +                   return -EIO;
> >> > +           }
> >> > +
> >> > +           /* set and verify setting the limit */
> >> > +           writel(limit, mmio + limit_reg);
> >> > +           reg_val = readl(mmio + limit_reg);
> >> > +           if (reg_val != limit) {
> >> > +                   writel(base_addr, mmio + limit_reg);
> >> > +                   writel(0, peer_mmio + xlat_reg);
> >> > +                   return -EIO;
> >> > +           }
> >> > +   }
> >>
> >> Being nit picky here again, but why is it necessary to make 2 consecutive
> >> checks for the same 'bar' variable.  Please combine them.
> >
> > Ok.
> >
> >> > +
> >> > +   return 0;
> >> > +}
> >> > +
> >> > +static int amd_link_is_up(struct amd_ntb_dev *ndev) {
> >> > +   /* set link down and clear flag */
> >> > +   if (ndev->peer_sta & AMD_PEER_RESET_EVENT) {
> >> > +           ndev->peer_sta &= ~AMD_PEER_RESET_EVENT;
> >> > +           return 0;
> >> > +   } else if (ndev->peer_sta & (AMD_PEER_D3_EVENT |
> >> > +                                AMD_PEER_PMETO_EVENT)) {
> >> > +           return 0;
> >> > +   } else if (ndev->peer_sta & AMD_PEER_D0_EVENT) {
> >> > +           ndev->peer_sta = 0;
> >> > +           return 0;
> >> > +   } else
> >> > +           return NTB_LNK_STA_ACTIVE(ndev->cntl_sta);
> >> > +}
> >> > +
> >> > +static int amd_ntb_link_is_up(struct ntb_dev *ntb,
> >> > +                         enum ntb_speed *speed,
> >> > +                         enum ntb_width *width)
> >> > +{
> >> > +   struct amd_ntb_dev *ndev = ntb_ndev(ntb);
> >> > +   int ret = 0;
> >> > +
> >> > +   if (amd_link_is_up(ndev)) {
> >> > +           if (speed)
> >> > +                   *speed = NTB_LNK_STA_SPEED(ndev->lnk_sta);
> >> > +           if (width)
> >> > +                   *width = NTB_LNK_STA_WIDTH(ndev->lnk_sta);
> >> > +
> >> > +           dev_dbg(ndev_dev(ndev), "link is up.\n");
> >> > +
> >> > +           ret = 1;
> >> > +   } else {
> >> > +           if (speed)
> >> > +                   *speed = NTB_SPEED_NONE;
> >> > +           if (width)
> >> > +                   *width = NTB_WIDTH_NONE;
> >> > +
> >> > +           dev_dbg(ndev_dev(ndev), "link is down.\n");
> >> > +   }
> >> > +
> >> > +   return ret;
> >> > +}
> >> > +
> >> > +static int amd_ntb_link_enable(struct ntb_dev *ntb,
> >> > +                          enum ntb_speed max_speed,
> >> > +                          enum ntb_width max_width)
> >> > +{
> >> > +   struct amd_ntb_dev *ndev = ntb_ndev(ntb);
> >> > +   void __iomem *mmio = ndev->self_mmio;
> >> > +   u32 ntb_ctl;
> >> > +
> >> > +   /* Enable event interrupt */
> >> > +   ndev->int_mask &= ~AMD_EVENT_INTMASK;
> >> > +   writel(ndev->int_mask, mmio + AMD_INTMASK_OFFSET);
> >> > +
> >> > +   if (ndev->ntb.topo == NTB_TOPO_SEC)
> >> > +           return -EINVAL;
> >> > +   dev_dbg(ndev_dev(ndev), "Enabling Link.\n");
> >> > +
> >> > +   ntb_ctl = readl(mmio + AMD_CNTL_OFFSET);
> >> > +   ntb_ctl |= (3 << 20);
> >>
> >> This "20" is a bit magical to me.  Please use a #define
> >
> > Yes.
> >
> >> > +   writel(ntb_ctl, mmio + AMD_CNTL_OFFSET);
> >> > +
> >> > +   return 0;
> >> > +}
> >> > +
> >> > +static int amd_ntb_link_disable(struct ntb_dev *ntb) {
> >> > +   struct amd_ntb_dev *ndev = ntb_ndev(ntb);
> >> > +   void __iomem *mmio = ndev->self_mmio;
> >> > +   u32 ntb_ctl;
> >> > +
> >> > +   /* Disable event interrupt */
> >> > +   ndev->int_mask |= AMD_EVENT_INTMASK;
> >> > +   writel(ndev->int_mask, mmio + AMD_INTMASK_OFFSET);
> >> > +
> >> > +   if (ndev->ntb.topo == NTB_TOPO_SEC)
> >> > +           return -EINVAL;
> >> > +   dev_dbg(ndev_dev(ndev), "Enabling Link.\n");
> >> > +
> >> > +   ntb_ctl = readl(mmio + AMD_CNTL_OFFSET);
> >> > +   ntb_ctl &= ~(3 << 16);
> >>
> >> Should this be "20" or "16"?  Seems odd that you would use a different
> offset
> >> to enable than disable.  Either way, a #define here would be nice too.
> >
> > Got it.
> >
> >> > +   writel(ntb_ctl, mmio + AMD_CNTL_OFFSET);
> >> > +
> >> > +   return 0;
> >> > +}
> >> > +
> >> > +static u64 amd_ntb_db_valid_mask(struct ntb_dev *ntb) {
> >> > +   return ntb_ndev(ntb)->db_valid_mask; }
> >> > +
> >> > +static int amd_ntb_db_vector_count(struct ntb_dev *ntb) {
> >> > +   return ntb_ndev(ntb)->db_count;
> >> > +}
> >> > +
> >> > +static u64 amd_ntb_db_vector_mask(struct ntb_dev *ntb, int
> db_vector)
> >> > +{
> >> > +   struct amd_ntb_dev *ndev = ntb_ndev(ntb);
> >> > +
> >> > +   if (db_vector < 0 || db_vector > ndev->db_count)
> >> > +           return 0;
> >> > +
> >> > +   return ntb_ndev(ntb)->db_valid_mask & (1 << db_vector); }
> >> > +
> >> > +static u64 amd_ntb_db_read(struct ntb_dev *ntb) {
> >> > +   struct amd_ntb_dev *ndev = ntb_ndev(ntb);
> >> > +   void __iomem *mmio = ndev->self_mmio;
> >> > +
> >> > +   return (u64)readw(mmio + AMD_DBSTAT_OFFSET);
> >>
> >> Is this cast necessary?
> >
> > Just following Intel ntb driver, keep consistent in Intel code.
> 
> Okay, another thing for me to fix in the Intel driver :)
> Leave it this way, and I'll "correct" both at the same time.
> 
> >>
> >> > +}
> >> > +
> >> > +static int amd_ntb_db_clear(struct ntb_dev *ntb, u64 db_bits) {
> >> > +   struct amd_ntb_dev *ndev = ntb_ndev(ntb);
> >> > +   void __iomem *mmio = ndev->self_mmio;
> >> > +
> >> > +   writew((u16)db_bits, mmio + AMD_DBSTAT_OFFSET);
> >> > +
> >> > +   return 0;
> >> > +}
> >> > +
> >> > +static int amd_ntb_db_set_mask(struct ntb_dev *ntb, u64 db_bits) {
> >> > +   struct amd_ntb_dev *ndev = ntb_ndev(ntb);
> >> > +   void __iomem *mmio = ndev->self_mmio;
> >> > +   unsigned long flags;
> >> > +
> >> > +   if (db_bits & ~ndev->db_valid_mask)
> >> > +           return -EINVAL;
> >> > +
> >> > +   spin_lock_irqsave(&ndev->db_mask_lock, flags);
> >> > +   ndev->db_mask |= db_bits;
> >> > +   writew((u16)ndev->db_mask, mmio + AMD_DBMASK_OFFSET);
> >> > +   spin_unlock_irqrestore(&ndev->db_mask_lock, flags);
> >> > +
> >> > +   return 0;
> >> > +}
> >> > +
> >> > +static int amd_ntb_db_clear_mask(struct ntb_dev *ntb, u64 db_bits) {
> >> > +   struct amd_ntb_dev *ndev = ntb_ndev(ntb);
> >> > +   void __iomem *mmio = ndev->self_mmio;
> >> > +   unsigned long flags;
> >> > +
> >> > +   if (db_bits & ~ndev->db_valid_mask)
> >> > +           return -EINVAL;
> >> > +
> >> > +   spin_lock_irqsave(&ndev->db_mask_lock, flags);
> >> > +   ndev->db_mask &= ~db_bits;
> >> > +   writew((u16)ndev->db_mask, mmio + AMD_DBMASK_OFFSET);
> >> > +   spin_unlock_irqrestore(&ndev->db_mask_lock, flags);
> >> > +
> >> > +   return 0;
> >> > +}
> >> > +
> >> > +static int amd_ntb_peer_db_addr(struct ntb_dev *ntb,
> >> > +           phys_addr_t *db_addr,
> >> > +           resource_size_t *db_size)
> >>
> >> Nit, this doesn't seem like it would be aligned properly.
> >
> > Got it.
> >
> >> > +{
> >> > +   struct amd_ntb_dev *ndev = ntb_ndev(ntb);
> >> > +
> >> > +   if (db_addr)
> >> > +           *db_addr = (phys_addr_t)(ndev->peer_mmio +
> >> AMD_DBREQ_OFFSET);
> >> > +   if (db_size)
> >> > +           *db_size = sizeof(u32);
> >> > +
> >> > +   return 0;
> >> > +}
> >> > +
> >> > +static int amd_ntb_peer_db_set(struct ntb_dev *ntb, u64 db_bits) {
> >> > +   struct amd_ntb_dev *ndev = ntb_ndev(ntb);
> >> > +   void __iomem *mmio = ndev->self_mmio;
> >> > +
> >> > +   writew((u16)db_bits, mmio + AMD_DBREQ_OFFSET);
> >> > +
> >> > +   return 0;
> >> > +}
> >> > +
> >> > +static int amd_ntb_spad_count(struct ntb_dev *ntb) {
> >> > +   return ntb_ndev(ntb)->spad_count;
> >> > +}
> >> > +
> >> > +static u32 amd_ntb_spad_read(struct ntb_dev *ntb, int idx) {
> >> > +   struct amd_ntb_dev *ndev = ntb_ndev(ntb);
> >> > +   void __iomem *mmio = ndev->self_mmio;
> >> > +   u32 offset = 0;
> >>
> >> Unnecessary init of this variable
> >
> > Ok.
> >
> >> > +
> >> > +   if (idx < 0 || idx >= ndev->spad_count)
> >> > +           return 0;
> >> > +
> >> > +   offset = ndev->self_spad + (idx << 2);
> >> > +   return readl(mmio + AMD_SPAD_OFFSET + offset); }
> >> > +
> >> > +static int amd_ntb_spad_write(struct ntb_dev *ntb,
> >> > +                           int idx, u32 val)
> >> > +{
> >> > +   struct amd_ntb_dev *ndev = ntb_ndev(ntb);
> >> > +   void __iomem *mmio = ndev->self_mmio;
> >> > +   u32 offset = 0;
> >>
> >> Unnecessary init of this variable
> >
> > Ok.
> >
> >> > +
> >> > +   if (idx < 0 || idx >= ndev->spad_count)
> >> > +           return -EINVAL;
> >> > +
> >> > +   offset = ndev->self_spad + (idx << 2);
> >> > +   writel(val, mmio + AMD_SPAD_OFFSET + offset);
> >> > +
> >> > +   return 0;
> >> > +}
> >> > +
> >> > +static int amd_ntb_peer_spad_addr(struct ntb_dev *ntb, int idx,
> >> > +                               phys_addr_t *spad_addr)
> >> > +{
> >> > +   struct amd_ntb_dev *ndev = ntb_ndev(ntb);
> >> > +
> >> > +   if (idx < 0 || idx >= ndev->spad_count)
> >> > +           return -EINVAL;
> >> > +
> >> > +   if (spad_addr)
> >> > +           *spad_addr = (phys_addr_t)(ndev->self_mmio +
> >> AMD_SPAD_OFFSET +
> >> > +                           ndev->peer_spad + (idx << 2));
> >>
> >> This doesn't seem aligned properly either.
> >
> > Ok
> >
> >> > +   return 0;
> >> > +}
> >> > +
> >> > +static u32 amd_ntb_peer_spad_read(struct ntb_dev *ntb, int idx) {
> >> > +   struct amd_ntb_dev *ndev = ntb_ndev(ntb);
> >> > +   void __iomem *mmio = ndev->self_mmio;
> >> > +   u32 offset;
> >> > +
> >> > +   if (idx < 0 || idx >= ndev->spad_count)
> >> > +           return -EINVAL;
> >> > +
> >> > +   offset = ndev->peer_spad + (idx << 2);
> >> > +   return readl(mmio + AMD_SPAD_OFFSET + offset); }
> >> > +
> >> > +static int amd_ntb_peer_spad_write(struct ntb_dev *ntb,
> >> > +                                int idx, u32 val)
> >> > +{
> >> > +   struct amd_ntb_dev *ndev = ntb_ndev(ntb);
> >> > +   void __iomem *mmio = ndev->self_mmio;
> >> > +   u32 offset;
> >> > +
> >> > +   if (idx < 0 || idx >= ndev->spad_count)
> >> > +           return -EINVAL;
> >> > +
> >> > +   offset = ndev->peer_spad + (idx << 2);
> >> > +   writel(val, mmio + AMD_SPAD_OFFSET + offset);
> >> > +
> >> > +   return 0;
> >> > +}
> >> > +
> >> > +static void amd_ack_SMU(struct amd_ntb_dev *ndev, u32 bit)
> >>
> >> Nit, Please make this SMU lower case.  Also, it is a bit odd to see SMUACK
> >> and ack_smu.  It is not a big deal, but since there has to be another
> version to
> >> address Allen's comments (and mine), then this won't add to much
> additional
> >> work to address.
> >
> > Ok.
> >
> >> > +{
> >> > +   void __iomem *mmio = ndev->self_mmio;
> >> > +   int reg;
> >> > +
> >> > +   reg = readl(mmio + AMD_SMUACK_OFFSET);
> >> > +   reg |= bit;
> >> > +   writel(reg, mmio + AMD_SMUACK_OFFSET);
> >> > +
> >> > +   ndev->peer_sta |= bit;
> >> > +}
> >> > +
> >> > +static void amd_handle_event(struct amd_ntb_dev *ndev, int vec) {
> >> > +   void __iomem *mmio = ndev->self_mmio;
> >> > +   u32 status;
> >> > +
> >> > +   status = readl(mmio + AMD_INTSTAT_OFFSET);
> >> > +   if (!(status & AMD_EVENT_INTMASK))
> >> > +           return;
> >> > +
> >> > +   dev_dbg(ndev_dev(ndev), "status = 0x%x and vec = %d\n", status,
> >> > +vec);
> >> > +
> >> > +   status &= AMD_EVENT_INTMASK;
> >> > +   switch (status) {
> >> > +   case AMD_PEER_FLUSH_EVENT:
> >> > +           dev_info(ndev_dev(ndev), "Flush is done.\n");
> >> > +           break;
> >> > +   case AMD_PEER_RESET_EVENT:
> >> > +           amd_ack_SMU(ndev, AMD_PEER_RESET_EVENT);
> >> > +
> >> > +           /* link down first */
> >> > +           ntb_link_event(&ndev->ntb);
> >> > +           /* polling peer status */
> >> > +           schedule_delayed_work(&ndev->hb_timer,
> >> AMD_LINK_HB_TIMEOUT);
> >> > +
> >> > +           break;
> >> > +   case AMD_PEER_D3_EVENT:
> >> > +   case AMD_PEER_PMETO_EVENT:
> >> > +           amd_ack_SMU(ndev, status);
> >> > +
> >> > +           /* link down */
> >> > +           ntb_link_event(&ndev->ntb);
> >> > +
> >> > +           break;
> >> > +   case AMD_PEER_D0_EVENT:
> >> > +           mmio = ndev->peer_mmio;
> >> > +           status = readl(mmio + AMD_PMESTAT_OFFSET);
> >> > +           /* check if this is WAKEUP event */
> >> > +           if (status & 0x1)
> >> > +                   dev_info(ndev_dev(ndev), "Wakeup is done.\n");
> >> > +
> >> > +           amd_ack_SMU(ndev, AMD_PEER_D0_EVENT);
> >> > +
> >> > +           if (amd_link_is_up(ndev))
> >> > +                   ntb_link_event(&ndev->ntb);
> >> > +           else
> >> > +                   schedule_delayed_work(&ndev->hb_timer,
> >> > +                                           AMD_LINK_HB_TIMEOUT);
> >> > +           break;
> >> > +   default:
> >> > +           dev_info(ndev_dev(ndev), "event status = 0x%x.\n", status);
> >> > +           break;
> >> > +   }
> >> > +}
> >> > +
> >> > +static irqreturn_t ndev_interrupt(struct amd_ntb_dev *ndev, int vec)
> >> > +{
> >> > +   dev_dbg(ndev_dev(ndev), "vec %d\n", vec);
> >> > +
> >> > +   if (vec > (ndev->msix_vec_count - 1)) {
> >> > +           dev_err(ndev_dev(ndev), "Invalid interrupt.\n");
> >> > +           return IRQ_HANDLED;
> >> > +   }
> >>
> >> Unless this actually happens, you might not want the unnecessary branch
> in
> >> your fast path.
> >
> > Ok, I'll remove it.
> >
> >> > +
> >> > +   if (vec > 15 || (ndev->msix_vec_count == 1))
> >> > +           amd_handle_event(ndev, vec);
> >> > +
> >> > +   if (vec < 16)
> >>
> >> I'd prefer to see a #define here for 16
> >
> > Ok.
> >
> >>
> >> > +           ntb_db_event(&ndev->ntb, vec);
> >> > +
> >> > +   return IRQ_HANDLED;
> >> > +}
> >> > +
> >> > +static irqreturn_t ndev_vec_isr(int irq, void *dev) {
> >> > +   struct amd_ntb_vec *nvec = dev;
> >> > +
> >> > +   return ndev_interrupt(nvec->ndev, nvec->num); }
> >> > +
> >> > +static irqreturn_t ndev_irq_isr(int irq, void *dev) {
> >> > +   struct amd_ntb_dev *ndev = dev;
> >> > +
> >> > +   return ndev_interrupt(ndev, irq - ndev_pdev(ndev)->irq); }
> >> > +
> >> > +static int ndev_init_isr(struct amd_ntb_dev *ndev,
> >> > +                           int msix_min, int msix_max)
> >> > +{
> >> > +   struct pci_dev *pdev;
> >> > +   int rc, i, msix_count, node;
> >> > +
> >> > +   pdev = ndev_pdev(ndev);
> >> > +
> >> > +   node = dev_to_node(&pdev->dev);
> >> > +
> >> > +   ndev->db_mask = ndev->db_valid_mask;
> >> > +
> >> > +   /* Try to set up msix irq */
> >> > +   ndev->vec = kzalloc_node(msix_max * sizeof(*ndev->vec),
> >> > +                            GFP_KERNEL, node);
> >> > +   if (!ndev->vec)
> >> > +           goto err_msix_vec_alloc;
> >> > +
> >> > +   ndev->msix = kzalloc_node(msix_max * sizeof(*ndev->msix),
> >> > +                             GFP_KERNEL, node);
> >> > +   if (!ndev->msix)
> >> > +           goto err_msix_alloc;
> >> > +
> >> > +   for (i = 0; i < msix_max; ++i)
> >> > +           ndev->msix[i].entry = i;
> >> > +
> >> > +   msix_count = pci_enable_msix_range(pdev, ndev->msix,
> >> > +                                      msix_min, msix_max);
> >> > +   if (msix_count < 0)
> >> > +           goto err_msix_enable;
> >> > +
> >> > +   /* Disable MSIX if msix count is less than 16 */
> >>
> >> Why?  Is there a HW errata or limitation (if so, write so in this comment)?
> If
> >> not, MSIX has many benefits over legacy interrupts, and it doesn't make
> >> sense to walk away from that.
> >
> > Yes, this is hardware limitation, i'll add "hardware limitation" into the
> comment.
> >
> >>
> >> > +   if (msix_count < msix_min) {
> >> > +           pci_disable_msix(pdev);
> >> > +           goto err_msix_enable;
> >> > +   }
> >> > +
> >> > +   for (i = 0; i < msix_count; ++i) {
> >> > +           ndev->vec[i].ndev = ndev;
> >> > +           ndev->vec[i].num = i;
> >> > +           rc = request_irq(ndev->msix[i].vector, ndev_vec_isr, 0,
> >> > +                            "ndev_vec_isr", &ndev->vec[i]);
> >> > +           if (rc)
> >> > +                   goto err_msix_request;
> >> > +   }
> >> > +
> >> > +   dev_dbg(ndev_dev(ndev), "Using msix interrupts\n");
> >> > +   ndev->db_count = msix_min;
> >> > +   ndev->msix_vec_count = msix_max;
> >> > +   return 0;
> >> > +
> >> > +err_msix_request:
> >> > +   while (i-- > 0)
> >> > +           free_irq(ndev->msix[i].vector, ndev);
> >> > +   pci_disable_msix(pdev);
> >> > +err_msix_enable:
> >> > +   kfree(ndev->msix);
> >> > +err_msix_alloc:
> >> > +   kfree(ndev->vec);
> >> > +err_msix_vec_alloc:
> >> > +   ndev->msix = NULL;
> >> > +   ndev->vec = NULL;
> >> > +
> >> > +   /* Try to set up msi irq */
> >> > +   rc = pci_enable_msi(pdev);
> >> > +   if (rc)
> >> > +           goto err_msi_enable;
> >> > +
> >> > +   rc = request_irq(pdev->irq, ndev_irq_isr, 0,
> >> > +                    "ndev_irq_isr", ndev);
> >> > +   if (rc)
> >> > +           goto err_msi_request;
> >> > +
> >>
> >> I have problems with the msix/msi/intx selection being done this way, but
> I
> >> see it being done in the Intel driver.  So....I'll fix it there first.
> >
> > Ok, I'll change it according to your patch.
> 
> Don't worry about it.  I'll make a patch to fix both after I accept
> your patch :)
> 
> >>
> >> > +   dev_dbg(ndev_dev(ndev), "Using msi interrupts\n");
> >> > +   ndev->db_count = 1;
> >> > +   ndev->msix_vec_count = 1;
> >> > +   return 0;
> >> > +
> >> > +err_msi_request:
> >> > +   pci_disable_msi(pdev);
> >> > +err_msi_enable:
> >> > +
> >> > +   /* Try to set up intx irq */
> >> > +   pci_intx(pdev, 1);
> >> > +
> >> > +   rc = request_irq(pdev->irq, ndev_irq_isr, IRQF_SHARED,
> >> > +                    "ndev_irq_isr", ndev);
> >> > +   if (rc)
> >> > +           goto err_intx_request;
> >> > +
> >> > +   dev_dbg(ndev_dev(ndev), "Using intx interrupts\n");
> >> > +   ndev->db_count = 1;
> >> > +   ndev->msix_vec_count = 1;
> >> > +   return 0;
> >> > +
> >> > +err_intx_request:
> >> > +   return rc;
> >> > +}
> >> > +
> >> > +static void ndev_deinit_isr(struct amd_ntb_dev *ndev) {
> >> > +   struct pci_dev *pdev;
> >> > +   void __iomem *mmio = ndev->self_mmio;
> >> > +   int i;
> >> > +
> >> > +   pdev = ndev_pdev(ndev);
> >> > +
> >> > +   /* Mask all doorbell interrupts */
> >> > +   ndev->db_mask = ndev->db_valid_mask;
> >> > +   writel(ndev->db_mask, mmio + AMD_DBMASK_OFFSET);
> >> > +
> >> > +   if (ndev->msix) {
> >> > +           i = ndev->msix_vec_count;
> >> > +           while (i--)
> >> > +                   free_irq(ndev->msix[i].vector, &ndev->vec[i]);
> >> > +           pci_disable_msix(pdev);
> >> > +           kfree(ndev->msix);
> >> > +           kfree(ndev->vec);
> >> > +   } else {
> >> > +           free_irq(pdev->irq, ndev);
> >> > +           if (pci_dev_msi_enabled(pdev))
> >> > +                   pci_disable_msi(pdev);
> >> > +           else
> >> > +                   pci_intx(pdev, 0);
> >> > +   }
> >> > +}
> >> > +
> >> > +static ssize_t ndev_debugfs_read(struct file *filp, char __user *ubuf,
> >> > +                            size_t count, loff_t *offp)
> >> > +{
> >> > +   struct amd_ntb_dev *ndev;
> >> > +   void __iomem *mmio;
> >> > +   char *buf;
> >> > +   size_t buf_size;
> >> > +   ssize_t ret, off;
> >> > +   union { u64 v64; u32 v32; u16 v16; } u;
> >> > +
> >> > +   ndev = filp->private_data;
> >> > +   mmio = ndev->self_mmio;
> >> > +
> >> > +   buf_size = min(count, 0x800ul);
> >> > +
> >> > +   buf = kmalloc(buf_size, GFP_KERNEL);
> >> > +   if (!buf)
> >> > +           return -ENOMEM;
> >> > +
> >> > +   off = 0;
> >> > +
> >> > +   off += scnprintf(buf + off, buf_size - off,
> >> > +                    "NTB Device Information:\n");
> >> > +
> >> > +   off += scnprintf(buf + off, buf_size - off,
> >> > +                    "Connection Topology -\t%s\n",
> >> > +                    ntb_topo_string(ndev->ntb.topo));
> >> > +
> >> > +   off += scnprintf(buf + off, buf_size - off,
> >> > +                    "LNK STA -\t\t%#06x\n", ndev->lnk_sta);
> >> > +
> >> > +   if (!amd_link_is_up(ndev)) {
> >> > +           off += scnprintf(buf + off, buf_size - off,
> >> > +                            "Link Status -\t\tDown\n");
> >> > +   } else {
> >> > +           off += scnprintf(buf + off, buf_size - off,
> >> > +                            "Link Status -\t\tUp\n");
> >> > +           off += scnprintf(buf + off, buf_size - off,
> >> > +                            "Link Speed -\t\tPCI-E Gen %u\n",
> >> > +                            NTB_LNK_STA_SPEED(ndev->lnk_sta));
> >> > +           off += scnprintf(buf + off, buf_size - off,
> >> > +                            "Link Width -\t\tx%u\n",
> >> > +                            NTB_LNK_STA_WIDTH(ndev->lnk_sta));
> >> > +   }
> >> > +
> >> > +   off += scnprintf(buf + off, buf_size - off,
> >> > +                    "Memory Window Count -\t%u\n", ndev-
> >> >mw_count);
> >> > +   off += scnprintf(buf + off, buf_size - off,
> >> > +                    "Scratchpad Count -\t%u\n", ndev->spad_count);
> >> > +   off += scnprintf(buf + off, buf_size - off,
> >> > +                    "Doorbell Count -\t%u\n", ndev->db_count);
> >> > +   off += scnprintf(buf + off, buf_size - off,
> >> > +                    "MSIX Vector Count -\t%u\n", ndev-
> >> >msix_vec_count);
> >> > +
> >> > +   off += scnprintf(buf + off, buf_size - off,
> >> > +                    "Doorbell Valid Mask -\t%#llx\n", ndev-
> >> >db_valid_mask);
> >> > +
> >> > +   u.v32 = readl(ndev->self_mmio + AMD_DBMASK_OFFSET);
> >> > +   off += scnprintf(buf + off, buf_size - off,
> >> > +                    "Doorbell Mask -\t\t\t%#06x\n", u.v32);
> >> > +
> >> > +   u.v32 = readl(mmio + AMD_DBSTAT_OFFSET);
> >> > +   off += scnprintf(buf + off, buf_size - off,
> >> > +                    "Doorbell Bell -\t\t\t%#06x\n", u.v32);
> >> > +
> >> > +   off += scnprintf(buf + off, buf_size - off,
> >> > +                    "\nNTB Incoming XLAT:\n");
> >> > +
> >> > +   u.v64 = ioread64(mmio + AMD_BAR1XLAT_OFFSET);
> >> > +   off += scnprintf(buf + off, buf_size - off,
> >> > +                    "XLAT1 -\t\t%#018llx\n", u.v64);
> >> > +
> >> > +   u.v64 = ioread64(ndev->self_mmio + AMD_BAR23XLAT_OFFSET);
> >> > +   off += scnprintf(buf + off, buf_size - off,
> >> > +                            "XLAT23 -\t\t%#018llx\n", u.v64);
> >> > +
> >> > +   u.v64 = ioread64(ndev->self_mmio + AMD_BAR45XLAT_OFFSET);
> >> > +   off += scnprintf(buf + off, buf_size - off,
> >> > +                            "XLAT45 -\t\t%#018llx\n", u.v64);
> >> > +
> >> > +   u.v32 = readl(mmio + AMD_BAR1LMT_OFFSET);
> >> > +   off += scnprintf(buf + off, buf_size - off,
> >> > +                    "LMT1 -\t\t\t%#06x\n", u.v32);
> >> > +
> >> > +   u.v64 = ioread64(ndev->self_mmio + AMD_BAR23LMT_OFFSET);
> >> > +   off += scnprintf(buf + off, buf_size - off,
> >> > +                            "LMT23 -\t\t\t%#018llx\n", u.v64);
> >> > +
> >> > +   u.v64 = ioread64(ndev->self_mmio + AMD_BAR45LMT_OFFSET);
> >> > +   off += scnprintf(buf + off, buf_size - off,
> >> > +                            "LMT45 -\t\t\t%#018llx\n", u.v64);
> >> > +
> >> > +   ret = simple_read_from_buffer(ubuf, count, offp, buf, off);
> >> > +   kfree(buf);
> >> > +   return ret;
> >> > +}
> >> > +
> >> > +static void ndev_init_debugfs(struct amd_ntb_dev *ndev) {
> >> > +   if (!debugfs_dir) {
> >> > +           ndev->debugfs_dir = NULL;
> >> > +           ndev->debugfs_info = NULL;
> >> > +   } else {
> >> > +           ndev->debugfs_dir =
> >> > +                   debugfs_create_dir(ndev_name(ndev),
> >> debugfs_dir);
> >> > +           if (!ndev->debugfs_dir)
> >> > +                   ndev->debugfs_info = NULL;
> >> > +           else
> >> > +                   ndev->debugfs_info =
> >> > +                           debugfs_create_file("info", S_IRUSR,
> >> > +                                               ndev->debugfs_dir, ndev,
> >> > +                                               &amd_ntb_debugfs_info);
> >> > +   }
> >> > +}
> >> > +
> >> > +static void ndev_deinit_debugfs(struct amd_ntb_dev *ndev) {
> >> > +   debugfs_remove_recursive(ndev->debugfs_dir);
> >> > +}
> >> > +
> >> > +static inline void ndev_init_struct(struct amd_ntb_dev *ndev,
> >> > +                               struct pci_dev *pdev)
> >> > +{
> >> > +   ndev->ntb.pdev = pdev;
> >> > +   ndev->ntb.topo = NTB_TOPO_NONE;
> >> > +   ndev->ntb.ops = &amd_ntb_ops;
> >> > +   ndev->int_mask = AMD_EVENT_INTMASK;
> >> > +   spin_lock_init(&ndev->db_mask_lock);
> >> > +}
> >> > +
> >> > +static int amd_poll_link(struct amd_ntb_dev *ndev) {
> >> > +   void __iomem *mmio = ndev->peer_mmio;
> >> > +   u32 reg, stat;
> >> > +   int rc;
> >> > +
> >> > +   reg = readl(mmio + AMD_SIDEINFO_OFFSET);
> >> > +   reg &= NTB_LIN_STA_ACTIVE_BIT;
> >> > +
> >> > +   dev_dbg(ndev_dev(ndev), "%s: reg_val = 0x%x.\n", __func__, reg);
> >> > +
> >> > +   if (reg == ndev->cntl_sta)
> >> > +           return 0;
> >> > +
> >> > +   ndev->cntl_sta = reg;
> >> > +
> >> > +   rc = pci_read_config_dword(ndev->ntb.pdev,
> >> > +                   AMD_LINK_STATUS_OFFSET, &stat);
> >> > +   if (rc)
> >> > +           return 0;
> >> > +   ndev->lnk_sta = stat;
> >> > +
> >> > +   return 1;
> >> > +}
> >> > +
> >> > +static void amd_link_hb(struct work_struct *work) {
> >> > +   struct amd_ntb_dev *ndev = hb_ndev(work);
> >> > +
> >> > +   if (amd_poll_link(ndev))
> >> > +           ntb_link_event(&ndev->ntb);
> >> > +
> >> > +   if (!amd_link_is_up(ndev))
> >> > +           schedule_delayed_work(&ndev->hb_timer,
> >> AMD_LINK_HB_TIMEOUT); }
> >> > +
> >> > +static int amd_init_isr(struct amd_ntb_dev *ndev) {
> >> > +   return ndev_init_isr(ndev, AMD_DB_CNT,
> >> AMD_MSIX_VECTOR_CNT); }
> >> > +
> >> > +static void amd_init_side_info(struct amd_ntb_dev *ndev) {
> >> > +   void __iomem *mmio = ndev->self_mmio;
> >> > +   unsigned int reg = 0;
> >>
> >> variable initialized unnecessarily
> >
> > Ok.
> >
> >> > +
> >> > +   reg = readl(mmio + AMD_SIDEINFO_OFFSET);
> >> > +   if (!(reg & 0x2)) {
> >> > +           reg |= 0x2;
> >>
> >> I'd like a #define for the "2" above, as you are using it multiple times.
> >
> > Ok.
> >
> >>
> >> > +           writel(reg, mmio + AMD_SIDEINFO_OFFSET);
> >> > +   }
> >> > +}
> >> > +
> >> > +static void amd_deinit_side_info(struct amd_ntb_dev *ndev) {
> >> > +   void __iomem *mmio = ndev->self_mmio;
> >> > +   unsigned int reg = 0;
> >>
> >> variable initialized unnecessarily
> >
> > Ok.
> >
> >> > +
> >> > +   reg = readl(mmio + AMD_SIDEINFO_OFFSET);
> >> > +   if (reg & 0x2) {
> >> > +           reg &= ~(0x2);
> >> > +           writel(reg, mmio + AMD_SIDEINFO_OFFSET);
> >> > +           readl(mmio + AMD_SIDEINFO_OFFSET);
> >> > +   }
> >> > +}
> >> > +
> >> > +static int amd_init_ntb(struct amd_ntb_dev *ndev) {
> >> > +   void __iomem *mmio = ndev->self_mmio;
> >> > +
> >> > +   ndev->mw_count = AMD_MW_CNT;
> >> > +   ndev->spad_count = AMD_SPADS_CNT;
> >> > +   ndev->db_count = AMD_DB_CNT;
> >> > +
> >> > +   switch (ndev->ntb.topo) {
> >> > +   case NTB_TOPO_PRI:
> >> > +   case NTB_TOPO_SEC:
> >> > +           ndev->spad_count >>= 1;
> >> > +           if (ndev->ntb.topo == NTB_TOPO_PRI) {
> >> > +                   ndev->self_spad = 0;
> >> > +                   ndev->peer_spad = 0x20;
> >> > +           } else {
> >> > +                   ndev->self_spad = 0x20;
> >> > +                   ndev->peer_spad = 0;
> >> > +           }
> >> > +
> >> > +           INIT_DELAYED_WORK(&ndev->hb_timer, amd_link_hb);
> >> > +           schedule_delayed_work(&ndev->hb_timer,
> >> AMD_LINK_HB_TIMEOUT);
> >> > +
> >> > +           break;
> >> > +   default:
> >> > +           dev_err(ndev_dev(ndev), "AMD NTB does not support B2B
> >> mode.\n");
> >> > +           return -EINVAL;
> >> > +   }
> >> > +
> >> > +   ndev->db_valid_mask = BIT_ULL(ndev->db_count) - 1;
> >> > +
> >> > +   /* Mask event interrupts */
> >> > +   writel(ndev->int_mask, mmio + AMD_INTMASK_OFFSET);
> >> > +
> >> > +   return 0;
> >> > +}
> >> > +
> >> > +static inline enum ntb_topo amd_get_topo(struct amd_ntb_dev
> *ndev)
> >>
> >> It is usually better to let the compiler determine what to inline and what
> not
> >> to inline..
> >
> > Do you mean removing the key word of inline?
> 
> Yes.  "inline" will force the compiler to inline the function.  If you
> do not specify, then the compiler is free to inline it if it makes
> more optimal code (and won't inline it if the inline makes less
> optimal code).    So, please remove "inline" unless you know for
> certain there is a good reason to inline it.

Got it.

> >> > +{
> >> > +   void __iomem *mmio = ndev->self_mmio;
> >> > +   u32 info;
> >> > +
> >> > +   info = readl(mmio + AMD_SIDEINFO_OFFSET);
> >> > +   if (info & AMD_SIDE_MASK)
> >> > +           return NTB_TOPO_SEC;
> >> > +   else
> >> > +           return NTB_TOPO_PRI;
> >> > +}
> >> > +
> >> > +static int amd_init_dev(struct amd_ntb_dev *ndev) {
> >> > +   struct pci_dev *pdev;
> >> > +   int rc = 0;
> >> > +
> >> > +   pdev = ndev_pdev(ndev);
> >> > +
> >> > +   ndev->ntb.topo = amd_get_topo(ndev);
> >> > +   dev_dbg(ndev_dev(ndev), "AMD NTB topo is %s\n",
> >> > +                   ntb_topo_string(ndev->ntb.topo));
> >> > +
> >> > +   rc = amd_init_ntb(ndev);
> >> > +   if (rc)
> >> > +           return rc;
> >> > +
> >> > +   rc = amd_init_isr(ndev);
> >> > +   if (rc) {
> >> > +           dev_err(ndev_dev(ndev), "fail to init isr.\n");
> >> > +           return rc;
> >> > +   }
> >> > +
> >> > +   ndev->db_valid_mask = BIT_ULL(ndev->db_count) - 1;
> >> > +
> >> > +   return 0;
> >> > +}
> >> > +
> >> > +static void amd_deinit_dev(struct amd_ntb_dev *ndev) {
> >> > +   cancel_delayed_work_sync(&ndev->hb_timer);
> >> > +
> >> > +   ndev_deinit_isr(ndev);
> >> > +}
> >> > +
> >> > +static int amd_ntb_init_pci(struct amd_ntb_dev *ndev,
> >> > +                       struct pci_dev *pdev)
> >> > +{
> >> > +   int rc;
> >> > +
> >> > +   pci_set_drvdata(pdev, ndev);
> >> > +
> >> > +   rc = pci_enable_device(pdev);
> >> > +   if (rc)
> >> > +           goto err_pci_enable;
> >> > +
> >> > +   rc = pci_request_regions(pdev, NTB_NAME);
> >> > +   if (rc)
> >> > +           goto err_pci_regions;
> >> > +
> >> > +   pci_set_master(pdev);
> >> > +
> >> > +   rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
> >> > +   if (rc) {
> >> > +           rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
> >> > +           if (rc)
> >> > +                   goto err_dma_mask;
> >> > +           dev_warn(ndev_dev(ndev), "Cannot DMA highmem\n");
> >> > +   }
> >> > +
> >> > +   rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
> >> > +   if (rc) {
> >> > +           rc = pci_set_consistent_dma_mask(pdev,
> >> DMA_BIT_MASK(32));
> >> > +           if (rc)
> >> > +                   goto err_dma_mask;
> >> > +           dev_warn(ndev_dev(ndev), "Cannot DMA consistent
> >> highmem\n");
> >> > +   }
> >> > +
> >> > +   ndev->self_mmio = pci_iomap(pdev, 0, 0);
> >> > +   if (!ndev->self_mmio) {
> >> > +           rc = -EIO;
> >> > +           goto err_mmio;
> >> > +   }
> >> > +   ndev->peer_mmio = ndev->self_mmio + AMD_PEER_OFFSET;
> >> > +
> >> > +   return 0;
> >> > +
> >> > +err_mmio:
> >>
> >> seems like an unnecessary goto destination here
> >
> > Will entry the goto destination if pci_iomap failed. The code is following
> > Intel NTB and I see many drivers in kernel will check the return value
> > of pci_iomap.
> 
> If you just used err_dma_mask in the err_mmio case, you have the same
> result and won't have an unnecessary goto destination label.

I see

> >> > +err_dma_mask:
> >> > +   pci_clear_master(pdev);
> >> > +err_pci_regions:
> >> > +   pci_disable_device(pdev);
> >> > +err_pci_enable:
> >> > +   pci_set_drvdata(pdev, NULL);
> >> > +   return rc;
> >> > +}
> >> > +
> >> > +static void amd_ntb_deinit_pci(struct amd_ntb_dev *ndev) {
> >> > +   struct pci_dev *pdev = ndev_pdev(ndev);
> >> > +
> >> > +   pci_iounmap(pdev, ndev->self_mmio);
> >> > +
> >> > +   pci_clear_master(pdev);
> >> > +   pci_release_regions(pdev);
> >> > +   pci_disable_device(pdev);
> >> > +   pci_set_drvdata(pdev, NULL);
> >> > +}
> >> > +
> >> > +
> >> > +static int amd_ntb_pci_probe(struct pci_dev *pdev,
> >> > +                        const struct pci_device_id *id) {
> >> > +   struct amd_ntb_dev *ndev;
> >> > +   int rc, node;
> >> > +
> >> > +   node = dev_to_node(&pdev->dev);
> >> > +
> >> > +   ndev = kzalloc_node(sizeof(*ndev), GFP_KERNEL, node);
> >> > +   if (!ndev) {
> >> > +           rc = -ENOMEM;
> >> > +           goto err_ndev;
> >> > +   }
> >> > +
> >> > +   ndev_init_struct(ndev, pdev);
> >> > +
> >> > +   rc = amd_ntb_init_pci(ndev, pdev);
> >> > +   if (rc)
> >> > +           goto err_init_pci;
> >> > +
> >> > +   rc = amd_init_dev(ndev);
> >> > +   if (rc)
> >> > +           goto err_init_dev;
> >> > +
> >> > +   /* write side info */
> >> > +   amd_init_side_info(ndev);
> >> > +
> >> > +   amd_poll_link(ndev);
> >> > +
> >> > +   ndev_init_debugfs(ndev);
> >> > +
> >> > +   rc = ntb_register_device(&ndev->ntb);
> >> > +   if (rc)
> >> > +           goto err_register;
> >> > +
> >> > +   dev_info(&pdev->dev, "NTB device registered.\n");
> >> > +
> >> > +   return 0;
> >> > +
> >> > +err_register:
> >> > +   ndev_deinit_debugfs(ndev);
> >> > +   amd_deinit_dev(ndev);
> >> > +err_init_dev:
> >> > +   amd_ntb_deinit_pci(ndev);
> >> > +err_init_pci:
> >> > +   kfree(ndev);
> >> > +err_ndev:
> >> > +   return rc;
> >> > +}
> >> > +
> >> > +static void amd_ntb_pci_remove(struct pci_dev *pdev) {
> >> > +   struct amd_ntb_dev *ndev = pci_get_drvdata(pdev);
> >> > +
> >> > +   ntb_unregister_device(&ndev->ntb);
> >> > +   ndev_deinit_debugfs(ndev);
> >> > +   amd_deinit_side_info(ndev);
> >> > +   amd_deinit_dev(ndev);
> >> > +   amd_ntb_deinit_pci(ndev);
> >> > +   kfree(ndev);
> >> > +}
> >> > +
> >> > +static const struct file_operations amd_ntb_debugfs_info = {
> >> > +   .owner = THIS_MODULE,
> >> > +   .open = simple_open,
> >> > +   .read = ndev_debugfs_read,
> >> > +};
> >> > +
> >> > +static const struct pci_device_id amd_ntb_pci_tbl[] = {
> >> > +   {PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_NTB)},
> >> > +   {0}
> >> > +};
> >> > +MODULE_DEVICE_TABLE(pci, amd_ntb_pci_tbl);
> >> > +
> >> > +static struct pci_driver amd_ntb_pci_driver = {
> >> > +   .name           = KBUILD_MODNAME,
> >> > +   .id_table       = amd_ntb_pci_tbl,
> >> > +   .probe          = amd_ntb_pci_probe,
> >> > +   .remove         = amd_ntb_pci_remove,
> >> > +};
> >> > +
> >> > +static int __init amd_ntb_pci_driver_init(void) {
> >> > +   pr_info("%s %s\n", NTB_DESC, NTB_VER);
> >> > +
> >> > +   if (debugfs_initialized())
> >> > +           debugfs_dir = debugfs_create_dir(KBUILD_MODNAME,
> >> NULL);
> >> > +
> >> > +   return pci_register_driver(&amd_ntb_pci_driver);
> >> > +}
> >> > +module_init(amd_ntb_pci_driver_init);
> >> > +
> >> > +static void __exit amd_ntb_pci_driver_exit(void) {
> >> > +   pci_unregister_driver(&amd_ntb_pci_driver);
> >> > +   debugfs_remove_recursive(debugfs_dir);
> >> > +}
> >> > +module_exit(amd_ntb_pci_driver_exit);
> >> > diff --git a/drivers/ntb/hw/amd/ntb_hw_amd.h
> >> > b/drivers/ntb/hw/amd/ntb_hw_amd.h new file mode 100644 index
> >> > 0000000..31021c0
> >> > --- /dev/null
> >> > +++ b/drivers/ntb/hw/amd/ntb_hw_amd.h
> >> > @@ -0,0 +1,246 @@
> >> > +/*
> >> > + * This file is provided under a dual BSD/GPLv2 license.  When using or
> >> > + *   redistributing this file, you may do so under either license.
> >> > + *
> >> > + *   GPL LICENSE SUMMARY
> >> > + *
> >> > + *   Copyright (C) 2016 Advanced Micro Devices, Inc. All Rights Reserved.
> >> > + *
> >> > + *   This program is free software; you can redistribute it and/or modify
> >> > + *   it under the terms of version 2 of the GNU General Public License
> as
> >> > + *   published by the Free Software Foundation.
> >> > + *
> >> > + *   BSD LICENSE
> >> > + *
> >> > + *   Copyright (C) 2016 Advanced Micro Devices, Inc. All Rights Reserved.
> >> > + *
> >> > + *   Redistribution and use in source and binary forms, with or without
> >> > + *   modification, are permitted provided that the following conditions
> >> > + *   are met:
> >> > + *
> >> > + *     * Redistributions of source code must retain the above copyright
> >> > + *       notice, this list of conditions and the following disclaimer.
> >> > + *     * Redistributions in binary form must reproduce the above copy
> >> > + *       notice, this list of conditions and the following disclaimer in
> >> > + *       the documentation and/or other materials provided with the
> >> > + *       distribution.
> >> > + *     * Neither the name of AMD Corporation nor the names of its
> >> > + *       contributors may be used to endorse or promote products
> derived
> >> > + *       from this software without specific prior written permission.
> >> > + *
> >> > + *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
> >> CONTRIBUTORS
> >> > + *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING,
> BUT
> >> NOT
> >> > + *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
> >> FITNESS FOR
> >> > + *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
> >> COPYRIGHT
> >> > + *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
> >> INCIDENTAL,
> >> > + *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
> >> BUT NOT
> >> > + *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
> >> LOSS OF USE,
> >> > + *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
> CAUSED
> >> AND ON ANY
> >> > + *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
> >> TORT
> >> > + *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
> OUT
> >> OF THE USE
> >> > + *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
> >> DAMAGE.
> >> > + *
> >> > + * AMD PCIe NTB Linux driver
> >> > + *
> >> > + * Contact Information:
> >> > + * Xiangliang Yu<Xiangliang.Yu@amd.com>
> >>
> >> Same comment as above about the space needed here.
> >
> > Ok
> >
> >> > + */
> >> > +
> >> > +#ifndef NTB_HW_AMD_H
> >> > +#define NTB_HW_AMD_H
> >> > +
> >> > +#include <linux/ntb.h>
> >> > +#include <linux/pci.h>
> >> > +
> >> > +#define PCI_DEVICE_ID_AMD_NTB      0x145B
> >> > +#define AMD_LINK_HB_TIMEOUT        msecs_to_jiffies(1000)
> >>
> >> Tab align the second half of these
> >
> > I'll check it.
> >
> >>
> >> > +#define AMD_LINK_STATUS_OFFSET     0x68
> >> > +#define NTB_LIN_STA_ACTIVE_BIT     0x00000002
> >> > +#define NTB_LNK_STA_SPEED_MASK     0x000F0000
> >> > +#define NTB_LNK_STA_WIDTH_MASK     0x03F00000
> >> > +#define NTB_LNK_STA_ACTIVE(x)      (!!((x) &
> NTB_LIN_STA_ACTIVE_BIT))
> >> > +#define NTB_LNK_STA_SPEED(x)       (((x) &
> >> NTB_LNK_STA_SPEED_MASK) >> 16)
> >> > +#define NTB_LNK_STA_WIDTH(x)       (((x) &
> >> NTB_LNK_STA_WIDTH_MASK) >> 20)
> >> > +
> >> > +#ifndef ioread64
> >> > +#ifdef readq
> >> > +#define ioread64 readq
> >> > +#else
> >> > +#define ioread64 _ioread64
> >> > +static inline u64 _ioread64(void __iomem *mmio) {
> >> > +   u64 low, high;
> >> > +
> >> > +   low = ioread32(mmio);
> >> > +   high = ioread32(mmio + sizeof(u32));
> >> > +   return low | (high << 32);
> >> > +}
> >> > +#endif
> >> > +#endif
> >> > +
> >> > +#ifndef iowrite64
> >> > +#ifdef writeq
> >> > +#define iowrite64 writeq
> >> > +#else
> >> > +#define iowrite64 _iowrite64
> >> > +static inline void _iowrite64(u64 val, void __iomem *mmio) {
> >> > +   iowrite32(val, mmio);
> >> > +   iowrite32(val >> 32, mmio + sizeof(u32)); } #endif #endif
> >> > +
> >> > +enum {
> >> > +   /* AMD NTB Capability */
> >> > +   AMD_MW_CNT              = 3,
> >> > +   AMD_DB_CNT              = 16,
> >> > +   AMD_MSIX_VECTOR_CNT     = 24,
> >> > +   AMD_SPADS_CNT           = 16,
> >> > +
> >> > +   /*  AMD NTB register offset */
> >> > +   AMD_CNTL_OFFSET         = 0x200,
> >> > +
> >> > +   /* NTB control register bits */
> >> > +   PMM_REG_CTL             = BIT(21),
> >> > +   SMM_REG_CTL             = BIT(20),
> >> > +   SMM_REG_ACC_PATH        = BIT(18),
> >> > +   PMM_REG_ACC_PATH        = BIT(17),
> >> > +   NTB_CLK_EN              = BIT(16),
> >> > +
> >> > +   AMD_STA_OFFSET          = 0x204,
> >> > +   AMD_PGSLV_OFFSET        = 0x208,
> >> > +   AMD_SPAD_MUX_OFFSET     = 0x20C,
> >> > +   AMD_SPAD_OFFSET         = 0x210,
> >> > +   AMD_RSMU_HCID           = 0x250,
> >> > +   AMD_RSMU_SIID           = 0x254,
> >> > +   AMD_PSION_OFFSET        = 0x300,
> >> > +   AMD_SSION_OFFSET        = 0x330,
> >> > +   AMD_MMINDEX_OFFSET      = 0x400,
> >> > +   AMD_MMDATA_OFFSET       = 0x404,
> >> > +   AMD_SIDEINFO_OFFSET     = 0x408,
> >> > +
> >> > +   AMD_SIDE_MASK           = BIT(0),
> >> > +
> >> > +   /* limit register */
> >> > +   AMD_ROMBARLMT_OFFSET    = 0x410,
> >> > +   AMD_BAR1LMT_OFFSET      = 0x414,
> >> > +   AMD_BAR23LMT_OFFSET     = 0x418,
> >> > +   AMD_BAR45LMT_OFFSET     = 0x420,
> >> > +   /* xlat address */
> >> > +   AMD_POMBARXLAT_OFFSET   = 0x428,
> >> > +   AMD_BAR1XLAT_OFFSET     = 0x430,
> >> > +   AMD_BAR23XLAT_OFFSET    = 0x438,
> >> > +   AMD_BAR45XLAT_OFFSET    = 0x440,
> >> > +   /* doorbell and interrupt */
> >> > +   AMD_DBFM_OFFSET         = 0x450,
> >> > +   AMD_DBREQ_OFFSET        = 0x454,
> >> > +   AMD_MIRRDBSTAT_OFFSET   = 0x458,
> >> > +   AMD_DBMASK_OFFSET       = 0x45C,
> >> > +   AMD_DBSTAT_OFFSET       = 0x460,
> >> > +   AMD_INTMASK_OFFSET      = 0x470,
> >> > +   AMD_INTSTAT_OFFSET      = 0x474,
> >> > +
> >> > +   /* event type */
> >> > +   AMD_PEER_FLUSH_EVENT    = BIT(0),
> >> > +   AMD_PEER_RESET_EVENT    = BIT(1),
> >> > +   AMD_PEER_D3_EVENT       = BIT(2),
> >> > +   AMD_PEER_PMETO_EVENT    = BIT(3),
> >> > +   AMD_PEER_D0_EVENT       = BIT(4),
> >> > +   AMD_EVENT_INTMASK       = (AMD_PEER_FLUSH_EVENT |
> >> > +                           AMD_PEER_RESET_EVENT |
> >> AMD_PEER_D3_EVENT |
> >> > +                           AMD_PEER_PMETO_EVENT |
> >> AMD_PEER_D0_EVENT),
> >> > +
> >> > +   AMD_PMESTAT_OFFSET      = 0x480,
> >> > +   AMD_PMSGTRIG_OFFSET     = 0x490,
> >> > +   AMD_LTRLATENCY_OFFSET   = 0x494,
> >> > +   AMD_FLUSHTRIG_OFFSET    = 0x498,
> >> > +
> >> > +   /* SMU register*/
> >> > +   AMD_SMUACK_OFFSET       = 0x4A0,
> >> > +   AMD_SINRST_OFFSET       = 0x4A4,
> >> > +   AMD_RSPNUM_OFFSET       = 0x4A8,
> >> > +   AMD_SMU_SPADMUTEX       = 0x4B0,
> >> > +   AMD_SMU_SPADOFFSET      = 0x4B4,
> >> > +
> >> > +   AMD_PEER_OFFSET         = 0x400,
> >> > +};
> >>
> >> It seems extremely odd to mix and match different kinds of things in the
> >> same enumerated type.  I'd prefer to have these as #defines or
> separated
> >> enum types.
> >
> > The code following AHCI.h style, it put field definition of register close to
> register
> > Marco definition seems appropriate to me. Actually, I have seen lot of the
> style in kernel.
> 
> It's ugly, but as long as you support it then I'm okay with it :)
> 
> >> > +
> >> > +struct amd_ntb_dev;
> >> > +
> >> > +struct amd_ntb_vec {
> >> > +   struct amd_ntb_dev      *ndev;
> >> > +   int                     num;
> >> > +};
> >> > +
> >> > +struct amd_ntb_dev {
> >> > +   struct ntb_dev ntb;
> >> > +
> >> > +   u32 ntb_side;
> >> > +   u32 lnk_sta;
> >> > +   u32 cntl_sta;
> >> > +   u32 peer_sta;
> >> > +
> >> > +   unsigned char mw_count;
> >> > +   unsigned char spad_count;
> >> > +   unsigned char db_count;
> >> > +   unsigned char msix_vec_count;
> >> > +
> >> > +   u64 db_valid_mask;
> >> > +   u64 db_mask;
> >> > +   u32 int_mask;
> >> > +
> >> > +   struct msix_entry *msix;
> >> > +   struct amd_ntb_vec *vec;
> >> > +
> >> > +   spinlock_t db_mask_lock;
> >> > +
> >> > +   void __iomem *self_mmio;
> >> > +   void __iomem *peer_mmio;
> >> > +   unsigned int self_spad;
> >> > +   unsigned int peer_spad;
> >> > +
> >> > +   struct delayed_work hb_timer;
> >> > +
> >> > +   struct dentry *debugfs_dir;
> >> > +   struct dentry *debugfs_info;
> >> > +};
> >> > +
> >> > +#define ndev_pdev(ndev) ((ndev)->ntb.pdev) #define
> ndev_name(ndev)
> >> > +pci_name(ndev_pdev(ndev)) #define ndev_dev(ndev)
> >> > +(&ndev_pdev(ndev)->dev) #define ntb_ndev(__ntb)
> >> container_of(__ntb,
> >> > +struct amd_ntb_dev, ntb) #define hb_ndev(__work)
> >> container_of(__work,
> >> > +struct amd_ntb_dev, hb_timer.work)
> >> > +
> >>
> >> This should probably be moved into the device driver C file (as they are
> not
> >> called anywhere else).
> >
> > I just following intel ntb driver, I'll also change it if you change intel driver.
> >
> >>
> >> > +static int amd_ntb_mw_count(struct ntb_dev *ntb); static int
> >> > +amd_ntb_mw_get_range(struct ntb_dev *ntb, int idx,
> >> > +                   phys_addr_t *base, resource_size_t *size,
> >> > +                   resource_size_t *align, resource_size_t *algin_size);
> >> static int
> >> > +amd_ntb_mw_set_trans(struct ntb_dev *ndev, int idx,
> >> > +                           dma_addr_t addr, resource_size_t size);
> >> static int
> >> > +amd_ntb_link_is_up(struct ntb_dev *ntb,
> >> > +                           enum ntb_speed *speed,
> >> > +                           enum ntb_width *width);
> >> > +static int amd_ntb_link_enable(struct ntb_dev *ntb,
> >> > +                           enum ntb_speed speed,
> >> > +                           enum ntb_width width);
> >> > +static int amd_ntb_link_disable(struct ntb_dev *ntb); static u64
> >> > +amd_ntb_db_valid_mask(struct ntb_dev *ntb); static int
> >> > +amd_ntb_db_vector_count(struct ntb_dev *ntb); static u64
> >> > +amd_ntb_db_vector_mask(struct ntb_dev *ntb, int db_vector); static
> >> > +u64 amd_ntb_db_read(struct ntb_dev *ntb); static int
> >> > +amd_ntb_db_clear(struct ntb_dev *ntb, u64 db_bits); static int
> >> > +amd_ntb_db_set_mask(struct ntb_dev *ntb, u64 db_bits); static int
> >> > +amd_ntb_db_clear_mask(struct ntb_dev *ntb, u64 db_bits); static int
> >> > +amd_ntb_peer_db_addr(struct ntb_dev *ntb,
> >> > +                           phys_addr_t *db_addr,
> >> > +                           resource_size_t *db_size);
> >> > +static int amd_ntb_peer_db_set(struct ntb_dev *ntb, u64 db_bits);
> >> > +static int amd_ntb_spad_count(struct ntb_dev *ntb); static u32
> >> > +amd_ntb_spad_read(struct ntb_dev *ntb, int idx); static int
> >> > +amd_ntb_spad_write(struct ntb_dev *ntb, int idx, u32 val); static int
> >> > +amd_ntb_peer_spad_addr(struct ntb_dev *ntb, int idx,
> >> > +                                   phys_addr_t *spad_addr);
> >> > +static u32 amd_ntb_peer_spad_read(struct ntb_dev *ntb, int idx);
> >> > +static int amd_ntb_peer_spad_write(struct ntb_dev *ntb, int idx, u32
> >> > +val);
> >>
> >> I believe all of these function declarations should be removed.
> > Ok.
> >
> > And please see my comments, I'll submit v4 patch immediately when
> getting your feedback.
> 
> Thanks again.

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2016-01-18 15:26 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2016-01-14 11:44 [PATCH V3 1/2] NTB: Add AMD PCI-Express NTB driver Xiangliang Yu
2016-01-14 15:56 ` Allen Hubbe
2016-01-15  7:37   ` Yu, Xiangliang
2016-01-15 17:13     ` Allen Hubbe
2016-01-18  6:47       ` Yu, Xiangliang
2016-01-18  4:04 ` Jon Mason
2016-01-18  9:42   ` Yu, Xiangliang
2016-01-18 15:07     ` Jon Mason
2016-01-18 15:26       ` Yu, Xiangliang

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox