* [PATCH v7 01/13] PCI/P2PDMA: Support peer-to-peer memory
[not found] ` <20180925162231.4354-1-logang-OTvnGxWRz7hWk0Htik3J/w@public.gmane.org>
@ 2018-09-25 16:22 ` Logan Gunthorpe
[not found] ` <20180925162231.4354-2-logang-OTvnGxWRz7hWk0Htik3J/w@public.gmane.org>
2018-09-25 16:22 ` [PATCH v7 02/13] PCI/P2PDMA: Add sysfs group to display p2pmem stats Logan Gunthorpe
` (9 subsequent siblings)
10 siblings, 1 reply; 25+ messages in thread
From: Logan Gunthorpe @ 2018-09-25 16:22 UTC (permalink / raw)
To: linux-kernel-u79uwXL29TY76Z2rM5mHXA,
linux-pci-u79uwXL29TY76Z2rM5mHXA,
linux-nvme-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r,
linux-rdma-u79uwXL29TY76Z2rM5mHXA,
linux-nvdimm-hn68Rpc1hR1g9hUCZPvPmw,
linux-block-u79uwXL29TY76Z2rM5mHXA
Cc: Jens Axboe, Christian König, Benjamin Herrenschmidt,
Alex Williamson, Jérôme Glisse, Jason Gunthorpe,
Bjorn Helgaas, Max Gurtovoy, Christoph Hellwig
Some PCI devices may have memory mapped in a BAR space that's
intended for use in peer-to-peer transactions. In order to enable
such transactions the memory must be registered with ZONE_DEVICE pages
so it can be used by DMA interfaces in existing drivers.
Add an interface for other subsystems to find and allocate chunks of P2P
memory as necessary to facilitate transfers between two PCI peers:
int pci_p2pdma_add_client();
struct pci_dev *pci_p2pmem_find();
void *pci_alloc_p2pmem();
The new interface requires a driver to collect a list of client devices
involved in the transaction with the pci_p2pmem_add_client*() functions
then call pci_p2pmem_find() to obtain any suitable P2P memory. Once
this is done the list is bound to the memory and the calling driver is
free to add and remove clients as necessary (adding incompatible clients
will fail). With a suitable p2pmem device, memory can then be
allocated with pci_alloc_p2pmem() for use in DMA transactions.
Depending on hardware, using peer-to-peer memory may reduce the bandwidth
of the transfer but can significantly reduce pressure on system memory.
This may be desirable in many cases: for example a system could be designed
with a small CPU connected to a PCIe switch by a small number of lanes
which would maximize the number of lanes available to connect to NVMe
devices.
The code is designed to only utilize the p2pmem device if all the devices
involved in a transfer are behind the same PCI bridge. This is because we
have no way of knowing whether peer-to-peer routing between PCIe Root Ports
is supported (PCIe r4.0, sec 1.3.1). Additionally, the benefits of P2P
transfers that go through the RC is limited to only reducing DRAM usage
and, in some cases, coding convenience. The PCI-SIG may be exploring
adding a new capability bit to advertise whether this is possible for
future hardware.
This commit includes significant rework and feedback from Christoph
Hellwig.
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Logan Gunthorpe <logang@deltatee.com>
Acked-by: Bjorn Helgaas <bhelgaas@google.com> # PCI pieces
---
drivers/pci/Kconfig | 17 +
drivers/pci/Makefile | 1 +
drivers/pci/p2pdma.c | 764 +++++++++++++++++++++++++++++++++++++
include/linux/memremap.h | 5 +
include/linux/mm.h | 18 +
include/linux/pci-p2pdma.h | 101 +++++
include/linux/pci.h | 4 +
7 files changed, 910 insertions(+)
create mode 100644 drivers/pci/p2pdma.c
create mode 100644 include/linux/pci-p2pdma.h
diff --git a/drivers/pci/Kconfig b/drivers/pci/Kconfig
index 56ff8f6d31fc..deb68be4fdac 100644
--- a/drivers/pci/Kconfig
+++ b/drivers/pci/Kconfig
@@ -132,6 +132,23 @@ config PCI_PASID
If unsure, say N.
+config PCI_P2PDMA
+ bool "PCI peer-to-peer transfer support"
+ depends on PCI && ZONE_DEVICE
+ select GENERIC_ALLOCATOR
+ help
+ Enableѕ drivers to do PCI peer-to-peer transactions to and from
+ BARs that are exposed in other devices that are the part of
+ the hierarchy where peer-to-peer DMA is guaranteed by the PCI
+ specification to work (ie. anything below a single PCI bridge).
+
+ Many PCIe root complexes do not support P2P transactions and
+ it's hard to tell which support it at all, so at this time,
+ P2P DMA transations must be between devices behind the same root
+ port.
+
+ If unsure, say N.
+
config PCI_LABEL
def_bool y if (DMI || ACPI)
depends on PCI
diff --git a/drivers/pci/Makefile b/drivers/pci/Makefile
index 1b2cfe51e8d7..85f4a703b2be 100644
--- a/drivers/pci/Makefile
+++ b/drivers/pci/Makefile
@@ -26,6 +26,7 @@ obj-$(CONFIG_PCI_SYSCALL) += syscall.o
obj-$(CONFIG_PCI_STUB) += pci-stub.o
obj-$(CONFIG_PCI_PF_STUB) += pci-pf-stub.o
obj-$(CONFIG_PCI_ECAM) += ecam.o
+obj-$(CONFIG_PCI_P2PDMA) += p2pdma.o
obj-$(CONFIG_XEN_PCIDEV_FRONTEND) += xen-pcifront.o
# Endpoint library must be initialized before its users
diff --git a/drivers/pci/p2pdma.c b/drivers/pci/p2pdma.c
new file mode 100644
index 000000000000..d9616522c1fc
--- /dev/null
+++ b/drivers/pci/p2pdma.c
@@ -0,0 +1,764 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * PCI Peer 2 Peer DMA support.
+ *
+ * Copyright (c) 2016-2018, Logan Gunthorpe
+ * Copyright (c) 2016-2017, Microsemi Corporation
+ * Copyright (c) 2017, Christoph Hellwig
+ * Copyright (c) 2018, Eideticom Inc.
+ */
+
+#include <linux/pci-p2pdma.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/genalloc.h>
+#include <linux/memremap.h>
+#include <linux/percpu-refcount.h>
+#include <linux/random.h>
+#include <linux/seq_buf.h>
+
+struct pci_p2pdma {
+ struct percpu_ref devmap_ref;
+ struct completion devmap_ref_done;
+ struct gen_pool *pool;
+ bool p2pmem_published;
+};
+
+static void pci_p2pdma_percpu_release(struct percpu_ref *ref)
+{
+ struct pci_p2pdma *p2p =
+ container_of(ref, struct pci_p2pdma, devmap_ref);
+
+ complete_all(&p2p->devmap_ref_done);
+}
+
+static void pci_p2pdma_percpu_kill(void *data)
+{
+ struct percpu_ref *ref = data;
+
+ if (percpu_ref_is_dying(ref))
+ return;
+
+ percpu_ref_kill(ref);
+}
+
+static void pci_p2pdma_release(void *data)
+{
+ struct pci_dev *pdev = data;
+
+ if (!pdev->p2pdma)
+ return;
+
+ wait_for_completion(&pdev->p2pdma->devmap_ref_done);
+ percpu_ref_exit(&pdev->p2pdma->devmap_ref);
+
+ gen_pool_destroy(pdev->p2pdma->pool);
+ pdev->p2pdma = NULL;
+}
+
+static int pci_p2pdma_setup(struct pci_dev *pdev)
+{
+ int error = -ENOMEM;
+ struct pci_p2pdma *p2p;
+
+ p2p = devm_kzalloc(&pdev->dev, sizeof(*p2p), GFP_KERNEL);
+ if (!p2p)
+ return -ENOMEM;
+
+ p2p->pool = gen_pool_create(PAGE_SHIFT, dev_to_node(&pdev->dev));
+ if (!p2p->pool)
+ goto out;
+
+ init_completion(&p2p->devmap_ref_done);
+ error = percpu_ref_init(&p2p->devmap_ref,
+ pci_p2pdma_percpu_release, 0, GFP_KERNEL);
+ if (error)
+ goto out_pool_destroy;
+
+ percpu_ref_switch_to_atomic_sync(&p2p->devmap_ref);
+
+ error = devm_add_action_or_reset(&pdev->dev, pci_p2pdma_release, pdev);
+ if (error)
+ goto out_pool_destroy;
+
+ pdev->p2pdma = p2p;
+
+ return 0;
+
+out_pool_destroy:
+ gen_pool_destroy(p2p->pool);
+out:
+ devm_kfree(&pdev->dev, p2p);
+ return error;
+}
+
+/**
+ * pci_p2pdma_add_resource - add memory for use as p2p memory
+ * @pdev: the device to add the memory to
+ * @bar: PCI BAR to add
+ * @size: size of the memory to add, may be zero to use the whole BAR
+ * @offset: offset into the PCI BAR
+ *
+ * The memory will be given ZONE_DEVICE struct pages so that it may
+ * be used with any DMA request.
+ */
+int pci_p2pdma_add_resource(struct pci_dev *pdev, int bar, size_t size,
+ u64 offset)
+{
+ struct dev_pagemap *pgmap;
+ void *addr;
+ int error;
+
+ if (!(pci_resource_flags(pdev, bar) & IORESOURCE_MEM))
+ return -EINVAL;
+
+ if (offset >= pci_resource_len(pdev, bar))
+ return -EINVAL;
+
+ if (!size)
+ size = pci_resource_len(pdev, bar) - offset;
+
+ if (size + offset > pci_resource_len(pdev, bar))
+ return -EINVAL;
+
+ if (!pdev->p2pdma) {
+ error = pci_p2pdma_setup(pdev);
+ if (error)
+ return error;
+ }
+
+ pgmap = devm_kzalloc(&pdev->dev, sizeof(*pgmap), GFP_KERNEL);
+ if (!pgmap)
+ return -ENOMEM;
+
+ pgmap->res.start = pci_resource_start(pdev, bar) + offset;
+ pgmap->res.end = pgmap->res.start + size - 1;
+ pgmap->res.flags = pci_resource_flags(pdev, bar);
+ pgmap->ref = &pdev->p2pdma->devmap_ref;
+ pgmap->type = MEMORY_DEVICE_PCI_P2PDMA;
+
+ addr = devm_memremap_pages(&pdev->dev, pgmap);
+ if (IS_ERR(addr)) {
+ error = PTR_ERR(addr);
+ goto pgmap_free;
+ }
+
+ error = gen_pool_add_virt(pdev->p2pdma->pool, (unsigned long)addr,
+ pci_bus_address(pdev, bar) + offset,
+ resource_size(&pgmap->res), dev_to_node(&pdev->dev));
+ if (error)
+ goto pgmap_free;
+
+ error = devm_add_action_or_reset(&pdev->dev, pci_p2pdma_percpu_kill,
+ &pdev->p2pdma->devmap_ref);
+ if (error)
+ goto pgmap_free;
+
+ pci_info(pdev, "added peer-to-peer DMA memory %pR\n",
+ &pgmap->res);
+
+ return 0;
+
+pgmap_free:
+ devres_free(pgmap);
+ return error;
+}
+EXPORT_SYMBOL_GPL(pci_p2pdma_add_resource);
+
+static struct pci_dev *find_parent_pci_dev(struct device *dev)
+{
+ struct device *parent;
+
+ dev = get_device(dev);
+
+ while (dev) {
+ if (dev_is_pci(dev))
+ return to_pci_dev(dev);
+
+ parent = get_device(dev->parent);
+ put_device(dev);
+ dev = parent;
+ }
+
+ return NULL;
+}
+
+/*
+ * Check if a PCI bridge has its ACS redirection bits set to redirect P2P
+ * TLPs upstream via ACS. Returns 1 if the packets will be redirected
+ * upstream, 0 otherwise.
+ */
+static int pci_bridge_has_acs_redir(struct pci_dev *pdev)
+{
+ int pos;
+ u16 ctrl;
+
+ pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ACS);
+ if (!pos)
+ return 0;
+
+ pci_read_config_word(pdev, pos + PCI_ACS_CTRL, &ctrl);
+
+ if (ctrl & (PCI_ACS_RR | PCI_ACS_CR | PCI_ACS_EC))
+ return 1;
+
+ return 0;
+}
+
+static void seq_buf_print_bus_devfn(struct seq_buf *buf, struct pci_dev *pdev)
+{
+ if (!buf)
+ return;
+
+ seq_buf_printf(buf, "%s;", pci_name(pdev));
+}
+
+/*
+ * Find the distance through the nearest common upstream bridge between
+ * two PCI devices.
+ *
+ * If the two devices are the same device then 0 will be returned.
+ *
+ * If there are two virtual functions of the same device behind the same
+ * bridge port then 2 will be returned (one step down to the PCIe switch,
+ * then one step back to the same device).
+ *
+ * In the case where two devices are connected to the same PCIe switch, the
+ * value 4 will be returned. This corresponds to the following PCI tree:
+ *
+ * -+ Root Port
+ * \+ Switch Upstream Port
+ * +-+ Switch Downstream Port
+ * + \- Device A
+ * \-+ Switch Downstream Port
+ * \- Device B
+ *
+ * The distance is 4 because we traverse from Device A through the downstream
+ * port of the switch, to the common upstream port, back up to the second
+ * downstream port and then to Device B.
+ *
+ * Any two devices that don't have a common upstream bridge will return -1.
+ * In this way devices on separate PCIe root ports will be rejected, which
+ * is what we want for peer-to-peer seeing each PCIe root port defines a
+ * separate hierarchy domain and there's no way to determine whether the root
+ * complex supports forwarding between them.
+ *
+ * In the case where two devices are connected to different PCIe switches,
+ * this function will still return a positive distance as long as both
+ * switches eventually have a common upstream bridge. Note this covers
+ * the case of using multiple PCIe switches to achieve a desired level of
+ * fan-out from a root port. The exact distance will be a function of the
+ * number of switches between Device A and Device B.
+ *
+ * If a bridge which has any ACS redirection bits set is in the path
+ * then this functions will return -2. This is so we reject any
+ * cases where the TLPs are forwarded up into the root complex.
+ * In this case, a list of all infringing bridge addresses will be
+ * populated in acs_list (assuming it's non-null) for printk purposes.
+ */
+static int upstream_bridge_distance(struct pci_dev *a,
+ struct pci_dev *b,
+ struct seq_buf *acs_list)
+{
+ int dist_a = 0;
+ int dist_b = 0;
+ struct pci_dev *bb = NULL;
+ int acs_cnt = 0;
+
+ /*
+ * Note, we don't need to take references to devices returned by
+ * pci_upstream_bridge() seeing we hold a reference to a child
+ * device which will already hold a reference to the upstream bridge.
+ */
+
+ while (a) {
+ dist_b = 0;
+
+ if (pci_bridge_has_acs_redir(a)) {
+ seq_buf_print_bus_devfn(acs_list, a);
+ acs_cnt++;
+ }
+
+ bb = b;
+
+ while (bb) {
+ if (a == bb)
+ goto check_b_path_acs;
+
+ bb = pci_upstream_bridge(bb);
+ dist_b++;
+ }
+
+ a = pci_upstream_bridge(a);
+ dist_a++;
+ }
+
+ return -1;
+
+check_b_path_acs:
+ bb = b;
+
+ while (bb) {
+ if (a == bb)
+ break;
+
+ if (pci_bridge_has_acs_redir(bb)) {
+ seq_buf_print_bus_devfn(acs_list, bb);
+ acs_cnt++;
+ }
+
+ bb = pci_upstream_bridge(bb);
+ }
+
+ if (acs_cnt)
+ return -2;
+
+ return dist_a + dist_b;
+}
+
+static int upstream_bridge_distance_warn(struct pci_dev *provider,
+ struct pci_dev *client)
+{
+ struct seq_buf acs_list;
+ int ret;
+
+ seq_buf_init(&acs_list, kmalloc(PAGE_SIZE, GFP_KERNEL), PAGE_SIZE);
+ if (!acs_list.buffer)
+ return -ENOMEM;
+
+ ret = upstream_bridge_distance(provider, client, &acs_list);
+ if (ret == -2) {
+ pci_warn(client, "cannot be used for peer-to-peer DMA as ACS redirect is set between the client and provider (%s)\n",
+ pci_name(provider));
+ /* Drop final semicolon */
+ acs_list.buffer[acs_list.len-1] = 0;
+ pci_warn(client, "to disable ACS redirect for this path, add the kernel parameter: pci=disable_acs_redir=%s\n",
+ acs_list.buffer);
+
+ } else if (ret < 0) {
+ pci_warn(client, "cannot be used for peer-to-peer DMA as the client and provider (%s) do not share an upstream bridge\n",
+ pci_name(provider));
+ }
+
+ kfree(acs_list.buffer);
+
+ return ret;
+}
+
+struct pci_p2pdma_client {
+ struct list_head list;
+ struct pci_dev *client;
+ struct pci_dev *provider;
+};
+
+/**
+ * pci_p2pdma_add_client - allocate a new element in a client device list
+ * @head: list head of p2pdma clients
+ * @dev: device to add to the list
+ *
+ * This adds @dev to a list of clients used by a p2pdma device.
+ * This list should be passed to pci_p2pmem_find(). Once pci_p2pmem_find() has
+ * been called successfully, the list will be bound to a specific p2pdma
+ * device and new clients can only be added to the list if they are
+ * supported by that p2pdma device.
+ *
+ * The caller is expected to have a lock which protects @head as necessary
+ * so that none of the pci_p2p functions can be called concurrently
+ * on that list.
+ *
+ * Returns 0 if the client was successfully added.
+ */
+int pci_p2pdma_add_client(struct list_head *head, struct device *dev)
+{
+ struct pci_p2pdma_client *item, *new_item;
+ struct pci_dev *provider = NULL;
+ struct pci_dev *client;
+ int ret;
+
+ if (IS_ENABLED(CONFIG_DMA_VIRT_OPS) && dev->dma_ops == &dma_virt_ops) {
+ dev_warn(dev, "cannot be used for peer-to-peer DMA because the driver makes use of dma_virt_ops\n");
+ return -ENODEV;
+ }
+
+ client = find_parent_pci_dev(dev);
+ if (!client) {
+ dev_warn(dev, "cannot be used for peer-to-peer DMA as it is not a PCI device\n");
+ return -ENODEV;
+ }
+
+ item = list_first_entry_or_null(head, struct pci_p2pdma_client, list);
+ if (item && item->provider) {
+ provider = item->provider;
+
+ ret = upstream_bridge_distance_warn(provider, client);
+ if (ret < 0) {
+ ret = -EXDEV;
+ goto put_client;
+ }
+ }
+
+ new_item = kzalloc(sizeof(*new_item), GFP_KERNEL);
+ if (!new_item) {
+ ret = -ENOMEM;
+ goto put_client;
+ }
+
+ new_item->client = client;
+ new_item->provider = pci_dev_get(provider);
+
+ list_add_tail(&new_item->list, head);
+
+ return 0;
+
+put_client:
+ pci_dev_put(client);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(pci_p2pdma_add_client);
+
+static void pci_p2pdma_client_free(struct pci_p2pdma_client *item)
+{
+ list_del(&item->list);
+ pci_dev_put(item->client);
+ pci_dev_put(item->provider);
+ kfree(item);
+}
+
+/**
+ * pci_p2pdma_remove_client - remove and free a p2pdma client
+ * @head: list head of p2pdma clients
+ * @dev: device to remove from the list
+ *
+ * This removes @dev from a list of clients used by a p2pdma device.
+ * The caller is expected to have a lock which protects @head as necessary
+ * so that none of the pci_p2p functions can be called concurrently
+ * on that list.
+ */
+void pci_p2pdma_remove_client(struct list_head *head, struct device *dev)
+{
+ struct pci_p2pdma_client *pos, *tmp;
+ struct pci_dev *pdev;
+
+ pdev = find_parent_pci_dev(dev);
+ if (!pdev)
+ return;
+
+ list_for_each_entry_safe(pos, tmp, head, list) {
+ if (pos->client != pdev)
+ continue;
+
+ pci_p2pdma_client_free(pos);
+ }
+
+ pci_dev_put(pdev);
+}
+EXPORT_SYMBOL_GPL(pci_p2pdma_remove_client);
+
+/**
+ * pci_p2pdma_client_list_free - free an entire list of p2pdma clients
+ * @head: list head of p2pdma clients
+ *
+ * This removes all devices in a list of clients used by a p2pdma device.
+ * The caller is expected to have a lock which protects @head as necessary
+ * so that none of the pci_p2pdma functions can be called concurrently
+ * on that list.
+ */
+void pci_p2pdma_client_list_free(struct list_head *head)
+{
+ struct pci_p2pdma_client *pos, *tmp;
+
+ list_for_each_entry_safe(pos, tmp, head, list)
+ pci_p2pdma_client_free(pos);
+}
+EXPORT_SYMBOL_GPL(pci_p2pdma_client_list_free);
+
+/**
+ * pci_p2pdma_distance - Determive the cumulative distance between
+ * a p2pdma provider and the clients in use.
+ * @provider: p2pdma provider to check against the client list
+ * @clients: list of devices to check (NULL-terminated)
+ * @verbose: if true, print warnings for devices when we return -1
+ *
+ * Returns -1 if any of the clients are not compatible (behind the same
+ * root port as the provider), otherwise returns a positive number where
+ * the lower number is the preferrable choice. (If there's one client
+ * that's the same as the provider it will return 0, which is best choice).
+ *
+ * For now, "compatible" means the provider and the clients are all behind
+ * the same PCI root port. This cuts out cases that may work but is safest
+ * for the user. Future work can expand this to white-list root complexes that
+ * can safely forward between each ports.
+ */
+int pci_p2pdma_distance(struct pci_dev *provider, struct list_head *clients,
+ bool verbose)
+{
+ struct pci_p2pdma_client *pos;
+ int ret;
+ int distance = 0;
+ bool not_supported = false;
+
+ if (list_empty(clients))
+ return -1;
+
+ list_for_each_entry(pos, clients, list) {
+ if (verbose)
+ ret = upstream_bridge_distance_warn(provider,
+ pos->client);
+ else
+ ret = upstream_bridge_distance(provider, pos->client,
+ NULL);
+
+ if (ret < 0)
+ not_supported = true;
+
+ if (not_supported && !verbose)
+ break;
+
+ distance += ret;
+ }
+
+ if (not_supported)
+ return -1;
+
+ return distance;
+}
+EXPORT_SYMBOL_GPL(pci_p2pdma_distance);
+
+/**
+ * pci_p2pdma_assign_provider - Check compatibility (as per pci_p2pdma_distance)
+ * and assign a provider to a list of clients
+ * @provider: p2pdma provider to assign to the client list
+ * @clients: list of devices to check (NULL-terminated)
+ *
+ * Returns false if any of the clients are not compatible, true if the
+ * provider was successfully assigned to the clients.
+ */
+bool pci_p2pdma_assign_provider(struct pci_dev *provider,
+ struct list_head *clients)
+{
+ struct pci_p2pdma_client *pos;
+
+ if (pci_p2pdma_distance(provider, clients, true) < 0)
+ return false;
+
+ list_for_each_entry(pos, clients, list)
+ pos->provider = provider;
+
+ return true;
+}
+EXPORT_SYMBOL_GPL(pci_p2pdma_assign_provider);
+
+/**
+ * pci_has_p2pmem - check if a given PCI device has published any p2pmem
+ * @pdev: PCI device to check
+ */
+bool pci_has_p2pmem(struct pci_dev *pdev)
+{
+ return pdev->p2pdma && pdev->p2pdma->p2pmem_published;
+}
+EXPORT_SYMBOL_GPL(pci_has_p2pmem);
+
+/**
+ * pci_p2pmem_find - find a peer-to-peer DMA memory device compatible with
+ * the specified list of clients and shortest distance (as determined
+ * by pci_p2pmem_dma())
+ * @clients: list of devices to check (NULL-terminated)
+ *
+ * If multiple devices are behind the same switch, the one "closest" to the
+ * client devices in use will be chosen first. (So if one of the providers are
+ * the same as one of the clients, that provider will be used ahead of any
+ * other providers that are unrelated). If multiple providers are an equal
+ * distance away, one will be chosen at random.
+ *
+ * Returns a pointer to the PCI device with a reference taken (use pci_dev_put
+ * to return the reference) or NULL if no compatible device is found. The
+ * found provider will also be assigned to the client list.
+ */
+struct pci_dev *pci_p2pmem_find(struct list_head *clients)
+{
+ struct pci_dev *pdev = NULL;
+ struct pci_p2pdma_client *pos;
+ int distance;
+ int closest_distance = INT_MAX;
+ struct pci_dev **closest_pdevs;
+ int dev_cnt = 0;
+ const int max_devs = PAGE_SIZE / sizeof(*closest_pdevs);
+ int i;
+
+ closest_pdevs = kmalloc(PAGE_SIZE, GFP_KERNEL);
+ if (!closest_pdevs)
+ return NULL;
+
+ while ((pdev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, pdev))) {
+ if (!pci_has_p2pmem(pdev))
+ continue;
+
+ distance = pci_p2pdma_distance(pdev, clients, false);
+ if (distance < 0 || distance > closest_distance)
+ continue;
+
+ if (distance == closest_distance && dev_cnt >= max_devs)
+ continue;
+
+ if (distance < closest_distance) {
+ for (i = 0; i < dev_cnt; i++)
+ pci_dev_put(closest_pdevs[i]);
+
+ dev_cnt = 0;
+ closest_distance = distance;
+ }
+
+ closest_pdevs[dev_cnt++] = pci_dev_get(pdev);
+ }
+
+ if (dev_cnt)
+ pdev = pci_dev_get(closest_pdevs[prandom_u32_max(dev_cnt)]);
+
+ for (i = 0; i < dev_cnt; i++)
+ pci_dev_put(closest_pdevs[i]);
+
+ if (pdev)
+ list_for_each_entry(pos, clients, list)
+ pos->provider = pdev;
+
+ kfree(closest_pdevs);
+ return pdev;
+}
+EXPORT_SYMBOL_GPL(pci_p2pmem_find);
+
+/**
+ * pci_alloc_p2p_mem - allocate peer-to-peer DMA memory
+ * @pdev: the device to allocate memory from
+ * @size: number of bytes to allocate
+ *
+ * Returns the allocated memory or NULL on error.
+ */
+void *pci_alloc_p2pmem(struct pci_dev *pdev, size_t size)
+{
+ void *ret;
+
+ if (unlikely(!pdev->p2pdma))
+ return NULL;
+
+ if (unlikely(!percpu_ref_tryget_live(&pdev->p2pdma->devmap_ref)))
+ return NULL;
+
+ ret = (void *)gen_pool_alloc(pdev->p2pdma->pool, size);
+
+ if (unlikely(!ret))
+ percpu_ref_put(&pdev->p2pdma->devmap_ref);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(pci_alloc_p2pmem);
+
+/**
+ * pci_free_p2pmem - allocate peer-to-peer DMA memory
+ * @pdev: the device the memory was allocated from
+ * @addr: address of the memory that was allocated
+ * @size: number of bytes that was allocated
+ */
+void pci_free_p2pmem(struct pci_dev *pdev, void *addr, size_t size)
+{
+ gen_pool_free(pdev->p2pdma->pool, (uintptr_t)addr, size);
+ percpu_ref_put(&pdev->p2pdma->devmap_ref);
+}
+EXPORT_SYMBOL_GPL(pci_free_p2pmem);
+
+/**
+ * pci_virt_to_bus - return the PCI bus address for a given virtual
+ * address obtained with pci_alloc_p2pmem()
+ * @pdev: the device the memory was allocated from
+ * @addr: address of the memory that was allocated
+ */
+pci_bus_addr_t pci_p2pmem_virt_to_bus(struct pci_dev *pdev, void *addr)
+{
+ if (!addr)
+ return 0;
+ if (!pdev->p2pdma)
+ return 0;
+
+ /*
+ * Note: when we added the memory to the pool we used the PCI
+ * bus address as the physical address. So gen_pool_virt_to_phys()
+ * actually returns the bus address despite the misleading name.
+ */
+ return gen_pool_virt_to_phys(pdev->p2pdma->pool, (unsigned long)addr);
+}
+EXPORT_SYMBOL_GPL(pci_p2pmem_virt_to_bus);
+
+/**
+ * pci_p2pmem_alloc_sgl - allocate peer-to-peer DMA memory in a scatterlist
+ * @pdev: the device to allocate memory from
+ * @sgl: the allocated scatterlist
+ * @nents: the number of SG entries in the list
+ * @length: number of bytes to allocate
+ *
+ * Returns 0 on success
+ */
+struct scatterlist *pci_p2pmem_alloc_sgl(struct pci_dev *pdev,
+ unsigned int *nents, u32 length)
+{
+ struct scatterlist *sg;
+ void *addr;
+
+ sg = kzalloc(sizeof(*sg), GFP_KERNEL);
+ if (!sg)
+ return NULL;
+
+ sg_init_table(sg, 1);
+
+ addr = pci_alloc_p2pmem(pdev, length);
+ if (!addr)
+ goto out_free_sg;
+
+ sg_set_buf(sg, addr, length);
+ *nents = 1;
+ return sg;
+
+out_free_sg:
+ kfree(sg);
+ return NULL;
+}
+EXPORT_SYMBOL_GPL(pci_p2pmem_alloc_sgl);
+
+/**
+ * pci_p2pmem_free_sgl - free a scatterlist allocated by pci_p2pmem_alloc_sgl()
+ * @pdev: the device to allocate memory from
+ * @sgl: the allocated scatterlist
+ * @nents: the number of SG entries in the list
+ */
+void pci_p2pmem_free_sgl(struct pci_dev *pdev, struct scatterlist *sgl)
+{
+ struct scatterlist *sg;
+ int count;
+
+ for_each_sg(sgl, sg, INT_MAX, count) {
+ if (!sg)
+ break;
+
+ pci_free_p2pmem(pdev, sg_virt(sg), sg->length);
+ }
+ kfree(sgl);
+}
+EXPORT_SYMBOL_GPL(pci_p2pmem_free_sgl);
+
+/**
+ * pci_p2pmem_publish - publish the peer-to-peer DMA memory for use by
+ * other devices with pci_p2pmem_find()
+ * @pdev: the device with peer-to-peer DMA memory to publish
+ * @publish: set to true to publish the memory, false to unpublish it
+ *
+ * Published memory can be used by other PCI device drivers for
+ * peer-2-peer DMA operations. Non-published memory is reserved for
+ * exlusive use of the device driver that registers the peer-to-peer
+ * memory.
+ */
+void pci_p2pmem_publish(struct pci_dev *pdev, bool publish)
+{
+ if (publish && !pdev->p2pdma)
+ return;
+
+ pdev->p2pdma->p2pmem_published = publish;
+}
+EXPORT_SYMBOL_GPL(pci_p2pmem_publish);
diff --git a/include/linux/memremap.h b/include/linux/memremap.h
index f91f9e763557..9553370ebdad 100644
--- a/include/linux/memremap.h
+++ b/include/linux/memremap.h
@@ -53,11 +53,16 @@ struct vmem_altmap {
* wakeup event whenever a page is unpinned and becomes idle. This
* wakeup is used to coordinate physical address space management (ex:
* fs truncate/hole punch) vs pinned pages (ex: device dma).
+ *
+ * MEMORY_DEVICE_PCI_P2PDMA:
+ * Device memory residing in a PCI BAR intended for use with Peer-to-Peer
+ * transactions.
*/
enum memory_type {
MEMORY_DEVICE_PRIVATE = 1,
MEMORY_DEVICE_PUBLIC,
MEMORY_DEVICE_FS_DAX,
+ MEMORY_DEVICE_PCI_P2PDMA,
};
/*
diff --git a/include/linux/mm.h b/include/linux/mm.h
index a61ebe8ad4ca..2055df412a77 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -890,6 +890,19 @@ static inline bool is_device_public_page(const struct page *page)
page->pgmap->type == MEMORY_DEVICE_PUBLIC;
}
+#ifdef CONFIG_PCI_P2PDMA
+static inline bool is_pci_p2pdma_page(const struct page *page)
+{
+ return is_zone_device_page(page) &&
+ page->pgmap->type == MEMORY_DEVICE_PCI_P2PDMA;
+}
+#else /* CONFIG_PCI_P2PDMA */
+static inline bool is_pci_p2pdma_page(const struct page *page)
+{
+ return false;
+}
+#endif /* CONFIG_PCI_P2PDMA */
+
#else /* CONFIG_DEV_PAGEMAP_OPS */
static inline void dev_pagemap_get_ops(void)
{
@@ -913,6 +926,11 @@ static inline bool is_device_public_page(const struct page *page)
{
return false;
}
+
+static inline bool is_pci_p2pdma_page(const struct page *page)
+{
+ return false;
+}
#endif /* CONFIG_DEV_PAGEMAP_OPS */
static inline void get_page(struct page *page)
diff --git a/include/linux/pci-p2pdma.h b/include/linux/pci-p2pdma.h
new file mode 100644
index 000000000000..127e95f50491
--- /dev/null
+++ b/include/linux/pci-p2pdma.h
@@ -0,0 +1,101 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * PCI Peer 2 Peer DMA support.
+ *
+ * Copyright (c) 2016-2018, Logan Gunthorpe
+ * Copyright (c) 2016-2017, Microsemi Corporation
+ * Copyright (c) 2017, Christoph Hellwig
+ * Copyright (c) 2018, Eideticom Inc.
+ */
+
+#ifndef _LINUX_PCI_P2PDMA_H
+#define _LINUX_PCI_P2PDMA_H
+
+#include <linux/pci.h>
+
+struct block_device;
+struct scatterlist;
+
+#ifdef CONFIG_PCI_P2PDMA
+int pci_p2pdma_add_resource(struct pci_dev *pdev, int bar, size_t size,
+ u64 offset);
+int pci_p2pdma_add_client(struct list_head *head, struct device *dev);
+void pci_p2pdma_remove_client(struct list_head *head, struct device *dev);
+void pci_p2pdma_client_list_free(struct list_head *head);
+int pci_p2pdma_distance(struct pci_dev *provider, struct list_head *clients,
+ bool verbose);
+bool pci_p2pdma_assign_provider(struct pci_dev *provider,
+ struct list_head *clients);
+bool pci_has_p2pmem(struct pci_dev *pdev);
+struct pci_dev *pci_p2pmem_find(struct list_head *clients);
+void *pci_alloc_p2pmem(struct pci_dev *pdev, size_t size);
+void pci_free_p2pmem(struct pci_dev *pdev, void *addr, size_t size);
+pci_bus_addr_t pci_p2pmem_virt_to_bus(struct pci_dev *pdev, void *addr);
+struct scatterlist *pci_p2pmem_alloc_sgl(struct pci_dev *pdev,
+ unsigned int *nents, u32 length);
+void pci_p2pmem_free_sgl(struct pci_dev *pdev, struct scatterlist *sgl);
+void pci_p2pmem_publish(struct pci_dev *pdev, bool publish);
+#else /* CONFIG_PCI_P2PDMA */
+static inline int pci_p2pdma_add_resource(struct pci_dev *pdev, int bar,
+ size_t size, u64 offset)
+{
+ return -EOPNOTSUPP;
+}
+static inline int pci_p2pdma_add_client(struct list_head *head,
+ struct device *dev)
+{
+ return 0;
+}
+static inline void pci_p2pdma_remove_client(struct list_head *head,
+ struct device *dev)
+{
+}
+static inline void pci_p2pdma_client_list_free(struct list_head *head)
+{
+}
+static inline int pci_p2pdma_distance(struct pci_dev *provider,
+ struct list_head *clients,
+ bool verbose)
+{
+ return -1;
+}
+static inline bool pci_p2pdma_assign_provider(struct pci_dev *provider,
+ struct list_head *clients)
+{
+ return false;
+}
+static inline bool pci_has_p2pmem(struct pci_dev *pdev)
+{
+ return false;
+}
+static inline struct pci_dev *pci_p2pmem_find(struct list_head *clients)
+{
+ return NULL;
+}
+static inline void *pci_alloc_p2pmem(struct pci_dev *pdev, size_t size)
+{
+ return NULL;
+}
+static inline void pci_free_p2pmem(struct pci_dev *pdev, void *addr,
+ size_t size)
+{
+}
+static inline pci_bus_addr_t pci_p2pmem_virt_to_bus(struct pci_dev *pdev,
+ void *addr)
+{
+ return 0;
+}
+static inline struct scatterlist *pci_p2pmem_alloc_sgl(struct pci_dev *pdev,
+ unsigned int *nents, u32 length)
+{
+ return NULL;
+}
+static inline void pci_p2pmem_free_sgl(struct pci_dev *pdev,
+ struct scatterlist *sgl)
+{
+}
+static inline void pci_p2pmem_publish(struct pci_dev *pdev, bool publish)
+{
+}
+#endif /* CONFIG_PCI_P2PDMA */
+#endif /* _LINUX_PCI_P2P_H */
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 6925828f9f25..bf5277768f69 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -281,6 +281,7 @@ struct pcie_link_state;
struct pci_vpd;
struct pci_sriov;
struct pci_ats;
+struct pci_p2pdma;
/* The pci_dev structure describes PCI devices */
struct pci_dev {
@@ -438,6 +439,9 @@ struct pci_dev {
#endif
#ifdef CONFIG_PCI_PASID
u16 pasid_features;
+#endif
+#ifdef CONFIG_PCI_P2PDMA
+ struct pci_p2pdma *p2pdma;
#endif
phys_addr_t rom; /* Physical address if not from BAR */
size_t romlen; /* Length if not from BAR */
--
2.19.0
_______________________________________________
Linux-nvdimm mailing list
Linux-nvdimm@lists.01.org
https://lists.01.org/mailman/listinfo/linux-nvdimm
^ permalink raw reply related [flat|nested] 25+ messages in thread* [PATCH v7 02/13] PCI/P2PDMA: Add sysfs group to display p2pmem stats
[not found] ` <20180925162231.4354-1-logang-OTvnGxWRz7hWk0Htik3J/w@public.gmane.org>
2018-09-25 16:22 ` [PATCH v7 01/13] PCI/P2PDMA: Support peer-to-peer memory Logan Gunthorpe
@ 2018-09-25 16:22 ` Logan Gunthorpe
[not found] ` <20180925162231.4354-3-logang-OTvnGxWRz7hWk0Htik3J/w@public.gmane.org>
2018-09-25 16:22 ` [PATCH v7 03/13] PCI/P2PDMA: Add PCI p2pmem DMA mappings to adjust the bus offset Logan Gunthorpe
` (8 subsequent siblings)
10 siblings, 1 reply; 25+ messages in thread
From: Logan Gunthorpe @ 2018-09-25 16:22 UTC (permalink / raw)
To: linux-kernel-u79uwXL29TY76Z2rM5mHXA,
linux-pci-u79uwXL29TY76Z2rM5mHXA,
linux-nvme-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r,
linux-rdma-u79uwXL29TY76Z2rM5mHXA,
linux-nvdimm-hn68Rpc1hR1g9hUCZPvPmw,
linux-block-u79uwXL29TY76Z2rM5mHXA
Cc: Jens Axboe, Christian König, Benjamin Herrenschmidt,
Alex Williamson, Jérôme Glisse, Jason Gunthorpe,
Bjorn Helgaas, Max Gurtovoy, Christoph Hellwig
Add a sysfs group to display statistics about P2P memory that is
registered in each PCI device.
Attributes in the group display the total amount of P2P memory, the
amount available and whether it is published or not.
Signed-off-by: Logan Gunthorpe <logang-OTvnGxWRz7hWk0Htik3J/w@public.gmane.org>
Acked-by: Bjorn Helgaas <bhelgaas-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>
---
Documentation/ABI/testing/sysfs-bus-pci | 24 +++++++++++
drivers/pci/p2pdma.c | 54 +++++++++++++++++++++++++
2 files changed, 78 insertions(+)
diff --git a/Documentation/ABI/testing/sysfs-bus-pci b/Documentation/ABI/testing/sysfs-bus-pci
index 44d4b2be92fd..8bfee557e50e 100644
--- a/Documentation/ABI/testing/sysfs-bus-pci
+++ b/Documentation/ABI/testing/sysfs-bus-pci
@@ -323,3 +323,27 @@ Description:
This is similar to /sys/bus/pci/drivers_autoprobe, but
affects only the VFs associated with a specific PF.
+
+What: /sys/bus/pci/devices/.../p2pmem/size
+Date: November 2017
+Contact: Logan Gunthorpe <logang-OTvnGxWRz7hWk0Htik3J/w@public.gmane.org>
+Description:
+ If the device has any Peer-to-Peer memory registered, this
+ file contains the total amount of memory that the device
+ provides (in decimal).
+
+What: /sys/bus/pci/devices/.../p2pmem/available
+Date: November 2017
+Contact: Logan Gunthorpe <logang-OTvnGxWRz7hWk0Htik3J/w@public.gmane.org>
+Description:
+ If the device has any Peer-to-Peer memory registered, this
+ file contains the amount of memory that has not been
+ allocated (in decimal).
+
+What: /sys/bus/pci/devices/.../p2pmem/published
+Date: November 2017
+Contact: Logan Gunthorpe <logang-OTvnGxWRz7hWk0Htik3J/w@public.gmane.org>
+Description:
+ If the device has any Peer-to-Peer memory registered, this
+ file contains a '1' if the memory has been published for
+ use outside the driver that owns the device.
diff --git a/drivers/pci/p2pdma.c b/drivers/pci/p2pdma.c
index d9616522c1fc..d61389945da2 100644
--- a/drivers/pci/p2pdma.c
+++ b/drivers/pci/p2pdma.c
@@ -24,6 +24,54 @@ struct pci_p2pdma {
bool p2pmem_published;
};
+static ssize_t size_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct pci_dev *pdev = to_pci_dev(dev);
+ size_t size = 0;
+
+ if (pdev->p2pdma->pool)
+ size = gen_pool_size(pdev->p2pdma->pool);
+
+ return snprintf(buf, PAGE_SIZE, "%zd\n", size);
+}
+static DEVICE_ATTR_RO(size);
+
+static ssize_t available_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct pci_dev *pdev = to_pci_dev(dev);
+ size_t avail = 0;
+
+ if (pdev->p2pdma->pool)
+ avail = gen_pool_avail(pdev->p2pdma->pool);
+
+ return snprintf(buf, PAGE_SIZE, "%zd\n", avail);
+}
+static DEVICE_ATTR_RO(available);
+
+static ssize_t published_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct pci_dev *pdev = to_pci_dev(dev);
+
+ return snprintf(buf, PAGE_SIZE, "%d\n",
+ pdev->p2pdma->p2pmem_published);
+}
+static DEVICE_ATTR_RO(published);
+
+static struct attribute *p2pmem_attrs[] = {
+ &dev_attr_size.attr,
+ &dev_attr_available.attr,
+ &dev_attr_published.attr,
+ NULL,
+};
+
+static const struct attribute_group p2pmem_group = {
+ .attrs = p2pmem_attrs,
+ .name = "p2pmem",
+};
+
static void pci_p2pdma_percpu_release(struct percpu_ref *ref)
{
struct pci_p2pdma *p2p =
@@ -53,6 +101,7 @@ static void pci_p2pdma_release(void *data)
percpu_ref_exit(&pdev->p2pdma->devmap_ref);
gen_pool_destroy(pdev->p2pdma->pool);
+ sysfs_remove_group(&pdev->dev.kobj, &p2pmem_group);
pdev->p2pdma = NULL;
}
@@ -83,9 +132,14 @@ static int pci_p2pdma_setup(struct pci_dev *pdev)
pdev->p2pdma = p2p;
+ error = sysfs_create_group(&pdev->dev.kobj, &p2pmem_group);
+ if (error)
+ goto out_pool_destroy;
+
return 0;
out_pool_destroy:
+ pdev->p2pdma = NULL;
gen_pool_destroy(p2p->pool);
out:
devm_kfree(&pdev->dev, p2p);
--
2.19.0
^ permalink raw reply related [flat|nested] 25+ messages in thread* [PATCH v7 03/13] PCI/P2PDMA: Add PCI p2pmem DMA mappings to adjust the bus offset
[not found] ` <20180925162231.4354-1-logang-OTvnGxWRz7hWk0Htik3J/w@public.gmane.org>
2018-09-25 16:22 ` [PATCH v7 01/13] PCI/P2PDMA: Support peer-to-peer memory Logan Gunthorpe
2018-09-25 16:22 ` [PATCH v7 02/13] PCI/P2PDMA: Add sysfs group to display p2pmem stats Logan Gunthorpe
@ 2018-09-25 16:22 ` Logan Gunthorpe
[not found] ` <20180925162231.4354-4-logang-OTvnGxWRz7hWk0Htik3J/w@public.gmane.org>
2018-09-25 16:22 ` [PATCH v7 04/13] PCI/P2PDMA: Introduce configfs/sysfs enable attribute helpers Logan Gunthorpe
` (7 subsequent siblings)
10 siblings, 1 reply; 25+ messages in thread
From: Logan Gunthorpe @ 2018-09-25 16:22 UTC (permalink / raw)
To: linux-kernel-u79uwXL29TY76Z2rM5mHXA,
linux-pci-u79uwXL29TY76Z2rM5mHXA,
linux-nvme-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r,
linux-rdma-u79uwXL29TY76Z2rM5mHXA,
linux-nvdimm-hn68Rpc1hR1g9hUCZPvPmw,
linux-block-u79uwXL29TY76Z2rM5mHXA
Cc: Jens Axboe, Christian König, Benjamin Herrenschmidt,
Alex Williamson, Jérôme Glisse, Jason Gunthorpe,
Bjorn Helgaas, Max Gurtovoy, Christoph Hellwig
The DMA address used when mapping PCI P2P memory must be the PCI bus
address. Thus, introduce pci_p2pmem_map_sg() to map the correct
addresses when using P2P memory.
Memory mapped in this way does not need to be unmapped and thus if we
provided pci_p2pmem_unmap_sg() it would be empty. This breaks the
expected balance between map/unmap but was left out as an empty
function doesn't really provide any benefit. In the future, if
this call becomes necessary it can be added without much difficulty.
For this, we assume that an SGL passed to these functions contain all
P2P memory or no P2P memory.
Signed-off-by: Logan Gunthorpe <logang-OTvnGxWRz7hWk0Htik3J/w@public.gmane.org>
Acked-by: Bjorn Helgaas <bhelgaas-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>
---
drivers/pci/p2pdma.c | 43 ++++++++++++++++++++++++++++++++++++++
include/linux/memremap.h | 1 +
include/linux/pci-p2pdma.h | 7 +++++++
3 files changed, 51 insertions(+)
diff --git a/drivers/pci/p2pdma.c b/drivers/pci/p2pdma.c
index d61389945da2..bbbf86165428 100644
--- a/drivers/pci/p2pdma.c
+++ b/drivers/pci/p2pdma.c
@@ -190,6 +190,8 @@ int pci_p2pdma_add_resource(struct pci_dev *pdev, int bar, size_t size,
pgmap->res.flags = pci_resource_flags(pdev, bar);
pgmap->ref = &pdev->p2pdma->devmap_ref;
pgmap->type = MEMORY_DEVICE_PCI_P2PDMA;
+ pgmap->pci_p2pdma_bus_offset = pci_bus_address(pdev, bar) -
+ pci_resource_start(pdev, bar);
addr = devm_memremap_pages(&pdev->dev, pgmap);
if (IS_ERR(addr)) {
@@ -816,3 +818,44 @@ void pci_p2pmem_publish(struct pci_dev *pdev, bool publish)
pdev->p2pdma->p2pmem_published = publish;
}
EXPORT_SYMBOL_GPL(pci_p2pmem_publish);
+
+/**
+ * pci_p2pdma_map_sg - map a PCI peer-to-peer scatterlist for DMA
+ * @dev: device doing the DMA request
+ * @sg: scatter list to map
+ * @nents: elements in the scatterlist
+ * @dir: DMA direction
+ *
+ * Scatterlists mapped with this function should not be unmapped in any way.
+ *
+ * Returns the number of SG entries mapped or 0 on error.
+ */
+int pci_p2pdma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
+ enum dma_data_direction dir)
+{
+ struct dev_pagemap *pgmap;
+ struct scatterlist *s;
+ phys_addr_t paddr;
+ int i;
+
+ /*
+ * p2pdma mappings are not compatible with devices that use
+ * dma_virt_ops. If the upper layers do the right thing
+ * this should never happen because it will be prevented
+ * by the check in pci_p2pdma_add_client()
+ */
+ if (WARN_ON_ONCE(IS_ENABLED(CONFIG_DMA_VIRT_OPS) &&
+ dev->dma_ops == &dma_virt_ops))
+ return 0;
+
+ for_each_sg(sg, s, nents, i) {
+ pgmap = sg_page(s)->pgmap;
+ paddr = sg_phys(s);
+
+ s->dma_address = paddr - pgmap->pci_p2pdma_bus_offset;
+ sg_dma_len(s) = s->length;
+ }
+
+ return nents;
+}
+EXPORT_SYMBOL_GPL(pci_p2pdma_map_sg);
diff --git a/include/linux/memremap.h b/include/linux/memremap.h
index 9553370ebdad..0ac69ddf5fc4 100644
--- a/include/linux/memremap.h
+++ b/include/linux/memremap.h
@@ -125,6 +125,7 @@ struct dev_pagemap {
struct device *dev;
void *data;
enum memory_type type;
+ u64 pci_p2pdma_bus_offset;
};
#ifdef CONFIG_ZONE_DEVICE
diff --git a/include/linux/pci-p2pdma.h b/include/linux/pci-p2pdma.h
index 127e95f50491..f039c48990a5 100644
--- a/include/linux/pci-p2pdma.h
+++ b/include/linux/pci-p2pdma.h
@@ -35,6 +35,8 @@ struct scatterlist *pci_p2pmem_alloc_sgl(struct pci_dev *pdev,
unsigned int *nents, u32 length);
void pci_p2pmem_free_sgl(struct pci_dev *pdev, struct scatterlist *sgl);
void pci_p2pmem_publish(struct pci_dev *pdev, bool publish);
+int pci_p2pdma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
+ enum dma_data_direction dir);
#else /* CONFIG_PCI_P2PDMA */
static inline int pci_p2pdma_add_resource(struct pci_dev *pdev, int bar,
size_t size, u64 offset)
@@ -97,5 +99,10 @@ static inline void pci_p2pmem_free_sgl(struct pci_dev *pdev,
static inline void pci_p2pmem_publish(struct pci_dev *pdev, bool publish)
{
}
+static inline int pci_p2pdma_map_sg(struct device *dev,
+ struct scatterlist *sg, int nents, enum dma_data_direction dir)
+{
+ return 0;
+}
#endif /* CONFIG_PCI_P2PDMA */
#endif /* _LINUX_PCI_P2P_H */
--
2.19.0
^ permalink raw reply related [flat|nested] 25+ messages in thread* [PATCH v7 04/13] PCI/P2PDMA: Introduce configfs/sysfs enable attribute helpers
[not found] ` <20180925162231.4354-1-logang-OTvnGxWRz7hWk0Htik3J/w@public.gmane.org>
` (2 preceding siblings ...)
2018-09-25 16:22 ` [PATCH v7 03/13] PCI/P2PDMA: Add PCI p2pmem DMA mappings to adjust the bus offset Logan Gunthorpe
@ 2018-09-25 16:22 ` Logan Gunthorpe
2018-09-25 16:22 ` [PATCH v7 05/13] docs-rst: Add a new directory for PCI documentation Logan Gunthorpe
` (6 subsequent siblings)
10 siblings, 0 replies; 25+ messages in thread
From: Logan Gunthorpe @ 2018-09-25 16:22 UTC (permalink / raw)
To: linux-kernel-u79uwXL29TY76Z2rM5mHXA,
linux-pci-u79uwXL29TY76Z2rM5mHXA,
linux-nvme-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r,
linux-rdma-u79uwXL29TY76Z2rM5mHXA,
linux-nvdimm-hn68Rpc1hR1g9hUCZPvPmw,
linux-block-u79uwXL29TY76Z2rM5mHXA
Cc: Jens Axboe, Christian König, Benjamin Herrenschmidt,
Alex Williamson, Jérôme Glisse, Jason Gunthorpe,
Bjorn Helgaas, Max Gurtovoy, Christoph Hellwig
Users of the P2PDMA infrastructure will typically need a way for
the user to tell the kernel to use P2P resources. Typically
this will be a simple on/off boolean operation but sometimes
it may be desirable for the user to specify the exact device to
use for the P2P operation.
Add new helpers for attributes which take a boolean or a PCI device.
Any boolean as accepted by strtobool() turn P2P on or off (such as 'y', 'n',
'1', '0', etc). Specifying a full PCI device name/BDF will select the
specific device.
Signed-off-by: Logan Gunthorpe <logang-OTvnGxWRz7hWk0Htik3J/w@public.gmane.org>
Acked-by: Bjorn Helgaas <bhelgaas-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>
---
drivers/pci/p2pdma.c | 82 ++++++++++++++++++++++++++++++++++++++
include/linux/pci-p2pdma.h | 15 +++++++
2 files changed, 97 insertions(+)
diff --git a/drivers/pci/p2pdma.c b/drivers/pci/p2pdma.c
index bbbf86165428..094d2b337c50 100644
--- a/drivers/pci/p2pdma.c
+++ b/drivers/pci/p2pdma.c
@@ -8,6 +8,8 @@
* Copyright (c) 2018, Eideticom Inc.
*/
+#define pr_fmt(fmt) "pci-p2pdma: " fmt
+#include <linux/ctype.h>
#include <linux/pci-p2pdma.h>
#include <linux/module.h>
#include <linux/slab.h>
@@ -859,3 +861,83 @@ int pci_p2pdma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
return nents;
}
EXPORT_SYMBOL_GPL(pci_p2pdma_map_sg);
+
+/**
+ * pci_p2pdma_enable_store - parse a configfs/sysfs attribute store
+ * to enable p2pdma
+ * @page: contents of the value to be stored
+ * @p2p_dev: returns the PCI device that was selected to be used
+ * (if one was specified in the stored value)
+ * @use_p2pdma: returns whether to enable p2pdma or not
+ *
+ * Parses an attribute value to decide whether to enable p2pdma.
+ * The value can select a PCI device (using it's full BDF device
+ * name) or a boolean (in any format strtobool() accepts). A false
+ * value disables p2pdma, a true value expects the caller
+ * to automatically find a compatible device and specifying a PCI device
+ * expects the caller to use the specific provider.
+ *
+ * pci_p2pdma_enable_show() should be used as the show operation for
+ * the attribute.
+ *
+ * Returns 0 on success
+ */
+int pci_p2pdma_enable_store(const char *page, struct pci_dev **p2p_dev,
+ bool *use_p2pdma)
+{
+ struct device *dev;
+
+ dev = bus_find_device_by_name(&pci_bus_type, NULL, page);
+ if (dev) {
+ *use_p2pdma = true;
+ *p2p_dev = to_pci_dev(dev);
+
+ if (!pci_has_p2pmem(*p2p_dev)) {
+ pci_err(*p2p_dev,
+ "PCI device has no peer-to-peer memory: %s\n",
+ page);
+ pci_dev_put(*p2p_dev);
+ return -ENODEV;
+ }
+
+ return 0;
+ } else if ((page[0] == '0' || page[0] == '1') && !iscntrl(page[1])) {
+ /*
+ * If the user enters a PCI device that doesn't exist
+ * like "0000:01:00.1", we don't want strtobool to think
+ * it's a '0' when it's clearly not what the user wanted.
+ * So we require 0's and 1's to be exactly one character.
+ */
+ } else if (!strtobool(page, use_p2pdma)) {
+ return 0;
+ }
+
+ pr_err("No such PCI device: %.*s\n", (int)strcspn(page, "\n"), page);
+ return -ENODEV;
+}
+EXPORT_SYMBOL_GPL(pci_p2pdma_enable_store);
+
+/**
+ * pci_p2pdma_enable_show - show a configfs/sysfs attribute indicating
+ * whether p2pdma is enabled
+ * @page: contents of the stored value
+ * @p2p_dev: the selected p2p device (NULL if no device is selected)
+ * @use_p2pdma: whether p2pdme has been enabled
+ *
+ * Attributes that use pci_p2pdma_enable_store() should use this function
+ * to show the value of the attribute.
+ *
+ * Returns 0 on success
+ */
+ssize_t pci_p2pdma_enable_show(char *page, struct pci_dev *p2p_dev,
+ bool use_p2pdma)
+{
+ if (!use_p2pdma)
+ return sprintf(page, "0\n");
+
+ if (!p2p_dev)
+ return sprintf(page, "1\n");
+
+ return sprintf(page, "%s\n", pci_name(p2p_dev));
+}
+EXPORT_SYMBOL_GPL(pci_p2pdma_enable_show);
diff --git a/include/linux/pci-p2pdma.h b/include/linux/pci-p2pdma.h
index f039c48990a5..5248540eead2 100644
--- a/include/linux/pci-p2pdma.h
+++ b/include/linux/pci-p2pdma.h
@@ -37,6 +37,10 @@ void pci_p2pmem_free_sgl(struct pci_dev *pdev, struct scatterlist *sgl);
void pci_p2pmem_publish(struct pci_dev *pdev, bool publish);
int pci_p2pdma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
enum dma_data_direction dir);
+int pci_p2pdma_enable_store(const char *page, struct pci_dev **p2p_dev,
+ bool *use_p2pdma);
+ssize_t pci_p2pdma_enable_show(char *page, struct pci_dev *p2p_dev,
+ bool use_p2pdma);
#else /* CONFIG_PCI_P2PDMA */
static inline int pci_p2pdma_add_resource(struct pci_dev *pdev, int bar,
size_t size, u64 offset)
@@ -104,5 +108,16 @@ static inline int pci_p2pdma_map_sg(struct device *dev,
{
return 0;
}
+static inline int pci_p2pdma_enable_store(const char *page,
+ struct pci_dev **p2p_dev, bool *use_p2pdma)
+{
+ *use_p2pdma = false;
+ return 0;
+}
+static inline ssize_t pci_p2pdma_enable_show(char *page,
+ struct pci_dev *p2p_dev, bool use_p2pdma)
+{
+ return sprintf(page, "none\n");
+}
#endif /* CONFIG_PCI_P2PDMA */
#endif /* _LINUX_PCI_P2P_H */
--
2.19.0
^ permalink raw reply related [flat|nested] 25+ messages in thread* [PATCH v7 05/13] docs-rst: Add a new directory for PCI documentation
[not found] ` <20180925162231.4354-1-logang-OTvnGxWRz7hWk0Htik3J/w@public.gmane.org>
` (3 preceding siblings ...)
2018-09-25 16:22 ` [PATCH v7 04/13] PCI/P2PDMA: Introduce configfs/sysfs enable attribute helpers Logan Gunthorpe
@ 2018-09-25 16:22 ` Logan Gunthorpe
2018-09-25 16:22 ` [PATCH v7 07/13] block: Add PCI P2P flag for request queue and check support for requests Logan Gunthorpe
` (5 subsequent siblings)
10 siblings, 0 replies; 25+ messages in thread
From: Logan Gunthorpe @ 2018-09-25 16:22 UTC (permalink / raw)
To: linux-kernel-u79uwXL29TY76Z2rM5mHXA,
linux-pci-u79uwXL29TY76Z2rM5mHXA,
linux-nvme-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r,
linux-rdma-u79uwXL29TY76Z2rM5mHXA,
linux-nvdimm-hn68Rpc1hR1g9hUCZPvPmw,
linux-block-u79uwXL29TY76Z2rM5mHXA
Cc: Benjamin Herrenschmidt, Linus Walleij, Max Gurtovoy,
Christoph Hellwig, Jonathan Corbet, Vinod Koul, Jason Gunthorpe,
Thierry Reding, Alex Williamson, Bjorn Helgaas,
Mauro Carvalho Chehab, Sagar Dharia, Jens Axboe,
Greg Kroah-Hartman, Jérôme Glisse, Sanyog Kale,
Christian König
Add a new directory in the driver API guide for PCI specific
documentation.
This is in preparation for adding a new PCI P2P DMA driver writers
guide which will go in this directory.
Signed-off-by: Logan Gunthorpe <logang-OTvnGxWRz7hWk0Htik3J/w@public.gmane.org>
Cc: Jonathan Corbet <corbet-T1hC0tSOHrs@public.gmane.org>
Cc: Mauro Carvalho Chehab <mchehab-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
Cc: Greg Kroah-Hartman <gregkh-hQyY1W1yCW8ekmWlsbkhG0B+6BGkLq7r@public.gmane.org>
Cc: Vinod Koul <vinod.koul-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
Cc: Linus Walleij <linus.walleij-QSEj5FYQhm4dnm+yROfE0A@public.gmane.org>
Cc: Logan Gunthorpe <logang-OTvnGxWRz7hWk0Htik3J/w@public.gmane.org>
Cc: Thierry Reding <treding-DDmLM1+adcrQT0dZR+AlfA@public.gmane.org>
Cc: Sanyog Kale <sanyog.r.kale-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
Cc: Sagar Dharia <sdharia-sgV2jX0FEOL9JmXXK+q4OQ@public.gmane.org>
---
Documentation/driver-api/index.rst | 2 +-
Documentation/driver-api/pci/index.rst | 20 ++++++++++++++++++++
Documentation/driver-api/{ => pci}/pci.rst | 0
3 files changed, 21 insertions(+), 1 deletion(-)
create mode 100644 Documentation/driver-api/pci/index.rst
rename Documentation/driver-api/{ => pci}/pci.rst (100%)
diff --git a/Documentation/driver-api/index.rst b/Documentation/driver-api/index.rst
index 6d9f2f9fe20e..e9e7d24169cf 100644
--- a/Documentation/driver-api/index.rst
+++ b/Documentation/driver-api/index.rst
@@ -29,7 +29,7 @@ available subsections can be seen below.
iio/index
input
usb/index
- pci
+ pci/index
spi
i2c
hsi
diff --git a/Documentation/driver-api/pci/index.rst b/Documentation/driver-api/pci/index.rst
new file mode 100644
index 000000000000..eaf20b24bf7d
--- /dev/null
+++ b/Documentation/driver-api/pci/index.rst
@@ -0,0 +1,20 @@
+.. SPDX-License-Identifier: GPL-2.0
+============================================
+The Linux PCI driver implementer's API guide
+============================================
+
+.. class:: toc-title
+
+ Table of contents
+
+.. toctree::
+ :maxdepth: 2
+
+ pci
+
+.. only:: subproject and html
+
+ Indices
+ =======
+
+ * :ref:`genindex`
diff --git a/Documentation/driver-api/pci.rst b/Documentation/driver-api/pci/pci.rst
similarity index 100%
rename from Documentation/driver-api/pci.rst
rename to Documentation/driver-api/pci/pci.rst
--
2.19.0
^ permalink raw reply related [flat|nested] 25+ messages in thread* [PATCH v7 07/13] block: Add PCI P2P flag for request queue and check support for requests
[not found] ` <20180925162231.4354-1-logang-OTvnGxWRz7hWk0Htik3J/w@public.gmane.org>
` (4 preceding siblings ...)
2018-09-25 16:22 ` [PATCH v7 05/13] docs-rst: Add a new directory for PCI documentation Logan Gunthorpe
@ 2018-09-25 16:22 ` Logan Gunthorpe
2018-09-25 16:22 ` [PATCH v7 08/13] IB/core: Ensure we map P2P memory correctly in rdma_rw_ctx_[init|destroy]() Logan Gunthorpe
` (4 subsequent siblings)
10 siblings, 0 replies; 25+ messages in thread
From: Logan Gunthorpe @ 2018-09-25 16:22 UTC (permalink / raw)
To: linux-kernel-u79uwXL29TY76Z2rM5mHXA,
linux-pci-u79uwXL29TY76Z2rM5mHXA,
linux-nvme-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r,
linux-rdma-u79uwXL29TY76Z2rM5mHXA,
linux-nvdimm-hn68Rpc1hR1g9hUCZPvPmw,
linux-block-u79uwXL29TY76Z2rM5mHXA
Cc: Jens Axboe, Christian König, Benjamin Herrenschmidt,
Alex Williamson, Jérôme Glisse, Jason Gunthorpe,
Bjorn Helgaas, Max Gurtovoy, Christoph Hellwig
QUEUE_FLAG_PCI_P2P is introduced meaning a driver's request queue
supports targeting P2P memory. This will be used by P2P providers and
orchestrators (in subsequent patches) to ensure block devices can
support P2P memory before submitting P2P backed pages to submit_bio().
Signed-off-by: Logan Gunthorpe <logang-OTvnGxWRz7hWk0Htik3J/w@public.gmane.org>
---
include/linux/blkdev.h | 3 +++
1 file changed, 3 insertions(+)
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 6980014357d4..87fb1963b721 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -699,6 +699,7 @@ struct request_queue {
#define QUEUE_FLAG_SCSI_PASSTHROUGH 27 /* queue supports SCSI commands */
#define QUEUE_FLAG_QUIESCED 28 /* queue has been quiesced */
#define QUEUE_FLAG_PREEMPT_ONLY 29 /* only process REQ_PREEMPT requests */
+#define QUEUE_FLAG_PCI_P2PDMA 30 /* device supports pci p2p requests */
#define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \
(1 << QUEUE_FLAG_SAME_COMP) | \
@@ -731,6 +732,8 @@ bool blk_queue_flag_test_and_clear(unsigned int flag, struct request_queue *q);
#define blk_queue_dax(q) test_bit(QUEUE_FLAG_DAX, &(q)->queue_flags)
#define blk_queue_scsi_passthrough(q) \
test_bit(QUEUE_FLAG_SCSI_PASSTHROUGH, &(q)->queue_flags)
+#define blk_queue_pci_p2pdma(q) \
+ test_bit(QUEUE_FLAG_PCI_P2PDMA, &(q)->queue_flags)
#define blk_noretry_request(rq) \
((rq)->cmd_flags & (REQ_FAILFAST_DEV|REQ_FAILFAST_TRANSPORT| \
--
2.19.0
^ permalink raw reply related [flat|nested] 25+ messages in thread* [PATCH v7 08/13] IB/core: Ensure we map P2P memory correctly in rdma_rw_ctx_[init|destroy]()
[not found] ` <20180925162231.4354-1-logang-OTvnGxWRz7hWk0Htik3J/w@public.gmane.org>
` (5 preceding siblings ...)
2018-09-25 16:22 ` [PATCH v7 07/13] block: Add PCI P2P flag for request queue and check support for requests Logan Gunthorpe
@ 2018-09-25 16:22 ` Logan Gunthorpe
2018-09-25 16:22 ` [PATCH v7 09/13] nvme-pci: Use PCI p2pmem subsystem to manage the CMB Logan Gunthorpe
` (3 subsequent siblings)
10 siblings, 0 replies; 25+ messages in thread
From: Logan Gunthorpe @ 2018-09-25 16:22 UTC (permalink / raw)
To: linux-kernel-u79uwXL29TY76Z2rM5mHXA,
linux-pci-u79uwXL29TY76Z2rM5mHXA,
linux-nvme-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r,
linux-rdma-u79uwXL29TY76Z2rM5mHXA,
linux-nvdimm-hn68Rpc1hR1g9hUCZPvPmw,
linux-block-u79uwXL29TY76Z2rM5mHXA
Cc: Jens Axboe, Christian König, Benjamin Herrenschmidt,
Alex Williamson, Jérôme Glisse, Jason Gunthorpe,
Bjorn Helgaas, Max Gurtovoy, Christoph Hellwig
In order to use PCI P2P memory the pci_p2pmem_map_sg() function must be
called to map the correct PCI bus address.
To do this, check the first page in the scatter list to see if it is P2P
memory or not. At the moment, scatter lists that contain P2P memory must
be homogeneous so if the first page is P2P the entire SGL should be P2P.
Signed-off-by: Logan Gunthorpe <logang-OTvnGxWRz7hWk0Htik3J/w@public.gmane.org>
Reviewed-by: Christoph Hellwig <hch-jcswGhMUV9g@public.gmane.org>
Reviewed-by: Sagi Grimberg <sagi-NQWnxTmZq1alnMjI0IkVqw@public.gmane.org>
---
drivers/infiniband/core/rw.c | 11 +++++++++--
1 file changed, 9 insertions(+), 2 deletions(-)
diff --git a/drivers/infiniband/core/rw.c b/drivers/infiniband/core/rw.c
index 683e6d11a564..d22c4a2ebac6 100644
--- a/drivers/infiniband/core/rw.c
+++ b/drivers/infiniband/core/rw.c
@@ -12,6 +12,7 @@
*/
#include <linux/moduleparam.h>
#include <linux/slab.h>
+#include <linux/pci-p2pdma.h>
#include <rdma/mr_pool.h>
#include <rdma/rw.h>
@@ -280,7 +281,11 @@ int rdma_rw_ctx_init(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u8 port_num,
struct ib_device *dev = qp->pd->device;
int ret;
- ret = ib_dma_map_sg(dev, sg, sg_cnt, dir);
+ if (is_pci_p2pdma_page(sg_page(sg)))
+ ret = pci_p2pdma_map_sg(dev->dma_device, sg, sg_cnt, dir);
+ else
+ ret = ib_dma_map_sg(dev, sg, sg_cnt, dir);
+
if (!ret)
return -ENOMEM;
sg_cnt = ret;
@@ -602,7 +607,9 @@ void rdma_rw_ctx_destroy(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u8 port_num,
break;
}
- ib_dma_unmap_sg(qp->pd->device, sg, sg_cnt, dir);
+ /* P2PDMA contexts do not need to be unmapped */
+ if (!is_pci_p2pdma_page(sg_page(sg)))
+ ib_dma_unmap_sg(qp->pd->device, sg, sg_cnt, dir);
}
EXPORT_SYMBOL(rdma_rw_ctx_destroy);
--
2.19.0
^ permalink raw reply related [flat|nested] 25+ messages in thread* [PATCH v7 09/13] nvme-pci: Use PCI p2pmem subsystem to manage the CMB
[not found] ` <20180925162231.4354-1-logang-OTvnGxWRz7hWk0Htik3J/w@public.gmane.org>
` (6 preceding siblings ...)
2018-09-25 16:22 ` [PATCH v7 08/13] IB/core: Ensure we map P2P memory correctly in rdma_rw_ctx_[init|destroy]() Logan Gunthorpe
@ 2018-09-25 16:22 ` Logan Gunthorpe
2018-09-25 16:22 ` [PATCH v7 10/13] nvme-pci: Add support for P2P memory in requests Logan Gunthorpe
` (2 subsequent siblings)
10 siblings, 0 replies; 25+ messages in thread
From: Logan Gunthorpe @ 2018-09-25 16:22 UTC (permalink / raw)
To: linux-kernel-u79uwXL29TY76Z2rM5mHXA,
linux-pci-u79uwXL29TY76Z2rM5mHXA,
linux-nvme-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r,
linux-rdma-u79uwXL29TY76Z2rM5mHXA,
linux-nvdimm-hn68Rpc1hR1g9hUCZPvPmw,
linux-block-u79uwXL29TY76Z2rM5mHXA
Cc: Jens Axboe, Christian König, Benjamin Herrenschmidt,
Alex Williamson, Jérôme Glisse, Jason Gunthorpe,
Bjorn Helgaas, Max Gurtovoy, Christoph Hellwig
Register the CMB buffer as p2pmem and use the appropriate allocation
functions to create and destroy the IO submission queues.
If the CMB supports WDS and RDS, publish it for use as P2P memory
by other devices.
Kernels without CONFIG_PCI_P2PDMA will also no longer support NVMe CMB.
However, seeing the main use-cases for the CMB is P2P operations,
this seems like a reasonable dependency.
We drop the __iomem safety on the buffer seeing that, by convention, it's
safe to directly access memory mapped by memremap()/devm_memremap_pages().
Architectures where this is not safe will not be supported by memremap()
and therefore will not be support PCI P2P and have no support for CMB.
Signed-off-by: Logan Gunthorpe <logang-OTvnGxWRz7hWk0Htik3J/w@public.gmane.org>
---
drivers/nvme/host/pci.c | 80 +++++++++++++++++++++++------------------
1 file changed, 45 insertions(+), 35 deletions(-)
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index d668682f91df..f434706a04e8 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -30,6 +30,7 @@
#include <linux/types.h>
#include <linux/io-64-nonatomic-lo-hi.h>
#include <linux/sed-opal.h>
+#include <linux/pci-p2pdma.h>
#include "nvme.h"
@@ -99,9 +100,8 @@ struct nvme_dev {
struct work_struct remove_work;
struct mutex shutdown_lock;
bool subsystem;
- void __iomem *cmb;
- pci_bus_addr_t cmb_bus_addr;
u64 cmb_size;
+ bool cmb_use_sqes;
u32 cmbsz;
u32 cmbloc;
struct nvme_ctrl ctrl;
@@ -158,7 +158,7 @@ struct nvme_queue {
struct nvme_dev *dev;
spinlock_t sq_lock;
struct nvme_command *sq_cmds;
- struct nvme_command __iomem *sq_cmds_io;
+ bool sq_cmds_is_io;
spinlock_t cq_lock ____cacheline_aligned_in_smp;
volatile struct nvme_completion *cqes;
struct blk_mq_tags **tags;
@@ -447,11 +447,8 @@ static int nvme_pci_map_queues(struct blk_mq_tag_set *set)
static void nvme_submit_cmd(struct nvme_queue *nvmeq, struct nvme_command *cmd)
{
spin_lock(&nvmeq->sq_lock);
- if (nvmeq->sq_cmds_io)
- memcpy_toio(&nvmeq->sq_cmds_io[nvmeq->sq_tail], cmd,
- sizeof(*cmd));
- else
- memcpy(&nvmeq->sq_cmds[nvmeq->sq_tail], cmd, sizeof(*cmd));
+
+ memcpy(&nvmeq->sq_cmds[nvmeq->sq_tail], cmd, sizeof(*cmd));
if (++nvmeq->sq_tail == nvmeq->q_depth)
nvmeq->sq_tail = 0;
@@ -1232,9 +1229,18 @@ static void nvme_free_queue(struct nvme_queue *nvmeq)
{
dma_free_coherent(nvmeq->q_dmadev, CQ_SIZE(nvmeq->q_depth),
(void *)nvmeq->cqes, nvmeq->cq_dma_addr);
- if (nvmeq->sq_cmds)
- dma_free_coherent(nvmeq->q_dmadev, SQ_SIZE(nvmeq->q_depth),
- nvmeq->sq_cmds, nvmeq->sq_dma_addr);
+
+ if (nvmeq->sq_cmds) {
+ if (nvmeq->sq_cmds_is_io)
+ pci_free_p2pmem(to_pci_dev(nvmeq->q_dmadev),
+ nvmeq->sq_cmds,
+ SQ_SIZE(nvmeq->q_depth));
+ else
+ dma_free_coherent(nvmeq->q_dmadev,
+ SQ_SIZE(nvmeq->q_depth),
+ nvmeq->sq_cmds,
+ nvmeq->sq_dma_addr);
+ }
}
static void nvme_free_queues(struct nvme_dev *dev, int lowest)
@@ -1323,12 +1329,21 @@ static int nvme_cmb_qdepth(struct nvme_dev *dev, int nr_io_queues,
static int nvme_alloc_sq_cmds(struct nvme_dev *dev, struct nvme_queue *nvmeq,
int qid, int depth)
{
- /* CMB SQEs will be mapped before creation */
- if (qid && dev->cmb && use_cmb_sqes && (dev->cmbsz & NVME_CMBSZ_SQS))
- return 0;
+ struct pci_dev *pdev = to_pci_dev(dev->dev);
+
+ if (qid && dev->cmb_use_sqes && (dev->cmbsz & NVME_CMBSZ_SQS)) {
+ nvmeq->sq_cmds = pci_alloc_p2pmem(pdev, SQ_SIZE(depth));
+ nvmeq->sq_dma_addr = pci_p2pmem_virt_to_bus(pdev,
+ nvmeq->sq_cmds);
+ nvmeq->sq_cmds_is_io = true;
+ }
+
+ if (!nvmeq->sq_cmds) {
+ nvmeq->sq_cmds = dma_alloc_coherent(dev->dev, SQ_SIZE(depth),
+ &nvmeq->sq_dma_addr, GFP_KERNEL);
+ nvmeq->sq_cmds_is_io = false;
+ }
- nvmeq->sq_cmds = dma_alloc_coherent(dev->dev, SQ_SIZE(depth),
- &nvmeq->sq_dma_addr, GFP_KERNEL);
if (!nvmeq->sq_cmds)
return -ENOMEM;
return 0;
@@ -1405,13 +1420,6 @@ static int nvme_create_queue(struct nvme_queue *nvmeq, int qid)
int result;
s16 vector;
- if (dev->cmb && use_cmb_sqes && (dev->cmbsz & NVME_CMBSZ_SQS)) {
- unsigned offset = (qid - 1) * roundup(SQ_SIZE(nvmeq->q_depth),
- dev->ctrl.page_size);
- nvmeq->sq_dma_addr = dev->cmb_bus_addr + offset;
- nvmeq->sq_cmds_io = dev->cmb + offset;
- }
-
/*
* A queue's vector matches the queue identifier unless the controller
* has only one vector available.
@@ -1652,9 +1660,6 @@ static void nvme_map_cmb(struct nvme_dev *dev)
return;
dev->cmbloc = readl(dev->bar + NVME_REG_CMBLOC);
- if (!use_cmb_sqes)
- return;
-
size = nvme_cmb_size_unit(dev) * nvme_cmb_size(dev);
offset = nvme_cmb_size_unit(dev) * NVME_CMB_OFST(dev->cmbloc);
bar = NVME_CMB_BIR(dev->cmbloc);
@@ -1671,11 +1676,18 @@ static void nvme_map_cmb(struct nvme_dev *dev)
if (size > bar_size - offset)
size = bar_size - offset;
- dev->cmb = ioremap_wc(pci_resource_start(pdev, bar) + offset, size);
- if (!dev->cmb)
+ if (pci_p2pdma_add_resource(pdev, bar, size, offset)) {
+ dev_warn(dev->ctrl.device,
+ "failed to register the CMB\n");
return;
- dev->cmb_bus_addr = pci_bus_address(pdev, bar) + offset;
+ }
+
dev->cmb_size = size;
+ dev->cmb_use_sqes = use_cmb_sqes && (dev->cmbsz & NVME_CMBSZ_SQS);
+
+ if ((dev->cmbsz & (NVME_CMBSZ_WDS | NVME_CMBSZ_RDS)) ==
+ (NVME_CMBSZ_WDS | NVME_CMBSZ_RDS))
+ pci_p2pmem_publish(pdev, true);
if (sysfs_add_file_to_group(&dev->ctrl.device->kobj,
&dev_attr_cmb.attr, NULL))
@@ -1685,12 +1697,10 @@ static void nvme_map_cmb(struct nvme_dev *dev)
static inline void nvme_release_cmb(struct nvme_dev *dev)
{
- if (dev->cmb) {
- iounmap(dev->cmb);
- dev->cmb = NULL;
+ if (dev->cmb_size) {
sysfs_remove_file_from_group(&dev->ctrl.device->kobj,
&dev_attr_cmb.attr, NULL);
- dev->cmbsz = 0;
+ dev->cmb_size = 0;
}
}
@@ -1889,13 +1899,13 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
if (nr_io_queues == 0)
return 0;
- if (dev->cmb && (dev->cmbsz & NVME_CMBSZ_SQS)) {
+ if (dev->cmb_use_sqes) {
result = nvme_cmb_qdepth(dev, nr_io_queues,
sizeof(struct nvme_command));
if (result > 0)
dev->q_depth = result;
else
- nvme_release_cmb(dev);
+ dev->cmb_use_sqes = false;
}
do {
--
2.19.0
^ permalink raw reply related [flat|nested] 25+ messages in thread* [PATCH v7 10/13] nvme-pci: Add support for P2P memory in requests
[not found] ` <20180925162231.4354-1-logang-OTvnGxWRz7hWk0Htik3J/w@public.gmane.org>
` (7 preceding siblings ...)
2018-09-25 16:22 ` [PATCH v7 09/13] nvme-pci: Use PCI p2pmem subsystem to manage the CMB Logan Gunthorpe
@ 2018-09-25 16:22 ` Logan Gunthorpe
[not found] ` <20180925162231.4354-11-logang-OTvnGxWRz7hWk0Htik3J/w@public.gmane.org>
2018-09-25 16:22 ` [PATCH v7 11/13] nvme-pci: Add a quirk for a pseudo CMB Logan Gunthorpe
2018-09-25 16:22 ` [PATCH v7 12/13] nvmet: Introduce helper functions to allocate and free request SGLs Logan Gunthorpe
10 siblings, 1 reply; 25+ messages in thread
From: Logan Gunthorpe @ 2018-09-25 16:22 UTC (permalink / raw)
To: linux-kernel-u79uwXL29TY76Z2rM5mHXA,
linux-pci-u79uwXL29TY76Z2rM5mHXA,
linux-nvme-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r,
linux-rdma-u79uwXL29TY76Z2rM5mHXA,
linux-nvdimm-hn68Rpc1hR1g9hUCZPvPmw,
linux-block-u79uwXL29TY76Z2rM5mHXA
Cc: Jens Axboe, Christian König, Benjamin Herrenschmidt,
Alex Williamson, Jérôme Glisse, Jason Gunthorpe,
Bjorn Helgaas, Max Gurtovoy, Christoph Hellwig
For P2P requests, we must use the pci_p2pmem_map_sg() function
instead of the dma_map_sg functions.
With that, we can then indicate PCI_P2P support in the request queue.
For this, we create an NVME_F_PCI_P2P flag which tells the core to
set QUEUE_FLAG_PCI_P2P in the request queue.
Signed-off-by: Logan Gunthorpe <logang-OTvnGxWRz7hWk0Htik3J/w@public.gmane.org>
Reviewed-by: Sagi Grimberg <sagi-NQWnxTmZq1alnMjI0IkVqw@public.gmane.org>
Reviewed-by: Christoph Hellwig <hch-jcswGhMUV9g@public.gmane.org>
---
drivers/nvme/host/core.c | 4 ++++
drivers/nvme/host/nvme.h | 1 +
drivers/nvme/host/pci.c | 17 +++++++++++++----
3 files changed, 18 insertions(+), 4 deletions(-)
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index dd8ec1dd9219..6033ce2fd3e9 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -3051,7 +3051,11 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
ns->queue = blk_mq_init_queue(ctrl->tagset);
if (IS_ERR(ns->queue))
goto out_free_ns;
+
blk_queue_flag_set(QUEUE_FLAG_NONROT, ns->queue);
+ if (ctrl->ops->flags & NVME_F_PCI_P2PDMA)
+ blk_queue_flag_set(QUEUE_FLAG_PCI_P2PDMA, ns->queue);
+
ns->queue->queuedata = ns;
ns->ctrl = ctrl;
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index bb4a2003c097..4030743c90aa 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -343,6 +343,7 @@ struct nvme_ctrl_ops {
unsigned int flags;
#define NVME_F_FABRICS (1 << 0)
#define NVME_F_METADATA_SUPPORTED (1 << 1)
+#define NVME_F_PCI_P2PDMA (1 << 2)
int (*reg_read32)(struct nvme_ctrl *ctrl, u32 off, u32 *val);
int (*reg_write32)(struct nvme_ctrl *ctrl, u32 off, u32 val);
int (*reg_read64)(struct nvme_ctrl *ctrl, u32 off, u64 *val);
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index f434706a04e8..0d6c41bc2b35 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -745,8 +745,13 @@ static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req,
goto out;
ret = BLK_STS_RESOURCE;
- nr_mapped = dma_map_sg_attrs(dev->dev, iod->sg, iod->nents, dma_dir,
- DMA_ATTR_NO_WARN);
+
+ if (is_pci_p2pdma_page(sg_page(iod->sg)))
+ nr_mapped = pci_p2pdma_map_sg(dev->dev, iod->sg, iod->nents,
+ dma_dir);
+ else
+ nr_mapped = dma_map_sg_attrs(dev->dev, iod->sg, iod->nents,
+ dma_dir, DMA_ATTR_NO_WARN);
if (!nr_mapped)
goto out;
@@ -788,7 +793,10 @@ static void nvme_unmap_data(struct nvme_dev *dev, struct request *req)
DMA_TO_DEVICE : DMA_FROM_DEVICE;
if (iod->nents) {
- dma_unmap_sg(dev->dev, iod->sg, iod->nents, dma_dir);
+ /* P2PDMA requests do not need to be unmapped */
+ if (!is_pci_p2pdma_page(sg_page(iod->sg)))
+ dma_unmap_sg(dev->dev, iod->sg, iod->nents, dma_dir);
+
if (blk_integrity_rq(req))
dma_unmap_sg(dev->dev, &iod->meta_sg, 1, dma_dir);
}
@@ -2400,7 +2408,8 @@ static int nvme_pci_get_address(struct nvme_ctrl *ctrl, char *buf, int size)
static const struct nvme_ctrl_ops nvme_pci_ctrl_ops = {
.name = "pcie",
.module = THIS_MODULE,
- .flags = NVME_F_METADATA_SUPPORTED,
+ .flags = NVME_F_METADATA_SUPPORTED |
+ NVME_F_PCI_P2PDMA,
.reg_read32 = nvme_pci_reg_read32,
.reg_write32 = nvme_pci_reg_write32,
.reg_read64 = nvme_pci_reg_read64,
--
2.19.0
^ permalink raw reply related [flat|nested] 25+ messages in thread* [PATCH v7 11/13] nvme-pci: Add a quirk for a pseudo CMB
[not found] ` <20180925162231.4354-1-logang-OTvnGxWRz7hWk0Htik3J/w@public.gmane.org>
` (8 preceding siblings ...)
2018-09-25 16:22 ` [PATCH v7 10/13] nvme-pci: Add support for P2P memory in requests Logan Gunthorpe
@ 2018-09-25 16:22 ` Logan Gunthorpe
2018-09-25 16:22 ` [PATCH v7 12/13] nvmet: Introduce helper functions to allocate and free request SGLs Logan Gunthorpe
10 siblings, 0 replies; 25+ messages in thread
From: Logan Gunthorpe @ 2018-09-25 16:22 UTC (permalink / raw)
To: linux-kernel-u79uwXL29TY76Z2rM5mHXA,
linux-pci-u79uwXL29TY76Z2rM5mHXA,
linux-nvme-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r,
linux-rdma-u79uwXL29TY76Z2rM5mHXA,
linux-nvdimm-hn68Rpc1hR1g9hUCZPvPmw,
linux-block-u79uwXL29TY76Z2rM5mHXA
Cc: Jens Axboe, Christian König, Benjamin Herrenschmidt,
Alex Williamson, Jérôme Glisse, Jason Gunthorpe,
Bjorn Helgaas, Max Gurtovoy, Christoph Hellwig
Introduce a quirk to use CMB-like memory on older devices that have
an exposed BAR but do not advertise support for using CMBLOC and
CMBSIZE.
We'd like to use some of these older cards to test P2P memory.
Signed-off-by: Logan Gunthorpe <logang-OTvnGxWRz7hWk0Htik3J/w@public.gmane.org>
Reviewed-by: Sagi Grimberg <sagi-NQWnxTmZq1alnMjI0IkVqw@public.gmane.org>
---
drivers/nvme/host/nvme.h | 7 +++++++
drivers/nvme/host/pci.c | 24 ++++++++++++++++++++----
2 files changed, 27 insertions(+), 4 deletions(-)
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 4030743c90aa..8e6f3bcfe956 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -90,6 +90,13 @@ enum nvme_quirks {
* Set MEDIUM priority on SQ creation
*/
NVME_QUIRK_MEDIUM_PRIO_SQ = (1 << 7),
+
+ /*
+ * Pseudo CMB Support on BAR 4. For adapters like the Microsemi
+ * NVRAM that have CMB-like memory on a BAR but does not set
+ * CMBLOC or CMBSZ.
+ */
+ NVME_QUIRK_PSEUDO_CMB_BAR4 = (1 << 8),
};
/*
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 0d6c41bc2b35..db862ee6e53e 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -1644,6 +1644,13 @@ static ssize_t nvme_cmb_show(struct device *dev,
}
static DEVICE_ATTR(cmb, S_IRUGO, nvme_cmb_show, NULL);
+static u32 nvme_pseudo_cmbsz(struct pci_dev *pdev, int bar)
+{
+ return NVME_CMBSZ_WDS | NVME_CMBSZ_RDS |
+ (((ilog2(SZ_16M) - 12) / 4) << NVME_CMBSZ_SZU_SHIFT) |
+ ((pci_resource_len(pdev, bar) / SZ_16M) << NVME_CMBSZ_SZ_SHIFT);
+}
+
static u64 nvme_cmb_size_unit(struct nvme_dev *dev)
{
u8 szu = (dev->cmbsz >> NVME_CMBSZ_SZU_SHIFT) & NVME_CMBSZ_SZU_MASK;
@@ -1663,10 +1670,15 @@ static void nvme_map_cmb(struct nvme_dev *dev)
struct pci_dev *pdev = to_pci_dev(dev->dev);
int bar;
- dev->cmbsz = readl(dev->bar + NVME_REG_CMBSZ);
- if (!dev->cmbsz)
- return;
- dev->cmbloc = readl(dev->bar + NVME_REG_CMBLOC);
+ if (dev->ctrl.quirks & NVME_QUIRK_PSEUDO_CMB_BAR4) {
+ dev->cmbsz = nvme_pseudo_cmbsz(pdev, 4);
+ dev->cmbloc = 4;
+ } else {
+ dev->cmbsz = readl(dev->bar + NVME_REG_CMBSZ);
+ if (!dev->cmbsz)
+ return;
+ dev->cmbloc = readl(dev->bar + NVME_REG_CMBLOC);
+ }
size = nvme_cmb_size_unit(dev) * nvme_cmb_size(dev);
offset = nvme_cmb_size_unit(dev) * NVME_CMB_OFST(dev->cmbloc);
@@ -2715,6 +2727,10 @@ static const struct pci_device_id nvme_id_table[] = {
.driver_data = NVME_QUIRK_LIGHTNVM, },
{ PCI_DEVICE(0x1d1d, 0x2601), /* CNEX Granby */
.driver_data = NVME_QUIRK_LIGHTNVM, },
+ { PCI_DEVICE(0x11f8, 0xf117), /* Microsemi NVRAM adaptor */
+ .driver_data = NVME_QUIRK_PSEUDO_CMB_BAR4, },
+ { PCI_DEVICE(0x1db1, 0x0002), /* Everspin nvNitro adaptor */
+ .driver_data = NVME_QUIRK_PSEUDO_CMB_BAR4, },
{ PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_EXPRESS, 0xffffff) },
{ PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2001) },
{ PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2003) },
--
2.19.0
^ permalink raw reply related [flat|nested] 25+ messages in thread* [PATCH v7 12/13] nvmet: Introduce helper functions to allocate and free request SGLs
[not found] ` <20180925162231.4354-1-logang-OTvnGxWRz7hWk0Htik3J/w@public.gmane.org>
` (9 preceding siblings ...)
2018-09-25 16:22 ` [PATCH v7 11/13] nvme-pci: Add a quirk for a pseudo CMB Logan Gunthorpe
@ 2018-09-25 16:22 ` Logan Gunthorpe
10 siblings, 0 replies; 25+ messages in thread
From: Logan Gunthorpe @ 2018-09-25 16:22 UTC (permalink / raw)
To: linux-kernel-u79uwXL29TY76Z2rM5mHXA,
linux-pci-u79uwXL29TY76Z2rM5mHXA,
linux-nvme-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r,
linux-rdma-u79uwXL29TY76Z2rM5mHXA,
linux-nvdimm-hn68Rpc1hR1g9hUCZPvPmw,
linux-block-u79uwXL29TY76Z2rM5mHXA
Cc: Jens Axboe, Christian König, Benjamin Herrenschmidt,
Alex Williamson, Jérôme Glisse, Jason Gunthorpe,
Bjorn Helgaas, Max Gurtovoy, Christoph Hellwig
Add helpers to allocate and free the SGL in a struct nvmet_req:
int nvmet_req_alloc_sgl(struct nvmet_req *req, struct nvmet_sq *sq)
void nvmet_req_free_sgl(struct nvmet_req *req)
This will be expanded in a future patch to implement peer-to-peer
memory DMAs and should be common with all target drivers. The presently
unused 'sq' argument in the alloc function will be necessary to
decide whether to use peer-to-peer memory and obtain the correct
provider to allocate the memory.
The new helpers are used in nvmet-rdma. Seeing we use req.transfer_len
as the length of the SGL it is set earlier and cleared on any error.
It also seems to be unnecessary to accumulate the length as the map_sgl
functions should only ever be called once per request.
Signed-off-by: Logan Gunthorpe <logang-OTvnGxWRz7hWk0Htik3J/w@public.gmane.org>
Acked-by: Sagi Grimberg <sagi-NQWnxTmZq1alnMjI0IkVqw@public.gmane.org>
Cc: Christoph Hellwig <hch-jcswGhMUV9g@public.gmane.org>
---
drivers/nvme/target/core.c | 18 ++++++++++++++++++
drivers/nvme/target/nvmet.h | 2 ++
drivers/nvme/target/rdma.c | 20 ++++++++++++--------
3 files changed, 32 insertions(+), 8 deletions(-)
diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index b5ec96abd048..bddd1599b826 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -725,6 +725,24 @@ void nvmet_req_execute(struct nvmet_req *req)
}
EXPORT_SYMBOL_GPL(nvmet_req_execute);
+int nvmet_req_alloc_sgl(struct nvmet_req *req, struct nvmet_sq *sq)
+{
+ req->sg = sgl_alloc(req->transfer_len, GFP_KERNEL, &req->sg_cnt);
+ if (!req->sg)
+ return -ENOMEM;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(nvmet_req_alloc_sgl);
+
+void nvmet_req_free_sgl(struct nvmet_req *req)
+{
+ sgl_free(req->sg);
+ req->sg = NULL;
+ req->sg_cnt = 0;
+}
+EXPORT_SYMBOL_GPL(nvmet_req_free_sgl);
+
static inline bool nvmet_cc_en(u32 cc)
{
return (cc >> NVME_CC_EN_SHIFT) & 0x1;
diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
index ec9af4ee03b6..7d6cb61021e4 100644
--- a/drivers/nvme/target/nvmet.h
+++ b/drivers/nvme/target/nvmet.h
@@ -336,6 +336,8 @@ bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq,
void nvmet_req_uninit(struct nvmet_req *req);
void nvmet_req_execute(struct nvmet_req *req);
void nvmet_req_complete(struct nvmet_req *req, u16 status);
+int nvmet_req_alloc_sgl(struct nvmet_req *req, struct nvmet_sq *sq);
+void nvmet_req_free_sgl(struct nvmet_req *req);
void nvmet_cq_setup(struct nvmet_ctrl *ctrl, struct nvmet_cq *cq, u16 qid,
u16 size);
diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c
index bfc4da660bb4..b0d0cedc74bb 100644
--- a/drivers/nvme/target/rdma.c
+++ b/drivers/nvme/target/rdma.c
@@ -503,7 +503,7 @@ static void nvmet_rdma_release_rsp(struct nvmet_rdma_rsp *rsp)
}
if (rsp->req.sg != rsp->cmd->inline_sg)
- sgl_free(rsp->req.sg);
+ nvmet_req_free_sgl(&rsp->req);
if (unlikely(!list_empty_careful(&queue->rsp_wr_wait_list)))
nvmet_rdma_process_wr_wait_list(queue);
@@ -652,24 +652,24 @@ static u16 nvmet_rdma_map_sgl_keyed(struct nvmet_rdma_rsp *rsp,
{
struct rdma_cm_id *cm_id = rsp->queue->cm_id;
u64 addr = le64_to_cpu(sgl->addr);
- u32 len = get_unaligned_le24(sgl->length);
u32 key = get_unaligned_le32(sgl->key);
int ret;
+ rsp->req.transfer_len = get_unaligned_le24(sgl->length);
+
/* no data command? */
- if (!len)
+ if (!rsp->req.transfer_len)
return 0;
- rsp->req.sg = sgl_alloc(len, GFP_KERNEL, &rsp->req.sg_cnt);
- if (!rsp->req.sg)
- return NVME_SC_INTERNAL;
+ ret = nvmet_req_alloc_sgl(&rsp->req, &rsp->queue->nvme_sq);
+ if (ret < 0)
+ goto error_out;
ret = rdma_rw_ctx_init(&rsp->rw, cm_id->qp, cm_id->port_num,
rsp->req.sg, rsp->req.sg_cnt, 0, addr, key,
nvmet_data_dir(&rsp->req));
if (ret < 0)
- return NVME_SC_INTERNAL;
- rsp->req.transfer_len += len;
+ goto error_out;
rsp->n_rdma += ret;
if (invalidate) {
@@ -678,6 +678,10 @@ static u16 nvmet_rdma_map_sgl_keyed(struct nvmet_rdma_rsp *rsp,
}
return 0;
+
+error_out:
+ rsp->req.transfer_len = 0;
+ return NVME_SC_INTERNAL;
}
static u16 nvmet_rdma_map_sgl(struct nvmet_rdma_rsp *rsp)
--
2.19.0
^ permalink raw reply related [flat|nested] 25+ messages in thread