linux-pci.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Logan Gunthorpe <logang@deltatee.com>
To: Christoph Hellwig <hch@lst.de>, Sagi Grimberg <sagi@grimberg.me>,
	"James E.J. Bottomley" <jejb@linux.vnet.ibm.com>,
	"Martin K. Petersen" <martin.petersen@oracle.com>,
	Jens Axboe <axboe@kernel.dk>,
	Steve Wise <swise@opengridcomputing.com>,
	Stephen Bates <sbates@raithlin.com>,
	Max Gurtovoy <maxg@mellanox.com>,
	Dan Williams <dan.j.williams@intel.com>,
	Keith Busch <keith.busch@intel.com>,
	Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
Cc: linux-pci@vger.kernel.org, linux-scsi@vger.kernel.org,
	linux-nvme@lists.infradead.org, linux-rdma@vger.kernel.org,
	linux-nvdimm@lists.01.org, linux-kernel@vger.kernel.org,
	Logan Gunthorpe <logang@deltatee.com>
Subject: [RFC 7/8] p2pmem: Support device removal
Date: Thu, 30 Mar 2017 16:12:38 -0600	[thread overview]
Message-ID: <1490911959-5146-8-git-send-email-logang@deltatee.com> (raw)
In-Reply-To: <1490911959-5146-1-git-send-email-logang@deltatee.com>

This patch creates a list of callbacks to notify users of this memory
that the p2pmem device is going away or gone.

In nvmet-rdma, we disconnect any queue using p2p memory.
The remote side will then automatically reconnect in a
couple seconds and regular system memory (or a different p2pmem device)
will be used.

Signed-off-by: Logan Gunthorpe <logang@deltatee.com>
Signed-off-by: Stephen Bates <sbates@raithlin.com>
Signed-off-by: Steve Wise <swise@opengridcomputing.com>
---
 drivers/memory/p2pmem.c    | 75 ++++++++++++++++++++++++++++++++---
 drivers/nvme/target/rdma.c | 98 ++++++++++++++++++++++++++--------------------
 include/linux/p2pmem.h     | 19 +++++++--
 3 files changed, 140 insertions(+), 52 deletions(-)

diff --git a/drivers/memory/p2pmem.c b/drivers/memory/p2pmem.c
index 71741c2..499d42c 100644
--- a/drivers/memory/p2pmem.c
+++ b/drivers/memory/p2pmem.c
@@ -105,6 +105,21 @@ static void p2pmem_release(struct device *dev)
 	kfree(p);
 }
 
+struct remove_callback {
+	struct list_head list;
+	void (*callback)(void *context);
+	void *context;
+};
+
+static void p2pmem_remove(struct p2pmem_dev *p)
+{
+	struct remove_callback *remove_call, *tmp;
+
+	p->alive = false;
+	list_for_each_entry_safe(remove_call, tmp, &p->remove_list, list)
+		remove_call->callback(remove_call->context);
+}
+
 /**
  * p2pmem_create() - create a new p2pmem device
  * @parent: the parent device to create it under
@@ -123,6 +138,10 @@ struct p2pmem_dev *p2pmem_create(struct device *parent)
 		return ERR_PTR(-ENOMEM);
 
 	init_completion(&p->cmp);
+	mutex_init(&p->remove_mutex);
+	INIT_LIST_HEAD(&p->remove_list);
+	p->alive = true;
+
 	device_initialize(&p->dev);
 	p->dev.class = p2pmem_class;
 	p->dev.parent = parent;
@@ -187,6 +206,7 @@ void p2pmem_unregister(struct p2pmem_dev *p)
 
 	dev_info(&p->dev, "unregistered");
 	device_del(&p->dev);
+	p2pmem_remove(p);
 	ida_simple_remove(&p2pmem_ida, p->id);
 	put_device(&p->dev);
 }
@@ -291,6 +311,9 @@ EXPORT_SYMBOL(p2pmem_add_pci_region);
  */
 void *p2pmem_alloc(struct p2pmem_dev *p, size_t size)
 {
+	if (!p->alive)
+		return NULL;
+
 	return (void *)gen_pool_alloc(p->pool, size);
 }
 EXPORT_SYMBOL(p2pmem_alloc);
@@ -349,6 +372,9 @@ static int upstream_bridges_match(struct device *p2pmem,
 	struct pci_dev *p2p_up;
 	struct pci_dev *dma_up;
 
+	if (!to_p2pmem(p2pmem)->alive)
+		return false;
+
 	p2p_up = get_upstream_switch_port(p2pmem);
 	if (!p2p_up) {
 		dev_warn(p2pmem, "p2pmem is not behind a pci switch");
@@ -383,6 +409,8 @@ static int upstream_bridges_match(struct device *p2pmem,
  *	specified devices
  * @dma_devices: a null terminated array of device pointers which
  *	all must be compatible with the returned p2pmem device
+ * @remove_callback: this callback will be called if the p2pmem
+ *	device is removed.
  *
  * For now, we only support cases where all the devices that
  * will transfer to the p2pmem device are on the same switch.
@@ -400,9 +428,13 @@ static int upstream_bridges_match(struct device *p2pmem,
  * (use p2pmem_put to return the reference) or NULL if no compatible
  * p2pmem device is found.
  */
-struct p2pmem_dev *p2pmem_find_compat(struct device **dma_devices)
+struct p2pmem_dev *p2pmem_find_compat(struct device **dma_devices,
+				      void (*remove_callback)(void *context),
+				      void *context)
 {
 	struct device *dev;
+	struct p2pmem_dev *p;
+	struct remove_callback *remove_call;
 
 	dev = class_find_device(p2pmem_class, NULL, dma_devices,
 				upstream_bridges_match);
@@ -410,21 +442,54 @@ struct p2pmem_dev *p2pmem_find_compat(struct device **dma_devices)
 	if (!dev)
 		return NULL;
 
-	return to_p2pmem(dev);
+	p = to_p2pmem(dev);
+	mutex_lock(&p->remove_mutex);
+
+	if (!p->alive) {
+		p = NULL;
+		goto out;
+	}
+
+	remove_call = kzalloc(sizeof(*remove_call), GFP_KERNEL);
+	remove_call->callback = remove_callback;
+	remove_call->context = context;
+	INIT_LIST_HEAD(&remove_call->list);
+	list_add(&remove_call->list, &p->remove_list);
+
+out:
+	mutex_unlock(&p->remove_mutex);
+	return p;
 }
 EXPORT_SYMBOL(p2pmem_find_compat);
 
 /**
  * p2pmem_put() - decrement a p2pmem device reference
  * @p: p2pmem device to return
+ * @data: data pointer that was passed to p2pmem_find_compat
  *
  * Dereference and free (if last) the device's reference counter.
  * It's safe to pass a NULL pointer to this function.
  */
-void p2pmem_put(struct p2pmem_dev *p)
+void p2pmem_put(struct p2pmem_dev *p, void *context)
 {
-	if (p)
-		put_device(&p->dev);
+	struct remove_callback *remove_call;
+
+	if (!p)
+		return;
+
+	mutex_lock(&p->remove_mutex);
+
+	list_for_each_entry(remove_call, &p->remove_list, list) {
+		if (remove_call->context != context)
+			continue;
+
+		list_del(&remove_call->list);
+		kfree(remove_call);
+		break;
+	}
+
+	mutex_unlock(&p->remove_mutex);
+	put_device(&p->dev);
 }
 EXPORT_SYMBOL(p2pmem_put);
 
diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c
index abab544..9ebcda6 100644
--- a/drivers/nvme/target/rdma.c
+++ b/drivers/nvme/target/rdma.c
@@ -1008,7 +1008,7 @@ static void nvmet_rdma_free_queue(struct nvmet_rdma_queue *queue)
 				!queue->host_qid);
 	}
 	nvmet_rdma_free_rsps(queue);
-	p2pmem_put(queue->p2pmem);
+	p2pmem_put(queue->p2pmem, queue);
 	ida_simple_remove(&nvmet_rdma_queue_ida, queue->idx);
 	kfree(queue);
 }
@@ -1204,6 +1204,58 @@ static int nvmet_rdma_cm_accept(struct rdma_cm_id *cm_id,
 	return ret;
 }
 
+static void __nvmet_rdma_queue_disconnect(struct nvmet_rdma_queue *queue)
+{
+	bool disconnect = false;
+	unsigned long flags;
+
+	pr_debug("cm_id= %p queue->state= %d\n", queue->cm_id, queue->state);
+
+	spin_lock_irqsave(&queue->state_lock, flags);
+	switch (queue->state) {
+	case NVMET_RDMA_Q_CONNECTING:
+	case NVMET_RDMA_Q_LIVE:
+		queue->state = NVMET_RDMA_Q_DISCONNECTING;
+	case NVMET_RDMA_IN_DEVICE_REMOVAL:
+		disconnect = true;
+		break;
+	case NVMET_RDMA_Q_DISCONNECTING:
+		break;
+	}
+	spin_unlock_irqrestore(&queue->state_lock, flags);
+
+	if (disconnect) {
+		rdma_disconnect(queue->cm_id);
+		schedule_work(&queue->release_work);
+	}
+}
+
+static void nvmet_rdma_queue_disconnect(struct nvmet_rdma_queue *queue)
+{
+	bool disconnect = false;
+
+	mutex_lock(&nvmet_rdma_queue_mutex);
+	if (!list_empty(&queue->queue_list)) {
+		list_del_init(&queue->queue_list);
+		disconnect = true;
+	}
+	mutex_unlock(&nvmet_rdma_queue_mutex);
+
+	if (disconnect)
+		__nvmet_rdma_queue_disconnect(queue);
+}
+
+static void nvmet_rdma_p2pmem_remove(void *context)
+{
+	struct nvmet_rdma_queue *queue = context;
+
+	if (!queue->p2pmem)
+		return;
+
+	nvmet_rdma_queue_disconnect(queue);
+	flush_scheduled_work();
+}
+
 /*
  * If allow_p2pmem is set, we will try to use P2P memory for our
  * sgl lists. This requires the p2pmem device to be compatible with
@@ -1241,7 +1293,8 @@ static void nvmet_rdma_queue_setup_p2pmem(struct nvmet_rdma_queue *queue)
 
 	dma_devs[i++] = NULL;
 
-	queue->p2pmem = p2pmem_find_compat(dma_devs);
+	queue->p2pmem = p2pmem_find_compat(dma_devs, nvmet_rdma_p2pmem_remove,
+					   queue);
 
 	if (queue->p2pmem)
 		pr_debug("using %s for rdma nvme target queue",
@@ -1317,47 +1370,6 @@ static void nvmet_rdma_queue_established(struct nvmet_rdma_queue *queue)
 	spin_unlock_irqrestore(&queue->state_lock, flags);
 }
 
-static void __nvmet_rdma_queue_disconnect(struct nvmet_rdma_queue *queue)
-{
-	bool disconnect = false;
-	unsigned long flags;
-
-	pr_debug("cm_id= %p queue->state= %d\n", queue->cm_id, queue->state);
-
-	spin_lock_irqsave(&queue->state_lock, flags);
-	switch (queue->state) {
-	case NVMET_RDMA_Q_CONNECTING:
-	case NVMET_RDMA_Q_LIVE:
-		queue->state = NVMET_RDMA_Q_DISCONNECTING;
-	case NVMET_RDMA_IN_DEVICE_REMOVAL:
-		disconnect = true;
-		break;
-	case NVMET_RDMA_Q_DISCONNECTING:
-		break;
-	}
-	spin_unlock_irqrestore(&queue->state_lock, flags);
-
-	if (disconnect) {
-		rdma_disconnect(queue->cm_id);
-		schedule_work(&queue->release_work);
-	}
-}
-
-static void nvmet_rdma_queue_disconnect(struct nvmet_rdma_queue *queue)
-{
-	bool disconnect = false;
-
-	mutex_lock(&nvmet_rdma_queue_mutex);
-	if (!list_empty(&queue->queue_list)) {
-		list_del_init(&queue->queue_list);
-		disconnect = true;
-	}
-	mutex_unlock(&nvmet_rdma_queue_mutex);
-
-	if (disconnect)
-		__nvmet_rdma_queue_disconnect(queue);
-}
-
 static void nvmet_rdma_queue_connect_fail(struct rdma_cm_id *cm_id,
 		struct nvmet_rdma_queue *queue)
 {
diff --git a/include/linux/p2pmem.h b/include/linux/p2pmem.h
index 4cd6f35..9365b02 100644
--- a/include/linux/p2pmem.h
+++ b/include/linux/p2pmem.h
@@ -22,12 +22,16 @@
 struct p2pmem_dev {
 	struct device dev;
 	int id;
+	bool alive;
 
 	struct percpu_ref ref;
 	struct completion cmp;
 	struct gen_pool *pool;
 
 	struct dentry *debugfs_root;
+
+	struct mutex remove_mutex;	/* protects the remove callback list */
+	struct list_head remove_list;
 };
 
 #ifdef CONFIG_P2PMEM
@@ -41,8 +45,12 @@ int p2pmem_add_pci_region(struct p2pmem_dev *p, struct pci_dev *pdev, int bar);
 void *p2pmem_alloc(struct p2pmem_dev *p, size_t size);
 void p2pmem_free(struct p2pmem_dev *p, void *addr, size_t size);
 
-struct p2pmem_dev *p2pmem_find_compat(struct device **dma_devices);
-void p2pmem_put(struct p2pmem_dev *p);
+struct p2pmem_dev *
+p2pmem_find_compat(struct device **dma_devices,
+		   void (*unregister_callback)(void *context),
+		   void *context);
+
+void p2pmem_put(struct p2pmem_dev *p, void *context);
 
 #else
 
@@ -76,12 +84,15 @@ static inline void p2pmem_free(struct p2pmem_dev *p, void *addr, size_t size)
 {
 }
 
-static inline struct p2pmem_dev *p2pmem_find_compat(struct device **dma_devs)
+static inline struct p2pmem_dev *
+p2pmem_find_compat(struct device **dma_devices,
+		   void (*unregister_callback)(void *context),
+		   void *context)
 {
 	return NULL;
 }
 
-static inline void p2pmem_put(struct p2pmem_dev *p)
+static inline void p2pmem_put(struct p2pmem_dev *p, void *context)
 {
 }
 
-- 
2.1.4

  parent reply	other threads:[~2017-03-30 22:12 UTC|newest]

Thread overview: 81+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-03-30 22:12 [RFC 0/8] Copy Offload with Peer-to-Peer PCI Memory Logan Gunthorpe
2017-03-30 22:12 ` [RFC 1/8] Introduce Peer-to-Peer memory (p2pmem) device Logan Gunthorpe
2017-03-31 18:49   ` Sinan Kaya
2017-03-31 21:23     ` Logan Gunthorpe
2017-03-31 21:38       ` Sinan Kaya
2017-03-31 22:42         ` Logan Gunthorpe
2017-03-31 23:51           ` Sinan Kaya
2017-04-01  1:57             ` Logan Gunthorpe
2017-04-01  2:17               ` okaya
2017-04-01 22:16                 ` Logan Gunthorpe
2017-04-02  2:26                   ` Sinan Kaya
2017-04-02 17:21                     ` Logan Gunthorpe
2017-04-02 21:03                       ` Sinan Kaya
2017-04-03  4:26                         ` Logan Gunthorpe
2017-04-25 11:58                           ` Marta Rybczynska
2017-04-25 16:58                             ` Logan Gunthorpe
2017-03-30 22:12 ` [RFC 2/8] cxgb4: setup pcie memory window 4 and create p2pmem region Logan Gunthorpe
2017-04-04 10:42   ` Sagi Grimberg
2017-04-04 15:56     ` Logan Gunthorpe
2017-04-05 15:41     ` Steve Wise
2017-03-30 22:12 ` [RFC 3/8] nvmet: Use p2pmem in nvme target Logan Gunthorpe
2017-04-04 10:40   ` Sagi Grimberg
2017-04-04 16:16     ` Logan Gunthorpe
2017-04-06  5:47       ` Sagi Grimberg
2017-04-06 15:52         ` Logan Gunthorpe
2017-03-30 22:12 ` [RFC 4/8] p2pmem: Add debugfs "stats" file Logan Gunthorpe
2017-04-04 10:46   ` Sagi Grimberg
2017-04-04 17:25     ` Logan Gunthorpe
2017-04-05 15:43     ` Steve Wise
2017-03-30 22:12 ` [RFC 5/8] scatterlist: Modify SG copy functions to support io memory Logan Gunthorpe
2017-03-31  7:09   ` Christoph Hellwig
2017-03-31 15:41     ` Logan Gunthorpe
2017-04-03 21:20       ` Logan Gunthorpe
2017-04-03 21:44         ` Dan Williams
2017-04-03 22:10           ` Logan Gunthorpe
2017-04-03 22:47             ` Dan Williams
2017-04-03 23:12               ` Logan Gunthorpe
2017-04-04  0:07                 ` Dan Williams
2017-04-07 17:59                   ` Logan Gunthorpe
2017-03-30 22:12 ` [RFC 6/8] nvmet: Be careful about using iomem accesses when dealing with p2pmem Logan Gunthorpe
2017-04-04 10:59   ` Sagi Grimberg
2017-04-04 15:46     ` Jason Gunthorpe
2017-04-04 17:21       ` Logan Gunthorpe
2017-04-06  5:33       ` Sagi Grimberg
2017-04-06 16:02         ` Logan Gunthorpe
2017-04-06 16:35         ` Jason Gunthorpe
2017-04-07 11:19         ` Stephen  Bates
2017-04-10  8:29           ` Sagi Grimberg
2017-04-10 16:03             ` Logan Gunthorpe
2017-03-30 22:12 ` Logan Gunthorpe [this message]
2017-03-30 22:12 ` [RFC 8/8] p2pmem: Added char device user interface Logan Gunthorpe
2017-04-12  5:22 ` [RFC 0/8] Copy Offload with Peer-to-Peer PCI Memory Benjamin Herrenschmidt
2017-04-12 17:09   ` Logan Gunthorpe
2017-04-12 21:55     ` Benjamin Herrenschmidt
2017-04-13 21:22       ` Logan Gunthorpe
2017-04-13 22:37         ` Benjamin Herrenschmidt
2017-04-13 23:26         ` Bjorn Helgaas
2017-04-14  4:16           ` Jason Gunthorpe
2017-04-14  4:40             ` Logan Gunthorpe
2017-04-14 11:37               ` Benjamin Herrenschmidt
2017-04-14 11:39                 ` Benjamin Herrenschmidt
2017-04-14 11:37             ` Benjamin Herrenschmidt
2017-04-14 17:30               ` Logan Gunthorpe
2017-04-14 19:04                 ` Bjorn Helgaas
2017-04-14 22:07                   ` Benjamin Herrenschmidt
2017-04-15 17:41                     ` Logan Gunthorpe
2017-04-15 22:09                       ` Dan Williams
2017-04-16  3:01                         ` Benjamin Herrenschmidt
2017-04-16  4:46                           ` Logan Gunthorpe
2017-04-16 15:53                           ` Dan Williams
2017-04-16 16:34                             ` Logan Gunthorpe
2017-04-16 22:31                               ` Benjamin Herrenschmidt
2017-04-24  7:36                                 ` Knut Omang
2017-04-24 16:14                                   ` Logan Gunthorpe
2017-04-25  6:30                                     ` Knut Omang
2017-04-25 17:03                                       ` Logan Gunthorpe
2017-04-25 21:23                                         ` Stephen  Bates
2017-04-25 21:23                                   ` Stephen  Bates
2017-04-16 22:26                             ` Benjamin Herrenschmidt
2017-04-15 22:17                       ` Benjamin Herrenschmidt
2017-04-16  5:36                         ` Logan Gunthorpe

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1490911959-5146-8-git-send-email-logang@deltatee.com \
    --to=logang@deltatee.com \
    --cc=axboe@kernel.dk \
    --cc=dan.j.williams@intel.com \
    --cc=hch@lst.de \
    --cc=jejb@linux.vnet.ibm.com \
    --cc=jgunthorpe@obsidianresearch.com \
    --cc=keith.busch@intel.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-nvdimm@lists.01.org \
    --cc=linux-nvme@lists.infradead.org \
    --cc=linux-pci@vger.kernel.org \
    --cc=linux-rdma@vger.kernel.org \
    --cc=linux-scsi@vger.kernel.org \
    --cc=martin.petersen@oracle.com \
    --cc=maxg@mellanox.com \
    --cc=sagi@grimberg.me \
    --cc=sbates@raithlin.com \
    --cc=swise@opengridcomputing.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).