qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Kirti Wankhede <kwankhede@nvidia.com>
To: <alex.williamson@redhat.com>, <cjia@nvidia.com>
Cc: Zhengxiao.zx@Alibaba-inc.com, kevin.tian@intel.com,
	yi.l.liu@intel.com, yan.y.zhao@intel.com, kvm@vger.kernel.org,
	eskultet@redhat.com, ziye.yang@intel.com, qemu-devel@nongnu.org,
	cohuck@redhat.com, shuangtai.tst@alibaba-inc.com,
	dgilbert@redhat.com, zhi.a.wang@intel.com, mlevitsk@redhat.com,
	pasic@linux.ibm.com, aik@ozlabs.ru,
	Kirti Wankhede <kwankhede@nvidia.com>,
	eauger@redhat.com, felipe@nutanix.com,
	jonathan.davies@nutanix.com, changpeng.liu@intel.com,
	Ken.Xue@amd.com
Subject: [PATCH v1 1/2] Sample mtty: Add sysfs interface to pin pages
Date: Mon, 4 May 2020 23:24:19 +0530	[thread overview]
Message-ID: <1588614860-16330-2-git-send-email-kwankhede@nvidia.com> (raw)
In-Reply-To: <1588614860-16330-1-git-send-email-kwankhede@nvidia.com>

Add a sysfs interface to pin pages, which will be used to test migration
when the vendor driver pins pages.

Reading pin_pages shows the number of pages pinned:
 $ cat /sys/bus/mdev/devices/<mdev UUID>/vendor/pin_pages
Pinned 0x0 pages

Write a gpfn to pin_pages to pin that page. One page is pinned per write:
 $ echo 0x20 > /sys/bus/mdev/devices/<mdev UUID>/vendor/pin_pages

Limitation: the buffer created to track pinned pages covers only 2GB of
system memory.

This is for testing purposes only.

Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com>
---
 samples/vfio-mdev/mtty.c | 176 ++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 175 insertions(+), 1 deletion(-)

diff --git a/samples/vfio-mdev/mtty.c b/samples/vfio-mdev/mtty.c
index ce84a300a4da..bf666cce5bb7 100644
--- a/samples/vfio-mdev/mtty.c
+++ b/samples/vfio-mdev/mtty.c
@@ -64,6 +64,10 @@
 				(((u64)(1) << MTTY_VFIO_PCI_OFFSET_SHIFT) - 1)
 #define MAX_MTTYS	24
 
+/* Maximum number of 4K pages that can be pinned, covering up to 2G of RAM */
+#define MAX_GPFN_COUNT	(512 * 1024)
+#define PFN_NULL	(~0UL)
+
 /*
  * Global Structures
  */
@@ -141,6 +145,10 @@ struct mdev_state {
 	struct mutex rxtx_lock;
 	struct vfio_device_info dev_info;
 	int nr_ports;
+
+	/* Pinned gpfns: index is the gpfn, value is the translated hpfn */
+	unsigned long *gpfn_to_hpfn;
+	struct notifier_block nb;
 };
 
 static struct mutex mdev_list_lock;
@@ -745,6 +753,17 @@ static int mtty_create(struct kobject *kobj, struct mdev_device *mdev)
 		return -ENOMEM;
 	}
 
+	mdev_state->gpfn_to_hpfn =
+		 kzalloc(sizeof(unsigned long) * MAX_GPFN_COUNT, GFP_KERNEL);
+	if (mdev_state->gpfn_to_hpfn == NULL) {
+		kfree(mdev_state->vconfig);
+		kfree(mdev_state);
+		return -ENOMEM;
+	}
+
+	memset(mdev_state->gpfn_to_hpfn, ~0,
+	       sizeof(unsigned long) * MAX_GPFN_COUNT);
+
 	mutex_init(&mdev_state->ops_lock);
 	mdev_state->mdev = mdev;
 	mdev_set_drvdata(mdev, mdev_state);
@@ -769,6 +788,7 @@ static int mtty_remove(struct mdev_device *mdev)
 		if (mdev_state == mds) {
 			list_del(&mdev_state->next);
 			mdev_set_drvdata(mdev, NULL);
+			kfree(mdev_state->gpfn_to_hpfn);
 			kfree(mdev_state->vconfig);
 			kfree(mdev_state);
 			ret = 0;
@@ -1246,15 +1266,95 @@ static long mtty_ioctl(struct mdev_device *mdev, unsigned int cmd,
 	return -ENOTTY;
 }
 
+/* Unpin every page still recorded in gpfn_to_hpfn and clear its entry. */
+static void unpin_pages_all(struct mdev_state *mdev_state)
+{
+	unsigned long gpfn;
+	int ret;
+
+	mutex_lock(&mdev_state->ops_lock);
+	for (gpfn = 0; gpfn < MAX_GPFN_COUNT; gpfn++) {
+		if (mdev_state->gpfn_to_hpfn[gpfn] == PFN_NULL)
+			continue;
+
+		ret = vfio_unpin_pages(mdev_dev(mdev_state->mdev), &gpfn, 1);
+		if (ret > 0) {
+			mdev_state->gpfn_to_hpfn[gpfn] = PFN_NULL;
+			continue;
+		}
+		/* Leave the entry in place when the unpin did not succeed. */
+		pr_err("%s: 0x%lx unpin error %d\n", __func__, gpfn, ret);
+	}
+	mutex_unlock(&mdev_state->ops_lock);
+}
+
+/* DMA unmap notifier: unpin tracked pages in the range being unmapped. */
+static int unmap_notifier(struct notifier_block *nb, unsigned long action,
+			  void *data)
+{
+	struct mdev_state *mdev_state = container_of(nb, struct mdev_state, nb);
+	struct vfio_iommu_type1_dma_unmap *unmap = data;
+	unsigned long i, end;
+	int ret;
+
+	if (action != VFIO_IOMMU_NOTIFY_DMA_UNMAP)
+		return 0;
+
+	/* Clamp to the table size so unmaps above it cannot index past it. */
+	end = (unmap->iova + unmap->size) >> PAGE_SHIFT;
+	if (end > MAX_GPFN_COUNT)
+		end = MAX_GPFN_COUNT;
+
+	mutex_lock(&mdev_state->ops_lock);
+	for (i = unmap->iova >> PAGE_SHIFT; i < end; i++) {
+		if (mdev_state->gpfn_to_hpfn[i] == PFN_NULL)
+			continue;
+		ret = vfio_unpin_pages(mdev_dev(mdev_state->mdev), &i, 1);
+		if (ret <= 0) {
+			pr_err("%s: 0x%lx unpin error %d\n", __func__, i, ret);
+			continue;
+		}
+		mdev_state->gpfn_to_hpfn[i] = PFN_NULL;
+	}
+	mutex_unlock(&mdev_state->ops_lock);
+	return 0;
+}
+
 static int mtty_open(struct mdev_device *mdev)
 {
+	unsigned long events = VFIO_IOMMU_NOTIFY_DMA_UNMAP;
+	struct mdev_state *mdev_state;
+	int ret;
+
 	pr_info("%s\n", __func__);
-	return 0;
+
+	if (!mdev)
+		return -EINVAL;
+
+	mdev_state = mdev_get_drvdata(mdev);
+	if (!mdev_state)
+		return -ENODEV;
+	/* Have DMA unmap events delivered to unmap_notifier(). */
+	mdev_state->nb.notifier_call = unmap_notifier;
+
+	ret = vfio_register_notifier(mdev_dev(mdev), VFIO_IOMMU_NOTIFY, &events,
+				     &mdev_state->nb);
+	return ret;
 }
 
 static void mtty_close(struct mdev_device *mdev)
 {
+	struct mdev_state *mdev_state;
+
 	pr_info("%s\n", __func__);
+
+	mdev_state = mdev_get_drvdata(mdev);
+	if (!mdev_state)
+		return;
+	/* Release every remaining pin, then drop the unmap notifier. */
+	unpin_pages_all(mdev_state);
+	vfio_unregister_notifier(mdev_dev(mdev), VFIO_IOMMU_NOTIFY,
+				 &mdev_state->nb);
 }
 
 static ssize_t
@@ -1293,8 +1393,82 @@ sample_mdev_dev_show(struct device *dev, struct device_attribute *attr,
 
 static DEVICE_ATTR_RO(sample_mdev_dev);
 
+/*
+ * sysfs read handler for pin_pages: counts the gpfn_to_hpfn entries that
+ * hold a pinned page and prints the total in hex.
+ */
+static ssize_t
+pin_pages_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	struct mdev_device *mdev = mdev_from_dev(dev);
+	struct mdev_state *state;
+	int gpfn, pinned = 0;
+
+	state = mdev ? mdev_get_drvdata(mdev) : NULL;
+	if (!state)
+		return -EINVAL;
+
+	mutex_lock(&state->ops_lock);
+	for (gpfn = 0; gpfn < MAX_GPFN_COUNT; gpfn++)
+		if (state->gpfn_to_hpfn[gpfn] != PFN_NULL)
+			pinned++;
+	mutex_unlock(&state->ops_lock);
+	return sprintf(buf, "Pinned 0x%x pages\n", pinned);
+}
+
+/* Pin the gpfn written to pin_pages; returns count or a negative errno. */
+static ssize_t
+pin_pages_store(struct device *dev, struct device_attribute *attr,
+		const char *buf, size_t count)
+{
+	struct mdev_device *mdev = mdev_from_dev(dev);
+	struct mdev_state *mdev_state;
+	unsigned long gpfn, hpfn;
+	int ret;
+
+	if (!mdev)
+		return -EINVAL;
+
+	mdev_state = mdev_get_drvdata(mdev);
+	if (!mdev_state)
+		return -EINVAL;
+
+	ret = kstrtoul(buf, 0, &gpfn);
+	if (ret)
+		return ret;
+
+	if (gpfn >= MAX_GPFN_COUNT) {
+		pr_err("Error 0x%lx >= 0x%lx\n",
+		       gpfn, (unsigned long)MAX_GPFN_COUNT);
+		return -EINVAL;
+	}
+
+	mutex_lock(&mdev_state->ops_lock);
+	if (mdev_state->gpfn_to_hpfn[gpfn] != PFN_NULL) {
+		ret = -EEXIST;
+		goto out;
+	}
+
+	ret = vfio_pin_pages(mdev_dev(mdev), &gpfn, 1,
+			     IOMMU_READ | IOMMU_WRITE, &hpfn);
+	if (ret <= 0) {
+		pr_err("Failed to pin, ret %d\n", ret);
+		ret = ret ? ret : -EFAULT;	/* never return 0 from store */
+		goto out;
+	}
+
+	mdev_state->gpfn_to_hpfn[gpfn] = hpfn;
+	ret = count;
+out:
+	mutex_unlock(&mdev_state->ops_lock);
+	return ret;
+}
+
+static DEVICE_ATTR_RW(pin_pages);
+
 static struct attribute *mdev_dev_attrs[] = {
 	&dev_attr_sample_mdev_dev.attr,
+	&dev_attr_pin_pages.attr,
 	NULL,
 };
 
-- 
2.7.0



  reply	other threads:[~2020-05-04 18:29 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-05-04 17:54 [PATCH v1 0/2] Sample mtty: Add migration support Kirti Wankhede
2020-05-04 17:54 ` Kirti Wankhede [this message]
2020-05-04 17:54 ` [PATCH v1 2/2] Sample mtty: Add migration capability to mtty module Kirti Wankhede
2020-05-07  1:01   ` Yan Zhao
2020-05-07  5:49     ` Kirti Wankhede
2020-05-08  1:24       ` Yan Zhao
2020-05-07 18:43   ` Dr. David Alan Gilbert
2020-05-29 18:58   ` Alex Williamson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1588614860-16330-2-git-send-email-kwankhede@nvidia.com \
    --to=kwankhede@nvidia.com \
    --cc=Ken.Xue@amd.com \
    --cc=Zhengxiao.zx@Alibaba-inc.com \
    --cc=aik@ozlabs.ru \
    --cc=alex.williamson@redhat.com \
    --cc=changpeng.liu@intel.com \
    --cc=cjia@nvidia.com \
    --cc=cohuck@redhat.com \
    --cc=dgilbert@redhat.com \
    --cc=eauger@redhat.com \
    --cc=eskultet@redhat.com \
    --cc=felipe@nutanix.com \
    --cc=jonathan.davies@nutanix.com \
    --cc=kevin.tian@intel.com \
    --cc=kvm@vger.kernel.org \
    --cc=mlevitsk@redhat.com \
    --cc=pasic@linux.ibm.com \
    --cc=qemu-devel@nongnu.org \
    --cc=shuangtai.tst@alibaba-inc.com \
    --cc=yan.y.zhao@intel.com \
    --cc=yi.l.liu@intel.com \
    --cc=zhi.a.wang@intel.com \
    --cc=ziye.yang@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).