public inbox for kvm@vger.kernel.org
 help / color / mirror / Atom feed
From: Mahmoud Adam <mngyadam@amazon.de>
To: <kvm@vger.kernel.org>
Cc: <alex.williamson@redhat.com>, <jgg@ziepe.ca>, <kbusch@kernel.org>,
	<benh@kernel.crashing.org>, David Woodhouse <dwmw@amazon.co.uk>,
	<pravkmr@amazon.de>, <nagy@khwaternagy.com>,
	<linux-kernel@vger.kernel.org>
Subject: [RFC PATCH 3/7] vfio/pci: add RCU locking for regions access
Date: Wed, 24 Sep 2025 16:09:54 +0200	[thread overview]
Message-ID: <20250924141018.80202-4-mngyadam@amazon.de> (raw)
In-Reply-To: <20250924141018.80202-1-mngyadam@amazon.de>

Since we could request to add more regions after initialization. We
would need locking to avoid racing with readers and cause UAF. use RCU
for read-write synchronization. And region_lock mutex is used to
synchronize the write section.

Changing the value of num_regions is done under the mutex. Since the
num_regions can only increase, using READ_ONCE and WRITE_ONCE should
be enough to make sure we have a valid value. On the write section,
synchronize_rcu() is run before incrementing num_regions. Doing that
makes sure read sections are passed before increasing num_regions to
avoid causing out-of-bound access.

Signed-off-by: Mahmoud Adam <mngyadam@amazon.de>
---
 drivers/vfio/pci/vfio_pci_core.c | 59 +++++++++++++++++++++++---------
 drivers/vfio/pci/vfio_pci_igd.c  | 16 ++++++---
 include/linux/vfio_pci_core.h    |  1 +
 3 files changed, 55 insertions(+), 21 deletions(-)

diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c
index 6629490c0e46f..78e18bfd973e5 100644
--- a/drivers/vfio/pci/vfio_pci_core.c
+++ b/drivers/vfio/pci/vfio_pci_core.c
@@ -882,7 +882,8 @@ static int msix_mmappable_cap(struct vfio_pci_core_device *vdev,
 }
 
 /*
- * Registers a new region to vfio_pci_core_device.
+ * Registers a new region to vfio_pci_core_device. region_lock should
+ * be held when multiple registers could happen.
  * Returns region index on success or a negative errno.
  */
 int vfio_pci_core_register_dev_region(struct vfio_pci_core_device *vdev,
@@ -890,15 +891,20 @@ int vfio_pci_core_register_dev_region(struct vfio_pci_core_device *vdev,
 				      const struct vfio_pci_regops *ops,
 				      size_t size, u32 flags, void *data)
 {
-	int num_regions = vdev->num_regions;
 	struct vfio_pci_region *region, *old_region;
+	int num_regions;
+
+	mutex_lock(&vdev->region_lock);
+	num_regions = READ_ONCE(vdev->num_regions);
 
 	region = kmalloc((num_regions + 1) * sizeof(*region),
 			 GFP_KERNEL_ACCOUNT);
 	if (!region)
 		return -ENOMEM;
 
-	old_region = vdev->region;
+	old_region =
+		rcu_dereference_protected(vdev->region,
+					  lockdep_is_held(&vdev->region_lock));
 	if (old_region)
 		memcpy(region, old_region, num_regions * sizeof(*region));
 
@@ -909,8 +915,10 @@ int vfio_pci_core_register_dev_region(struct vfio_pci_core_device *vdev,
 	region[num_regions].flags = flags;
 	region[num_regions].data = data;
 
-	vdev->region = region;
-	vdev->num_regions++;
+	rcu_assign_pointer(vdev->region, region);
+	synchronize_rcu();
+	WRITE_ONCE(vdev->num_regions, READ_ONCE(vdev->num_regions) + 1);
+	mutex_unlock(&vdev->region_lock);
 	kfree(old_region);
 	return num_regions;
 }
@@ -968,7 +976,7 @@ static int vfio_pci_ioctl_get_info(struct vfio_pci_core_device *vdev,
 	if (vdev->reset_works)
 		info.flags |= VFIO_DEVICE_FLAGS_RESET;
 
-	info.num_regions = VFIO_PCI_NUM_REGIONS + vdev->num_regions;
+	info.num_regions = VFIO_PCI_NUM_REGIONS + READ_ONCE(vdev->num_regions);
 	info.num_irqs = VFIO_PCI_NUM_IRQS;
 
 	ret = vfio_pci_info_zdev_add_caps(vdev, &caps);
@@ -1094,13 +1102,16 @@ static int vfio_pci_ioctl_get_region_info(struct vfio_pci_core_device *vdev,
 			.header.version = 1
 		};
 
-		if (info.index >= VFIO_PCI_NUM_REGIONS + vdev->num_regions)
+		if (info.index >= VFIO_PCI_NUM_REGIONS +
+					  READ_ONCE(vdev->num_regions))
 			return -EINVAL;
-		info.index = array_index_nospec(
-			info.index, VFIO_PCI_NUM_REGIONS + vdev->num_regions);
+		info.index = array_index_nospec(info.index,
+						VFIO_PCI_NUM_REGIONS +
+						READ_ONCE(vdev->num_regions));
 
 		i = info.index - VFIO_PCI_NUM_REGIONS;
-		region = &vdev->region[i];
+		rcu_read_lock();
+		region = &rcu_dereference(vdev->region)[i];
 
 		info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
 		info.size = region->size;
@@ -1111,15 +1122,20 @@ static int vfio_pci_ioctl_get_region_info(struct vfio_pci_core_device *vdev,
 
 		ret = vfio_info_add_capability(&caps, &cap_type.header,
 					       sizeof(cap_type));
-		if (ret)
+		if (ret) {
+			rcu_read_unlock();
 			return ret;
+		}
 
 		if (region->ops->add_capability) {
 			ret = region->ops->add_capability(
 				vdev, region, &caps);
-			if (ret)
+			if (ret) {
+				rcu_read_unlock();
 				return ret;
+			}
 		}
+		rcu_read_unlock();
 	}
 	}
 
@@ -1536,7 +1552,7 @@ static ssize_t vfio_pci_rw(struct vfio_pci_core_device *vdev, char __user *buf,
 	unsigned int index = VFIO_PCI_OFFSET_TO_INDEX(*ppos);
 	int ret;
 
-	if (index >= VFIO_PCI_NUM_REGIONS + vdev->num_regions)
+	if (index >= VFIO_PCI_NUM_REGIONS + READ_ONCE(vdev->num_regions))
 		return -EINVAL;
 
 	ret = pm_runtime_resume_and_get(&vdev->pdev->dev);
@@ -1568,8 +1584,11 @@ static ssize_t vfio_pci_rw(struct vfio_pci_core_device *vdev, char __user *buf,
 
 	default:
 		index -= VFIO_PCI_NUM_REGIONS;
-		ret = vdev->region[index].ops->rw(vdev, buf,
-						   count, ppos, iswrite);
+		rcu_read_lock();
+		ret = rcu_dereference(vdev->region)[index].ops->rw(vdev, buf,
+								   count, ppos,
+								   iswrite);
+		rcu_read_unlock();
 		break;
 	}
 
@@ -1726,7 +1745,7 @@ int vfio_pci_core_mmap(struct vfio_device *core_vdev, struct vm_area_struct *vma
 
 	index = vma->vm_pgoff >> (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT);
 
-	if (index >= VFIO_PCI_NUM_REGIONS + vdev->num_regions)
+	if (index >= VFIO_PCI_NUM_REGIONS + READ_ONCE(vdev->num_regions))
 		return -EINVAL;
 	if (vma->vm_end < vma->vm_start)
 		return -EINVAL;
@@ -1734,12 +1753,16 @@ int vfio_pci_core_mmap(struct vfio_device *core_vdev, struct vm_area_struct *vma
 		return -EINVAL;
 	if (index >= VFIO_PCI_NUM_REGIONS) {
 		int regnum = index - VFIO_PCI_NUM_REGIONS;
-		struct vfio_pci_region *region = vdev->region + regnum;
+		struct vfio_pci_region *region;
+
+		rcu_read_lock();
+		region = rcu_dereference(vdev->region) + regnum;
 
 		ret = -EINVAL;
 		if (region->ops && region->ops->mmap &&
 		    (region->flags & VFIO_REGION_INFO_FLAG_MMAP))
 			ret = region->ops->mmap(vdev, region, vma);
+		rcu_read_unlock();
 		return ret;
 	}
 	if (index >= VFIO_PCI_ROM_REGION_INDEX)
@@ -2107,6 +2130,7 @@ int vfio_pci_core_init_dev(struct vfio_device *core_vdev)
 	INIT_LIST_HEAD(&vdev->sriov_pfs_item);
 	init_rwsem(&vdev->memory_lock);
 	xa_init(&vdev->ctx);
+	mutex_init(&vdev->region_lock);
 
 	return 0;
 }
@@ -2119,6 +2143,7 @@ void vfio_pci_core_release_dev(struct vfio_device *core_vdev)
 
 	mutex_destroy(&vdev->igate);
 	mutex_destroy(&vdev->ioeventfds_lock);
+	mutex_destroy(&vdev->region_lock);
 	kfree(vdev->region);
 	kfree(vdev->pm_save);
 }
diff --git a/drivers/vfio/pci/vfio_pci_igd.c b/drivers/vfio/pci/vfio_pci_igd.c
index 93ddef48e4e4c..1f7e9e82ac08c 100644
--- a/drivers/vfio/pci/vfio_pci_igd.c
+++ b/drivers/vfio/pci/vfio_pci_igd.c
@@ -71,13 +71,17 @@ static ssize_t vfio_pci_igd_rw(struct vfio_pci_core_device *vdev,
 	struct vfio_pci_region *region;
 	struct igd_opregion_vbt *opregionvbt;
 
-	region = &vdev->region[i];
+	rcu_read_lock();
+	region = &rcu_dereference(vdev->region)[i];
 	opregionvbt = region->data;
 
-	if (pos >= region->size || iswrite)
+	if (pos >= region->size || iswrite) {
+		rcu_read_unlock();
 		return -EINVAL;
+	}
 
 	count = min_t(size_t, count, region->size - pos);
+	rcu_read_unlock();
 	remaining = count;
 
 	/* Copy until OpRegion version */
@@ -293,13 +297,17 @@ static ssize_t vfio_pci_igd_cfg_rw(struct vfio_pci_core_device *vdev,
 	struct vfio_pci_region *region;
 	struct pci_dev *pdev;
 
-	region = &vdev->region[i];
+	rcu_read_lock();
+	region = &rcu_dereference(vdev->region)[i];
 	pdev = region->data;
 
-	if (pos >= region->size || iswrite)
+	if (pos >= region->size || iswrite) {
+		rcu_read_unlock();
 		return -EINVAL;
+	}
 
 	size = count = min(count, (size_t)(region->size - pos));
+	rcu_read_unlock();
 
 	if ((pos & 1) && size) {
 		u8 val;
diff --git a/include/linux/vfio_pci_core.h b/include/linux/vfio_pci_core.h
index f541044e42a2a..e106e58f297e9 100644
--- a/include/linux/vfio_pci_core.h
+++ b/include/linux/vfio_pci_core.h
@@ -63,6 +63,7 @@ struct vfio_pci_core_device {
 	int			irq_type;
 	int			num_regions;
 	struct vfio_pci_region	*region;
+	struct mutex		region_lock;
 	u8			msi_qmax;
 	u8			msix_bar;
 	u16			msix_size;
-- 
2.47.3




Amazon Web Services Development Center Germany GmbH
Tamara-Danz-Str. 13
10243 Berlin
Geschaeftsfuehrung: Christian Schlaeger
Eingetragen am Amtsgericht Charlottenburg unter HRB 257764 B
Sitz: Berlin
Ust-ID: DE 365 538 597


  parent reply	other threads:[~2025-09-24 14:12 UTC|newest]

Thread overview: 14+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-09-24 14:09 [RFC PATCH 0/7] vfio: Add alias region uapi for device feature Mahmoud Adam
2025-09-24 14:09 ` [RFC PATCH 1/7] vfio/pci: refactor region dereferences for RCU Mahmoud Adam
2025-09-24 14:09 ` [RFC PATCH 2/7] vfio_pci_core: split krealloc to allow use RCU & return index Mahmoud Adam
2025-09-24 14:09 ` Mahmoud Adam [this message]
2025-09-24 16:15   ` [RFC PATCH 3/7] vfio/pci: add RCU locking for regions access Mahmoud Nagy Adam
2025-09-24 14:09 ` [RFC PATCH 4/7] vfio: add FEATURE_ALIAS_REGION uapi Mahmoud Adam
2025-09-24 14:09 ` [RFC PATCH 5/7] vfio_pci_core: allow regions with no release op Mahmoud Adam
2025-09-24 14:09 ` [RFC PATCH 6/7] vfio-pci: add alias_region mmap ops Mahmoud Adam
2025-09-24 14:09 ` [RFC PATCH 7/7] vfio-pci-core: implement FEATURE_ALIAS_REGION uapi Mahmoud Adam
2025-10-03 21:58 ` [RFC PATCH 0/7] vfio: Add alias region uapi for device feature David Matlack
2025-10-05 10:16   ` Mahmoud Nagy Adam
2025-10-15 19:36 ` Alex Williamson
2025-10-27 16:32 ` David Matlack
2025-10-27 18:19   ` Mahmoud Nagy Adam

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250924141018.80202-4-mngyadam@amazon.de \
    --to=mngyadam@amazon.de \
    --cc=alex.williamson@redhat.com \
    --cc=benh@kernel.crashing.org \
    --cc=dwmw@amazon.co.uk \
    --cc=jgg@ziepe.ca \
    --cc=kbusch@kernel.org \
    --cc=kvm@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=nagy@khwaternagy.com \
    --cc=pravkmr@amazon.de \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox