[PATCH 4.14 17/29] xen/gntdev: Avoid blocking in unmap_grant_pages()

public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed

From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
To: linux-kernel@vger.kernel.org
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>,
	stable@vger.kernel.org,
	Demi Marie Obenour <demi@invisiblethingslab.com>,
	Juergen Gross <jgross@suse.com>
Subject: [PATCH 4.14 17/29] xen/gntdev: Avoid blocking in unmap_grant_pages()
Date: Tue,  5 Jul 2022 13:58:05 +0200	[thread overview]
Message-ID: <20220705115606.853154191@linuxfoundation.org> (raw)
In-Reply-To: <20220705115606.333669144@linuxfoundation.org>

From: Demi Marie Obenour <demi@invisiblethingslab.com>

commit dbe97cff7dd9f0f75c524afdd55ad46be3d15295 upstream.

unmap_grant_pages() currently waits for the pages to no longer be used.
In https://github.com/QubesOS/qubes-issues/issues/7481, this lead to a
deadlock against i915: i915 was waiting for gntdev's MMU notifier to
finish, while gntdev was waiting for i915 to free its pages.  I also
believe this is responsible for various deadlocks I have experienced in
the past.

Avoid these problems by making unmap_grant_pages async.  This requires
making it return void, as any errors will not be available when the
function returns.  Fortunately, the only use of the return value is a
WARN_ON(), which can be replaced by a WARN_ON when the error is
detected.  Additionally, a failed call will not prevent further calls
from being made, but this is harmless.

Because unmap_grant_pages is now async, the grant handle will be sent to
INVALID_GRANT_HANDLE too late to prevent multiple unmaps of the same
handle.  Instead, a separate bool array is allocated for this purpose.
This wastes memory, but stuffing this information in padding bytes is
too fragile.  Furthermore, it is necessary to grab a reference to the
map before making the asynchronous call, and release the reference when
the call returns.

It is also necessary to guard against reentrancy in gntdev_map_put(),
and to handle the case where userspace tries to map a mapping whose
contents have not all been freed yet.

Fixes: 745282256c75 ("xen/gntdev: safely unmap grants in case they are still in use")
Cc: stable@vger.kernel.org
Signed-off-by: Demi Marie Obenour <demi@invisiblethingslab.com>
Reviewed-by: Juergen Gross <jgross@suse.com>
Link: https://lore.kernel.org/r/20220622022726.2538-1-demi@invisiblethingslab.com
Signed-off-by: Juergen Gross <jgross@suse.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/xen/gntdev.c |  145 ++++++++++++++++++++++++++++++++++++---------------
 1 file changed, 103 insertions(+), 42 deletions(-)

--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -59,6 +59,7 @@ MODULE_PARM_DESC(limit, "Maximum number
 
 static atomic_t pages_mapped = ATOMIC_INIT(0);
 
+/* True in PV mode, false otherwise */
 static int use_ptemod;
 #define populate_freeable_maps use_ptemod
 
@@ -94,11 +95,16 @@ struct grant_map {
 	struct gnttab_unmap_grant_ref *unmap_ops;
 	struct gnttab_map_grant_ref   *kmap_ops;
 	struct gnttab_unmap_grant_ref *kunmap_ops;
+	bool *being_removed;
 	struct page **pages;
 	unsigned long pages_vm_start;
+	/* Number of live grants */
+	atomic_t live_grants;
+	/* Needed to avoid allocation in unmap_grant_pages */
+	struct gntab_unmap_queue_data unmap_data;
 };
 
-static int unmap_grant_pages(struct grant_map *map, int offset, int pages);
+static void unmap_grant_pages(struct grant_map *map, int offset, int pages);
 
 /* ------------------------------------------------------------------ */
 
@@ -129,6 +135,7 @@ static void gntdev_free_map(struct grant
 	kfree(map->unmap_ops);
 	kfree(map->kmap_ops);
 	kfree(map->kunmap_ops);
+	kfree(map->being_removed);
 	kfree(map);
 }
 
@@ -147,12 +154,15 @@ static struct grant_map *gntdev_alloc_ma
 	add->kmap_ops  = kcalloc(count, sizeof(add->kmap_ops[0]), GFP_KERNEL);
 	add->kunmap_ops = kcalloc(count, sizeof(add->kunmap_ops[0]), GFP_KERNEL);
 	add->pages     = kcalloc(count, sizeof(add->pages[0]), GFP_KERNEL);
+	add->being_removed =
+		kcalloc(count, sizeof(add->being_removed[0]), GFP_KERNEL);
 	if (NULL == add->grants    ||
 	    NULL == add->map_ops   ||
 	    NULL == add->unmap_ops ||
 	    NULL == add->kmap_ops  ||
 	    NULL == add->kunmap_ops ||
-	    NULL == add->pages)
+	    NULL == add->pages     ||
+	    NULL == add->being_removed)
 		goto err;
 
 	if (gnttab_alloc_pages(count, add->pages))
@@ -217,6 +227,35 @@ static void gntdev_put_map(struct gntdev
 		return;
 
 	atomic_sub(map->count, &pages_mapped);
+	if (map->pages && !use_ptemod) {
+		/*
+		 * Increment the reference count.  This ensures that the
+		 * subsequent call to unmap_grant_pages() will not wind up
+		 * re-entering itself.  It *can* wind up calling
+		 * gntdev_put_map() recursively, but such calls will be with a
+		 * reference count greater than 1, so they will return before
+		 * this code is reached.  The recursion depth is thus limited to
+		 * 1.  Do NOT use refcount_inc() here, as it will detect that
+		 * the reference count is zero and WARN().
+		 */
+		refcount_set(&map->users, 1);
+
+		/*
+		 * Unmap the grants.  This may or may not be asynchronous, so it
+		 * is possible that the reference count is 1 on return, but it
+		 * could also be greater than 1.
+		 */
+		unmap_grant_pages(map, 0, map->count);
+
+		/* Check if the memory now needs to be freed */
+		if (!refcount_dec_and_test(&map->users))
+			return;
+
+		/*
+		 * All pages have been returned to the hypervisor, so free the
+		 * map.
+		 */
+	}
 
 	if (map->notify.flags & UNMAP_NOTIFY_SEND_EVENT) {
 		notify_remote_via_evtchn(map->notify.event);
@@ -274,6 +313,7 @@ static int set_grant_ptes_as_special(pte
 
 static int map_grant_pages(struct grant_map *map)
 {
+	size_t alloced = 0;
 	int i, err = 0;
 
 	if (!use_ptemod) {
@@ -322,85 +362,107 @@ static int map_grant_pages(struct grant_
 			map->pages, map->count);
 
 	for (i = 0; i < map->count; i++) {
-		if (map->map_ops[i].status == GNTST_okay)
+		if (map->map_ops[i].status == GNTST_okay) {
 			map->unmap_ops[i].handle = map->map_ops[i].handle;
-		else if (!err)
+			if (!use_ptemod)
+				alloced++;
+		} else if (!err)
 			err = -EINVAL;
 
 		if (map->flags & GNTMAP_device_map)
 			map->unmap_ops[i].dev_bus_addr = map->map_ops[i].dev_bus_addr;
 
 		if (use_ptemod) {
-			if (map->kmap_ops[i].status == GNTST_okay)
+			if (map->kmap_ops[i].status == GNTST_okay) {
+				if (map->map_ops[i].status == GNTST_okay)
+					alloced++;
 				map->kunmap_ops[i].handle = map->kmap_ops[i].handle;
-			else if (!err)
+			} else if (!err)
 				err = -EINVAL;
 		}
 	}
+	atomic_add(alloced, &map->live_grants);
 	return err;
 }
 
-static int __unmap_grant_pages(struct grant_map *map, int offset, int pages)
+static void __unmap_grant_pages_done(int result,
+		struct gntab_unmap_queue_data *data)
 {
-	int i, err = 0;
-	struct gntab_unmap_queue_data unmap_data;
+	unsigned int i;
+	struct grant_map *map = data->data;
+	unsigned int offset = data->unmap_ops - map->unmap_ops;
+
+	for (i = 0; i < data->count; i++) {
+		WARN_ON(map->unmap_ops[offset+i].status);
+		pr_debug("unmap handle=%d st=%d\n",
+			map->unmap_ops[offset+i].handle,
+			map->unmap_ops[offset+i].status);
+		map->unmap_ops[offset+i].handle = -1;
+	}
+	/*
+	 * Decrease the live-grant counter.  This must happen after the loop to
+	 * prevent premature reuse of the grants by gnttab_mmap().
+	 */
+	atomic_sub(data->count, &map->live_grants);
+
+	/* Release reference taken by unmap_grant_pages */
+	gntdev_put_map(NULL, map);
+}
 
+static void __unmap_grant_pages(struct grant_map *map, int offset, int pages)
+{
 	if (map->notify.flags & UNMAP_NOTIFY_CLEAR_BYTE) {
 		int pgno = (map->notify.addr >> PAGE_SHIFT);
+
 		if (pgno >= offset && pgno < offset + pages) {
 			/* No need for kmap, pages are in lowmem */
 			uint8_t *tmp = pfn_to_kaddr(page_to_pfn(map->pages[pgno]));
+
 			tmp[map->notify.addr & (PAGE_SIZE-1)] = 0;
 			map->notify.flags &= ~UNMAP_NOTIFY_CLEAR_BYTE;
 		}
 	}
 
-	unmap_data.unmap_ops = map->unmap_ops + offset;
-	unmap_data.kunmap_ops = use_ptemod ? map->kunmap_ops + offset : NULL;
-	unmap_data.pages = map->pages + offset;
-	unmap_data.count = pages;
+	map->unmap_data.unmap_ops = map->unmap_ops + offset;
+	map->unmap_data.kunmap_ops = use_ptemod ? map->kunmap_ops + offset : NULL;
+	map->unmap_data.pages = map->pages + offset;
+	map->unmap_data.count = pages;
+	map->unmap_data.done = __unmap_grant_pages_done;
+	map->unmap_data.data = map;
+	refcount_inc(&map->users); /* to keep map alive during async call below */
 
-	err = gnttab_unmap_refs_sync(&unmap_data);
-	if (err)
-		return err;
-
-	for (i = 0; i < pages; i++) {
-		if (map->unmap_ops[offset+i].status)
-			err = -EINVAL;
-		pr_debug("unmap handle=%d st=%d\n",
-			map->unmap_ops[offset+i].handle,
-			map->unmap_ops[offset+i].status);
-		map->unmap_ops[offset+i].handle = -1;
-	}
-	return err;
+	gnttab_unmap_refs_async(&map->unmap_data);
 }
 
-static int unmap_grant_pages(struct grant_map *map, int offset, int pages)
+static void unmap_grant_pages(struct grant_map *map, int offset, int pages)
 {
-	int range, err = 0;
+	int range;
+
+	if (atomic_read(&map->live_grants) == 0)
+		return; /* Nothing to do */
 
 	pr_debug("unmap %d+%d [%d+%d]\n", map->index, map->count, offset, pages);
 
 	/* It is possible the requested range will have a "hole" where we
 	 * already unmapped some of the grants. Only unmap valid ranges.
 	 */
-	while (pages && !err) {
-		while (pages && map->unmap_ops[offset].handle == -1) {
+	while (pages) {
+		while (pages && map->being_removed[offset]) {
 			offset++;
 			pages--;
 		}
 		range = 0;
 		while (range < pages) {
-			if (map->unmap_ops[offset+range].handle == -1)
+			if (map->being_removed[offset + range])
 				break;
+			map->being_removed[offset + range] = true;
 			range++;
 		}
-		err = __unmap_grant_pages(map, offset, range);
+		if (range)
+			__unmap_grant_pages(map, offset, range);
 		offset += range;
 		pages -= range;
 	}
-
-	return err;
 }
 
 /* ------------------------------------------------------------------ */
@@ -456,7 +518,6 @@ static void unmap_if_in_range(struct gra
 			      unsigned long start, unsigned long end)
 {
 	unsigned long mstart, mend;
-	int err;
 
 	if (!map->vma)
 		return;
@@ -470,10 +531,9 @@ static void unmap_if_in_range(struct gra
 			map->index, map->count,
 			map->vma->vm_start, map->vma->vm_end,
 			start, end, mstart, mend);
-	err = unmap_grant_pages(map,
+	unmap_grant_pages(map,
 				(mstart - map->vma->vm_start) >> PAGE_SHIFT,
 				(mend - mstart) >> PAGE_SHIFT);
-	WARN_ON(err);
 }
 
 static void mn_invl_range_start(struct mmu_notifier *mn,
@@ -498,7 +558,6 @@ static void mn_release(struct mmu_notifi
 {
 	struct gntdev_priv *priv = container_of(mn, struct gntdev_priv, mn);
 	struct grant_map *map;
-	int err;
 
 	mutex_lock(&priv->lock);
 	list_for_each_entry(map, &priv->maps, next) {
@@ -507,8 +566,7 @@ static void mn_release(struct mmu_notifi
 		pr_debug("map %d+%d (%lx %lx)\n",
 				map->index, map->count,
 				map->vma->vm_start, map->vma->vm_end);
-		err = unmap_grant_pages(map, /* offset */ 0, map->count);
-		WARN_ON(err);
+		unmap_grant_pages(map, /* offset */ 0, map->count);
 	}
 	list_for_each_entry(map, &priv->freeable_maps, next) {
 		if (!map->vma)
@@ -516,8 +574,7 @@ static void mn_release(struct mmu_notifi
 		pr_debug("map %d+%d (%lx %lx)\n",
 				map->index, map->count,
 				map->vma->vm_start, map->vma->vm_end);
-		err = unmap_grant_pages(map, /* offset */ 0, map->count);
-		WARN_ON(err);
+		unmap_grant_pages(map, /* offset */ 0, map->count);
 	}
 	mutex_unlock(&priv->lock);
 }
@@ -1006,6 +1063,10 @@ static int gntdev_mmap(struct file *flip
 		goto unlock_out;
 	}
 
+	if (atomic_read(&map->live_grants)) {
+		err = -EAGAIN;
+		goto unlock_out;
+	}
 	refcount_inc(&map->users);
 
 	vma->vm_ops = &gntdev_vmops;

next prev parent reply	other threads:[~2022-07-05 12:02 UTC|newest]

Thread overview: 32+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-07-05 11:57 [PATCH 4.14 00/29] 4.14.287-rc1 review Greg Kroah-Hartman
2022-07-05 11:57 ` [PATCH 4.14 01/29] nvdimm: Fix badblocks clear off-by-one error Greg Kroah-Hartman
2022-07-05 11:57 ` [PATCH 4.14 02/29] dm raid: fix accesses beyond end of raid member array Greg Kroah-Hartman
2022-07-05 11:57 ` [PATCH 4.14 03/29] dm raid: fix KASAN warning in raid5_add_disks Greg Kroah-Hartman
2022-07-05 11:57 ` [PATCH 4.14 04/29] s390/archrandom: simplify back to earlier design and initialize earlier Greg Kroah-Hartman
2022-07-05 11:57 ` [PATCH 4.14 05/29] SUNRPC: Fix READ_PLUS crasher Greg Kroah-Hartman
2022-07-05 11:57 ` [PATCH 4.14 06/29] net: rose: fix UAF bugs caused by timer handler Greg Kroah-Hartman
2022-07-05 11:57 ` [PATCH 4.14 07/29] net: usb: ax88179_178a: Fix packet receiving Greg Kroah-Hartman
2022-07-05 11:57 ` [PATCH 4.14 08/29] RDMA/qedr: Fix reporting QP timeout attribute Greg Kroah-Hartman
2022-07-05 11:57 ` [PATCH 4.14 09/29] usbnet: fix memory allocation in helpers Greg Kroah-Hartman
2022-07-05 11:57 ` [PATCH 4.14 10/29] net: ipv6: unexport __init-annotated seg6_hmac_net_init() Greg Kroah-Hartman
2022-07-05 11:57 ` [PATCH 4.14 11/29] caif_virtio: fix race between virtio_device_ready() and ndo_open() Greg Kroah-Hartman
2022-07-05 11:58 ` [PATCH 4.14 12/29] netfilter: nft_dynset: restore set element counter when failing to update Greg Kroah-Hartman
2022-07-05 11:58 ` [PATCH 4.14 13/29] net: bonding: fix possible NULL deref in rlb code Greg Kroah-Hartman
2022-07-05 11:58 ` [PATCH 4.14 14/29] net: bonding: fix use-after-free after 802.3ad slave unbind Greg Kroah-Hartman
2022-07-05 11:58 ` [PATCH 4.14 15/29] nfc: nfcmrvl: Fix irq_of_parse_and_map() return value Greg Kroah-Hartman
2022-07-05 11:58 ` [PATCH 4.14 16/29] NFC: nxp-nci: Dont issue a zero length i2c_master_read() Greg Kroah-Hartman
2022-07-05 11:58 ` Greg Kroah-Hartman [this message]
2022-07-05 11:58 ` [PATCH 4.14 18/29] hwmon: (ibmaem) dont call platform_device_del() if platform_device_add() fails Greg Kroah-Hartman
2022-07-05 11:58 ` [PATCH 4.14 19/29] net: dsa: bcm_sf2: force pause link settings Greg Kroah-Hartman
2022-07-05 11:58 ` [PATCH 4.14 20/29] sit: use min Greg Kroah-Hartman
2022-07-05 11:58 ` [PATCH 4.14 21/29] ipv6/sit: fix ipip6_tunnel_get_prl return value Greg Kroah-Hartman
2022-07-05 11:58 ` [PATCH 4.14 22/29] net: Rename and export copy_skb_header Greg Kroah-Hartman
2022-07-05 11:58 ` [PATCH 4.14 23/29] xen/blkfront: fix leaking data in shared pages Greg Kroah-Hartman
2022-07-05 11:58 ` [PATCH 4.14 24/29] xen/netfront: " Greg Kroah-Hartman
2022-07-05 11:58 ` [PATCH 4.14 25/29] xen/netfront: force data bouncing when backend is untrusted Greg Kroah-Hartman
2022-07-05 11:58 ` [PATCH 4.14 26/29] xen/blkfront: " Greg Kroah-Hartman
2022-07-05 11:58 ` [PATCH 4.14 27/29] xen/arm: Fix race in RB-tree based P2M accounting Greg Kroah-Hartman
2022-07-05 11:58 ` [PATCH 4.14 28/29] net: usb: qmi_wwan: add Telit 0x1060 composition Greg Kroah-Hartman
2022-07-05 11:58 ` [PATCH 4.14 29/29] net: usb: qmi_wwan: add Telit 0x1070 composition Greg Kroah-Hartman
2022-07-06  7:22 ` [PATCH 4.14 00/29] 4.14.287-rc1 review Naresh Kamboju
2022-07-06 13:42 ` Guenter Roeck

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220705115606.853154191@linuxfoundation.org \
    --to=gregkh@linuxfoundation.org \
    --cc=demi@invisiblethingslab.com \
    --cc=jgross@suse.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=stable@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox