All of lore.kernel.org
 help / color / mirror / Atom feed
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
To: linux-kernel@vger.kernel.org
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>,
	stable@vger.kernel.org,
	Demi Marie Obenour <demi@invisiblethingslab.com>,
	Juergen Gross <jgross@suse.com>
Subject: [PATCH 4.9 14/29] xen/gntdev: Avoid blocking in unmap_grant_pages()
Date: Tue,  5 Jul 2022 13:57:55 +0200	[thread overview]
Message-ID: <20220705115606.168668005@linuxfoundation.org> (raw)
In-Reply-To: <20220705115605.742248854@linuxfoundation.org>

From: Demi Marie Obenour <demi@invisiblethingslab.com>

commit dbe97cff7dd9f0f75c524afdd55ad46be3d15295 upstream.

unmap_grant_pages() currently waits for the pages to no longer be used.
In https://github.com/QubesOS/qubes-issues/issues/7481, this led to a
deadlock against i915: i915 was waiting for gntdev's MMU notifier to
finish, while gntdev was waiting for i915 to free its pages.  I also
believe this is responsible for various deadlocks I have experienced in
the past.

Avoid these problems by making unmap_grant_pages async.  This requires
making it return void, as any errors will not be available when the
function returns.  Fortunately, the only use of the return value is a
WARN_ON(), which can be replaced by a WARN_ON when the error is
detected.  Additionally, a failed call will not prevent further calls
from being made, but this is harmless.

Because unmap_grant_pages is now async, the grant handle will be set to
INVALID_GRANT_HANDLE too late to prevent multiple unmaps of the same
handle.  Instead, a separate bool array is allocated for this purpose.
This wastes memory, but stuffing this information in padding bytes is
too fragile.  Furthermore, it is necessary to grab a reference to the
map before making the asynchronous call, and release the reference when
the call returns.

It is also necessary to guard against reentrancy in gntdev_map_put(),
and to handle the case where userspace tries to map a mapping whose
contents have not all been freed yet.

Fixes: 745282256c75 ("xen/gntdev: safely unmap grants in case they are still in use")
Cc: stable@vger.kernel.org
Signed-off-by: Demi Marie Obenour <demi@invisiblethingslab.com>
Reviewed-by: Juergen Gross <jgross@suse.com>
Link: https://lore.kernel.org/r/20220622022726.2538-1-demi@invisiblethingslab.com
Signed-off-by: Juergen Gross <jgross@suse.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/xen/gntdev.c |  144 ++++++++++++++++++++++++++++++++++++---------------
 1 file changed, 102 insertions(+), 42 deletions(-)

--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -57,6 +57,7 @@ MODULE_PARM_DESC(limit, "Maximum number
 
 static atomic_t pages_mapped = ATOMIC_INIT(0);
 
+/* True in PV mode, false otherwise */
 static int use_ptemod;
 #define populate_freeable_maps use_ptemod
 
@@ -92,11 +93,16 @@ struct grant_map {
 	struct gnttab_unmap_grant_ref *unmap_ops;
 	struct gnttab_map_grant_ref   *kmap_ops;
 	struct gnttab_unmap_grant_ref *kunmap_ops;
+	bool *being_removed;
 	struct page **pages;
 	unsigned long pages_vm_start;
+	/* Number of live grants */
+	atomic_t live_grants;
+	/* Needed to avoid allocation in unmap_grant_pages */
+	struct gntab_unmap_queue_data unmap_data;
 };
 
-static int unmap_grant_pages(struct grant_map *map, int offset, int pages);
+static void unmap_grant_pages(struct grant_map *map, int offset, int pages);
 
 /* ------------------------------------------------------------------ */
 
@@ -127,6 +133,7 @@ static void gntdev_free_map(struct grant
 	kfree(map->unmap_ops);
 	kfree(map->kmap_ops);
 	kfree(map->kunmap_ops);
+	kfree(map->being_removed);
 	kfree(map);
 }
 
@@ -145,12 +152,15 @@ static struct grant_map *gntdev_alloc_ma
 	add->kmap_ops  = kcalloc(count, sizeof(add->kmap_ops[0]), GFP_KERNEL);
 	add->kunmap_ops = kcalloc(count, sizeof(add->kunmap_ops[0]), GFP_KERNEL);
 	add->pages     = kcalloc(count, sizeof(add->pages[0]), GFP_KERNEL);
+	add->being_removed =
+		kcalloc(count, sizeof(add->being_removed[0]), GFP_KERNEL);
 	if (NULL == add->grants    ||
 	    NULL == add->map_ops   ||
 	    NULL == add->unmap_ops ||
 	    NULL == add->kmap_ops  ||
 	    NULL == add->kunmap_ops ||
-	    NULL == add->pages)
+	    NULL == add->pages     ||
+	    NULL == add->being_removed)
 		goto err;
 
 	if (gnttab_alloc_pages(count, add->pages))
@@ -215,6 +225,34 @@ static void gntdev_put_map(struct gntdev
 		return;
 
 	atomic_sub(map->count, &pages_mapped);
+	if (map->pages && !use_ptemod) {
+		/*
+		 * Increment the reference count.  This ensures that the
+		 * subsequent call to unmap_grant_pages() will not wind up
+		 * re-entering itself.  It *can* wind up calling
+		 * gntdev_put_map() recursively, but such calls will be with a
+		 * reference count greater than 1, so they will return before
+		 * this code is reached.  The recursion depth is thus limited to
+		 * 1.
+		 */
+		atomic_set(&map->users, 1);
+
+		/*
+		 * Unmap the grants.  This may or may not be asynchronous, so it
+		 * is possible that the reference count is 1 on return, but it
+		 * could also be greater than 1.
+		 */
+		unmap_grant_pages(map, 0, map->count);
+
+		/* Check if the memory now needs to be freed */
+		if (!atomic_dec_and_test(&map->users))
+			return;
+
+		/*
+		 * All pages have been returned to the hypervisor, so free the
+		 * map.
+		 */
+	}
 
 	if (map->notify.flags & UNMAP_NOTIFY_SEND_EVENT) {
 		notify_remote_via_evtchn(map->notify.event);
@@ -272,6 +310,7 @@ static int set_grant_ptes_as_special(pte
 
 static int map_grant_pages(struct grant_map *map)
 {
+	size_t alloced = 0;
 	int i, err = 0;
 
 	if (!use_ptemod) {
@@ -320,85 +359,107 @@ static int map_grant_pages(struct grant_
 			map->pages, map->count);
 
 	for (i = 0; i < map->count; i++) {
-		if (map->map_ops[i].status == GNTST_okay)
+		if (map->map_ops[i].status == GNTST_okay) {
 			map->unmap_ops[i].handle = map->map_ops[i].handle;
-		else if (!err)
+			if (!use_ptemod)
+				alloced++;
+		} else if (!err)
 			err = -EINVAL;
 
 		if (map->flags & GNTMAP_device_map)
 			map->unmap_ops[i].dev_bus_addr = map->map_ops[i].dev_bus_addr;
 
 		if (use_ptemod) {
-			if (map->kmap_ops[i].status == GNTST_okay)
+			if (map->kmap_ops[i].status == GNTST_okay) {
+				if (map->map_ops[i].status == GNTST_okay)
+					alloced++;
 				map->kunmap_ops[i].handle = map->kmap_ops[i].handle;
-			else if (!err)
+			} else if (!err)
 				err = -EINVAL;
 		}
 	}
+	atomic_add(alloced, &map->live_grants);
 	return err;
 }
 
-static int __unmap_grant_pages(struct grant_map *map, int offset, int pages)
+static void __unmap_grant_pages_done(int result,
+		struct gntab_unmap_queue_data *data)
 {
-	int i, err = 0;
-	struct gntab_unmap_queue_data unmap_data;
+	unsigned int i;
+	struct grant_map *map = data->data;
+	unsigned int offset = data->unmap_ops - map->unmap_ops;
+
+	for (i = 0; i < data->count; i++) {
+		WARN_ON(map->unmap_ops[offset+i].status);
+		pr_debug("unmap handle=%d st=%d\n",
+			map->unmap_ops[offset+i].handle,
+			map->unmap_ops[offset+i].status);
+		map->unmap_ops[offset+i].handle = -1;
+	}
+	/*
+	 * Decrease the live-grant counter.  This must happen after the loop to
+	 * prevent premature reuse of the grants by gnttab_mmap().
+	 */
+	atomic_sub(data->count, &map->live_grants);
+
+	/* Release reference taken by unmap_grant_pages */
+	gntdev_put_map(NULL, map);
+}
 
+static void __unmap_grant_pages(struct grant_map *map, int offset, int pages)
+{
 	if (map->notify.flags & UNMAP_NOTIFY_CLEAR_BYTE) {
 		int pgno = (map->notify.addr >> PAGE_SHIFT);
+
 		if (pgno >= offset && pgno < offset + pages) {
 			/* No need for kmap, pages are in lowmem */
 			uint8_t *tmp = pfn_to_kaddr(page_to_pfn(map->pages[pgno]));
+
 			tmp[map->notify.addr & (PAGE_SIZE-1)] = 0;
 			map->notify.flags &= ~UNMAP_NOTIFY_CLEAR_BYTE;
 		}
 	}
 
-	unmap_data.unmap_ops = map->unmap_ops + offset;
-	unmap_data.kunmap_ops = use_ptemod ? map->kunmap_ops + offset : NULL;
-	unmap_data.pages = map->pages + offset;
-	unmap_data.count = pages;
+	map->unmap_data.unmap_ops = map->unmap_ops + offset;
+	map->unmap_data.kunmap_ops = use_ptemod ? map->kunmap_ops + offset : NULL;
+	map->unmap_data.pages = map->pages + offset;
+	map->unmap_data.count = pages;
+	map->unmap_data.done = __unmap_grant_pages_done;
+	map->unmap_data.data = map;
+	atomic_inc(&map->users); /* to keep map alive during async call below */
 
-	err = gnttab_unmap_refs_sync(&unmap_data);
-	if (err)
-		return err;
-
-	for (i = 0; i < pages; i++) {
-		if (map->unmap_ops[offset+i].status)
-			err = -EINVAL;
-		pr_debug("unmap handle=%d st=%d\n",
-			map->unmap_ops[offset+i].handle,
-			map->unmap_ops[offset+i].status);
-		map->unmap_ops[offset+i].handle = -1;
-	}
-	return err;
+	gnttab_unmap_refs_async(&map->unmap_data);
 }
 
-static int unmap_grant_pages(struct grant_map *map, int offset, int pages)
+static void unmap_grant_pages(struct grant_map *map, int offset, int pages)
 {
-	int range, err = 0;
+	int range;
+
+	if (atomic_read(&map->live_grants) == 0)
+		return; /* Nothing to do */
 
 	pr_debug("unmap %d+%d [%d+%d]\n", map->index, map->count, offset, pages);
 
 	/* It is possible the requested range will have a "hole" where we
 	 * already unmapped some of the grants. Only unmap valid ranges.
 	 */
-	while (pages && !err) {
-		while (pages && map->unmap_ops[offset].handle == -1) {
+	while (pages) {
+		while (pages && map->being_removed[offset]) {
 			offset++;
 			pages--;
 		}
 		range = 0;
 		while (range < pages) {
-			if (map->unmap_ops[offset+range].handle == -1)
+			if (map->being_removed[offset + range])
 				break;
+			map->being_removed[offset + range] = true;
 			range++;
 		}
-		err = __unmap_grant_pages(map, offset, range);
+		if (range)
+			__unmap_grant_pages(map, offset, range);
 		offset += range;
 		pages -= range;
 	}
-
-	return err;
 }
 
 /* ------------------------------------------------------------------ */
@@ -454,7 +515,6 @@ static void unmap_if_in_range(struct gra
 			      unsigned long start, unsigned long end)
 {
 	unsigned long mstart, mend;
-	int err;
 
 	if (!map->vma)
 		return;
@@ -468,10 +528,9 @@ static void unmap_if_in_range(struct gra
 			map->index, map->count,
 			map->vma->vm_start, map->vma->vm_end,
 			start, end, mstart, mend);
-	err = unmap_grant_pages(map,
+	unmap_grant_pages(map,
 				(mstart - map->vma->vm_start) >> PAGE_SHIFT,
 				(mend - mstart) >> PAGE_SHIFT);
-	WARN_ON(err);
 }
 
 static void mn_invl_range_start(struct mmu_notifier *mn,
@@ -503,7 +562,6 @@ static void mn_release(struct mmu_notifi
 {
 	struct gntdev_priv *priv = container_of(mn, struct gntdev_priv, mn);
 	struct grant_map *map;
-	int err;
 
 	mutex_lock(&priv->lock);
 	list_for_each_entry(map, &priv->maps, next) {
@@ -512,8 +570,7 @@ static void mn_release(struct mmu_notifi
 		pr_debug("map %d+%d (%lx %lx)\n",
 				map->index, map->count,
 				map->vma->vm_start, map->vma->vm_end);
-		err = unmap_grant_pages(map, /* offset */ 0, map->count);
-		WARN_ON(err);
+		unmap_grant_pages(map, /* offset */ 0, map->count);
 	}
 	list_for_each_entry(map, &priv->freeable_maps, next) {
 		if (!map->vma)
@@ -521,8 +578,7 @@ static void mn_release(struct mmu_notifi
 		pr_debug("map %d+%d (%lx %lx)\n",
 				map->index, map->count,
 				map->vma->vm_start, map->vma->vm_end);
-		err = unmap_grant_pages(map, /* offset */ 0, map->count);
-		WARN_ON(err);
+		unmap_grant_pages(map, /* offset */ 0, map->count);
 	}
 	mutex_unlock(&priv->lock);
 }
@@ -1012,6 +1068,10 @@ static int gntdev_mmap(struct file *flip
 		goto unlock_out;
 	}
 
+	if (atomic_read(&map->live_grants)) {
+		err = -EAGAIN;
+		goto unlock_out;
+	}
 	atomic_inc(&map->users);
 
 	vma->vm_ops = &gntdev_vmops;



  parent reply	other threads:[~2022-07-05 11:59 UTC|newest]

Thread overview: 39+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-07-05 11:57 [PATCH 4.9 00/29] 4.9.322-rc1 review Greg Kroah-Hartman
2022-07-05 11:57 ` [PATCH 4.9 01/29] dm raid: fix KASAN warning in raid5_add_disks Greg Kroah-Hartman
2022-07-05 11:57 ` [PATCH 4.9 02/29] SUNRPC: Fix READ_PLUS crasher Greg Kroah-Hartman
2022-07-05 11:57 ` [PATCH 4.9 03/29] net: rose: fix UAF bugs caused by timer handler Greg Kroah-Hartman
2022-07-05 11:57 ` [PATCH 4.9 04/29] net: usb: ax88179_178a: Fix packet receiving Greg Kroah-Hartman
2022-07-05 11:57 ` [PATCH 4.9 05/29] usbnet: make sure no NULL pointer is passed through Greg Kroah-Hartman
2022-07-05 20:36   ` Pavel Machek
2022-07-06  6:36     ` Greg Kroah-Hartman
2022-07-05 11:57 ` [PATCH 4.9 06/29] usbnet: fix memory allocation in helpers Greg Kroah-Hartman
2022-07-05 11:57 ` [PATCH 4.9 07/29] powerpc/powernv: wire up rng during setup_arch Greg Kroah-Hartman
2022-07-05 11:57 ` [PATCH 4.9 08/29] caif_virtio: fix race between virtio_device_ready() and ndo_open() Greg Kroah-Hartman
2022-07-05 11:57 ` [PATCH 4.9 09/29] netfilter: nft_dynset: restore set element counter when failing to update Greg Kroah-Hartman
2022-07-05 11:57 ` [PATCH 4.9 10/29] net: bonding: fix possible NULL deref in rlb code Greg Kroah-Hartman
2022-07-05 11:57 ` [PATCH 4.9 11/29] net: bonding: fix use-after-free after 802.3ad slave unbind Greg Kroah-Hartman
2022-07-05 11:57 ` [PATCH 4.9 12/29] nfc: nfcmrvl: Fix irq_of_parse_and_map() return value Greg Kroah-Hartman
2022-07-05 11:57 ` [PATCH 4.9 13/29] NFC: nxp-nci: Dont issue a zero length i2c_master_read() Greg Kroah-Hartman
2022-07-05 11:57 ` Greg Kroah-Hartman [this message]
2022-07-05 11:57 ` [PATCH 4.9 15/29] hwmon: (ibmaem) dont call platform_device_del() if platform_device_add() fails Greg Kroah-Hartman
2022-07-05 11:57 ` [PATCH 4.9 16/29] net: dsa: bcm_sf2: force pause link settings Greg Kroah-Hartman
2022-07-05 15:34   ` Florian Fainelli
2022-07-05 16:15     ` Greg Kroah-Hartman
2022-07-05 11:57 ` [PATCH 4.9 17/29] sit: use min Greg Kroah-Hartman
2022-07-05 11:57 ` [PATCH 4.9 18/29] ipv6/sit: fix ipip6_tunnel_get_prl return value Greg Kroah-Hartman
2022-07-05 11:58 ` [PATCH 4.9 19/29] net: Rename and export copy_skb_header Greg Kroah-Hartman
2022-07-05 11:58 ` [PATCH 4.9 20/29] xen/blkfront: fix leaking data in shared pages Greg Kroah-Hartman
2022-07-05 11:58 ` [PATCH 4.9 21/29] xen/netfront: " Greg Kroah-Hartman
2022-07-05 11:58 ` [PATCH 4.9 22/29] xen/netfront: force data bouncing when backend is untrusted Greg Kroah-Hartman
2022-07-05 11:58 ` [PATCH 4.9 23/29] xen/blkfront: " Greg Kroah-Hartman
2022-07-05 11:58 ` [PATCH 4.9 24/29] xen/arm: Fix race in RB-tree based P2M accounting Greg Kroah-Hartman
2022-07-05 11:58 ` [PATCH 4.9 25/29] qmi_wwan: Added support for Telit LN940 series Greg Kroah-Hartman
2022-07-05 11:58 ` [PATCH 4.9 26/29] net: usb: qmi_wwan: add Telit 0x1260 and 0x1261 compositions Greg Kroah-Hartman
2022-07-05 11:58 ` [PATCH 4.9 27/29] net: usb: qmi_wwan: add Telit LE910Cx 0x1230 composition Greg Kroah-Hartman
2022-07-05 11:58 ` [PATCH 4.9 28/29] net: usb: qmi_wwan: add Telit 0x1060 composition Greg Kroah-Hartman
2022-07-05 11:58 ` [PATCH 4.9 29/29] net: usb: qmi_wwan: add Telit 0x1070 composition Greg Kroah-Hartman
2022-07-05 14:35 ` [PATCH 4.9 00/29] 4.9.322-rc1 review Jon Hunter
2022-07-05 16:53 ` Florian Fainelli
2022-07-06  7:27 ` Naresh Kamboju
2022-07-06 13:41 ` Guenter Roeck
2022-07-07  0:06 ` Shuah Khan

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220705115606.168668005@linuxfoundation.org \
    --to=gregkh@linuxfoundation.org \
    --cc=demi@invisiblethingslab.com \
    --cc=jgross@suse.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=stable@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.