From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
To: linux-kernel@vger.kernel.org
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>,
stable@vger.kernel.org, Steve Wise <swise@opengridcomputing.com>,
Doug Ledford <dledford@redhat.com>
Subject: [PATCH 4.9 42/66] iw_cxgb4: free EQ queue memory on last deref
Date: Tue, 31 Jan 2017 06:36:46 +0100 [thread overview]
Message-ID: <20170131053604.939907468@linuxfoundation.org> (raw)
In-Reply-To: <20170131053603.098140622@linuxfoundation.org>
4.9-stable review patch. If anyone has any objections, please let me know.
------------------
From: Steve Wise <swise@opengridcomputing.com>
commit c12a67fec8d99bb554e8d4e99120d418f1a39c87 upstream.
Commit ad61a4c7a9b7 ("iw_cxgb4: don't block in destroy_qp awaiting
the last deref") introduced a bug where the RDMA QP EQ queue memory
(and QIDs) are possibly freed before the underlying connection has been
fully shutdown. The result being a possible DMA read issued by HW after
the queue memory has been unmapped and freed. This results in possible
WR corruption in the worst case, system bus errors if an IOMMU is in use,
and SGE "bad WR" errors reported in the very least. The fix is to defer
unmap/free of queue memory and QID resources until the QP struct has
been fully dereferenced. To do this, the c4iw_ucontext must also be kept
around until the last QP that references it is fully freed. In addition,
since the last QP deref can happen in an IRQ disabled context, we need
a new workqueue thread to do the final unmap/free of the EQ queue memory.
Fixes: ad61a4c7a9b7 ("iw_cxgb4: don't block in destroy_qp awaiting the last deref")
Signed-off-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
drivers/infiniband/hw/cxgb4/device.c | 9 ++++++++
drivers/infiniband/hw/cxgb4/iw_cxgb4.h | 18 ++++++++++++++++
drivers/infiniband/hw/cxgb4/provider.c | 20 +++++++++++++++---
drivers/infiniband/hw/cxgb4/qp.c | 35 ++++++++++++++++++++++++---------
4 files changed, 69 insertions(+), 13 deletions(-)
--- a/drivers/infiniband/hw/cxgb4/device.c
+++ b/drivers/infiniband/hw/cxgb4/device.c
@@ -848,9 +848,17 @@ static int c4iw_rdev_open(struct c4iw_rd
}
}
+ rdev->free_workq = create_singlethread_workqueue("iw_cxgb4_free");
+ if (!rdev->free_workq) {
+ err = -ENOMEM;
+ goto err_free_status_page;
+ }
+
rdev->status_page->db_off = 0;
return 0;
+err_free_status_page:
+ free_page((unsigned long)rdev->status_page);
destroy_ocqp_pool:
c4iw_ocqp_pool_destroy(rdev);
destroy_rqtpool:
@@ -864,6 +872,7 @@ destroy_resource:
static void c4iw_rdev_close(struct c4iw_rdev *rdev)
{
+ destroy_workqueue(rdev->free_workq);
kfree(rdev->wr_log);
free_page((unsigned long)rdev->status_page);
c4iw_pblpool_destroy(rdev);
--- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
+++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
@@ -45,6 +45,7 @@
#include <linux/kref.h>
#include <linux/timer.h>
#include <linux/io.h>
+#include <linux/workqueue.h>
#include <asm/byteorder.h>
@@ -107,6 +108,7 @@ struct c4iw_dev_ucontext {
struct list_head qpids;
struct list_head cqids;
struct mutex lock;
+ struct kref kref;
};
enum c4iw_rdev_flags {
@@ -183,6 +185,7 @@ struct c4iw_rdev {
atomic_t wr_log_idx;
struct wr_log_entry *wr_log;
int wr_log_size;
+ struct workqueue_struct *free_workq;
};
static inline int c4iw_fatal_error(struct c4iw_rdev *rdev)
@@ -482,6 +485,8 @@ struct c4iw_qp {
int sq_sig_all;
struct completion rq_drained;
struct completion sq_drained;
+ struct work_struct free_work;
+ struct c4iw_ucontext *ucontext;
};
static inline struct c4iw_qp *to_c4iw_qp(struct ib_qp *ibqp)
@@ -495,6 +500,7 @@ struct c4iw_ucontext {
u32 key;
spinlock_t mmap_lock;
struct list_head mmaps;
+ struct kref kref;
};
static inline struct c4iw_ucontext *to_c4iw_ucontext(struct ib_ucontext *c)
@@ -502,6 +508,18 @@ static inline struct c4iw_ucontext *to_c
return container_of(c, struct c4iw_ucontext, ibucontext);
}
+void _c4iw_free_ucontext(struct kref *kref);
+
+static inline void c4iw_put_ucontext(struct c4iw_ucontext *ucontext)
+{
+ kref_put(&ucontext->kref, _c4iw_free_ucontext);
+}
+
+static inline void c4iw_get_ucontext(struct c4iw_ucontext *ucontext)
+{
+ kref_get(&ucontext->kref);
+}
+
struct c4iw_mm_entry {
struct list_head entry;
u64 addr;
--- a/drivers/infiniband/hw/cxgb4/provider.c
+++ b/drivers/infiniband/hw/cxgb4/provider.c
@@ -91,17 +91,28 @@ static int c4iw_process_mad(struct ib_de
return -ENOSYS;
}
-static int c4iw_dealloc_ucontext(struct ib_ucontext *context)
+void _c4iw_free_ucontext(struct kref *kref)
{
- struct c4iw_dev *rhp = to_c4iw_dev(context->device);
- struct c4iw_ucontext *ucontext = to_c4iw_ucontext(context);
+ struct c4iw_ucontext *ucontext;
+ struct c4iw_dev *rhp;
struct c4iw_mm_entry *mm, *tmp;
- PDBG("%s context %p\n", __func__, context);
+ ucontext = container_of(kref, struct c4iw_ucontext, kref);
+ rhp = to_c4iw_dev(ucontext->ibucontext.device);
+
+ PDBG("%s ucontext %p\n", __func__, ucontext);
list_for_each_entry_safe(mm, tmp, &ucontext->mmaps, entry)
kfree(mm);
c4iw_release_dev_ucontext(&rhp->rdev, &ucontext->uctx);
kfree(ucontext);
+}
+
+static int c4iw_dealloc_ucontext(struct ib_ucontext *context)
+{
+ struct c4iw_ucontext *ucontext = to_c4iw_ucontext(context);
+
+ PDBG("%s context %p\n", __func__, context);
+ c4iw_put_ucontext(ucontext);
return 0;
}
@@ -125,6 +136,7 @@ static struct ib_ucontext *c4iw_alloc_uc
c4iw_init_dev_ucontext(&rhp->rdev, &context->uctx);
INIT_LIST_HEAD(&context->mmaps);
spin_lock_init(&context->mmap_lock);
+ kref_init(&context->kref);
if (udata->outlen < sizeof(uresp) - sizeof(uresp.reserved)) {
if (!warned++)
--- a/drivers/infiniband/hw/cxgb4/qp.c
+++ b/drivers/infiniband/hw/cxgb4/qp.c
@@ -714,13 +714,32 @@ static int build_inv_stag(union t4_wr *w
return 0;
}
-static void _free_qp(struct kref *kref)
+static void free_qp_work(struct work_struct *work)
+{
+ struct c4iw_ucontext *ucontext;
+ struct c4iw_qp *qhp;
+ struct c4iw_dev *rhp;
+
+ qhp = container_of(work, struct c4iw_qp, free_work);
+ ucontext = qhp->ucontext;
+ rhp = qhp->rhp;
+
+ PDBG("%s qhp %p ucontext %p\n", __func__, qhp, ucontext);
+ destroy_qp(&rhp->rdev, &qhp->wq,
+ ucontext ? &ucontext->uctx : &rhp->rdev.uctx);
+
+ if (ucontext)
+ c4iw_put_ucontext(ucontext);
+ kfree(qhp);
+}
+
+static void queue_qp_free(struct kref *kref)
{
struct c4iw_qp *qhp;
qhp = container_of(kref, struct c4iw_qp, kref);
PDBG("%s qhp %p\n", __func__, qhp);
- kfree(qhp);
+ queue_work(qhp->rhp->rdev.free_workq, &qhp->free_work);
}
void c4iw_qp_add_ref(struct ib_qp *qp)
@@ -732,7 +751,7 @@ void c4iw_qp_add_ref(struct ib_qp *qp)
void c4iw_qp_rem_ref(struct ib_qp *qp)
{
PDBG("%s ib_qp %p\n", __func__, qp);
- kref_put(&to_c4iw_qp(qp)->kref, _free_qp);
+ kref_put(&to_c4iw_qp(qp)->kref, queue_qp_free);
}
static void add_to_fc_list(struct list_head *head, struct list_head *entry)
@@ -1642,7 +1661,6 @@ int c4iw_destroy_qp(struct ib_qp *ib_qp)
struct c4iw_dev *rhp;
struct c4iw_qp *qhp;
struct c4iw_qp_attributes attrs;
- struct c4iw_ucontext *ucontext;
qhp = to_c4iw_qp(ib_qp);
rhp = qhp->rhp;
@@ -1662,11 +1680,6 @@ int c4iw_destroy_qp(struct ib_qp *ib_qp)
spin_unlock_irq(&rhp->lock);
free_ird(rhp, qhp->attr.max_ird);
- ucontext = ib_qp->uobject ?
- to_c4iw_ucontext(ib_qp->uobject->context) : NULL;
- destroy_qp(&rhp->rdev, &qhp->wq,
- ucontext ? &ucontext->uctx : &rhp->rdev.uctx);
-
c4iw_qp_rem_ref(ib_qp);
PDBG("%s ib_qp %p qpid 0x%0x\n", __func__, ib_qp, qhp->wq.sq.qid);
@@ -1767,6 +1780,7 @@ struct ib_qp *c4iw_create_qp(struct ib_p
mutex_init(&qhp->mutex);
init_waitqueue_head(&qhp->wait);
kref_init(&qhp->kref);
+ INIT_WORK(&qhp->free_work, free_qp_work);
ret = insert_handle(rhp, &rhp->qpidr, qhp, qhp->wq.sq.qid);
if (ret)
@@ -1853,6 +1867,9 @@ struct ib_qp *c4iw_create_qp(struct ib_p
ma_sync_key_mm->len = PAGE_SIZE;
insert_mmap(ucontext, ma_sync_key_mm);
}
+
+ c4iw_get_ucontext(ucontext);
+ qhp->ucontext = ucontext;
}
qhp->ibqp.qp_num = qhp->wq.sq.qid;
init_timer(&(qhp->timer));
next prev parent reply other threads:[~2017-01-31 5:42 UTC|newest]
Thread overview: 64+ messages / expand[flat|nested] mbox.gz Atom feed top
2017-01-31 5:36 [PATCH 4.9 00/66] 4.9.7-stable review Greg Kroah-Hartman
2017-01-31 5:36 ` [PATCH 4.9 01/66] fbdev: color map copying bounds checking Greg Kroah-Hartman
2017-01-31 5:36 ` [PATCH 4.9 02/66] tile/ptrace: Preserve previous registers for short regset write Greg Kroah-Hartman
2017-01-31 5:36 ` [PATCH 4.9 03/66] drm: Schedule the output_poll_work with 1s delay if we have delayed event Greg Kroah-Hartman
2017-01-31 5:36 ` [PATCH 4.9 06/66] drm/vc4: Fix memory leak of the CRTC state Greg Kroah-Hartman
2017-01-31 5:36 ` [PATCH 4.9 07/66] drm/vc4: Fix an integer overflow in temporary allocation layout Greg Kroah-Hartman
2017-01-31 5:36 ` [PATCH 4.9 08/66] drm/vc4: Return -EINVAL on the overflow checks failing Greg Kroah-Hartman
2017-01-31 5:36 ` [PATCH 4.9 09/66] drm/vc4: fix a bounds check Greg Kroah-Hartman
2017-01-31 5:36 ` [PATCH 4.9 10/66] Revert "drm/radeon: always apply pci shutdown callbacks" Greg Kroah-Hartman
2017-01-31 5:36 ` [PATCH 4.9 11/66] drm/atomic: clear out fence when duplicating state Greg Kroah-Hartman
2017-01-31 5:36 ` [PATCH 4.9 12/66] mm/huge_memory.c: respect FOLL_FORCE/FOLL_COW for thp Greg Kroah-Hartman
2017-01-31 5:36 ` [PATCH 4.9 13/66] mm/mempolicy.c: do not put mempolicy before using its nodemask Greg Kroah-Hartman
2017-01-31 5:36 ` [PATCH 4.9 14/66] mm, page_alloc: fix check for NULL preferred_zone Greg Kroah-Hartman
2017-01-31 5:36 ` [PATCH 4.9 15/66] mm, page_alloc: fix fast-path race with cpuset update or removal Greg Kroah-Hartman
2017-01-31 5:36 ` [PATCH 4.9 16/66] mm, page_alloc: move cpuset seqcount checking to slowpath Greg Kroah-Hartman
2017-01-31 5:36 ` [PATCH 4.9 17/66] mm, page_alloc: fix premature OOM when racing with cpuset mems update Greg Kroah-Hartman
2017-01-31 5:36 ` [PATCH 4.9 18/66] vring: Force use of DMA API for ARM-based systems with legacy devices Greg Kroah-Hartman
2017-01-31 5:36 ` [PATCH 4.9 19/66] userns: Make ucounts lock irq-safe Greg Kroah-Hartman
2017-01-31 5:36 ` [PATCH 4.9 20/66] sysctl: fix proc_doulongvec_ms_jiffies_minmax() Greg Kroah-Hartman
2017-01-31 5:36 ` [PATCH 4.9 21/66] xfs: prevent quotacheck from overloading inode lru Greg Kroah-Hartman
2017-01-31 5:36 ` [PATCH 4.9 22/66] ISDN: eicon: silence misleading array-bounds warning Greg Kroah-Hartman
2017-01-31 5:36 ` [PATCH 4.9 23/66] Btrfs: remove old tree_root case in btrfs_read_locked_inode() Greg Kroah-Hartman
2017-01-31 5:36 ` [PATCH 4.9 24/66] Btrfs: disable xattr operations on subvolume directories Greg Kroah-Hartman
2017-01-31 5:36 ` [PATCH 4.9 25/66] Btrfs: remove ->{get, set}_acl() from btrfs_dir_ro_inode_operations Greg Kroah-Hartman
2017-01-31 5:36 ` [PATCH 4.9 26/66] RDMA/cma: Fix unknown symbol when CONFIG_IPV6 is not enabled Greg Kroah-Hartman
2017-01-31 5:36 ` [PATCH 4.9 27/66] s390/mm: Fix cmma unused transfer from pgste into pte Greg Kroah-Hartman
2017-01-31 5:36 ` [PATCH 4.9 28/66] s390/ptrace: Preserve previous registers for short regset write Greg Kroah-Hartman
2017-01-31 5:36 ` [PATCH 4.9 29/66] IB/cxgb3: fix misspelling in header guard Greg Kroah-Hartman
2017-01-31 5:36 ` [PATCH 4.9 30/66] IB/iser: Fix sg_tablesize calculation Greg Kroah-Hartman
2017-01-31 5:36 ` [PATCH 4.9 31/66] IB/srp: fix mr allocation when the device supports sg gaps Greg Kroah-Hartman
2017-01-31 5:36 ` [PATCH 4.9 32/66] IB/srp: fix invalid indirect_sg_entries parameter value Greg Kroah-Hartman
2017-01-31 5:36 ` [PATCH 4.9 34/66] can: ti_hecc: add missing prepare and unprepare of the clock Greg Kroah-Hartman
2017-01-31 5:36 ` [PATCH 4.9 35/66] ARC: udelay: fix inline assembler by adding LP_COUNT to clobber list Greg Kroah-Hartman
2017-01-31 5:36 ` [PATCH 4.9 36/66] ARC: [arcompact] handle unaligned access delay slot corner case Greg Kroah-Hartman
2017-01-31 5:36 ` [PATCH 4.9 37/66] parisc: Dont use BITS_PER_LONG in userspace-exported swab.h header Greg Kroah-Hartman
2017-01-31 5:36 ` [PATCH 4.9 38/66] nfs: Dont increment lock sequence ID after NFS4ERR_MOVED Greg Kroah-Hartman
2017-01-31 5:36 ` [PATCH 4.9 39/66] NFSv4.1: Fix a deadlock in layoutget Greg Kroah-Hartman
2017-01-31 5:36 ` [PATCH 4.9 40/66] NFSv4.0: always send mode in SETATTR after EXCLUSIVE4 Greg Kroah-Hartman
2017-01-31 5:36 ` [PATCH 4.9 41/66] SUNRPC: cleanup ida information when removing sunrpc module Greg Kroah-Hartman
2017-01-31 5:36 ` Greg Kroah-Hartman [this message]
2017-01-31 5:36 ` [PATCH 4.9 43/66] [media] pctv452e: move buffer to heap, no mutex Greg Kroah-Hartman
2017-01-31 5:36 ` [PATCH 4.9 44/66] [media] v4l: tvp5150: Reset device at probe time, not in get/set format handlers Greg Kroah-Hartman
2017-01-31 5:36 ` [PATCH 4.9 45/66] [media] v4l: tvp5150: Fix comment regarding output pin muxing Greg Kroah-Hartman
2017-01-31 5:36 ` [PATCH 4.9 46/66] [media] v4l: tvp5150: Dont override output pinmuxing at stream on/off time Greg Kroah-Hartman
2017-01-31 5:36 ` [PATCH 4.9 47/66] drm/i915: Clear ret before unbinding in i915_gem_evict_something() Greg Kroah-Hartman
2017-01-31 5:36 ` [PATCH 4.9 48/66] drm/i915: prevent crash with .disable_display parameter Greg Kroah-Hartman
2017-01-31 5:36 ` [PATCH 4.9 53/66] IB/umem: Release pid in error and ODP flow Greg Kroah-Hartman
2017-01-31 5:36 ` [PATCH 4.9 54/66] IB/rxe: Fix rxe dev insertion to rxe_dev_list Greg Kroah-Hartman
2017-01-31 5:36 ` [PATCH 4.9 55/66] IB/rxe: Prevent from completer to operate on non valid QP Greg Kroah-Hartman
2017-01-31 5:37 ` [PATCH 4.9 56/66] [media] s5k4ecgx: select CRC32 helper Greg Kroah-Hartman
2017-01-31 5:37 ` [PATCH 4.9 57/66] pinctrl: broxton: Use correct PADCFGLOCK offset Greg Kroah-Hartman
2017-01-31 5:37 ` [PATCH 4.9 58/66] pinctrl: uniphier: fix Ethernet (RMII) pin-mux setting for LD20 Greg Kroah-Hartman
2017-01-31 5:37 ` [PATCH 4.9 59/66] pinctrl: baytrail: Rectify debounce support Greg Kroah-Hartman
2017-01-31 5:37 ` [PATCH 4.9 60/66] memory_hotplug: make zone_can_shift() return a boolean value Greg Kroah-Hartman
2017-01-31 5:37 ` [PATCH 4.9 61/66] virtio_mmio: Set DMA masks appropriately Greg Kroah-Hartman
2017-01-31 5:37 ` [PATCH 4.9 62/66] platform/x86: mlx-platform: free first dev on error Greg Kroah-Hartman
2017-01-31 5:37 ` [PATCH 4.9 63/66] platform/x86: intel_mid_powerbtn: Set IRQ_ONESHOT Greg Kroah-Hartman
2017-01-31 5:37 ` [PATCH 4.9 64/66] mm, memcg: do not retry precharge charges Greg Kroah-Hartman
2017-01-31 5:37 ` [PATCH 4.9 65/66] perf/core: Fix concurrent sys_perf_event_open() vs. move_group race Greg Kroah-Hartman
2017-01-31 5:37 ` [PATCH 4.9 66/66] drm/i915: Remove WaDisableLSQCROPERFforOCL KBL workaround Greg Kroah-Hartman
2017-01-31 17:21 ` [PATCH 4.9 00/66] 4.9.7-stable review Guenter Roeck
2017-01-31 20:16 ` Greg Kroah-Hartman
2017-01-31 22:06 ` Shuah Khan
2017-02-01 7:28 ` Greg Kroah-Hartman
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20170131053604.939907468@linuxfoundation.org \
--to=gregkh@linuxfoundation.org \
--cc=dledford@redhat.com \
--cc=linux-kernel@vger.kernel.org \
--cc=stable@vger.kernel.org \
--cc=swise@opengridcomputing.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).