From: Selvin Xavier <selvin.xavier-dY08KVG/lbpWk0Htik3J/w@public.gmane.org>
To: dledford-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org
Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
Somnath Kotur
<somnath.kotur-dY08KVG/lbpWk0Htik3J/w@public.gmane.org>,
Sriharsha Basavapatna
<sriharsha.basavapatna-dY08KVG/lbpWk0Htik3J/w@public.gmane.org>,
Selvin Xavier
<selvin.xavier-dY08KVG/lbpWk0Htik3J/w@public.gmane.org>
Subject: [PATCH for-next V2 01/13] RDMA/bnxt_re: Fix race between netdev register and unregister events
Date: Thu, 29 Jun 2017 12:28:07 -0700 [thread overview]
Message-ID: <1498764499-24157-2-git-send-email-selvin.xavier@broadcom.com> (raw)
In-Reply-To: <1498764499-24157-1-git-send-email-selvin.xavier-dY08KVG/lbpWk0Htik3J/w@public.gmane.org>
From: Somnath Kotur <somnath.kotur-dY08KVG/lbpWk0Htik3J/w@public.gmane.org>
Upon receipt of the NETDEV_REGISTER event from the netdev notifier
chain, the IB stack registration is spawned off to a workqueue
since that also requires an rtnl lock.
There could be 2 kinds of races between the NETDEV_REGISTER and the
NETDEV_UNREGISTER event handling.
a)The NETDEV_UNREGISTER event is received in rapid succession after
the NETDEV_REGISTER event even before the work queue got a chance
to run
b)The NETDEV_UNREGISTER event is received while the workqueue that
handles registration with the IB stack is still in progress.
Handle both the races with a bit flag that is set as part of the
NETDEV_REGISTER event just before the work item is queued and cleared
in the workqueue after the registration with the IB stack is complete.
Use a barrier to ensure the bit is cleared only after the IB stack
registration code is completed.
Removes the link speed query from the query_port as it causes
deadlock while trying to acquire rtnl_lock. Now querying the
speed from the nedev event itself.
Also, added a code to wait for resources(like CQ) to be freed by the
ULPs, during driver unload
Signed-off-by: Somnath Kotur <somnath.kotur-dY08KVG/lbpWk0Htik3J/w@public.gmane.org>
Signed-off-by: Sriharsha Basavapatna <sriharsha.basavapatna-dY08KVG/lbpWk0Htik3J/w@public.gmane.org>
Signed-off-by: Selvin Xavier <selvin.xavier-dY08KVG/lbpWk0Htik3J/w@public.gmane.org>
---
drivers/infiniband/hw/bnxt_re/bnxt_re.h | 13 +++++++----
drivers/infiniband/hw/bnxt_re/ib_verbs.c | 21 +++--------------
drivers/infiniband/hw/bnxt_re/main.c | 39 ++++++++++++++++++++++++++++++++
3 files changed, 50 insertions(+), 23 deletions(-)
diff --git a/drivers/infiniband/hw/bnxt_re/bnxt_re.h b/drivers/infiniband/hw/bnxt_re/bnxt_re.h
index 0877283..12950ec 100644
--- a/drivers/infiniband/hw/bnxt_re/bnxt_re.h
+++ b/drivers/infiniband/hw/bnxt_re/bnxt_re.h
@@ -84,11 +84,13 @@ struct bnxt_re_dev {
struct ib_device ibdev;
struct list_head list;
unsigned long flags;
-#define BNXT_RE_FLAG_NETDEV_REGISTERED 0
-#define BNXT_RE_FLAG_IBDEV_REGISTERED 1
-#define BNXT_RE_FLAG_GOT_MSIX 2
-#define BNXT_RE_FLAG_RCFW_CHANNEL_EN 8
-#define BNXT_RE_FLAG_QOS_WORK_REG 16
+#define BNXT_RE_FLAG_NETDEV_REGISTERED 0
+#define BNXT_RE_FLAG_IBDEV_REGISTERED 1
+#define BNXT_RE_FLAG_GOT_MSIX 2
+#define BNXT_RE_FLAG_RCFW_CHANNEL_EN 4
+#define BNXT_RE_FLAG_QOS_WORK_REG 5
+#define BNXT_RE_FLAG_NETDEV_REG_IN_PROG 6
+
struct net_device *netdev;
unsigned int version, major, minor;
struct bnxt_en_dev *en_dev;
@@ -131,6 +133,7 @@ struct bnxt_re_dev {
struct bnxt_re_qp *qp1_sqp;
struct bnxt_re_ah *sqp_ah;
struct bnxt_re_sqp_entries sqp_tbl[1024];
+ u32 espeed;
};
#define to_bnxt_re_dev(ptr, member) \
diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
index c7bd683..fa388f2 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
@@ -223,20 +223,8 @@ int bnxt_re_modify_device(struct ib_device *ibdev,
return 0;
}
-static void __to_ib_speed_width(struct net_device *netdev, u8 *speed, u8 *width)
+static void __to_ib_speed_width(u32 espeed, u8 *speed, u8 *width)
{
- struct ethtool_link_ksettings lksettings;
- u32 espeed;
-
- if (netdev->ethtool_ops && netdev->ethtool_ops->get_link_ksettings) {
- memset(&lksettings, 0, sizeof(lksettings));
- rtnl_lock();
- netdev->ethtool_ops->get_link_ksettings(netdev, &lksettings);
- rtnl_unlock();
- espeed = lksettings.base.speed;
- } else {
- espeed = SPEED_UNKNOWN;
- }
switch (espeed) {
case SPEED_1000:
*speed = IB_SPEED_SDR;
@@ -303,12 +291,9 @@ int bnxt_re_query_port(struct ib_device *ibdev, u8 port_num,
port_attr->sm_sl = 0;
port_attr->subnet_timeout = 0;
port_attr->init_type_reply = 0;
- /* call the underlying netdev's ethtool hooks to query speed settings
- * for which we acquire rtnl_lock _only_ if it's registered with
- * IB stack to avoid race in the NETDEV_UNREG path
- */
+
if (test_bit(BNXT_RE_FLAG_IBDEV_REGISTERED, &rdev->flags))
- __to_ib_speed_width(rdev->netdev, &port_attr->active_speed,
+ __to_ib_speed_width(rdev->espeed, &port_attr->active_speed,
&port_attr->active_width);
return 0;
}
diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c
index 1fce5e7..91b5208 100644
--- a/drivers/infiniband/hw/bnxt_re/main.c
+++ b/drivers/infiniband/hw/bnxt_re/main.c
@@ -48,6 +48,8 @@
#include <net/ipv6.h>
#include <net/addrconf.h>
#include <linux/if_ether.h>
+#include <linux/atomic.h>
+#include <asm/barrier.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_user_verbs.h>
@@ -922,6 +924,10 @@ static void bnxt_re_ib_unreg(struct bnxt_re_dev *rdev, bool lock_wait)
if (test_and_clear_bit(BNXT_RE_FLAG_QOS_WORK_REG, &rdev->flags))
cancel_delayed_work(&rdev->worker);
+ /* Wait for ULPs to release references */
+ while (atomic_read(&rdev->cq_count))
+ usleep_range(500, 1000);
+
bnxt_re_cleanup_res(rdev);
bnxt_re_free_res(rdev, lock_wait);
@@ -1148,6 +1154,22 @@ static void bnxt_re_remove_one(struct bnxt_re_dev *rdev)
pci_dev_put(rdev->en_dev->pdev);
}
+static void bnxt_re_get_link_speed(struct bnxt_re_dev *rdev)
+{
+ struct ethtool_link_ksettings lksettings;
+ struct net_device *netdev = rdev->netdev;
+
+ if (netdev->ethtool_ops && netdev->ethtool_ops->get_link_ksettings) {
+ memset(&lksettings, 0, sizeof(lksettings));
+ if (rtnl_trylock()) {
+ netdev->ethtool_ops->get_link_ksettings(netdev,
+ &lksettings);
+ rdev->espeed = lksettings.base.speed;
+ rtnl_unlock();
+ }
+ }
+}
+
/* Handle all deferred netevents tasks */
static void bnxt_re_task(struct work_struct *work)
{
@@ -1165,6 +1187,10 @@ static void bnxt_re_task(struct work_struct *work)
switch (re_work->event) {
case NETDEV_REGISTER:
rc = bnxt_re_ib_reg(rdev);
+ /* Use memory barrier to sync the rdev->flags */
+ smp_mb();
+ clear_bit(BNXT_RE_FLAG_NETDEV_REG_IN_PROG,
+ &rdev->flags);
if (rc)
dev_err(rdev_to_dev(rdev),
"Failed to register with IB: %#x", rc);
@@ -1182,6 +1208,7 @@ static void bnxt_re_task(struct work_struct *work)
else if (netif_carrier_ok(rdev->netdev))
bnxt_re_dispatch_event(&rdev->ibdev, NULL, 1,
IB_EVENT_PORT_ACTIVE);
+ bnxt_re_get_link_speed(rdev);
break;
default:
break;
@@ -1240,10 +1267,22 @@ static int bnxt_re_netdev_event(struct notifier_block *notifier,
break;
}
bnxt_re_init_one(rdev);
+ /*
+ * Query the link speed at the time of registration.
+ * Already in rtnl_lock context
+ */
+ bnxt_re_get_link_speed(rdev);
+
+ set_bit(BNXT_RE_FLAG_NETDEV_REG_IN_PROG, &rdev->flags);
sch_work = true;
break;
case NETDEV_UNREGISTER:
+ /* netdev notifier will call NETDEV_UNREGISTER again later since
+ * we are still holding the reference to the netdev
+ */
+ if (test_bit(BNXT_RE_FLAG_NETDEV_REG_IN_PROG, &rdev->flags))
+ goto exit;
bnxt_re_ib_unreg(rdev, false);
bnxt_re_remove_one(rdev);
bnxt_re_dev_unreg(rdev);
--
2.5.5
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
next prev parent reply other threads:[~2017-06-29 19:28 UTC|newest]
Thread overview: 15+ messages / expand[flat|nested] mbox.gz Atom feed top
2017-06-29 19:28 [PATCH for-next V2 00/13] RDMA/bnxt_re: Misc fixes for bnxt_re Selvin Xavier
[not found] ` <1498764499-24157-1-git-send-email-selvin.xavier-dY08KVG/lbpWk0Htik3J/w@public.gmane.org>
2017-06-29 19:28 ` Selvin Xavier [this message]
2017-06-29 19:28 ` [PATCH for-next V2 02/13] RDMA/bnxt_re: Free doorbell page index (DPI) during dealloc ucontext Selvin Xavier
2017-06-29 19:28 ` [PATCH for-next V2 03/13] RDMA/bnxt_re: Fix WQE Size posted to HW to prevent it from throwing error Selvin Xavier
2017-06-29 19:28 ` [PATCH for-next V2 04/13] RDMA/bnxt_re: Add vlan tag for untagged RoCE traffic when PFC is configured Selvin Xavier
2017-06-29 19:28 ` [PATCH for-next V2 05/13] RDMA/bnxt_re: Do not free the ctx_tbl entry if delete GID fails Selvin Xavier
2017-06-29 19:28 ` [PATCH for-next V2 06/13] RDMA/bnxt_re: Report supported value to IB stack in query_device Selvin Xavier
2017-06-29 19:28 ` [PATCH for-next V2 07/13] RDMA/bnxt_re: Fixed the max_rd_atomic support for initiator and destination QP Selvin Xavier
2017-06-29 19:28 ` [PATCH for-next V2 08/13] RDMA/bnxt_re: Specify RDMA component when allocating stats context Selvin Xavier
2017-06-29 19:28 ` [PATCH for-next V2 09/13] RDMA/bnxt_re: Allow posting when QPs are in error Selvin Xavier
2017-06-29 19:28 ` [PATCH for-next V2 10/13] RDMA/bnxt_re: Enable atomics only if host bios supports Selvin Xavier
2017-06-29 19:28 ` [PATCH for-next V2 11/13] RDMA/bnxt_re: Fix return value of poll routine Selvin Xavier
2017-06-29 19:28 ` [PATCH for-next V2 12/13] RDMA/bnxt_re: Report MISSED_EVENTS in req_notify_cq Selvin Xavier
2017-06-29 19:28 ` [PATCH for-next V2 13/13] RDMA/bnxt_re: Fix the value reported for local ack delay Selvin Xavier
2017-07-22 17:41 ` [PATCH for-next V2 00/13] RDMA/bnxt_re: Misc fixes for bnxt_re Doug Ledford
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1498764499-24157-2-git-send-email-selvin.xavier@broadcom.com \
--to=selvin.xavier-dy08kvg/lbpwk0htik3j/w@public.gmane.org \
--cc=dledford-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org \
--cc=linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org \
--cc=somnath.kotur-dY08KVG/lbpWk0Htik3J/w@public.gmane.org \
--cc=sriharsha.basavapatna-dY08KVG/lbpWk0Htik3J/w@public.gmane.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox