From: Michael Baum <michaelba@nvidia.com>
To: <dev@dpdk.org>
Cc: Matan Azrad <matan@nvidia.com>,
Raslan Darawsheh <rasland@nvidia.com>,
Viacheslav Ovsiienko <viacheslavo@nvidia.com>
Subject: [dpdk-dev] [RFC 12/21] common/mlx5: add ROCE disable in context device creation
Date: Tue, 17 Aug 2021 16:44:32 +0300 [thread overview]
Message-ID: <20210817134441.1966618-13-michaelba@nvidia.com> (raw)
In-Reply-To: <20210817134441.1966618-1-michaelba@nvidia.com>
Add an option to get the IB device after disabling RoCE. This is relevant
when the vDPA class is present in the device arguments list.
Signed-off-by: Michael Baum <michaelba@nvidia.com>
---
drivers/common/mlx5/linux/mlx5_common_os.c | 126 ++++++++++++++++++++-
1 file changed, 125 insertions(+), 1 deletion(-)
diff --git a/drivers/common/mlx5/linux/mlx5_common_os.c b/drivers/common/mlx5/linux/mlx5_common_os.c
index 6f78897390..4a94865241 100644
--- a/drivers/common/mlx5/linux/mlx5_common_os.c
+++ b/drivers/common/mlx5/linux/mlx5_common_os.c
@@ -15,6 +15,7 @@
#include <rte_string_fns.h>
#include "mlx5_common.h"
+#include "mlx5_nl.h"
#include "mlx5_common_log.h"
#include "mlx5_common_os.h"
#include "mlx5_glue.h"
@@ -39,6 +40,9 @@ const struct mlx5_glue *mlx5_glue;
#define MLX5_TXDB_NCACHED 1
#define MLX5_TXDB_HEURISTIC 2
+/* Number of IB device lookup attempts made after disabling ROCE. */
+#define MLX5_VDPA_MAX_RETRIES 20
+/* Delay between lookup attempts, passed to usleep() (microseconds). */
+#define MLX5_VDPA_USEC 1000
+
int
mlx5_get_pci_addr(const char *dev_path, struct rte_pci_addr *pci_addr)
{
@@ -417,6 +421,123 @@ mlx5_glue_constructor(void)
mlx5_glue = NULL;
}
+/**
+ * Try to disable ROCE through the Netlink/Devlink interface.
+ *
+ * @param addr
+ *   Device address string handed to the devlink ROCE get/set helpers.
+ *
+ * @return
+ *   0 if the ROCE enable flag already reads as zero or was cleared,
+ *   otherwise the return value of the helper that failed.
+ */
+static int
+mlx5_nl_roce_disable(const char *addr)
+{
+	int sk = mlx5_nl_init(NETLINK_GENERIC);
+	int family;
+	int roce_on;
+	int rc;
+
+	if (sk < 0)
+		return sk;
+	family = mlx5_nl_devlink_family_id_get(sk);
+	if (family < 0) {
+		rc = family;
+		DRV_LOG(DEBUG,
+			"Failed to get devlink id for ROCE operations by Netlink.");
+	} else {
+		rc = mlx5_nl_enable_roce_get(sk, family, addr, &roce_on);
+		if (rc) {
+			DRV_LOG(DEBUG,
+				"Failed to get ROCE enable by Netlink: %d.",
+				rc);
+		} else if (!roce_on) {
+			/* Nothing to change, rc is already 0 here. */
+			DRV_LOG(INFO, "ROCE has already disabled(Netlink).");
+		} else {
+			rc = mlx5_nl_enable_roce_set(sk, family, addr, 0);
+			if (rc)
+				DRV_LOG(DEBUG,
+					"Failed to disable ROCE by Netlink: %d.",
+					rc);
+			else
+				DRV_LOG(INFO,
+					"ROCE is disabled by Netlink successfully.");
+		}
+	}
+	/* The socket is released on every path once it has been opened. */
+	close(sk);
+	return rc;
+}
+
+/**
+ * Try to disable ROCE through the device's sysfs "roce_enable" attribute.
+ *
+ * @param addr
+ *   Name of the device directory under /sys/bus/pci/devices.
+ *
+ * @return
+ *   0 if ROCE was already disabled or has just been disabled,
+ *   a negative errno value otherwise and rte_errno is set.
+ */
+static int
+mlx5_sys_roce_disable(const char *addr)
+{
+	FILE *file_o;
+	int enable;
+	int ret;
+
+	MKSTR(file_p, "/sys/bus/pci/devices/%s/roce_enable", addr);
+	file_o = fopen(file_p, "rb");
+	if (!file_o) {
+		rte_errno = ENOTSUP;
+		return -ENOTSUP;
+	}
+	ret = fscanf(file_o, "%d", &enable);
+	/* The read handle is not needed past this point on any path. */
+	fclose(file_o);
+	if (ret != 1) {
+		rte_errno = EINVAL;
+		ret = -EINVAL;
+		goto error;
+	}
+	if (!enable) {
+		/* Nothing was written, so do not report a fresh disable. */
+		DRV_LOG(INFO, "ROCE has already disabled(sysfs).");
+		return 0;
+	}
+	file_o = fopen(file_p, "wb");
+	if (!file_o) {
+		rte_errno = ENOTSUP;
+		return -ENOTSUP;
+	}
+	ret = fprintf(file_o, "0\n") < 0 ? -EIO : 0;
+	/* Buffered data reaches sysfs on flush, so fclose() may fail too. */
+	if (fclose(file_o) != 0)
+		ret = -EIO;
+	if (ret) {
+		rte_errno = EIO;
+		goto error;
+	}
+	DRV_LOG(INFO, "ROCE is disabled by sysfs successfully.");
+	return 0;
+error:
+	DRV_LOG(DEBUG, "Failed to disable ROCE by sysfs: %d.", ret);
+	return ret;
+}
+
+/**
+ * Disable ROCE on a device, trying Netlink first and sysfs second.
+ *
+ * @param dev
+ *   Device whose PCI address string is obtained via mlx5_dev_to_pci_str().
+ *
+ * @return
+ *   0 if either method succeeds, -rte_errno otherwise.
+ */
+static int
+mlx5_roce_disable(struct rte_device *dev)
+{
+	char bdf[PCI_PRI_STR_SIZE] = { 0 };
+
+	if (mlx5_dev_to_pci_str(dev, bdf, sizeof(bdf)) < 0)
+		return -rte_errno;
+	/* Netlink is the preferred method. */
+	if (mlx5_nl_roce_disable(bdf) == 0)
+		return 0;
+	/* Fall back to sysfs only when Netlink did not succeed. */
+	if (mlx5_sys_roce_disable(bdf) == 0)
+		return 0;
+	return -rte_errno;
+}
+
+/**
+ * Disable ROCE on a device and then look up its IB device, retrying
+ * while the lookup returns nothing.
+ *
+ * @param dev
+ *   Device handed to mlx5_roce_disable() and mlx5_os_get_ibv_dev().
+ *
+ * @return
+ *   IB device pointer on success, NULL if ROCE could not be disabled or
+ *   if all MLX5_VDPA_MAX_RETRIES lookups failed (rte_errno is then set
+ *   to EAGAIN).
+ */
+static struct ibv_device *
+mlx5_vdpa_get_ibv_dev(struct rte_device *dev)
+{
+	int attempt;
+
+	if (mlx5_roce_disable(dev) != 0) {
+		DRV_LOG(WARNING, "Failed to disable ROCE for \"%s\".",
+			dev->name);
+		return NULL;
+	}
+	/* Wait for the IB device to appear again after reload. */
+	for (attempt = 0; attempt < MLX5_VDPA_MAX_RETRIES; ++attempt) {
+		struct ibv_device *found = mlx5_os_get_ibv_dev(dev);
+
+		if (found != NULL)
+			return found;
+		usleep(MLX5_VDPA_USEC);
+	}
+	DRV_LOG(ERR,
+		"Cannot get IB device after disabling RoCE for \"%s\", retries exceed %d.",
+		dev->name, MLX5_VDPA_MAX_RETRIES);
+	rte_errno = EAGAIN;
+	return NULL;
+}
+
static int
mlx5_config_doorbell_mapping_env(int dbnc)
{
@@ -471,7 +592,10 @@ mlx5_os_devx_open_device(struct mlx5_dev_ctx *dev_ctx, struct rte_device *dev,
struct ibv_context *ctx = NULL;
int dbmap_env;
- ibv = mlx5_os_get_ibv_dev(dev);
+ if (classes & MLX5_CLASS_VDPA)
+ ibv = mlx5_vdpa_get_ibv_dev(dev);
+ else
+ ibv = mlx5_os_get_ibv_dev(dev);
if (!ibv)
return -rte_errno;
DRV_LOG(INFO, "Dev information matches for device \"%s\".", ibv->name);
--
2.25.1
next prev parent reply other threads:[~2021-08-17 13:46 UTC|newest]
Thread overview: 22+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-08-17 13:44 [dpdk-dev] [RFC 00/21] mlx5: sharing global MR cache between drivers Michael Baum
2021-08-17 13:44 ` [dpdk-dev] [RFC 01/21] net/mlx5: fix shared device context creation error flow Michael Baum
2021-08-17 13:44 ` [dpdk-dev] [RFC 02/21] net/mlx5: fix PCI probing " Michael Baum
2021-08-17 13:44 ` [dpdk-dev] [RFC 03/21] common/mlx5: add context device structure Michael Baum
2021-08-17 13:44 ` [dpdk-dev] [RFC 04/21] compress/mlx5: use " Michael Baum
2021-08-17 13:44 ` [dpdk-dev] [RFC 05/21] crypto/mlx5: " Michael Baum
2021-08-17 13:44 ` [dpdk-dev] [RFC 06/21] regex/mlx5: " Michael Baum
2021-08-17 13:44 ` [dpdk-dev] [RFC 07/21] net/mlx5: improve probe function on Windows Michael Baum
2021-08-17 13:44 ` [dpdk-dev] [RFC 08/21] net/mlx5: improve probe function on Linux Michael Baum
2021-08-17 13:44 ` [dpdk-dev] [RFC 09/21] net/mlx5: improve spawn function Michael Baum
2021-08-17 13:44 ` [dpdk-dev] [RFC 10/21] net/mlx5: use context device structure Michael Baum
2021-08-17 13:44 ` [dpdk-dev] [RFC 11/21] net/mlx5: move NUMA node field to context device Michael Baum
2021-08-17 13:44 ` Michael Baum [this message]
2021-08-17 13:44 ` [dpdk-dev] [RFC 13/21] vdpa/mlx5: use context device structure Michael Baum
2021-08-17 13:44 ` [dpdk-dev] [RFC 14/21] mlx5: update device sent to probing Michael Baum
2021-08-17 13:44 ` [dpdk-dev] [RFC 15/21] mlx5: share context device structure between drivers Michael Baum
2021-08-17 13:44 ` [dpdk-dev] [RFC 16/21] common/mlx5: add HCA attributes to context device structure Michael Baum
2021-08-17 13:44 ` [dpdk-dev] [RFC 17/21] regex/mlx5: use HCA attributes from context device Michael Baum
2021-08-17 13:44 ` [dpdk-dev] [RFC 18/21] vdpa/mlx5: " Michael Baum
2021-08-17 13:44 ` [dpdk-dev] [RFC 19/21] compress/mlx5: " Michael Baum
2021-08-17 13:44 ` [dpdk-dev] [RFC 20/21] crypto/mlx5: " Michael Baum
2021-08-17 13:44 ` [dpdk-dev] [RFC 21/21] net/mlx5: " Michael Baum
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20210817134441.1966618-13-michaelba@nvidia.com \
--to=michaelba@nvidia.com \
--cc=dev@dpdk.org \
--cc=matan@nvidia.com \
--cc=rasland@nvidia.com \
--cc=viacheslavo@nvidia.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.