Linux RDMA and InfiniBand development
 help / color / mirror / Atom feed
* [rdma-core patch v3] srp_daemon: improve the debug message for is_enabled_by_rules_file
From: Honggang Li @ 2019-07-15  4:16 UTC (permalink / raw)
  To: bvanassche, jgg; +Cc: linux-rdma, Honggang Li

If the target was disallowed by the rules file, the user cannot tell that
from the old debug message:

pr_debug("Found an SRP target with id_ext %s - check if it allowed by rules file\n", target->id_ext);

That the target was allowed is only implied by the debug message that follows the old one:

pr_debug("Found an SRP target with id_ext %s - check if it is already connected\n", target->id_ext);

The improved debug message reports the result of the rules file check, so the
user no longer needs to wonder whether the target is allowed or not.

Signed-off-by: Honggang Li <honli@redhat.com>
---
 srp_daemon/srp_daemon.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/srp_daemon/srp_daemon.c b/srp_daemon/srp_daemon.c
index a004f6a4..e85b9668 100644
--- a/srp_daemon/srp_daemon.c
+++ b/srp_daemon/srp_daemon.c
@@ -349,10 +349,11 @@ static int is_enabled_by_rules_file(struct target_details *target)
 	int rule;
 	struct config_t *conf = config;
 
-	if (NULL == conf->rules)
+	if (NULL == conf->rules) {
+		pr_debug("Allowing SRP target with id_ext %s because not using a rules file\n", target->id_ext);
 		return 1;
+	}
 
-	pr_debug("Found an SRP target with id_ext %s - check if it allowed by rules file\n", target->id_ext);
 	rule = -1;
 	do {
 		rule++;
@@ -392,6 +393,9 @@ static int is_enabled_by_rules_file(struct target_details *target)
 
 		target->options = conf->rules[rule].options;
 
+		pr_debug("SRP target with id_ext %s %s by rules file\n",
+				target->id_ext,
+				conf->rules[rule].allow ? "allowed" : "disallowed");
 		return conf->rules[rule].allow;
 
 	} while (1);
-- 
2.20.1


^ permalink raw reply related

* [PATCH for-rc] RDMA/bnxt_re: Honor vlan_id in GID entry comparison
From: Selvin Xavier @ 2019-07-15  9:19 UTC (permalink / raw)
  To: linux-rdma, dledford, jgg; +Cc: linux-nvme, Selvin Xavier, stable, Parav Pandit

A GID entry consists of GID, vlan, netdev and smac.
Extend the GID duplicate check comparisons to consider vlan_id as well
to support IPv6 VLAN based link local addresses. Introduce
a new structure (bnxt_qplib_gid_info) to hold gid and vlan_id information.

The issue is discussed in the following thread
https://www.spinics.net/lists/linux-rdma/msg81594.html

Fixes: 823b23da7113 ("IB/core: Allow vlan link local address based RoCE GIDs")
Cc: <stable@vger.kernel.org> # v5.2+
Reported-by: Yi Zhang <yi.zhang@redhat.com>
Co-developed-by: Parav Pandit <parav@mellanox.com>
Signed-off-by: Parav Pandit <parav@mellanox.com>
Signed-off-by: Selvin Xavier <selvin.xavier@broadcom.com>
---
 drivers/infiniband/hw/bnxt_re/ib_verbs.c  |  7 +++++--
 drivers/infiniband/hw/bnxt_re/qplib_res.c | 13 +++++++++----
 drivers/infiniband/hw/bnxt_re/qplib_res.h |  2 +-
 drivers/infiniband/hw/bnxt_re/qplib_sp.c  | 14 +++++++++-----
 drivers/infiniband/hw/bnxt_re/qplib_sp.h  |  7 ++++++-
 5 files changed, 30 insertions(+), 13 deletions(-)

diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
index 2c3685faa57a..a4a9f90f2482 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
@@ -308,6 +308,7 @@ int bnxt_re_del_gid(const struct ib_gid_attr *attr, void **context)
 	struct bnxt_re_dev *rdev = to_bnxt_re_dev(attr->device, ibdev);
 	struct bnxt_qplib_sgid_tbl *sgid_tbl = &rdev->qplib_res.sgid_tbl;
 	struct bnxt_qplib_gid *gid_to_del;
+	u16 vlan_id = 0xFFFF;
 
 	/* Delete the entry from the hardware */
 	ctx = *context;
@@ -317,7 +318,8 @@ int bnxt_re_del_gid(const struct ib_gid_attr *attr, void **context)
 	if (sgid_tbl && sgid_tbl->active) {
 		if (ctx->idx >= sgid_tbl->max)
 			return -EINVAL;
-		gid_to_del = &sgid_tbl->tbl[ctx->idx];
+		gid_to_del = &sgid_tbl->tbl[ctx->idx].gid;
+		vlan_id = sgid_tbl->tbl[ctx->idx].vlan_id;
 		/* DEL_GID is called in WQ context(netdevice_event_work_handler)
 		 * or via the ib_unregister_device path. In the former case QP1
 		 * may not be destroyed yet, in which case just return as FW
@@ -335,7 +337,8 @@ int bnxt_re_del_gid(const struct ib_gid_attr *attr, void **context)
 		}
 		ctx->refcnt--;
 		if (!ctx->refcnt) {
-			rc = bnxt_qplib_del_sgid(sgid_tbl, gid_to_del, true);
+			rc = bnxt_qplib_del_sgid(sgid_tbl, gid_to_del,
+						 vlan_id,  true);
 			if (rc) {
 				dev_err(rdev_to_dev(rdev),
 					"Failed to remove GID: %#x", rc);
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.c b/drivers/infiniband/hw/bnxt_re/qplib_res.c
index 37928b1111df..bdbde8e22420 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_res.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_res.c
@@ -488,7 +488,7 @@ static int bnxt_qplib_alloc_sgid_tbl(struct bnxt_qplib_res *res,
 				     struct bnxt_qplib_sgid_tbl *sgid_tbl,
 				     u16 max)
 {
-	sgid_tbl->tbl = kcalloc(max, sizeof(struct bnxt_qplib_gid), GFP_KERNEL);
+	sgid_tbl->tbl = kcalloc(max, sizeof(*sgid_tbl->tbl), GFP_KERNEL);
 	if (!sgid_tbl->tbl)
 		return -ENOMEM;
 
@@ -526,9 +526,10 @@ static void bnxt_qplib_cleanup_sgid_tbl(struct bnxt_qplib_res *res,
 	for (i = 0; i < sgid_tbl->max; i++) {
 		if (memcmp(&sgid_tbl->tbl[i], &bnxt_qplib_gid_zero,
 			   sizeof(bnxt_qplib_gid_zero)))
-			bnxt_qplib_del_sgid(sgid_tbl, &sgid_tbl->tbl[i], true);
+			bnxt_qplib_del_sgid(sgid_tbl, &sgid_tbl->tbl[i].gid,
+					    sgid_tbl->tbl[i].vlan_id, true);
 	}
-	memset(sgid_tbl->tbl, 0, sizeof(struct bnxt_qplib_gid) * sgid_tbl->max);
+	memset(sgid_tbl->tbl, 0, sizeof(*sgid_tbl->tbl) * sgid_tbl->max);
 	memset(sgid_tbl->hw_id, -1, sizeof(u16) * sgid_tbl->max);
 	memset(sgid_tbl->vlan, 0, sizeof(u8) * sgid_tbl->max);
 	sgid_tbl->active = 0;
@@ -537,7 +538,11 @@ static void bnxt_qplib_cleanup_sgid_tbl(struct bnxt_qplib_res *res,
 static void bnxt_qplib_init_sgid_tbl(struct bnxt_qplib_sgid_tbl *sgid_tbl,
 				     struct net_device *netdev)
 {
-	memset(sgid_tbl->tbl, 0, sizeof(struct bnxt_qplib_gid) * sgid_tbl->max);
+	u32 i;
+
+	for (i = 0; i < sgid_tbl->max; i++)
+		sgid_tbl->tbl[i].vlan_id = 0xffff;
+
 	memset(sgid_tbl->hw_id, -1, sizeof(u16) * sgid_tbl->max);
 }
 
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.h b/drivers/infiniband/hw/bnxt_re/qplib_res.h
index 30c42c92fac7..fbda11a7ab1a 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_res.h
+++ b/drivers/infiniband/hw/bnxt_re/qplib_res.h
@@ -111,7 +111,7 @@ struct bnxt_qplib_pd_tbl {
 };
 
 struct bnxt_qplib_sgid_tbl {
-	struct bnxt_qplib_gid		*tbl;
+	struct bnxt_qplib_gid_info	*tbl;
 	u16				*hw_id;
 	u16				max;
 	u16				active;
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.c b/drivers/infiniband/hw/bnxt_re/qplib_sp.c
index 48793d3512ac..40296b97d21e 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_sp.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.c
@@ -213,12 +213,12 @@ int bnxt_qplib_get_sgid(struct bnxt_qplib_res *res,
 			index, sgid_tbl->max);
 		return -EINVAL;
 	}
-	memcpy(gid, &sgid_tbl->tbl[index], sizeof(*gid));
+	memcpy(gid, &sgid_tbl->tbl[index].gid, sizeof(*gid));
 	return 0;
 }
 
 int bnxt_qplib_del_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
-			struct bnxt_qplib_gid *gid, bool update)
+			struct bnxt_qplib_gid *gid, u16 vlan_id, bool update)
 {
 	struct bnxt_qplib_res *res = to_bnxt_qplib(sgid_tbl,
 						   struct bnxt_qplib_res,
@@ -236,7 +236,8 @@ int bnxt_qplib_del_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
 		return -ENOMEM;
 	}
 	for (index = 0; index < sgid_tbl->max; index++) {
-		if (!memcmp(&sgid_tbl->tbl[index], gid, sizeof(*gid)))
+		if (!memcmp(&sgid_tbl->tbl[index].gid, gid, sizeof(*gid)) &&
+		    vlan_id == sgid_tbl->tbl[index].vlan_id)
 			break;
 	}
 	if (index == sgid_tbl->max) {
@@ -262,8 +263,9 @@ int bnxt_qplib_del_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
 		if (rc)
 			return rc;
 	}
-	memcpy(&sgid_tbl->tbl[index], &bnxt_qplib_gid_zero,
+	memcpy(&sgid_tbl->tbl[index].gid, &bnxt_qplib_gid_zero,
 	       sizeof(bnxt_qplib_gid_zero));
+	sgid_tbl->tbl[index].vlan_id = 0xFFFF;
 	sgid_tbl->vlan[index] = 0;
 	sgid_tbl->active--;
 	dev_dbg(&res->pdev->dev,
@@ -296,7 +298,8 @@ int bnxt_qplib_add_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
 	}
 	free_idx = sgid_tbl->max;
 	for (i = 0; i < sgid_tbl->max; i++) {
-		if (!memcmp(&sgid_tbl->tbl[i], gid, sizeof(*gid))) {
+		if (!memcmp(&sgid_tbl->tbl[i], gid, sizeof(*gid)) &&
+		    sgid_tbl->tbl[i].vlan_id == vlan_id) {
 			dev_dbg(&res->pdev->dev,
 				"SGID entry already exist in entry %d!\n", i);
 			*index = i;
@@ -351,6 +354,7 @@ int bnxt_qplib_add_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
 	}
 	/* Add GID to the sgid_tbl */
 	memcpy(&sgid_tbl->tbl[free_idx], gid, sizeof(*gid));
+	sgid_tbl->tbl[free_idx].vlan_id = vlan_id;
 	sgid_tbl->active++;
 	if (vlan_id != 0xFFFF)
 		sgid_tbl->vlan[free_idx] = 1;
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.h b/drivers/infiniband/hw/bnxt_re/qplib_sp.h
index 0ec3b12b0bcd..13d9432d5ce2 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_sp.h
+++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.h
@@ -84,6 +84,11 @@ struct bnxt_qplib_gid {
 	u8				data[16];
 };
 
+struct bnxt_qplib_gid_info {
+	struct bnxt_qplib_gid gid;
+	u16 vlan_id;
+};
+
 struct bnxt_qplib_ah {
 	struct bnxt_qplib_gid		dgid;
 	struct bnxt_qplib_pd		*pd;
@@ -221,7 +226,7 @@ int bnxt_qplib_get_sgid(struct bnxt_qplib_res *res,
 			struct bnxt_qplib_sgid_tbl *sgid_tbl, int index,
 			struct bnxt_qplib_gid *gid);
 int bnxt_qplib_del_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
-			struct bnxt_qplib_gid *gid, bool update);
+			struct bnxt_qplib_gid *gid, u16 vlan_id, bool update);
 int bnxt_qplib_add_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
 			struct bnxt_qplib_gid *gid, u8 *mac, u16 vlan_id,
 			bool update, u32 *index);
-- 
2.18.1


^ permalink raw reply related

* Re: [PATCH v4 00/25] InfiniBand Transport (IBTRS) and Network Block Device (IBNBD)
From: Jinpu Wang @ 2019-07-15 11:21 UTC (permalink / raw)
  To: Sagi Grimberg
  Cc: Danil Kipnis, linux-block, linux-rdma, Jens Axboe,
	Christoph Hellwig, bvanassche, jgg, dledford, Roman Pen,
	Greg Kroah-Hartman
In-Reply-To: <11653912-924a-965a-45fe-3abd1ca00053@grimberg.me>

Sagi Grimberg <sagi@grimberg.me> 于2019年7月12日周五 下午9:40写道:
>
>
> > Hi Sagi,
> >
> >>>> Another question, from what I understand from the code, the client
> >>>> always rdma_writes data on writes (with imm) from a remote pool of
> >>>> server buffers dedicated to it. Essentially all writes are immediate (no
> >>>> rdma reads ever). How is that different than using send wrs to a set of
> >>>> pre-posted recv buffers (like all others are doing)? Is it faster?
> >>> At the very beginning of the project we did some measurements and saw,
> >>> that it is faster. I'm not sure if this is still true
> >>
> >> Its not significantly faster (can't imagine why it would be).
> >> What could make a difference is probably the fact that you never
> >> do rdma reads for I/O writes which might be better. Also perhaps the
> >> fact that you normally don't wait for send completions before completing
> >> I/O (which is broken), and the fact that you batch recv operations.
> >
> > I don't know how you came to the conclusion that we don't wait for send
> > completion before completing IO.
> >
> > We do chain wr on successful read request from server, see function
> > rdma_write_sg,
>
> I was referring to the client side
Hi Sagi,

I checked the 3 commits you mentioned in the earlier thread again, and I now
get your point.
You meant the behavior that the following commits try to fix:

4af7f7ff92a4 ("nvme-rdma: don't complete requests before a send work
request has completed")
b4b591c87f2b ("nvme-rdma: don't suppress send completions")

In this sense, the ibtrs client side is not waiting for the completions
of the RDMA WRITE WRs.
But we did it right for local invalidation.

I checked SRP/iser; they do not even wait for local invalidation, as no
signal flag is set.

If it's a problem, we should fix them too, maybe more.

My question is: do you see this behavior (the HCA retrying a send due to a
dropped ack) in the field,
and is it possible to reproduce?

Thanks,
Jack

^ permalink raw reply

* [PATCH AUTOSEL 5.2 121/249] ipoib: correcly show a VF hardware address
From: Sasha Levin @ 2019-07-15 13:44 UTC (permalink / raw)
  To: linux-kernel, stable
  Cc: Denis Kirjanov, Doug Ledford, David S . Miller, Sasha Levin,
	linux-rdma
In-Reply-To: <20190715134655.4076-1-sashal@kernel.org>

From: Denis Kirjanov <kda@linux-powerpc.org>

[ Upstream commit 64d701c608fea362881e823b666327f5d28d7ffd ]

In the case of IPoIB with SRIOV-enabled hardware, the
ip link show command incorrectly prints
0 instead of a VF hardware address.

Before:
11: ib1: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 2044 qdisc pfifo_fast
state UP mode DEFAULT group default qlen 256
    link/infiniband
80:00:00:66:fe:80:00:00:00:00:00:00:24:8a:07:03:00:a4:3e:7c brd
00:ff:ff:ff:ff:12:40:1b:ff:ff:00:00:00:00:00:00:ff:ff:ff:ff
    vf 0 MAC 00:00:00:00:00:00, spoof checking off, link-state disable,
trust off, query_rss off
...
After:
11: ib1: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 2044 qdisc pfifo_fast
state UP mode DEFAULT group default qlen 256
    link/infiniband
80:00:00:66:fe:80:00:00:00:00:00:00:24:8a:07:03:00:a4:3e:7c brd
00:ff:ff:ff:ff:12:40:1b:ff:ff:00:00:00:00:00:00:ff:ff:ff:ff
    vf 0     link/infiniband
80:00:00:66:fe:80:00:00:00:00:00:00:24:8a:07:03:00:a4:3e:7c brd
00:ff:ff:ff:ff:12:40:1b:ff:ff:00:00:00:00:00:00:ff:ff:ff:ff, spoof
checking off, link-state disable, trust off, query_rss off

v1->v2: just copy an address without modifying ifla_vf_mac
v2->v3: update the changelog

Signed-off-by: Denis Kirjanov <kda@linux-powerpc.org>
Acked-by: Doug Ledford <dledford@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 drivers/infiniband/ulp/ipoib/ipoib_main.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 9b5e11d3fb85..04ea7db08e87 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -1998,6 +1998,7 @@ static int ipoib_get_vf_config(struct net_device *dev, int vf,
 		return err;
 
 	ivf->vf = vf;
+	memcpy(ivf->mac, dev->dev_addr, dev->addr_len);
 
 	return 0;
 }
-- 
2.20.1


^ permalink raw reply related

* [PATCH AUTOSEL 5.2 189/249] net/mlx5e: Attach/detach XDP program safely
From: Sasha Levin @ 2019-07-15 13:45 UTC (permalink / raw)
  To: linux-kernel, stable
  Cc: Maxim Mikityanskiy, Tariq Toukan, Saeed Mahameed, Daniel Borkmann,
	Sasha Levin, netdev, linux-rdma, xdp-newbies, bpf
In-Reply-To: <20190715134655.4076-1-sashal@kernel.org>

From: Maxim Mikityanskiy <maximmi@mellanox.com>

[ Upstream commit e18953240de8b46360a67090c87ee1ef8160b35d ]

When an XDP program is set, a full reopen of all channels happens in two
cases:

1. When there was no program set, and a new one is being set.

2. When there was a program set, but it's being unset.

The full reopen is necessary, because the channel parameters may change
if XDP is enabled or disabled. However, it's performed in an unsafe way:
if the new channels fail to open, the old ones are already closed, and
the interface goes down. Use the safe way to switch channels instead.
The same way is already used for other configuration changes.

Signed-off-by: Maxim Mikityanskiy <maximmi@mellanox.com>
Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 .../net/ethernet/mellanox/mlx5/core/en_main.c | 31 ++++++++++++-------
 1 file changed, 20 insertions(+), 11 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index a8e8350b38aa..8db9fdbc03ea 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -4192,8 +4192,6 @@ static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog)
 	/* no need for full reset when exchanging programs */
 	reset = (!priv->channels.params.xdp_prog || !prog);
 
-	if (was_opened && reset)
-		mlx5e_close_locked(netdev);
 	if (was_opened && !reset) {
 		/* num_channels is invariant here, so we can take the
 		 * batched reference right upfront.
@@ -4205,20 +4203,31 @@ static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog)
 		}
 	}
 
-	/* exchange programs, extra prog reference we got from caller
-	 * as long as we don't fail from this point onwards.
-	 */
-	old_prog = xchg(&priv->channels.params.xdp_prog, prog);
+	if (was_opened && reset) {
+		struct mlx5e_channels new_channels = {};
+
+		new_channels.params = priv->channels.params;
+		new_channels.params.xdp_prog = prog;
+		mlx5e_set_rq_type(priv->mdev, &new_channels.params);
+		old_prog = priv->channels.params.xdp_prog;
+
+		err = mlx5e_safe_switch_channels(priv, &new_channels, NULL);
+		if (err)
+			goto unlock;
+	} else {
+		/* exchange programs, extra prog reference we got from caller
+		 * as long as we don't fail from this point onwards.
+		 */
+		old_prog = xchg(&priv->channels.params.xdp_prog, prog);
+	}
+
 	if (old_prog)
 		bpf_prog_put(old_prog);
 
-	if (reset) /* change RQ type according to priv->xdp_prog */
+	if (!was_opened && reset) /* change RQ type according to priv->xdp_prog */
 		mlx5e_set_rq_type(priv->mdev, &priv->channels.params);
 
-	if (was_opened && reset)
-		err = mlx5e_open_locked(netdev);
-
-	if (!test_bit(MLX5E_STATE_OPENED, &priv->state) || reset)
+	if (!was_opened || reset)
 		goto unlock;
 
 	/* exchanging programs w/o reset, we update ref counts on behalf
-- 
2.20.1


^ permalink raw reply related

* [PATCH AUTOSEL 5.1 107/219] ipoib: correcly show a VF hardware address
From: Sasha Levin @ 2019-07-15 14:01 UTC (permalink / raw)
  To: linux-kernel, stable
  Cc: Denis Kirjanov, Doug Ledford, David S . Miller, Sasha Levin,
	linux-rdma
In-Reply-To: <20190715140341.6443-1-sashal@kernel.org>

From: Denis Kirjanov <kda@linux-powerpc.org>

[ Upstream commit 64d701c608fea362881e823b666327f5d28d7ffd ]

In the case of IPoIB with SRIOV-enabled hardware, the
ip link show command incorrectly prints
0 instead of a VF hardware address.

Before:
11: ib1: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 2044 qdisc pfifo_fast
state UP mode DEFAULT group default qlen 256
    link/infiniband
80:00:00:66:fe:80:00:00:00:00:00:00:24:8a:07:03:00:a4:3e:7c brd
00:ff:ff:ff:ff:12:40:1b:ff:ff:00:00:00:00:00:00:ff:ff:ff:ff
    vf 0 MAC 00:00:00:00:00:00, spoof checking off, link-state disable,
trust off, query_rss off
...
After:
11: ib1: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 2044 qdisc pfifo_fast
state UP mode DEFAULT group default qlen 256
    link/infiniband
80:00:00:66:fe:80:00:00:00:00:00:00:24:8a:07:03:00:a4:3e:7c brd
00:ff:ff:ff:ff:12:40:1b:ff:ff:00:00:00:00:00:00:ff:ff:ff:ff
    vf 0     link/infiniband
80:00:00:66:fe:80:00:00:00:00:00:00:24:8a:07:03:00:a4:3e:7c brd
00:ff:ff:ff:ff:12:40:1b:ff:ff:00:00:00:00:00:00:ff:ff:ff:ff, spoof
checking off, link-state disable, trust off, query_rss off

v1->v2: just copy an address without modifying ifla_vf_mac
v2->v3: update the changelog

Signed-off-by: Denis Kirjanov <kda@linux-powerpc.org>
Acked-by: Doug Ledford <dledford@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 drivers/infiniband/ulp/ipoib/ipoib_main.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 9b5e11d3fb85..04ea7db08e87 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -1998,6 +1998,7 @@ static int ipoib_get_vf_config(struct net_device *dev, int vf,
 		return err;
 
 	ivf->vf = vf;
+	memcpy(ivf->mac, dev->dev_addr, dev->addr_len);
 
 	return 0;
 }
-- 
2.20.1


^ permalink raw reply related

* [PATCH AUTOSEL 5.1 159/219] net/mlx5: Get vport ACL namespace by vport index
From: Sasha Levin @ 2019-07-15 14:02 UTC (permalink / raw)
  To: linux-kernel, stable
  Cc: Jianbo Liu, Oz Shlomo, Eli Britstein, Roi Dayan, Mark Bloch,
	Saeed Mahameed, Sasha Levin, netdev, linux-rdma
In-Reply-To: <20190715140341.6443-1-sashal@kernel.org>

From: Jianbo Liu <jianbol@mellanox.com>

[ Upstream commit f53297d67800feb5fafd94abd926c889aefee690 ]

The ingress and egress ACL root namespaces are created per vport and
stored into arrays. However, the vport number is not the same as the
index. Pass the array index, instead of the vport number, to get the
correct ingress and egress ACL namespace.

Fixes: 9b93ab981e3b ("net/mlx5: Separate ingress/egress namespaces for each vport")
Signed-off-by: Jianbo Liu <jianbol@mellanox.com>
Reviewed-by: Oz Shlomo <ozsh@mellanox.com>
Reviewed-by: Eli Britstein <elibr@mellanox.com>
Reviewed-by: Roi Dayan <roid@mellanox.com>
Reviewed-by: Mark Bloch <markb@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index 8a67fd197b79..16ed6ebd31ee 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -950,7 +950,7 @@ static int esw_vport_enable_egress_acl(struct mlx5_eswitch *esw,
 		  vport->vport, MLX5_CAP_ESW_EGRESS_ACL(dev, log_max_ft_size));
 
 	root_ns = mlx5_get_flow_vport_acl_namespace(dev, MLX5_FLOW_NAMESPACE_ESW_EGRESS,
-						    vport->vport);
+			mlx5_eswitch_vport_num_to_index(esw, vport->vport));
 	if (!root_ns) {
 		esw_warn(dev, "Failed to get E-Switch egress flow namespace for vport (%d)\n", vport->vport);
 		return -EOPNOTSUPP;
@@ -1068,7 +1068,7 @@ static int esw_vport_enable_ingress_acl(struct mlx5_eswitch *esw,
 		  vport->vport, MLX5_CAP_ESW_INGRESS_ACL(dev, log_max_ft_size));
 
 	root_ns = mlx5_get_flow_vport_acl_namespace(dev, MLX5_FLOW_NAMESPACE_ESW_INGRESS,
-						    vport->vport);
+			mlx5_eswitch_vport_num_to_index(esw, vport->vport));
 	if (!root_ns) {
 		esw_warn(dev, "Failed to get E-Switch ingress flow namespace for vport (%d)\n", vport->vport);
 		return -EOPNOTSUPP;
-- 
2.20.1


^ permalink raw reply related

* Re: [PATCH v8] verbs: Introduce a new reg_mr API for virtual address space
From: Jason Gunthorpe @ 2019-07-15 14:15 UTC (permalink / raw)
  To: Yuval Shaia
  Cc: Yishai Hadas, dledford@redhat.com, leon@kernel.org,
	mark.haywood@oracle.com, Leon Romanovsky,
	linux-rdma@vger.kernel.org
In-Reply-To: <20190715141328.15872-1-yuval.shaia@oracle.com>

On Mon, Jul 15, 2019 at 05:13:28PM +0300, Yuval Shaia wrote:
> The virtual address that is registered is used as a base for any address
> passed later in post_recv and post_send operations.
> 
> On some virtualized environments this is not correct.
> 
> A guest cannot register its memory so hypervisor maps the guest physical
> address to a host virtual address and register it with the HW. Later on,
> at datapath phase, the guest fills the SGEs with addresses from its
> address space.
> Since HW cannot access guest virtual address space an extra translation
> is needed to map those addresses to be based on the host virtual address
> that was registered with the HW.
> This datapath interference affects performance.
> 
> To avoid this, a logical separation between the address that is
> registered and the address that is used as an offset at datapath phase is
> needed.
> This separation is already implemented in the lower layer part
> (ibv_cmd_reg_mr) but blocked at the API level.
> 
> Fix it by introducing a new API function which accepts an address from
> guest virtual address space as well, to be used as offset for later
> datapath operations.
> 
> Signed-off-by: Yuval Shaia <yuval.shaia@oracle.com>
> Reviewed-by: Leon Romanovsky <leonro@mellanox.com>
> v0 -> v1:
> 	* Change reg_mr callback signature instead of adding new callback
> 	* Add the new API to libibverbs/libibverbs.map.in
> v1 -> v2:
> 	* Do not modify reg_mr signature for version 1.0
> 	* Add note to man page
> v2 -> v3:
> 	* Rename function to reg_mr_iova (and arg-name to iova)
> 	* Some checkpatch issues not related to this fix but detected now
> 		* s/__FUNCTION__/__func
> 		* WARNING: function definition argument 'void *' should
> 		  also have an identifier name
> v3 -> v4:
> 	* Fix commit message as suggested by Adit Ranadiv
> 	* Add support for efa
> v4 -> v5:
> 	* Update PABI
> 	* Update debian files
> v5 -> v6:
> 	* Move the new API to section in libibverbs/libibverbs.map.in
> 	  (IBVERBS_1.7) as pointed out by Mark Haywood
> v6 -> v7:
> 	* 
> v7 -> v8:
> 	* Update also redhat and suse specfiles so now all CI checks in
> 	  github passed.
> 	* Leon, I have your r-b from v5; I would appreciate it if you could take a look
> 	  again now, with all the latest changes
>  CMakeLists.txt                    |  4 ++--
>  buildlib/cbuild                   |  6 ++++++
>  debian/control                    |  2 +-
>  debian/libibverbs1.symbols        |  4 +++-
>  libibverbs/CMakeLists.txt         |  2 +-
>  libibverbs/driver.h               |  2 +-
>  libibverbs/dummy_ops.c            |  2 +-
>  libibverbs/libibverbs.map.in      |  5 +++++
>  libibverbs/man/ibv_reg_mr.3       | 15 +++++++++++++--
>  libibverbs/verbs.c                | 23 ++++++++++++++++++++++-
>  libibverbs/verbs.h                |  7 +++++++
>  providers/bnxt_re/verbs.c         |  6 +++---
>  providers/bnxt_re/verbs.h         |  2 +-
>  providers/cxgb3/iwch.h            |  4 ++--
>  providers/cxgb3/verbs.c           | 15 +++++----------
>  providers/cxgb4/libcxgb4.h        |  4 ++--
>  providers/cxgb4/verbs.c           | 15 +++++----------
>  providers/efa/verbs.c             |  4 ++--
>  providers/efa/verbs.h             |  2 +-
>  providers/hfi1verbs/hfiverbs.h    |  4 ++--
>  providers/hfi1verbs/verbs.c       |  8 ++++----
>  providers/hns/hns_roce_u.h        |  2 +-
>  providers/hns/hns_roce_u_verbs.c  |  6 +++---
>  providers/i40iw/i40iw_umain.h     |  3 ++-
>  providers/i40iw/i40iw_uverbs.c    |  8 ++++----
>  providers/ipathverbs/ipathverbs.h |  4 ++--
>  providers/ipathverbs/verbs.c      |  8 ++++----
>  providers/mlx4/mlx4.h             |  4 ++--
>  providers/mlx4/verbs.c            |  7 +++----
>  providers/mlx5/mlx5.h             |  4 ++--
>  providers/mlx5/verbs.c            |  7 +++----
>  providers/mthca/ah.c              |  3 ++-
>  providers/mthca/mthca.h           |  4 ++--
>  providers/mthca/verbs.c           |  6 +++---
>  providers/nes/nes_umain.h         |  3 ++-
>  providers/nes/nes_uverbs.c        |  9 ++++-----
>  providers/ocrdma/ocrdma_main.h    |  4 ++--
>  providers/ocrdma/ocrdma_verbs.c   | 10 ++++------
>  providers/qedr/qelr_main.h        |  4 ++--
>  providers/qedr/qelr_verbs.c       | 11 ++++-------
>  providers/qedr/qelr_verbs.h       |  4 ++--
>  providers/rxe/rxe.c               |  6 +++---
>  providers/siw/siw.c               |  4 ++--
>  providers/vmw_pvrdma/pvrdma.h     |  4 ++--
>  providers/vmw_pvrdma/verbs.c      |  7 +++----
>  redhat/rdma-core.spec             |  2 +-
>  suse/rdma-core.spec               |  2 +-
>  47 files changed, 154 insertions(+), 118 deletions(-)
> 
> diff --git a/CMakeLists.txt b/CMakeLists.txt
> index b2613284..67112ae3 100644
> +++ b/CMakeLists.txt
> @@ -68,11 +68,11 @@ endif()
>  set(PACKAGE_NAME "RDMA")
>  
>  # See Documentation/versioning.md
> -set(PACKAGE_VERSION "25.0")
> +set(PACKAGE_VERSION "26.0")
>  # When this is changed the values in these files need changing too:
>  #   debian/control
>  #   debian/libibverbs1.symbols
> -set(IBVERBS_PABI_VERSION "25")
> +set(IBVERBS_PABI_VERSION "26")
>  set(IBVERBS_PROVIDER_SUFFIX "-rdmav${IBVERBS_PABI_VERSION}.so")

'25' is still the current release in progress.

Jason

^ permalink raw reply

* Re: [PATCH for-rc] RDMA/bnxt_re: Honor vlan_id in GID entry comparison
From: Yi Zhang @ 2019-07-15 14:17 UTC (permalink / raw)
  To: Selvin Xavier, linux-rdma, dledford, jgg; +Cc: stable, linux-nvme, Parav Pandit
In-Reply-To: <20190715091913.15726-1-selvin.xavier@broadcom.com>

Verified this patch on my nvme rdma bnxt_re environment, thanks.

Tested-by: Yi Zhang <yi.zhang@redhat.com>

On 7/15/19 5:19 PM, Selvin Xavier wrote:
> A GID entry consists of GID, vlan, netdev and smac.
> Extend the GID duplicate check comparisons to consider vlan_id as well
> to support IPv6 VLAN based link local addresses. Introduce
> a new structure (bnxt_qplib_gid_info) to hold gid and vlan_id information.
>
> The issue is discussed in the following thread
> https://www.spinics.net/lists/linux-rdma/msg81594.html
>
> Fixes: 823b23da7113 ("IB/core: Allow vlan link local address based RoCE GIDs")
> Cc: <stable@vger.kernel.org> # v5.2+
> Reported-by: Yi Zhang <yi.zhang@redhat.com>
> Co-developed-by: Parav Pandit <parav@mellanox.com>
> Signed-off-by: Parav Pandit <parav@mellanox.com>
> Signed-off-by: Selvin Xavier <selvin.xavier@broadcom.com>
> ---
>   drivers/infiniband/hw/bnxt_re/ib_verbs.c  |  7 +++++--
>   drivers/infiniband/hw/bnxt_re/qplib_res.c | 13 +++++++++----
>   drivers/infiniband/hw/bnxt_re/qplib_res.h |  2 +-
>   drivers/infiniband/hw/bnxt_re/qplib_sp.c  | 14 +++++++++-----
>   drivers/infiniband/hw/bnxt_re/qplib_sp.h  |  7 ++++++-
>   5 files changed, 30 insertions(+), 13 deletions(-)
>
> diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
> index 2c3685faa57a..a4a9f90f2482 100644
> --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c
> +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
> @@ -308,6 +308,7 @@ int bnxt_re_del_gid(const struct ib_gid_attr *attr, void **context)
>   	struct bnxt_re_dev *rdev = to_bnxt_re_dev(attr->device, ibdev);
>   	struct bnxt_qplib_sgid_tbl *sgid_tbl = &rdev->qplib_res.sgid_tbl;
>   	struct bnxt_qplib_gid *gid_to_del;
> +	u16 vlan_id = 0xFFFF;
>   
>   	/* Delete the entry from the hardware */
>   	ctx = *context;
> @@ -317,7 +318,8 @@ int bnxt_re_del_gid(const struct ib_gid_attr *attr, void **context)
>   	if (sgid_tbl && sgid_tbl->active) {
>   		if (ctx->idx >= sgid_tbl->max)
>   			return -EINVAL;
> -		gid_to_del = &sgid_tbl->tbl[ctx->idx];
> +		gid_to_del = &sgid_tbl->tbl[ctx->idx].gid;
> +		vlan_id = sgid_tbl->tbl[ctx->idx].vlan_id;
>   		/* DEL_GID is called in WQ context(netdevice_event_work_handler)
>   		 * or via the ib_unregister_device path. In the former case QP1
>   		 * may not be destroyed yet, in which case just return as FW
> @@ -335,7 +337,8 @@ int bnxt_re_del_gid(const struct ib_gid_attr *attr, void **context)
>   		}
>   		ctx->refcnt--;
>   		if (!ctx->refcnt) {
> -			rc = bnxt_qplib_del_sgid(sgid_tbl, gid_to_del, true);
> +			rc = bnxt_qplib_del_sgid(sgid_tbl, gid_to_del,
> +						 vlan_id,  true);
>   			if (rc) {
>   				dev_err(rdev_to_dev(rdev),
>   					"Failed to remove GID: %#x", rc);
> diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.c b/drivers/infiniband/hw/bnxt_re/qplib_res.c
> index 37928b1111df..bdbde8e22420 100644
> --- a/drivers/infiniband/hw/bnxt_re/qplib_res.c
> +++ b/drivers/infiniband/hw/bnxt_re/qplib_res.c
> @@ -488,7 +488,7 @@ static int bnxt_qplib_alloc_sgid_tbl(struct bnxt_qplib_res *res,
>   				     struct bnxt_qplib_sgid_tbl *sgid_tbl,
>   				     u16 max)
>   {
> -	sgid_tbl->tbl = kcalloc(max, sizeof(struct bnxt_qplib_gid), GFP_KERNEL);
> +	sgid_tbl->tbl = kcalloc(max, sizeof(*sgid_tbl->tbl), GFP_KERNEL);
>   	if (!sgid_tbl->tbl)
>   		return -ENOMEM;
>   
> @@ -526,9 +526,10 @@ static void bnxt_qplib_cleanup_sgid_tbl(struct bnxt_qplib_res *res,
>   	for (i = 0; i < sgid_tbl->max; i++) {
>   		if (memcmp(&sgid_tbl->tbl[i], &bnxt_qplib_gid_zero,
>   			   sizeof(bnxt_qplib_gid_zero)))
> -			bnxt_qplib_del_sgid(sgid_tbl, &sgid_tbl->tbl[i], true);
> +			bnxt_qplib_del_sgid(sgid_tbl, &sgid_tbl->tbl[i].gid,
> +					    sgid_tbl->tbl[i].vlan_id, true);
>   	}
> -	memset(sgid_tbl->tbl, 0, sizeof(struct bnxt_qplib_gid) * sgid_tbl->max);
> +	memset(sgid_tbl->tbl, 0, sizeof(*sgid_tbl->tbl) * sgid_tbl->max);
>   	memset(sgid_tbl->hw_id, -1, sizeof(u16) * sgid_tbl->max);
>   	memset(sgid_tbl->vlan, 0, sizeof(u8) * sgid_tbl->max);
>   	sgid_tbl->active = 0;
> @@ -537,7 +538,11 @@ static void bnxt_qplib_cleanup_sgid_tbl(struct bnxt_qplib_res *res,
>   static void bnxt_qplib_init_sgid_tbl(struct bnxt_qplib_sgid_tbl *sgid_tbl,
>   				     struct net_device *netdev)
>   {
> -	memset(sgid_tbl->tbl, 0, sizeof(struct bnxt_qplib_gid) * sgid_tbl->max);
> +	u32 i;
> +
> +	for (i = 0; i < sgid_tbl->max; i++)
> +		sgid_tbl->tbl[i].vlan_id = 0xffff;
> +
>   	memset(sgid_tbl->hw_id, -1, sizeof(u16) * sgid_tbl->max);
>   }
>   
> diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.h b/drivers/infiniband/hw/bnxt_re/qplib_res.h
> index 30c42c92fac7..fbda11a7ab1a 100644
> --- a/drivers/infiniband/hw/bnxt_re/qplib_res.h
> +++ b/drivers/infiniband/hw/bnxt_re/qplib_res.h
> @@ -111,7 +111,7 @@ struct bnxt_qplib_pd_tbl {
>   };
>   
>   struct bnxt_qplib_sgid_tbl {
> -	struct bnxt_qplib_gid		*tbl;
> +	struct bnxt_qplib_gid_info	*tbl;
>   	u16				*hw_id;
>   	u16				max;
>   	u16				active;
> diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.c b/drivers/infiniband/hw/bnxt_re/qplib_sp.c
> index 48793d3512ac..40296b97d21e 100644
> --- a/drivers/infiniband/hw/bnxt_re/qplib_sp.c
> +++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.c
> @@ -213,12 +213,12 @@ int bnxt_qplib_get_sgid(struct bnxt_qplib_res *res,
>   			index, sgid_tbl->max);
>   		return -EINVAL;
>   	}
> -	memcpy(gid, &sgid_tbl->tbl[index], sizeof(*gid));
> +	memcpy(gid, &sgid_tbl->tbl[index].gid, sizeof(*gid));
>   	return 0;
>   }
>   
>   int bnxt_qplib_del_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
> -			struct bnxt_qplib_gid *gid, bool update)
> +			struct bnxt_qplib_gid *gid, u16 vlan_id, bool update)
>   {
>   	struct bnxt_qplib_res *res = to_bnxt_qplib(sgid_tbl,
>   						   struct bnxt_qplib_res,
> @@ -236,7 +236,8 @@ int bnxt_qplib_del_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
>   		return -ENOMEM;
>   	}
>   	for (index = 0; index < sgid_tbl->max; index++) {
> -		if (!memcmp(&sgid_tbl->tbl[index], gid, sizeof(*gid)))
> +		if (!memcmp(&sgid_tbl->tbl[index].gid, gid, sizeof(*gid)) &&
> +		    vlan_id == sgid_tbl->tbl[index].vlan_id)
>   			break;
>   	}
>   	if (index == sgid_tbl->max) {
> @@ -262,8 +263,9 @@ int bnxt_qplib_del_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
>   		if (rc)
>   			return rc;
>   	}
> -	memcpy(&sgid_tbl->tbl[index], &bnxt_qplib_gid_zero,
> +	memcpy(&sgid_tbl->tbl[index].gid, &bnxt_qplib_gid_zero,
>   	       sizeof(bnxt_qplib_gid_zero));
> +	sgid_tbl->tbl[index].vlan_id = 0xFFFF;
>   	sgid_tbl->vlan[index] = 0;
>   	sgid_tbl->active--;
>   	dev_dbg(&res->pdev->dev,
> @@ -296,7 +298,8 @@ int bnxt_qplib_add_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
>   	}
>   	free_idx = sgid_tbl->max;
>   	for (i = 0; i < sgid_tbl->max; i++) {
> -		if (!memcmp(&sgid_tbl->tbl[i], gid, sizeof(*gid))) {
> +		if (!memcmp(&sgid_tbl->tbl[i], gid, sizeof(*gid)) &&
> +		    sgid_tbl->tbl[i].vlan_id == vlan_id) {
>   			dev_dbg(&res->pdev->dev,
>   				"SGID entry already exist in entry %d!\n", i);
>   			*index = i;
> @@ -351,6 +354,7 @@ int bnxt_qplib_add_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
>   	}
>   	/* Add GID to the sgid_tbl */
>   	memcpy(&sgid_tbl->tbl[free_idx], gid, sizeof(*gid));
> +	sgid_tbl->tbl[free_idx].vlan_id = vlan_id;
>   	sgid_tbl->active++;
>   	if (vlan_id != 0xFFFF)
>   		sgid_tbl->vlan[free_idx] = 1;
> diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.h b/drivers/infiniband/hw/bnxt_re/qplib_sp.h
> index 0ec3b12b0bcd..13d9432d5ce2 100644
> --- a/drivers/infiniband/hw/bnxt_re/qplib_sp.h
> +++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.h
> @@ -84,6 +84,11 @@ struct bnxt_qplib_gid {
>   	u8				data[16];
>   };
>   
> +struct bnxt_qplib_gid_info {
> +	struct bnxt_qplib_gid gid;
> +	u16 vlan_id;
> +};
> +
>   struct bnxt_qplib_ah {
>   	struct bnxt_qplib_gid		dgid;
>   	struct bnxt_qplib_pd		*pd;
> @@ -221,7 +226,7 @@ int bnxt_qplib_get_sgid(struct bnxt_qplib_res *res,
>   			struct bnxt_qplib_sgid_tbl *sgid_tbl, int index,
>   			struct bnxt_qplib_gid *gid);
>   int bnxt_qplib_del_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
> -			struct bnxt_qplib_gid *gid, bool update);
> +			struct bnxt_qplib_gid *gid, u16 vlan_id, bool update);
>   int bnxt_qplib_add_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
>   			struct bnxt_qplib_gid *gid, u8 *mac, u16 vlan_id,
>   			bool update, u32 *index);

^ permalink raw reply

* [PATCH AUTOSEL 4.19 080/158] ipoib: correcly show a VF hardware address
From: Sasha Levin @ 2019-07-15 14:16 UTC (permalink / raw)
  To: linux-kernel, stable
  Cc: Denis Kirjanov, Doug Ledford, David S . Miller, Sasha Levin,
	linux-rdma
In-Reply-To: <20190715141809.8445-1-sashal@kernel.org>

From: Denis Kirjanov <kda@linux-powerpc.org>

[ Upstream commit 64d701c608fea362881e823b666327f5d28d7ffd ]

In the case of IPoIB with SRIOV enabled hardware, the
ip link show command incorrectly prints
0 instead of a VF hardware address.

Before:
11: ib1: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 2044 qdisc pfifo_fast
state UP mode DEFAULT group default qlen 256
    link/infiniband
80:00:00:66:fe:80:00:00:00:00:00:00:24:8a:07:03:00:a4:3e:7c brd
00:ff:ff:ff:ff:12:40:1b:ff:ff:00:00:00:00:00:00:ff:ff:ff:ff
    vf 0 MAC 00:00:00:00:00:00, spoof checking off, link-state disable,
trust off, query_rss off
...
After:
11: ib1: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 2044 qdisc pfifo_fast
state UP mode DEFAULT group default qlen 256
    link/infiniband
80:00:00:66:fe:80:00:00:00:00:00:00:24:8a:07:03:00:a4:3e:7c brd
00:ff:ff:ff:ff:12:40:1b:ff:ff:00:00:00:00:00:00:ff:ff:ff:ff
    vf 0     link/infiniband
80:00:00:66:fe:80:00:00:00:00:00:00:24:8a:07:03:00:a4:3e:7c brd
00:ff:ff:ff:ff:12:40:1b:ff:ff:00:00:00:00:00:00:ff:ff:ff:ff, spoof
checking off, link-state disable, trust off, query_rss off

v1->v2: just copy an address without modifying ifla_vf_mac
v2->v3: update the changelog

Signed-off-by: Denis Kirjanov <kda@linux-powerpc.org>
Acked-by: Doug Ledford <dledford@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 drivers/infiniband/ulp/ipoib/ipoib_main.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 30f840f874b3..009615499b37 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -1997,6 +1997,7 @@ static int ipoib_get_vf_config(struct net_device *dev, int vf,
 		return err;
 
 	ivf->vf = vf;
+	memcpy(ivf->mac, dev->dev_addr, dev->addr_len);
 
 	return 0;
 }
-- 
2.20.1


^ permalink raw reply related

* [PATCH AUTOSEL 4.9 42/73] ipoib: correcly show a VF hardware address
From: Sasha Levin @ 2019-07-15 14:35 UTC (permalink / raw)
  To: linux-kernel, stable
  Cc: Denis Kirjanov, Doug Ledford, David S . Miller, Sasha Levin,
	linux-rdma
In-Reply-To: <20190715143629.10893-1-sashal@kernel.org>

From: Denis Kirjanov <kda@linux-powerpc.org>

[ Upstream commit 64d701c608fea362881e823b666327f5d28d7ffd ]

In the case of IPoIB with SRIOV enabled hardware, the
ip link show command incorrectly prints
0 instead of a VF hardware address.

Before:
11: ib1: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 2044 qdisc pfifo_fast
state UP mode DEFAULT group default qlen 256
    link/infiniband
80:00:00:66:fe:80:00:00:00:00:00:00:24:8a:07:03:00:a4:3e:7c brd
00:ff:ff:ff:ff:12:40:1b:ff:ff:00:00:00:00:00:00:ff:ff:ff:ff
    vf 0 MAC 00:00:00:00:00:00, spoof checking off, link-state disable,
trust off, query_rss off
...
After:
11: ib1: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 2044 qdisc pfifo_fast
state UP mode DEFAULT group default qlen 256
    link/infiniband
80:00:00:66:fe:80:00:00:00:00:00:00:24:8a:07:03:00:a4:3e:7c brd
00:ff:ff:ff:ff:12:40:1b:ff:ff:00:00:00:00:00:00:ff:ff:ff:ff
    vf 0     link/infiniband
80:00:00:66:fe:80:00:00:00:00:00:00:24:8a:07:03:00:a4:3e:7c brd
00:ff:ff:ff:ff:12:40:1b:ff:ff:00:00:00:00:00:00:ff:ff:ff:ff, spoof
checking off, link-state disable, trust off, query_rss off

v1->v2: just copy an address without modifying ifla_vf_mac
v2->v3: update the changelog

Signed-off-by: Denis Kirjanov <kda@linux-powerpc.org>
Acked-by: Doug Ledford <dledford@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 drivers/infiniband/ulp/ipoib/ipoib_main.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 17c5bc7e8957..45504febbc2a 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -1751,6 +1751,7 @@ static int ipoib_get_vf_config(struct net_device *dev, int vf,
 		return err;
 
 	ivf->vf = vf;
+	memcpy(ivf->mac, dev->dev_addr, dev->addr_len);
 
 	return 0;
 }
-- 
2.20.1


^ permalink raw reply related

* [PATCH AUTOSEL 4.14 057/105] ipoib: correcly show a VF hardware address
From: Sasha Levin @ 2019-07-15 14:27 UTC (permalink / raw)
  To: linux-kernel, stable
  Cc: Denis Kirjanov, Doug Ledford, David S . Miller, Sasha Levin,
	linux-rdma
In-Reply-To: <20190715142839.9896-1-sashal@kernel.org>

From: Denis Kirjanov <kda@linux-powerpc.org>

[ Upstream commit 64d701c608fea362881e823b666327f5d28d7ffd ]

In the case of IPoIB with SRIOV enabled hardware, the
ip link show command incorrectly prints
0 instead of a VF hardware address.

Before:
11: ib1: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 2044 qdisc pfifo_fast
state UP mode DEFAULT group default qlen 256
    link/infiniband
80:00:00:66:fe:80:00:00:00:00:00:00:24:8a:07:03:00:a4:3e:7c brd
00:ff:ff:ff:ff:12:40:1b:ff:ff:00:00:00:00:00:00:ff:ff:ff:ff
    vf 0 MAC 00:00:00:00:00:00, spoof checking off, link-state disable,
trust off, query_rss off
...
After:
11: ib1: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 2044 qdisc pfifo_fast
state UP mode DEFAULT group default qlen 256
    link/infiniband
80:00:00:66:fe:80:00:00:00:00:00:00:24:8a:07:03:00:a4:3e:7c brd
00:ff:ff:ff:ff:12:40:1b:ff:ff:00:00:00:00:00:00:ff:ff:ff:ff
    vf 0     link/infiniband
80:00:00:66:fe:80:00:00:00:00:00:00:24:8a:07:03:00:a4:3e:7c brd
00:ff:ff:ff:ff:12:40:1b:ff:ff:00:00:00:00:00:00:ff:ff:ff:ff, spoof
checking off, link-state disable, trust off, query_rss off

v1->v2: just copy an address without modifying ifla_vf_mac
v2->v3: update the changelog

Signed-off-by: Denis Kirjanov <kda@linux-powerpc.org>
Acked-by: Doug Ledford <dledford@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 drivers/infiniband/ulp/ipoib/ipoib_main.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index e6ff16b27acd..1a93d3d58c8a 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -1833,6 +1833,7 @@ static int ipoib_get_vf_config(struct net_device *dev, int vf,
 		return err;
 
 	ivf->vf = vf;
+	memcpy(ivf->mac, dev->dev_addr, dev->addr_len);
 
 	return 0;
 }
-- 
2.20.1


^ permalink raw reply related

* Re: [PATCH v8] verbs: Introduce a new reg_mr API for virtual address space
From: Yuval Shaia @ 2019-07-15 14:15 UTC (permalink / raw)
  To: yishaih, dledford, leon, jgg, mark.haywood, leonro, linux-rdma
In-Reply-To: <20190715141328.15872-1-yuval.shaia@oracle.com>

On Mon, Jul 15, 2019 at 05:13:28PM +0300, Yuval Shaia wrote:
> The virtual address that is registered is used as a base for any address
> passed later in post_recv and post_send operations.
> 
> In some virtualized environments this is not correct.
> 
> A guest cannot register its memory, so the hypervisor maps the guest
> physical address to a host virtual address and registers it with the HW.
> Later on, at the datapath phase, the guest fills the SGEs with addresses
> from its address space.
> Since the HW cannot access the guest virtual address space, an extra
> translation is needed to map those addresses to be based on the host
> virtual address that was registered with the HW.
> This datapath interference hurts performance.
> 
> To avoid this, a logical separation between the address that is
> registered and the address that is used as an offset at the datapath phase
> is needed.
> This separation is already implemented in the lower layer part
> (ibv_cmd_reg_mr) but blocked at the API level.
> 
> Fix it by introducing a new API function which accepts an address from
> guest virtual address space as well, to be used as offset for later
> datapath operations.
> 
> Signed-off-by: Yuval Shaia <yuval.shaia@oracle.com>
> Reviewed-by: Leon Romanovsky <leonro@mellanox.com>
> ---
> v0 -> v1:
> 	* Change reg_mr callback signature instead of adding new callback
> 	* Add the new API to libibverbs/libibverbs.map.in
> v1 -> v2:
> 	* Do not modify reg_mr signature for version 1.0
> 	* Add note to man page
> v2 -> v3:
> 	* Rename function to reg_mr_iova (and arg-name to iova)
> 	* Some checkpatch issues not related to this fix but detected now
> 		* s/__FUNCTION__/__func__
> 		* WARNING: function definition argument 'void *' should
> 		  also have an identifier name
> v3 -> v4:
> 	* Fix commit message as suggested by Adit Ranadive
> 	* Add support for efa
> v4 -> v5:
> 	* Update PABI
> 	* Update debian files
> v5 -> v6:
> 	* Move the new API to section in libibverbs/libibverbs.map.in
> 	  (IBVERBS_1.7) as pointed out by Mark Haywood
> v6 -> v7:
> 	* 
> v7 -> v8:
> 	* Update also redhat and suse specfiles so now all CI checks in
> 	  github passed.

Also rebased onto the latest tree and fixed siw as well.

> 	* Leon, I have your r-b from v5; I would appreciate it if you could
> 	  take a look again now, with all the latest changes
> ---
>  CMakeLists.txt                    |  4 ++--
>  buildlib/cbuild                   |  6 ++++++
>  debian/control                    |  2 +-
>  debian/libibverbs1.symbols        |  4 +++-
>  libibverbs/CMakeLists.txt         |  2 +-
>  libibverbs/driver.h               |  2 +-
>  libibverbs/dummy_ops.c            |  2 +-
>  libibverbs/libibverbs.map.in      |  5 +++++
>  libibverbs/man/ibv_reg_mr.3       | 15 +++++++++++++--
>  libibverbs/verbs.c                | 23 ++++++++++++++++++++++-
>  libibverbs/verbs.h                |  7 +++++++
>  providers/bnxt_re/verbs.c         |  6 +++---
>  providers/bnxt_re/verbs.h         |  2 +-
>  providers/cxgb3/iwch.h            |  4 ++--
>  providers/cxgb3/verbs.c           | 15 +++++----------
>  providers/cxgb4/libcxgb4.h        |  4 ++--
>  providers/cxgb4/verbs.c           | 15 +++++----------
>  providers/efa/verbs.c             |  4 ++--
>  providers/efa/verbs.h             |  2 +-
>  providers/hfi1verbs/hfiverbs.h    |  4 ++--
>  providers/hfi1verbs/verbs.c       |  8 ++++----
>  providers/hns/hns_roce_u.h        |  2 +-
>  providers/hns/hns_roce_u_verbs.c  |  6 +++---
>  providers/i40iw/i40iw_umain.h     |  3 ++-
>  providers/i40iw/i40iw_uverbs.c    |  8 ++++----
>  providers/ipathverbs/ipathverbs.h |  4 ++--
>  providers/ipathverbs/verbs.c      |  8 ++++----
>  providers/mlx4/mlx4.h             |  4 ++--
>  providers/mlx4/verbs.c            |  7 +++----
>  providers/mlx5/mlx5.h             |  4 ++--
>  providers/mlx5/verbs.c            |  7 +++----
>  providers/mthca/ah.c              |  3 ++-
>  providers/mthca/mthca.h           |  4 ++--
>  providers/mthca/verbs.c           |  6 +++---
>  providers/nes/nes_umain.h         |  3 ++-
>  providers/nes/nes_uverbs.c        |  9 ++++-----
>  providers/ocrdma/ocrdma_main.h    |  4 ++--
>  providers/ocrdma/ocrdma_verbs.c   | 10 ++++------
>  providers/qedr/qelr_main.h        |  4 ++--
>  providers/qedr/qelr_verbs.c       | 11 ++++-------
>  providers/qedr/qelr_verbs.h       |  4 ++--
>  providers/rxe/rxe.c               |  6 +++---
>  providers/siw/siw.c               |  4 ++--
>  providers/vmw_pvrdma/pvrdma.h     |  4 ++--
>  providers/vmw_pvrdma/verbs.c      |  7 +++----
>  redhat/rdma-core.spec             |  2 +-
>  suse/rdma-core.spec               |  2 +-
>  47 files changed, 154 insertions(+), 118 deletions(-)
> 
> diff --git a/CMakeLists.txt b/CMakeLists.txt
> index b2613284..67112ae3 100644
> --- a/CMakeLists.txt
> +++ b/CMakeLists.txt
> @@ -68,11 +68,11 @@ endif()
>  set(PACKAGE_NAME "RDMA")
>  
>  # See Documentation/versioning.md
> -set(PACKAGE_VERSION "25.0")
> +set(PACKAGE_VERSION "26.0")
>  # When this is changed the values in these files need changing too:
>  #   debian/control
>  #   debian/libibverbs1.symbols
> -set(IBVERBS_PABI_VERSION "25")
> +set(IBVERBS_PABI_VERSION "26")
>  set(IBVERBS_PROVIDER_SUFFIX "-rdmav${IBVERBS_PABI_VERSION}.so")
>  
>  #-------------------------
> diff --git a/buildlib/cbuild b/buildlib/cbuild
> index 83ada8ee..1658cc9c 100755
> --- a/buildlib/cbuild
> +++ b/buildlib/cbuild
> @@ -1054,6 +1054,9 @@ def cmd_make_dist_tar(args):
>      """Make the standard distribution tar. The BUILD argument must point to a build
>      output directory that has pandoc-prebuilt"""
>      ver = get_version();
> +    #print "file=%s"%(spec_file);
> +    #print "ver=%s"%(ver);
> +    #print "get_version()=%s"%(ver);
>  
>      if not args.tarfn:
>          args.tarfn = "%s-%s.tar.gz"%(project,ver)
> @@ -1072,6 +1075,7 @@ def cmd_make_dist_tar(args):
>                                 "HEAD"]);
>  
>          # Mangle the paths and append the prebuilt stuff to the tar file
> +        print "file=%s"%(tmp_tarfn);
>          if args.BUILD:
>              subprocess.check_call([
>                  "tar",
> @@ -1080,7 +1084,9 @@ def cmd_make_dist_tar(args):
>                  "./",
>                  "--xform",r"s|^\.|%sbuildlib/pandoc-prebuilt|g"%(prefix)]);
>  
> +        print "file=%s"%(tmp_tarfn);
>          assert args.tarfn.endswith(".gz") or args.tarfn.endswith(".tgz");
> +        print "file=%s"%(tmp_tarfn);
>          with open(os.path.join(args.script_pwd,args.tarfn),"w") as F:
>              subprocess.check_call(["gzip","-9c",tmp_tarfn],stdout=F);
>  
> diff --git a/debian/control b/debian/control
> index dfd0184a..22010aed 100644
> --- a/debian/control
> +++ b/debian/control
> @@ -162,7 +162,7 @@ Section: libs
>  Pre-Depends: ${misc:Pre-Depends}
>  Depends: adduser, ${misc:Depends}, ${shlibs:Depends}
>  Recommends: ibverbs-providers
> -Breaks: ibverbs-providers (<< 25~)
> +Breaks: ibverbs-providers (<< 26~)
>  Description: Library for direct userspace use of RDMA (InfiniBand/iWARP)
>   libibverbs is a library that allows userspace processes to use RDMA
>   "verbs" as described in the InfiniBand Architecture Specification and
> diff --git a/debian/libibverbs1.symbols b/debian/libibverbs1.symbols
> index 39b3d4a9..8df78756 100644
> --- a/debian/libibverbs1.symbols
> +++ b/debian/libibverbs1.symbols
> @@ -4,7 +4,8 @@ libibverbs.so.1 libibverbs1 #MINVER#
>   IBVERBS_1.1@IBVERBS_1.1 1.1.6
>   IBVERBS_1.5@IBVERBS_1.5 20
>   IBVERBS_1.6@IBVERBS_1.6 24
> - (symver)IBVERBS_PRIVATE_25 25
> + IBVERBS_1.7@IBVERBS_1.7 25.0-1
> + (symver)IBVERBS_PRIVATE_26 25.0-1
>   ibv_ack_async_event@IBVERBS_1.0 1.1.6
>   ibv_ack_async_event@IBVERBS_1.1 1.1.6
>   ibv_ack_cq_events@IBVERBS_1.0 1.1.6
> @@ -89,6 +90,7 @@ libibverbs.so.1 libibverbs1 #MINVER#
>   ibv_read_sysfs_file@IBVERBS_1.0 1.1.6
>   ibv_reg_mr@IBVERBS_1.0 1.1.6
>   ibv_reg_mr@IBVERBS_1.1 1.1.6
> + ibv_reg_mr_iova@IBVERBS_1.7 25.0-1
>   ibv_register_driver@IBVERBS_1.1 1.1.6
>   ibv_rereg_mr@IBVERBS_1.1 1.2.1
>   ibv_resize_cq@IBVERBS_1.0 1.1.6
> diff --git a/libibverbs/CMakeLists.txt b/libibverbs/CMakeLists.txt
> index 1f5c59ed..a5926bbd 100644
> --- a/libibverbs/CMakeLists.txt
> +++ b/libibverbs/CMakeLists.txt
> @@ -21,7 +21,7 @@ configure_file("libibverbs.map.in"
>  
>  rdma_library(ibverbs "${CMAKE_CURRENT_BINARY_DIR}/libibverbs.map"
>    # See Documentation/versioning.md
> -  1 1.6.${PACKAGE_VERSION}
> +  1 1.7.${PACKAGE_VERSION}
>    all_providers.c
>    cmd.c
>    cmd_ah.c
> diff --git a/libibverbs/driver.h b/libibverbs/driver.h
> index 2e2131f2..88ed2b5e 100644
> --- a/libibverbs/driver.h
> +++ b/libibverbs/driver.h
> @@ -362,7 +362,7 @@ struct verbs_context_ops {
>  				    uint64_t dm_offset, size_t length,
>  				    unsigned int access);
>  	struct ibv_mr *(*reg_mr)(struct ibv_pd *pd, void *addr, size_t length,
> -				 int access);
> +				 uint64_t hca_va, int access);
>  	int (*req_notify_cq)(struct ibv_cq *cq, int solicited_only);
>  	int (*rereg_mr)(struct verbs_mr *vmr, int flags, struct ibv_pd *pd,
>  			void *addr, size_t length, int access);
> diff --git a/libibverbs/dummy_ops.c b/libibverbs/dummy_ops.c
> index ebc6eddd..6560371a 100644
> --- a/libibverbs/dummy_ops.c
> +++ b/libibverbs/dummy_ops.c
> @@ -411,7 +411,7 @@ static struct ibv_mr *reg_dm_mr(struct ibv_pd *pd, struct ibv_dm *dm,
>  }
>  
>  static struct ibv_mr *reg_mr(struct ibv_pd *pd, void *addr, size_t length,
> -			     int access)
> +			     uint64_t hca_va,  int access)
>  {
>  	errno = ENOSYS;
>  	return NULL;
> diff --git a/libibverbs/libibverbs.map.in b/libibverbs/libibverbs.map.in
> index ee253ec0..c1b4537a 100644
> --- a/libibverbs/libibverbs.map.in
> +++ b/libibverbs/libibverbs.map.in
> @@ -116,6 +116,11 @@ IBVERBS_1.6 {
>  		ibv_qp_to_qp_ex;
>  } IBVERBS_1.5;
>  
> +IBVERBS_1.7 {
> +	global:
> +		ibv_reg_mr_iova;
> +} IBVERBS_1.6;
> +
>  /* If any symbols in this stanza change ABI then the entire staza gets a new symbol
>     version. See the top level CMakeLists.txt for this setting. */
>  
> diff --git a/libibverbs/man/ibv_reg_mr.3 b/libibverbs/man/ibv_reg_mr.3
> index 631e5fe8..be90a57b 100644
> --- a/libibverbs/man/ibv_reg_mr.3
> +++ b/libibverbs/man/ibv_reg_mr.3
> @@ -3,7 +3,7 @@
>  .\"
>  .TH IBV_REG_MR 3 2006-10-31 libibverbs "Libibverbs Programmer's Manual"
>  .SH "NAME"
> -ibv_reg_mr, ibv_dereg_mr \- register or deregister a memory region (MR)
> +ibv_reg_mr, ibv_reg_mr_iova, ibv_dereg_mr \- register or deregister a memory region (MR)
>  .SH "SYNOPSIS"
>  .nf
>  .B #include <infiniband/verbs.h>
> @@ -11,6 +11,10 @@ ibv_reg_mr, ibv_dereg_mr \- register or deregister a memory region (MR)
>  .BI "struct ibv_mr *ibv_reg_mr(struct ibv_pd " "*pd" ", void " "*addr" ,
>  .BI "                          size_t " "length" ", int " "access" );
>  .sp
> +.BI "struct ibv_mr *ibv_reg_mr_iova(struct ibv_pd " "*pd" ", void " "*addr" ,
> +.BI "                               size_t " "length" ", uint64_t " "hca_va" ,
> +.BI "                               int " "access" );
> +.sp
>  .BI "int ibv_dereg_mr(struct ibv_mr " "*mr" );
>  .fi
>  .SH "DESCRIPTION"
> @@ -52,11 +56,18 @@ Local read access is always enabled for the MR.
>  .PP
>  To create an implicit ODP MR, IBV_ACCESS_ON_DEMAND should be set, addr should be 0 and length should be SIZE_MAX.
>  .PP
> +.B ibv_reg_mr_iova()
> +ibv_reg_mr_iova is the same as the normal reg_mr, except that the user is
> +allowed to specify the virtual base address of the MR when accessed through
> +a lkey or rkey. The offset in the memory region is computed as 'addr +
> +(iova - hca_va)'. Specifying 0 for hca_va has the same effect as
> +IBV_ACCESS_ZERO_BASED.
> +.PP
>  .B ibv_dereg_mr()
>  deregisters the MR
>  .I mr\fR.
>  .SH "RETURN VALUE"
> -.B ibv_reg_mr()
> +.B ibv_reg_mr() / ibv_reg_mr_iova()
>  returns a pointer to the registered MR, or NULL if the request fails.
>  The local key (\fBL_Key\fR) field
>  .B lkey
> diff --git a/libibverbs/verbs.c b/libibverbs/verbs.c
> index c7e8e8e9..e5063af2 100644
> --- a/libibverbs/verbs.c
> +++ b/libibverbs/verbs.c
> @@ -306,7 +306,28 @@ LATEST_SYMVER_FUNC(ibv_reg_mr, 1_1, "IBVERBS_1.1",
>  	if (ibv_dontfork_range(addr, length))
>  		return NULL;
>  
> -	mr = get_ops(pd->context)->reg_mr(pd, addr, length, access);
> +	mr = get_ops(pd->context)->reg_mr(pd, addr, length, (uintptr_t) addr,
> +					  access);
> +	if (mr) {
> +		mr->context = pd->context;
> +		mr->pd      = pd;
> +		mr->addr    = addr;
> +		mr->length  = length;
> +	} else
> +		ibv_dofork_range(addr, length);
> +
> +	return mr;
> +}
> +
> +struct ibv_mr *ibv_reg_mr_iova(struct ibv_pd *pd, void *addr, size_t length,
> +			       uint64_t iova, int access)
> +{
> +	struct ibv_mr *mr;
> +
> +	if (ibv_dontfork_range(addr, length))
> +		return NULL;
> +
> +	mr = get_ops(pd->context)->reg_mr(pd, addr, length, iova, access);
>  	if (mr) {
>  		mr->context = pd->context;
>  		mr->pd      = pd;
> diff --git a/libibverbs/verbs.h b/libibverbs/verbs.h
> index 1b0aef03..1e01b5db 100644
> --- a/libibverbs/verbs.h
> +++ b/libibverbs/verbs.h
> @@ -2376,6 +2376,13 @@ static inline int ibv_close_xrcd(struct ibv_xrcd *xrcd)
>  struct ibv_mr *ibv_reg_mr(struct ibv_pd *pd, void *addr,
>  			  size_t length, int access);
>  
> +/**
> + * ibv_reg_mr_iova - Register a memory region with a virtual offset
> + * address
> + */
> +struct ibv_mr *ibv_reg_mr_iova(struct ibv_pd *pd, void *addr, size_t length,
> +			       uint64_t iova, int access);
> +
>  
>  enum ibv_rereg_mr_err_code {
>  	/* Old MR is valid, invalid input */
> diff --git a/providers/bnxt_re/verbs.c b/providers/bnxt_re/verbs.c
> index ed7ddb6e..2218e3a0 100644
> --- a/providers/bnxt_re/verbs.c
> +++ b/providers/bnxt_re/verbs.c
> @@ -131,7 +131,7 @@ int bnxt_re_free_pd(struct ibv_pd *ibvpd)
>  }
>  
>  struct ibv_mr *bnxt_re_reg_mr(struct ibv_pd *ibvpd, void *sva, size_t len,
> -			      int access)
> +			      uint64_t hca_va, int access)
>  {
>  	struct bnxt_re_mr *mr;
>  	struct ibv_reg_mr cmd;
> @@ -141,8 +141,8 @@ struct ibv_mr *bnxt_re_reg_mr(struct ibv_pd *ibvpd, void *sva, size_t len,
>  	if (!mr)
>  		return NULL;
>  
> -	if (ibv_cmd_reg_mr(ibvpd, sva, len, (uintptr_t)sva, access, &mr->vmr,
> -			   &cmd, sizeof(cmd), &resp.ibv_resp, sizeof(resp))) {
> +	if (ibv_cmd_reg_mr(ibvpd, sva, len, hca_va, access, &mr->vmr, &cmd,
> +			   sizeof(cmd), &resp.ibv_resp, sizeof(resp))) {
>  		free(mr);
>  		return NULL;
>  	}
> diff --git a/providers/bnxt_re/verbs.h b/providers/bnxt_re/verbs.h
> index b565d7e6..2e994880 100644
> --- a/providers/bnxt_re/verbs.h
> +++ b/providers/bnxt_re/verbs.h
> @@ -61,7 +61,7 @@ int bnxt_re_query_port(struct ibv_context *uctx, uint8_t port,
>  struct ibv_pd *bnxt_re_alloc_pd(struct ibv_context *uctx);
>  int bnxt_re_free_pd(struct ibv_pd *ibvpd);
>  struct ibv_mr *bnxt_re_reg_mr(struct ibv_pd *ibvpd, void *buf, size_t len,
> -			      int ibv_access_flags);
> +			      uint64_t hca_va, int ibv_access_flags);
>  int bnxt_re_dereg_mr(struct verbs_mr *vmr);
>  
>  struct ibv_cq *bnxt_re_create_cq(struct ibv_context *uctx, int ncqe,
> diff --git a/providers/cxgb3/iwch.h b/providers/cxgb3/iwch.h
> index c8de44e9..c7d85d3a 100644
> --- a/providers/cxgb3/iwch.h
> +++ b/providers/cxgb3/iwch.h
> @@ -150,8 +150,8 @@ extern int iwch_query_port(struct ibv_context *context, uint8_t port,
>  extern struct ibv_pd *iwch_alloc_pd(struct ibv_context *context);
>  extern int iwch_free_pd(struct ibv_pd *pd);
>  
> -extern struct ibv_mr *iwch_reg_mr(struct ibv_pd *pd, void *addr,
> -				  size_t length, int access);
> +extern struct ibv_mr *iwch_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
> +				  uint64_t hca_va, int access);
>  extern int iwch_dereg_mr(struct verbs_mr *mr);
>  
>  struct ibv_cq *iwch_create_cq(struct ibv_context *context, int cqe,
> diff --git a/providers/cxgb3/verbs.c b/providers/cxgb3/verbs.c
> index 8b90482a..39a44192 100644
> --- a/providers/cxgb3/verbs.c
> +++ b/providers/cxgb3/verbs.c
> @@ -103,15 +103,17 @@ int iwch_free_pd(struct ibv_pd *pd)
>  	return 0;
>  }
>  
> -static struct ibv_mr *__iwch_reg_mr(struct ibv_pd *pd, void *addr,
> -				    size_t length, uint64_t hca_va,
> -				    int access)
> +struct ibv_mr *iwch_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
> +			   uint64_t hca_va, int access)
>  {
>  	struct iwch_mr *mhp;
>  	struct ibv_reg_mr cmd;
>  	struct uiwch_reg_mr_resp resp;
>  	struct iwch_device *dev = to_iwch_dev(pd->context->device);
>  
> +	PDBG("%s addr %p length %ld hca_va %p\n", __func__, addr, length,
> +	     hca_va);
> +
>  	mhp = malloc(sizeof *mhp);
>  	if (!mhp)
>  		return NULL;
> @@ -140,13 +142,6 @@ static struct ibv_mr *__iwch_reg_mr(struct ibv_pd *pd, void *addr,
>  	return &mhp->vmr.ibv_mr;
>  }
>  
> -struct ibv_mr *iwch_reg_mr(struct ibv_pd *pd, void *addr,
> -			   size_t length, int access)
> -{
> -	PDBG("%s addr %p length %ld\n", __FUNCTION__, addr, length);
> -	return __iwch_reg_mr(pd, addr, length, (uintptr_t) addr, access);
> -}
> -
>  int iwch_dereg_mr(struct verbs_mr *vmr)
>  {
>  	int ret;
> diff --git a/providers/cxgb4/libcxgb4.h b/providers/cxgb4/libcxgb4.h
> index 0fbceab0..ce8f29dd 100644
> --- a/providers/cxgb4/libcxgb4.h
> +++ b/providers/cxgb4/libcxgb4.h
> @@ -198,8 +198,8 @@ int c4iw_query_port(struct ibv_context *context, uint8_t port,
>  struct ibv_pd *c4iw_alloc_pd(struct ibv_context *context);
>  int c4iw_free_pd(struct ibv_pd *pd);
>  
> -struct ibv_mr *c4iw_reg_mr(struct ibv_pd *pd, void *addr,
> -				  size_t length, int access);
> +struct ibv_mr *c4iw_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
> +			   uint64_t hca_va, int access);
>  int c4iw_dereg_mr(struct verbs_mr *vmr);
>  
>  struct ibv_cq *c4iw_create_cq(struct ibv_context *context, int cqe,
> diff --git a/providers/cxgb4/verbs.c b/providers/cxgb4/verbs.c
> index 452e4f1f..4240f6b3 100644
> --- a/providers/cxgb4/verbs.c
> +++ b/providers/cxgb4/verbs.c
> @@ -109,15 +109,17 @@ int c4iw_free_pd(struct ibv_pd *pd)
>  	return 0;
>  }
>  
> -static struct ibv_mr *__c4iw_reg_mr(struct ibv_pd *pd, void *addr,
> -				    size_t length, uint64_t hca_va,
> -				    int access)
> +struct ibv_mr *c4iw_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
> +			   uint64_t hca_va, int access)
>  {
>  	struct c4iw_mr *mhp;
>  	struct ibv_reg_mr cmd;
>  	struct ib_uverbs_reg_mr_resp resp;
>  	struct c4iw_dev *dev = to_c4iw_dev(pd->context->device);
>  
> +	PDBG("%s addr %p length %ld hca_va %p\n", __func__, addr, length,
> +	     hca_va);
> +
>  	mhp = malloc(sizeof *mhp);
>  	if (!mhp)
>  		return NULL;
> @@ -142,13 +144,6 @@ static struct ibv_mr *__c4iw_reg_mr(struct ibv_pd *pd, void *addr,
>  	return &mhp->vmr.ibv_mr;
>  }
>  
> -struct ibv_mr *c4iw_reg_mr(struct ibv_pd *pd, void *addr,
> -			   size_t length, int access)
> -{
> -	PDBG("%s addr %p length %ld\n", __func__, addr, length);
> -	return __c4iw_reg_mr(pd, addr, length, (uintptr_t) addr, access);
> -}
> -
>  int c4iw_dereg_mr(struct verbs_mr *vmr)
>  {
>  	int ret;
> diff --git a/providers/efa/verbs.c b/providers/efa/verbs.c
> index 4d36f9e1..d2500ecb 100644
> --- a/providers/efa/verbs.c
> +++ b/providers/efa/verbs.c
> @@ -126,7 +126,7 @@ int efa_dealloc_pd(struct ibv_pd *ibvpd)
>  }
>  
>  struct ibv_mr *efa_reg_mr(struct ibv_pd *ibvpd, void *sva, size_t len,
> -			  int access)
> +			  uint64_t hca_va, int access)
>  {
>  	struct ib_uverbs_reg_mr_resp resp;
>  	struct ibv_reg_mr cmd;
> @@ -136,7 +136,7 @@ struct ibv_mr *efa_reg_mr(struct ibv_pd *ibvpd, void *sva, size_t len,
>  	if (!mr)
>  		return NULL;
>  
> -	if (ibv_cmd_reg_mr(ibvpd, sva, len, (uintptr_t)sva, access, &mr->vmr,
> +	if (ibv_cmd_reg_mr(ibvpd, sva, len, hca_va, access, &mr->vmr,
>  			   &cmd, sizeof(cmd), &resp, sizeof(resp))) {
>  		free(mr);
>  		return NULL;
> diff --git a/providers/efa/verbs.h b/providers/efa/verbs.h
> index 1a49653f..7b532adc 100644
> --- a/providers/efa/verbs.h
> +++ b/providers/efa/verbs.h
> @@ -18,7 +18,7 @@ int efa_query_device_ex(struct ibv_context *context,
>  struct ibv_pd *efa_alloc_pd(struct ibv_context *uctx);
>  int efa_dealloc_pd(struct ibv_pd *ibvpd);
>  struct ibv_mr *efa_reg_mr(struct ibv_pd *ibvpd, void *buf, size_t len,
> -			  int ibv_access_flags);
> +			  uint64_t hca_va, int ibv_access_flags);
>  int efa_dereg_mr(struct verbs_mr *vmr);
>  
>  struct ibv_cq *efa_create_cq(struct ibv_context *uctx, int ncqe,
> diff --git a/providers/hfi1verbs/hfiverbs.h b/providers/hfi1verbs/hfiverbs.h
> index 070a01c9..b9e91d80 100644
> --- a/providers/hfi1verbs/hfiverbs.h
> +++ b/providers/hfi1verbs/hfiverbs.h
> @@ -204,8 +204,8 @@ struct ibv_pd *hfi1_alloc_pd(struct ibv_context *pd);
>  
>  int hfi1_free_pd(struct ibv_pd *pd);
>  
> -struct ibv_mr *hfi1_reg_mr(struct ibv_pd *pd, void *addr,
> -			    size_t length, int access);
> +struct ibv_mr *hfi1_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
> +			   uint64_t hca_va, int access);
>  
>  int hfi1_dereg_mr(struct verbs_mr *vmr);
>  
> diff --git a/providers/hfi1verbs/verbs.c b/providers/hfi1verbs/verbs.c
> index ff001f6d..275f8d51 100644
> --- a/providers/hfi1verbs/verbs.c
> +++ b/providers/hfi1verbs/verbs.c
> @@ -129,8 +129,8 @@ int hfi1_free_pd(struct ibv_pd *pd)
>  	return 0;
>  }
>  
> -struct ibv_mr *hfi1_reg_mr(struct ibv_pd *pd, void *addr,
> -			    size_t length, int access)
> +struct ibv_mr *hfi1_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
> +			   uint64_t hca_va, int access)
>  {
>  	struct verbs_mr *vmr;
>  	struct ibv_reg_mr cmd;
> @@ -141,8 +141,8 @@ struct ibv_mr *hfi1_reg_mr(struct ibv_pd *pd, void *addr,
>  	if (!vmr)
>  		return NULL;
>  
> -	ret = ibv_cmd_reg_mr(pd, addr, length, (uintptr_t)addr, access, vmr,
> -			     &cmd, sizeof cmd, &resp, sizeof resp);
> +	ret = ibv_cmd_reg_mr(pd, addr, length, hca_va, access, vmr, &cmd,
> +			     sizeof(cmd), &resp, sizeof(resp));
>  
>  	if (ret) {
>  		free(vmr);
> diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h
> index 93bc47c1..517d97b3 100644
> --- a/providers/hns/hns_roce_u.h
> +++ b/providers/hns/hns_roce_u.h
> @@ -293,7 +293,7 @@ struct ibv_pd *hns_roce_u_alloc_pd(struct ibv_context *context);
>  int hns_roce_u_free_pd(struct ibv_pd *pd);
>  
>  struct ibv_mr *hns_roce_u_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
> -				 int access);
> +				 uint64_t hca_va, int access);
>  int hns_roce_u_rereg_mr(struct verbs_mr *mr, int flags, struct ibv_pd *pd,
>  			void *addr, size_t length, int access);
>  int hns_roce_u_dereg_mr(struct verbs_mr *mr);
> diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c
> index 9ba65a1d..44bc32a4 100644
> --- a/providers/hns/hns_roce_u_verbs.c
> +++ b/providers/hns/hns_roce_u_verbs.c
> @@ -120,7 +120,7 @@ int hns_roce_u_free_pd(struct ibv_pd *pd)
>  }
>  
>  struct ibv_mr *hns_roce_u_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
> -				 int access)
> +				 uint64_t hca_va, int access)
>  {
>  	int ret;
>  	struct verbs_mr *vmr;
> @@ -141,8 +141,8 @@ struct ibv_mr *hns_roce_u_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
>  	if (!vmr)
>  		return NULL;
>  
> -	ret = ibv_cmd_reg_mr(pd, addr, length, (uintptr_t)addr, access, vmr,
> -			     &cmd, sizeof(cmd), &resp, sizeof(resp));
> +	ret = ibv_cmd_reg_mr(pd, addr, length, hca_va, access, vmr, &cmd,
> +			     sizeof(cmd), &resp, sizeof(resp));
>  	if (ret) {
>  		free(vmr);
>  		return NULL;
> diff --git a/providers/i40iw/i40iw_umain.h b/providers/i40iw/i40iw_umain.h
> index 4055933a..af84f284 100644
> --- a/providers/i40iw/i40iw_umain.h
> +++ b/providers/i40iw/i40iw_umain.h
> @@ -155,7 +155,8 @@ int i40iw_uquery_device(struct ibv_context *, struct ibv_device_attr *);
>  int i40iw_uquery_port(struct ibv_context *, uint8_t, struct ibv_port_attr *);
>  struct ibv_pd *i40iw_ualloc_pd(struct ibv_context *);
>  int i40iw_ufree_pd(struct ibv_pd *);
> -struct ibv_mr *i40iw_ureg_mr(struct ibv_pd *, void *, size_t, int);
> +struct ibv_mr *i40iw_ureg_mr(struct ibv_pd *pd, void *addr, size_t length,
> +			     uint64_t hca_va, int access);
>  int i40iw_udereg_mr(struct verbs_mr *vmr);
>  struct ibv_cq *i40iw_ucreate_cq(struct ibv_context *, int, struct ibv_comp_channel *, int);
>  int i40iw_uresize_cq(struct ibv_cq *, int);
> diff --git a/providers/i40iw/i40iw_uverbs.c b/providers/i40iw/i40iw_uverbs.c
> index 83b504fa..240150b9 100644
> --- a/providers/i40iw/i40iw_uverbs.c
> +++ b/providers/i40iw/i40iw_uverbs.c
> @@ -149,7 +149,8 @@ int i40iw_ufree_pd(struct ibv_pd *pd)
>   * @length: length of the memory
>   * @access: access allowed on this mr
>   */
> -struct ibv_mr *i40iw_ureg_mr(struct ibv_pd *pd, void *addr, size_t length, int access)
> +struct ibv_mr *i40iw_ureg_mr(struct ibv_pd *pd, void *addr, size_t length,
> +			     uint64_t hca_va, int access)
>  {
>  	struct verbs_mr *vmr;
>  	struct i40iw_ureg_mr cmd;
> @@ -161,9 +162,8 @@ struct ibv_mr *i40iw_ureg_mr(struct ibv_pd *pd, void *addr, size_t length, int a
>  
>  	cmd.reg_type = IW_MEMREG_TYPE_MEM;
>  
> -	if (ibv_cmd_reg_mr(pd, addr, length, (uintptr_t)addr,
> -			   access, vmr, &cmd.ibv_cmd, sizeof(cmd),
> -			   &resp, sizeof(resp))) {
> +	if (ibv_cmd_reg_mr(pd, addr, length, hca_va, access, vmr, &cmd.ibv_cmd,
> +			   sizeof(cmd), &resp, sizeof(resp))) {
>  		fprintf(stderr, PFX "%s: Failed to register memory\n", __func__);
>  		free(vmr);
>  		return NULL;
> diff --git a/providers/ipathverbs/ipathverbs.h b/providers/ipathverbs/ipathverbs.h
> index cfb5cc38..694f1f44 100644
> --- a/providers/ipathverbs/ipathverbs.h
> +++ b/providers/ipathverbs/ipathverbs.h
> @@ -183,8 +183,8 @@ struct ibv_pd *ipath_alloc_pd(struct ibv_context *pd);
>  
>  int ipath_free_pd(struct ibv_pd *pd);
>  
> -struct ibv_mr *ipath_reg_mr(struct ibv_pd *pd, void *addr,
> -			    size_t length, int access);
> +struct ibv_mr *ipath_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
> +			    uint64_t hca_va, int access);
>  
>  int ipath_dereg_mr(struct verbs_mr *vmr);
>  
> diff --git a/providers/ipathverbs/verbs.c b/providers/ipathverbs/verbs.c
> index de4722b2..505ea584 100644
> --- a/providers/ipathverbs/verbs.c
> +++ b/providers/ipathverbs/verbs.c
> @@ -109,8 +109,8 @@ int ipath_free_pd(struct ibv_pd *pd)
>  	return 0;
>  }
>  
> -struct ibv_mr *ipath_reg_mr(struct ibv_pd *pd, void *addr,
> -			    size_t length, int access)
> +struct ibv_mr *ipath_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
> +			    uint64_t hca_va, int access)
>  {
>  	struct verbs_mr *vmr;
>  	struct ibv_reg_mr cmd;
> @@ -121,8 +121,8 @@ struct ibv_mr *ipath_reg_mr(struct ibv_pd *pd, void *addr,
>  	if (!vmr)
>  		return NULL;
>  
> -	ret = ibv_cmd_reg_mr(pd, addr, length, (uintptr_t)addr, access, vmr,
> -			     &cmd, sizeof cmd, &resp, sizeof resp);
> +	ret = ibv_cmd_reg_mr(pd, addr, length, hca_va, access, vmr, &cmd,
> +			     sizeof(cmd), &resp, sizeof(resp));
>  	if (ret) {
>  		free(vmr);
>  		return NULL;
> diff --git a/providers/mlx4/mlx4.h b/providers/mlx4/mlx4.h
> index 9c21d775..3c161e8e 100644
> --- a/providers/mlx4/mlx4.h
> +++ b/providers/mlx4/mlx4.h
> @@ -320,8 +320,8 @@ struct ibv_xrcd *mlx4_open_xrcd(struct ibv_context *context,
>  				struct ibv_xrcd_init_attr *attr);
>  int mlx4_close_xrcd(struct ibv_xrcd *xrcd);
>  
> -struct ibv_mr *mlx4_reg_mr(struct ibv_pd *pd, void *addr,
> -			    size_t length, int access);
> +struct ibv_mr *mlx4_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
> +			   uint64_t hca_va, int access);
>  int mlx4_rereg_mr(struct verbs_mr *vmr, int flags, struct ibv_pd *pd,
>  		  void *addr, size_t length, int access);
>  int mlx4_dereg_mr(struct verbs_mr *vmr);
> diff --git a/providers/mlx4/verbs.c b/providers/mlx4/verbs.c
> index 9a5affe7..d814a2bc 100644
> --- a/providers/mlx4/verbs.c
> +++ b/providers/mlx4/verbs.c
> @@ -275,7 +275,7 @@ int mlx4_close_xrcd(struct ibv_xrcd *ib_xrcd)
>  }
>  
>  struct ibv_mr *mlx4_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
> -			   int access)
> +			   uint64_t hca_va, int access)
>  {
>  	struct verbs_mr *vmr;
>  	struct ibv_reg_mr cmd;
> @@ -286,9 +286,8 @@ struct ibv_mr *mlx4_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
>  	if (!vmr)
>  		return NULL;
>  
> -	ret = ibv_cmd_reg_mr(pd, addr, length, (uintptr_t) addr,
> -			     access, vmr, &cmd, sizeof(cmd),
> -			     &resp, sizeof(resp));
> +	ret = ibv_cmd_reg_mr(pd, addr, length, hca_va, access, vmr, &cmd,
> +			     sizeof(cmd), &resp, sizeof(resp));
>  	if (ret) {
>  		free(vmr);
>  		return NULL;
> diff --git a/providers/mlx5/mlx5.h b/providers/mlx5/mlx5.h
> index d9fccdcc..ab3c2c1a 100644
> --- a/providers/mlx5/mlx5.h
> +++ b/providers/mlx5/mlx5.h
> @@ -821,8 +821,8 @@ void mlx5_async_event(struct ibv_context *context,
>  		      struct ibv_async_event *event);
>  
>  struct ibv_mr *mlx5_alloc_null_mr(struct ibv_pd *pd);
> -struct ibv_mr *mlx5_reg_mr(struct ibv_pd *pd, void *addr,
> -			   size_t length, int access);
> +struct ibv_mr *mlx5_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
> +			   uint64_t hca_va, int access);
>  int mlx5_rereg_mr(struct verbs_mr *mr, int flags, struct ibv_pd *pd, void *addr,
>  		  size_t length, int access);
>  int mlx5_dereg_mr(struct verbs_mr *mr);
> diff --git a/providers/mlx5/verbs.c b/providers/mlx5/verbs.c
> index c13e81f4..714c5f7e 100644
> --- a/providers/mlx5/verbs.c
> +++ b/providers/mlx5/verbs.c
> @@ -388,7 +388,7 @@ int mlx5_free_pd(struct ibv_pd *pd)
>  }
>  
>  struct ibv_mr *mlx5_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
> -			   int acc)
> +			   uint64_t hca_va, int acc)
>  {
>  	struct mlx5_mr *mr;
>  	struct ibv_reg_mr cmd;
> @@ -400,9 +400,8 @@ struct ibv_mr *mlx5_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
>  	if (!mr)
>  		return NULL;
>  
> -	ret = ibv_cmd_reg_mr(pd, addr, length, (uintptr_t)addr, access,
> -			     &mr->vmr, &cmd, sizeof(cmd), &resp,
> -			     sizeof resp);
> +	ret = ibv_cmd_reg_mr(pd, addr, length, hca_va, access, &mr->vmr, &cmd,
> +			     sizeof(cmd), &resp, sizeof(resp));
>  	if (ret) {
>  		mlx5_free_buf(&(mr->buf));
>  		free(mr);
> diff --git a/providers/mthca/ah.c b/providers/mthca/ah.c
> index df0cb281..adefb178 100644
> --- a/providers/mthca/ah.c
> +++ b/providers/mthca/ah.c
> @@ -61,7 +61,8 @@ static struct mthca_ah_page *__add_page(struct mthca_pd *pd, int page_size, int
>  		return NULL;
>  	}
>  
> -	page->mr = mthca_reg_mr(&pd->ibv_pd, page->buf.buf, page_size, 0);
> +	page->mr = mthca_reg_mr(&pd->ibv_pd, page->buf.buf, page_size,
> +				(uintptr_t) page->buf.buf, 0);
>  	if (!page->mr) {
>  		mthca_free_buf(&page->buf);
>  		free(page);
> diff --git a/providers/mthca/mthca.h b/providers/mthca/mthca.h
> index 61042de3..b7df2f73 100644
> --- a/providers/mthca/mthca.h
> +++ b/providers/mthca/mthca.h
> @@ -280,8 +280,8 @@ int mthca_query_port(struct ibv_context *context, uint8_t port,
>  struct ibv_pd *mthca_alloc_pd(struct ibv_context *context);
>  int mthca_free_pd(struct ibv_pd *pd);
>  
> -struct ibv_mr *mthca_reg_mr(struct ibv_pd *pd, void *addr,
> -			    size_t length, int access);
> +struct ibv_mr *mthca_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
> +			    uint64_t hca_va, int access);
>  int mthca_dereg_mr(struct verbs_mr *mr);
>  
>  struct ibv_cq *mthca_create_cq(struct ibv_context *context, int cqe,
> diff --git a/providers/mthca/verbs.c b/providers/mthca/verbs.c
> index e7a1c357..99e5ec66 100644
> --- a/providers/mthca/verbs.c
> +++ b/providers/mthca/verbs.c
> @@ -145,10 +145,10 @@ static struct ibv_mr *__mthca_reg_mr(struct ibv_pd *pd, void *addr,
>  	return &vmr->ibv_mr;
>  }
>  
> -struct ibv_mr *mthca_reg_mr(struct ibv_pd *pd, void *addr,
> -			    size_t length, int access)
> +struct ibv_mr *mthca_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
> +			    uint64_t hca_va, int access)
>  {
> -	return __mthca_reg_mr(pd, addr, length, (uintptr_t) addr, access, 0);
> +	return __mthca_reg_mr(pd, addr, length, hca_va, access, 0);
>  }
>  
>  int mthca_dereg_mr(struct verbs_mr *vmr)
> diff --git a/providers/nes/nes_umain.h b/providers/nes/nes_umain.h
> index edb38622..1070ce42 100644
> --- a/providers/nes/nes_umain.h
> +++ b/providers/nes/nes_umain.h
> @@ -350,7 +350,8 @@ int nes_uquery_device(struct ibv_context *, struct ibv_device_attr *);
>  int nes_uquery_port(struct ibv_context *, uint8_t, struct ibv_port_attr *);
>  struct ibv_pd *nes_ualloc_pd(struct ibv_context *);
>  int nes_ufree_pd(struct ibv_pd *);
> -struct ibv_mr *nes_ureg_mr(struct ibv_pd *, void *, size_t, int);
> +struct ibv_mr *nes_ureg_mr(struct ibv_pd *pd, void *addr, size_t length,
> +			   uint64_t hca_va, int access);
>  int nes_udereg_mr(struct verbs_mr *vmr);
>  struct ibv_cq *nes_ucreate_cq(struct ibv_context *, int, struct ibv_comp_channel *, int);
>  int nes_uresize_cq(struct ibv_cq *, int);
> diff --git a/providers/nes/nes_uverbs.c b/providers/nes/nes_uverbs.c
> index 8523e923..2b78468b 100644
> --- a/providers/nes/nes_uverbs.c
> +++ b/providers/nes/nes_uverbs.c
> @@ -165,8 +165,8 @@ int nes_ufree_pd(struct ibv_pd *pd)
>  /**
>   * nes_ureg_mr
>   */
> -struct ibv_mr *nes_ureg_mr(struct ibv_pd *pd, void *addr,
> -		size_t length, int access)
> +struct ibv_mr *nes_ureg_mr(struct ibv_pd *pd, void *addr, size_t length,
> +			   uint64_t hca_va, int access)
>  {
>  	struct verbs_mr *vmr;
>  	struct nes_ureg_mr cmd;
> @@ -177,9 +177,8 @@ struct ibv_mr *nes_ureg_mr(struct ibv_pd *pd, void *addr,
>  		return NULL;
>  
>  	cmd.reg_type = IWNES_MEMREG_TYPE_MEM;
> -	if (ibv_cmd_reg_mr(pd, addr, length, (uintptr_t) addr,
> -			access, vmr, &cmd.ibv_cmd, sizeof(cmd),
> -			&resp, sizeof(resp))) {
> +	if (ibv_cmd_reg_mr(pd, addr, length, hca_va, access, vmr, &cmd.ibv_cmd,
> +			   sizeof(cmd), &resp, sizeof(resp))) {
>  		free(vmr);
>  
>  		return NULL;
> diff --git a/providers/ocrdma/ocrdma_main.h b/providers/ocrdma/ocrdma_main.h
> index 33def78c..aadefd96 100644
> --- a/providers/ocrdma/ocrdma_main.h
> +++ b/providers/ocrdma/ocrdma_main.h
> @@ -269,8 +269,8 @@ int ocrdma_query_device(struct ibv_context *, struct ibv_device_attr *);
>  int ocrdma_query_port(struct ibv_context *, uint8_t, struct ibv_port_attr *);
>  struct ibv_pd *ocrdma_alloc_pd(struct ibv_context *);
>  int ocrdma_free_pd(struct ibv_pd *);
> -struct ibv_mr *ocrdma_reg_mr(struct ibv_pd *, void *, size_t,
> -			     int ibv_access_flags);
> +struct ibv_mr *ocrdma_reg_mr(struct ibv_pd *pd, void *addr, size_t len,
> +			     uint64_t hca_va, int access);
>  int ocrdma_dereg_mr(struct verbs_mr *vmr);
>  
>  struct ibv_cq *ocrdma_create_cq(struct ibv_context *, int,
> diff --git a/providers/ocrdma/ocrdma_verbs.c b/providers/ocrdma/ocrdma_verbs.c
> index 3b3e1a60..4ae35be9 100644
> --- a/providers/ocrdma/ocrdma_verbs.c
> +++ b/providers/ocrdma/ocrdma_verbs.c
> @@ -185,22 +185,20 @@ int ocrdma_free_pd(struct ibv_pd *ibpd)
>  /*
>   * ocrdma_reg_mr
>   */
> -struct ibv_mr *ocrdma_reg_mr(struct ibv_pd *pd, void *addr,
> -			     size_t len, int access)
> +struct ibv_mr *ocrdma_reg_mr(struct ibv_pd *pd, void *addr, size_t len,
> +			     uint64_t hca_va, int access)
>  {
>  	struct ocrdma_mr *mr;
>  	struct ibv_reg_mr cmd;
>  	struct uocrdma_reg_mr_resp resp;
> -	uint64_t hca_va = (uintptr_t) addr;
>  
>  	mr = malloc(sizeof *mr);
>  	if (!mr)
>  		return NULL;
>  	bzero(mr, sizeof *mr);
>  
> -	if (ibv_cmd_reg_mr(pd, addr, len, hca_va,
> -			   access, &mr->vmr, &cmd, sizeof(cmd),
> -			   &resp.ibv_resp, sizeof(resp))) {
> +	if (ibv_cmd_reg_mr(pd, addr, len, hca_va, access, &mr->vmr, &cmd,
> +			   sizeof(cmd), &resp.ibv_resp, sizeof(resp))) {
>  		free(mr);
>  		return NULL;
>  	}
> diff --git a/providers/qedr/qelr_main.h b/providers/qedr/qelr_main.h
> index 77aa9c2f..fae87130 100644
> --- a/providers/qedr/qelr_main.h
> +++ b/providers/qedr/qelr_main.h
> @@ -46,8 +46,8 @@ int qelr_query_port(struct ibv_context *, uint8_t, struct ibv_port_attr *);
>  struct ibv_pd *qelr_alloc_pd(struct ibv_context *);
>  int qelr_dealloc_pd(struct ibv_pd *);
>  
> -struct ibv_mr *qelr_reg_mr(struct ibv_pd *, void *, size_t,
> -			   int ibv_access_flags);
> +struct ibv_mr *qelr_reg_mr(struct ibv_pd *ibpd, void *addr, size_t len,
> +			   uint64_t hca_va, int access);
>  int qelr_dereg_mr(struct verbs_mr *vmr);
>  
>  struct ibv_cq *qelr_create_cq(struct ibv_context *, int,
> diff --git a/providers/qedr/qelr_verbs.c b/providers/qedr/qelr_verbs.c
> index a347714d..4b19ccb0 100644
> --- a/providers/qedr/qelr_verbs.c
> +++ b/providers/qedr/qelr_verbs.c
> @@ -156,8 +156,8 @@ int qelr_dealloc_pd(struct ibv_pd *ibpd)
>  	return rc;
>  }
>  
> -struct ibv_mr *qelr_reg_mr(struct ibv_pd *ibpd, void *addr,
> -			   size_t len, int access)
> +struct ibv_mr *qelr_reg_mr(struct ibv_pd *ibpd, void *addr, size_t len,
> +			   uint64_t hca_va, int access)
>  {
>  	struct qelr_mr *mr;
>  	struct ibv_reg_mr cmd;
> @@ -165,17 +165,14 @@ struct ibv_mr *qelr_reg_mr(struct ibv_pd *ibpd, void *addr,
>  	struct qelr_pd *pd = get_qelr_pd(ibpd);
>  	struct qelr_devctx *cxt = get_qelr_ctx(ibpd->context);
>  
> -	uint64_t hca_va = (uintptr_t) addr;
> -
>  	mr = malloc(sizeof(*mr));
>  	if (!mr)
>  		return NULL;
>  
>  	bzero(mr, sizeof(*mr));
>  
> -	if (ibv_cmd_reg_mr(ibpd, addr, len, hca_va,
> -			   access, &mr->vmr, &cmd, sizeof(cmd),
> -			   &resp.ibv_resp, sizeof(resp))) {
> +	if (ibv_cmd_reg_mr(ibpd, addr, len, hca_va, access, &mr->vmr, &cmd,
> +			   sizeof(cmd), &resp.ibv_resp, sizeof(resp))) {
>  		free(mr);
>  		return NULL;
>  	}
> diff --git a/providers/qedr/qelr_verbs.h b/providers/qedr/qelr_verbs.h
> index cf2ce047..d0eacbfe 100644
> --- a/providers/qedr/qelr_verbs.h
> +++ b/providers/qedr/qelr_verbs.h
> @@ -48,8 +48,8 @@ int qelr_query_port(struct ibv_context *context, uint8_t port,
>  struct ibv_pd *qelr_alloc_pd(struct ibv_context *context);
>  int qelr_dealloc_pd(struct ibv_pd *ibpd);
>  
> -struct ibv_mr *qelr_reg_mr(struct ibv_pd *ibpd, void *addr,
> -			   size_t len, int access);
> +struct ibv_mr *qelr_reg_mr(struct ibv_pd *ibpd, void *addr, size_t len,
> +			   uint64_t hca_va, int access);
>  int qelr_dereg_mr(struct verbs_mr *mr);
>  
>  struct ibv_cq *qelr_create_cq(struct ibv_context *context, int cqe,
> diff --git a/providers/rxe/rxe.c b/providers/rxe/rxe.c
> index 2efb646c..4e05d5b9 100644
> --- a/providers/rxe/rxe.c
> +++ b/providers/rxe/rxe.c
> @@ -123,7 +123,7 @@ static int rxe_dealloc_pd(struct ibv_pd *pd)
>  }
>  
>  static struct ibv_mr *rxe_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
> -				 int access)
> +				 uint64_t hca_va, int access)
>  {
>  	struct verbs_mr *vmr;
>  	struct ibv_reg_mr cmd;
> @@ -134,8 +134,8 @@ static struct ibv_mr *rxe_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
>  	if (!vmr)
>  		return NULL;
>  
> -	ret = ibv_cmd_reg_mr(pd, addr, length, (uintptr_t)addr, access, vmr,
> -			     &cmd, sizeof cmd, &resp, sizeof resp);
> +	ret = ibv_cmd_reg_mr(pd, addr, length, hca_va, access, vmr, &cmd,
> +			     sizeof(cmd), &resp, sizeof(resp));
>  	if (ret) {
>  		free(vmr);
>  		return NULL;
> diff --git a/providers/siw/siw.c b/providers/siw/siw.c
> index 41f33fa1..c1acf398 100644
> --- a/providers/siw/siw.c
> +++ b/providers/siw/siw.c
> @@ -96,7 +96,7 @@ static int siw_free_pd(struct ibv_pd *pd)
>  }
>  
>  static struct ibv_mr *siw_reg_mr(struct ibv_pd *pd, void *addr, size_t len,
> -				 int access)
> +				 uint64_t hca_va, int access)
>  {
>  	struct siw_cmd_reg_mr cmd = {};
>  	struct siw_cmd_reg_mr_resp resp = {};
> @@ -107,7 +107,7 @@ static struct ibv_mr *siw_reg_mr(struct ibv_pd *pd, void *addr, size_t len,
>  	if (!mr)
>  		return NULL;
>  
> -	rv = ibv_cmd_reg_mr(pd, addr, len, (uintptr_t)addr, access,
> +	rv = ibv_cmd_reg_mr(pd, addr, len, hca_va, access,
>  			    &mr->base_mr, &cmd.ibv_cmd, sizeof(cmd),
>  			    &resp.ibv_resp, sizeof(resp));
>  	if (rv) {
> diff --git a/providers/vmw_pvrdma/pvrdma.h b/providers/vmw_pvrdma/pvrdma.h
> index ebd50ce1..d90bd809 100644
> --- a/providers/vmw_pvrdma/pvrdma.h
> +++ b/providers/vmw_pvrdma/pvrdma.h
> @@ -281,8 +281,8 @@ int pvrdma_query_port(struct ibv_context *context, uint8_t port,
>  struct ibv_pd *pvrdma_alloc_pd(struct ibv_context *context);
>  int pvrdma_free_pd(struct ibv_pd *pd);
>  
> -struct ibv_mr *pvrdma_reg_mr(struct ibv_pd *pd, void *addr,
> -			     size_t length, int access);
> +struct ibv_mr *pvrdma_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
> +			     uint64_t hca_va, int access);
>  int pvrdma_dereg_mr(struct verbs_mr *mr);
>  
>  struct ibv_cq *pvrdma_create_cq(struct ibv_context *context, int cqe,
> diff --git a/providers/vmw_pvrdma/verbs.c b/providers/vmw_pvrdma/verbs.c
> index e27952bf..e8423c01 100644
> --- a/providers/vmw_pvrdma/verbs.c
> +++ b/providers/vmw_pvrdma/verbs.c
> @@ -112,7 +112,7 @@ int pvrdma_free_pd(struct ibv_pd *pd)
>  }
>  
>  struct ibv_mr *pvrdma_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
> -			     int access)
> +			     uint64_t hca_va, int access)
>  {
>  	struct verbs_mr *vmr;
>  	struct ibv_reg_mr cmd;
> @@ -123,9 +123,8 @@ struct ibv_mr *pvrdma_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
>  	if (!vmr)
>  		return NULL;
>  
> -	ret = ibv_cmd_reg_mr(pd, addr, length, (uintptr_t) addr,
> -			     access, vmr, &cmd, sizeof(cmd),
> -			     &resp, sizeof(resp));
> +	ret = ibv_cmd_reg_mr(pd, addr, length, hca_va, access, vmr, &cmd,
> +			     sizeof(cmd), &resp, sizeof(resp));
>  	if (ret) {
>  		free(vmr);
>  		return NULL;
> diff --git a/redhat/rdma-core.spec b/redhat/rdma-core.spec
> index 6149f6a6..f07919cc 100644
> --- a/redhat/rdma-core.spec
> +++ b/redhat/rdma-core.spec
> @@ -1,5 +1,5 @@
>  Name: rdma-core
> -Version: 25.0
> +Version: 26.0
>  Release: 1%{?dist}
>  Summary: RDMA core userspace libraries and daemons
>  
> diff --git a/suse/rdma-core.spec b/suse/rdma-core.spec
> index 273bf45d..5a01327c 100644
> --- a/suse/rdma-core.spec
> +++ b/suse/rdma-core.spec
> @@ -23,7 +23,7 @@
>  
>  %define         git_ver %{nil}
>  Name:           rdma-core
> -Version:        25.0
> +Version:        26.0
>  Release:        0
>  Summary:        RDMA core userspace libraries and daemons
>  License:        GPL-2.0-only OR BSD-2-Clause
> -- 
> 2.20.1
> 

^ permalink raw reply

* [PATCH v8] verbs: Introduce a new reg_mr API for virtual address space
From: Yuval Shaia @ 2019-07-15 14:13 UTC (permalink / raw)
  To: yishaih, dledford, leon, jgg, mark.haywood, leonro, linux-rdma
  Cc: Yuval Shaia

The virtual address that is registered is used as a base for any address
passed later in post_recv and post_send operations.

In some virtualized environments this is not correct.

A guest cannot register its memory, so the hypervisor maps the guest
physical address to a host virtual address and registers it with the HW.
Later on, in the datapath phase, the guest fills the SGEs with addresses
from its own address space.
Since the HW cannot access the guest virtual address space, an extra
translation is needed to rebase those addresses onto the host virtual
address that was registered with the HW.
This datapath interference hurts performance.

To avoid this, a logical separation is needed between the address that
is registered and the address that is used as an offset in the datapath
phase.
This separation is already implemented in the lower layer
(ibv_cmd_reg_mr) but is blocked at the API level.

Fix this by introducing a new API function that also accepts an address
from the guest virtual address space, to be used as the offset for later
datapath operations.

Signed-off-by: Yuval Shaia <yuval.shaia@oracle.com>
Reviewed-by: Leon Romanovsky <leonro@mellanox.com>
---
v0 -> v1:
	* Change reg_mr callback signature instead of adding new callback
	* Add the new API to libibverbs/libibverbs.map.in
v1 -> v2:
	* Do not modify reg_mr signature for version 1.0
	* Add note to man page
v2 -> v3:
	* Rename function to reg_mr_iova (and arg-name to iova)
	* Some checkpatch issues not related to this fix but detected now
		* s/__FUNCTION__/__func__
		* WARNING: function definition argument 'void *' should
		  also have an identifier name
v3 -> v4:
	* Fix commit message as suggested by Adit Ranadive
	* Add support for efa
v4 -> v5:
	* Update PABI
	* Update debian files
v5 -> v6:
	* Move the new API to a new section (IBVERBS_1.7) in
	  libibverbs/libibverbs.map.in, as pointed out by Mark Haywood
v6 -> v7:
	* 
v7 -> v8:
	* Update also redhat and suse specfiles so now all CI checks in
	  github passed.
	* Leon, I have your r-b from v5; I would appreciate it if you could
	  take another look now, with all the latest changes
---
 CMakeLists.txt                    |  4 ++--
 buildlib/cbuild                   |  6 ++++++
 debian/control                    |  2 +-
 debian/libibverbs1.symbols        |  4 +++-
 libibverbs/CMakeLists.txt         |  2 +-
 libibverbs/driver.h               |  2 +-
 libibverbs/dummy_ops.c            |  2 +-
 libibverbs/libibverbs.map.in      |  5 +++++
 libibverbs/man/ibv_reg_mr.3       | 15 +++++++++++++--
 libibverbs/verbs.c                | 23 ++++++++++++++++++++++-
 libibverbs/verbs.h                |  7 +++++++
 providers/bnxt_re/verbs.c         |  6 +++---
 providers/bnxt_re/verbs.h         |  2 +-
 providers/cxgb3/iwch.h            |  4 ++--
 providers/cxgb3/verbs.c           | 15 +++++----------
 providers/cxgb4/libcxgb4.h        |  4 ++--
 providers/cxgb4/verbs.c           | 15 +++++----------
 providers/efa/verbs.c             |  4 ++--
 providers/efa/verbs.h             |  2 +-
 providers/hfi1verbs/hfiverbs.h    |  4 ++--
 providers/hfi1verbs/verbs.c       |  8 ++++----
 providers/hns/hns_roce_u.h        |  2 +-
 providers/hns/hns_roce_u_verbs.c  |  6 +++---
 providers/i40iw/i40iw_umain.h     |  3 ++-
 providers/i40iw/i40iw_uverbs.c    |  8 ++++----
 providers/ipathverbs/ipathverbs.h |  4 ++--
 providers/ipathverbs/verbs.c      |  8 ++++----
 providers/mlx4/mlx4.h             |  4 ++--
 providers/mlx4/verbs.c            |  7 +++----
 providers/mlx5/mlx5.h             |  4 ++--
 providers/mlx5/verbs.c            |  7 +++----
 providers/mthca/ah.c              |  3 ++-
 providers/mthca/mthca.h           |  4 ++--
 providers/mthca/verbs.c           |  6 +++---
 providers/nes/nes_umain.h         |  3 ++-
 providers/nes/nes_uverbs.c        |  9 ++++-----
 providers/ocrdma/ocrdma_main.h    |  4 ++--
 providers/ocrdma/ocrdma_verbs.c   | 10 ++++------
 providers/qedr/qelr_main.h        |  4 ++--
 providers/qedr/qelr_verbs.c       | 11 ++++-------
 providers/qedr/qelr_verbs.h       |  4 ++--
 providers/rxe/rxe.c               |  6 +++---
 providers/siw/siw.c               |  4 ++--
 providers/vmw_pvrdma/pvrdma.h     |  4 ++--
 providers/vmw_pvrdma/verbs.c      |  7 +++----
 redhat/rdma-core.spec             |  2 +-
 suse/rdma-core.spec               |  2 +-
 47 files changed, 154 insertions(+), 118 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index b2613284..67112ae3 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -68,11 +68,11 @@ endif()
 set(PACKAGE_NAME "RDMA")
 
 # See Documentation/versioning.md
-set(PACKAGE_VERSION "25.0")
+set(PACKAGE_VERSION "26.0")
 # When this is changed the values in these files need changing too:
 #   debian/control
 #   debian/libibverbs1.symbols
-set(IBVERBS_PABI_VERSION "25")
+set(IBVERBS_PABI_VERSION "26")
 set(IBVERBS_PROVIDER_SUFFIX "-rdmav${IBVERBS_PABI_VERSION}.so")
 
 #-------------------------
diff --git a/buildlib/cbuild b/buildlib/cbuild
index 83ada8ee..1658cc9c 100755
--- a/buildlib/cbuild
+++ b/buildlib/cbuild
@@ -1054,6 +1054,9 @@ def cmd_make_dist_tar(args):
     """Make the standard distribution tar. The BUILD argument must point to a build
     output directory that has pandoc-prebuilt"""
     ver = get_version();
+    #print "file=%s"%(spec_file);
+    #print "ver=%s"%(ver);
+    #print "get_version()=%s"%(ver);
 
     if not args.tarfn:
         args.tarfn = "%s-%s.tar.gz"%(project,ver)
@@ -1072,6 +1075,7 @@ def cmd_make_dist_tar(args):
                                "HEAD"]);
 
         # Mangle the paths and append the prebuilt stuff to the tar file
+        print "file=%s"%(tmp_tarfn);
         if args.BUILD:
             subprocess.check_call([
                 "tar",
@@ -1080,7 +1084,9 @@ def cmd_make_dist_tar(args):
                 "./",
                 "--xform",r"s|^\.|%sbuildlib/pandoc-prebuilt|g"%(prefix)]);
 
+        print "file=%s"%(tmp_tarfn);
         assert args.tarfn.endswith(".gz") or args.tarfn.endswith(".tgz");
+        print "file=%s"%(tmp_tarfn);
         with open(os.path.join(args.script_pwd,args.tarfn),"w") as F:
             subprocess.check_call(["gzip","-9c",tmp_tarfn],stdout=F);
 
diff --git a/debian/control b/debian/control
index dfd0184a..22010aed 100644
--- a/debian/control
+++ b/debian/control
@@ -162,7 +162,7 @@ Section: libs
 Pre-Depends: ${misc:Pre-Depends}
 Depends: adduser, ${misc:Depends}, ${shlibs:Depends}
 Recommends: ibverbs-providers
-Breaks: ibverbs-providers (<< 25~)
+Breaks: ibverbs-providers (<< 26~)
 Description: Library for direct userspace use of RDMA (InfiniBand/iWARP)
  libibverbs is a library that allows userspace processes to use RDMA
  "verbs" as described in the InfiniBand Architecture Specification and
diff --git a/debian/libibverbs1.symbols b/debian/libibverbs1.symbols
index 39b3d4a9..8df78756 100644
--- a/debian/libibverbs1.symbols
+++ b/debian/libibverbs1.symbols
@@ -4,7 +4,8 @@ libibverbs.so.1 libibverbs1 #MINVER#
  IBVERBS_1.1@IBVERBS_1.1 1.1.6
  IBVERBS_1.5@IBVERBS_1.5 20
  IBVERBS_1.6@IBVERBS_1.6 24
- (symver)IBVERBS_PRIVATE_25 25
+ IBVERBS_1.7@IBVERBS_1.7 25.0-1
+ (symver)IBVERBS_PRIVATE_26 25.0-1
  ibv_ack_async_event@IBVERBS_1.0 1.1.6
  ibv_ack_async_event@IBVERBS_1.1 1.1.6
  ibv_ack_cq_events@IBVERBS_1.0 1.1.6
@@ -89,6 +90,7 @@ libibverbs.so.1 libibverbs1 #MINVER#
  ibv_read_sysfs_file@IBVERBS_1.0 1.1.6
  ibv_reg_mr@IBVERBS_1.0 1.1.6
  ibv_reg_mr@IBVERBS_1.1 1.1.6
+ ibv_reg_mr_iova@IBVERBS_1.7 25.0-1
  ibv_register_driver@IBVERBS_1.1 1.1.6
  ibv_rereg_mr@IBVERBS_1.1 1.2.1
  ibv_resize_cq@IBVERBS_1.0 1.1.6
diff --git a/libibverbs/CMakeLists.txt b/libibverbs/CMakeLists.txt
index 1f5c59ed..a5926bbd 100644
--- a/libibverbs/CMakeLists.txt
+++ b/libibverbs/CMakeLists.txt
@@ -21,7 +21,7 @@ configure_file("libibverbs.map.in"
 
 rdma_library(ibverbs "${CMAKE_CURRENT_BINARY_DIR}/libibverbs.map"
   # See Documentation/versioning.md
-  1 1.6.${PACKAGE_VERSION}
+  1 1.7.${PACKAGE_VERSION}
   all_providers.c
   cmd.c
   cmd_ah.c
diff --git a/libibverbs/driver.h b/libibverbs/driver.h
index 2e2131f2..88ed2b5e 100644
--- a/libibverbs/driver.h
+++ b/libibverbs/driver.h
@@ -362,7 +362,7 @@ struct verbs_context_ops {
 				    uint64_t dm_offset, size_t length,
 				    unsigned int access);
 	struct ibv_mr *(*reg_mr)(struct ibv_pd *pd, void *addr, size_t length,
-				 int access);
+				 uint64_t hca_va, int access);
 	int (*req_notify_cq)(struct ibv_cq *cq, int solicited_only);
 	int (*rereg_mr)(struct verbs_mr *vmr, int flags, struct ibv_pd *pd,
 			void *addr, size_t length, int access);
diff --git a/libibverbs/dummy_ops.c b/libibverbs/dummy_ops.c
index ebc6eddd..6560371a 100644
--- a/libibverbs/dummy_ops.c
+++ b/libibverbs/dummy_ops.c
@@ -411,7 +411,7 @@ static struct ibv_mr *reg_dm_mr(struct ibv_pd *pd, struct ibv_dm *dm,
 }
 
 static struct ibv_mr *reg_mr(struct ibv_pd *pd, void *addr, size_t length,
-			     int access)
+			     uint64_t hca_va,  int access)
 {
 	errno = ENOSYS;
 	return NULL;
diff --git a/libibverbs/libibverbs.map.in b/libibverbs/libibverbs.map.in
index ee253ec0..c1b4537a 100644
--- a/libibverbs/libibverbs.map.in
+++ b/libibverbs/libibverbs.map.in
@@ -116,6 +116,11 @@ IBVERBS_1.6 {
 		ibv_qp_to_qp_ex;
 } IBVERBS_1.5;
 
+IBVERBS_1.7 {
+	global:
+		ibv_reg_mr_iova;
+} IBVERBS_1.6;
+
 /* If any symbols in this stanza change ABI then the entire staza gets a new symbol
    version. See the top level CMakeLists.txt for this setting. */
 
diff --git a/libibverbs/man/ibv_reg_mr.3 b/libibverbs/man/ibv_reg_mr.3
index 631e5fe8..be90a57b 100644
--- a/libibverbs/man/ibv_reg_mr.3
+++ b/libibverbs/man/ibv_reg_mr.3
@@ -3,7 +3,7 @@
 .\"
 .TH IBV_REG_MR 3 2006-10-31 libibverbs "Libibverbs Programmer's Manual"
 .SH "NAME"
-ibv_reg_mr, ibv_dereg_mr \- register or deregister a memory region (MR)
+ibv_reg_mr, ibv_reg_mr_iova, ibv_dereg_mr \- register or deregister a memory region (MR)
 .SH "SYNOPSIS"
 .nf
 .B #include <infiniband/verbs.h>
@@ -11,6 +11,10 @@ ibv_reg_mr, ibv_dereg_mr \- register or deregister a memory region (MR)
 .BI "struct ibv_mr *ibv_reg_mr(struct ibv_pd " "*pd" ", void " "*addr" ,
 .BI "                          size_t " "length" ", int " "access" );
 .sp
+.BI "struct ibv_mr *ibv_reg_mr_iova(struct ibv_pd " "*pd" ", void " "*addr" ,
+.BI "                               size_t " "length" ", uint64_t " "hca_va" ,
+.BI "                               int " "access" );
+.sp
 .BI "int ibv_dereg_mr(struct ibv_mr " "*mr" );
 .fi
 .SH "DESCRIPTION"
@@ -52,11 +56,18 @@ Local read access is always enabled for the MR.
 .PP
 To create an implicit ODP MR, IBV_ACCESS_ON_DEMAND should be set, addr should be 0 and length should be SIZE_MAX.
 .PP
+.B ibv_reg_mr_iova()
+ibv_reg_mr_iova is the same as the normal reg_mr, except that the user is
+allowed to specify the virtual base address of the MR when accessed through
+a lkey or rkey. The offset in the memory region is computed as 'addr +
+(iova - hca_va)'. Specifying 0 for hca_va has the same effect as
+IBV_ACCESS_ZERO_BASED.
+.PP
 .B ibv_dereg_mr()
 deregisters the MR
 .I mr\fR.
 .SH "RETURN VALUE"
-.B ibv_reg_mr()
+.B ibv_reg_mr() / ibv_reg_mr_iova()
 returns a pointer to the registered MR, or NULL if the request fails.
 The local key (\fBL_Key\fR) field
 .B lkey
diff --git a/libibverbs/verbs.c b/libibverbs/verbs.c
index c7e8e8e9..e5063af2 100644
--- a/libibverbs/verbs.c
+++ b/libibverbs/verbs.c
@@ -306,7 +306,28 @@ LATEST_SYMVER_FUNC(ibv_reg_mr, 1_1, "IBVERBS_1.1",
 	if (ibv_dontfork_range(addr, length))
 		return NULL;
 
-	mr = get_ops(pd->context)->reg_mr(pd, addr, length, access);
+	mr = get_ops(pd->context)->reg_mr(pd, addr, length, (uintptr_t) addr,
+					  access);
+	if (mr) {
+		mr->context = pd->context;
+		mr->pd      = pd;
+		mr->addr    = addr;
+		mr->length  = length;
+	} else
+		ibv_dofork_range(addr, length);
+
+	return mr;
+}
+
+struct ibv_mr *ibv_reg_mr_iova(struct ibv_pd *pd, void *addr, size_t length,
+			       uint64_t iova, int access)
+{
+	struct ibv_mr *mr;
+
+	if (ibv_dontfork_range(addr, length))
+		return NULL;
+
+	mr = get_ops(pd->context)->reg_mr(pd, addr, length, iova, access);
 	if (mr) {
 		mr->context = pd->context;
 		mr->pd      = pd;
diff --git a/libibverbs/verbs.h b/libibverbs/verbs.h
index 1b0aef03..1e01b5db 100644
--- a/libibverbs/verbs.h
+++ b/libibverbs/verbs.h
@@ -2376,6 +2376,13 @@ static inline int ibv_close_xrcd(struct ibv_xrcd *xrcd)
 struct ibv_mr *ibv_reg_mr(struct ibv_pd *pd, void *addr,
 			  size_t length, int access);
 
+/**
+ * ibv_reg_mr_iova - Register a memory region with a virtual offset
+ * address
+ */
+struct ibv_mr *ibv_reg_mr_iova(struct ibv_pd *pd, void *addr, size_t length,
+			       uint64_t iova, int access);
+
 
 enum ibv_rereg_mr_err_code {
 	/* Old MR is valid, invalid input */
diff --git a/providers/bnxt_re/verbs.c b/providers/bnxt_re/verbs.c
index ed7ddb6e..2218e3a0 100644
--- a/providers/bnxt_re/verbs.c
+++ b/providers/bnxt_re/verbs.c
@@ -131,7 +131,7 @@ int bnxt_re_free_pd(struct ibv_pd *ibvpd)
 }
 
 struct ibv_mr *bnxt_re_reg_mr(struct ibv_pd *ibvpd, void *sva, size_t len,
-			      int access)
+			      uint64_t hca_va, int access)
 {
 	struct bnxt_re_mr *mr;
 	struct ibv_reg_mr cmd;
@@ -141,8 +141,8 @@ struct ibv_mr *bnxt_re_reg_mr(struct ibv_pd *ibvpd, void *sva, size_t len,
 	if (!mr)
 		return NULL;
 
-	if (ibv_cmd_reg_mr(ibvpd, sva, len, (uintptr_t)sva, access, &mr->vmr,
-			   &cmd, sizeof(cmd), &resp.ibv_resp, sizeof(resp))) {
+	if (ibv_cmd_reg_mr(ibvpd, sva, len, hca_va, access, &mr->vmr, &cmd,
+			   sizeof(cmd), &resp.ibv_resp, sizeof(resp))) {
 		free(mr);
 		return NULL;
 	}
diff --git a/providers/bnxt_re/verbs.h b/providers/bnxt_re/verbs.h
index b565d7e6..2e994880 100644
--- a/providers/bnxt_re/verbs.h
+++ b/providers/bnxt_re/verbs.h
@@ -61,7 +61,7 @@ int bnxt_re_query_port(struct ibv_context *uctx, uint8_t port,
 struct ibv_pd *bnxt_re_alloc_pd(struct ibv_context *uctx);
 int bnxt_re_free_pd(struct ibv_pd *ibvpd);
 struct ibv_mr *bnxt_re_reg_mr(struct ibv_pd *ibvpd, void *buf, size_t len,
-			      int ibv_access_flags);
+			      uint64_t hca_va, int ibv_access_flags);
 int bnxt_re_dereg_mr(struct verbs_mr *vmr);
 
 struct ibv_cq *bnxt_re_create_cq(struct ibv_context *uctx, int ncqe,
diff --git a/providers/cxgb3/iwch.h b/providers/cxgb3/iwch.h
index c8de44e9..c7d85d3a 100644
--- a/providers/cxgb3/iwch.h
+++ b/providers/cxgb3/iwch.h
@@ -150,8 +150,8 @@ extern int iwch_query_port(struct ibv_context *context, uint8_t port,
 extern struct ibv_pd *iwch_alloc_pd(struct ibv_context *context);
 extern int iwch_free_pd(struct ibv_pd *pd);
 
-extern struct ibv_mr *iwch_reg_mr(struct ibv_pd *pd, void *addr,
-				  size_t length, int access);
+extern struct ibv_mr *iwch_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
+				  uint64_t hca_va, int access);
 extern int iwch_dereg_mr(struct verbs_mr *mr);
 
 struct ibv_cq *iwch_create_cq(struct ibv_context *context, int cqe,
diff --git a/providers/cxgb3/verbs.c b/providers/cxgb3/verbs.c
index 8b90482a..39a44192 100644
--- a/providers/cxgb3/verbs.c
+++ b/providers/cxgb3/verbs.c
@@ -103,15 +103,17 @@ int iwch_free_pd(struct ibv_pd *pd)
 	return 0;
 }
 
-static struct ibv_mr *__iwch_reg_mr(struct ibv_pd *pd, void *addr,
-				    size_t length, uint64_t hca_va,
-				    int access)
+struct ibv_mr *iwch_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
+			   uint64_t hca_va, int access)
 {
 	struct iwch_mr *mhp;
 	struct ibv_reg_mr cmd;
 	struct uiwch_reg_mr_resp resp;
 	struct iwch_device *dev = to_iwch_dev(pd->context->device);
 
+	PDBG("%s addr %p length %ld hca_va %p\n", __func__, addr, length,
+	     hca_va);
+
 	mhp = malloc(sizeof *mhp);
 	if (!mhp)
 		return NULL;
@@ -140,13 +142,6 @@ static struct ibv_mr *__iwch_reg_mr(struct ibv_pd *pd, void *addr,
 	return &mhp->vmr.ibv_mr;
 }
 
-struct ibv_mr *iwch_reg_mr(struct ibv_pd *pd, void *addr,
-			   size_t length, int access)
-{
-	PDBG("%s addr %p length %ld\n", __FUNCTION__, addr, length);
-	return __iwch_reg_mr(pd, addr, length, (uintptr_t) addr, access);
-}
-
 int iwch_dereg_mr(struct verbs_mr *vmr)
 {
 	int ret;
diff --git a/providers/cxgb4/libcxgb4.h b/providers/cxgb4/libcxgb4.h
index 0fbceab0..ce8f29dd 100644
--- a/providers/cxgb4/libcxgb4.h
+++ b/providers/cxgb4/libcxgb4.h
@@ -198,8 +198,8 @@ int c4iw_query_port(struct ibv_context *context, uint8_t port,
 struct ibv_pd *c4iw_alloc_pd(struct ibv_context *context);
 int c4iw_free_pd(struct ibv_pd *pd);
 
-struct ibv_mr *c4iw_reg_mr(struct ibv_pd *pd, void *addr,
-				  size_t length, int access);
+struct ibv_mr *c4iw_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
+			   uint64_t hca_va, int access);
 int c4iw_dereg_mr(struct verbs_mr *vmr);
 
 struct ibv_cq *c4iw_create_cq(struct ibv_context *context, int cqe,
diff --git a/providers/cxgb4/verbs.c b/providers/cxgb4/verbs.c
index 452e4f1f..4240f6b3 100644
--- a/providers/cxgb4/verbs.c
+++ b/providers/cxgb4/verbs.c
@@ -109,15 +109,17 @@ int c4iw_free_pd(struct ibv_pd *pd)
 	return 0;
 }
 
-static struct ibv_mr *__c4iw_reg_mr(struct ibv_pd *pd, void *addr,
-				    size_t length, uint64_t hca_va,
-				    int access)
+struct ibv_mr *c4iw_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
+			   uint64_t hca_va, int access)
 {
 	struct c4iw_mr *mhp;
 	struct ibv_reg_mr cmd;
 	struct ib_uverbs_reg_mr_resp resp;
 	struct c4iw_dev *dev = to_c4iw_dev(pd->context->device);
 
+	PDBG("%s addr %p length %ld hca_va %p\n", __func__, addr, length,
+	     hca_va);
+
 	mhp = malloc(sizeof *mhp);
 	if (!mhp)
 		return NULL;
@@ -142,13 +144,6 @@ static struct ibv_mr *__c4iw_reg_mr(struct ibv_pd *pd, void *addr,
 	return &mhp->vmr.ibv_mr;
 }
 
-struct ibv_mr *c4iw_reg_mr(struct ibv_pd *pd, void *addr,
-			   size_t length, int access)
-{
-	PDBG("%s addr %p length %ld\n", __func__, addr, length);
-	return __c4iw_reg_mr(pd, addr, length, (uintptr_t) addr, access);
-}
-
 int c4iw_dereg_mr(struct verbs_mr *vmr)
 {
 	int ret;
diff --git a/providers/efa/verbs.c b/providers/efa/verbs.c
index 4d36f9e1..d2500ecb 100644
--- a/providers/efa/verbs.c
+++ b/providers/efa/verbs.c
@@ -126,7 +126,7 @@ int efa_dealloc_pd(struct ibv_pd *ibvpd)
 }
 
 struct ibv_mr *efa_reg_mr(struct ibv_pd *ibvpd, void *sva, size_t len,
-			  int access)
+			  uint64_t hca_va, int access)
 {
 	struct ib_uverbs_reg_mr_resp resp;
 	struct ibv_reg_mr cmd;
@@ -136,7 +136,7 @@ struct ibv_mr *efa_reg_mr(struct ibv_pd *ibvpd, void *sva, size_t len,
 	if (!mr)
 		return NULL;
 
-	if (ibv_cmd_reg_mr(ibvpd, sva, len, (uintptr_t)sva, access, &mr->vmr,
+	if (ibv_cmd_reg_mr(ibvpd, sva, len, hca_va, access, &mr->vmr,
 			   &cmd, sizeof(cmd), &resp, sizeof(resp))) {
 		free(mr);
 		return NULL;
diff --git a/providers/efa/verbs.h b/providers/efa/verbs.h
index 1a49653f..7b532adc 100644
--- a/providers/efa/verbs.h
+++ b/providers/efa/verbs.h
@@ -18,7 +18,7 @@ int efa_query_device_ex(struct ibv_context *context,
 struct ibv_pd *efa_alloc_pd(struct ibv_context *uctx);
 int efa_dealloc_pd(struct ibv_pd *ibvpd);
 struct ibv_mr *efa_reg_mr(struct ibv_pd *ibvpd, void *buf, size_t len,
-			  int ibv_access_flags);
+			  uint64_t hca_va, int ibv_access_flags);
 int efa_dereg_mr(struct verbs_mr *vmr);
 
 struct ibv_cq *efa_create_cq(struct ibv_context *uctx, int ncqe,
diff --git a/providers/hfi1verbs/hfiverbs.h b/providers/hfi1verbs/hfiverbs.h
index 070a01c9..b9e91d80 100644
--- a/providers/hfi1verbs/hfiverbs.h
+++ b/providers/hfi1verbs/hfiverbs.h
@@ -204,8 +204,8 @@ struct ibv_pd *hfi1_alloc_pd(struct ibv_context *pd);
 
 int hfi1_free_pd(struct ibv_pd *pd);
 
-struct ibv_mr *hfi1_reg_mr(struct ibv_pd *pd, void *addr,
-			    size_t length, int access);
+struct ibv_mr *hfi1_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
+			   uint64_t hca_va, int access);
 
 int hfi1_dereg_mr(struct verbs_mr *vmr);
 
diff --git a/providers/hfi1verbs/verbs.c b/providers/hfi1verbs/verbs.c
index ff001f6d..275f8d51 100644
--- a/providers/hfi1verbs/verbs.c
+++ b/providers/hfi1verbs/verbs.c
@@ -129,8 +129,8 @@ int hfi1_free_pd(struct ibv_pd *pd)
 	return 0;
 }
 
-struct ibv_mr *hfi1_reg_mr(struct ibv_pd *pd, void *addr,
-			    size_t length, int access)
+struct ibv_mr *hfi1_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
+			   uint64_t hca_va, int access)
 {
 	struct verbs_mr *vmr;
 	struct ibv_reg_mr cmd;
@@ -141,8 +141,8 @@ struct ibv_mr *hfi1_reg_mr(struct ibv_pd *pd, void *addr,
 	if (!vmr)
 		return NULL;
 
-	ret = ibv_cmd_reg_mr(pd, addr, length, (uintptr_t)addr, access, vmr,
-			     &cmd, sizeof cmd, &resp, sizeof resp);
+	ret = ibv_cmd_reg_mr(pd, addr, length, hca_va, access, vmr, &cmd,
+			     sizeof(cmd), &resp, sizeof(resp));
 
 	if (ret) {
 		free(vmr);
diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h
index 93bc47c1..517d97b3 100644
--- a/providers/hns/hns_roce_u.h
+++ b/providers/hns/hns_roce_u.h
@@ -293,7 +293,7 @@ struct ibv_pd *hns_roce_u_alloc_pd(struct ibv_context *context);
 int hns_roce_u_free_pd(struct ibv_pd *pd);
 
 struct ibv_mr *hns_roce_u_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
-				 int access);
+				 uint64_t hca_va, int access);
 int hns_roce_u_rereg_mr(struct verbs_mr *mr, int flags, struct ibv_pd *pd,
 			void *addr, size_t length, int access);
 int hns_roce_u_dereg_mr(struct verbs_mr *mr);
diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c
index 9ba65a1d..44bc32a4 100644
--- a/providers/hns/hns_roce_u_verbs.c
+++ b/providers/hns/hns_roce_u_verbs.c
@@ -120,7 +120,7 @@ int hns_roce_u_free_pd(struct ibv_pd *pd)
 }
 
 struct ibv_mr *hns_roce_u_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
-				 int access)
+				 uint64_t hca_va, int access)
 {
 	int ret;
 	struct verbs_mr *vmr;
@@ -141,8 +141,8 @@ struct ibv_mr *hns_roce_u_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
 	if (!vmr)
 		return NULL;
 
-	ret = ibv_cmd_reg_mr(pd, addr, length, (uintptr_t)addr, access, vmr,
-			     &cmd, sizeof(cmd), &resp, sizeof(resp));
+	ret = ibv_cmd_reg_mr(pd, addr, length, hca_va, access, vmr, &cmd,
+			     sizeof(cmd), &resp, sizeof(resp));
 	if (ret) {
 		free(vmr);
 		return NULL;
diff --git a/providers/i40iw/i40iw_umain.h b/providers/i40iw/i40iw_umain.h
index 4055933a..af84f284 100644
--- a/providers/i40iw/i40iw_umain.h
+++ b/providers/i40iw/i40iw_umain.h
@@ -155,7 +155,8 @@ int i40iw_uquery_device(struct ibv_context *, struct ibv_device_attr *);
 int i40iw_uquery_port(struct ibv_context *, uint8_t, struct ibv_port_attr *);
 struct ibv_pd *i40iw_ualloc_pd(struct ibv_context *);
 int i40iw_ufree_pd(struct ibv_pd *);
-struct ibv_mr *i40iw_ureg_mr(struct ibv_pd *, void *, size_t, int);
+struct ibv_mr *i40iw_ureg_mr(struct ibv_pd *pd, void *addr, size_t length,
+			     uint64_t hca_va, int access);
 int i40iw_udereg_mr(struct verbs_mr *vmr);
 struct ibv_cq *i40iw_ucreate_cq(struct ibv_context *, int, struct ibv_comp_channel *, int);
 int i40iw_uresize_cq(struct ibv_cq *, int);
diff --git a/providers/i40iw/i40iw_uverbs.c b/providers/i40iw/i40iw_uverbs.c
index 83b504fa..240150b9 100644
--- a/providers/i40iw/i40iw_uverbs.c
+++ b/providers/i40iw/i40iw_uverbs.c
@@ -149,7 +149,8 @@ int i40iw_ufree_pd(struct ibv_pd *pd)
  * @length: length of the memory
  * @access: access allowed on this mr
  */
-struct ibv_mr *i40iw_ureg_mr(struct ibv_pd *pd, void *addr, size_t length, int access)
+struct ibv_mr *i40iw_ureg_mr(struct ibv_pd *pd, void *addr, size_t length,
+			     uint64_t hca_va, int access)
 {
 	struct verbs_mr *vmr;
 	struct i40iw_ureg_mr cmd;
@@ -161,9 +162,8 @@ struct ibv_mr *i40iw_ureg_mr(struct ibv_pd *pd, void *addr, size_t length, int a
 
 	cmd.reg_type = IW_MEMREG_TYPE_MEM;
 
-	if (ibv_cmd_reg_mr(pd, addr, length, (uintptr_t)addr,
-			   access, vmr, &cmd.ibv_cmd, sizeof(cmd),
-			   &resp, sizeof(resp))) {
+	if (ibv_cmd_reg_mr(pd, addr, length, hca_va, access, vmr, &cmd.ibv_cmd,
+			   sizeof(cmd), &resp, sizeof(resp))) {
 		fprintf(stderr, PFX "%s: Failed to register memory\n", __func__);
 		free(vmr);
 		return NULL;
diff --git a/providers/ipathverbs/ipathverbs.h b/providers/ipathverbs/ipathverbs.h
index cfb5cc38..694f1f44 100644
--- a/providers/ipathverbs/ipathverbs.h
+++ b/providers/ipathverbs/ipathverbs.h
@@ -183,8 +183,8 @@ struct ibv_pd *ipath_alloc_pd(struct ibv_context *pd);
 
 int ipath_free_pd(struct ibv_pd *pd);
 
-struct ibv_mr *ipath_reg_mr(struct ibv_pd *pd, void *addr,
-			    size_t length, int access);
+struct ibv_mr *ipath_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
+			    uint64_t hca_va, int access);
 
 int ipath_dereg_mr(struct verbs_mr *vmr);
 
diff --git a/providers/ipathverbs/verbs.c b/providers/ipathverbs/verbs.c
index de4722b2..505ea584 100644
--- a/providers/ipathverbs/verbs.c
+++ b/providers/ipathverbs/verbs.c
@@ -109,8 +109,8 @@ int ipath_free_pd(struct ibv_pd *pd)
 	return 0;
 }
 
-struct ibv_mr *ipath_reg_mr(struct ibv_pd *pd, void *addr,
-			    size_t length, int access)
+struct ibv_mr *ipath_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
+			    uint64_t hca_va, int access)
 {
 	struct verbs_mr *vmr;
 	struct ibv_reg_mr cmd;
@@ -121,8 +121,8 @@ struct ibv_mr *ipath_reg_mr(struct ibv_pd *pd, void *addr,
 	if (!vmr)
 		return NULL;
 
-	ret = ibv_cmd_reg_mr(pd, addr, length, (uintptr_t)addr, access, vmr,
-			     &cmd, sizeof cmd, &resp, sizeof resp);
+	ret = ibv_cmd_reg_mr(pd, addr, length, hca_va, access, vmr, &cmd,
+			     sizeof(cmd), &resp, sizeof(resp));
 	if (ret) {
 		free(vmr);
 		return NULL;
diff --git a/providers/mlx4/mlx4.h b/providers/mlx4/mlx4.h
index 9c21d775..3c161e8e 100644
--- a/providers/mlx4/mlx4.h
+++ b/providers/mlx4/mlx4.h
@@ -320,8 +320,8 @@ struct ibv_xrcd *mlx4_open_xrcd(struct ibv_context *context,
 				struct ibv_xrcd_init_attr *attr);
 int mlx4_close_xrcd(struct ibv_xrcd *xrcd);
 
-struct ibv_mr *mlx4_reg_mr(struct ibv_pd *pd, void *addr,
-			    size_t length, int access);
+struct ibv_mr *mlx4_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
+			   uint64_t hca_va, int access);
 int mlx4_rereg_mr(struct verbs_mr *vmr, int flags, struct ibv_pd *pd,
 		  void *addr, size_t length, int access);
 int mlx4_dereg_mr(struct verbs_mr *vmr);
diff --git a/providers/mlx4/verbs.c b/providers/mlx4/verbs.c
index 9a5affe7..d814a2bc 100644
--- a/providers/mlx4/verbs.c
+++ b/providers/mlx4/verbs.c
@@ -275,7 +275,7 @@ int mlx4_close_xrcd(struct ibv_xrcd *ib_xrcd)
 }
 
 struct ibv_mr *mlx4_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
-			   int access)
+			   uint64_t hca_va, int access)
 {
 	struct verbs_mr *vmr;
 	struct ibv_reg_mr cmd;
@@ -286,9 +286,8 @@ struct ibv_mr *mlx4_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
 	if (!vmr)
 		return NULL;
 
-	ret = ibv_cmd_reg_mr(pd, addr, length, (uintptr_t) addr,
-			     access, vmr, &cmd, sizeof(cmd),
-			     &resp, sizeof(resp));
+	ret = ibv_cmd_reg_mr(pd, addr, length, hca_va, access, vmr, &cmd,
+			     sizeof(cmd), &resp, sizeof(resp));
 	if (ret) {
 		free(vmr);
 		return NULL;
diff --git a/providers/mlx5/mlx5.h b/providers/mlx5/mlx5.h
index d9fccdcc..ab3c2c1a 100644
--- a/providers/mlx5/mlx5.h
+++ b/providers/mlx5/mlx5.h
@@ -821,8 +821,8 @@ void mlx5_async_event(struct ibv_context *context,
 		      struct ibv_async_event *event);
 
 struct ibv_mr *mlx5_alloc_null_mr(struct ibv_pd *pd);
-struct ibv_mr *mlx5_reg_mr(struct ibv_pd *pd, void *addr,
-			   size_t length, int access);
+struct ibv_mr *mlx5_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
+			   uint64_t hca_va, int access);
 int mlx5_rereg_mr(struct verbs_mr *mr, int flags, struct ibv_pd *pd, void *addr,
 		  size_t length, int access);
 int mlx5_dereg_mr(struct verbs_mr *mr);
diff --git a/providers/mlx5/verbs.c b/providers/mlx5/verbs.c
index c13e81f4..714c5f7e 100644
--- a/providers/mlx5/verbs.c
+++ b/providers/mlx5/verbs.c
@@ -388,7 +388,7 @@ int mlx5_free_pd(struct ibv_pd *pd)
 }
 
 struct ibv_mr *mlx5_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
-			   int acc)
+			   uint64_t hca_va, int acc)
 {
 	struct mlx5_mr *mr;
 	struct ibv_reg_mr cmd;
@@ -400,9 +400,8 @@ struct ibv_mr *mlx5_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
 	if (!mr)
 		return NULL;
 
-	ret = ibv_cmd_reg_mr(pd, addr, length, (uintptr_t)addr, access,
-			     &mr->vmr, &cmd, sizeof(cmd), &resp,
-			     sizeof resp);
+	ret = ibv_cmd_reg_mr(pd, addr, length, hca_va, access, &mr->vmr, &cmd,
+			     sizeof(cmd), &resp, sizeof(resp));
 	if (ret) {
 		mlx5_free_buf(&(mr->buf));
 		free(mr);
diff --git a/providers/mthca/ah.c b/providers/mthca/ah.c
index df0cb281..adefb178 100644
--- a/providers/mthca/ah.c
+++ b/providers/mthca/ah.c
@@ -61,7 +61,8 @@ static struct mthca_ah_page *__add_page(struct mthca_pd *pd, int page_size, int
 		return NULL;
 	}
 
-	page->mr = mthca_reg_mr(&pd->ibv_pd, page->buf.buf, page_size, 0);
+	page->mr = mthca_reg_mr(&pd->ibv_pd, page->buf.buf, page_size,
+				(uintptr_t) page->buf.buf, 0);
 	if (!page->mr) {
 		mthca_free_buf(&page->buf);
 		free(page);
diff --git a/providers/mthca/mthca.h b/providers/mthca/mthca.h
index 61042de3..b7df2f73 100644
--- a/providers/mthca/mthca.h
+++ b/providers/mthca/mthca.h
@@ -280,8 +280,8 @@ int mthca_query_port(struct ibv_context *context, uint8_t port,
 struct ibv_pd *mthca_alloc_pd(struct ibv_context *context);
 int mthca_free_pd(struct ibv_pd *pd);
 
-struct ibv_mr *mthca_reg_mr(struct ibv_pd *pd, void *addr,
-			    size_t length, int access);
+struct ibv_mr *mthca_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
+			    uint64_t hca_va, int access);
 int mthca_dereg_mr(struct verbs_mr *mr);
 
 struct ibv_cq *mthca_create_cq(struct ibv_context *context, int cqe,
diff --git a/providers/mthca/verbs.c b/providers/mthca/verbs.c
index e7a1c357..99e5ec66 100644
--- a/providers/mthca/verbs.c
+++ b/providers/mthca/verbs.c
@@ -145,10 +145,10 @@ static struct ibv_mr *__mthca_reg_mr(struct ibv_pd *pd, void *addr,
 	return &vmr->ibv_mr;
 }
 
-struct ibv_mr *mthca_reg_mr(struct ibv_pd *pd, void *addr,
-			    size_t length, int access)
+struct ibv_mr *mthca_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
+			    uint64_t hca_va, int access)
 {
-	return __mthca_reg_mr(pd, addr, length, (uintptr_t) addr, access, 0);
+	return __mthca_reg_mr(pd, addr, length, hca_va, access, 0);
 }
 
 int mthca_dereg_mr(struct verbs_mr *vmr)
diff --git a/providers/nes/nes_umain.h b/providers/nes/nes_umain.h
index edb38622..1070ce42 100644
--- a/providers/nes/nes_umain.h
+++ b/providers/nes/nes_umain.h
@@ -350,7 +350,8 @@ int nes_uquery_device(struct ibv_context *, struct ibv_device_attr *);
 int nes_uquery_port(struct ibv_context *, uint8_t, struct ibv_port_attr *);
 struct ibv_pd *nes_ualloc_pd(struct ibv_context *);
 int nes_ufree_pd(struct ibv_pd *);
-struct ibv_mr *nes_ureg_mr(struct ibv_pd *, void *, size_t, int);
+struct ibv_mr *nes_ureg_mr(struct ibv_pd *pd, void *addr, size_t length,
+			   uint64_t hca_va, int access);
 int nes_udereg_mr(struct verbs_mr *vmr);
 struct ibv_cq *nes_ucreate_cq(struct ibv_context *, int, struct ibv_comp_channel *, int);
 int nes_uresize_cq(struct ibv_cq *, int);
diff --git a/providers/nes/nes_uverbs.c b/providers/nes/nes_uverbs.c
index 8523e923..2b78468b 100644
--- a/providers/nes/nes_uverbs.c
+++ b/providers/nes/nes_uverbs.c
@@ -165,8 +165,8 @@ int nes_ufree_pd(struct ibv_pd *pd)
 /**
  * nes_ureg_mr
  */
-struct ibv_mr *nes_ureg_mr(struct ibv_pd *pd, void *addr,
-		size_t length, int access)
+struct ibv_mr *nes_ureg_mr(struct ibv_pd *pd, void *addr, size_t length,
+			   uint64_t hca_va, int access)
 {
 	struct verbs_mr *vmr;
 	struct nes_ureg_mr cmd;
@@ -177,9 +177,8 @@ struct ibv_mr *nes_ureg_mr(struct ibv_pd *pd, void *addr,
 		return NULL;
 
 	cmd.reg_type = IWNES_MEMREG_TYPE_MEM;
-	if (ibv_cmd_reg_mr(pd, addr, length, (uintptr_t) addr,
-			access, vmr, &cmd.ibv_cmd, sizeof(cmd),
-			&resp, sizeof(resp))) {
+	if (ibv_cmd_reg_mr(pd, addr, length, hca_va, access, vmr, &cmd.ibv_cmd,
+			   sizeof(cmd), &resp, sizeof(resp))) {
 		free(vmr);
 
 		return NULL;
diff --git a/providers/ocrdma/ocrdma_main.h b/providers/ocrdma/ocrdma_main.h
index 33def78c..aadefd96 100644
--- a/providers/ocrdma/ocrdma_main.h
+++ b/providers/ocrdma/ocrdma_main.h
@@ -269,8 +269,8 @@ int ocrdma_query_device(struct ibv_context *, struct ibv_device_attr *);
 int ocrdma_query_port(struct ibv_context *, uint8_t, struct ibv_port_attr *);
 struct ibv_pd *ocrdma_alloc_pd(struct ibv_context *);
 int ocrdma_free_pd(struct ibv_pd *);
-struct ibv_mr *ocrdma_reg_mr(struct ibv_pd *, void *, size_t,
-			     int ibv_access_flags);
+struct ibv_mr *ocrdma_reg_mr(struct ibv_pd *pd, void *addr, size_t len,
+			     uint64_t hca_va, int access);
 int ocrdma_dereg_mr(struct verbs_mr *vmr);
 
 struct ibv_cq *ocrdma_create_cq(struct ibv_context *, int,
diff --git a/providers/ocrdma/ocrdma_verbs.c b/providers/ocrdma/ocrdma_verbs.c
index 3b3e1a60..4ae35be9 100644
--- a/providers/ocrdma/ocrdma_verbs.c
+++ b/providers/ocrdma/ocrdma_verbs.c
@@ -185,22 +185,20 @@ int ocrdma_free_pd(struct ibv_pd *ibpd)
 /*
  * ocrdma_reg_mr
  */
-struct ibv_mr *ocrdma_reg_mr(struct ibv_pd *pd, void *addr,
-			     size_t len, int access)
+struct ibv_mr *ocrdma_reg_mr(struct ibv_pd *pd, void *addr, size_t len,
+			     uint64_t hca_va, int access)
 {
 	struct ocrdma_mr *mr;
 	struct ibv_reg_mr cmd;
 	struct uocrdma_reg_mr_resp resp;
-	uint64_t hca_va = (uintptr_t) addr;
 
 	mr = malloc(sizeof *mr);
 	if (!mr)
 		return NULL;
 	bzero(mr, sizeof *mr);
 
-	if (ibv_cmd_reg_mr(pd, addr, len, hca_va,
-			   access, &mr->vmr, &cmd, sizeof(cmd),
-			   &resp.ibv_resp, sizeof(resp))) {
+	if (ibv_cmd_reg_mr(pd, addr, len, hca_va, access, &mr->vmr, &cmd,
+			   sizeof(cmd), &resp.ibv_resp, sizeof(resp))) {
 		free(mr);
 		return NULL;
 	}
diff --git a/providers/qedr/qelr_main.h b/providers/qedr/qelr_main.h
index 77aa9c2f..fae87130 100644
--- a/providers/qedr/qelr_main.h
+++ b/providers/qedr/qelr_main.h
@@ -46,8 +46,8 @@ int qelr_query_port(struct ibv_context *, uint8_t, struct ibv_port_attr *);
 struct ibv_pd *qelr_alloc_pd(struct ibv_context *);
 int qelr_dealloc_pd(struct ibv_pd *);
 
-struct ibv_mr *qelr_reg_mr(struct ibv_pd *, void *, size_t,
-			   int ibv_access_flags);
+struct ibv_mr *qelr_reg_mr(struct ibv_pd *ibpd, void *addr, size_t len,
+			   uint64_t hca_va, int access);
 int qelr_dereg_mr(struct verbs_mr *vmr);
 
 struct ibv_cq *qelr_create_cq(struct ibv_context *, int,
diff --git a/providers/qedr/qelr_verbs.c b/providers/qedr/qelr_verbs.c
index a347714d..4b19ccb0 100644
--- a/providers/qedr/qelr_verbs.c
+++ b/providers/qedr/qelr_verbs.c
@@ -156,8 +156,8 @@ int qelr_dealloc_pd(struct ibv_pd *ibpd)
 	return rc;
 }
 
-struct ibv_mr *qelr_reg_mr(struct ibv_pd *ibpd, void *addr,
-			   size_t len, int access)
+struct ibv_mr *qelr_reg_mr(struct ibv_pd *ibpd, void *addr, size_t len,
+			   uint64_t hca_va, int access)
 {
 	struct qelr_mr *mr;
 	struct ibv_reg_mr cmd;
@@ -165,17 +165,14 @@ struct ibv_mr *qelr_reg_mr(struct ibv_pd *ibpd, void *addr,
 	struct qelr_pd *pd = get_qelr_pd(ibpd);
 	struct qelr_devctx *cxt = get_qelr_ctx(ibpd->context);
 
-	uint64_t hca_va = (uintptr_t) addr;
-
 	mr = malloc(sizeof(*mr));
 	if (!mr)
 		return NULL;
 
 	bzero(mr, sizeof(*mr));
 
-	if (ibv_cmd_reg_mr(ibpd, addr, len, hca_va,
-			   access, &mr->vmr, &cmd, sizeof(cmd),
-			   &resp.ibv_resp, sizeof(resp))) {
+	if (ibv_cmd_reg_mr(ibpd, addr, len, hca_va, access, &mr->vmr, &cmd,
+			   sizeof(cmd), &resp.ibv_resp, sizeof(resp))) {
 		free(mr);
 		return NULL;
 	}
diff --git a/providers/qedr/qelr_verbs.h b/providers/qedr/qelr_verbs.h
index cf2ce047..d0eacbfe 100644
--- a/providers/qedr/qelr_verbs.h
+++ b/providers/qedr/qelr_verbs.h
@@ -48,8 +48,8 @@ int qelr_query_port(struct ibv_context *context, uint8_t port,
 struct ibv_pd *qelr_alloc_pd(struct ibv_context *context);
 int qelr_dealloc_pd(struct ibv_pd *ibpd);
 
-struct ibv_mr *qelr_reg_mr(struct ibv_pd *ibpd, void *addr,
-			   size_t len, int access);
+struct ibv_mr *qelr_reg_mr(struct ibv_pd *ibpd, void *addr, size_t len,
+			   uint64_t hca_va, int access);
 int qelr_dereg_mr(struct verbs_mr *mr);
 
 struct ibv_cq *qelr_create_cq(struct ibv_context *context, int cqe,
diff --git a/providers/rxe/rxe.c b/providers/rxe/rxe.c
index 2efb646c..4e05d5b9 100644
--- a/providers/rxe/rxe.c
+++ b/providers/rxe/rxe.c
@@ -123,7 +123,7 @@ static int rxe_dealloc_pd(struct ibv_pd *pd)
 }
 
 static struct ibv_mr *rxe_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
-				 int access)
+				 uint64_t hca_va, int access)
 {
 	struct verbs_mr *vmr;
 	struct ibv_reg_mr cmd;
@@ -134,8 +134,8 @@ static struct ibv_mr *rxe_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
 	if (!vmr)
 		return NULL;
 
-	ret = ibv_cmd_reg_mr(pd, addr, length, (uintptr_t)addr, access, vmr,
-			     &cmd, sizeof cmd, &resp, sizeof resp);
+	ret = ibv_cmd_reg_mr(pd, addr, length, hca_va, access, vmr, &cmd,
+			     sizeof(cmd), &resp, sizeof(resp));
 	if (ret) {
 		free(vmr);
 		return NULL;
diff --git a/providers/siw/siw.c b/providers/siw/siw.c
index 41f33fa1..c1acf398 100644
--- a/providers/siw/siw.c
+++ b/providers/siw/siw.c
@@ -96,7 +96,7 @@ static int siw_free_pd(struct ibv_pd *pd)
 }
 
 static struct ibv_mr *siw_reg_mr(struct ibv_pd *pd, void *addr, size_t len,
-				 int access)
+				 uint64_t hca_va, int access)
 {
 	struct siw_cmd_reg_mr cmd = {};
 	struct siw_cmd_reg_mr_resp resp = {};
@@ -107,7 +107,7 @@ static struct ibv_mr *siw_reg_mr(struct ibv_pd *pd, void *addr, size_t len,
 	if (!mr)
 		return NULL;
 
-	rv = ibv_cmd_reg_mr(pd, addr, len, (uintptr_t)addr, access,
+	rv = ibv_cmd_reg_mr(pd, addr, len, hca_va, access,
 			    &mr->base_mr, &cmd.ibv_cmd, sizeof(cmd),
 			    &resp.ibv_resp, sizeof(resp));
 	if (rv) {
diff --git a/providers/vmw_pvrdma/pvrdma.h b/providers/vmw_pvrdma/pvrdma.h
index ebd50ce1..d90bd809 100644
--- a/providers/vmw_pvrdma/pvrdma.h
+++ b/providers/vmw_pvrdma/pvrdma.h
@@ -281,8 +281,8 @@ int pvrdma_query_port(struct ibv_context *context, uint8_t port,
 struct ibv_pd *pvrdma_alloc_pd(struct ibv_context *context);
 int pvrdma_free_pd(struct ibv_pd *pd);
 
-struct ibv_mr *pvrdma_reg_mr(struct ibv_pd *pd, void *addr,
-			     size_t length, int access);
+struct ibv_mr *pvrdma_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
+			     uint64_t hca_va, int access);
 int pvrdma_dereg_mr(struct verbs_mr *mr);
 
 struct ibv_cq *pvrdma_create_cq(struct ibv_context *context, int cqe,
diff --git a/providers/vmw_pvrdma/verbs.c b/providers/vmw_pvrdma/verbs.c
index e27952bf..e8423c01 100644
--- a/providers/vmw_pvrdma/verbs.c
+++ b/providers/vmw_pvrdma/verbs.c
@@ -112,7 +112,7 @@ int pvrdma_free_pd(struct ibv_pd *pd)
 }
 
 struct ibv_mr *pvrdma_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
-			     int access)
+			     uint64_t hca_va, int access)
 {
 	struct verbs_mr *vmr;
 	struct ibv_reg_mr cmd;
@@ -123,9 +123,8 @@ struct ibv_mr *pvrdma_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
 	if (!vmr)
 		return NULL;
 
-	ret = ibv_cmd_reg_mr(pd, addr, length, (uintptr_t) addr,
-			     access, vmr, &cmd, sizeof(cmd),
-			     &resp, sizeof(resp));
+	ret = ibv_cmd_reg_mr(pd, addr, length, hca_va, access, vmr, &cmd,
+			     sizeof(cmd), &resp, sizeof(resp));
 	if (ret) {
 		free(vmr);
 		return NULL;
diff --git a/redhat/rdma-core.spec b/redhat/rdma-core.spec
index 6149f6a6..f07919cc 100644
--- a/redhat/rdma-core.spec
+++ b/redhat/rdma-core.spec
@@ -1,5 +1,5 @@
 Name: rdma-core
-Version: 25.0
+Version: 26.0
 Release: 1%{?dist}
 Summary: RDMA core userspace libraries and daemons
 
diff --git a/suse/rdma-core.spec b/suse/rdma-core.spec
index 273bf45d..5a01327c 100644
--- a/suse/rdma-core.spec
+++ b/suse/rdma-core.spec
@@ -23,7 +23,7 @@
 
 %define         git_ver %{nil}
 Name:           rdma-core
-Version:        25.0
+Version:        26.0
 Release:        0
 Summary:        RDMA core userspace libraries and daemons
 License:        GPL-2.0-only OR BSD-2-Clause
-- 
2.20.1


^ permalink raw reply related

* [PATCH AUTOSEL 5.2 181/249] net/mlx5: Get vport ACL namespace by vport index
From: Sasha Levin @ 2019-07-15 13:45 UTC (permalink / raw)
  To: linux-kernel, stable
  Cc: Jianbo Liu, Oz Shlomo, Eli Britstein, Roi Dayan, Mark Bloch,
	Saeed Mahameed, Sasha Levin, netdev, linux-rdma
In-Reply-To: <20190715134655.4076-1-sashal@kernel.org>

From: Jianbo Liu <jianbol@mellanox.com>

[ Upstream commit f53297d67800feb5fafd94abd926c889aefee690 ]

The ingress and egress ACL root namespaces are created per vport and
stored in arrays. However, the vport number is not the same as the
array index. Pass the array index, instead of the vport number, to get
the correct ingress and egress ACL namespaces.

Fixes: 9b93ab981e3b ("net/mlx5: Separate ingress/egress namespaces for each vport")
Signed-off-by: Jianbo Liu <jianbol@mellanox.com>
Reviewed-by: Oz Shlomo <ozsh@mellanox.com>
Reviewed-by: Eli Britstein <elibr@mellanox.com>
Reviewed-by: Roi Dayan <roid@mellanox.com>
Reviewed-by: Mark Bloch <markb@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index 6a921e24cd5e..acab26b88261 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -939,7 +939,7 @@ int esw_vport_enable_egress_acl(struct mlx5_eswitch *esw,
 		  vport->vport, MLX5_CAP_ESW_EGRESS_ACL(dev, log_max_ft_size));
 
 	root_ns = mlx5_get_flow_vport_acl_namespace(dev, MLX5_FLOW_NAMESPACE_ESW_EGRESS,
-						    vport->vport);
+			mlx5_eswitch_vport_num_to_index(esw, vport->vport));
 	if (!root_ns) {
 		esw_warn(dev, "Failed to get E-Switch egress flow namespace for vport (%d)\n", vport->vport);
 		return -EOPNOTSUPP;
@@ -1057,7 +1057,7 @@ int esw_vport_enable_ingress_acl(struct mlx5_eswitch *esw,
 		  vport->vport, MLX5_CAP_ESW_INGRESS_ACL(dev, log_max_ft_size));
 
 	root_ns = mlx5_get_flow_vport_acl_namespace(dev, MLX5_FLOW_NAMESPACE_ESW_INGRESS,
-						    vport->vport);
+			mlx5_eswitch_vport_num_to_index(esw, vport->vport));
 	if (!root_ns) {
 		esw_warn(dev, "Failed to get E-Switch ingress flow namespace for vport (%d)\n", vport->vport);
 		return -EOPNOTSUPP;
-- 
2.20.1


^ permalink raw reply related

* [PATCH v9] verbs: Introduce a new reg_mr API for virtual address space
From: Yuval Shaia @ 2019-07-15 15:20 UTC (permalink / raw)
  To: yishaih, dledford, leon, jgg, mark.haywood, leonro, linux-rdma
  Cc: Yuval Shaia

The virtual address that is registered is used as a base for any address
passed later in post_recv and post_send operations.

In some virtualized environments this is not correct.

A guest cannot register its memory so the hypervisor maps the guest physical
address to a host virtual address and registers it with the HW. Later on,
at datapath phase, the guest fills the SGEs with addresses from its
address space.
Since HW cannot access guest virtual address space an extra translation
is needed to map those addresses to be based on the host virtual address
that was registered with the HW.
This datapath interference affects performance.

To avoid this, a logical separation between the address that is
registered and the address that is used as an offset at datapath phase is
needed.
This separation is already implemented in the lower layer part
(ibv_cmd_reg_mr) but blocked at the API level.

Fix it by introducing a new API function which accepts an address from
guest virtual address space as well, to be used as offset for later
datapath operations.

Signed-off-by: Yuval Shaia <yuval.shaia@oracle.com>
Reviewed-by: Leon Romanovsky <leonro@mellanox.com>
---
v0 -> v1:
        * Change reg_mr callback signature instead of adding new callback
        * Add the new API to libibverbs/libibverbs.map.in
v1 -> v2:
        * Do not modify reg_mr signature for version 1.0
        * Add note to man page
v2 -> v3:
        * Rename function to reg_mr_iova (and arg-name to iova)
        * Some checkpatch issues not related to this fix but detected now
                * s/__FUNCTION__/__func__
                * WARNING: function definition argument 'void *' should
                  also have an identifier name
v3 -> v4:
        * Fix commit message as suggested by Adit Ranadive
        * Add support for efa
v4 -> v5:
        * Update PABI
        * Update debian files
v5 -> v6:
        * Move the new API to section in libibverbs/libibverbs.map.in
          (IBVERBS_1.7) as pointed out by Mark Haywood
v6 -> v7:
        *
v7 -> v8:
        * Update also redhat and suse specfiles so now all CI checks in
          github passed.
        * Leon, I have your r-b from v5, appreciate if you can take a look
          again now, with all the latest changes
v8 -> v9:
	* Back to 25 per Jason's comment
---
 debian/libibverbs1.symbols        |  4 +++-
 libibverbs/CMakeLists.txt         |  2 +-
 libibverbs/driver.h               |  2 +-
 libibverbs/dummy_ops.c            |  2 +-
 libibverbs/libibverbs.map.in      |  5 +++++
 libibverbs/man/ibv_reg_mr.3       | 15 +++++++++++++--
 libibverbs/verbs.c                | 23 ++++++++++++++++++++++-
 libibverbs/verbs.h                |  7 +++++++
 providers/bnxt_re/verbs.c         |  6 +++---
 providers/bnxt_re/verbs.h         |  2 +-
 providers/cxgb3/iwch.h            |  4 ++--
 providers/cxgb3/verbs.c           | 15 +++++----------
 providers/cxgb4/libcxgb4.h        |  4 ++--
 providers/cxgb4/verbs.c           | 15 +++++----------
 providers/efa/verbs.c             |  4 ++--
 providers/efa/verbs.h             |  2 +-
 providers/hfi1verbs/hfiverbs.h    |  4 ++--
 providers/hfi1verbs/verbs.c       |  8 ++++----
 providers/hns/hns_roce_u.h        |  2 +-
 providers/hns/hns_roce_u_verbs.c  |  6 +++---
 providers/i40iw/i40iw_umain.h     |  3 ++-
 providers/i40iw/i40iw_uverbs.c    |  8 ++++----
 providers/ipathverbs/ipathverbs.h |  4 ++--
 providers/ipathverbs/verbs.c      |  8 ++++----
 providers/mlx4/mlx4.h             |  4 ++--
 providers/mlx4/verbs.c            |  7 +++----
 providers/mlx5/mlx5.h             |  4 ++--
 providers/mlx5/verbs.c            |  7 +++----
 providers/mthca/ah.c              |  3 ++-
 providers/mthca/mthca.h           |  4 ++--
 providers/mthca/verbs.c           |  6 +++---
 providers/nes/nes_umain.h         |  3 ++-
 providers/nes/nes_uverbs.c        |  9 ++++-----
 providers/ocrdma/ocrdma_main.h    |  4 ++--
 providers/ocrdma/ocrdma_verbs.c   | 10 ++++------
 providers/qedr/qelr_main.h        |  4 ++--
 providers/qedr/qelr_verbs.c       | 11 ++++-------
 providers/qedr/qelr_verbs.h       |  4 ++--
 providers/rxe/rxe.c               |  6 +++---
 providers/siw/siw.c               |  4 ++--
 providers/vmw_pvrdma/pvrdma.h     |  4 ++--
 providers/vmw_pvrdma/verbs.c      |  7 +++----
 42 files changed, 143 insertions(+), 113 deletions(-)

diff --git a/debian/libibverbs1.symbols b/debian/libibverbs1.symbols
index 39b3d4a9..aaac3830 100644
--- a/debian/libibverbs1.symbols
+++ b/debian/libibverbs1.symbols
@@ -4,7 +4,8 @@ libibverbs.so.1 libibverbs1 #MINVER#
  IBVERBS_1.1@IBVERBS_1.1 1.1.6
  IBVERBS_1.5@IBVERBS_1.5 20
  IBVERBS_1.6@IBVERBS_1.6 24
- (symver)IBVERBS_PRIVATE_25 25
+ IBVERBS_1.7@IBVERBS_1.7 25.0-1
+ (symver)IBVERBS_PRIVATE_25 25.0-1
  ibv_ack_async_event@IBVERBS_1.0 1.1.6
  ibv_ack_async_event@IBVERBS_1.1 1.1.6
  ibv_ack_cq_events@IBVERBS_1.0 1.1.6
@@ -89,6 +90,7 @@ libibverbs.so.1 libibverbs1 #MINVER#
  ibv_read_sysfs_file@IBVERBS_1.0 1.1.6
  ibv_reg_mr@IBVERBS_1.0 1.1.6
  ibv_reg_mr@IBVERBS_1.1 1.1.6
+ ibv_reg_mr_iova@IBVERBS_1.7 25.0-1
  ibv_register_driver@IBVERBS_1.1 1.1.6
  ibv_rereg_mr@IBVERBS_1.1 1.2.1
  ibv_resize_cq@IBVERBS_1.0 1.1.6
diff --git a/libibverbs/CMakeLists.txt b/libibverbs/CMakeLists.txt
index 1f5c59ed..a5926bbd 100644
--- a/libibverbs/CMakeLists.txt
+++ b/libibverbs/CMakeLists.txt
@@ -21,7 +21,7 @@ configure_file("libibverbs.map.in"
 
 rdma_library(ibverbs "${CMAKE_CURRENT_BINARY_DIR}/libibverbs.map"
   # See Documentation/versioning.md
-  1 1.6.${PACKAGE_VERSION}
+  1 1.7.${PACKAGE_VERSION}
   all_providers.c
   cmd.c
   cmd_ah.c
diff --git a/libibverbs/driver.h b/libibverbs/driver.h
index 2e2131f2..88ed2b5e 100644
--- a/libibverbs/driver.h
+++ b/libibverbs/driver.h
@@ -362,7 +362,7 @@ struct verbs_context_ops {
 				    uint64_t dm_offset, size_t length,
 				    unsigned int access);
 	struct ibv_mr *(*reg_mr)(struct ibv_pd *pd, void *addr, size_t length,
-				 int access);
+				 uint64_t hca_va, int access);
 	int (*req_notify_cq)(struct ibv_cq *cq, int solicited_only);
 	int (*rereg_mr)(struct verbs_mr *vmr, int flags, struct ibv_pd *pd,
 			void *addr, size_t length, int access);
diff --git a/libibverbs/dummy_ops.c b/libibverbs/dummy_ops.c
index ebc6eddd..6560371a 100644
--- a/libibverbs/dummy_ops.c
+++ b/libibverbs/dummy_ops.c
@@ -411,7 +411,7 @@ static struct ibv_mr *reg_dm_mr(struct ibv_pd *pd, struct ibv_dm *dm,
 }
 
 static struct ibv_mr *reg_mr(struct ibv_pd *pd, void *addr, size_t length,
-			     int access)
+			     uint64_t hca_va,  int access)
 {
 	errno = ENOSYS;
 	return NULL;
diff --git a/libibverbs/libibverbs.map.in b/libibverbs/libibverbs.map.in
index ee253ec0..c1b4537a 100644
--- a/libibverbs/libibverbs.map.in
+++ b/libibverbs/libibverbs.map.in
@@ -116,6 +116,11 @@ IBVERBS_1.6 {
 		ibv_qp_to_qp_ex;
 } IBVERBS_1.5;
 
+IBVERBS_1.7 {
+	global:
+		ibv_reg_mr_iova;
+} IBVERBS_1.6;
+
 /* If any symbols in this stanza change ABI then the entire staza gets a new symbol
    version. See the top level CMakeLists.txt for this setting. */
 
diff --git a/libibverbs/man/ibv_reg_mr.3 b/libibverbs/man/ibv_reg_mr.3
index 631e5fe8..be90a57b 100644
--- a/libibverbs/man/ibv_reg_mr.3
+++ b/libibverbs/man/ibv_reg_mr.3
@@ -3,7 +3,7 @@
 .\"
 .TH IBV_REG_MR 3 2006-10-31 libibverbs "Libibverbs Programmer's Manual"
 .SH "NAME"
-ibv_reg_mr, ibv_dereg_mr \- register or deregister a memory region (MR)
+ibv_reg_mr, ibv_reg_mr_iova, ibv_dereg_mr \- register or deregister a memory region (MR)
 .SH "SYNOPSIS"
 .nf
 .B #include <infiniband/verbs.h>
@@ -11,6 +11,10 @@ ibv_reg_mr, ibv_dereg_mr \- register or deregister a memory region (MR)
 .BI "struct ibv_mr *ibv_reg_mr(struct ibv_pd " "*pd" ", void " "*addr" ,
 .BI "                          size_t " "length" ", int " "access" );
 .sp
+.BI "struct ibv_mr *ibv_reg_mr_iova(struct ibv_pd " "*pd" ", void " "*addr" ,
+.BI "                               size_t " "length" ", uint64_t " "hca_va" ,
+.BI "                               int " "access" );
+.sp
 .BI "int ibv_dereg_mr(struct ibv_mr " "*mr" );
 .fi
 .SH "DESCRIPTION"
@@ -52,11 +56,18 @@ Local read access is always enabled for the MR.
 .PP
 To create an implicit ODP MR, IBV_ACCESS_ON_DEMAND should be set, addr should be 0 and length should be SIZE_MAX.
 .PP
+.B ibv_reg_mr_iova()
+ibv_reg_mr_iova is the same as the normal reg_mr, except that the user is
+allowed to specify the virtual base address of the MR when accessed through
+a lkey or rkey. The offset in the memory region is computed as 'addr +
+(iova - hca_va)'. Specifying 0 for hca_va has the same effect as
+IBV_ACCESS_ZERO_BASED.
+.PP
 .B ibv_dereg_mr()
 deregisters the MR
 .I mr\fR.
 .SH "RETURN VALUE"
-.B ibv_reg_mr()
+.B ibv_reg_mr() / ibv_reg_mr_iova()
 returns a pointer to the registered MR, or NULL if the request fails.
 The local key (\fBL_Key\fR) field
 .B lkey
diff --git a/libibverbs/verbs.c b/libibverbs/verbs.c
index c7e8e8e9..e5063af2 100644
--- a/libibverbs/verbs.c
+++ b/libibverbs/verbs.c
@@ -306,7 +306,28 @@ LATEST_SYMVER_FUNC(ibv_reg_mr, 1_1, "IBVERBS_1.1",
 	if (ibv_dontfork_range(addr, length))
 		return NULL;
 
-	mr = get_ops(pd->context)->reg_mr(pd, addr, length, access);
+	mr = get_ops(pd->context)->reg_mr(pd, addr, length, (uintptr_t) addr,
+					  access);
+	if (mr) {
+		mr->context = pd->context;
+		mr->pd      = pd;
+		mr->addr    = addr;
+		mr->length  = length;
+	} else
+		ibv_dofork_range(addr, length);
+
+	return mr;
+}
+
+struct ibv_mr *ibv_reg_mr_iova(struct ibv_pd *pd, void *addr, size_t length,
+			       uint64_t iova, int access)
+{
+	struct ibv_mr *mr;
+
+	if (ibv_dontfork_range(addr, length))
+		return NULL;
+
+	mr = get_ops(pd->context)->reg_mr(pd, addr, length, iova, access);
 	if (mr) {
 		mr->context = pd->context;
 		mr->pd      = pd;
diff --git a/libibverbs/verbs.h b/libibverbs/verbs.h
index 1b0aef03..1e01b5db 100644
--- a/libibverbs/verbs.h
+++ b/libibverbs/verbs.h
@@ -2376,6 +2376,13 @@ static inline int ibv_close_xrcd(struct ibv_xrcd *xrcd)
 struct ibv_mr *ibv_reg_mr(struct ibv_pd *pd, void *addr,
 			  size_t length, int access);
 
+/**
+ * ibv_reg_mr_iova - Register a memory region with a virtual offset
+ * address
+ */
+struct ibv_mr *ibv_reg_mr_iova(struct ibv_pd *pd, void *addr, size_t length,
+			       uint64_t iova, int access);
+
 
 enum ibv_rereg_mr_err_code {
 	/* Old MR is valid, invalid input */
diff --git a/providers/bnxt_re/verbs.c b/providers/bnxt_re/verbs.c
index ed7ddb6e..2218e3a0 100644
--- a/providers/bnxt_re/verbs.c
+++ b/providers/bnxt_re/verbs.c
@@ -131,7 +131,7 @@ int bnxt_re_free_pd(struct ibv_pd *ibvpd)
 }
 
 struct ibv_mr *bnxt_re_reg_mr(struct ibv_pd *ibvpd, void *sva, size_t len,
-			      int access)
+			      uint64_t hca_va, int access)
 {
 	struct bnxt_re_mr *mr;
 	struct ibv_reg_mr cmd;
@@ -141,8 +141,8 @@ struct ibv_mr *bnxt_re_reg_mr(struct ibv_pd *ibvpd, void *sva, size_t len,
 	if (!mr)
 		return NULL;
 
-	if (ibv_cmd_reg_mr(ibvpd, sva, len, (uintptr_t)sva, access, &mr->vmr,
-			   &cmd, sizeof(cmd), &resp.ibv_resp, sizeof(resp))) {
+	if (ibv_cmd_reg_mr(ibvpd, sva, len, hca_va, access, &mr->vmr, &cmd,
+			   sizeof(cmd), &resp.ibv_resp, sizeof(resp))) {
 		free(mr);
 		return NULL;
 	}
diff --git a/providers/bnxt_re/verbs.h b/providers/bnxt_re/verbs.h
index b565d7e6..2e994880 100644
--- a/providers/bnxt_re/verbs.h
+++ b/providers/bnxt_re/verbs.h
@@ -61,7 +61,7 @@ int bnxt_re_query_port(struct ibv_context *uctx, uint8_t port,
 struct ibv_pd *bnxt_re_alloc_pd(struct ibv_context *uctx);
 int bnxt_re_free_pd(struct ibv_pd *ibvpd);
 struct ibv_mr *bnxt_re_reg_mr(struct ibv_pd *ibvpd, void *buf, size_t len,
-			      int ibv_access_flags);
+			      uint64_t hca_va, int ibv_access_flags);
 int bnxt_re_dereg_mr(struct verbs_mr *vmr);
 
 struct ibv_cq *bnxt_re_create_cq(struct ibv_context *uctx, int ncqe,
diff --git a/providers/cxgb3/iwch.h b/providers/cxgb3/iwch.h
index c8de44e9..c7d85d3a 100644
--- a/providers/cxgb3/iwch.h
+++ b/providers/cxgb3/iwch.h
@@ -150,8 +150,8 @@ extern int iwch_query_port(struct ibv_context *context, uint8_t port,
 extern struct ibv_pd *iwch_alloc_pd(struct ibv_context *context);
 extern int iwch_free_pd(struct ibv_pd *pd);
 
-extern struct ibv_mr *iwch_reg_mr(struct ibv_pd *pd, void *addr,
-				  size_t length, int access);
+extern struct ibv_mr *iwch_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
+				  uint64_t hca_va, int access);
 extern int iwch_dereg_mr(struct verbs_mr *mr);
 
 struct ibv_cq *iwch_create_cq(struct ibv_context *context, int cqe,
diff --git a/providers/cxgb3/verbs.c b/providers/cxgb3/verbs.c
index 8b90482a..39a44192 100644
--- a/providers/cxgb3/verbs.c
+++ b/providers/cxgb3/verbs.c
@@ -103,15 +103,17 @@ int iwch_free_pd(struct ibv_pd *pd)
 	return 0;
 }
 
-static struct ibv_mr *__iwch_reg_mr(struct ibv_pd *pd, void *addr,
-				    size_t length, uint64_t hca_va,
-				    int access)
+struct ibv_mr *iwch_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
+			   uint64_t hca_va, int access)
 {
 	struct iwch_mr *mhp;
 	struct ibv_reg_mr cmd;
 	struct uiwch_reg_mr_resp resp;
 	struct iwch_device *dev = to_iwch_dev(pd->context->device);
 
+	PDBG("%s addr %p length %ld hca_va %p\n", __func__, addr, length,
+	     hca_va);
+
 	mhp = malloc(sizeof *mhp);
 	if (!mhp)
 		return NULL;
@@ -140,13 +142,6 @@ static struct ibv_mr *__iwch_reg_mr(struct ibv_pd *pd, void *addr,
 	return &mhp->vmr.ibv_mr;
 }
 
-struct ibv_mr *iwch_reg_mr(struct ibv_pd *pd, void *addr,
-			   size_t length, int access)
-{
-	PDBG("%s addr %p length %ld\n", __FUNCTION__, addr, length);
-	return __iwch_reg_mr(pd, addr, length, (uintptr_t) addr, access);
-}
-
 int iwch_dereg_mr(struct verbs_mr *vmr)
 {
 	int ret;
diff --git a/providers/cxgb4/libcxgb4.h b/providers/cxgb4/libcxgb4.h
index 0fbceab0..ce8f29dd 100644
--- a/providers/cxgb4/libcxgb4.h
+++ b/providers/cxgb4/libcxgb4.h
@@ -198,8 +198,8 @@ int c4iw_query_port(struct ibv_context *context, uint8_t port,
 struct ibv_pd *c4iw_alloc_pd(struct ibv_context *context);
 int c4iw_free_pd(struct ibv_pd *pd);
 
-struct ibv_mr *c4iw_reg_mr(struct ibv_pd *pd, void *addr,
-				  size_t length, int access);
+struct ibv_mr *c4iw_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
+			   uint64_t hca_va, int access);
 int c4iw_dereg_mr(struct verbs_mr *vmr);
 
 struct ibv_cq *c4iw_create_cq(struct ibv_context *context, int cqe,
diff --git a/providers/cxgb4/verbs.c b/providers/cxgb4/verbs.c
index 452e4f1f..4240f6b3 100644
--- a/providers/cxgb4/verbs.c
+++ b/providers/cxgb4/verbs.c
@@ -109,15 +109,17 @@ int c4iw_free_pd(struct ibv_pd *pd)
 	return 0;
 }
 
-static struct ibv_mr *__c4iw_reg_mr(struct ibv_pd *pd, void *addr,
-				    size_t length, uint64_t hca_va,
-				    int access)
+struct ibv_mr *c4iw_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
+			   uint64_t hca_va, int access)
 {
 	struct c4iw_mr *mhp;
 	struct ibv_reg_mr cmd;
 	struct ib_uverbs_reg_mr_resp resp;
 	struct c4iw_dev *dev = to_c4iw_dev(pd->context->device);
 
+	PDBG("%s addr %p length %ld hca_va %p\n", __func__, addr, length,
+	     hca_va);
+
 	mhp = malloc(sizeof *mhp);
 	if (!mhp)
 		return NULL;
@@ -142,13 +144,6 @@ static struct ibv_mr *__c4iw_reg_mr(struct ibv_pd *pd, void *addr,
 	return &mhp->vmr.ibv_mr;
 }
 
-struct ibv_mr *c4iw_reg_mr(struct ibv_pd *pd, void *addr,
-			   size_t length, int access)
-{
-	PDBG("%s addr %p length %ld\n", __func__, addr, length);
-	return __c4iw_reg_mr(pd, addr, length, (uintptr_t) addr, access);
-}
-
 int c4iw_dereg_mr(struct verbs_mr *vmr)
 {
 	int ret;
diff --git a/providers/efa/verbs.c b/providers/efa/verbs.c
index 4d36f9e1..d2500ecb 100644
--- a/providers/efa/verbs.c
+++ b/providers/efa/verbs.c
@@ -126,7 +126,7 @@ int efa_dealloc_pd(struct ibv_pd *ibvpd)
 }
 
 struct ibv_mr *efa_reg_mr(struct ibv_pd *ibvpd, void *sva, size_t len,
-			  int access)
+			  uint64_t hca_va, int access)
 {
 	struct ib_uverbs_reg_mr_resp resp;
 	struct ibv_reg_mr cmd;
@@ -136,7 +136,7 @@ struct ibv_mr *efa_reg_mr(struct ibv_pd *ibvpd, void *sva, size_t len,
 	if (!mr)
 		return NULL;
 
-	if (ibv_cmd_reg_mr(ibvpd, sva, len, (uintptr_t)sva, access, &mr->vmr,
+	if (ibv_cmd_reg_mr(ibvpd, sva, len, hca_va, access, &mr->vmr,
 			   &cmd, sizeof(cmd), &resp, sizeof(resp))) {
 		free(mr);
 		return NULL;
diff --git a/providers/efa/verbs.h b/providers/efa/verbs.h
index 1a49653f..7b532adc 100644
--- a/providers/efa/verbs.h
+++ b/providers/efa/verbs.h
@@ -18,7 +18,7 @@ int efa_query_device_ex(struct ibv_context *context,
 struct ibv_pd *efa_alloc_pd(struct ibv_context *uctx);
 int efa_dealloc_pd(struct ibv_pd *ibvpd);
 struct ibv_mr *efa_reg_mr(struct ibv_pd *ibvpd, void *buf, size_t len,
-			  int ibv_access_flags);
+			  uint64_t hca_va, int ibv_access_flags);
 int efa_dereg_mr(struct verbs_mr *vmr);
 
 struct ibv_cq *efa_create_cq(struct ibv_context *uctx, int ncqe,
diff --git a/providers/hfi1verbs/hfiverbs.h b/providers/hfi1verbs/hfiverbs.h
index 070a01c9..b9e91d80 100644
--- a/providers/hfi1verbs/hfiverbs.h
+++ b/providers/hfi1verbs/hfiverbs.h
@@ -204,8 +204,8 @@ struct ibv_pd *hfi1_alloc_pd(struct ibv_context *pd);
 
 int hfi1_free_pd(struct ibv_pd *pd);
 
-struct ibv_mr *hfi1_reg_mr(struct ibv_pd *pd, void *addr,
-			    size_t length, int access);
+struct ibv_mr *hfi1_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
+			   uint64_t hca_va, int access);
 
 int hfi1_dereg_mr(struct verbs_mr *vmr);
 
diff --git a/providers/hfi1verbs/verbs.c b/providers/hfi1verbs/verbs.c
index ff001f6d..275f8d51 100644
--- a/providers/hfi1verbs/verbs.c
+++ b/providers/hfi1verbs/verbs.c
@@ -129,8 +129,8 @@ int hfi1_free_pd(struct ibv_pd *pd)
 	return 0;
 }
 
-struct ibv_mr *hfi1_reg_mr(struct ibv_pd *pd, void *addr,
-			    size_t length, int access)
+struct ibv_mr *hfi1_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
+			   uint64_t hca_va, int access)
 {
 	struct verbs_mr *vmr;
 	struct ibv_reg_mr cmd;
@@ -141,8 +141,8 @@ struct ibv_mr *hfi1_reg_mr(struct ibv_pd *pd, void *addr,
 	if (!vmr)
 		return NULL;
 
-	ret = ibv_cmd_reg_mr(pd, addr, length, (uintptr_t)addr, access, vmr,
-			     &cmd, sizeof cmd, &resp, sizeof resp);
+	ret = ibv_cmd_reg_mr(pd, addr, length, hca_va, access, vmr, &cmd,
+			     sizeof(cmd), &resp, sizeof(resp));
 
 	if (ret) {
 		free(vmr);
diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h
index 93bc47c1..517d97b3 100644
--- a/providers/hns/hns_roce_u.h
+++ b/providers/hns/hns_roce_u.h
@@ -293,7 +293,7 @@ struct ibv_pd *hns_roce_u_alloc_pd(struct ibv_context *context);
 int hns_roce_u_free_pd(struct ibv_pd *pd);
 
 struct ibv_mr *hns_roce_u_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
-				 int access);
+				 uint64_t hca_va, int access);
 int hns_roce_u_rereg_mr(struct verbs_mr *mr, int flags, struct ibv_pd *pd,
 			void *addr, size_t length, int access);
 int hns_roce_u_dereg_mr(struct verbs_mr *mr);
diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c
index 9ba65a1d..44bc32a4 100644
--- a/providers/hns/hns_roce_u_verbs.c
+++ b/providers/hns/hns_roce_u_verbs.c
@@ -120,7 +120,7 @@ int hns_roce_u_free_pd(struct ibv_pd *pd)
 }
 
 struct ibv_mr *hns_roce_u_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
-				 int access)
+				 uint64_t hca_va, int access)
 {
 	int ret;
 	struct verbs_mr *vmr;
@@ -141,8 +141,8 @@ struct ibv_mr *hns_roce_u_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
 	if (!vmr)
 		return NULL;
 
-	ret = ibv_cmd_reg_mr(pd, addr, length, (uintptr_t)addr, access, vmr,
-			     &cmd, sizeof(cmd), &resp, sizeof(resp));
+	ret = ibv_cmd_reg_mr(pd, addr, length, hca_va, access, vmr, &cmd,
+			     sizeof(cmd), &resp, sizeof(resp));
 	if (ret) {
 		free(vmr);
 		return NULL;
diff --git a/providers/i40iw/i40iw_umain.h b/providers/i40iw/i40iw_umain.h
index 4055933a..af84f284 100644
--- a/providers/i40iw/i40iw_umain.h
+++ b/providers/i40iw/i40iw_umain.h
@@ -155,7 +155,8 @@ int i40iw_uquery_device(struct ibv_context *, struct ibv_device_attr *);
 int i40iw_uquery_port(struct ibv_context *, uint8_t, struct ibv_port_attr *);
 struct ibv_pd *i40iw_ualloc_pd(struct ibv_context *);
 int i40iw_ufree_pd(struct ibv_pd *);
-struct ibv_mr *i40iw_ureg_mr(struct ibv_pd *, void *, size_t, int);
+struct ibv_mr *i40iw_ureg_mr(struct ibv_pd *pd, void *addr, size_t length,
+			     uint64_t hca_va, int access);
 int i40iw_udereg_mr(struct verbs_mr *vmr);
 struct ibv_cq *i40iw_ucreate_cq(struct ibv_context *, int, struct ibv_comp_channel *, int);
 int i40iw_uresize_cq(struct ibv_cq *, int);
diff --git a/providers/i40iw/i40iw_uverbs.c b/providers/i40iw/i40iw_uverbs.c
index 83b504fa..240150b9 100644
--- a/providers/i40iw/i40iw_uverbs.c
+++ b/providers/i40iw/i40iw_uverbs.c
@@ -149,7 +149,8 @@ int i40iw_ufree_pd(struct ibv_pd *pd)
  * @length: length of the memory
  * @access: access allowed on this mr
  */
-struct ibv_mr *i40iw_ureg_mr(struct ibv_pd *pd, void *addr, size_t length, int access)
+struct ibv_mr *i40iw_ureg_mr(struct ibv_pd *pd, void *addr, size_t length,
+			     uint64_t hca_va, int access)
 {
 	struct verbs_mr *vmr;
 	struct i40iw_ureg_mr cmd;
@@ -161,9 +162,8 @@ struct ibv_mr *i40iw_ureg_mr(struct ibv_pd *pd, void *addr, size_t length, int a
 
 	cmd.reg_type = IW_MEMREG_TYPE_MEM;
 
-	if (ibv_cmd_reg_mr(pd, addr, length, (uintptr_t)addr,
-			   access, vmr, &cmd.ibv_cmd, sizeof(cmd),
-			   &resp, sizeof(resp))) {
+	if (ibv_cmd_reg_mr(pd, addr, length, hca_va, access, vmr, &cmd.ibv_cmd,
+			   sizeof(cmd), &resp, sizeof(resp))) {
 		fprintf(stderr, PFX "%s: Failed to register memory\n", __func__);
 		free(vmr);
 		return NULL;
diff --git a/providers/ipathverbs/ipathverbs.h b/providers/ipathverbs/ipathverbs.h
index cfb5cc38..694f1f44 100644
--- a/providers/ipathverbs/ipathverbs.h
+++ b/providers/ipathverbs/ipathverbs.h
@@ -183,8 +183,8 @@ struct ibv_pd *ipath_alloc_pd(struct ibv_context *pd);
 
 int ipath_free_pd(struct ibv_pd *pd);
 
-struct ibv_mr *ipath_reg_mr(struct ibv_pd *pd, void *addr,
-			    size_t length, int access);
+struct ibv_mr *ipath_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
+			    uint64_t hca_va, int access);
 
 int ipath_dereg_mr(struct verbs_mr *vmr);
 
diff --git a/providers/ipathverbs/verbs.c b/providers/ipathverbs/verbs.c
index de4722b2..505ea584 100644
--- a/providers/ipathverbs/verbs.c
+++ b/providers/ipathverbs/verbs.c
@@ -109,8 +109,8 @@ int ipath_free_pd(struct ibv_pd *pd)
 	return 0;
 }
 
-struct ibv_mr *ipath_reg_mr(struct ibv_pd *pd, void *addr,
-			    size_t length, int access)
+struct ibv_mr *ipath_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
+			    uint64_t hca_va, int access)
 {
 	struct verbs_mr *vmr;
 	struct ibv_reg_mr cmd;
@@ -121,8 +121,8 @@ struct ibv_mr *ipath_reg_mr(struct ibv_pd *pd, void *addr,
 	if (!vmr)
 		return NULL;
 
-	ret = ibv_cmd_reg_mr(pd, addr, length, (uintptr_t)addr, access, vmr,
-			     &cmd, sizeof cmd, &resp, sizeof resp);
+	ret = ibv_cmd_reg_mr(pd, addr, length, hca_va, access, vmr, &cmd,
+			     sizeof(cmd), &resp, sizeof(resp));
 	if (ret) {
 		free(vmr);
 		return NULL;
diff --git a/providers/mlx4/mlx4.h b/providers/mlx4/mlx4.h
index 9c21d775..3c161e8e 100644
--- a/providers/mlx4/mlx4.h
+++ b/providers/mlx4/mlx4.h
@@ -320,8 +320,8 @@ struct ibv_xrcd *mlx4_open_xrcd(struct ibv_context *context,
 				struct ibv_xrcd_init_attr *attr);
 int mlx4_close_xrcd(struct ibv_xrcd *xrcd);
 
-struct ibv_mr *mlx4_reg_mr(struct ibv_pd *pd, void *addr,
-			    size_t length, int access);
+struct ibv_mr *mlx4_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
+			   uint64_t hca_va, int access);
 int mlx4_rereg_mr(struct verbs_mr *vmr, int flags, struct ibv_pd *pd,
 		  void *addr, size_t length, int access);
 int mlx4_dereg_mr(struct verbs_mr *vmr);
diff --git a/providers/mlx4/verbs.c b/providers/mlx4/verbs.c
index 9a5affe7..d814a2bc 100644
--- a/providers/mlx4/verbs.c
+++ b/providers/mlx4/verbs.c
@@ -275,7 +275,7 @@ int mlx4_close_xrcd(struct ibv_xrcd *ib_xrcd)
 }
 
 struct ibv_mr *mlx4_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
-			   int access)
+			   uint64_t hca_va, int access)
 {
 	struct verbs_mr *vmr;
 	struct ibv_reg_mr cmd;
@@ -286,9 +286,8 @@ struct ibv_mr *mlx4_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
 	if (!vmr)
 		return NULL;
 
-	ret = ibv_cmd_reg_mr(pd, addr, length, (uintptr_t) addr,
-			     access, vmr, &cmd, sizeof(cmd),
-			     &resp, sizeof(resp));
+	ret = ibv_cmd_reg_mr(pd, addr, length, hca_va, access, vmr, &cmd,
+			     sizeof(cmd), &resp, sizeof(resp));
 	if (ret) {
 		free(vmr);
 		return NULL;
diff --git a/providers/mlx5/mlx5.h b/providers/mlx5/mlx5.h
index d9fccdcc..ab3c2c1a 100644
--- a/providers/mlx5/mlx5.h
+++ b/providers/mlx5/mlx5.h
@@ -821,8 +821,8 @@ void mlx5_async_event(struct ibv_context *context,
 		      struct ibv_async_event *event);
 
 struct ibv_mr *mlx5_alloc_null_mr(struct ibv_pd *pd);
-struct ibv_mr *mlx5_reg_mr(struct ibv_pd *pd, void *addr,
-			   size_t length, int access);
+struct ibv_mr *mlx5_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
+			   uint64_t hca_va, int access);
 int mlx5_rereg_mr(struct verbs_mr *mr, int flags, struct ibv_pd *pd, void *addr,
 		  size_t length, int access);
 int mlx5_dereg_mr(struct verbs_mr *mr);
diff --git a/providers/mlx5/verbs.c b/providers/mlx5/verbs.c
index c13e81f4..714c5f7e 100644
--- a/providers/mlx5/verbs.c
+++ b/providers/mlx5/verbs.c
@@ -388,7 +388,7 @@ int mlx5_free_pd(struct ibv_pd *pd)
 }
 
 struct ibv_mr *mlx5_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
-			   int acc)
+			   uint64_t hca_va, int acc)
 {
 	struct mlx5_mr *mr;
 	struct ibv_reg_mr cmd;
@@ -400,9 +400,8 @@ struct ibv_mr *mlx5_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
 	if (!mr)
 		return NULL;
 
-	ret = ibv_cmd_reg_mr(pd, addr, length, (uintptr_t)addr, access,
-			     &mr->vmr, &cmd, sizeof(cmd), &resp,
-			     sizeof resp);
+	ret = ibv_cmd_reg_mr(pd, addr, length, hca_va, access, &mr->vmr, &cmd,
+			     sizeof(cmd), &resp, sizeof(resp));
 	if (ret) {
 		mlx5_free_buf(&(mr->buf));
 		free(mr);
diff --git a/providers/mthca/ah.c b/providers/mthca/ah.c
index df0cb281..adefb178 100644
--- a/providers/mthca/ah.c
+++ b/providers/mthca/ah.c
@@ -61,7 +61,8 @@ static struct mthca_ah_page *__add_page(struct mthca_pd *pd, int page_size, int
 		return NULL;
 	}
 
-	page->mr = mthca_reg_mr(&pd->ibv_pd, page->buf.buf, page_size, 0);
+	page->mr = mthca_reg_mr(&pd->ibv_pd, page->buf.buf, page_size,
+				(uintptr_t) page->buf.buf, 0);
 	if (!page->mr) {
 		mthca_free_buf(&page->buf);
 		free(page);
diff --git a/providers/mthca/mthca.h b/providers/mthca/mthca.h
index 61042de3..b7df2f73 100644
--- a/providers/mthca/mthca.h
+++ b/providers/mthca/mthca.h
@@ -280,8 +280,8 @@ int mthca_query_port(struct ibv_context *context, uint8_t port,
 struct ibv_pd *mthca_alloc_pd(struct ibv_context *context);
 int mthca_free_pd(struct ibv_pd *pd);
 
-struct ibv_mr *mthca_reg_mr(struct ibv_pd *pd, void *addr,
-			    size_t length, int access);
+struct ibv_mr *mthca_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
+			    uint64_t hca_va, int access);
 int mthca_dereg_mr(struct verbs_mr *mr);
 
 struct ibv_cq *mthca_create_cq(struct ibv_context *context, int cqe,
diff --git a/providers/mthca/verbs.c b/providers/mthca/verbs.c
index e7a1c357..99e5ec66 100644
--- a/providers/mthca/verbs.c
+++ b/providers/mthca/verbs.c
@@ -145,10 +145,10 @@ static struct ibv_mr *__mthca_reg_mr(struct ibv_pd *pd, void *addr,
 	return &vmr->ibv_mr;
 }
 
-struct ibv_mr *mthca_reg_mr(struct ibv_pd *pd, void *addr,
-			    size_t length, int access)
+struct ibv_mr *mthca_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
+			    uint64_t hca_va, int access)
 {
-	return __mthca_reg_mr(pd, addr, length, (uintptr_t) addr, access, 0);
+	return __mthca_reg_mr(pd, addr, length, hca_va, access, 0);
 }
 
 int mthca_dereg_mr(struct verbs_mr *vmr)
diff --git a/providers/nes/nes_umain.h b/providers/nes/nes_umain.h
index edb38622..1070ce42 100644
--- a/providers/nes/nes_umain.h
+++ b/providers/nes/nes_umain.h
@@ -350,7 +350,8 @@ int nes_uquery_device(struct ibv_context *, struct ibv_device_attr *);
 int nes_uquery_port(struct ibv_context *, uint8_t, struct ibv_port_attr *);
 struct ibv_pd *nes_ualloc_pd(struct ibv_context *);
 int nes_ufree_pd(struct ibv_pd *);
-struct ibv_mr *nes_ureg_mr(struct ibv_pd *, void *, size_t, int);
+struct ibv_mr *nes_ureg_mr(struct ibv_pd *pd, void *addr, size_t length,
+			   uint64_t hca_va, int access);
 int nes_udereg_mr(struct verbs_mr *vmr);
 struct ibv_cq *nes_ucreate_cq(struct ibv_context *, int, struct ibv_comp_channel *, int);
 int nes_uresize_cq(struct ibv_cq *, int);
diff --git a/providers/nes/nes_uverbs.c b/providers/nes/nes_uverbs.c
index 8523e923..2b78468b 100644
--- a/providers/nes/nes_uverbs.c
+++ b/providers/nes/nes_uverbs.c
@@ -165,8 +165,8 @@ int nes_ufree_pd(struct ibv_pd *pd)
 /**
  * nes_ureg_mr
  */
-struct ibv_mr *nes_ureg_mr(struct ibv_pd *pd, void *addr,
-		size_t length, int access)
+struct ibv_mr *nes_ureg_mr(struct ibv_pd *pd, void *addr, size_t length,
+			   uint64_t hca_va, int access)
 {
 	struct verbs_mr *vmr;
 	struct nes_ureg_mr cmd;
@@ -177,9 +177,8 @@ struct ibv_mr *nes_ureg_mr(struct ibv_pd *pd, void *addr,
 		return NULL;
 
 	cmd.reg_type = IWNES_MEMREG_TYPE_MEM;
-	if (ibv_cmd_reg_mr(pd, addr, length, (uintptr_t) addr,
-			access, vmr, &cmd.ibv_cmd, sizeof(cmd),
-			&resp, sizeof(resp))) {
+	if (ibv_cmd_reg_mr(pd, addr, length, hca_va, access, vmr, &cmd.ibv_cmd,
+			   sizeof(cmd), &resp, sizeof(resp))) {
 		free(vmr);
 
 		return NULL;
diff --git a/providers/ocrdma/ocrdma_main.h b/providers/ocrdma/ocrdma_main.h
index 33def78c..aadefd96 100644
--- a/providers/ocrdma/ocrdma_main.h
+++ b/providers/ocrdma/ocrdma_main.h
@@ -269,8 +269,8 @@ int ocrdma_query_device(struct ibv_context *, struct ibv_device_attr *);
 int ocrdma_query_port(struct ibv_context *, uint8_t, struct ibv_port_attr *);
 struct ibv_pd *ocrdma_alloc_pd(struct ibv_context *);
 int ocrdma_free_pd(struct ibv_pd *);
-struct ibv_mr *ocrdma_reg_mr(struct ibv_pd *, void *, size_t,
-			     int ibv_access_flags);
+struct ibv_mr *ocrdma_reg_mr(struct ibv_pd *pd, void *addr, size_t len,
+			     uint64_t hca_va, int access);
 int ocrdma_dereg_mr(struct verbs_mr *vmr);
 
 struct ibv_cq *ocrdma_create_cq(struct ibv_context *, int,
diff --git a/providers/ocrdma/ocrdma_verbs.c b/providers/ocrdma/ocrdma_verbs.c
index 3b3e1a60..4ae35be9 100644
--- a/providers/ocrdma/ocrdma_verbs.c
+++ b/providers/ocrdma/ocrdma_verbs.c
@@ -185,22 +185,20 @@ int ocrdma_free_pd(struct ibv_pd *ibpd)
 /*
  * ocrdma_reg_mr
  */
-struct ibv_mr *ocrdma_reg_mr(struct ibv_pd *pd, void *addr,
-			     size_t len, int access)
+struct ibv_mr *ocrdma_reg_mr(struct ibv_pd *pd, void *addr, size_t len,
+			     uint64_t hca_va, int access)
 {
 	struct ocrdma_mr *mr;
 	struct ibv_reg_mr cmd;
 	struct uocrdma_reg_mr_resp resp;
-	uint64_t hca_va = (uintptr_t) addr;
 
 	mr = malloc(sizeof *mr);
 	if (!mr)
 		return NULL;
 	bzero(mr, sizeof *mr);
 
-	if (ibv_cmd_reg_mr(pd, addr, len, hca_va,
-			   access, &mr->vmr, &cmd, sizeof(cmd),
-			   &resp.ibv_resp, sizeof(resp))) {
+	if (ibv_cmd_reg_mr(pd, addr, len, hca_va, access, &mr->vmr, &cmd,
+			   sizeof(cmd), &resp.ibv_resp, sizeof(resp))) {
 		free(mr);
 		return NULL;
 	}
diff --git a/providers/qedr/qelr_main.h b/providers/qedr/qelr_main.h
index 77aa9c2f..fae87130 100644
--- a/providers/qedr/qelr_main.h
+++ b/providers/qedr/qelr_main.h
@@ -46,8 +46,8 @@ int qelr_query_port(struct ibv_context *, uint8_t, struct ibv_port_attr *);
 struct ibv_pd *qelr_alloc_pd(struct ibv_context *);
 int qelr_dealloc_pd(struct ibv_pd *);
 
-struct ibv_mr *qelr_reg_mr(struct ibv_pd *, void *, size_t,
-			   int ibv_access_flags);
+struct ibv_mr *qelr_reg_mr(struct ibv_pd *ibpd, void *addr, size_t len,
+			   uint64_t hca_va, int access);
 int qelr_dereg_mr(struct verbs_mr *vmr);
 
 struct ibv_cq *qelr_create_cq(struct ibv_context *, int,
diff --git a/providers/qedr/qelr_verbs.c b/providers/qedr/qelr_verbs.c
index a347714d..4b19ccb0 100644
--- a/providers/qedr/qelr_verbs.c
+++ b/providers/qedr/qelr_verbs.c
@@ -156,8 +156,8 @@ int qelr_dealloc_pd(struct ibv_pd *ibpd)
 	return rc;
 }
 
-struct ibv_mr *qelr_reg_mr(struct ibv_pd *ibpd, void *addr,
-			   size_t len, int access)
+struct ibv_mr *qelr_reg_mr(struct ibv_pd *ibpd, void *addr, size_t len,
+			   uint64_t hca_va, int access)
 {
 	struct qelr_mr *mr;
 	struct ibv_reg_mr cmd;
@@ -165,17 +165,14 @@ struct ibv_mr *qelr_reg_mr(struct ibv_pd *ibpd, void *addr,
 	struct qelr_pd *pd = get_qelr_pd(ibpd);
 	struct qelr_devctx *cxt = get_qelr_ctx(ibpd->context);
 
-	uint64_t hca_va = (uintptr_t) addr;
-
 	mr = malloc(sizeof(*mr));
 	if (!mr)
 		return NULL;
 
 	bzero(mr, sizeof(*mr));
 
-	if (ibv_cmd_reg_mr(ibpd, addr, len, hca_va,
-			   access, &mr->vmr, &cmd, sizeof(cmd),
-			   &resp.ibv_resp, sizeof(resp))) {
+	if (ibv_cmd_reg_mr(ibpd, addr, len, hca_va, access, &mr->vmr, &cmd,
+			   sizeof(cmd), &resp.ibv_resp, sizeof(resp))) {
 		free(mr);
 		return NULL;
 	}
diff --git a/providers/qedr/qelr_verbs.h b/providers/qedr/qelr_verbs.h
index cf2ce047..d0eacbfe 100644
--- a/providers/qedr/qelr_verbs.h
+++ b/providers/qedr/qelr_verbs.h
@@ -48,8 +48,8 @@ int qelr_query_port(struct ibv_context *context, uint8_t port,
 struct ibv_pd *qelr_alloc_pd(struct ibv_context *context);
 int qelr_dealloc_pd(struct ibv_pd *ibpd);
 
-struct ibv_mr *qelr_reg_mr(struct ibv_pd *ibpd, void *addr,
-			   size_t len, int access);
+struct ibv_mr *qelr_reg_mr(struct ibv_pd *ibpd, void *addr, size_t len,
+			   uint64_t hca_va, int access);
 int qelr_dereg_mr(struct verbs_mr *mr);
 
 struct ibv_cq *qelr_create_cq(struct ibv_context *context, int cqe,
diff --git a/providers/rxe/rxe.c b/providers/rxe/rxe.c
index 2efb646c..4e05d5b9 100644
--- a/providers/rxe/rxe.c
+++ b/providers/rxe/rxe.c
@@ -123,7 +123,7 @@ static int rxe_dealloc_pd(struct ibv_pd *pd)
 }
 
 static struct ibv_mr *rxe_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
-				 int access)
+				 uint64_t hca_va, int access)
 {
 	struct verbs_mr *vmr;
 	struct ibv_reg_mr cmd;
@@ -134,8 +134,8 @@ static struct ibv_mr *rxe_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
 	if (!vmr)
 		return NULL;
 
-	ret = ibv_cmd_reg_mr(pd, addr, length, (uintptr_t)addr, access, vmr,
-			     &cmd, sizeof cmd, &resp, sizeof resp);
+	ret = ibv_cmd_reg_mr(pd, addr, length, hca_va, access, vmr, &cmd,
+			     sizeof(cmd), &resp, sizeof(resp));
 	if (ret) {
 		free(vmr);
 		return NULL;
diff --git a/providers/siw/siw.c b/providers/siw/siw.c
index 41f33fa1..c1acf398 100644
--- a/providers/siw/siw.c
+++ b/providers/siw/siw.c
@@ -96,7 +96,7 @@ static int siw_free_pd(struct ibv_pd *pd)
 }
 
 static struct ibv_mr *siw_reg_mr(struct ibv_pd *pd, void *addr, size_t len,
-				 int access)
+				 uint64_t hca_va, int access)
 {
 	struct siw_cmd_reg_mr cmd = {};
 	struct siw_cmd_reg_mr_resp resp = {};
@@ -107,7 +107,7 @@ static struct ibv_mr *siw_reg_mr(struct ibv_pd *pd, void *addr, size_t len,
 	if (!mr)
 		return NULL;
 
-	rv = ibv_cmd_reg_mr(pd, addr, len, (uintptr_t)addr, access,
+	rv = ibv_cmd_reg_mr(pd, addr, len, hca_va, access,
 			    &mr->base_mr, &cmd.ibv_cmd, sizeof(cmd),
 			    &resp.ibv_resp, sizeof(resp));
 	if (rv) {
diff --git a/providers/vmw_pvrdma/pvrdma.h b/providers/vmw_pvrdma/pvrdma.h
index ebd50ce1..d90bd809 100644
--- a/providers/vmw_pvrdma/pvrdma.h
+++ b/providers/vmw_pvrdma/pvrdma.h
@@ -281,8 +281,8 @@ int pvrdma_query_port(struct ibv_context *context, uint8_t port,
 struct ibv_pd *pvrdma_alloc_pd(struct ibv_context *context);
 int pvrdma_free_pd(struct ibv_pd *pd);
 
-struct ibv_mr *pvrdma_reg_mr(struct ibv_pd *pd, void *addr,
-			     size_t length, int access);
+struct ibv_mr *pvrdma_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
+			     uint64_t hca_va, int access);
 int pvrdma_dereg_mr(struct verbs_mr *mr);
 
 struct ibv_cq *pvrdma_create_cq(struct ibv_context *context, int cqe,
diff --git a/providers/vmw_pvrdma/verbs.c b/providers/vmw_pvrdma/verbs.c
index e27952bf..e8423c01 100644
--- a/providers/vmw_pvrdma/verbs.c
+++ b/providers/vmw_pvrdma/verbs.c
@@ -112,7 +112,7 @@ int pvrdma_free_pd(struct ibv_pd *pd)
 }
 
 struct ibv_mr *pvrdma_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
-			     int access)
+			     uint64_t hca_va, int access)
 {
 	struct verbs_mr *vmr;
 	struct ibv_reg_mr cmd;
@@ -123,9 +123,8 @@ struct ibv_mr *pvrdma_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
 	if (!vmr)
 		return NULL;
 
-	ret = ibv_cmd_reg_mr(pd, addr, length, (uintptr_t) addr,
-			     access, vmr, &cmd, sizeof(cmd),
-			     &resp, sizeof(resp));
+	ret = ibv_cmd_reg_mr(pd, addr, length, hca_va, access, vmr, &cmd,
+			     sizeof(cmd), &resp, sizeof(resp));
 	if (ret) {
 		free(vmr);
 		return NULL;
-- 
2.20.1


^ permalink raw reply related

* [GIT PULL] Please pull RDMA subsystem changes
From: Jason Gunthorpe @ 2019-07-15 15:26 UTC (permalink / raw)
  To: Linus Torvalds, Doug Ledford
  Cc: linux-rdma@vger.kernel.org, linux-kernel@vger.kernel.org

[-- Attachment #1: Type: text/plain, Size: 29247 bytes --]

Hi Linus,

These are the proposed RDMA patches for 5.3

This has been a more exciting merge window than usual: the late merge of the
new siw driver, combined with the absence of the 0-Day testing service and the
July long weekend, created a stream of build fixes for siw this week. I'm
hoping we got them all, but if there are more I will send them for rc1.

There is one non-textual conflict with an API change in netdev that will
require this patch in the merge commit to resolve:

diff --git a/drivers/infiniband/sw/siw/siw_cm.c b/drivers/infiniband/sw/siw/siw_cm.c
index 43f7f12e5f7f81..a7cde98e73e8c8 100644
--- a/drivers/infiniband/sw/siw/siw_cm.c
+++ b/drivers/infiniband/sw/siw/siw_cm.c
@@ -1963,6 +1963,7 @@ int siw_create_listen(struct iw_cm_id *id, int backlog)
 	if (id->local_addr.ss_family == AF_INET) {
 		struct in_device *in_dev = in_dev_get(dev);
 		struct sockaddr_in s_laddr, *s_raddr;
+		const struct in_ifaddr *ifa;

 		memcpy(&s_laddr, &id->local_addr, sizeof(s_laddr));
 		s_raddr = (struct sockaddr_in *)&id->remote_addr;
@@ -1973,8 +1974,7 @@ int siw_create_listen(struct iw_cm_id *id, int backlog)
 			&s_raddr->sin_addr, ntohs(s_raddr->sin_port));

 		rtnl_lock();
-		for_ifa(in_dev)
-		{
+		in_dev_for_each_ifa_rtnl(ifa, in_dev) {
 			if (ipv4_is_zeronet(s_laddr.sin_addr.s_addr) ||
 			    s_laddr.sin_addr.s_addr == ifa->ifa_address) {
 				s_laddr.sin_addr.s_addr = ifa->ifa_address;
@@ -1986,7 +1986,6 @@ int siw_create_listen(struct iw_cm_id *id, int backlog)
 					listeners++;
 			}
 		}
-		endfor_ifa(in_dev);
 		rtnl_unlock();
 		in_dev_put(in_dev);
 	} else if (id->local_addr.ss_family == AF_INET6) {

The tag for-linus-merged, which contains my merge resolution against your tree, is also available to pull.

The following changes since commit f8efee08dd9d41ab71010e9b16c9ead51753b7d6:

  net/mlx5: Add rts2rts_qp_counters_set_id field in hca cap (2019-07-04 21:36:33 +0300)

are available in the Git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma.git tags/for-linus

for you to fetch changes up to 0b043644c0ca601cb19943a81aa1f1455dbe9461:

  RMDA/siw: Require a 64 bit arch (2019-07-12 12:12:06 -0300)

----------------------------------------------------------------
5.3 Merge window RDMA pull request

A smaller cycle this time. Notably we see another new driver, 'Soft
iWarp', and the deletion of an ancient unused driver for nes.

- Revise and simplify the signature offload RDMA MR APIs

- More progress on hoisting object allocation boilerplate code out of the
  drivers

- Driver bug fixes and revisions for hns, hfi1, efa, cxgb4, qib, i40iw

- Tree-wide cleanups: struct_size, put_user_page, xarray, rst doc conversion

- Removal of obsolete ib_ucm chardev and nes driver

- netlink based discovery of chardevs and autoloading of the modules
  providing them

- Move more of the rdmavt/hfi1 uapi to include/uapi/rdma

- New driver 'siw' for software based iWarp running on top of netdev,
  much like rxe's software RoCE.

- mlx5 feature to report events in their raw devx format to userspace

- Expose per-object counters through rdma tool

- Adaptive interrupt moderation for RDMA (DIM), sharing the DIM core
  from netdev

----------------------------------------------------------------
Bart Van Assche (1):
      RDMA/srp: Accept again source addresses that do not have a port number

Bernard Metzler (12):
      rdma/siw: iWarp wire packet format
      rdma/siw: main include file
      rdma/siw: network and RDMA core interface
      rdma/siw: connection management
      rdma/siw: application interface
      rdma/siw: application buffer management
      rdma/siw: queue pair methods
      rdma/siw: transmit path
      rdma/siw: receive path
      rdma/siw: completion queue methods
      rdma/siw: addition to kernel build environment
      RDMA/siw: Remove unnecessary kthread create/destroy printouts

Colin Ian King (4):
      RDMA/hns: fix inverted logic of readl read and shift
      RDMA/hns: fix potential integer overflow on left shift
      RDMA/hns: fix spelling mistake "attatch" -> "attach"
      RDMA/uverbs: remove redundant assignment to variable ret

Dag Moxnes (1):
      RDMA/core: Fix race when resolving IP address

Dan Carpenter (2):
      RDMA/uverbs: check for allocation failure in uapi_add_elm()
      RDMA/hns: Fix an error code in hns_roce_set_user_sq_size()

Daniel Kranzdorf (1):
      RDMA/efa: Entropy in admin commands id

Danit Goldberg (1):
      IB/mlx5: Report correctly tag matching rendezvous capability

Dennis Dalessandro (2):
      IB/hfi1: Remove extra brackets from an if
      IB/hfi1: No need to use try_module_get for debugfs

Doug Ledford (3):
      Merge remote-tracking branch 'mlx5-next/mlx5-next' into HEAD
      RDMA/netlink: Resort policy array
      RDMA/netlink: Audit policy settings for netlink attributes

Firas Jahjah (1):
      RDMA/efa: Print address on AH creation failure

Fuqian Huang (3):
      IB: Remove unneeded memset
      IB/ipoib: Remove memset after vzalloc in ipoib_cm.c
      IB/i40iw: Use kmemdup rather than open coding

Gal Pressman (6):
      RDMA/efa: Use kvzalloc instead of kzalloc with fallback
      RDMA/efa: Remove unneeded admin commands abort flow
      RDMA/efa: Use rdma block iterator in chunk list creation
      RDMA/efa: Remove unused includes
      RDMA/efa: Use API to get contiguous memory blocks aligned to device supported page size
      RDMA/efa: Be consistent with success flow return value

Geert Uytterhoeven (2):
      IB/hfi1: Spelling s/statisfied/satisfied/
      rdma/siw: Add missing dependencies on LIBCRC32C and DMA_VIRT_OPS

Gustavo A. R. Silva (5):
      IB/rdmavt: Use struct_size() helper
      IB/qib: Use struct_size() helper
      IB/hfi1: Use struct_size() helper
      RDMA/ucma: Use struct_size() helper
      RDMA/siw: Mark expected switch fall-throughs

Israel Rukshin (15):
      IB/iser: Refactor iscsi_iser_check_protection function
      IB/iser: Remove unused sig_attrs argument
      IB/isert: Remove unused sig_attrs argument
      RDMA/rw: Fix doc typo
      RDMA/rw: Print the correct number of sig MRs
      RDMA/core: Fix doc typo
      RDMA/core: Introduce IB_MR_TYPE_INTEGRITY and ib_alloc_mr_integrity API
      IB/iser: Use IB_WR_REG_MR_INTEGRITY for PI handover
      IB/iser: Unwind WR union at iser_tx_desc
      RDMA/core: Add an integrity MR pool support
      RDMA/core: Rename signature qp create flag and signature device capability
      RDMA/rw: Introduce rdma_rw_inv_key helper
      RDMA/rw: Use IB_WR_REG_MR_INTEGRITY for PI handover
      RDMA/mlx5: Remove unused IB_WR_REG_SIG_MR code
      RDMA/mlx5: Improve PI handover performance

Jason Gunthorpe (23):
      RDMA/umem: Move page_shift from ib_umem to ib_odp_umem
      rdma: Delete the ib_ucm module
      RDMA: Move driver_id into struct ib_device_ops
      RDMA: Move uverbs_abi_ver into struct ib_device_ops
      RDMA: Move owner into struct ib_device_ops
      rdma: Remove nes
      RDMA: Move rdma_node_type to uapi/
      RDMA: Add NLDEV_GET_CHARDEV to allow char dev discovery and autoload
      RDMA: Report available cdevs through RDMA_NLDEV_CMD_GET_CHARDEV
      RDMA/odp: Fix missed unlock in non-blocking invalidate_start
      RDMA/uverbs: Use offsetofend instead of opencoding
      RDMA/odp: Do not leak dma maps when working with huge pages
      Merge tag 'v5.2-rc6' into rdma.git for-next
      Merge branch 'siw' into rdma.git for-next
      Merge mlx5-next into rdma for-next
      Merge mlx5-next into rdma for-next
      RDMA/siw: Fix DEFINE_PER_CPU compilation when ARCH_NEEDS_WEAK_PER_CPU
      RDMA/rvt: Do not use a kernel header in the ABI
      Merge branch 'vhca-tunnel' into rdma.git for-next
      Merge tag 'blk-dim-v2' into rdma.git for-next
      RDMA/core: Make rdma_counter.h compile stand alone
      RDMA/siw: Add missing rtnl_lock around access to ifa
      RMDA/siw: Require a 64 bit arch

John Hubbard (1):
      RDMA: Convert put_page() to put_user_page*()

Kamal Heib (3):
      RDMA/core: Return void from ib_device_check_mandatory()
      RDMA/ipoib: implement ethtool .get_link() callback
      RDMA/ipoib: Remove check for ETH_SS_TEST

Kamenee Arumugam (4):
      IB/hfi1: Move rvt_cq_wc struct into uapi directory
      IB/hfi1: Move receive work queue struct into uapi directory
      IB/rdmavt: Fracture single lock used for posting and processing RWQEs
      IB/{hfi1, qib, rdmavt}: Put qp in error state when cq is full

Konstantin Taranov (1):
      RDMA/rxe: Fill in wc byte_len with IB_WC_RECV_RDMA_WITH_IMM

Lang Cheng (6):
      RDMA/hns: Move spin_lock_irqsave to the correct place
      RDMA/hns: Remove jiffies operation in disable interrupt context
      RDMA/hns: reset function when removing module
      RDMA/hns: Set reset flag when hw resetting
      RDMA/hns: Use %pK format pointer print
      RDMA/hns: Clean up unnecessary variable initialization

Leon Romanovsky (20):
      rds: Don't check return value from destroy CQ
      RDMA/ipoib: Remove check of destroy CQ
      RDMA/core: Make ib_destroy_cq() void
      RDMA/nes: Remove useless NULL checks
      RDMA/i40iw: Remove useless NULL checks
      RDMA/nes: Remove second wait queue initialization call
      RDMA/efa: Remove check that prevents destroy of resources in error flows
      RDMA/cxgb3: Use sizeof() notation instead of plain sizeof
      RDMA/cxgb3: Don't expose DMA addresses
      RDMA/cxgb3: Delete and properly mark unimplemented resize CQ function
      RDMA/cxgb4: Use sizeof() notation
      RDMA/cxgb4: Don't expose DMA addresses
      RDMA/nes: Avoid memory allocation during CQ destroy
      RDMA: Clean destroy CQ in drivers do not return errors
      RDMA: Convert CQ allocations to be under core responsibility
      RDMA: Convert destroy_wq to be void
      RDMA: Check umem pointer validity prior to release
      RDMa/hns: Don't stuck in endless timeout loop
      RDMA/mlx5: Use proper allocation API to get zeroed memory
      RDMA/mlx5: Set RDMA DIM to be enabled by default

Lijun Ou (11):
      RDMA/hns: Update CQE specifications
      RDMA/hns: Replace magic numbers with #defines
      RDMA/hns: Bugfix for posting multiple srq work request
      RDMA/hns: Bugfix for filling the sge of srq
      RDMA/hns: Add mtr support for mixed multihop addressing
      RDMA/hns: Add a group interfaces for optimizing buffers getting flow
      RDMA/hns: Fix bug when wqe num is larger than 16K
      RDMA/hns: Cleanup unnecessary exported symbols
      RDMA/hns: Fix building modular hns
      RDMA/hns: Bugfix for cleaning mtr
      RDMA/hns: Bugfix for calculating qp buffer size

Liu, Changcheng (1):
      RDMA/i40iw: Set queue pair state when being queried

Maksym Planeta (1):
      ibverbs/rxe: Remove variable self-initialization

Maor Gottlieb (2):
      RDMA/mlx5: Consider eswitch encap mode
      RDMA/mlx5: Enable decap and packet reformat on FDB

Mark Zhang (16):
      RDMA/restrack: Introduce statistic counter
      RDMA/restrack: Add an API to attach a task to a resource
      RDMA/restrack: Make is_visible_in_pid_ns() as an API
      RDMA/counter: Add set/clear per-port auto mode support
      RDMA/counter: Add "auto" configuration mode support
      IB/mlx5: Support set qp counter
      IB/mlx5: Add counter set id as a parameter for mlx5_ib_query_q_counters()
      IB/mlx5: Support statistic q counter configuration
      RDMA/nldev: Allow counter auto mode configration through RDMA netlink
      RDMA/netlink: Implement counter dumpit calback
      IB/mlx5: Add counter_alloc_stats() and counter_update_stats() support
      RDMA/core: Get sum value of all counters when perform a sysfs stat read
      RDMA/counter: Allow manual mode configuration support
      RDMA/nldev: Allow counter manual mode configration through RDMA netlink
      RDMA/nldev: Allow get counter mode through RDMA netlink
      RDMA/nldev: Allow get default counter statistics through RDMA netlink

Matthew Wilcox (2):
      ucma: Convert multicast_idr to XArray
      ucma: Convert ctx_idr to XArray

Mauro Carvalho Chehab (2):
      docs: infiniband: convert docs to ReST and rename to *.rst
      docs: infiniband: add it to the driver-api bookset

Max Gurtovoy (15):
      RDMA/rw: Add info regarding SG count failure
      RDMA/core: Introduce new header file for signature operations
      RDMA/core: Save the MR type in the ib_mr structure
      RDMA/core: Introduce ib_map_mr_sg_pi to map data/protection sgl's
      RDMA/core: Add signature attrs element for ib_mr structure
      RDMA/mlx5: Implement mlx5_ib_map_mr_sg_pi and mlx5_ib_alloc_mr_integrity
      RDMA/mlx5: Add attr for max number page list length for PI operation
      RDMA/mlx5: Pass UMR segment flags instead of boolean
      RDMA/mlx5: Update set_sig_data_segment attribute for new signature API
      RDMA/mlx5: Introduce and implement new IB_WR_REG_MR_INTEGRITY work request
      RDMA/core: Validate integrity handover device cap
      RDMA/mlx5: Use PA mapping for PI handover
      RDMA/mlx5: Refactor MR descriptors allocation
      net/mlx5: Introduce VHCA tunnel device capability
      IB/mlx5: Implement VHCA tunnel mechanism in DEVX

Michael J. Ruhl (4):
      IB/rdmavt: Set QP allowed opcodes after QP allocation
      IB/{rdmavt, hfi1, qib}: Remove AH refcount for UD QPs
      IB/{rdmavt, hfi1, qib}: Add helpers to hide SWQE WR details
      IB/hfi1: Reduce excessive aspm inlines

Mike Marciniszyn (5):
      IB/rdmavt: Add new completion inline
      IB/{rdmavt, qib, hfi1}: Convert to new completion API
      IB/hfi1: Add missing INVALIDATE opcodes for trace
      IB/rdmavt: Enhance trace information for FRWR debug
      IB/rdmavt: Add trace for map_mr_sg

Nathan Chancellor (2):
      IB/rdmavt: Fix variable shadowing issue in rvt_create_cq
      rdma/siw: Use proper enumerated type in map_cqe_status

Nirranjan Kirubaharan (1):
      iw_cxgb4: Fix qpid leak

Parav Pandit (2):
      IB/mlx5: Fixed reporting counters on 2nd port for Dual port RoCE
      IB/core: Work on the caller socket net namespace in nldev_newlink()

Qian Cai (1):
      RDMA/core: Fix -Wunused-const-variable warnings

Sagiv Ozeri (1):
      RDMA/qedr: Fix incorrect device rate.

Valentine Fatiev (1):
      IB/ipoib: Add child to parent list only if device initialized

Xi Wang (1):
      RDMA/hns: Fixs hw access invalid dma memory error

Yamin Friedman (3):
      linux/dim: Implement RDMA adaptive moderation (DIM)
      RDMA/core: Provide RDMA DIM support for ULPs
      RDMA/nldev: Added configuration of RDMA dynamic interrupt moderation to netlink

Yangyang Li (1):
      RDMA/hns: Modify ba page size for cqe

Yishai Hadas (6):
      IB/mlx5: Introduce MLX5_IB_OBJECT_DEVX_ASYNC_EVENT_FD
      IB/mlx5: Register DEVX with mlx5_core to get async events
      IB/mlx5: Enable subscription for device events over DEVX
      IB/mlx5: Implement DEVX dispatching event
      IB/mlx5: Add DEVX support for CQ events
      IB/mlx5: DEVX cleanup mdev

Yixian Liu (1):
      RDMA/hns: Remove unnecessary print message in aeq

YueHaibing (3):
      IB/hfi1: Remove set but not used variables 'offset' and 'fspsn'
      RDMA/hns: Remove set but not used variable 'fclr_write_fail_flag'
      rdma/siw: Remove set but not used variable 's'

Yuval Shaia (1):
      IB/mlx4: Delete unused func arg

chenglang (1):
      RDMA/hns: Fixup qp release bug

 Documentation/ABI/stable/sysfs-class-infiniband    |   17 -
 Documentation/index.rst                            |    1 +
 .../{core_locking.txt => core_locking.rst}         |   64 +-
 Documentation/infiniband/index.rst                 |   23 +
 Documentation/infiniband/{ipoib.txt => ipoib.rst}  |   24 +-
 .../infiniband/{opa_vnic.txt => opa_vnic.rst}      |  110 +-
 Documentation/infiniband/{sysfs.txt => sysfs.rst}  |    4 +-
 .../{tag_matching.txt => tag_matching.rst}         |    5 +
 .../infiniband/{user_mad.txt => user_mad.rst}      |   33 +-
 .../infiniband/{user_verbs.txt => user_verbs.rst}  |   12 +-
 MAINTAINERS                                        |   15 +-
 drivers/infiniband/Kconfig                         |   14 +-
 drivers/infiniband/core/Makefile                   |    5 +-
 drivers/infiniband/core/addr.c                     |    2 +-
 drivers/infiniband/core/core_priv.h                |   10 +
 drivers/infiniband/core/counters.c                 |  634 ++++
 drivers/infiniband/core/cq.c                       |   95 +-
 drivers/infiniband/core/device.c                   |  150 +-
 drivers/infiniband/core/mr_pool.c                  |    8 +-
 drivers/infiniband/core/nldev.c                    |  800 +++-
 drivers/infiniband/core/restrack.c                 |   49 +-
 drivers/infiniband/core/restrack.h                 |    3 +
 drivers/infiniband/core/rw.c                       |  201 +-
 drivers/infiniband/core/sysfs.c                    |   16 +-
 drivers/infiniband/core/ucm.c                      | 1350 -------
 drivers/infiniband/core/ucma.c                     |  114 +-
 drivers/infiniband/core/umem.c                     |   13 +-
 drivers/infiniband/core/umem_odp.c                 |  106 +-
 drivers/infiniband/core/user_mad.c                 |   53 +-
 drivers/infiniband/core/uverbs_cmd.c               |   28 +-
 drivers/infiniband/core/uverbs_main.c              |   40 +-
 drivers/infiniband/core/uverbs_std_types_cq.c      |   19 +-
 drivers/infiniband/core/uverbs_std_types_mr.c      |    1 +
 drivers/infiniband/core/uverbs_uapi.c              |    4 +-
 drivers/infiniband/core/verbs.c                    |  165 +-
 drivers/infiniband/hw/Makefile                     |    1 -
 drivers/infiniband/hw/bnxt_re/ib_verbs.c           |   66 +-
 drivers/infiniband/hw/bnxt_re/ib_verbs.h           |    9 +-
 drivers/infiniband/hw/bnxt_re/main.c               |    8 +-
 drivers/infiniband/hw/cxgb3/cxio_hal.c             |   33 +-
 drivers/infiniband/hw/cxgb3/cxio_hal.h             |    3 +-
 drivers/infiniband/hw/cxgb3/iwch_cm.c              |    2 +-
 drivers/infiniband/hw/cxgb3/iwch_provider.c        |  160 +-
 drivers/infiniband/hw/cxgb4/cm.c                   |   21 +-
 drivers/infiniband/hw/cxgb4/cq.c                   |   55 +-
 drivers/infiniband/hw/cxgb4/device.c               |    9 +-
 drivers/infiniband/hw/cxgb4/iw_cxgb4.h             |   11 +-
 drivers/infiniband/hw/cxgb4/mem.c                  |    8 +-
 drivers/infiniband/hw/cxgb4/provider.c             |    9 +-
 drivers/infiniband/hw/cxgb4/qp.c                   |   95 +-
 drivers/infiniband/hw/cxgb4/resource.c             |   16 +-
 drivers/infiniband/hw/efa/efa.h                    |    9 +-
 drivers/infiniband/hw/efa/efa_com.c                |  118 +-
 drivers/infiniband/hw/efa/efa_com.h                |    1 -
 drivers/infiniband/hw/efa/efa_com_cmd.c            |    8 +-
 drivers/infiniband/hw/efa/efa_main.c               |   10 +-
 drivers/infiniband/hw/efa/efa_verbs.c              |  248 +-
 drivers/infiniband/hw/hfi1/Makefile                |    1 +
 drivers/infiniband/hw/hfi1/aspm.c                  |  270 ++
 drivers/infiniband/hw/hfi1/aspm.h                  |  262 +-
 drivers/infiniband/hw/hfi1/debugfs.c               |    5 +-
 drivers/infiniband/hw/hfi1/mad.c                   |    9 +-
 drivers/infiniband/hw/hfi1/pcie.c                  |    6 +-
 drivers/infiniband/hw/hfi1/pio.c                   |    3 +-
 drivers/infiniband/hw/hfi1/qp.c                    |    8 +-
 drivers/infiniband/hw/hfi1/rc.c                    |   29 +-
 drivers/infiniband/hw/hfi1/tid_rdma.c              |    7 +-
 drivers/infiniband/hw/hfi1/trace_ibhdrs.h          |    2 +
 drivers/infiniband/hw/hfi1/uc.c                    |    3 +-
 drivers/infiniband/hw/hfi1/ud.c                    |   36 +-
 drivers/infiniband/hw/hfi1/user_pages.c            |   11 +-
 drivers/infiniband/hw/hfi1/verbs.c                 |    6 +-
 drivers/infiniband/hw/hns/Kconfig                  |   15 +-
 drivers/infiniband/hw/hns/Makefile                 |   15 +-
 drivers/infiniband/hw/hns/hns_roce_alloc.c         |  101 +-
 drivers/infiniband/hw/hns/hns_roce_cmd.c           |    6 +-
 drivers/infiniband/hw/hns/hns_roce_cq.c            |   81 +-
 drivers/infiniband/hw/hns/hns_roce_db.c            |   12 +-
 drivers/infiniband/hw/hns/hns_roce_device.h        |  108 +-
 drivers/infiniband/hw/hns/hns_roce_hem.c           |  504 ++-
 drivers/infiniband/hw/hns/hns_roce_hem.h           |   16 +-
 drivers/infiniband/hw/hns/hns_roce_hw_v1.c         |   79 +-
 drivers/infiniband/hw/hns/hns_roce_hw_v2.c         |  280 +-
 drivers/infiniband/hw/hns/hns_roce_hw_v2.h         |   23 +-
 drivers/infiniband/hw/hns/hns_roce_main.c          |   31 +-
 drivers/infiniband/hw/hns/hns_roce_mr.c            |  166 +-
 drivers/infiniband/hw/hns/hns_roce_pd.c            |    4 +-
 drivers/infiniband/hw/hns/hns_roce_qp.c            |  220 +-
 drivers/infiniband/hw/hns/hns_roce_srq.c           |   40 +-
 drivers/infiniband/hw/i40iw/i40iw_cm.c             |    4 +-
 drivers/infiniband/hw/i40iw/i40iw_verbs.c          |   56 +-
 drivers/infiniband/hw/mlx4/cq.c                    |   43 +-
 drivers/infiniband/hw/mlx4/main.c                  |   21 +-
 drivers/infiniband/hw/mlx4/mlx4_ib.h               |    9 +-
 drivers/infiniband/hw/mlx4/mr.c                    |   16 +-
 drivers/infiniband/hw/mlx4/qp.c                    |   11 +-
 drivers/infiniband/hw/mlx4/srq.c                   |    9 +-
 drivers/infiniband/hw/mlx5/cq.c                    |   56 +-
 drivers/infiniband/hw/mlx5/devx.c                  | 1053 +++++-
 drivers/infiniband/hw/mlx5/mad.c                   |   60 +-
 drivers/infiniband/hw/mlx5/main.c                  |  157 +-
 drivers/infiniband/hw/mlx5/mem.c                   |   20 +-
 drivers/infiniband/hw/mlx5/mlx5_ib.h               |   47 +-
 drivers/infiniband/hw/mlx5/mr.c                    |  554 ++-
 drivers/infiniband/hw/mlx5/odp.c                   |   23 +-
 drivers/infiniband/hw/mlx5/qp.c                    |  312 +-
 drivers/infiniband/hw/mthca/mthca_allocator.c      |    2 -
 drivers/infiniband/hw/mthca/mthca_memfree.c        |    6 +-
 drivers/infiniband/hw/mthca/mthca_provider.c       |   52 +-
 drivers/infiniband/hw/nes/Kconfig                  |   16 -
 drivers/infiniband/hw/nes/Makefile                 |    4 -
 drivers/infiniband/hw/nes/nes.c                    | 1211 ------
 drivers/infiniband/hw/nes/nes.h                    |  574 ---
 drivers/infiniband/hw/nes/nes_cm.c                 | 3992 --------------------
 drivers/infiniband/hw/nes/nes_cm.h                 |  470 ---
 drivers/infiniband/hw/nes/nes_context.h            |  193 -
 drivers/infiniband/hw/nes/nes_hw.c                 | 3887 -------------------
 drivers/infiniband/hw/nes/nes_hw.h                 | 1380 -------
 drivers/infiniband/hw/nes/nes_mgt.c                | 1155 ------
 drivers/infiniband/hw/nes/nes_mgt.h                |   97 -
 drivers/infiniband/hw/nes/nes_nic.c                | 1870 ---------
 drivers/infiniband/hw/nes/nes_utils.c              |  916 -----
 drivers/infiniband/hw/nes/nes_verbs.c              | 3759 ------------------
 drivers/infiniband/hw/nes/nes_verbs.h              |  198 -
 drivers/infiniband/hw/ocrdma/ocrdma_hw.c           |   11 +-
 drivers/infiniband/hw/ocrdma/ocrdma_hw.h           |    2 +-
 drivers/infiniband/hw/ocrdma/ocrdma_main.c         |    8 +-
 drivers/infiniband/hw/ocrdma/ocrdma_verbs.c        |   38 +-
 drivers/infiniband/hw/ocrdma/ocrdma_verbs.h        |    7 +-
 drivers/infiniband/hw/qedr/main.c                  |    8 +-
 drivers/infiniband/hw/qedr/verbs.c                 |   82 +-
 drivers/infiniband/hw/qedr/verbs.h                 |    7 +-
 drivers/infiniband/hw/qib/qib_qp.c                 |    4 +-
 drivers/infiniband/hw/qib/qib_rc.c                 |   29 +-
 drivers/infiniband/hw/qib/qib_uc.c                 |    3 +-
 drivers/infiniband/hw/qib/qib_ud.c                 |   28 +-
 drivers/infiniband/hw/qib/qib_user_pages.c         |   11 +-
 drivers/infiniband/hw/qib/qib_user_sdma.c          |   11 +-
 drivers/infiniband/hw/qib/qib_verbs.c              |    6 +-
 drivers/infiniband/hw/usnic/usnic_ib.h             |    4 +
 drivers/infiniband/hw/usnic/usnic_ib_main.c        |    8 +-
 drivers/infiniband/hw/usnic/usnic_ib_verbs.c       |   22 +-
 drivers/infiniband/hw/usnic/usnic_ib_verbs.h       |    7 +-
 drivers/infiniband/hw/usnic/usnic_uiom.c           |    7 +-
 drivers/infiniband/hw/vmw_pvrdma/pvrdma.h          |    2 +-
 drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c       |   46 +-
 drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c     |    8 +-
 drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c       |    3 +-
 drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c       |   16 +-
 drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h    |    7 +-
 drivers/infiniband/sw/Makefile                     |    1 +
 drivers/infiniband/sw/rdmavt/ah.c                  |    6 +-
 drivers/infiniband/sw/rdmavt/cq.c                  |  250 +-
 drivers/infiniband/sw/rdmavt/cq.h                  |    7 +-
 drivers/infiniband/sw/rdmavt/mr.c                  |    6 +-
 drivers/infiniband/sw/rdmavt/qp.c                  |  402 +-
 drivers/infiniband/sw/rdmavt/qp.h                  |    2 +
 drivers/infiniband/sw/rdmavt/rc.c                  |   41 +-
 drivers/infiniband/sw/rdmavt/srq.c                 |   69 +-
 drivers/infiniband/sw/rdmavt/trace_mr.h            |   56 +-
 drivers/infiniband/sw/rdmavt/vt.c                  |    7 +-
 drivers/infiniband/sw/rdmavt/vt.h                  |    9 +
 drivers/infiniband/sw/rxe/rxe_comp.c               |    2 +-
 drivers/infiniband/sw/rxe/rxe_mr.c                 |    3 +-
 drivers/infiniband/sw/rxe/rxe_pool.c               |    1 +
 drivers/infiniband/sw/rxe/rxe_resp.c               |    5 +-
 drivers/infiniband/sw/rxe/rxe_verbs.c              |   40 +-
 drivers/infiniband/sw/rxe/rxe_verbs.h              |    3 +-
 drivers/infiniband/sw/siw/Kconfig                  |   18 +
 drivers/infiniband/sw/siw/Makefile                 |   11 +
 drivers/infiniband/sw/siw/iwarp.h                  |  380 ++
 drivers/infiniband/sw/siw/siw.h                    |  745 ++++
 drivers/infiniband/sw/siw/siw_cm.c                 | 2070 ++++++++++
 drivers/infiniband/sw/siw/siw_cm.h                 |  133 +
 drivers/infiniband/sw/siw/siw_cq.c                 |  101 +
 drivers/infiniband/sw/siw/siw_main.c               |  685 ++++
 drivers/infiniband/sw/siw/siw_mem.c                |  460 +++
 drivers/infiniband/sw/siw/siw_mem.h                |   74 +
 drivers/infiniband/sw/siw/siw_qp.c                 | 1322 +++++++
 drivers/infiniband/sw/siw/siw_qp_rx.c              | 1458 +++++++
 drivers/infiniband/sw/siw/siw_qp_tx.c              | 1269 +++++++
 drivers/infiniband/sw/siw/siw_verbs.c              | 1760 +++++++++
 drivers/infiniband/sw/siw/siw_verbs.h              |   91 +
 drivers/infiniband/ulp/ipoib/Kconfig               |    2 +-
 drivers/infiniband/ulp/ipoib/ipoib_cm.c            |    1 -
 drivers/infiniband/ulp/ipoib/ipoib_ethtool.c       |    3 +-
 drivers/infiniband/ulp/ipoib/ipoib_main.c          |   34 +-
 drivers/infiniband/ulp/ipoib/ipoib_verbs.c         |    7 +-
 drivers/infiniband/ulp/iser/iscsi_iser.c           |   12 +-
 drivers/infiniband/ulp/iser/iscsi_iser.h           |   64 +-
 drivers/infiniband/ulp/iser/iser_initiator.c       |   12 +-
 drivers/infiniband/ulp/iser/iser_memory.c          |  121 +-
 drivers/infiniband/ulp/iser/iser_verbs.c           |  156 +-
 drivers/infiniband/ulp/isert/ib_isert.c            |   19 +-
 drivers/infiniband/ulp/srp/ib_srp.c                |   21 +-
 drivers/nvme/host/rdma.c                           |    2 +-
 include/linux/dim.h                                |   23 +
 include/linux/mlx5/mlx5_ifc.h                      |    6 +-
 include/linux/mlx5/qp.h                            |    4 +-
 include/rdma/ib_umem.h                             |   19 +-
 include/rdma/ib_umem_odp.h                         |   20 +
 include/rdma/ib_verbs.h                            |  247 +-
 include/rdma/mr_pool.h                             |    2 +-
 include/rdma/rdma_counter.h                        |   65 +
 include/rdma/rdma_netlink.h                        |    8 +
 include/rdma/rdma_vt.h                             |    5 +-
 include/rdma/rdmavt_cq.h                           |   25 +-
 include/rdma/rdmavt_qp.h                           |  312 +-
 include/rdma/restrack.h                            |    9 +-
 include/rdma/rw.h                                  |    9 -
 include/rdma/signature.h                           |  122 +
 include/uapi/rdma/ib_user_cm.h                     |  326 --
 include/uapi/rdma/mlx5_user_ioctl_cmds.h           |   19 +
 include/uapi/rdma/mlx5_user_ioctl_verbs.h          |    9 +
 include/uapi/rdma/rdma_netlink.h                   |   86 +-
 include/uapi/rdma/rdma_user_ioctl_cmds.h           |    1 +
 include/uapi/rdma/rvt-abi.h                        |   66 +
 include/uapi/rdma/siw-abi.h                        |  185 +
 lib/dim/Makefile                                   |    6 +-
 lib/dim/rdma_dim.c                                 |  108 +
 net/rds/ib_cm.c                                    |    8 +-
 221 files changed, 18855 insertions(+), 24841 deletions(-)
(diffstat from tag for-linus-merged)

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply related

* Re: [PATCH v18 07/15] fs/namespace: untag user pointers in copy_mount_options
From: Andrey Konovalov @ 2019-07-15 16:00 UTC (permalink / raw)
  To: Al Viro
  Cc: Linux ARM, Linux Memory Management List, LKML, amd-gfx, dri-devel,
	linux-rdma, linux-media, kvm, open list:KERNEL SELFTEST FRAMEWORK,
	Vincenzo Frascino, Will Deacon, Mark Rutland, Andrew Morton,
	Greg Kroah-Hartman, Kees Cook, Yishai Hadas, Felix Kuehling,
	Alexander Deucher, Christian Koenig, Mauro Carvalho Chehab,
	Jens Wiklander, Alex Williamson, Leon Romanovsky,
	Luc Van Oostenryck, Dave Martin, Khalid Aziz, enh,
	Jason Gunthorpe, Christoph Hellwig, Dmitry Vyukov,
	Kostya Serebryany, Evgeniy Stepanov, Lee Smith,
	Ramana Radhakrishnan, Jacob Bramley, Ruben Ayrapetyan,
	Robin Murphy, Kevin Brodsky, Szabolcs Nagy, Catalin Marinas
In-Reply-To: <20190624175009.GM29120@arrakis.emea.arm.com>

On Mon, Jun 24, 2019 at 7:50 PM Catalin Marinas <catalin.marinas@arm.com> wrote:
>
> On Mon, Jun 24, 2019 at 04:32:52PM +0200, Andrey Konovalov wrote:
> > This patch is a part of a series that extends kernel ABI to allow to pass
> > tagged user pointers (with the top byte set to something else other than
> > 0x00) as syscall arguments.
> >
> > In copy_mount_options a user address is being subtracted from TASK_SIZE.
> > If the address is lower than TASK_SIZE, the size is calculated to not
> > allow the exact_copy_from_user() call to cross TASK_SIZE boundary.
> > However if the address is tagged, then the size will be calculated
> > incorrectly.
> >
> > Untag the address before subtracting.
> >
> > Reviewed-by: Khalid Aziz <khalid.aziz@oracle.com>
> > Reviewed-by: Vincenzo Frascino <vincenzo.frascino@arm.com>
> > Reviewed-by: Kees Cook <keescook@chromium.org>
> > Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
> > Signed-off-by: Andrey Konovalov <andreyknvl@google.com>
> > ---
> >  fs/namespace.c | 2 +-
> >  1 file changed, 1 insertion(+), 1 deletion(-)
> >
> > diff --git a/fs/namespace.c b/fs/namespace.c
> > index 7660c2749c96..ec78f7223917 100644
> > --- a/fs/namespace.c
> > +++ b/fs/namespace.c
> > @@ -2994,7 +2994,7 @@ void *copy_mount_options(const void __user * data)
> >        * the remainder of the page.
> >        */
> >       /* copy_from_user cannot cross TASK_SIZE ! */
> > -     size = TASK_SIZE - (unsigned long)data;
> > +     size = TASK_SIZE - (unsigned long)untagged_addr(data);
> >       if (size > PAGE_SIZE)
> >               size = PAGE_SIZE;
>
> I think this patch needs an ack from Al Viro (cc'ed).
>
> --
> Catalin

Hi Al,

Could you take a look and give your acked-by?

Thanks!

^ permalink raw reply

* Re: [PATCH v18 08/15] userfaultfd: untag user pointers
From: Andrey Konovalov @ 2019-07-15 16:00 UTC (permalink / raw)
  To: Catalin Marinas
  Cc: Linux ARM, Linux Memory Management List, LKML, amd-gfx, dri-devel,
	linux-rdma, linux-media, kvm, open list:KERNEL SELFTEST FRAMEWORK,
	Vincenzo Frascino, Will Deacon, Mark Rutland, Andrew Morton,
	Greg Kroah-Hartman, Kees Cook, Yishai Hadas, Felix Kuehling,
	Alexander Deucher, Christian Koenig, Mauro Carvalho Chehab,
	Jens Wiklander, Alex Williamson, Leon Romanovsky,
	Luc Van Oostenryck, Dave Martin, Khalid Aziz, enh,
	Jason Gunthorpe, Christoph Hellwig, Dmitry Vyukov,
	Kostya Serebryany, Evgeniy Stepanov, Lee Smith,
	Ramana Radhakrishnan, Jacob Bramley, Ruben Ayrapetyan,
	Robin Murphy, Kevin Brodsky, Szabolcs Nagy, Al Viro
In-Reply-To: <20190624175120.GN29120@arrakis.emea.arm.com>

On Mon, Jun 24, 2019 at 7:51 PM Catalin Marinas <catalin.marinas@arm.com> wrote:
>
> On Mon, Jun 24, 2019 at 04:32:53PM +0200, Andrey Konovalov wrote:
> > This patch is a part of a series that extends kernel ABI to allow to pass
> > tagged user pointers (with the top byte set to something else other than
> > 0x00) as syscall arguments.
> >
> > userfaultfd code uses provided user pointers for vma lookups, which can
> > only be done with untagged pointers.
> >
> > Untag user pointers in validate_range().
> >
> > Reviewed-by: Vincenzo Frascino <vincenzo.frascino@arm.com>
> > Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
> > Reviewed-by: Kees Cook <keescook@chromium.org>
> > Signed-off-by: Andrey Konovalov <andreyknvl@google.com>
> > ---
> >  fs/userfaultfd.c | 22 ++++++++++++----------
> >  1 file changed, 12 insertions(+), 10 deletions(-)
>
> Same here, it needs an ack from Al Viro.

Hi Al,

Could you take a look at this one as well and give your acked-by?

Thanks!

>
> > diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
> > index ae0b8b5f69e6..c2be36a168ca 100644
> > --- a/fs/userfaultfd.c
> > +++ b/fs/userfaultfd.c
> > @@ -1261,21 +1261,23 @@ static __always_inline void wake_userfault(struct userfaultfd_ctx *ctx,
> >  }
> >
> >  static __always_inline int validate_range(struct mm_struct *mm,
> > -                                       __u64 start, __u64 len)
> > +                                       __u64 *start, __u64 len)
> >  {
> >       __u64 task_size = mm->task_size;
> >
> > -     if (start & ~PAGE_MASK)
> > +     *start = untagged_addr(*start);
> > +
> > +     if (*start & ~PAGE_MASK)
> >               return -EINVAL;
> >       if (len & ~PAGE_MASK)
> >               return -EINVAL;
> >       if (!len)
> >               return -EINVAL;
> > -     if (start < mmap_min_addr)
> > +     if (*start < mmap_min_addr)
> >               return -EINVAL;
> > -     if (start >= task_size)
> > +     if (*start >= task_size)
> >               return -EINVAL;
> > -     if (len > task_size - start)
> > +     if (len > task_size - *start)
> >               return -EINVAL;
> >       return 0;
> >  }
> > @@ -1325,7 +1327,7 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
> >               goto out;
> >       }
> >
> > -     ret = validate_range(mm, uffdio_register.range.start,
> > +     ret = validate_range(mm, &uffdio_register.range.start,
> >                            uffdio_register.range.len);
> >       if (ret)
> >               goto out;
> > @@ -1514,7 +1516,7 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx,
> >       if (copy_from_user(&uffdio_unregister, buf, sizeof(uffdio_unregister)))
> >               goto out;
> >
> > -     ret = validate_range(mm, uffdio_unregister.start,
> > +     ret = validate_range(mm, &uffdio_unregister.start,
> >                            uffdio_unregister.len);
> >       if (ret)
> >               goto out;
> > @@ -1665,7 +1667,7 @@ static int userfaultfd_wake(struct userfaultfd_ctx *ctx,
> >       if (copy_from_user(&uffdio_wake, buf, sizeof(uffdio_wake)))
> >               goto out;
> >
> > -     ret = validate_range(ctx->mm, uffdio_wake.start, uffdio_wake.len);
> > +     ret = validate_range(ctx->mm, &uffdio_wake.start, uffdio_wake.len);
> >       if (ret)
> >               goto out;
> >
> > @@ -1705,7 +1707,7 @@ static int userfaultfd_copy(struct userfaultfd_ctx *ctx,
> >                          sizeof(uffdio_copy)-sizeof(__s64)))
> >               goto out;
> >
> > -     ret = validate_range(ctx->mm, uffdio_copy.dst, uffdio_copy.len);
> > +     ret = validate_range(ctx->mm, &uffdio_copy.dst, uffdio_copy.len);
> >       if (ret)
> >               goto out;
> >       /*
> > @@ -1761,7 +1763,7 @@ static int userfaultfd_zeropage(struct userfaultfd_ctx *ctx,
> >                          sizeof(uffdio_zeropage)-sizeof(__s64)))
> >               goto out;
> >
> > -     ret = validate_range(ctx->mm, uffdio_zeropage.range.start,
> > +     ret = validate_range(ctx->mm, &uffdio_zeropage.range.start,
> >                            uffdio_zeropage.range.len);
> >       if (ret)
> >               goto out;
> > --
> > 2.22.0.410.gd8fdbe21b5-goog

^ permalink raw reply

* Re: [PATCH v18 11/15] IB/mlx4: untag user pointers in mlx4_get_umem_mr
From: Andrey Konovalov @ 2019-07-15 16:01 UTC (permalink / raw)
  To: Jason Gunthorpe
  Cc: Linux ARM, Linux Memory Management List, LKML, amd-gfx, dri-devel,
	linux-rdma, linux-media, kvm, open list:KERNEL SELFTEST FRAMEWORK,
	Vincenzo Frascino, Will Deacon, Mark Rutland, Andrew Morton,
	Greg Kroah-Hartman, Kees Cook, Yishai Hadas, Felix Kuehling,
	Alexander Deucher, Christian Koenig, Mauro Carvalho Chehab,
	Jens Wiklander, Alex Williamson, Leon Romanovsky,
	Luc Van Oostenryck, Dave Martin, Khalid Aziz, enh,
	Christoph Hellwig, Dmitry Vyukov, Kostya Serebryany,
	Evgeniy Stepanov, Lee Smith, Ramana Radhakrishnan, Jacob Bramley,
	Ruben Ayrapetyan, Robin Murphy, Kevin Brodsky, Szabolcs Nagy,
	Catalin Marinas
In-Reply-To: <20190624174015.GL29120@arrakis.emea.arm.com>

On Mon, Jun 24, 2019 at 7:40 PM Catalin Marinas <catalin.marinas@arm.com> wrote:
>
> On Mon, Jun 24, 2019 at 04:32:56PM +0200, Andrey Konovalov wrote:
> > This patch is a part of a series that extends kernel ABI to allow to pass
> > tagged user pointers (with the top byte set to something else other than
> > 0x00) as syscall arguments.
> >
> > mlx4_get_umem_mr() uses provided user pointers for vma lookups, which can
> > only be done with untagged pointers.
> >
> > Untag user pointers in this function.
> >
> > Signed-off-by: Andrey Konovalov <andreyknvl@google.com>
> > ---
> >  drivers/infiniband/hw/mlx4/mr.c | 7 ++++---
> >  1 file changed, 4 insertions(+), 3 deletions(-)
>
> Acked-by: Catalin Marinas <catalin.marinas@arm.com>
>
> This patch also needs an ack from the infiniband maintainers (Jason).

Hi Jason,

Could you take a look and give your acked-by?

Thanks!

>
> --
> Catalin

^ permalink raw reply

* [PATCH 0/6] More 5.3 patches
From: Mike Marciniszyn @ 2019-07-15 16:45 UTC (permalink / raw)
  To: jgg, dledford; +Cc: linux-rdma

The following series contains fixes and a cleanup.

I noticed that 5.3 rc1 hasn't happened yet? So I'm not quite sure of
the destination here.

Five of the patches are tagged for stable and should be held for the rc or pulled for 5.3.

Deleting the unused define can wait if necessary.

---

John Fleck (1):
      IB/hfi1: Check for error on call to alloc_rsm_map_table

Kaike Wan (4):
      IB/hfi1: Unreserve a flushed OPFN request
      IB/hfi1: Field not zero-ed when allocating TID flow memory
      IB/hfi1: Drop all TID RDMA READ RESP packets after r_next_psn
      IB/hfi1: Do not update hcrc for a KDETH packet during fault injection

Mike Marciniszyn (1):
      IB/hfi1: Remove unused define


 drivers/infiniband/hw/hfi1/chip.c      |   11 +++++++-
 drivers/infiniband/hw/hfi1/rc.c        |    2 -
 drivers/infiniband/hw/hfi1/tid_rdma.c  |   43 +-------------------------------
 drivers/infiniband/hw/hfi1/user_sdma.h |    6 ----
 drivers/infiniband/hw/hfi1/verbs.c     |   17 +++++++------
 include/rdma/rdmavt_qp.h               |    9 +++----
 6 files changed, 24 insertions(+), 64 deletions(-)

-- 
Mike

^ permalink raw reply

* [PATCH 1/6] IB/hfi1: Check for error on call to alloc_rsm_map_table
From: Mike Marciniszyn @ 2019-07-15 16:45 UTC (permalink / raw)
  To: jgg, dledford; +Cc: linux-rdma
In-Reply-To: <20190715164423.74174.4994.stgit@awfm-01.aw.intel.com>

From: John Fleck <john.fleck@intel.com>

The call to alloc_rsm_map_table does not check if the kmalloc fails.
Check for a NULL on alloc, and bail if it fails.

Fixes: 372cc85a13c9 ("IB/hfi1: Extract RSM map table init from QOS")
Cc: <stable@vger.kernel.org>
Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: John Fleck <john.fleck@intel.com>
Signed-off-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
---
 drivers/infiniband/hw/hfi1/chip.c |   11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c
index d5b643a..67052dc 100644
--- a/drivers/infiniband/hw/hfi1/chip.c
+++ b/drivers/infiniband/hw/hfi1/chip.c
@@ -14452,7 +14452,7 @@ void hfi1_deinit_vnic_rsm(struct hfi1_devdata *dd)
 		clear_rcvctrl(dd, RCV_CTRL_RCV_RSM_ENABLE_SMASK);
 }
 
-static void init_rxe(struct hfi1_devdata *dd)
+static int init_rxe(struct hfi1_devdata *dd)
 {
 	struct rsm_map_table *rmt;
 	u64 val;
@@ -14461,6 +14461,9 @@ static void init_rxe(struct hfi1_devdata *dd)
 	write_csr(dd, RCV_ERR_MASK, ~0ull);
 
 	rmt = alloc_rsm_map_table(dd);
+	if (!rmt)
+		return -ENOMEM;
+
 	/* set up QOS, including the QPN map table */
 	init_qos(dd, rmt);
 	init_fecn_handling(dd, rmt);
@@ -14487,6 +14490,7 @@ static void init_rxe(struct hfi1_devdata *dd)
 	val |= ((4ull & RCV_BYPASS_HDR_SIZE_MASK) <<
 		RCV_BYPASS_HDR_SIZE_SHIFT);
 	write_csr(dd, RCV_BYPASS, val);
+	return 0;
 }
 
 static void init_other(struct hfi1_devdata *dd)
@@ -15024,7 +15028,10 @@ int hfi1_init_dd(struct hfi1_devdata *dd)
 		goto bail_cleanup;
 
 	/* set initial RXE CSRs */
-	init_rxe(dd);
+	ret = init_rxe(dd);
+	if (ret)
+		goto bail_cleanup;
+
 	/* set initial TXE CSRs */
 	init_txe(dd);
 	/* set initial non-RXE, non-TXE CSRs */


^ permalink raw reply related

* [PATCH 2/6] IB/hfi1: Unreserve a flushed OPFN request
From: Mike Marciniszyn @ 2019-07-15 16:45 UTC (permalink / raw)
  To: jgg, dledford; +Cc: linux-rdma
In-Reply-To: <20190715164423.74174.4994.stgit@awfm-01.aw.intel.com>

From: Kaike Wan <kaike.wan@intel.com>

When an OPFN request is flushed, the request is completed without
unreserving itself from the send queue. Subsequently, when a new
request is posted, the following warning will be triggered:

WARNING: CPU: 4 PID: 8130 at rdmavt/qp.c:1761 rvt_post_send+0x72a/0x880 [rdmavt]
Call Trace:
[<ffffffffbbb61e41>] dump_stack+0x19/0x1b
[<ffffffffbb497688>] __warn+0xd8/0x100
[<ffffffffbb4977cd>] warn_slowpath_null+0x1d/0x20
[<ffffffffc01c941a>] rvt_post_send+0x72a/0x880 [rdmavt]
[<ffffffffbb4dcabe>] ? account_entity_dequeue+0xae/0xd0
[<ffffffffbb61d645>] ? __kmalloc+0x55/0x230
[<ffffffffc04e1a4c>] ib_uverbs_post_send+0x37c/0x5d0 [ib_uverbs]
[<ffffffffc04e5e36>] ? rdma_lookup_put_uobject+0x26/0x60 [ib_uverbs]
[<ffffffffc04dbce6>] ib_uverbs_write+0x286/0x460 [ib_uverbs]
[<ffffffffbb6f9457>] ? security_file_permission+0x27/0xa0
[<ffffffffbb641650>] vfs_write+0xc0/0x1f0
[<ffffffffbb64246f>] SyS_write+0x7f/0xf0
[<ffffffffbbb74ddb>] system_call_fastpath+0x22/0x27

This patch fixes the problem by moving rvt_qp_wqe_unreserve() into
rvt_qp_complete_swqe() to simplify the code and make it less
error-prone.

Fixes: ca95f802ef51 ("IB/hfi1: Unreserve a reserved request when it is completed")
Cc: <stable@vger.kernel.org>
Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Reviewed-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Kaike Wan <kaike.wan@intel.com>
Signed-off-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
---
 drivers/infiniband/hw/hfi1/rc.c |    2 --
 include/rdma/rdmavt_qp.h        |    9 ++++-----
 2 files changed, 4 insertions(+), 7 deletions(-)

diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c
index 0477c14..024a7c2 100644
--- a/drivers/infiniband/hw/hfi1/rc.c
+++ b/drivers/infiniband/hw/hfi1/rc.c
@@ -1835,7 +1835,6 @@ void hfi1_rc_send_complete(struct rvt_qp *qp, struct hfi1_opa_header *opah)
 		    cmp_psn(qp->s_sending_psn, qp->s_sending_hpsn) <= 0)
 			break;
 		trdma_clean_swqe(qp, wqe);
-		rvt_qp_wqe_unreserve(qp, wqe);
 		trace_hfi1_qp_send_completion(qp, wqe, qp->s_last);
 		rvt_qp_complete_swqe(qp,
 				     wqe,
@@ -1882,7 +1881,6 @@ struct rvt_swqe *do_rc_completion(struct rvt_qp *qp,
 	if (cmp_psn(wqe->lpsn, qp->s_sending_psn) < 0 ||
 	    cmp_psn(qp->s_sending_psn, qp->s_sending_hpsn) > 0) {
 		trdma_clean_swqe(qp, wqe);
-		rvt_qp_wqe_unreserve(qp, wqe);
 		trace_hfi1_qp_send_completion(qp, wqe, qp->s_last);
 		rvt_qp_complete_swqe(qp,
 				     wqe,
diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h
index 0eeea52..e06c77d 100644
--- a/include/rdma/rdmavt_qp.h
+++ b/include/rdma/rdmavt_qp.h
@@ -608,7 +608,7 @@ static inline void rvt_qp_wqe_reserve(
 /**
  * rvt_qp_wqe_unreserve - clean reserved operation
  * @qp - the rvt qp
- * @wqe - the send wqe
+ * @flags - send wqe flags
  *
  * This decrements the reserve use count.
  *
@@ -620,11 +620,9 @@ static inline void rvt_qp_wqe_reserve(
  * the compiler does not juggle the order of the s_last
  * ring index and the decrementing of s_reserved_used.
  */
-static inline void rvt_qp_wqe_unreserve(
-	struct rvt_qp *qp,
-	struct rvt_swqe *wqe)
+static inline void rvt_qp_wqe_unreserve(struct rvt_qp *qp, int flags)
 {
-	if (unlikely(wqe->wr.send_flags & RVT_SEND_RESERVE_USED)) {
+	if (unlikely(flags & RVT_SEND_RESERVE_USED)) {
 		atomic_dec(&qp->s_reserved_used);
 		/* insure no compiler re-order up to s_last change */
 		smp_mb__after_atomic();
@@ -853,6 +851,7 @@ static inline void rvt_send_cq(struct rvt_qp *qp, struct ib_wc *wc,
 	u32 byte_len, last;
 	int flags = wqe->wr.send_flags;
 
+	rvt_qp_wqe_unreserve(qp, flags);
 	rvt_put_qp_swqe(qp, wqe);
 
 	need_completion =


^ permalink raw reply related

* [PATCH 3/6] IB/hfi1: Field not zero-ed when allocating TID flow memory
From: Mike Marciniszyn @ 2019-07-15 16:45 UTC (permalink / raw)
  To: jgg, dledford; +Cc: linux-rdma
In-Reply-To: <20190715164423.74174.4994.stgit@awfm-01.aw.intel.com>

From: Kaike Wan <kaike.wan@intel.com>

The field flow->resync_npkts is added for TID RDMA WRITE request and
zero-ed when a TID RDMA WRITE RESP packet is received by the requester.
This field is used to rewind a request during retry in the function
hfi1_tid_rdma_restart_req() shared by both TID RDMA WRITE and TID RDMA
READ requests. Therefore, when a TID RDMA READ request is retried,
this field may not be initialized at all, which causes the retry to
start at an incorrect psn, leading to the drop of the retry request
by the responder.

This patch fixes the problem by zeroing out the field when the flow
memory is allocated.

Fixes: 838b6fd2d9ca ("IB/hfi1: TID RDMA RcvArray programming and TID allocation")
Cc: <stable@vger.kernel.org>
Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Kaike Wan <kaike.wan@intel.com>
Signed-off-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
---
 drivers/infiniband/hw/hfi1/tid_rdma.c |    1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.c b/drivers/infiniband/hw/hfi1/tid_rdma.c
index 92accca..7fcbeee 100644
--- a/drivers/infiniband/hw/hfi1/tid_rdma.c
+++ b/drivers/infiniband/hw/hfi1/tid_rdma.c
@@ -1620,6 +1620,7 @@ static int hfi1_kern_exp_rcv_alloc_flows(struct tid_rdma_request *req,
 		flows[i].req = req;
 		flows[i].npagesets = 0;
 		flows[i].pagesets[0].mapped =  0;
+		flows[i].resync_npkts = 0;
 	}
 	req->flows = flows;
 	return 0;


^ permalink raw reply related

* [PATCH 4/6] IB/hfi1: Drop all TID RDMA READ RESP packets after r_next_psn
From: Mike Marciniszyn @ 2019-07-15 16:45 UTC (permalink / raw)
  To: jgg, dledford; +Cc: linux-rdma
In-Reply-To: <20190715164423.74174.4994.stgit@awfm-01.aw.intel.com>

From: Kaike Wan <kaike.wan@intel.com>

When a TID sequence error occurs while receiving TID RDMA READ RESP
packets, all packets after flow->flow_state.r_next_psn should be
dropped, including those response packets for subsequent segments.

The current implementation will drop the subsequent response packets
for the segment to complete next, but may accept packets for subsequent
segments and therefore mistakenly advance the r_next_psn fields
for the corresponding software flows. This may result in failures
to complete subsequent segments after the current segment is completed.

The fix is to only use the flow pointed to by req->clear_tail for checking
KDETH PSN instead of finding a flow from the request's flow array.

Fixes: b885d5be9ca1 ("IB/hfi1: Unify the software PSN check for TID RDMA READ/WRITE")
Cc: <stable@vger.kernel.org>
Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Kaike Wan <kaike.wan@intel.com>
Signed-off-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
---
 drivers/infiniband/hw/hfi1/tid_rdma.c |   42 +--------------------------------
 1 file changed, 1 insertion(+), 41 deletions(-)

diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.c b/drivers/infiniband/hw/hfi1/tid_rdma.c
index 7fcbeee..996fc2982 100644
--- a/drivers/infiniband/hw/hfi1/tid_rdma.c
+++ b/drivers/infiniband/hw/hfi1/tid_rdma.c
@@ -1674,34 +1674,6 @@ static struct tid_rdma_flow *find_flow_ib(struct tid_rdma_request *req,
 	return NULL;
 }
 
-static struct tid_rdma_flow *
-__find_flow_ranged(struct tid_rdma_request *req, u16 head, u16 tail,
-		   u32 psn, u16 *fidx)
-{
-	for ( ; CIRC_CNT(head, tail, MAX_FLOWS);
-	      tail = CIRC_NEXT(tail, MAX_FLOWS)) {
-		struct tid_rdma_flow *flow = &req->flows[tail];
-		u32 spsn, lpsn;
-
-		spsn = full_flow_psn(flow, flow->flow_state.spsn);
-		lpsn = full_flow_psn(flow, flow->flow_state.lpsn);
-
-		if (cmp_psn(psn, spsn) >= 0 && cmp_psn(psn, lpsn) <= 0) {
-			if (fidx)
-				*fidx = tail;
-			return flow;
-		}
-	}
-	return NULL;
-}
-
-static struct tid_rdma_flow *find_flow(struct tid_rdma_request *req,
-				       u32 psn, u16 *fidx)
-{
-	return __find_flow_ranged(req, req->setup_head, req->clear_tail, psn,
-				  fidx);
-}
-
 /* TID RDMA READ functions */
 u32 hfi1_build_tid_rdma_read_packet(struct rvt_swqe *wqe,
 				    struct ib_other_headers *ohdr, u32 *bth1,
@@ -2789,19 +2761,7 @@ static bool handle_read_kdeth_eflags(struct hfi1_ctxtdata *rcd,
 			 * to prevent continuous Flow Sequence errors for any
 			 * packets that could be still in the fabric.
 			 */
-			flow = find_flow(req, psn, NULL);
-			if (!flow) {
-				/*
-				 * We can't find the IB PSN matching the
-				 * received KDETH PSN. The only thing we can
-				 * do at this point is report the error to
-				 * the QP.
-				 */
-				hfi1_kern_read_tid_flow_free(qp);
-				spin_unlock(&qp->s_lock);
-				rvt_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
-				return ret;
-			}
+			flow = &req->flows[req->clear_tail];
 			if (priv->s_flags & HFI1_R_TID_SW_PSN) {
 				diff = cmp_psn(psn,
 					       flow->flow_state.r_next_psn);


^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox