From: Maor Gottlieb <maorg@mellanox.com>
To: davem@davemloft.net, jgg@mellanox.com, dledford@redhat.com,
j.vosburgh@gmail.com, vfalico@gmail.com, andy@greyhouse.net,
kuba@kernel.org, jiri@mellanox.com, dsahern@kernel.org
Cc: leonro@mellanox.com, saeedm@mellanox.com,
linux-rdma@vger.kernel.org, netdev@vger.kernel.org,
alexr@mellanox.com, Maor Gottlieb <maorg@mellanox.com>
Subject: [PATCH V4 mlx5-next 10/15] RDMA/core: Add LAG functionality
Date: Wed, 22 Apr 2020 11:39:46 +0300 [thread overview]
Message-ID: <20200422083951.17424-11-maorg@mellanox.com> (raw)
In-Reply-To: <20200422083951.17424-1-maorg@mellanox.com>
Add support to get the RoCE LAG xmit slave by building skb
of the RoCE packet and call to master_get_xmit_slave.
If driver wants to get the slave assume all slaves are available,
then need to set RDMA_LAG_FLAGS_HASH_ALL_SLAVES in flags.
Signed-off-by: Maor Gottlieb <maorg@mellanox.com>
Reviewed-by: Leon Romanovsky <leonro@mellanox.com>
---
drivers/infiniband/core/Makefile | 2 +-
drivers/infiniband/core/lag.c | 138 +++++++++++++++++++++++++++++++
include/rdma/ib_verbs.h | 2 +
include/rdma/lag.h | 22 +++++
4 files changed, 163 insertions(+), 1 deletion(-)
create mode 100644 drivers/infiniband/core/lag.c
create mode 100644 include/rdma/lag.h
diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile
index d1b14887960e..870f0fcd54d5 100644
--- a/drivers/infiniband/core/Makefile
+++ b/drivers/infiniband/core/Makefile
@@ -12,7 +12,7 @@ ib_core-y := packer.o ud_header.o verbs.o cq.o rw.o sysfs.o \
roce_gid_mgmt.o mr_pool.o addr.o sa_query.o \
multicast.o mad.o smi.o agent.o mad_rmpp.o \
nldev.o restrack.o counters.o ib_core_uverbs.o \
- trace.o
+ trace.o lag.o
ib_core-$(CONFIG_SECURITY_INFINIBAND) += security.o
ib_core-$(CONFIG_CGROUP_RDMA) += cgroup.o
diff --git a/drivers/infiniband/core/lag.c b/drivers/infiniband/core/lag.c
new file mode 100644
index 000000000000..3036fb3dc43a
--- /dev/null
+++ b/drivers/infiniband/core/lag.c
@@ -0,0 +1,138 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright (c) 2020 Mellanox Technologies. All rights reserved.
+ */
+
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_cache.h>
+#include <rdma/lag.h>
+
+static struct sk_buff *rdma_build_skb(struct ib_device *device,
+ struct net_device *netdev,
+ struct rdma_ah_attr *ah_attr)
+{
+ struct ipv6hdr *ip6h;
+ struct sk_buff *skb;
+ struct ethhdr *eth;
+ struct iphdr *iph;
+ struct udphdr *uh;
+ u8 smac[ETH_ALEN];
+ bool is_ipv4;
+ int hdr_len;
+
+ is_ipv4 = ipv6_addr_v4mapped((struct in6_addr *)ah_attr->grh.dgid.raw);
+ hdr_len = ETH_HLEN + sizeof(struct udphdr) + LL_RESERVED_SPACE(netdev);
+ hdr_len += is_ipv4 ? sizeof(struct iphdr) : sizeof(struct ipv6hdr);
+
+ skb = alloc_skb(hdr_len, GFP_ATOMIC);
+ if (!skb)
+ return NULL;
+
+ skb->dev = netdev;
+ skb_reserve(skb, hdr_len);
+ skb_push(skb, sizeof(struct udphdr));
+ skb_reset_transport_header(skb);
+ uh = udp_hdr(skb);
+ uh->source = htons(0xC000);
+ uh->dest = htons(ROCE_V2_UDP_DPORT);
+ uh->len = htons(sizeof(struct udphdr));
+
+ if (is_ipv4) {
+ skb_push(skb, sizeof(struct iphdr));
+ skb_reset_network_header(skb);
+ iph = ip_hdr(skb);
+ iph->frag_off = 0;
+ iph->version = 4;
+ iph->protocol = IPPROTO_UDP;
+ iph->ihl = 0x5;
+ iph->tot_len = htons(sizeof(struct udphdr) + sizeof(struct
+ iphdr));
+ memcpy(&iph->saddr, ah_attr->grh.sgid_attr->gid.raw + 12,
+ sizeof(struct in_addr));
+ memcpy(&iph->daddr, ah_attr->grh.dgid.raw + 12,
+ sizeof(struct in_addr));
+ } else {
+ skb_push(skb, sizeof(struct ipv6hdr));
+ skb_reset_network_header(skb);
+ ip6h = ipv6_hdr(skb);
+ ip6h->version = 6;
+ ip6h->nexthdr = IPPROTO_UDP;
+ memcpy(&ip6h->flow_lbl, &ah_attr->grh.flow_label,
+ sizeof(*ip6h->flow_lbl));
+ memcpy(&ip6h->saddr, ah_attr->grh.sgid_attr->gid.raw,
+ sizeof(struct in6_addr));
+ memcpy(&ip6h->daddr, ah_attr->grh.dgid.raw,
+ sizeof(struct in6_addr));
+ }
+
+ skb_push(skb, sizeof(struct ethhdr));
+ skb_reset_mac_header(skb);
+ eth = eth_hdr(skb);
+ skb->protocol = eth->h_proto = htons(is_ipv4 ? ETH_P_IP : ETH_P_IPV6);
+ rdma_read_gid_l2_fields(ah_attr->grh.sgid_attr, NULL, smac);
+ memcpy(eth->h_source, smac, ETH_ALEN);
+ memcpy(eth->h_dest, ah_attr->roce.dmac, ETH_ALEN);
+
+ return skb;
+}
+
+static struct net_device *rdma_get_xmit_slave_udp(struct ib_device *device,
+ struct net_device *master,
+ struct rdma_ah_attr *ah_attr)
+{
+ struct net_device *slave;
+ struct sk_buff *skb;
+
+ skb = rdma_build_skb(device, master, ah_attr);
+ if (!skb)
+ return NULL;
+
+ slave = netdev_get_xmit_slave(master, skb,
+ !!(device->lag_flags &
+ RDMA_LAG_FLAGS_HASH_ALL_SLAVES));
+ kfree_skb(skb);
+ return slave;
+}
+
+void rdma_lag_put_ah_roce_slave(struct rdma_ah_attr *ah_attr)
+{
+ if (ah_attr->roce.xmit_slave)
+ dev_put(ah_attr->roce.xmit_slave);
+}
+
+int rdma_lag_get_ah_roce_slave(struct ib_device *device,
+ struct rdma_ah_attr *ah_attr)
+{
+ struct net_device *master;
+ struct net_device *slave;
+
+ if (!(ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE &&
+ ah_attr->grh.sgid_attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP))
+ return 0;
+
+ rcu_read_lock();
+ master = rdma_read_gid_attr_ndev_rcu(ah_attr->grh.sgid_attr);
+ if (IS_ERR(master)) {
+ rcu_read_unlock();
+ return PTR_ERR(master);
+ }
+ dev_hold(master);
+ rcu_read_unlock();
+
+ if (!netif_is_bond_master(master)) {
+ dev_put(master);
+ return 0;
+ }
+
+ slave = rdma_get_xmit_slave_udp(device, master, ah_attr);
+
+ dev_put(master);
+ if (!slave) {
+ ibdev_warn(device, "Failed to get lag xmit slave\n");
+ return -EINVAL;
+ }
+
+ ah_attr->roce.xmit_slave = slave;
+
+ return 0;
+}
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index bbc5cfb57cd2..60f9969b6d83 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -894,6 +894,7 @@ struct ib_ah_attr {
struct roce_ah_attr {
u8 dmac[ETH_ALEN];
+ struct net_device *xmit_slave;
};
struct opa_ah_attr {
@@ -2709,6 +2710,7 @@ struct ib_device {
/* Used by iWarp CM */
char iw_ifname[IFNAMSIZ];
u32 iw_driver_flags;
+ u32 lag_flags;
};
struct ib_client_nl_info;
diff --git a/include/rdma/lag.h b/include/rdma/lag.h
new file mode 100644
index 000000000000..a71511824207
--- /dev/null
+++ b/include/rdma/lag.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/*
+ * Copyright (c) 2020 Mellanox Technologies. All rights reserved.
+ */
+
+#ifndef _RDMA_LAG_H_
+#define _RDMA_LAG_H_
+
+#include <net/lag.h>
+
+struct ib_device;
+struct rdma_ah_attr;
+
+enum rdma_lag_flags {
+ RDMA_LAG_FLAGS_HASH_ALL_SLAVES = 1 << 0
+};
+
+void rdma_lag_put_ah_roce_slave(struct rdma_ah_attr *ah_attr);
+int rdma_lag_get_ah_roce_slave(struct ib_device *device,
+ struct rdma_ah_attr *ah_attr);
+
+#endif /* _RDMA_LAG_H_ */
--
2.17.2
next prev parent reply other threads:[~2020-04-22 8:40 UTC|newest]
Thread overview: 25+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-04-22 8:39 [PATCH V4 mlx5-next 00/15] Add support to get xmit slave Maor Gottlieb
2020-04-22 8:39 ` [PATCH V4 mlx5-next 01/15] net/core: Introduce netdev_get_xmit_slave Maor Gottlieb
2020-04-22 12:50 ` Jiri Pirko
2020-04-22 15:09 ` David Ahern
2020-04-22 8:39 ` [PATCH V4 mlx5-next 02/15] bonding: Export skip slave logic to function Maor Gottlieb
2020-04-22 8:39 ` [PATCH V4 mlx5-next 03/15] bonding: Rename slave_arr to usable_slaves Maor Gottlieb
2020-04-22 8:39 ` [PATCH V4 mlx5-next 04/15] bonding/alb: Add helper functions to get the xmit slave Maor Gottlieb
2020-04-22 8:39 ` [PATCH V4 mlx5-next 05/15] bonding: Add helper function to get the xmit slave based on hash Maor Gottlieb
2020-04-22 8:39 ` [PATCH V4 mlx5-next 06/15] bonding: Add helper function to get the xmit slave in rr mode Maor Gottlieb
2020-04-22 8:39 ` [PATCH V4 mlx5-next 07/15] bonding: Add function to get the xmit slave in active-backup mode Maor Gottlieb
2020-04-22 8:39 ` [PATCH V4 mlx5-next mlx5-next 08/15] bonding: Add array of all slaves Maor Gottlieb
2020-04-22 8:39 ` [PATCH V4 mlx5-next mlx5-next 09/15] bonding: Implement ndo_get_xmit_slave Maor Gottlieb
2020-04-22 12:53 ` Jiri Pirko
2020-04-22 8:39 ` Maor Gottlieb [this message]
2020-04-22 12:50 ` [PATCH V4 mlx5-next 10/15] RDMA/core: Add LAG functionality Jason Gunthorpe
2020-04-22 13:06 ` Maor Gottlieb
2020-04-22 15:12 ` David Ahern
2020-04-22 8:39 ` [PATCH V4 mlx5-next 11/15] RDMA/core: Get xmit slave for LAG Maor Gottlieb
2020-04-22 13:01 ` Jason Gunthorpe
2020-04-22 8:39 ` [PATCH V4 mlx5-next 12/15] net/mlx5: Change lag mutex lock to spin lock Maor Gottlieb
2020-04-22 8:39 ` [PATCH V4 mlx5-next 13/15] net/mlx5: Add support to get lag physical port Maor Gottlieb
2020-04-22 8:39 ` [PATCH V4 mlx5-next 14/15] RDMA/mlx5: Refactor affinity related code Maor Gottlieb
2020-04-22 8:39 ` [PATCH V4 mlx5-next 15/15] RDMA/mlx5: Set lag tx affinity according to slave Maor Gottlieb
2020-04-22 12:46 ` [PATCH V4 mlx5-next 00/15] Add support to get xmit slave Jiri Pirko
2020-04-22 12:56 ` Maor Gottlieb
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20200422083951.17424-11-maorg@mellanox.com \
--to=maorg@mellanox.com \
--cc=alexr@mellanox.com \
--cc=andy@greyhouse.net \
--cc=davem@davemloft.net \
--cc=dledford@redhat.com \
--cc=dsahern@kernel.org \
--cc=j.vosburgh@gmail.com \
--cc=jgg@mellanox.com \
--cc=jiri@mellanox.com \
--cc=kuba@kernel.org \
--cc=leonro@mellanox.com \
--cc=linux-rdma@vger.kernel.org \
--cc=netdev@vger.kernel.org \
--cc=saeedm@mellanox.com \
--cc=vfalico@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox