From: Bob Pearson <rpearsonhpe@gmail.com>
To: jgg@nvidia.com, leon@kernel.org, zyjzyj2000@gmail.com,
linux-rdma@vger.kernel.org
Cc: Bob Pearson <rpearsonhpe@gmail.com>
Subject: [PATCH for-next v2 12/18] RDMA/rxe: Extend rxe_init_packet() to support frags
Date: Mon, 31 Oct 2022 15:28:01 -0500 [thread overview]
Message-ID: <20221031202805.19138-12-rpearsonhpe@gmail.com> (raw)
In-Reply-To: <20221031202805.19138-1-rpearsonhpe@gmail.com>
Add a subroutine rxe_can_use_sg() to determine if a packet is
a candidate for a fragmented skb. Add a global variable rxe_use_sg
to control whether to support nonlinear skbs. Modify rxe_init_packet()
to test if the packet should use a fragmented skb. Fix up calls to
rxe_init_packet() to use the new API but disable creating nonlinear
skbs for now.
This is in preparation for using fragmented skbs.
Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
---
drivers/infiniband/sw/rxe/rxe.c | 3 ++
drivers/infiniband/sw/rxe/rxe.h | 3 ++
drivers/infiniband/sw/rxe/rxe_loc.h | 2 +-
drivers/infiniband/sw/rxe/rxe_mr.c | 12 +++--
drivers/infiniband/sw/rxe/rxe_net.c | 79 +++++++++++++++++++++++++---
drivers/infiniband/sw/rxe/rxe_req.c | 2 +-
drivers/infiniband/sw/rxe/rxe_resp.c | 7 ++-
7 files changed, 92 insertions(+), 16 deletions(-)
diff --git a/drivers/infiniband/sw/rxe/rxe.c b/drivers/infiniband/sw/rxe/rxe.c
index 51daac5c4feb..388d8103ec20 100644
--- a/drivers/infiniband/sw/rxe/rxe.c
+++ b/drivers/infiniband/sw/rxe/rxe.c
@@ -13,6 +13,9 @@ MODULE_AUTHOR("Bob Pearson, Frank Zago, John Groves, Kamal Heib");
MODULE_DESCRIPTION("Soft RDMA transport");
MODULE_LICENSE("Dual BSD/GPL");
+/* if true allow using fragmented skbs */
+bool rxe_use_sg;
+
/* free resources for a rxe device all objects created for this device must
* have been destroyed
*/
diff --git a/drivers/infiniband/sw/rxe/rxe.h b/drivers/infiniband/sw/rxe/rxe.h
index 30fbdf3bc76a..c78fb497d9c3 100644
--- a/drivers/infiniband/sw/rxe/rxe.h
+++ b/drivers/infiniband/sw/rxe/rxe.h
@@ -30,6 +30,9 @@
#include "rxe_verbs.h"
#include "rxe_loc.h"
+/* if true allow using fragmented skbs */
+extern bool rxe_use_sg;
+
/*
* Version 1 and Version 2 are identical on 64 bit machines, but on 32 bit
* machines Version 2 has a different struct layout.
diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h
index 4e5fbc33277d..12fd5811cd79 100644
--- a/drivers/infiniband/sw/rxe/rxe_loc.h
+++ b/drivers/infiniband/sw/rxe/rxe_loc.h
@@ -101,7 +101,7 @@ void rxe_mw_cleanup(struct rxe_pool_elem *elem);
/* rxe_net.c */
struct sk_buff *rxe_init_packet(struct rxe_qp *qp, struct rxe_av *av,
- struct rxe_pkt_info *pkt);
+ struct rxe_pkt_info *pkt, bool *is_frag);
int rxe_prepare(struct rxe_av *av, struct rxe_pkt_info *pkt,
struct sk_buff *skb);
int rxe_xmit_packet(struct rxe_qp *qp, struct rxe_pkt_info *pkt,
diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c
index 6fe5bbe43a60..cf538d97c7a5 100644
--- a/drivers/infiniband/sw/rxe/rxe_mr.c
+++ b/drivers/infiniband/sw/rxe/rxe_mr.c
@@ -541,7 +541,7 @@ int rxe_num_dma_frags(const struct rxe_pd *pd, const struct rxe_dma_info *dma,
struct rxe_mr *mr = NULL;
int bytes;
u64 iova;
- int ret;
+ int nf;
int num_frags = 0;
if (length == 0)
@@ -572,18 +572,22 @@ int rxe_num_dma_frags(const struct rxe_pd *pd, const struct rxe_dma_info *dma,
bytes = min_t(int, length, sge->length - buf_offset);
if (bytes > 0) {
iova = sge->addr + buf_offset;
- ret = rxe_num_mr_frags(mr, iova, length);
- if (ret < 0) {
+ nf = rxe_num_mr_frags(mr, iova, length);
+ if (nf < 0) {
rxe_put(mr);
- return ret;
+ return nf;
}
+ num_frags += nf;
buf_offset += bytes;
resid -= bytes;
length -= bytes;
}
}
+ if (mr)
+ rxe_put(mr);
+
return num_frags;
}
diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c
index faabc444d546..c6d8f5c80562 100644
--- a/drivers/infiniband/sw/rxe/rxe_net.c
+++ b/drivers/infiniband/sw/rxe/rxe_net.c
@@ -442,8 +442,60 @@ int rxe_xmit_packet(struct rxe_qp *qp, struct rxe_pkt_info *pkt,
return err;
}
+/**
+ * rxe_can_use_sg() - determine if packet is a candidate for fragmenting
+ * @qp: the queue pair
+ * @pkt: packet info
+ *
+ * Limit to packets with:
+ * rxe_use_sg set
+ * qp is RC
+ * ndev supports SG
+ * for sends, #frags needed for the payload less than MAX_SKB_FRAGS
+ *
+ * Returns: true if conditions are met else false
+ */
+static bool rxe_can_use_sg(struct rxe_qp *qp, struct rxe_pkt_info *pkt)
+{
+ struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
+ int length = pkt->paylen - rxe_opcode[pkt->opcode].length
+ - RXE_ICRC_SIZE;
+ int nf;
+
+ if (!rxe_use_sg)
+ return false;
+ if (qp_type(pkt->qp) != IB_QPT_RC)
+ return false;
+ if (!(rxe->ndev->features & NETIF_F_SG))
+ return false;
+
+ /* check we don't have a pathological sge list with lots of short
+ * segments (or an error count). We need one extra frag for icrc.
+ */
+ if (pkt->mask & RXE_SEND_MASK) {
+ nf = rxe_num_dma_frags(qp->pd, &pkt->wqe->dma, length);
+ return (nf >= 0 && nf <= MAX_SKB_FRAGS - 1) ? true : false;
+ }
+
+ return true;
+}
+
+#define RXE_MIN_SKB_SIZE (256)
+
+/**
+ * rxe_init_packet - allocate and initialize a new skb
+ * @qp: the queue pair
+ * @av: remote address vector
+ * @pkt: packet info
+ * @frag: optional return value for fragmented skb
+ * on call if frag == NULL do not use fragmented skb
+ * on return if not NULL set *frag to true
+ * if packet will be fragmented else false
+ *
+ * Returns: an skb on success else NULL
+ */
struct sk_buff *rxe_init_packet(struct rxe_qp *qp, struct rxe_av *av,
- struct rxe_pkt_info *pkt)
+ struct rxe_pkt_info *pkt, bool *frag)
{
struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
unsigned int hdr_len;
@@ -451,6 +503,7 @@ struct sk_buff *rxe_init_packet(struct rxe_qp *qp, struct rxe_av *av,
struct net_device *ndev;
const struct ib_gid_attr *attr;
const int port_num = 1;
+ int skb_size;
attr = rdma_get_gid_attr(&rxe->ib_dev, port_num, av->grh.sgid_index);
if (IS_ERR(attr))
@@ -469,9 +522,19 @@ struct sk_buff *rxe_init_packet(struct rxe_qp *qp, struct rxe_av *av,
rcu_read_unlock();
goto out;
}
- skb = alloc_skb(pkt->paylen + hdr_len + LL_RESERVED_SPACE(ndev),
- GFP_ATOMIC);
+ skb_size = LL_RESERVED_SPACE(ndev) + hdr_len + pkt->paylen;
+ if (frag) {
+ if (rxe_use_sg && (skb_size > RXE_MIN_SKB_SIZE) &&
+ rxe_can_use_sg(qp, pkt)) {
+ skb_size = RXE_MIN_SKB_SIZE;
+ *frag = true;
+ } else {
+ *frag = false;
+ }
+ }
+
+ skb = alloc_skb(skb_size, GFP_ATOMIC);
if (unlikely(!skb)) {
rcu_read_unlock();
goto out;
@@ -480,7 +543,7 @@ struct sk_buff *rxe_init_packet(struct rxe_qp *qp, struct rxe_av *av,
skb_reserve(skb, hdr_len + LL_RESERVED_SPACE(ndev));
/* FIXME: hold reference to this netdev until life of this skb. */
- skb->dev = ndev;
+ skb->dev = ndev;
rcu_read_unlock();
if (av->network_type == RXE_NETWORK_TYPE_IPV4)
@@ -488,10 +551,10 @@ struct sk_buff *rxe_init_packet(struct rxe_qp *qp, struct rxe_av *av,
else
skb->protocol = htons(ETH_P_IPV6);
- pkt->rxe = rxe;
- pkt->port_num = port_num;
- pkt->hdr = skb_put(skb, pkt->paylen);
- pkt->mask |= RXE_GRH_MASK;
+ if (frag && *frag)
+ pkt->hdr = skb_put(skb, rxe_opcode[pkt->opcode].length);
+ else
+ pkt->hdr = skb_put(skb, pkt->paylen);
out:
rdma_put_gid_attr(attr);
diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c
index 0a4b8825bd55..71a65f2a5d6d 100644
--- a/drivers/infiniband/sw/rxe/rxe_req.c
+++ b/drivers/infiniband/sw/rxe/rxe_req.c
@@ -491,7 +491,7 @@ static struct sk_buff *rxe_init_req_packet(struct rxe_qp *qp,
pad + RXE_ICRC_SIZE;
/* init skb */
- skb = rxe_init_packet(qp, av, pkt);
+ skb = rxe_init_packet(qp, av, pkt, NULL);
if (unlikely(!skb))
goto err_out;
diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c
index 8503d22f9114..8868415b71b6 100644
--- a/drivers/infiniband/sw/rxe/rxe_resp.c
+++ b/drivers/infiniband/sw/rxe/rxe_resp.c
@@ -665,6 +665,7 @@ static struct sk_buff *prepare_ack_packet(struct rxe_qp *qp,
u32 psn,
u8 syndrome)
{
+ struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
struct sk_buff *skb;
int paylen;
int pad;
@@ -672,14 +673,16 @@ static struct sk_buff *prepare_ack_packet(struct rxe_qp *qp,
pad = (-payload) & 0x3;
paylen = rxe_opcode[opcode].length + payload + pad + RXE_ICRC_SIZE;
- ack->paylen = paylen;
+ ack->rxe = rxe;
ack->qp = qp;
ack->opcode = opcode;
ack->mask = rxe_opcode[opcode].mask;
+ ack->paylen = paylen;
ack->psn = psn;
+ ack->port_num = 1;
- skb = rxe_init_packet(qp, &qp->pri_av, ack);
+ skb = rxe_init_packet(qp, &qp->pri_av, ack, NULL);
if (!skb)
return NULL;
--
2.34.1
next prev parent reply other threads:[~2022-10-31 20:28 UTC|newest]
Thread overview: 36+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-10-31 20:27 [PATCH for-next v2 01/18] RDMA/rxe: Isolate code to fill request roce headers Bob Pearson
2022-10-31 20:27 ` [PATCH for-next v2 02/18] RDMA/rxe: Isolate request payload code in a subroutine Bob Pearson
2022-10-31 20:27 ` [PATCH for-next v2 03/18] RDMA/rxe: Remove paylen parameter from rxe_init_packet Bob Pearson
2022-10-31 20:27 ` [PATCH for-next v2 04/18] RDMA/rxe: Isolate code to build request packet Bob Pearson
2022-10-31 20:27 ` [PATCH for-next v2 05/18] RDMA/rxe: Add sg fragment ops Bob Pearson
2022-11-24 19:05 ` Jason Gunthorpe
2022-10-31 20:27 ` [PATCH for-next v2 06/18] RDMA/rxe: Add rxe_add_frag() to rxe_mr.c Bob Pearson
2022-11-24 19:10 ` Jason Gunthorpe
2022-11-30 20:53 ` Bob Pearson
2022-11-30 23:36 ` Jason Gunthorpe
2022-12-01 0:16 ` Bob Pearson
2022-12-01 0:20 ` Jason Gunthorpe
2022-12-01 0:36 ` Bob Pearson
2022-12-01 0:41 ` Jason Gunthorpe
2022-12-01 5:05 ` Bob Pearson
2022-12-01 12:51 ` Jason Gunthorpe
2022-12-01 15:04 ` Bob Pearson
2022-12-01 15:16 ` Bob Pearson
2022-12-01 15:38 ` Bob Pearson
2022-12-01 15:39 ` Jason Gunthorpe
2022-12-01 17:11 ` Bob Pearson
2022-12-01 18:00 ` Jason Gunthorpe
2022-10-31 20:27 ` [PATCH for-next v2 07/18] RDMA/rxe: Add routine to compute the number of frags Bob Pearson
2022-11-24 19:15 ` Jason Gunthorpe
2022-10-31 20:27 ` [PATCH for-next v2 08/18] RDMA/rxe: Extend rxe_mr_copy to support skb frags Bob Pearson
2022-10-31 20:27 ` [PATCH for-next v2 09/18] RDMA/rxe: Add routine to compute number of frags for dma Bob Pearson
2022-11-24 19:16 ` Jason Gunthorpe
2022-10-31 20:27 ` [PATCH for-next v2 10/18] RDMA/rxe: Extend copy_data to support skb frags Bob Pearson
2022-10-31 20:28 ` [PATCH for-next v2 11/18] RDMA/rxe: Replace rxe by qp as a parameter Bob Pearson
2022-10-31 20:28 ` Bob Pearson [this message]
2022-10-31 20:28 ` [PATCH for-next v2 13/18] RDMA/rxe: Extend rxe_icrc.c to support frags Bob Pearson
2022-10-31 20:28 ` [PATCH for-next v2 14/18] RDMA/rxe: Extend rxe_init_req_packet() for frags Bob Pearson
2022-10-31 20:28 ` [PATCH for-next v2 15/18] RDMA/rxe: Extend response packets " Bob Pearson
2022-10-31 20:28 ` [PATCH for-next v2 16/18] RDMA/rxe: Extend send/write_data_in() " Bob Pearson
2022-10-31 20:28 ` [PATCH for-next v2 17/18] RDMA/rxe: Extend do_read() in rxe_comp,c " Bob Pearson
2022-10-31 20:28 ` [PATCH for-next v2 18/18] RDMA/rxe: Enable sg code in rxe Bob Pearson
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20221031202805.19138-12-rpearsonhpe@gmail.com \
--to=rpearsonhpe@gmail.com \
--cc=jgg@nvidia.com \
--cc=leon@kernel.org \
--cc=linux-rdma@vger.kernel.org \
--cc=zyjzyj2000@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.