From: "yangx.jy@fujitsu.com" <yangx.jy@fujitsu.com>
To: "linux-rdma@vger.kernel.org" <linux-rdma@vger.kernel.org>
Cc: "leon@kernel.org" <leon@kernel.org>,
"jgg@ziepe.ca" <jgg@ziepe.ca>,
"rpearsonhpe@gmail.com" <rpearsonhpe@gmail.com>,
"zyjzyj2000@gmail.com" <zyjzyj2000@gmail.com>,
"lizhijian@fujitsu.com" <lizhijian@fujitsu.com>,
"yangx.jy@fujitsu.com" <yangx.jy@fujitsu.com>
Subject: [RESEND PATCH v5 1/2] RDMA/rxe: Support RDMA Atomic Write operation
Date: Fri, 8 Jul 2022 04:02:36 +0000 [thread overview]
Message-ID: <20220708040228.6703-2-yangx.jy@fujitsu.com> (raw)
In-Reply-To: <20220708040228.6703-1-yangx.jy@fujitsu.com>
This patch implements RDMA Atomic Write operation for RC service.
Signed-off-by: Xiao Yang <yangx.jy@fujitsu.com>
---
drivers/infiniband/sw/rxe/rxe_comp.c | 4 ++
drivers/infiniband/sw/rxe/rxe_opcode.c | 18 +++++
drivers/infiniband/sw/rxe/rxe_opcode.h | 3 +
drivers/infiniband/sw/rxe/rxe_req.c | 15 +++-
drivers/infiniband/sw/rxe/rxe_resp.c | 94 ++++++++++++++++++++++++--
include/rdma/ib_pack.h | 2 +
include/rdma/ib_verbs.h | 2 +
include/uapi/rdma/ib_user_verbs.h | 2 +
include/uapi/rdma/rdma_user_rxe.h | 1 +
9 files changed, 134 insertions(+), 7 deletions(-)
diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c
index da3a398053b8..16b90d68d2cb 100644
--- a/drivers/infiniband/sw/rxe/rxe_comp.c
+++ b/drivers/infiniband/sw/rxe/rxe_comp.c
@@ -104,6 +104,7 @@ static enum ib_wc_opcode wr_to_wc_opcode(enum ib_wr_opcode opcode)
case IB_WR_LOCAL_INV: return IB_WC_LOCAL_INV;
case IB_WR_REG_MR: return IB_WC_REG_MR;
case IB_WR_BIND_MW: return IB_WC_BIND_MW;
+ case IB_WR_RDMA_ATOMIC_WRITE: return IB_WC_RDMA_ATOMIC_WRITE;
default:
return 0xff;
@@ -256,6 +257,9 @@ static inline enum comp_state check_ack(struct rxe_qp *qp,
if ((syn & AETH_TYPE_MASK) != AETH_ACK)
return COMPST_ERROR;
+ if (wqe->wr.opcode == IB_WR_RDMA_ATOMIC_WRITE)
+ return COMPST_WRITE_SEND;
+
fallthrough;
/* (IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE doesn't have an AETH)
*/
diff --git a/drivers/infiniband/sw/rxe/rxe_opcode.c b/drivers/infiniband/sw/rxe/rxe_opcode.c
index d4ba4d506f17..d284fa8798c3 100644
--- a/drivers/infiniband/sw/rxe/rxe_opcode.c
+++ b/drivers/infiniband/sw/rxe/rxe_opcode.c
@@ -101,6 +101,12 @@ struct rxe_wr_opcode_info rxe_wr_opcode_info[] = {
[IB_QPT_UC] = WR_LOCAL_OP_MASK,
},
},
+ [IB_WR_RDMA_ATOMIC_WRITE] = {
+ .name = "IB_WR_RDMA_ATOMIC_WRITE",
+ .mask = {
+ [IB_QPT_RC] = WR_ATOMIC_WRITE_MASK,
+ },
+ },
};
struct rxe_opcode_info rxe_opcode[RXE_NUM_OPCODE] = {
@@ -378,6 +384,18 @@ struct rxe_opcode_info rxe_opcode[RXE_NUM_OPCODE] = {
RXE_IETH_BYTES,
}
},
+ [IB_OPCODE_RC_RDMA_ATOMIC_WRITE] = {
+ .name = "IB_OPCODE_RC_RDMA_ATOMIC_WRITE",
+ .mask = RXE_RETH_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK |
+ RXE_ATOMIC_WRITE_MASK | RXE_START_MASK |
+ RXE_END_MASK,
+ .length = RXE_BTH_BYTES + RXE_RETH_BYTES,
+ .offset = {
+ [RXE_BTH] = 0,
+ [RXE_RETH] = RXE_BTH_BYTES,
+ [RXE_PAYLOAD] = RXE_BTH_BYTES + RXE_RETH_BYTES,
+ }
+ },
/* UC */
[IB_OPCODE_UC_SEND_FIRST] = {
diff --git a/drivers/infiniband/sw/rxe/rxe_opcode.h b/drivers/infiniband/sw/rxe/rxe_opcode.h
index 8f9aaaf260f2..5962f5fc66a6 100644
--- a/drivers/infiniband/sw/rxe/rxe_opcode.h
+++ b/drivers/infiniband/sw/rxe/rxe_opcode.h
@@ -20,6 +20,7 @@ enum rxe_wr_mask {
WR_READ_MASK = BIT(3),
WR_WRITE_MASK = BIT(4),
WR_LOCAL_OP_MASK = BIT(5),
+ WR_ATOMIC_WRITE_MASK = BIT(7),
WR_READ_OR_WRITE_MASK = WR_READ_MASK | WR_WRITE_MASK,
WR_WRITE_OR_SEND_MASK = WR_WRITE_MASK | WR_SEND_MASK,
@@ -81,6 +82,8 @@ enum rxe_hdr_mask {
RXE_LOOPBACK_MASK = BIT(NUM_HDR_TYPES + 12),
+ RXE_ATOMIC_WRITE_MASK = BIT(NUM_HDR_TYPES + 14),
+
RXE_READ_OR_ATOMIC_MASK = (RXE_READ_MASK | RXE_ATOMIC_MASK),
RXE_WRITE_OR_SEND_MASK = (RXE_WRITE_MASK | RXE_SEND_MASK),
RXE_READ_OR_WRITE_MASK = (RXE_READ_MASK | RXE_WRITE_MASK),
diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c
index 15fefc689ca3..613c7031f562 100644
--- a/drivers/infiniband/sw/rxe/rxe_req.c
+++ b/drivers/infiniband/sw/rxe/rxe_req.c
@@ -235,6 +235,10 @@ static int next_opcode_rc(struct rxe_qp *qp, u32 opcode, int fits)
else
return fits ? IB_OPCODE_RC_SEND_ONLY_WITH_INVALIDATE :
IB_OPCODE_RC_SEND_FIRST;
+
+ case IB_WR_RDMA_ATOMIC_WRITE:
+ return IB_OPCODE_RC_RDMA_ATOMIC_WRITE;
+
case IB_WR_REG_MR:
case IB_WR_LOCAL_INV:
return opcode;
@@ -463,6 +467,11 @@ static int finish_packet(struct rxe_qp *qp, struct rxe_av *av,
}
}
+ if (pkt->mask & RXE_ATOMIC_WRITE_MASK) {
+ memcpy(payload_addr(pkt), wqe->wr.wr.rdma.atomic_wr, payload);
+ wqe->dma.resid -= payload;
+ }
+
return 0;
}
@@ -663,13 +672,15 @@ int rxe_requester(void *arg)
}
mask = rxe_opcode[opcode].mask;
- if (unlikely(mask & RXE_READ_OR_ATOMIC_MASK)) {
+ if (unlikely(mask & (RXE_READ_OR_ATOMIC_MASK |
+ RXE_ATOMIC_WRITE_MASK))) {
if (check_init_depth(qp, wqe))
goto exit;
}
mtu = get_mtu(qp);
- payload = (mask & RXE_WRITE_OR_SEND_MASK) ? wqe->dma.resid : 0;
+ payload = (mask & (RXE_WRITE_OR_SEND_MASK | RXE_ATOMIC_WRITE_MASK)) ?
+ wqe->dma.resid : 0;
if (payload > mtu) {
if (qp_type(qp) == IB_QPT_UD) {
/* C10-93.1.1: If the total sum of all the buffer lengths specified for a
diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c
index 28033849d404..2cf544abe0dc 100644
--- a/drivers/infiniband/sw/rxe/rxe_resp.c
+++ b/drivers/infiniband/sw/rxe/rxe_resp.c
@@ -22,6 +22,7 @@ enum resp_states {
RESPST_EXECUTE,
RESPST_READ_REPLY,
RESPST_ATOMIC_REPLY,
+ RESPST_ATOMIC_WRITE_REPLY,
RESPST_COMPLETE,
RESPST_ACKNOWLEDGE,
RESPST_CLEANUP,
@@ -57,6 +58,7 @@ static char *resp_state_name[] = {
[RESPST_EXECUTE] = "EXECUTE",
[RESPST_READ_REPLY] = "READ_REPLY",
[RESPST_ATOMIC_REPLY] = "ATOMIC_REPLY",
+ [RESPST_ATOMIC_WRITE_REPLY] = "ATOMIC_WRITE_REPLY",
[RESPST_COMPLETE] = "COMPLETE",
[RESPST_ACKNOWLEDGE] = "ACKNOWLEDGE",
[RESPST_CLEANUP] = "CLEANUP",
@@ -260,7 +262,7 @@ static enum resp_states check_op_valid(struct rxe_qp *qp,
case IB_QPT_RC:
if (((pkt->mask & RXE_READ_MASK) &&
!(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_READ)) ||
- ((pkt->mask & RXE_WRITE_MASK) &&
+ ((pkt->mask & (RXE_WRITE_MASK | RXE_ATOMIC_WRITE_MASK)) &&
!(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_WRITE)) ||
((pkt->mask & RXE_ATOMIC_MASK) &&
!(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_ATOMIC))) {
@@ -364,7 +366,7 @@ static enum resp_states check_resource(struct rxe_qp *qp,
}
}
- if (pkt->mask & RXE_READ_OR_ATOMIC_MASK) {
+ if (pkt->mask & (RXE_READ_OR_ATOMIC_MASK | RXE_ATOMIC_WRITE_MASK)) {
/* it is the requesters job to not send
* too many read/atomic ops, we just
* recycle the responder resource queue
@@ -415,7 +417,7 @@ static enum resp_states check_rkey(struct rxe_qp *qp,
enum resp_states state;
int access;
- if (pkt->mask & RXE_READ_OR_WRITE_MASK) {
+ if (pkt->mask & (RXE_READ_OR_WRITE_MASK | RXE_ATOMIC_WRITE_MASK)) {
if (pkt->mask & RXE_RETH_MASK) {
qp->resp.va = reth_va(pkt);
qp->resp.offset = 0;
@@ -483,7 +485,7 @@ static enum resp_states check_rkey(struct rxe_qp *qp,
goto err;
}
- if (pkt->mask & RXE_WRITE_MASK) {
+ if (pkt->mask & (RXE_WRITE_MASK | RXE_ATOMIC_WRITE_MASK)) {
if (resid > mtu) {
if (pktlen != mtu || bth_pad(pkt)) {
state = RESPST_ERR_LENGTH;
@@ -583,6 +585,7 @@ static struct resp_res *rxe_prepare_res(struct rxe_qp *qp,
res->state = rdatm_res_state_new;
break;
case RXE_ATOMIC_MASK:
+ case RXE_ATOMIC_WRITE_MASK:
res->first_psn = pkt->psn;
res->last_psn = pkt->psn;
res->cur_psn = pkt->psn;
@@ -652,6 +655,53 @@ static enum resp_states atomic_reply(struct rxe_qp *qp,
return ret;
}
+static enum resp_states atomic_write_reply(struct rxe_qp *qp,
+ struct rxe_pkt_info *pkt)
+{
+ u64 src, *dst;
+ struct resp_res *res = qp->resp.res;
+ struct rxe_mr *mr = qp->resp.mr;
+ int payload = payload_size(pkt);
+
+ if (!res) {
+ res = rxe_prepare_res(qp, pkt, RXE_ATOMIC_WRITE_MASK);
+ qp->resp.res = res;
+ }
+
+ if (!res->replay) {
+#ifdef CONFIG_64BIT
+ memcpy(&src, payload_addr(pkt), payload);
+
+ dst = iova_to_vaddr(mr, qp->resp.va + qp->resp.offset, payload);
+ /* check vaddr is 8 bytes aligned. */
+ if (!dst || (uintptr_t)dst & 7)
+ return RESPST_ERR_MISALIGNED_ATOMIC;
+
+ /* Do atomic write after all prior operations have completed */
+ smp_store_release(dst, src);
+
+ /* decrease resp.resid to zero */
+ qp->resp.resid -= sizeof(payload);
+
+ qp->resp.msn++;
+
+ /* next expected psn, read handles this separately */
+ qp->resp.psn = (pkt->psn + 1) & BTH_PSN_MASK;
+ qp->resp.ack_psn = qp->resp.psn;
+
+ qp->resp.opcode = pkt->opcode;
+ qp->resp.status = IB_WC_SUCCESS;
+
+ return RESPST_ACKNOWLEDGE;
+#else
+ pr_err("32-bit arch doesn't support 8-byte atomic write\n");
+ return RESPST_ERR_UNSUPPORTED_OPCODE;
+#endif /* CONFIG_64BIT */
+ }
+
+ return RESPST_ACKNOWLEDGE;
+}
+
static struct sk_buff *prepare_ack_packet(struct rxe_qp *qp,
struct rxe_pkt_info *ack,
int opcode,
@@ -892,6 +942,8 @@ static enum resp_states execute(struct rxe_qp *qp, struct rxe_pkt_info *pkt)
return RESPST_READ_REPLY;
} else if (pkt->mask & RXE_ATOMIC_MASK) {
return RESPST_ATOMIC_REPLY;
+ } else if (pkt->mask & RXE_ATOMIC_WRITE_MASK) {
+ return RESPST_ATOMIC_WRITE_REPLY;
} else {
/* Unreachable */
WARN_ON_ONCE(1);
@@ -1074,6 +1126,31 @@ static int send_atomic_ack(struct rxe_qp *qp, u8 syndrome, u32 psn)
return err;
}
+static int send_read_response(struct rxe_qp *qp, u8 syndrome, u32 psn)
+{
+ int err = 0;
+ struct rxe_pkt_info ack_pkt;
+ struct sk_buff *skb;
+
+ skb = prepare_ack_packet(qp, &ack_pkt, IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY,
+ 0, psn, syndrome);
+ if (!skb) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ err = rxe_xmit_packet(qp, &ack_pkt, skb);
+ if (err)
+ pr_err_ratelimited("Failed sending read response\n");
+
+ /* have to clear this since it is used to trigger
+ * long read replies
+ */
+ qp->resp.res = NULL;
+out:
+ return err;
+}
+
static enum resp_states acknowledge(struct rxe_qp *qp,
struct rxe_pkt_info *pkt)
{
@@ -1084,6 +1161,8 @@ static enum resp_states acknowledge(struct rxe_qp *qp,
send_ack(qp, qp->resp.aeth_syndrome, pkt->psn);
else if (pkt->mask & RXE_ATOMIC_MASK)
send_atomic_ack(qp, AETH_ACK_UNLIMITED, pkt->psn);
+ else if (pkt->mask & RXE_ATOMIC_WRITE_MASK)
+ send_read_response(qp, AETH_ACK_UNLIMITED, pkt->psn);
else if (bth_ack(pkt))
send_ack(qp, AETH_ACK_UNLIMITED, pkt->psn);
@@ -1195,7 +1274,9 @@ static enum resp_states duplicate_request(struct rxe_qp *qp,
res->replay = 1;
res->cur_psn = pkt->psn;
qp->resp.res = res;
- rc = RESPST_ATOMIC_REPLY;
+ rc = pkt->mask & RXE_ATOMIC_MASK ?
+ RESPST_ATOMIC_REPLY :
+ RESPST_ATOMIC_WRITE_REPLY;
goto out;
}
@@ -1335,6 +1416,9 @@ int rxe_responder(void *arg)
case RESPST_ATOMIC_REPLY:
state = atomic_reply(qp, pkt);
break;
+ case RESPST_ATOMIC_WRITE_REPLY:
+ state = atomic_write_reply(qp, pkt);
+ break;
case RESPST_ACKNOWLEDGE:
state = acknowledge(qp, pkt);
break;
diff --git a/include/rdma/ib_pack.h b/include/rdma/ib_pack.h
index a9162f25beaf..519ec6b841e7 100644
--- a/include/rdma/ib_pack.h
+++ b/include/rdma/ib_pack.h
@@ -84,6 +84,7 @@ enum {
/* opcode 0x15 is reserved */
IB_OPCODE_SEND_LAST_WITH_INVALIDATE = 0x16,
IB_OPCODE_SEND_ONLY_WITH_INVALIDATE = 0x17,
+ IB_OPCODE_RDMA_ATOMIC_WRITE = 0x1D,
/* real constants follow -- see comment about above IB_OPCODE()
macro for more details */
@@ -112,6 +113,7 @@ enum {
IB_OPCODE(RC, FETCH_ADD),
IB_OPCODE(RC, SEND_LAST_WITH_INVALIDATE),
IB_OPCODE(RC, SEND_ONLY_WITH_INVALIDATE),
+ IB_OPCODE(RC, RDMA_ATOMIC_WRITE),
/* UC */
IB_OPCODE(UC, SEND_FIRST),
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 9c6317cf80d5..7834285c8498 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -985,6 +985,7 @@ enum ib_wc_opcode {
IB_WC_REG_MR,
IB_WC_MASKED_COMP_SWAP,
IB_WC_MASKED_FETCH_ADD,
+ IB_WC_RDMA_ATOMIC_WRITE = IB_UVERBS_WC_RDMA_ATOMIC_WRITE,
/*
* Set value of IB_WC_RECV so consumers can test if a completion is a
* receive by testing (opcode & IB_WC_RECV).
@@ -1325,6 +1326,7 @@ enum ib_wr_opcode {
IB_UVERBS_WR_MASKED_ATOMIC_CMP_AND_SWP,
IB_WR_MASKED_ATOMIC_FETCH_AND_ADD =
IB_UVERBS_WR_MASKED_ATOMIC_FETCH_AND_ADD,
+ IB_WR_RDMA_ATOMIC_WRITE = IB_UVERBS_WR_RDMA_ATOMIC_WRITE,
/* These are kernel only and can not be issued by userspace */
IB_WR_REG_MR = 0x20,
diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h
index 7dd903d932e5..175ade79e358 100644
--- a/include/uapi/rdma/ib_user_verbs.h
+++ b/include/uapi/rdma/ib_user_verbs.h
@@ -466,6 +466,7 @@ enum ib_uverbs_wc_opcode {
IB_UVERBS_WC_BIND_MW = 5,
IB_UVERBS_WC_LOCAL_INV = 6,
IB_UVERBS_WC_TSO = 7,
+ IB_UVERBS_WC_RDMA_ATOMIC_WRITE = 9,
};
struct ib_uverbs_wc {
@@ -784,6 +785,7 @@ enum ib_uverbs_wr_opcode {
IB_UVERBS_WR_RDMA_READ_WITH_INV = 11,
IB_UVERBS_WR_MASKED_ATOMIC_CMP_AND_SWP = 12,
IB_UVERBS_WR_MASKED_ATOMIC_FETCH_AND_ADD = 13,
+ IB_UVERBS_WR_RDMA_ATOMIC_WRITE = 15,
/* Review enum ib_wr_opcode before modifying this */
};
diff --git a/include/uapi/rdma/rdma_user_rxe.h b/include/uapi/rdma/rdma_user_rxe.h
index f09c5c9e3dd5..845da9cb04fd 100644
--- a/include/uapi/rdma/rdma_user_rxe.h
+++ b/include/uapi/rdma/rdma_user_rxe.h
@@ -86,6 +86,7 @@ struct rxe_send_wr {
__aligned_u64 remote_addr;
__u32 rkey;
__u32 reserved;
+ __u8 atomic_wr[8];
} rdma;
struct {
__aligned_u64 remote_addr;
--
2.34.1
next prev parent reply other threads:[~2022-07-08 4:02 UTC|newest]
Thread overview: 14+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-07-08 4:02 [RESEND PATCH v5 0/2] RDMA/rxe: Add RDMA Atomic Write operation yangx.jy
2022-07-08 4:02 ` yangx.jy [this message]
2022-09-23 22:22 ` [RESEND PATCH v5 1/2] RDMA/rxe: Support " Jason Gunthorpe
2022-09-26 6:55 ` Yang, Xiao/杨 晓
2022-10-06 7:53 ` Yang, Xiao/杨 晓
2022-10-14 16:00 ` Jason Gunthorpe
2022-10-20 13:25 ` Yang, Xiao/杨 晓
2022-10-24 17:02 ` Jason Gunthorpe
2022-09-27 8:18 ` Li Zhijian
2022-09-27 13:17 ` Jason Gunthorpe
2022-09-29 3:58 ` Yang, Xiao/杨 晓
2022-09-29 5:36 ` Yang, Xiao/杨 晓
2022-07-08 4:02 ` [RESEND PATCH v5 2/2] RDMA/rxe: Add RDMA Atomic Write attribute for rxe device yangx.jy
2022-08-04 7:30 ` [RESEND PATCH v5 0/2] RDMA/rxe: Add RDMA Atomic Write operation yangx.jy
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20220708040228.6703-2-yangx.jy@fujitsu.com \
--to=yangx.jy@fujitsu.com \
--cc=jgg@ziepe.ca \
--cc=leon@kernel.org \
--cc=linux-rdma@vger.kernel.org \
--cc=lizhijian@fujitsu.com \
--cc=rpearsonhpe@gmail.com \
--cc=zyjzyj2000@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox