Linux-HyperV List
 help / color / mirror / Atom feed
* [PATCH v3 02/10] IB/rdmavt: Don't abuse udata and ib_respond_udata()
From: Jason Gunthorpe @ 2026-05-12  0:09 UTC (permalink / raw)
  To: Abhijit Gangurde, Allen Hubbe,
	Broadcom internal kernel review list, Bernard Metzler,
	Potnuri Bharat Teja, Bryan Tan, Cheng Xu, Dennis Dalessandro,
	Gal Pressman, Junxian Huang, Kai Shen, Kalesh AP,
	Konstantin Taranov, Krzysztof Czurylo, Leon Romanovsky,
	linux-hyperv, linux-rdma, Long Li, Michal Kalderon,
	Michael Margolin, Nelson Escobar, Satish Kharat, Selvin Xavier,
	Yossi Leybovich, Chengchang Tang, Tatyana Nikolova, Vishnu Dasa,
	Yishai Hadas
  Cc: patches
In-Reply-To: <0-v3-4effdebad75a+e1-rdma_udata_rep_jgg@nvidia.com>

Use copy_to_user() directly since the data is not being placed in the
udata response memory.

It is unclear why this is trying to do two copies, but leave it alone.

Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/infiniband/sw/rdmavt/srq.c | 19 +++++++++----------
 1 file changed, 9 insertions(+), 10 deletions(-)

diff --git a/drivers/infiniband/sw/rdmavt/srq.c b/drivers/infiniband/sw/rdmavt/srq.c
index fe125bf85b2726..d022aa56c5bfd5 100644
--- a/drivers/infiniband/sw/rdmavt/srq.c
+++ b/drivers/infiniband/sw/rdmavt/srq.c
@@ -128,6 +128,7 @@ int rvt_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
 	struct rvt_srq *srq = ibsrq_to_rvtsrq(ibsrq);
 	struct rvt_dev_info *dev = ib_to_rvt(ibsrq->device);
 	struct rvt_rq tmp_rq = {};
+	__u64 offset_addr;
 	int ret = 0;
 
 	if (attr_mask & IB_SRQ_MAX_WR) {
@@ -149,19 +150,17 @@ int rvt_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
 			return -ENOMEM;
 		/* Check that we can write the offset to mmap. */
 		if (udata && udata->inlen >= sizeof(__u64)) {
-			__u64 offset_addr;
 			__u64 offset = 0;
 
 			ret = ib_copy_from_udata(&offset_addr, udata,
 						 sizeof(offset_addr));
 			if (ret)
 				goto bail_free;
-			udata->outbuf = (void __user *)
-					(unsigned long)offset_addr;
-			ret = ib_copy_to_udata(udata, &offset,
-					       sizeof(offset));
-			if (ret)
+			if (copy_to_user(u64_to_user_ptr(offset_addr), &offset,
+					 sizeof(offset))) {
+				ret = -EFAULT;
 				goto bail_free;
+			}
 		}
 
 		spin_lock_irq(&srq->rq.kwq->c_lock);
@@ -236,10 +235,10 @@ int rvt_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
 			 * See rvt_mmap() for details.
 			 */
 			if (udata && udata->inlen >= sizeof(__u64)) {
-				ret = ib_copy_to_udata(udata, &ip->offset,
-						       sizeof(ip->offset));
-				if (ret)
-					return ret;
+				if (copy_to_user(u64_to_user_ptr(offset_addr),
+						 &ip->offset,
+						 sizeof(ip->offset)))
+					return -EFAULT;
 			}
 
 			/*
-- 
2.43.0


^ permalink raw reply related

* [PATCH v3 06/10] RDMA/qedr: Replace qedr_ib_copy_to_udata() with ib_respond_udata()
From: Jason Gunthorpe @ 2026-05-12  0:09 UTC (permalink / raw)
  To: Abhijit Gangurde, Allen Hubbe,
	Broadcom internal kernel review list, Bernard Metzler,
	Potnuri Bharat Teja, Bryan Tan, Cheng Xu, Dennis Dalessandro,
	Gal Pressman, Junxian Huang, Kai Shen, Kalesh AP,
	Konstantin Taranov, Krzysztof Czurylo, Leon Romanovsky,
	linux-hyperv, linux-rdma, Long Li, Michal Kalderon,
	Michael Margolin, Nelson Escobar, Satish Kharat, Selvin Xavier,
	Yossi Leybovich, Chengchang Tang, Tatyana Nikolova, Vishnu Dasa,
	Yishai Hadas
  Cc: patches
In-Reply-To: <0-v3-4effdebad75a+e1-rdma_udata_rep_jgg@nvidia.com>

This is another instance of the min() pattern.

Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/infiniband/hw/qedr/verbs.c | 35 +++++-------------------------
 1 file changed, 6 insertions(+), 29 deletions(-)

diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c
index 3b86ea1cf88883..79190c5b8b50b0 100644
--- a/drivers/infiniband/hw/qedr/verbs.c
+++ b/drivers/infiniband/hw/qedr/verbs.c
@@ -64,14 +64,6 @@ enum {
 	QEDR_USER_MMAP_PHYS_PAGE,
 };
 
-static inline int qedr_ib_copy_to_udata(struct ib_udata *udata, void *src,
-					size_t len)
-{
-	size_t min_len = min_t(size_t, len, udata->outlen);
-
-	return ib_copy_to_udata(udata, src, min_len);
-}
-
 int qedr_query_pkey(struct ib_device *ibdev, u32 port, u16 index, u16 *pkey)
 {
 	if (index >= QEDR_ROCE_PKEY_TABLE_LEN)
@@ -340,7 +332,7 @@ int qedr_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata)
 	uresp.sges_per_srq_wr = dev->attr.max_srq_sge;
 	uresp.max_cqes = QEDR_MAX_CQES;
 
-	rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp));
+	rc = ib_respond_udata(udata, uresp);
 	if (rc)
 		goto err;
 
@@ -459,9 +451,8 @@ int qedr_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
 		struct qedr_ucontext *context = rdma_udata_to_drv_context(
 			udata, struct qedr_ucontext, ibucontext);
 
-		rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp));
+		rc = ib_respond_udata(udata, uresp);
 		if (rc) {
-			DP_ERR(dev, "copy error pd_id=0x%x.\n", pd_id);
 			dev->ops->rdma_dealloc_pd(dev->rdma_ctx, pd_id);
 			return rc;
 		}
@@ -696,12 +687,10 @@ static void qedr_db_recovery_del(struct qedr_dev *dev,
 	dev->ops->common->db_recovery_del(dev->cdev, db_addr, db_data);
 }
 
-static int qedr_copy_cq_uresp(struct qedr_dev *dev,
-			      struct qedr_cq *cq, struct ib_udata *udata,
+static int qedr_copy_cq_uresp(struct qedr_cq *cq, struct ib_udata *udata,
 			      u32 db_offset)
 {
 	struct qedr_create_cq_uresp uresp;
-	int rc;
 
 	memset(&uresp, 0, sizeof(uresp));
 
@@ -711,11 +700,7 @@ static int qedr_copy_cq_uresp(struct qedr_dev *dev,
 		uresp.db_rec_addr =
 			rdma_user_mmap_get_offset(cq->q.db_mmap_entry);
 
-	rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp));
-	if (rc)
-		DP_ERR(dev, "copy error cqid=0x%x.\n", cq->icid);
-
-	return rc;
+	return ib_respond_udata(udata, uresp);
 }
 
 static void consume_cqe(struct qedr_cq *cq)
@@ -994,7 +979,7 @@ int qedr_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
 	spin_lock_init(&cq->cq_lock);
 
 	if (udata) {
-		rc = qedr_copy_cq_uresp(dev, cq, udata, db_offset);
+		rc = qedr_copy_cq_uresp(cq, udata, db_offset);
 		if (rc)
 			goto err2;
 
@@ -1298,8 +1283,6 @@ static int qedr_copy_qp_uresp(struct qedr_dev *dev,
 			      struct qedr_qp *qp, struct ib_udata *udata,
 			      struct qedr_create_qp_uresp *uresp)
 {
-	int rc;
-
 	memset(uresp, 0, sizeof(*uresp));
 
 	if (qedr_qp_has_sq(qp))
@@ -1311,13 +1294,7 @@ static int qedr_copy_qp_uresp(struct qedr_dev *dev,
 	uresp->atomic_supported = dev->atomic_cap != IB_ATOMIC_NONE;
 	uresp->qp_id = qp->qp_id;
 
-	rc = qedr_ib_copy_to_udata(udata, uresp, sizeof(*uresp));
-	if (rc)
-		DP_ERR(dev,
-		       "create qp: failed a copy to user space with qp icid=0x%x.\n",
-		       qp->icid);
-
-	return rc;
+	return ib_respond_udata(udata, *uresp);
 }
 
 static void qedr_reset_qp_hwq_info(struct qedr_qp_hwq_info *qph)
-- 
2.43.0


^ permalink raw reply related

* [PATCH v3 10/10] RDMA: Replace memset with = {} pattern for ib_respond_udata()
From: Jason Gunthorpe @ 2026-05-12  0:09 UTC (permalink / raw)
  To: Abhijit Gangurde, Allen Hubbe,
	Broadcom internal kernel review list, Bernard Metzler,
	Potnuri Bharat Teja, Bryan Tan, Cheng Xu, Dennis Dalessandro,
	Gal Pressman, Junxian Huang, Kai Shen, Kalesh AP,
	Konstantin Taranov, Krzysztof Czurylo, Leon Romanovsky,
	linux-hyperv, linux-rdma, Long Li, Michal Kalderon,
	Michael Margolin, Nelson Escobar, Satish Kharat, Selvin Xavier,
	Yossi Leybovich, Chengchang Tang, Tatyana Nikolova, Vishnu Dasa,
	Yishai Hadas
  Cc: patches
In-Reply-To: <0-v3-4effdebad75a+e1-rdma_udata_rep_jgg@nvidia.com>

Most drivers do this already, but some open-code a memset. Switch
all instances found. qedr_copy_qp_uresp() is already called with
zeroed memory so that memset is redundant.

Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/infiniband/hw/cxgb4/cq.c             |  3 +--
 drivers/infiniband/hw/cxgb4/qp.c             |  6 ++----
 drivers/infiniband/hw/erdma/erdma_verbs.c    |  4 +---
 drivers/infiniband/hw/ocrdma/ocrdma_verbs.c  | 12 ++++--------
 drivers/infiniband/hw/qedr/verbs.c           |  6 +-----
 drivers/infiniband/hw/usnic/usnic_ib_verbs.c |  4 +---
 6 files changed, 10 insertions(+), 25 deletions(-)

diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c
index 47508df4cec023..d1517f2560b981 100644
--- a/drivers/infiniband/hw/cxgb4/cq.c
+++ b/drivers/infiniband/hw/cxgb4/cq.c
@@ -1004,7 +1004,7 @@ int c4iw_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
 	struct c4iw_dev *rhp = to_c4iw_dev(ibcq->device);
 	struct c4iw_cq *chp = to_c4iw_cq(ibcq);
 	struct c4iw_create_cq ucmd;
-	struct c4iw_create_cq_resp uresp;
+	struct c4iw_create_cq_resp uresp = {};
 	int ret, wr_len;
 	size_t memsize, hwentries;
 	struct c4iw_mm_entry *mm, *mm2;
@@ -1102,7 +1102,6 @@ int c4iw_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
 		if (!mm2)
 			goto err_free_mm;
 
-		memset(&uresp, 0, sizeof(uresp));
 		uresp.qid_mask = rhp->rdev.cqmask;
 		uresp.cqid = chp->cq.cqid;
 		uresp.size = chp->cq.size;
diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c
index f9c7030ac6bfd0..e295f79e0cd3e5 100644
--- a/drivers/infiniband/hw/cxgb4/qp.c
+++ b/drivers/infiniband/hw/cxgb4/qp.c
@@ -2120,7 +2120,7 @@ int c4iw_create_qp(struct ib_qp *qp, struct ib_qp_init_attr *attrs,
 	struct c4iw_pd *php;
 	struct c4iw_cq *schp;
 	struct c4iw_cq *rchp;
-	struct c4iw_create_qp_resp uresp;
+	struct c4iw_create_qp_resp uresp = {};
 	unsigned int sqsize, rqsize = 0;
 	struct c4iw_ucontext *ucontext = rdma_udata_to_drv_context(
 		udata, struct c4iw_ucontext, ibucontext);
@@ -2242,7 +2242,6 @@ int c4iw_create_qp(struct ib_qp *qp, struct ib_qp_init_attr *attrs,
 				goto err_free_sq_db_key;
 			}
 		}
-		memset(&uresp, 0, sizeof(uresp));
 		if (t4_sq_onchip(&qhp->wq.sq)) {
 			ma_sync_key_mm = kmalloc_obj(*ma_sync_key_mm);
 			if (!ma_sync_key_mm) {
@@ -2686,7 +2685,7 @@ int c4iw_create_srq(struct ib_srq *ib_srq, struct ib_srq_init_attr *attrs,
 	struct c4iw_dev *rhp;
 	struct c4iw_srq *srq = to_c4iw_srq(ib_srq);
 	struct c4iw_pd *php;
-	struct c4iw_create_srq_resp uresp;
+	struct c4iw_create_srq_resp uresp = {};
 	struct c4iw_ucontext *ucontext;
 	struct c4iw_mm_entry *srq_key_mm, *srq_db_key_mm;
 	int rqsize;
@@ -2764,7 +2763,6 @@ int c4iw_create_srq(struct ib_srq *ib_srq, struct ib_srq_init_attr *attrs,
 			ret = -ENOMEM;
 			goto err_free_srq_key_mm;
 		}
-		memset(&uresp, 0, sizeof(uresp));
 		uresp.flags = srq->flags;
 		uresp.qid_mask = rhp->rdev.qpmask;
 		uresp.srqid = srq->wq.qid;
diff --git a/drivers/infiniband/hw/erdma/erdma_verbs.c b/drivers/infiniband/hw/erdma/erdma_verbs.c
index c8a35337ba51e8..b59c2e3a5306d1 100644
--- a/drivers/infiniband/hw/erdma/erdma_verbs.c
+++ b/drivers/infiniband/hw/erdma/erdma_verbs.c
@@ -996,7 +996,7 @@ int erdma_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attrs,
 	struct erdma_ucontext *uctx = rdma_udata_to_drv_context(
 		udata, struct erdma_ucontext, ibucontext);
 	struct erdma_ureq_create_qp ureq;
-	struct erdma_uresp_create_qp uresp;
+	struct erdma_uresp_create_qp uresp = {};
 	void *old_entry;
 	int ret = 0;
 
@@ -1048,8 +1048,6 @@ int erdma_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attrs,
 		if (ret)
 			goto err_out_xa;
 
-		memset(&uresp, 0, sizeof(uresp));
-
 		uresp.num_sqe = qp->attrs.sq_size;
 		uresp.num_rqe = qp->attrs.rq_size;
 		uresp.qp_id = QP_ID(qp);
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
index 2a174d0fe6ca1e..383f1d9c15d151 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
@@ -586,11 +586,10 @@ static int ocrdma_copy_pd_uresp(struct ocrdma_dev *dev, struct ocrdma_pd *pd,
 	u64 db_page_addr;
 	u64 dpp_page_addr = 0;
 	u32 db_page_size;
-	struct ocrdma_alloc_pd_uresp rsp;
+	struct ocrdma_alloc_pd_uresp rsp = {};
 	struct ocrdma_ucontext *uctx = rdma_udata_to_drv_context(
 		udata, struct ocrdma_ucontext, ibucontext);
 
-	memset(&rsp, 0, sizeof(rsp));
 	rsp.id = pd->id;
 	rsp.dpp_enabled = pd->dpp_enabled;
 	db_page_addr = ocrdma_get_db_addr(dev, pd->id);
@@ -930,13 +929,12 @@ static int ocrdma_copy_cq_uresp(struct ocrdma_dev *dev, struct ocrdma_cq *cq,
 	int status;
 	struct ocrdma_ucontext *uctx = rdma_udata_to_drv_context(
 		udata, struct ocrdma_ucontext, ibucontext);
-	struct ocrdma_create_cq_uresp uresp;
+	struct ocrdma_create_cq_uresp uresp = {};
 
 	/* this must be user flow! */
 	if (!udata)
 		return -EINVAL;
 
-	memset(&uresp, 0, sizeof(uresp));
 	uresp.cq_id = cq->id;
 	uresp.page_size = PAGE_ALIGN(cq->len);
 	uresp.num_pages = 1;
@@ -1173,11 +1171,10 @@ static int ocrdma_copy_qp_uresp(struct ocrdma_qp *qp,
 {
 	int status;
 	u64 usr_db;
-	struct ocrdma_create_qp_uresp uresp;
+	struct ocrdma_create_qp_uresp uresp = {};
 	struct ocrdma_pd *pd = qp->pd;
 	struct ocrdma_dev *dev = get_ocrdma_dev(pd->ibpd.device);
 
-	memset(&uresp, 0, sizeof(uresp));
 	usr_db = dev->nic_info.unmapped_db +
 			(pd->id * dev->nic_info.db_page_size);
 	uresp.qp_id = qp->id;
@@ -1730,9 +1727,8 @@ static int ocrdma_copy_srq_uresp(struct ocrdma_dev *dev, struct ocrdma_srq *srq,
 				struct ib_udata *udata)
 {
 	int status;
-	struct ocrdma_create_srq_uresp uresp;
+	struct ocrdma_create_srq_uresp uresp = {};
 
-	memset(&uresp, 0, sizeof(uresp));
 	uresp.rq_dbid = srq->rq.dbid;
 	uresp.num_rq_pages = 1;
 	uresp.rq_page_addr[0] = virt_to_phys(srq->rq.va);
diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c
index 79190c5b8b50b0..1af908275ca729 100644
--- a/drivers/infiniband/hw/qedr/verbs.c
+++ b/drivers/infiniband/hw/qedr/verbs.c
@@ -690,9 +690,7 @@ static void qedr_db_recovery_del(struct qedr_dev *dev,
 static int qedr_copy_cq_uresp(struct qedr_cq *cq, struct ib_udata *udata,
 			      u32 db_offset)
 {
-	struct qedr_create_cq_uresp uresp;
-
-	memset(&uresp, 0, sizeof(uresp));
+	struct qedr_create_cq_uresp uresp = {};
 
 	uresp.db_offset = db_offset;
 	uresp.icid = cq->icid;
@@ -1283,8 +1281,6 @@ static int qedr_copy_qp_uresp(struct qedr_dev *dev,
 			      struct qedr_qp *qp, struct ib_udata *udata,
 			      struct qedr_create_qp_uresp *uresp)
 {
-	memset(uresp, 0, sizeof(*uresp));
-
 	if (qedr_qp_has_sq(qp))
 		qedr_copy_sq_uresp(dev, uresp, qp);
 
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
index e887f03a84d063..261f18a8368543 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
+++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
@@ -82,15 +82,13 @@ static void usnic_ib_fw_string_to_u64(char *fw_ver_str, u64 *fw_ver)
 static int usnic_ib_fill_create_qp_resp(struct usnic_ib_qp_grp *qp_grp,
 					struct ib_udata *udata)
 {
-	struct usnic_ib_create_qp_resp resp;
+	struct usnic_ib_create_qp_resp resp = {};
 	struct pci_dev *pdev;
 	struct vnic_dev_bar *bar;
 	struct usnic_vnic_res_chunk *chunk;
 	struct usnic_ib_qp_grp_flow *default_flow;
 	int i, err;
 
-	memset(&resp, 0, sizeof(resp));
-
 	pdev = usnic_vnic_get_pdev(qp_grp->vf->vnic);
 	if (!pdev) {
 		usnic_err("Failed to get pdev of qp_grp %d\n",
-- 
2.43.0


^ permalink raw reply related

* [PATCH v3 03/10] RDMA: Convert drivers using min to ib_respond_udata()
From: Jason Gunthorpe @ 2026-05-12  0:09 UTC (permalink / raw)
  To: Abhijit Gangurde, Allen Hubbe,
	Broadcom internal kernel review list, Bernard Metzler,
	Potnuri Bharat Teja, Bryan Tan, Cheng Xu, Dennis Dalessandro,
	Gal Pressman, Junxian Huang, Kai Shen, Kalesh AP,
	Konstantin Taranov, Krzysztof Czurylo, Leon Romanovsky,
	linux-hyperv, linux-rdma, Long Li, Michal Kalderon,
	Michael Margolin, Nelson Escobar, Satish Kharat, Selvin Xavier,
	Yossi Leybovich, Chengchang Tang, Tatyana Nikolova, Vishnu Dasa,
	Yishai Hadas
  Cc: patches
In-Reply-To: <0-v3-4effdebad75a+e1-rdma_udata_rep_jgg@nvidia.com>

Convert the pattern:

   ib_copy_to_udata(udata, &resp, min(sizeof(resp), udata->outlen));

Using Coccinelle:

@@
identifier resp;
expression udata;
@@

- ib_copy_to_udata(udata, &resp, min(sizeof(resp), udata->outlen))
+ ib_respond_udata(udata, resp)

@@
identifier resp;
expression udata;
@@

- ib_copy_to_udata(udata, &resp, min(udata->outlen, sizeof(resp)))
+ ib_respond_udata(udata, resp)

Run another pass with AI to propagate the return code correctly and
remove redundant prints.

Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/infiniband/hw/efa/efa_verbs.c        | 44 +++++-------------
 drivers/infiniband/hw/erdma/erdma_verbs.c    |  3 +-
 drivers/infiniband/hw/hns/hns_roce_ah.c      |  4 +-
 drivers/infiniband/hw/hns/hns_roce_cq.c      |  3 +-
 drivers/infiniband/hw/hns/hns_roce_main.c    |  3 +-
 drivers/infiniband/hw/hns/hns_roce_pd.c      |  8 ++--
 drivers/infiniband/hw/hns/hns_roce_qp.c      | 13 ++----
 drivers/infiniband/hw/hns/hns_roce_srq.c     |  6 +--
 drivers/infiniband/hw/irdma/verbs.c          | 48 +++++++-------------
 drivers/infiniband/hw/mana/cq.c              |  6 +--
 drivers/infiniband/hw/mana/qp.c              |  6 +--
 drivers/infiniband/hw/mlx5/srq.c             |  7 +--
 drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c |  8 ++--
 13 files changed, 49 insertions(+), 110 deletions(-)

diff --git a/drivers/infiniband/hw/efa/efa_verbs.c b/drivers/infiniband/hw/efa/efa_verbs.c
index 3ad5d6e27b1590..395290ab05847a 100644
--- a/drivers/infiniband/hw/efa/efa_verbs.c
+++ b/drivers/infiniband/hw/efa/efa_verbs.c
@@ -270,13 +270,9 @@ int efa_query_device(struct ib_device *ibdev,
 		if (dev->neqs)
 			resp.device_caps |= EFA_QUERY_DEVICE_CAPS_CQ_NOTIFICATIONS;
 
-		err = ib_copy_to_udata(udata, &resp,
-				       min(sizeof(resp), udata->outlen));
-		if (err) {
-			ibdev_dbg(ibdev,
-				  "Failed to copy udata for query_device\n");
+		err = ib_respond_udata(udata, resp);
+		if (err)
 			return err;
-		}
 	}
 
 	return 0;
@@ -442,13 +438,9 @@ int efa_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
 	resp.pdn = result.pdn;
 
 	if (udata->outlen) {
-		err = ib_copy_to_udata(udata, &resp,
-				       min(sizeof(resp), udata->outlen));
-		if (err) {
-			ibdev_dbg(&dev->ibdev,
-				  "Failed to copy udata for alloc_pd\n");
+		err = ib_respond_udata(udata, resp);
+		if (err)
 			goto err_dealloc_pd;
-		}
 	}
 
 	ibdev_dbg(&dev->ibdev, "Allocated pd[%d]\n", pd->pdn);
@@ -782,14 +774,9 @@ int efa_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *init_attr,
 	qp->max_inline_data = init_attr->cap.max_inline_data;
 
 	if (udata->outlen) {
-		err = ib_copy_to_udata(udata, &resp,
-				       min(sizeof(resp), udata->outlen));
-		if (err) {
-			ibdev_dbg(&dev->ibdev,
-				  "Failed to copy udata for qp[%u]\n",
-				  create_qp_resp.qp_num);
+		err = ib_respond_udata(udata, resp);
+		if (err)
 			goto err_remove_mmap_entries;
-		}
 	}
 
 	ibdev_dbg(&dev->ibdev, "Created qp[%d]\n", qp->ibqp.qp_num);
@@ -1226,13 +1213,9 @@ int efa_create_user_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
 	}
 
 	if (udata->outlen) {
-		err = ib_copy_to_udata(udata, &resp,
-				       min(sizeof(resp), udata->outlen));
-		if (err) {
-			ibdev_dbg(ibdev,
-				  "Failed to copy udata for create_cq\n");
+		err = ib_respond_udata(udata, resp);
+		if (err)
 			goto err_xa_erase;
-		}
 	}
 
 	ibdev_dbg(ibdev, "Created cq[%d], cq depth[%u]. dma[%pad] virt[0x%p]\n",
@@ -1935,8 +1918,7 @@ int efa_alloc_ucontext(struct ib_ucontext *ibucontext, struct ib_udata *udata)
 	resp.max_tx_batch = dev->dev_attr.max_tx_batch;
 	resp.min_sq_wr = dev->dev_attr.min_sq_depth;
 
-	err = ib_copy_to_udata(udata, &resp,
-			       min(sizeof(resp), udata->outlen));
+	err = ib_respond_udata(udata, resp);
 	if (err)
 		goto err_dealloc_uar;
 
@@ -2087,13 +2069,9 @@ int efa_create_ah(struct ib_ah *ibah,
 	resp.efa_address_handle = result.ah;
 
 	if (udata->outlen) {
-		err = ib_copy_to_udata(udata, &resp,
-				       min(sizeof(resp), udata->outlen));
-		if (err) {
-			ibdev_dbg(&dev->ibdev,
-				  "Failed to copy udata for create_ah response\n");
+		err = ib_respond_udata(udata, resp);
+		if (err)
 			goto err_destroy_ah;
-		}
 	}
 	ibdev_dbg(&dev->ibdev, "Created ah[%d]\n", ah->ah);
 
diff --git a/drivers/infiniband/hw/erdma/erdma_verbs.c b/drivers/infiniband/hw/erdma/erdma_verbs.c
index 5523b4e151e1ff..9bba470c6e3257 100644
--- a/drivers/infiniband/hw/erdma/erdma_verbs.c
+++ b/drivers/infiniband/hw/erdma/erdma_verbs.c
@@ -1990,8 +1990,7 @@ int erdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
 		uresp.cq_id = cq->cqn;
 		uresp.num_cqe = depth;
 
-		ret = ib_copy_to_udata(udata, &uresp,
-				       min(sizeof(uresp), udata->outlen));
+		ret = ib_respond_udata(udata, uresp);
 		if (ret)
 			goto err_free_res;
 	} else {
diff --git a/drivers/infiniband/hw/hns/hns_roce_ah.c b/drivers/infiniband/hw/hns/hns_roce_ah.c
index 8a605da8a93c97..925ddf15b68102 100644
--- a/drivers/infiniband/hw/hns/hns_roce_ah.c
+++ b/drivers/infiniband/hw/hns/hns_roce_ah.c
@@ -32,6 +32,7 @@
 
 #include <rdma/ib_addr.h>
 #include <rdma/ib_cache.h>
+#include <rdma/uverbs_ioctl.h>
 #include "hns_roce_device.h"
 #include "hns_roce_hw_v2.h"
 
@@ -112,8 +113,7 @@ int hns_roce_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr,
 		resp.priority = ah->av.sl;
 		resp.tc_mode = tc_mode;
 		memcpy(resp.dmac, ah_attr->roce.dmac, ETH_ALEN);
-		ret = ib_copy_to_udata(udata, &resp,
-				       min(udata->outlen, sizeof(resp)));
+		ret = ib_respond_udata(udata, resp);
 	}
 
 err_out:
diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c
index 621568e114054b..24de651f735e03 100644
--- a/drivers/infiniband/hw/hns/hns_roce_cq.c
+++ b/drivers/infiniband/hw/hns/hns_roce_cq.c
@@ -452,8 +452,7 @@ int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr,
 
 	if (udata) {
 		resp.cqn = hr_cq->cqn;
-		ret = ib_copy_to_udata(udata, &resp,
-				       min(udata->outlen, sizeof(resp)));
+		ret = ib_respond_udata(udata, resp);
 		if (ret)
 			goto err_cqc;
 	}
diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c
index 0dbe99aab6ad21..c17ff5347a0147 100644
--- a/drivers/infiniband/hw/hns/hns_roce_main.c
+++ b/drivers/infiniband/hw/hns/hns_roce_main.c
@@ -477,8 +477,7 @@ static int hns_roce_alloc_ucontext(struct ib_ucontext *uctx,
 
 	resp.cqe_size = hr_dev->caps.cqe_sz;
 
-	ret = ib_copy_to_udata(udata, &resp,
-			       min(udata->outlen, sizeof(resp)));
+	ret = ib_respond_udata(udata, resp);
 	if (ret)
 		goto error_fail_copy_to_udata;
 
diff --git a/drivers/infiniband/hw/hns/hns_roce_pd.c b/drivers/infiniband/hw/hns/hns_roce_pd.c
index 225c3e328e0e08..73bb000574c50d 100644
--- a/drivers/infiniband/hw/hns/hns_roce_pd.c
+++ b/drivers/infiniband/hw/hns/hns_roce_pd.c
@@ -30,6 +30,7 @@
  * SOFTWARE.
  */
 
+#include <rdma/uverbs_ioctl.h>
 #include "hns_roce_device.h"
 
 void hns_roce_init_pd_table(struct hns_roce_dev *hr_dev)
@@ -61,12 +62,9 @@ int hns_roce_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
 	if (udata) {
 		struct hns_roce_ib_alloc_pd_resp resp = {.pdn = pd->pdn};
 
-		ret = ib_copy_to_udata(udata, &resp,
-				       min(udata->outlen, sizeof(resp)));
-		if (ret) {
+		ret = ib_respond_udata(udata, resp);
+		if (ret)
 			ida_free(&pd_ida->ida, id);
-			ibdev_err(ib_dev, "failed to copy to udata, ret = %d\n", ret);
-		}
 	}
 
 	return ret;
diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c
index bf04ee84a94392..e333a8c4acb52c 100644
--- a/drivers/infiniband/hw/hns/hns_roce_qp.c
+++ b/drivers/infiniband/hw/hns/hns_roce_qp.c
@@ -1236,12 +1236,9 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
 
 	if (udata) {
 		resp.cap_flags = hr_qp->en_flags;
-		ret = ib_copy_to_udata(udata, &resp,
-				       min(udata->outlen, sizeof(resp)));
-		if (ret) {
-			ibdev_err(ibdev, "copy qp resp failed!\n");
+		ret = ib_respond_udata(udata, resp);
+		if (ret)
 			goto err_flow_ctrl;
-		}
 	}
 
 	if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL) {
@@ -1494,11 +1491,7 @@ int hns_roce_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 	if (udata && udata->outlen) {
 		resp.tc_mode = hr_qp->tc_mode;
 		resp.priority = hr_qp->sl;
-		ret = ib_copy_to_udata(udata, &resp,
-				       min(udata->outlen, sizeof(resp)));
-		if (ret)
-			ibdev_err_ratelimited(&hr_dev->ib_dev,
-					      "failed to copy modify qp resp.\n");
+		ret = ib_respond_udata(udata, resp);
 	}
 
 out:
diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c
index 8b94cbdfa54dfa..241fc9980f4f51 100644
--- a/drivers/infiniband/hw/hns/hns_roce_srq.c
+++ b/drivers/infiniband/hw/hns/hns_roce_srq.c
@@ -477,11 +477,9 @@ int hns_roce_create_srq(struct ib_srq *ib_srq,
 	if (udata) {
 		resp.cap_flags = srq->cap_flags;
 		resp.srqn = srq->srqn;
-		if (ib_copy_to_udata(udata, &resp,
-				     min(udata->outlen, sizeof(resp)))) {
-			ret = -EFAULT;
+		ret = ib_respond_udata(udata, resp);
+		if (ret)
 			goto err_srqc;
-		}
 	}
 
 	return 0;
diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c
index 17086048d2d7fc..79e72a457e7983 100644
--- a/drivers/infiniband/hw/irdma/verbs.c
+++ b/drivers/infiniband/hw/irdma/verbs.c
@@ -325,9 +325,9 @@ static int irdma_alloc_ucontext(struct ib_ucontext *uctx,
 		uresp.max_pds = iwdev->rf->sc_dev.hw_attrs.max_hw_pds;
 		uresp.wq_size = iwdev->rf->sc_dev.hw_attrs.max_qp_wr * 2;
 		uresp.kernel_ver = req.userspace_ver;
-		if (ib_copy_to_udata(udata, &uresp,
-				     min(sizeof(uresp), udata->outlen)))
-			return -EFAULT;
+		ret = ib_respond_udata(udata, uresp);
+		if (ret)
+			return ret;
 	} else {
 		u64 bar_off = (uintptr_t)iwdev->rf->sc_dev.hw_regs[IRDMA_DB_ADDR_OFFSET];
 
@@ -354,10 +354,10 @@ static int irdma_alloc_ucontext(struct ib_ucontext *uctx,
 		uresp.comp_mask |= IRDMA_ALLOC_UCTX_MIN_HW_WQ_SIZE;
 		uresp.max_hw_srq_quanta = uk_attrs->max_hw_srq_quanta;
 		uresp.comp_mask |= IRDMA_ALLOC_UCTX_MAX_HW_SRQ_QUANTA;
-		if (ib_copy_to_udata(udata, &uresp,
-				     min(sizeof(uresp), udata->outlen))) {
+		ret = ib_respond_udata(udata, uresp);
+		if (ret) {
 			rdma_user_mmap_entry_remove(ucontext->db_mmap_entry);
-			return -EFAULT;
+			return ret;
 		}
 	}
 
@@ -420,11 +420,9 @@ static int irdma_alloc_pd(struct ib_pd *pd, struct ib_udata *udata)
 						  ibucontext);
 		irdma_sc_pd_init(dev, sc_pd, pd_id, ucontext->abi_ver);
 		uresp.pd_id = pd_id;
-		if (ib_copy_to_udata(udata, &uresp,
-				     min(sizeof(uresp), udata->outlen))) {
-			err = -EFAULT;
+		err = ib_respond_udata(udata, uresp);
+		if (err)
 			goto error;
-		}
 	} else {
 		irdma_sc_pd_init(dev, sc_pd, pd_id, IRDMA_ABI_VER);
 	}
@@ -1124,10 +1122,8 @@ static int irdma_create_qp(struct ib_qp *ibqp,
 		uresp.qp_id = qp_num;
 		uresp.qp_caps = qp->qp_uk.qp_caps;
 
-		err_code = ib_copy_to_udata(udata, &uresp,
-					    min(sizeof(uresp), udata->outlen));
+		err_code = ib_respond_udata(udata, uresp);
 		if (err_code) {
-			ibdev_dbg(&iwdev->ibdev, "VERBS: copy_to_udata failed\n");
 			irdma_destroy_qp(&iwqp->ibqp, udata);
 			return err_code;
 		}
@@ -1612,12 +1608,9 @@ int irdma_modify_qp_roce(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 				uresp.push_valid = 1;
 				uresp.push_offset = iwqp->sc_qp.push_offset;
 			}
-			ret = ib_copy_to_udata(udata, &uresp, min(sizeof(uresp),
-					       udata->outlen));
+			ret = ib_respond_udata(udata, uresp);
 			if (ret) {
 				irdma_remove_push_mmap_entries(iwqp);
-				ibdev_dbg(&iwdev->ibdev,
-					  "VERBS: copy_to_udata failed\n");
 				return ret;
 			}
 		}
@@ -1860,12 +1853,9 @@ int irdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
 			uresp.push_offset = iwqp->sc_qp.push_offset;
 		}
 
-		err = ib_copy_to_udata(udata, &uresp, min(sizeof(uresp),
-				       udata->outlen));
+		err = ib_respond_udata(udata, uresp);
 		if (err) {
 			irdma_remove_push_mmap_entries(iwqp);
-			ibdev_dbg(&iwdev->ibdev,
-				  "VERBS: copy_to_udata failed\n");
 			return err;
 		}
 	}
@@ -2418,11 +2408,9 @@ static int irdma_create_srq(struct ib_srq *ibsrq,
 
 		resp.srq_id = iwsrq->srq_num;
 		resp.srq_size = ukinfo->srq_size;
-		if (ib_copy_to_udata(udata, &resp,
-				     min(sizeof(resp), udata->outlen))) {
-			err_code = -EPROTO;
+		err_code = ib_respond_udata(udata, resp);
+		if (err_code)
 			goto srq_destroy;
-		}
 	}
 
 	return 0;
@@ -2664,13 +2652,9 @@ static int irdma_create_cq(struct ib_cq *ibcq,
 
 		resp.cq_id = info.cq_uk_init_info.cq_id;
 		resp.cq_size = info.cq_uk_init_info.cq_size;
-		if (ib_copy_to_udata(udata, &resp,
-				     min(sizeof(resp), udata->outlen))) {
-			ibdev_dbg(&iwdev->ibdev,
-				  "VERBS: copy to user data\n");
-			err_code = -EPROTO;
+		err_code = ib_respond_udata(udata, resp);
+		if (err_code)
 			goto cq_destroy;
-		}
 	}
 
 	init_completion(&iwcq->free_cq);
@@ -5330,7 +5314,7 @@ static int irdma_create_user_ah(struct ib_ah *ibah,
 	mutex_unlock(&iwdev->rf->ah_tbl_lock);
 
 	uresp.ah_id = ah->sc_ah.ah_info.ah_idx;
-	err = ib_copy_to_udata(udata, &uresp, min(sizeof(uresp), udata->outlen));
+	err = ib_respond_udata(udata, uresp);
 	if (err)
 		irdma_destroy_ah(ibah, attr->flags);
 
diff --git a/drivers/infiniband/hw/mana/cq.c b/drivers/infiniband/hw/mana/cq.c
index 2d682428ef202a..f2547989f42290 100644
--- a/drivers/infiniband/hw/mana/cq.c
+++ b/drivers/infiniband/hw/mana/cq.c
@@ -79,11 +79,9 @@ int mana_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
 
 	if (udata) {
 		resp.cqid = cq->queue.id;
-		err = ib_copy_to_udata(udata, &resp, min(sizeof(resp), udata->outlen));
-		if (err) {
-			ibdev_dbg(&mdev->ib_dev, "Failed to copy to udata, %d\n", err);
+		err = ib_respond_udata(udata, resp);
+		if (err)
 			goto err_remove_cq_cb;
-		}
 	}
 
 	spin_lock_init(&cq->cq_lock);
diff --git a/drivers/infiniband/hw/mana/qp.c b/drivers/infiniband/hw/mana/qp.c
index 0fbcf449c134b5..ecf5910dbf0702 100644
--- a/drivers/infiniband/hw/mana/qp.c
+++ b/drivers/infiniband/hw/mana/qp.c
@@ -557,11 +557,9 @@ static int mana_ib_create_rc_qp(struct ib_qp *ibqp, struct ib_pd *ibpd,
 			resp.queue_id[j] = qp->rc_qp.queues[i].id;
 			j++;
 		}
-		err = ib_copy_to_udata(udata, &resp, min(sizeof(resp), udata->outlen));
-		if (err) {
-			ibdev_dbg(&mdev->ib_dev, "Failed to copy to udata, %d\n", err);
+		err = ib_respond_udata(udata, resp);
+		if (err)
 			goto destroy_qp;
-		}
 	}
 
 	err = mana_table_store_qp(mdev, qp);
diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c
index 852f6f502d14d0..3fb8519a4ce0d7 100644
--- a/drivers/infiniband/hw/mlx5/srq.c
+++ b/drivers/infiniband/hw/mlx5/srq.c
@@ -292,12 +292,9 @@ int mlx5_ib_create_srq(struct ib_srq *ib_srq,
 			.srqn = srq->msrq.srqn,
 		};
 
-		if (ib_copy_to_udata(udata, &resp, min(udata->outlen,
-				     sizeof(resp)))) {
-			mlx5_ib_dbg(dev, "copy to user failed\n");
-			err = -EFAULT;
+		err = ib_respond_udata(udata, resp);
+		if (err)
 			goto err_core;
-		}
 	}
 
 	init_attr->attr.max_wr = srq->msrq.max - 1;
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c
index 16aab967a20308..cefcb243c3a6f2 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c
@@ -406,12 +406,10 @@ int pvrdma_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *init_attr,
 		qp_resp.qpn = qp->ibqp.qp_num;
 		qp_resp.qp_handle = qp->qp_handle;
 
-		if (ib_copy_to_udata(udata, &qp_resp,
-				     min(udata->outlen, sizeof(qp_resp)))) {
-			dev_warn(&dev->pdev->dev,
-				 "failed to copy back udata\n");
+		ret = ib_respond_udata(udata, qp_resp);
+		if (ret) {
 			__pvrdma_destroy_qp(dev, qp);
-			return -EINVAL;
+			return ret;
 		}
 	}
 
-- 
2.43.0


^ permalink raw reply related

* [PATCH v3 05/10] RDMA/cxgb4: Convert to ib_respond_udata()
From: Jason Gunthorpe @ 2026-05-12  0:09 UTC (permalink / raw)
  To: Abhijit Gangurde, Allen Hubbe,
	Broadcom internal kernel review list, Bernard Metzler,
	Potnuri Bharat Teja, Bryan Tan, Cheng Xu, Dennis Dalessandro,
	Gal Pressman, Junxian Huang, Kai Shen, Kalesh AP,
	Konstantin Taranov, Krzysztof Czurylo, Leon Romanovsky,
	linux-hyperv, linux-rdma, Long Li, Michal Kalderon,
	Michael Margolin, Nelson Escobar, Satish Kharat, Selvin Xavier,
	Yossi Leybovich, Chengchang Tang, Tatyana Nikolova, Vishnu Dasa,
	Yishai Hadas
  Cc: patches
In-Reply-To: <0-v3-4effdebad75a+e1-rdma_udata_rep_jgg@nvidia.com>

These cases carefully work around 32-bit unpadded structures, but
the min integrated into ib_respond_udata() handles this
automatically. Zero-initialize data that would not have been copied.

Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/infiniband/hw/cxgb4/cq.c       | 8 +++-----
 drivers/infiniband/hw/cxgb4/provider.c | 5 ++---
 2 files changed, 5 insertions(+), 8 deletions(-)

diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c
index e31fb9134aa818..47508df4cec023 100644
--- a/drivers/infiniband/hw/cxgb4/cq.c
+++ b/drivers/infiniband/hw/cxgb4/cq.c
@@ -1115,13 +1115,11 @@ int c4iw_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
 		/* communicate to the userspace that
 		 * kernel driver supports 64B CQE
 		 */
-		uresp.flags |= C4IW_64B_CQE;
+		if (!ucontext->is_32b_cqe)
+			uresp.flags |= C4IW_64B_CQE;
 
 		spin_unlock(&ucontext->mmap_lock);
-		ret = ib_copy_to_udata(udata, &uresp,
-				       ucontext->is_32b_cqe ?
-				       sizeof(uresp) - sizeof(uresp.flags) :
-				       sizeof(uresp));
+		ret = ib_respond_udata(udata, uresp);
 		if (ret)
 			goto err_free_mm2;
 
diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c
index a119e8793aef40..0e3827022c63da 100644
--- a/drivers/infiniband/hw/cxgb4/provider.c
+++ b/drivers/infiniband/hw/cxgb4/provider.c
@@ -80,7 +80,7 @@ static int c4iw_alloc_ucontext(struct ib_ucontext *ucontext,
 	struct ib_device *ibdev = ucontext->device;
 	struct c4iw_ucontext *context = to_c4iw_ucontext(ucontext);
 	struct c4iw_dev *rhp = to_c4iw_dev(ibdev);
-	struct c4iw_alloc_ucontext_resp uresp;
+	struct c4iw_alloc_ucontext_resp uresp = {};
 	int ret = 0;
 	struct c4iw_mm_entry *mm = NULL;
 
@@ -106,8 +106,7 @@ static int c4iw_alloc_ucontext(struct ib_ucontext *ucontext,
 		context->key += PAGE_SIZE;
 		spin_unlock(&context->mmap_lock);
 
-		ret = ib_copy_to_udata(udata, &uresp,
-				       sizeof(uresp) - sizeof(uresp.reserved));
+		ret = ib_respond_udata(udata, uresp);
 		if (ret)
 			goto err_mm;
 
-- 
2.43.0


^ permalink raw reply related

* [PATCH v3 00/10] Convert all drivers to the new udata response flow
From: Jason Gunthorpe @ 2026-05-12  0:09 UTC (permalink / raw)
  To: Abhijit Gangurde, Allen Hubbe,
	Broadcom internal kernel review list, Bernard Metzler,
	Potnuri Bharat Teja, Bryan Tan, Cheng Xu, Dennis Dalessandro,
	Gal Pressman, Junxian Huang, Kai Shen, Kalesh AP,
	Konstantin Taranov, Krzysztof Czurylo, Leon Romanovsky,
	linux-hyperv, linux-rdma, Long Li, Michal Kalderon,
	Michael Margolin, Nelson Escobar, Satish Kharat, Selvin Xavier,
	Yossi Leybovich, Chengchang Tang, Tatyana Nikolova, Vishnu Dasa,
	Yishai Hadas
  Cc: patches

Go through the drivers and migrate them to use ib_respond_udata(). Remove
debugging prints on failure paths.  Ensure the error propagates from
ib_respond_udata(). Use the = {} pattern to initialize the uresp.

There are a couple of oddball cases which are fixed up in their own
commits, but otherwise this is fairly straightforward.

v3:
 - Rebased to v7.1-rc3 which has the error flow bugs, now removed from
   this series
v2: https://patch.msgid.link/r/0-v2-1c49eeb88c48+91-rdma_udata_rep_jgg@nvidia.com
 - More patches fixing the pre-existing error flow bugs
   found by Sashiko. I left the rvt issue behind because it was too broken
   and scary.
v1: https://patch.msgid.link/r/0-v1-e911b76a94d1+65d95-rdma_udata_rep_jgg@nvidia.com

Jason Gunthorpe (10):
  RDMA: Use ib_is_udata_in_empty() for places calling
    ib_is_udata_cleared()
  IB/rdmavt: Don't abuse udata and ib_respond_udata()
  RDMA: Convert drivers using min to ib_respond_udata()
  RDMA: Convert drivers using sizeof() to ib_respond_udata()
  RDMA/cxgb4: Convert to ib_respond_udata()
  RDMA/qedr: Replace qedr_ib_copy_to_udata() with ib_respond_udata()
  RDMA/mlx: Replace response_len with ib_respond_udata()
  RDMA: Use proper driver data response structs instead of open coding
  RDMA: Add missed = {} initialization to uresp structs
  RDMA: Replace memset with = {} pattern for ib_respond_udata()

 drivers/infiniband/hw/bnxt_re/ib_verbs.c      |  2 +-
 drivers/infiniband/hw/cxgb4/cq.c              | 11 +--
 drivers/infiniband/hw/cxgb4/provider.c        | 14 +--
 drivers/infiniband/hw/cxgb4/qp.c              | 10 +--
 drivers/infiniband/hw/efa/efa_verbs.c         | 87 ++++++-------------
 drivers/infiniband/hw/erdma/erdma_verbs.c     | 13 ++-
 drivers/infiniband/hw/hns/hns_roce_ah.c       |  4 +-
 drivers/infiniband/hw/hns/hns_roce_cq.c       |  3 +-
 drivers/infiniband/hw/hns/hns_roce_main.c     |  3 +-
 drivers/infiniband/hw/hns/hns_roce_pd.c       |  8 +-
 drivers/infiniband/hw/hns/hns_roce_qp.c       | 13 +--
 drivers/infiniband/hw/hns/hns_roce_srq.c      |  6 +-
 .../infiniband/hw/ionic/ionic_controlpath.c   |  8 +-
 drivers/infiniband/hw/irdma/verbs.c           | 48 ++++------
 drivers/infiniband/hw/mana/cq.c               |  6 +-
 drivers/infiniband/hw/mana/qp.c               | 22 ++---
 drivers/infiniband/hw/mlx4/cq.c               |  7 +-
 drivers/infiniband/hw/mlx4/main.c             | 31 ++++---
 drivers/infiniband/hw/mlx4/qp.c               |  9 +-
 drivers/infiniband/hw/mlx4/srq.c              | 12 ++-
 drivers/infiniband/hw/mlx5/ah.c               |  2 +-
 drivers/infiniband/hw/mlx5/cq.c               |  7 +-
 drivers/infiniband/hw/mlx5/main.c             | 16 ++--
 drivers/infiniband/hw/mlx5/mr.c               |  2 +-
 drivers/infiniband/hw/mlx5/qp.c               | 17 ++--
 drivers/infiniband/hw/mlx5/srq.c              |  7 +-
 drivers/infiniband/hw/mthca/mthca_provider.c  | 40 ++++++---
 drivers/infiniband/hw/ocrdma/ocrdma_verbs.c   | 31 +++----
 drivers/infiniband/hw/qedr/verbs.c            | 48 ++--------
 drivers/infiniband/hw/usnic/usnic_ib_verbs.c  | 13 +--
 drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c  |  7 +-
 drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c  |  8 +-
 drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c |  6 +-
 .../infiniband/hw/vmw_pvrdma/pvrdma_verbs.c   | 11 ++-
 drivers/infiniband/sw/rdmavt/cq.c             |  2 +-
 drivers/infiniband/sw/rdmavt/qp.c             |  3 +-
 drivers/infiniband/sw/rdmavt/srq.c            | 19 ++--
 drivers/infiniband/sw/siw/siw_verbs.c         | 10 +--
 38 files changed, 225 insertions(+), 341 deletions(-)


base-commit: 5d6919055dec134de3c40167a490f33c74c12581
-- 
2.43.0


^ permalink raw reply

* [PATCH v3 08/10] RDMA: Use proper driver data response structs instead of open coding
From: Jason Gunthorpe @ 2026-05-12  0:09 UTC (permalink / raw)
  To: Abhijit Gangurde, Allen Hubbe,
	Broadcom internal kernel review list, Bernard Metzler,
	Potnuri Bharat Teja, Bryan Tan, Cheng Xu, Dennis Dalessandro,
	Gal Pressman, Junxian Huang, Kai Shen, Kalesh AP,
	Konstantin Taranov, Krzysztof Czurylo, Leon Romanovsky,
	linux-hyperv, linux-rdma, Long Li, Michal Kalderon,
	Michael Margolin, Nelson Escobar, Satish Kharat, Selvin Xavier,
	Yossi Leybovich, Chengchang Tang, Tatyana Nikolova, Vishnu Dasa,
	Yishai Hadas
  Cc: patches
In-Reply-To: <0-v3-4effdebad75a+e1-rdma_udata_rep_jgg@nvidia.com>

At some point the response structs were added and rdma-core is using
them, but the kernel was not changed to use them as well. Replace
the open-coded copy with the right struct and ib_respond_udata().

Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/infiniband/hw/mlx4/cq.c              |  7 ++--
 drivers/infiniband/hw/mlx4/main.c            | 11 ++++--
 drivers/infiniband/hw/mlx4/srq.c             | 12 ++++---
 drivers/infiniband/hw/mlx5/cq.c              |  7 ++--
 drivers/infiniband/hw/mthca/mthca_provider.c | 35 ++++++++++++++------
 5 files changed, 48 insertions(+), 24 deletions(-)

diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c
index 7a6eb602d4a6de..7e4505f6c78b30 100644
--- a/drivers/infiniband/hw/mlx4/cq.c
+++ b/drivers/infiniband/hw/mlx4/cq.c
@@ -142,6 +142,7 @@ int mlx4_ib_create_user_cq(struct ib_cq *ibcq,
 {
 	struct ib_udata *udata = &attrs->driver_udata;
 	struct ib_device *ibdev = ibcq->device;
+	struct mlx4_ib_create_cq_resp uresp = {};
 	int entries = attr->cqe;
 	int vector = attr->comp_vector;
 	struct mlx4_ib_dev *dev = to_mdev(ibdev);
@@ -219,10 +220,10 @@ int mlx4_ib_create_user_cq(struct ib_cq *ibcq,
 	cq->mcq.event = mlx4_ib_cq_event;
 	cq->mcq.usage = MLX4_RES_USAGE_USER_VERBS;
 
-	if (ib_copy_to_udata(udata, &cq->mcq.cqn, sizeof(__u32))) {
-		err = -EFAULT;
+	uresp.cqn = cq->mcq.cqn;
+	err = ib_respond_udata(udata, uresp);
+	if (err)
 		goto err_cq_free;
-	}
 
 	return 0;
 
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index 4b187ec9e01738..25f9738bd77223 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -1199,9 +1199,14 @@ static int mlx4_ib_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
 	if (err)
 		return err;
 
-	if (udata && ib_copy_to_udata(udata, &pd->pdn, sizeof(__u32))) {
-		mlx4_pd_free(to_mdev(ibdev)->dev, pd->pdn);
-		return -EFAULT;
+	if (udata) {
+		struct mlx4_ib_alloc_pd_resp uresp = { .pdn = pd->pdn };
+
+		err = ib_respond_udata(udata, uresp);
+		if (err) {
+			mlx4_pd_free(to_mdev(ibdev)->dev, pd->pdn);
+			return err;
+		}
 	}
 	return 0;
 }
diff --git a/drivers/infiniband/hw/mlx4/srq.c b/drivers/infiniband/hw/mlx4/srq.c
index 767840736d583b..dd868f9b893d70 100644
--- a/drivers/infiniband/hw/mlx4/srq.c
+++ b/drivers/infiniband/hw/mlx4/srq.c
@@ -191,11 +191,15 @@ int mlx4_ib_create_srq(struct ib_srq *ib_srq,
 	srq->msrq.event = mlx4_ib_srq_event;
 	srq->ibsrq.ext.xrc.srq_num = srq->msrq.srqn;
 
-	if (udata)
-		if (ib_copy_to_udata(udata, &srq->msrq.srqn, sizeof (__u32))) {
-			err = -EFAULT;
+	if (udata) {
+		struct mlx4_ib_create_srq_resp uresp = {
+			.srqn = srq->msrq.srqn
+		};
+
+		err = ib_respond_udata(udata, uresp);
+		if (err)
 			goto err_srq;
-		}
+	}
 
 	init_attr->attr.max_wr = srq->msrq.max - 1;
 
diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
index a76b7a36087d98..c548d4dfbbc96a 100644
--- a/drivers/infiniband/hw/mlx5/cq.c
+++ b/drivers/infiniband/hw/mlx5/cq.c
@@ -949,6 +949,7 @@ int mlx5_ib_create_user_cq(struct ib_cq *ibcq,
 {
 	struct ib_udata *udata = &attrs->driver_udata;
 	struct ib_device *ibdev = ibcq->device;
+	struct mlx5_ib_create_cq_resp uresp = {};
 	int entries = attr->cqe;
 	int vector = attr->comp_vector;
 	struct mlx5_ib_dev *dev = to_mdev(ibdev);
@@ -1015,10 +1016,10 @@ int mlx5_ib_create_user_cq(struct ib_cq *ibcq,
 
 	INIT_LIST_HEAD(&cq->wc_list);
 
-	if (ib_copy_to_udata(udata, &cq->mcq.cqn, sizeof(__u32))) {
-		err = -EFAULT;
+	uresp.cqn = cq->mcq.cqn;
+	err = ib_respond_udata(udata, uresp);
+	if (err)
 		goto err_cmd;
-	}
 
 	kvfree(cqb);
 	return 0;
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c
index 07c60797c86091..afa97d3801f783 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.c
+++ b/drivers/infiniband/hw/mthca/mthca_provider.c
@@ -357,9 +357,12 @@ static int mthca_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
 		return err;
 
 	if (udata) {
-		if (ib_copy_to_udata(udata, &pd->pd_num, sizeof (__u32))) {
+		struct mthca_alloc_pd_resp uresp = { .pdn = pd->pd_num };
+
+		err = ib_respond_udata(udata, uresp);
+		if (err) {
 			mthca_pd_free(to_mdev(ibdev), pd);
-			return -EFAULT;
+			return err;
 		}
 	}
 
@@ -428,11 +431,17 @@ static int mthca_create_srq(struct ib_srq *ibsrq,
 	if (err)
 		return err;
 
-	if (context && ib_copy_to_udata(udata, &srq->srqn, sizeof(__u32))) {
-		mthca_free_srq(to_mdev(ibsrq->device), srq);
-		mthca_unmap_user_db(to_mdev(ibsrq->device), &context->uar,
-				    context->db_tab, ucmd.db_index);
-		return -EFAULT;
+	if (context) {
+		struct mthca_create_srq_resp uresp = { .srqn = srq->srqn };
+
+		err = ib_respond_udata(udata, uresp);
+		if (err) {
+			mthca_free_srq(to_mdev(ibsrq->device), srq);
+			mthca_unmap_user_db(to_mdev(ibsrq->device),
+					    &context->uar, context->db_tab,
+					    ucmd.db_index);
+			return err;
+		}
 	}
 
 	return 0;
@@ -631,10 +640,14 @@ static int mthca_create_cq(struct ib_cq *ibcq,
 	if (err)
 		goto err_unmap_arm;
 
-	if (udata && ib_copy_to_udata(udata, &cq->cqn, sizeof(__u32))) {
-		mthca_free_cq(to_mdev(ibdev), cq);
-		err = -EFAULT;
-		goto err_unmap_arm;
+	if (udata) {
+		struct mthca_create_cq_resp uresp = { .cqn = cq->cqn };
+
+		err = ib_respond_udata(udata, uresp);
+		if (err) {
+			mthca_free_cq(to_mdev(ibdev), cq);
+			goto err_unmap_arm;
+		}
 	}
 
 	cq->resize_buf = NULL;
-- 
2.43.0


^ permalink raw reply related

* [PATCH v3 07/10] RDMA/mlx: Replace response_len with ib_respond_udata()
From: Jason Gunthorpe @ 2026-05-12  0:09 UTC (permalink / raw)
  To: Abhijit Gangurde, Allen Hubbe,
	Broadcom internal kernel review list, Bernard Metzler,
	Potnuri Bharat Teja, Bryan Tan, Cheng Xu, Dennis Dalessandro,
	Gal Pressman, Junxian Huang, Kai Shen, Kalesh AP,
	Konstantin Taranov, Krzysztof Czurylo, Leon Romanovsky,
	linux-hyperv, linux-rdma, Long Li, Michal Kalderon,
	Michael Margolin, Nelson Escobar, Satish Kharat, Selvin Xavier,
	Yossi Leybovich, Chengchang Tang, Tatyana Nikolova, Vishnu Dasa,
	Yishai Hadas
  Cc: patches
In-Reply-To: <0-v3-4effdebad75a+e1-rdma_udata_rep_jgg@nvidia.com>

The Mellanox drivers have a pattern where they compute the response
length they think they need based on what the user asked for, then
blindly write that ignoring the provided size limit on the response
structure.

Drop this and just use ib_respond_udata() which caps the response
struct to the user's memory, which is fine for what mlx5 is doing.

Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/infiniband/hw/mlx4/main.c |  2 +-
 drivers/infiniband/hw/mlx4/qp.c   |  2 +-
 drivers/infiniband/hw/mlx5/ah.c   |  2 +-
 drivers/infiniband/hw/mlx5/main.c |  4 ++--
 drivers/infiniband/hw/mlx5/mr.c   |  2 +-
 drivers/infiniband/hw/mlx5/qp.c   | 10 +++++-----
 6 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index ce77e893065c92..4b187ec9e01738 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -626,7 +626,7 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
 	}
 
 	if (uhw->outlen) {
-		err = ib_copy_to_udata(uhw, &resp, resp.response_length);
+		err = ib_respond_udata(uhw, resp);
 		if (err)
 			goto out;
 	}
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index aca8a985ce33cd..8dc4196218bf05 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -4331,7 +4331,7 @@ int mlx4_ib_create_rwq_ind_table(struct ib_rwq_ind_table *rwq_ind_table,
 	if (udata->outlen) {
 		resp.response_length = offsetof(typeof(resp), response_length) +
 					sizeof(resp.response_length);
-		err = ib_copy_to_udata(udata, &resp, resp.response_length);
+		err = ib_respond_udata(udata, resp);
 	}
 
 	return err;
diff --git a/drivers/infiniband/hw/mlx5/ah.c b/drivers/infiniband/hw/mlx5/ah.c
index 531a57f9ee7e8b..a3aa700d08355d 100644
--- a/drivers/infiniband/hw/mlx5/ah.c
+++ b/drivers/infiniband/hw/mlx5/ah.c
@@ -121,7 +121,7 @@ int mlx5_ib_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr,
 		resp.response_length = min_resp_len;
 
 		memcpy(resp.dmac, ah_attr->roce.dmac, ETH_ALEN);
-		err = ib_copy_to_udata(udata, &resp, resp.response_length);
+		err = ib_respond_udata(udata, resp);
 		if (err)
 			return err;
 	}
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 0b3eda9b0ad0c4..fb9689e453bce4 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -1356,7 +1356,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
 	}
 
 	if (uhw_outlen) {
-		err = ib_copy_to_udata(uhw, &resp, resp.response_length);
+		err = ib_respond_udata(uhw, resp);
 
 		if (err)
 			return err;
@@ -2281,7 +2281,7 @@ static int mlx5_ib_alloc_ucontext(struct ib_ucontext *uctx,
 		goto out_mdev;
 
 	resp.response_length = min(udata->outlen, sizeof(resp));
-	err = ib_copy_to_udata(udata, &resp, resp.response_length);
+	err = ib_respond_udata(udata, resp);
 	if (err)
 		goto out_mdev;
 
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 3b6da45061a552..d7d8f3ae8b647a 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -1809,7 +1809,7 @@ int mlx5_ib_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata)
 	resp.response_length =
 		min(offsetofend(typeof(resp), response_length), udata->outlen);
 	if (resp.response_length) {
-		err = ib_copy_to_udata(udata, &resp, resp.response_length);
+		err = ib_respond_udata(udata, resp);
 		if (err)
 			goto free_mkey;
 	}
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index 23abea36cf71cf..6859e8ba2732ac 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -3332,7 +3332,7 @@ int mlx5_ib_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attr,
 		 * including MLX5_IB_QPT_DCT, which doesn't need it.
 		 * In that case, resp will be filled with zeros.
 		 */
-		err = ib_copy_to_udata(udata, &params.resp, params.outlen);
+		err = ib_respond_udata(udata, params.resp);
 	if (err)
 		goto destroy_qp;
 
@@ -4631,7 +4631,7 @@ static int mlx5_ib_modify_dct(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 		resp.dctn = qp->dct.mdct.mqp.qpn;
 		if (MLX5_CAP_GEN(dev->mdev, ece_support))
 			resp.ece_options = MLX5_GET(create_dct_out, out, ece);
-		err = ib_copy_to_udata(udata, &resp, resp.response_length);
+		err = ib_respond_udata(udata, resp);
 		if (err) {
 			mlx5_core_destroy_dct(dev, &qp->dct.mdct);
 			return err;
@@ -4790,7 +4790,7 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 	if (!err && resp.response_length &&
 	    udata->outlen >= resp.response_length)
 		/* Return -EFAULT to the user and expect him to destroy QP. */
-		err = ib_copy_to_udata(udata, &resp, resp.response_length);
+		err = ib_respond_udata(udata, resp);
 
 out:
 	mutex_unlock(&qp->mutex);
@@ -5490,7 +5490,7 @@ struct ib_wq *mlx5_ib_create_wq(struct ib_pd *pd,
 	if (udata->outlen) {
 		resp.response_length = offsetofend(
 			struct mlx5_ib_create_wq_resp, response_length);
-		err = ib_copy_to_udata(udata, &resp, resp.response_length);
+		err = ib_respond_udata(udata, resp);
 		if (err)
 			goto err_copy;
 	}
@@ -5581,7 +5581,7 @@ int mlx5_ib_create_rwq_ind_table(struct ib_rwq_ind_table *ib_rwq_ind_table,
 		resp.response_length =
 			offsetofend(struct mlx5_ib_create_rwq_ind_tbl_resp,
 				    response_length);
-		err = ib_copy_to_udata(udata, &resp, resp.response_length);
+		err = ib_respond_udata(udata, resp);
 		if (err)
 			goto err_copy;
 	}
-- 
2.43.0


^ permalink raw reply related

* [PATCH v3 01/10] RDMA: Use ib_is_udata_in_empty() for places calling ib_is_udata_cleared()
From: Jason Gunthorpe @ 2026-05-12  0:09 UTC (permalink / raw)
  To: Abhijit Gangurde, Allen Hubbe,
	Broadcom internal kernel review list, Bernard Metzler,
	Potnuri Bharat Teja, Bryan Tan, Cheng Xu, Dennis Dalessandro,
	Gal Pressman, Junxian Huang, Kai Shen, Kalesh AP,
	Konstantin Taranov, Krzysztof Czurylo, Leon Romanovsky,
	linux-hyperv, linux-rdma, Long Li, Michal Kalderon,
	Michael Margolin, Nelson Escobar, Satish Kharat, Selvin Xavier,
	Yossi Leybovich, Chengchang Tang, Tatyana Nikolova, Vishnu Dasa,
	Yishai Hadas
  Cc: patches
In-Reply-To: <0-v3-4effdebad75a+e1-rdma_udata_rep_jgg@nvidia.com>

Convert the pattern:

  if (udata->inlen && !ib_is_udata_cleared(udata, 0, udata->inlen))

Using Coccinelle:

virtual patch
virtual context
virtual report

@@
expression udata;
@@
(
- udata->inlen && !ib_is_udata_cleared(udata, 0, udata->inlen)
+ !ib_is_udata_in_empty(udata)
|
- udata->inlen > 0 && !ib_is_udata_cleared(udata, 0, udata->inlen)
+ !ib_is_udata_in_empty(udata)
)

@@
expression udata;
@@
- udata && udata->inlen && !ib_is_udata_cleared(udata, 0, udata->inlen)
+ !ib_is_udata_in_empty(udata)

These cases are already checking for zeroed data that the kernel does
not understand.

Run another pass with AI to propagate the return code correctly and
remove redundant prints.

Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/infiniband/hw/efa/efa_verbs.c | 43 +++++++++------------------
 drivers/infiniband/hw/mlx4/main.c     |  6 ++--
 drivers/infiniband/hw/mlx4/qp.c       |  7 ++---
 drivers/infiniband/hw/mlx5/main.c     |  5 ++--
 drivers/infiniband/hw/mlx5/qp.c       |  7 ++---
 5 files changed, 26 insertions(+), 42 deletions(-)

diff --git a/drivers/infiniband/hw/efa/efa_verbs.c b/drivers/infiniband/hw/efa/efa_verbs.c
index 7bd0838ebc99e4..3ad5d6e27b1590 100644
--- a/drivers/infiniband/hw/efa/efa_verbs.c
+++ b/drivers/infiniband/hw/efa/efa_verbs.c
@@ -218,12 +218,9 @@ int efa_query_device(struct ib_device *ibdev,
 	struct efa_dev *dev = to_edev(ibdev);
 	int err;
 
-	if (udata && udata->inlen &&
-	    !ib_is_udata_cleared(udata, 0, udata->inlen)) {
-		ibdev_dbg(ibdev,
-			  "Incompatible ABI params, udata not cleared\n");
-		return -EINVAL;
-	}
+	err = ib_is_udata_in_empty(udata);
+	if (err)
+		return err;
 
 	dev_attr = &dev->dev_attr;
 
@@ -433,13 +430,9 @@ int efa_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
 	struct efa_pd *pd = to_epd(ibpd);
 	int err;
 
-	if (udata->inlen &&
-	    !ib_is_udata_cleared(udata, 0, udata->inlen)) {
-		ibdev_dbg(&dev->ibdev,
-			  "Incompatible ABI params, udata not cleared\n");
-		err = -EINVAL;
+	err = ib_is_udata_in_empty(udata);
+	if (err)
 		goto err_out;
-	}
 
 	err = efa_com_alloc_pd(&dev->edev, &result);
 	if (err)
@@ -982,12 +975,9 @@ int efa_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
 	if (qp_attr_mask & ~IB_QP_ATTR_STANDARD_BITS)
 		return -EOPNOTSUPP;
 
-	if (udata->inlen &&
-	    !ib_is_udata_cleared(udata, 0, udata->inlen)) {
-		ibdev_dbg(&dev->ibdev,
-			  "Incompatible ABI params, udata not cleared\n");
-		return -EINVAL;
-	}
+	err = ib_is_udata_in_empty(udata);
+	if (err)
+		return err;
 
 	cur_state = qp_attr_mask & IB_QP_CUR_STATE ? qp_attr->cur_qp_state :
 						     qp->state;
@@ -1612,13 +1602,11 @@ static struct efa_mr *efa_alloc_mr(struct ib_pd *ibpd, int access_flags,
 	struct efa_dev *dev = to_edev(ibpd->device);
 	int supp_access_flags;
 	struct efa_mr *mr;
+	int ret;
 
-	if (udata && udata->inlen &&
-	    !ib_is_udata_cleared(udata, 0, udata->inlen)) {
-		ibdev_dbg(&dev->ibdev,
-			  "Incompatible ABI params, udata not cleared\n");
-		return ERR_PTR(-EINVAL);
-	}
+	ret = ib_is_udata_in_empty(udata);
+	if (ret)
+		return ERR_PTR(ret);
 
 	supp_access_flags =
 		IB_ACCESS_LOCAL_WRITE |
@@ -2082,12 +2070,9 @@ int efa_create_ah(struct ib_ah *ibah,
 		goto err_out;
 	}
 
-	if (udata->inlen &&
-	    !ib_is_udata_cleared(udata, 0, udata->inlen)) {
-		ibdev_dbg(&dev->ibdev, "Incompatible ABI params\n");
-		err = -EINVAL;
+	err = ib_is_udata_in_empty(udata);
+	if (err)
 		goto err_out;
-	}
 
 	memcpy(params.dest_addr, ah_attr->grh.dgid.raw,
 	       sizeof(params.dest_addr));
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index 464c9ab4251636..16e9ce8138cb30 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -1696,9 +1696,9 @@ static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp,
 	    (flow_attr->type != IB_FLOW_ATTR_NORMAL))
 		return ERR_PTR(-EOPNOTSUPP);
 
-	if (udata &&
-	    udata->inlen && !ib_is_udata_cleared(udata, 0, udata->inlen))
-		return ERR_PTR(-EOPNOTSUPP);
+	err = ib_is_udata_in_empty(udata);
+	if (err)
+		return ERR_PTR(err);
 
 	memset(type, 0, sizeof(type));
 
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 790be09d985a1a..aca8a985ce33cd 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -4297,10 +4297,9 @@ int mlx4_ib_create_rwq_ind_table(struct ib_rwq_ind_table *rwq_ind_table,
 	size_t min_resp_len;
 	int i, err = 0;
 
-	if (udata->inlen > 0 &&
-	    !ib_is_udata_cleared(udata, 0,
-				 udata->inlen))
-		return -EOPNOTSUPP;
+	err = ib_is_udata_in_empty(udata);
+	if (err)
+		return err;
 
 	min_resp_len = offsetof(typeof(resp), reserved) + sizeof(resp.reserved);
 	if (udata->outlen && udata->outlen < min_resp_len)
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 61078281953d6c..2bb5caf5a89266 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -965,8 +965,9 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
 
 	resp.response_length = resp_len;
 
-	if (uhw && uhw->inlen && !ib_is_udata_cleared(uhw, 0, uhw->inlen))
-		return -EINVAL;
+	err = ib_is_udata_in_empty(uhw);
+	if (err)
+		return err;
 
 	memset(props, 0, sizeof(*props));
 	err = mlx5_query_system_image_guid(ibdev,
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index 8fd05532c09cc7..23abea36cf71cf 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -5538,10 +5538,9 @@ int mlx5_ib_create_rwq_ind_table(struct ib_rwq_ind_table *ib_rwq_ind_table,
 	u32 *in;
 	void *rqtc;
 
-	if (udata->inlen > 0 &&
-	    !ib_is_udata_cleared(udata, 0,
-				 udata->inlen))
-		return -EOPNOTSUPP;
+	err = ib_is_udata_in_empty(udata);
+	if (err)
+		return err;
 
 	if (init_attr->log_ind_tbl_size >
 	    MLX5_CAP_GEN(dev->mdev, log_max_rqt_size)) {
-- 
2.43.0


^ permalink raw reply related

* Re: [PATCH v2 0/5] treewide: Convert buses to use generic driver_override
From: Danilo Krummrich @ 2026-05-11 18:02 UTC (permalink / raw)
  To: gregkh, rafael, linux, nipun.gupta, nikhil.agarwal, kys, haiyangz,
	wei.liu, decui, longli, andersson, mathieu.poirier
  Cc: driver-core, linux-kernel, linux-hyperv, linux-arm-msm,
	linux-remoteproc, Danilo Krummrich
In-Reply-To: <20260505133935.3772495-1-dakr@kernel.org>

On Tue May 5, 2026 at 3:37 PM CEST, Danilo Krummrich wrote:
> This series is based on v7.1-rc1 with no additional dependencies, hence those
> patches can be picked up by subsystems individually.

Gentle ping on this one; I can alternatively also take those patches through the
driver-core tree if you prefer.

Thanks,
Danilo

^ permalink raw reply

* [PATCH v1 4/4] iommu/hyperv: Add page-selective IOTLB flush support
From: Yu Zhang @ 2026-05-11 16:24 UTC (permalink / raw)
  To: linux-kernel, linux-hyperv, iommu, linux-pci, linux-arch
  Cc: wei.liu, kys, haiyangz, decui, longli, joro, will, robin.murphy,
	bhelgaas, kwilczynski, lpieralisi, mani, robh, arnd, jgg,
	mhklinux, jacob.pan, tgopinath, easwar.hariharan
In-Reply-To: <20260511162408.1180069-1-zhangyu1@linux.microsoft.com>

Add page-selective IOTLB flush using HVCALL_FLUSH_DEVICE_DOMAIN_LIST.
This hypercall accepts a list of (page_number, page_mask_shift) entries,
enabling finer-grained IOTLB invalidation compared to the domain-wide
HVCALL_FLUSH_DEVICE_DOMAIN used by hv_iommu_flush_iotlb_all().

hv_iommu_fill_iova_list() decomposes a contiguous IOVA range into a
minimal set of aligned power-of-two regions that fit in a single
hypercall input page. When the range exceeds the page capacity, the
code falls back to a full domain flush automatically.

Signed-off-by: Yu Zhang <zhangyu1@linux.microsoft.com>
Signed-off-by: Easwar Hariharan <easwar.hariharan@linux.microsoft.com>
---
 drivers/iommu/hyperv/iommu.c | 91 +++++++++++++++++++++++++++++++++++-
 include/hyperv/hvgdk_mini.h  |  1 +
 include/hyperv/hvhdk_mini.h  | 17 +++++++
 3 files changed, 108 insertions(+), 1 deletion(-)

diff --git a/drivers/iommu/hyperv/iommu.c b/drivers/iommu/hyperv/iommu.c
index e5fc625314b5..3bca362b7815 100644
--- a/drivers/iommu/hyperv/iommu.c
+++ b/drivers/iommu/hyperv/iommu.c
@@ -486,10 +486,98 @@ static void hv_iommu_flush_iotlb_all(struct iommu_domain *domain)
 	hv_flush_device_domain(to_hv_iommu_domain(domain));
 }
 
+/* Max number of iova_list entries in a single hypercall input page. */
+#define HV_IOMMU_MAX_FLUSH_VA_COUNT \
+	((HV_HYP_PAGE_SIZE - sizeof(struct hv_input_flush_device_domain_list)) / \
+	 sizeof(union hv_iommu_flush_va))
+
+/* Returned by hv_iommu_fill_iova_list() when the range exceeds the capacity */
+#define HV_IOMMU_FLUSH_VA_OVERFLOW	U16_MAX
+
+static inline u16 hv_iommu_fill_iova_list(union hv_iommu_flush_va *iova_list,
+					  unsigned long start,
+					  unsigned long end)
+{
+	unsigned long start_pfn = start >> PAGE_SHIFT;
+	unsigned long end_pfn = PAGE_ALIGN(end) >> PAGE_SHIFT;
+	unsigned long nr_pages = end_pfn - start_pfn;
+	u16 count = 0;
+
+	while (nr_pages > 0) {
+		unsigned long flush_pages;
+		int order;
+		unsigned long pfn_align;
+		unsigned long size_align;
+
+		if (count >= HV_IOMMU_MAX_FLUSH_VA_COUNT) {
+			count = HV_IOMMU_FLUSH_VA_OVERFLOW;
+			break;
+		}
+
+		if (start_pfn)
+			pfn_align = __ffs(start_pfn);
+		else
+			pfn_align = BITS_PER_LONG - 1;
+
+		size_align = __fls(nr_pages);
+		order = min(pfn_align, size_align);
+		iova_list[count].page_mask_shift = order;
+		iova_list[count].page_number = start_pfn;
+
+		flush_pages = 1UL << order;
+		start_pfn += flush_pages;
+		nr_pages -= flush_pages;
+		count++;
+	}
+
+	return count;
+}
+
+static void hv_flush_device_domain_list(struct hv_iommu_domain *hv_domain,
+					struct iommu_iotlb_gather *iotlb_gather)
+{
+	u64 status;
+	u16 count;
+	unsigned long flags;
+	struct hv_input_flush_device_domain_list *input;
+
+	local_irq_save(flags);
+
+	input = *this_cpu_ptr(hyperv_pcpu_input_arg);
+	memset(input, 0, sizeof(*input));
+
+	input->device_domain = hv_domain->device_domain;
+	input->flags |= HV_FLUSH_DEVICE_DOMAIN_LIST_IOMMU_FORMAT;
+	count = hv_iommu_fill_iova_list(input->iova_list,
+					iotlb_gather->start,
+					iotlb_gather->end);
+	if (count == HV_IOMMU_FLUSH_VA_OVERFLOW) {
+		/*
+		 * Range exceeds hypercall page capacity. Fall back to a full
+		 * domain flush.
+		 */
+		struct hv_input_flush_device_domain *flush_all = (void *)input;
+
+		memset(flush_all, 0, sizeof(*flush_all));
+		flush_all->device_domain = hv_domain->device_domain;
+		status = hv_do_hypercall(HVCALL_FLUSH_DEVICE_DOMAIN,
+					flush_all, NULL);
+	} else {
+		status = hv_do_rep_hypercall(
+				HVCALL_FLUSH_DEVICE_DOMAIN_LIST,
+				count, 0, input, NULL);
+	}
+
+	local_irq_restore(flags);
+
+	if (!hv_result_success(status))
+		pr_err("HVCALL_FLUSH_DEVICE_DOMAIN_LIST failed, status %lld\n", status);
+}
+
 static void hv_iommu_iotlb_sync(struct iommu_domain *domain,
 				struct iommu_iotlb_gather *iotlb_gather)
 {
-	hv_flush_device_domain(to_hv_iommu_domain(domain));
+	hv_flush_device_domain_list(to_hv_iommu_domain(domain), iotlb_gather);
 
 	iommu_put_pages_list(&iotlb_gather->freelist);
 }
@@ -543,6 +631,7 @@ static struct iommu_domain *hv_iommu_domain_alloc_paging(struct device *dev)
 
 	cfg.common.hw_max_vasz_lg2 = hv_iommu_device->max_iova_width;
 	cfg.common.hw_max_oasz_lg2 = 52;
+	cfg.common.features |= BIT(PT_FEAT_FLUSH_RANGE);
 	cfg.top_level = (hv_iommu_device->max_iova_width > 48) ? 4 : 3;
 
 	ret = pt_iommu_x86_64_init(&hv_domain->pt_iommu_x86_64, &cfg, GFP_KERNEL);
diff --git a/include/hyperv/hvgdk_mini.h b/include/hyperv/hvgdk_mini.h
index 5bdbb44da112..eaaf87171478 100644
--- a/include/hyperv/hvgdk_mini.h
+++ b/include/hyperv/hvgdk_mini.h
@@ -496,6 +496,7 @@ union hv_vp_assist_msr_contents {	 /* HV_REGISTER_VP_ASSIST_PAGE */
 #define HVCALL_GET_GPA_PAGES_ACCESS_STATES		0x00c9
 #define HVCALL_CONFIGURE_DEVICE_DOMAIN			0x00ce
 #define HVCALL_FLUSH_DEVICE_DOMAIN			0x00d0
+#define HVCALL_FLUSH_DEVICE_DOMAIN_LIST			0x00d1
 #define HVCALL_ACQUIRE_SPARSE_SPA_PAGE_HOST_ACCESS	0x00d7
 #define HVCALL_RELEASE_SPARSE_SPA_PAGE_HOST_ACCESS	0x00d8
 #define HVCALL_MODIFY_SPARSE_GPA_PAGE_HOST_VISIBILITY	0x00db
diff --git a/include/hyperv/hvhdk_mini.h b/include/hyperv/hvhdk_mini.h
index 493608e791b4..f51d5d9467f1 100644
--- a/include/hyperv/hvhdk_mini.h
+++ b/include/hyperv/hvhdk_mini.h
@@ -671,4 +671,21 @@ struct hv_input_flush_device_domain {
 	u32 reserved;
 } __packed;
 
+union hv_iommu_flush_va {
+	u64 iova;
+	struct {
+		u64 page_mask_shift : 12;
+		u64 page_number : 52;
+	};
+} __packed;
+
+
+struct hv_input_flush_device_domain_list {
+	struct hv_input_device_domain device_domain;
+#define HV_FLUSH_DEVICE_DOMAIN_LIST_IOMMU_FORMAT (1 << 0)
+	u32 flags;
+	u32 reserved;
+	union hv_iommu_flush_va iova_list[];
+} __packed;
+
 #endif /* _HV_HVHDK_MINI_H */
-- 
2.52.0


^ permalink raw reply related

* [PATCH v1 3/4] iommu/hyperv: Add para-virtualized IOMMU support for Hyper-V guest
From: Yu Zhang @ 2026-05-11 16:24 UTC (permalink / raw)
  To: linux-kernel, linux-hyperv, iommu, linux-pci, linux-arch
  Cc: wei.liu, kys, haiyangz, decui, longli, joro, will, robin.murphy,
	bhelgaas, kwilczynski, lpieralisi, mani, robh, arnd, jgg,
	mhklinux, jacob.pan, tgopinath, easwar.hariharan
In-Reply-To: <20260511162408.1180069-1-zhangyu1@linux.microsoft.com>

Add a para-virtualized IOMMU driver for Linux guests running on Hyper-V.
This driver implements stage-1 IO translation within the guest OS.
It integrates with the Linux IOMMU core, utilizing Hyper-V hypercalls
for:
 - Capability discovery
 - Domain allocation, configuration, and deallocation
 - Device attachment and detachment
 - IOTLB invalidation

The driver constructs x86-compatible stage-1 IO page tables in the
guest memory using consolidated IO page table helpers. This allows
the guest to manage stage-1 translations independently of vendor-
specific drivers (like Intel VT-d or AMD IOMMU).

Hyper-V consumes this stage-1 IO page table when a device domain is
created and configured, and nests it with the host's stage-2 IO page
tables, therefore eliminating the VM exits for guest IOMMU mapping
operations. For unmapping operations, VM exits to perform the IOTLB
flush are still unavoidable.

Hyper-V identifies each PCI pass-thru device by a logical device ID
in its hypercall interface. The vPCI driver (pci-hyperv) registers the
per-bus portion of this ID with the pvIOMMU driver during bus probe.
The pvIOMMU driver stores this mapping and combines it with the function
number of the endpoint PCI device to form the complete ID for hypercalls.

Co-developed-by: Wei Liu <wei.liu@kernel.org>
Signed-off-by: Wei Liu <wei.liu@kernel.org>
Co-developed-by: Easwar Hariharan <easwar.hariharan@linux.microsoft.com>
Signed-off-by: Easwar Hariharan <easwar.hariharan@linux.microsoft.com>
Signed-off-by: Yu Zhang <zhangyu1@linux.microsoft.com>
---
 arch/x86/hyperv/hv_init.c           |   4 +
 arch/x86/include/asm/mshyperv.h     |   4 +
 drivers/iommu/hyperv/Kconfig        |  17 +
 drivers/iommu/hyperv/Makefile       |   1 +
 drivers/iommu/hyperv/iommu.c        | 705 ++++++++++++++++++++++++++++
 drivers/iommu/hyperv/iommu.h        |  54 +++
 drivers/pci/controller/pci-hyperv.c |  19 +-
 include/asm-generic/mshyperv.h      |  12 +
 8 files changed, 815 insertions(+), 1 deletion(-)
 create mode 100644 drivers/iommu/hyperv/iommu.c
 create mode 100644 drivers/iommu/hyperv/iommu.h

diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
index 323adc93f2dc..2c8ff8e06249 100644
--- a/arch/x86/hyperv/hv_init.c
+++ b/arch/x86/hyperv/hv_init.c
@@ -578,6 +578,10 @@ void __init hyperv_init(void)
 	old_setup_percpu_clockev = x86_init.timers.setup_percpu_clockev;
 	x86_init.timers.setup_percpu_clockev = hv_stimer_setup_percpu_clockev;
 
+#ifdef CONFIG_HYPERV_PVIOMMU
+	x86_init.iommu.iommu_init = hv_iommu_init;
+#endif
+
 	hv_apic_init();
 
 	x86_init.pci.arch_init = hv_pci_init;
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index f64393e853ee..20d947c2c758 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -313,6 +313,10 @@ static inline void mshv_vtl_return_hypercall(void) {}
 static inline void __mshv_vtl_return_call(struct mshv_vtl_cpu_context *vtl0) {}
 #endif
 
+#ifdef CONFIG_HYPERV_PVIOMMU
+int __init hv_iommu_init(void);
+#endif
+
 #include <asm-generic/mshyperv.h>
 
 #endif
diff --git a/drivers/iommu/hyperv/Kconfig b/drivers/iommu/hyperv/Kconfig
index 30f40d867036..9e658d5c9a77 100644
--- a/drivers/iommu/hyperv/Kconfig
+++ b/drivers/iommu/hyperv/Kconfig
@@ -8,3 +8,20 @@ config HYPERV_IOMMU
 	help
 	  Stub IOMMU driver to handle IRQs to support Hyper-V Linux
 	  guest and root partitions.
+
+if HYPERV_IOMMU
+config HYPERV_PVIOMMU
+	bool "Microsoft Hypervisor para-virtualized IOMMU support"
+	depends on X86 && HYPERV
+	select IOMMU_API
+	select GENERIC_PT
+	select IOMMU_PT
+	select IOMMU_PT_X86_64
+	select IOMMU_IOVA
+	default HYPERV
+	help
+	  Para-virtualized IOMMU driver for Linux guests running on
+	  Microsoft Hyper-V. Provides DMA remapping and IOTLB
+	  flush support to enable DMA isolation for devices
+	  assigned to the guest.
+endif
diff --git a/drivers/iommu/hyperv/Makefile b/drivers/iommu/hyperv/Makefile
index 9f557bad94ff..8669741c0a51 100644
--- a/drivers/iommu/hyperv/Makefile
+++ b/drivers/iommu/hyperv/Makefile
@@ -1,2 +1,3 @@
 # SPDX-License-Identifier: GPL-2.0
 obj-$(CONFIG_HYPERV_IOMMU) += irq_remapping.o
+obj-$(CONFIG_HYPERV_PVIOMMU) += iommu.o
diff --git a/drivers/iommu/hyperv/iommu.c b/drivers/iommu/hyperv/iommu.c
new file mode 100644
index 000000000000..e5fc625314b5
--- /dev/null
+++ b/drivers/iommu/hyperv/iommu.c
@@ -0,0 +1,705 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Hyper-V IOMMU driver.
+ *
+ * Copyright (C) 2019, 2024-2026 Microsoft, Inc.
+ */
+
+#define pr_fmt(fmt) "Hyper-V pvIOMMU: " fmt
+#define dev_fmt(fmt) pr_fmt(fmt)
+
+#include <linux/iommu.h>
+#include <linux/pci.h>
+#include <linux/dma-map-ops.h>
+#include <linux/generic_pt/iommu.h>
+#include <linux/pci-ats.h>
+
+#include <asm/iommu.h>
+#include <asm/hypervisor.h>
+#include <asm/mshyperv.h>
+
+#include "iommu.h"
+#include "../iommu-pages.h"
+
+struct hv_iommu_dev *hv_iommu_device;
+
+/*
+ * Identity and blocking domains are static singletons: identity is a 1:1
+ * passthrough with no page table, blocking rejects all DMA. Neither holds
+ * per-IOMMU state, so one instance suffices even with multiple vIOMMUs.
+ */
+static struct hv_iommu_domain hv_identity_domain;
+static struct hv_iommu_domain hv_blocking_domain;
+static const struct iommu_domain_ops hv_iommu_identity_domain_ops;
+static const struct iommu_domain_ops hv_iommu_blocking_domain_ops;
+static struct iommu_ops hv_iommu_ops;
+static LIST_HEAD(hv_iommu_pci_bus_list);
+static DEFINE_SPINLOCK(hv_iommu_pci_bus_lock);
+
+#define hv_iommu_present(iommu_cap) (iommu_cap & HV_IOMMU_CAP_PRESENT)
+#define hv_iommu_s1_domain_supported(iommu_cap) (iommu_cap & HV_IOMMU_CAP_S1)
+#define hv_iommu_5lvl_supported(iommu_cap) (iommu_cap & HV_IOMMU_CAP_S1_5LVL)
+#define hv_iommu_ats_supported(iommu_cap) (iommu_cap & HV_IOMMU_CAP_ATS)
+
+int hv_iommu_register_pci_bus(int pci_domain_nr, u32 logical_dev_id_prefix)
+{
+	struct hv_pci_busdata *bus, *new;
+	int ret = 0;
+
+	if (no_iommu || !iommu_detected)
+		return 0;
+
+	new = kzalloc_obj(*new, GFP_KERNEL);
+	if (!new)
+		return -ENOMEM;
+
+	spin_lock(&hv_iommu_pci_bus_lock);
+	list_for_each_entry(bus, &hv_iommu_pci_bus_list, list) {
+		if (bus->pci_domain_nr != pci_domain_nr)
+			continue;
+
+		if (bus->logical_dev_id_prefix != logical_dev_id_prefix) {
+			pr_err("stale registration for PCI domain %d (old prefix 0x%08x, new 0x%08x)\n",
+			       pci_domain_nr, bus->logical_dev_id_prefix,
+			       logical_dev_id_prefix);
+			ret = -EEXIST;
+		}
+
+		goto out_free;
+	}
+
+	new->pci_domain_nr = pci_domain_nr;
+	new->logical_dev_id_prefix = logical_dev_id_prefix;
+	list_add(&new->list, &hv_iommu_pci_bus_list);
+	spin_unlock(&hv_iommu_pci_bus_lock);
+	return 0;
+
+out_free:
+	spin_unlock(&hv_iommu_pci_bus_lock);
+	kfree(new);
+	return ret;
+}
+EXPORT_SYMBOL_FOR_MODULES(hv_iommu_register_pci_bus, "pci-hyperv");
+
+void hv_iommu_unregister_pci_bus(int pci_domain_nr)
+{
+	struct hv_pci_busdata *bus, *tmp;
+
+	spin_lock(&hv_iommu_pci_bus_lock);
+	list_for_each_entry_safe(bus, tmp, &hv_iommu_pci_bus_list, list) {
+		if (bus->pci_domain_nr == pci_domain_nr) {
+			list_del(&bus->list);
+			kfree(bus);
+			break;
+		}
+	}
+	spin_unlock(&hv_iommu_pci_bus_lock);
+}
+EXPORT_SYMBOL_FOR_MODULES(hv_iommu_unregister_pci_bus, "pci-hyperv");
+
+/*
+ * Look up the logical device ID for a vPCI device. Returns 0 on success
+ * with *logical_id filled in; -ENODEV if no entry registered for this
+ * device's vPCI bus.
+ */
+static int hv_iommu_lookup_logical_dev_id(struct pci_dev *pdev, u64 *logical_id)
+{
+	struct hv_pci_busdata *bus;
+	int domain = pci_domain_nr(pdev->bus);
+	int ret = -ENODEV;
+
+	spin_lock(&hv_iommu_pci_bus_lock);
+	list_for_each_entry(bus, &hv_iommu_pci_bus_list, list) {
+		if (bus->pci_domain_nr == domain) {
+			*logical_id = (u64)bus->logical_dev_id_prefix |
+				      PCI_FUNC(pdev->devfn);
+			ret = 0;
+			break;
+		}
+	}
+	spin_unlock(&hv_iommu_pci_bus_lock);
+	return ret;
+}
+
+static int hv_create_device_domain(struct hv_iommu_domain *hv_domain, u32 domain_stage)
+{
+	int ret;
+	u64 status;
+	unsigned long flags;
+	struct hv_input_create_device_domain *input;
+
+	ret = ida_alloc_range(&hv_iommu_device->domain_ids,
+			hv_iommu_device->first_domain, hv_iommu_device->last_domain,
+			GFP_KERNEL);
+	if (ret < 0)
+		return ret;
+
+	hv_domain->device_domain.partition_id = HV_PARTITION_ID_SELF;
+	hv_domain->device_domain.domain_id.type = domain_stage;
+	hv_domain->device_domain.domain_id.id = ret;
+	hv_domain->hv_iommu = hv_iommu_device;
+
+	local_irq_save(flags);
+
+	input = *this_cpu_ptr(hyperv_pcpu_input_arg);
+	memset(input, 0, sizeof(*input));
+	input->device_domain = hv_domain->device_domain;
+	input->create_device_domain_flags.forward_progress_required = 1;
+	input->create_device_domain_flags.inherit_owning_vtl = 0;
+	status = hv_do_hypercall(HVCALL_CREATE_DEVICE_DOMAIN, input, NULL);
+
+	local_irq_restore(flags);
+
+	if (!hv_result_success(status)) {
+		pr_err("HVCALL_CREATE_DEVICE_DOMAIN failed, status %lld\n", status);
+		ida_free(&hv_iommu_device->domain_ids, hv_domain->device_domain.domain_id.id);
+	}
+
+	return hv_result_to_errno(status);
+}
+
+static void hv_delete_device_domain(struct hv_iommu_domain *hv_domain)
+{
+	u64 status;
+	unsigned long flags;
+	struct hv_input_delete_device_domain *input;
+
+	local_irq_save(flags);
+
+	input = *this_cpu_ptr(hyperv_pcpu_input_arg);
+	memset(input, 0, sizeof(*input));
+	input->device_domain = hv_domain->device_domain;
+	status = hv_do_hypercall(HVCALL_DELETE_DEVICE_DOMAIN, input, NULL);
+
+	local_irq_restore(flags);
+
+	if (!hv_result_success(status))
+		pr_err("HVCALL_DELETE_DEVICE_DOMAIN failed, status %lld\n", status);
+
+	ida_free(&hv_domain->hv_iommu->domain_ids, hv_domain->device_domain.domain_id.id);
+}
+
+static bool hv_iommu_capable(struct device *dev, enum iommu_cap cap)
+{
+	switch (cap) {
+	case IOMMU_CAP_CACHE_COHERENCY:
+		return true;
+	case IOMMU_CAP_DEFERRED_FLUSH:
+		return true;
+	default:
+		return false;
+	}
+}
+
+static void hv_flush_device_domain(struct hv_iommu_domain *hv_domain)
+{
+	u64 status;
+	unsigned long flags;
+	struct hv_input_flush_device_domain *input;
+
+	local_irq_save(flags);
+
+	input = *this_cpu_ptr(hyperv_pcpu_input_arg);
+	memset(input, 0, sizeof(*input));
+	input->device_domain = hv_domain->device_domain;
+	status = hv_do_hypercall(HVCALL_FLUSH_DEVICE_DOMAIN, input, NULL);
+
+	local_irq_restore(flags);
+
+	if (!hv_result_success(status))
+		pr_err("HVCALL_FLUSH_DEVICE_DOMAIN failed, status %lld\n", status);
+}
+
+static void hv_iommu_detach_dev(struct iommu_domain *domain, struct device *dev)
+{
+	u64 status;
+	unsigned long flags;
+	struct hv_input_detach_device_domain *input;
+	struct pci_dev *pdev;
+	struct hv_iommu_domain *hv_domain = to_hv_iommu_domain(domain);
+	struct hv_iommu_endpoint *vdev = dev_iommu_priv_get(dev);
+
+	/* See the attach function, only PCI devices for now */
+	if (!dev_is_pci(dev) || vdev->hv_domain != hv_domain)
+		return;
+
+	pdev = to_pci_dev(dev);
+
+	dev_dbg(dev, "detaching from domain %d\n", hv_domain->device_domain.domain_id.id);
+
+	local_irq_save(flags);
+
+	input = *this_cpu_ptr(hyperv_pcpu_input_arg);
+	memset(input, 0, sizeof(*input));
+	input->partition_id = HV_PARTITION_ID_SELF;
+	if (hv_iommu_lookup_logical_dev_id(pdev, &input->device_id.as_uint64)) {
+		local_irq_restore(flags);
+		dev_warn(&pdev->dev, "no IOMMU registration for vPCI bus on detach\n");
+		return;
+	}
+	status = hv_do_hypercall(HVCALL_DETACH_DEVICE_DOMAIN, input, NULL);
+
+	local_irq_restore(flags);
+
+	if (!hv_result_success(status))
+		pr_err("HVCALL_DETACH_DEVICE_DOMAIN failed, status %lld\n", status);
+
+	hv_flush_device_domain(hv_domain);
+
+	vdev->hv_domain = NULL;
+}
+
+static int hv_iommu_attach_dev(struct iommu_domain *domain, struct device *dev,
+			       struct iommu_domain *old)
+{
+	u64 status;
+	unsigned long flags;
+	struct pci_dev *pdev;
+	struct hv_input_attach_device_domain *input;
+	struct hv_iommu_endpoint *vdev = dev_iommu_priv_get(dev);
+	struct hv_iommu_domain *hv_domain = to_hv_iommu_domain(domain);
+	int ret;
+
+	/* Only allow PCI devices for now */
+	if (!dev_is_pci(dev))
+		return -EINVAL;
+
+	if (vdev->hv_domain == hv_domain)
+		return 0;
+
+	if (vdev->hv_domain)
+		hv_iommu_detach_dev(&vdev->hv_domain->domain, dev);
+
+	pdev = to_pci_dev(dev);
+	dev_dbg(dev, "attaching to domain %d\n",
+		hv_domain->device_domain.domain_id.id);
+
+	local_irq_save(flags);
+
+	input = *this_cpu_ptr(hyperv_pcpu_input_arg);
+	memset(input, 0, sizeof(*input));
+	input->device_domain = hv_domain->device_domain;
+	ret = hv_iommu_lookup_logical_dev_id(pdev, &input->device_id.as_uint64);
+	if (ret) {
+		local_irq_restore(flags);
+		dev_err(&pdev->dev, "no IOMMU registration for vPCI bus\n");
+		return ret;
+	}
+	status = hv_do_hypercall(HVCALL_ATTACH_DEVICE_DOMAIN, input, NULL);
+
+	local_irq_restore(flags);
+
+	if (!hv_result_success(status))
+		pr_err("HVCALL_ATTACH_DEVICE_DOMAIN failed, status %lld\n", status);
+	else
+		vdev->hv_domain = hv_domain;
+
+	return hv_result_to_errno(status);
+}
+
+static int hv_iommu_get_logical_device_property(struct device *dev,
+					u32 code,
+					struct hv_output_get_logical_device_property *property)
+{
+	u64 status, lid;
+	unsigned long flags;
+	int ret;
+	struct hv_input_get_logical_device_property *input;
+	struct hv_output_get_logical_device_property *output;
+
+	local_irq_save(flags);
+
+	input = *this_cpu_ptr(hyperv_pcpu_input_arg);
+	output = *this_cpu_ptr(hyperv_pcpu_input_arg) + sizeof(*input);
+	memset(input, 0, sizeof(*input));
+	input->partition_id = HV_PARTITION_ID_SELF;
+	ret = hv_iommu_lookup_logical_dev_id(to_pci_dev(dev), &lid);
+	if (ret) {
+		local_irq_restore(flags);
+		return ret;
+	}
+	input->logical_device_id = lid;
+	input->code = code;
+	status = hv_do_hypercall(HVCALL_GET_LOGICAL_DEVICE_PROPERTY, input, output);
+	*property = *output;
+
+	local_irq_restore(flags);
+
+	if (!hv_result_success(status))
+		pr_err("HVCALL_GET_LOGICAL_DEVICE_PROPERTY failed, status %lld\n", status);
+
+	return hv_result_to_errno(status);
+}
+
+static struct iommu_device *hv_iommu_probe_device(struct device *dev)
+{
+	struct pci_dev *pdev;
+	struct hv_iommu_endpoint *vdev;
+	struct hv_output_get_logical_device_property device_iommu_property = {0};
+
+	if (!dev_is_pci(dev))
+		return ERR_PTR(-ENODEV);
+
+	pdev = to_pci_dev(dev);
+
+	if (hv_iommu_get_logical_device_property(dev,
+						 HV_LOGICAL_DEVICE_PROPERTY_PVIOMMU,
+						 &device_iommu_property) ||
+	    !(device_iommu_property.device_iommu & HV_DEVICE_IOMMU_ENABLED))
+		return ERR_PTR(-ENODEV);
+
+	vdev = kzalloc_obj(*vdev, GFP_KERNEL);
+	if (!vdev)
+		return ERR_PTR(-ENOMEM);
+
+	vdev->dev = dev;
+	vdev->hv_iommu = hv_iommu_device;
+	dev_iommu_priv_set(dev, vdev);
+
+	if (hv_iommu_ats_supported(hv_iommu_device->cap) &&
+	    pci_ats_supported(pdev))
+		pci_enable_ats(pdev, __ffs(hv_iommu_device->pgsize_bitmap));
+
+	return &vdev->hv_iommu->iommu;
+}
+
+static void hv_iommu_release_device(struct device *dev)
+{
+	struct hv_iommu_endpoint *vdev = dev_iommu_priv_get(dev);
+	struct pci_dev *pdev = to_pci_dev(dev);
+
+	if (pdev->ats_enabled)
+		pci_disable_ats(pdev);
+
+	dev_iommu_priv_set(dev, NULL);
+	set_dma_ops(dev, NULL);
+
+	kfree(vdev);
+}
+
+static struct iommu_group *hv_iommu_device_group(struct device *dev)
+{
+	if (dev_is_pci(dev))
+		return pci_device_group(dev);
+	else
+		return generic_device_group(dev);
+}
+
+static int hv_configure_device_domain(struct hv_iommu_domain *hv_domain, u32 domain_type)
+{
+	u64 status;
+	unsigned long flags;
+	struct pt_iommu_x86_64_hw_info pt_info;
+	struct hv_input_configure_device_domain *input;
+
+	local_irq_save(flags);
+
+	input = *this_cpu_ptr(hyperv_pcpu_input_arg);
+	memset(input, 0, sizeof(*input));
+	input->device_domain = hv_domain->device_domain;
+	input->settings.flags.blocked = (domain_type == IOMMU_DOMAIN_BLOCKED);
+	input->settings.flags.translation_enabled = (domain_type != IOMMU_DOMAIN_IDENTITY);
+
+	if (domain_type & __IOMMU_DOMAIN_PAGING) {
+		pt_iommu_x86_64_hw_info(&hv_domain->pt_iommu_x86_64, &pt_info);
+		input->settings.page_table_root = pt_info.gcr3_pt;
+		input->settings.flags.first_stage_paging_mode =
+			pt_info.levels == 5;
+	}
+	status = hv_do_hypercall(HVCALL_CONFIGURE_DEVICE_DOMAIN, input, NULL);
+
+	local_irq_restore(flags);
+
+	if (!hv_result_success(status))
+		pr_err("HVCALL_CONFIGURE_DEVICE_DOMAIN failed, status %lld\n", status);
+
+	return hv_result_to_errno(status);
+}
+
+static int __init hv_initialize_static_domains(void)
+{
+	int ret;
+	struct hv_iommu_domain *hv_domain;
+
+	/* Default stage-1 identity domain */
+	hv_domain = &hv_identity_domain;
+
+	ret = hv_create_device_domain(hv_domain, HV_DEVICE_DOMAIN_TYPE_S1);
+	if (ret)
+		return ret;
+
+	ret = hv_configure_device_domain(hv_domain, IOMMU_DOMAIN_IDENTITY);
+	if (ret)
+		goto delete_identity_domain;
+
+	hv_domain->domain.type = IOMMU_DOMAIN_IDENTITY;
+	hv_domain->domain.ops = &hv_iommu_identity_domain_ops;
+	hv_domain->domain.owner = &hv_iommu_ops;
+	hv_domain->domain.geometry = hv_iommu_device->geometry;
+	hv_domain->domain.pgsize_bitmap = hv_iommu_device->pgsize_bitmap;
+
+	/* Default stage-1 blocked domain */
+	hv_domain = &hv_blocking_domain;
+
+	ret = hv_create_device_domain(hv_domain, HV_DEVICE_DOMAIN_TYPE_S1);
+	if (ret)
+		goto delete_identity_domain;
+
+	ret = hv_configure_device_domain(hv_domain, IOMMU_DOMAIN_BLOCKED);
+	if (ret)
+		goto delete_blocked_domain;
+
+	hv_domain->domain.type = IOMMU_DOMAIN_BLOCKED;
+	hv_domain->domain.ops = &hv_iommu_blocking_domain_ops;
+	hv_domain->domain.owner = &hv_iommu_ops;
+	hv_domain->domain.geometry = hv_iommu_device->geometry;
+	hv_domain->domain.pgsize_bitmap = hv_iommu_device->pgsize_bitmap;
+
+	return 0;
+
+delete_blocked_domain:
+	hv_delete_device_domain(&hv_blocking_domain);
+delete_identity_domain:
+	hv_delete_device_domain(&hv_identity_domain);
+	return ret;
+}
+
+#define INTERRUPT_RANGE_START	(0xfee00000)
+#define INTERRUPT_RANGE_END	(0xfeefffff)
+static void hv_iommu_get_resv_regions(struct device *dev,
+		struct list_head *head)
+{
+	struct iommu_resv_region *region;
+
+	region = iommu_alloc_resv_region(INTERRUPT_RANGE_START,
+				      INTERRUPT_RANGE_END - INTERRUPT_RANGE_START + 1,
+				      0, IOMMU_RESV_MSI, GFP_KERNEL);
+	if (!region)
+		return;
+
+	list_add_tail(&region->list, head);
+}
+
+static void hv_iommu_flush_iotlb_all(struct iommu_domain *domain)
+{
+	hv_flush_device_domain(to_hv_iommu_domain(domain));
+}
+
+static void hv_iommu_iotlb_sync(struct iommu_domain *domain,
+				struct iommu_iotlb_gather *iotlb_gather)
+{
+	hv_flush_device_domain(to_hv_iommu_domain(domain));
+
+	iommu_put_pages_list(&iotlb_gather->freelist);
+}
+
+static void hv_iommu_paging_domain_free(struct iommu_domain *domain)
+{
+	struct hv_iommu_domain *hv_domain = to_hv_iommu_domain(domain);
+
+	/* Free all remaining mappings */
+	pt_iommu_deinit(&hv_domain->pt_iommu);
+
+	hv_delete_device_domain(hv_domain);
+
+	kfree(hv_domain);
+}
+
+static const struct iommu_domain_ops hv_iommu_identity_domain_ops = {
+	.attach_dev	= hv_iommu_attach_dev,
+};
+
+static const struct iommu_domain_ops hv_iommu_blocking_domain_ops = {
+	.attach_dev	= hv_iommu_attach_dev,
+};
+
+static const struct iommu_domain_ops hv_iommu_paging_domain_ops = {
+	.attach_dev	= hv_iommu_attach_dev,
+	IOMMU_PT_DOMAIN_OPS(x86_64),
+	.flush_iotlb_all = hv_iommu_flush_iotlb_all,
+	.iotlb_sync = hv_iommu_iotlb_sync,
+	.free = hv_iommu_paging_domain_free,
+};
+
+static struct iommu_domain *hv_iommu_domain_alloc_paging(struct device *dev)
+{
+	int ret;
+	struct hv_iommu_domain *hv_domain;
+	struct pt_iommu_x86_64_cfg cfg = {};
+
+	hv_domain = kzalloc_obj(*hv_domain, GFP_KERNEL);
+	if (!hv_domain)
+		return ERR_PTR(-ENOMEM);
+
+	ret = hv_create_device_domain(hv_domain, HV_DEVICE_DOMAIN_TYPE_S1);
+	if (ret) {
+		kfree(hv_domain);
+		return ERR_PTR(ret);
+	}
+
+	hv_domain->domain.geometry = hv_iommu_device->geometry;
+	hv_domain->pt_iommu.nid = dev_to_node(dev);
+
+	cfg.common.hw_max_vasz_lg2 = hv_iommu_device->max_iova_width;
+	cfg.common.hw_max_oasz_lg2 = 52;
+	cfg.top_level = (hv_iommu_device->max_iova_width > 48) ? 4 : 3;
+
+	ret = pt_iommu_x86_64_init(&hv_domain->pt_iommu_x86_64, &cfg, GFP_KERNEL);
+	if (ret) {
+		hv_delete_device_domain(hv_domain);
+		kfree(hv_domain);
+		return ERR_PTR(ret);
+	}
+
+	/* Constrain to page sizes the hypervisor supports */
+	hv_domain->domain.pgsize_bitmap &= hv_iommu_device->pgsize_bitmap;
+
+	hv_domain->domain.ops = &hv_iommu_paging_domain_ops;
+
+	ret = hv_configure_device_domain(hv_domain, __IOMMU_DOMAIN_PAGING);
+	if (ret) {
+		pt_iommu_deinit(&hv_domain->pt_iommu);
+		hv_delete_device_domain(hv_domain);
+		kfree(hv_domain);
+		return ERR_PTR(ret);
+	}
+
+	return &hv_domain->domain;
+}
+
+static struct iommu_ops hv_iommu_ops = {
+	.capable		  = hv_iommu_capable,
+	.domain_alloc_paging	  = hv_iommu_domain_alloc_paging,
+	.probe_device		  = hv_iommu_probe_device,
+	.release_device		  = hv_iommu_release_device,
+	.device_group		  = hv_iommu_device_group,
+	.get_resv_regions	  = hv_iommu_get_resv_regions,
+	.owner			  = THIS_MODULE,
+	.identity_domain	  = &hv_identity_domain.domain,
+	.blocked_domain		  = &hv_blocking_domain.domain,
+	.release_domain		  = &hv_blocking_domain.domain,
+};
+
+static int hv_iommu_detect(struct hv_output_get_iommu_capabilities *hv_iommu_cap)
+{
+	u64 status;
+	unsigned long flags;
+	struct hv_input_get_iommu_capabilities *input;
+	struct hv_output_get_iommu_capabilities *output;
+
+	local_irq_save(flags);
+
+	input = *this_cpu_ptr(hyperv_pcpu_input_arg);
+	output = *this_cpu_ptr(hyperv_pcpu_input_arg) + sizeof(*input);
+	memset(input, 0, sizeof(*input));
+	input->partition_id = HV_PARTITION_ID_SELF;
+	status = hv_do_hypercall(HVCALL_GET_IOMMU_CAPABILITIES, input, output);
+	*hv_iommu_cap = *output;
+
+	local_irq_restore(flags);
+
+	if (!hv_result_success(status))
+		pr_err("HVCALL_GET_IOMMU_CAPABILITIES failed, status %lld\n", status);
+
+	return hv_result_to_errno(status);
+}
+
+static void __init hv_init_iommu_device(struct hv_iommu_dev *hv_iommu,
+			struct hv_output_get_iommu_capabilities *hv_iommu_cap)
+{
+	ida_init(&hv_iommu->domain_ids);
+
+	hv_iommu->cap = hv_iommu_cap->iommu_cap;
+	hv_iommu->max_iova_width = hv_iommu_cap->max_iova_width;
+	if (!hv_iommu_5lvl_supported(hv_iommu->cap) &&
+	    hv_iommu->max_iova_width > 48) {
+		pr_info("5-level paging not supported, limiting iova width to 48.\n");
+		hv_iommu->max_iova_width = 48;
+	}
+
+	hv_iommu->geometry = (struct iommu_domain_geometry) {
+		.aperture_start = 0,
+		.aperture_end = (((u64)1) << hv_iommu->max_iova_width) - 1,
+		.force_aperture = true,
+	};
+
+	hv_iommu->first_domain = HV_DEVICE_DOMAIN_ID_DEFAULT + 1;
+	hv_iommu->last_domain = HV_DEVICE_DOMAIN_ID_NULL - 1;
+	/* Only x86 page sizes (4K/2M/1G) are supported */
+	hv_iommu->pgsize_bitmap = hv_iommu_cap->pgsize_bitmap &
+				  (SZ_4K | SZ_2M | SZ_1G);
+	if (hv_iommu->pgsize_bitmap != hv_iommu_cap->pgsize_bitmap)
+		pr_warn("unsupported page sizes masked: 0x%llx -> 0x%llx\n",
+			hv_iommu_cap->pgsize_bitmap, hv_iommu->pgsize_bitmap);
+	if (!hv_iommu->pgsize_bitmap) {
+		pr_warn("no supported page sizes, defaulting to 4K\n");
+		hv_iommu->pgsize_bitmap = SZ_4K;
+	}
+	hv_iommu_device = hv_iommu;
+}
+
+int __init hv_iommu_init(void)
+{
+	int ret = 0;
+	struct hv_iommu_dev *hv_iommu = NULL;
+	struct hv_output_get_iommu_capabilities hv_iommu_cap = {0};
+
+	if (no_iommu || iommu_detected)
+		return -ENODEV;
+
+	if (!hv_is_hyperv_initialized())
+		return -ENODEV;
+
+	ret = hv_iommu_detect(&hv_iommu_cap);
+	if (ret) {
+		pr_err("HVCALL_GET_IOMMU_CAPABILITIES failed: %d\n", ret);
+		return -ENODEV;
+	}
+
+	if (!hv_iommu_present(hv_iommu_cap.iommu_cap) ||
+	    !hv_iommu_s1_domain_supported(hv_iommu_cap.iommu_cap)) {
+		pr_err("IOMMU capabilities not sufficient: cap=0x%llx\n",
+		       hv_iommu_cap.iommu_cap);
+		return -ENODEV;
+	}
+
+	iommu_detected = 1;
+	pci_request_acs();
+
+	hv_iommu = kzalloc_obj(*hv_iommu, GFP_KERNEL);
+	if (!hv_iommu)
+		return -ENOMEM;
+
+	hv_init_iommu_device(hv_iommu, &hv_iommu_cap);
+
+	ret = hv_initialize_static_domains();
+	if (ret) {
+		pr_err("static domains init failed: %d\n", ret);
+		goto err_free;
+	}
+
+	ret = iommu_device_sysfs_add(&hv_iommu->iommu, NULL, NULL, "%s", "hv-iommu");
+	if (ret) {
+		pr_err("iommu_device_sysfs_add failed: %d\n", ret);
+		goto err_delete_static_domains;
+	}
+
+	ret = iommu_device_register(&hv_iommu->iommu, &hv_iommu_ops, NULL);
+	if (ret) {
+		pr_err("iommu_device_register failed: %d\n", ret);
+		goto err_sysfs_remove;
+	}
+
+	pr_info("successfully initialized\n");
+	return 0;
+
+err_sysfs_remove:
+	iommu_device_sysfs_remove(&hv_iommu->iommu);
+err_delete_static_domains:
+	hv_delete_device_domain(&hv_blocking_domain);
+	hv_delete_device_domain(&hv_identity_domain);
+err_free:
+	kfree(hv_iommu);
+	return ret;
+}
diff --git a/drivers/iommu/hyperv/iommu.h b/drivers/iommu/hyperv/iommu.h
new file mode 100644
index 000000000000..43f20d371245
--- /dev/null
+++ b/drivers/iommu/hyperv/iommu.h
@@ -0,0 +1,54 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+/*
+ * Hyper-V IOMMU driver.
+ *
+ * Copyright (C) 2024-2025, Microsoft, Inc.
+ *
+ */
+
+#ifndef _HYPERV_IOMMU_H
+#define _HYPERV_IOMMU_H
+
+struct hv_iommu_dev {
+	struct iommu_device iommu;
+	struct ida domain_ids;
+
+	/* Device configuration */
+	u8  max_iova_width;
+	u8  max_pasid_width;
+	u64 cap;
+	u64 pgsize_bitmap;
+
+	struct iommu_domain_geometry geometry;
+	u64 first_domain;
+	u64 last_domain;
+};
+
+struct hv_iommu_domain {
+	union {
+		struct iommu_domain    domain;
+		struct pt_iommu        pt_iommu;
+		struct pt_iommu_x86_64 pt_iommu_x86_64;
+	};
+	struct hv_iommu_dev *hv_iommu;
+	struct hv_input_device_domain device_domain;
+	u64		pgsize_bitmap;
+};
+
+struct hv_pci_busdata {
+	int               pci_domain_nr;
+	u32               logical_dev_id_prefix;
+	struct list_head  list;
+};
+
+struct hv_iommu_endpoint {
+	struct device *dev;
+	struct hv_iommu_dev *hv_iommu;
+	struct hv_iommu_domain *hv_domain;
+};
+
+#define to_hv_iommu_domain(d) \
+	container_of(d, struct hv_iommu_domain, domain)
+
+#endif /* _HYPERV_IOMMU_H */
diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c
index cfc8fa403dad..a4af9c8c2220 100644
--- a/drivers/pci/controller/pci-hyperv.c
+++ b/drivers/pci/controller/pci-hyperv.c
@@ -3715,6 +3715,7 @@ static int hv_pci_probe(struct hv_device *hdev,
 	struct hv_pcibus_device *hbus;
 	int ret, dom;
 	u16 dom_req;
+	u32 prefix;
 	char *name;
 
 	bridge = devm_pci_alloc_host_bridge(&hdev->device, 0);
@@ -3857,13 +3858,25 @@ static int hv_pci_probe(struct hv_device *hdev,
 
 	hbus->state = hv_pcibus_probed;
 
-	ret = create_root_hv_pci_bus(hbus);
+	/* Notify pvIOMMU before any device on the bus is scanned. */
+	prefix = (hdev->dev_instance.b[5] << 24) |
+		 (hdev->dev_instance.b[4] << 16) |
+		 (hdev->dev_instance.b[7] <<  8) |
+		 (hdev->dev_instance.b[6] & 0xf8);
+
+	ret = hv_iommu_register_pci_bus(dom, prefix);
 	if (ret)
 		goto free_windows;
 
+	ret = create_root_hv_pci_bus(hbus);
+	if (ret)
+		goto unregister_pviommu;
+
 	mutex_unlock(&hbus->state_lock);
 	return 0;
 
+unregister_pviommu:
+	hv_iommu_unregister_pci_bus(dom);
 free_windows:
 	hv_pci_free_bridge_windows(hbus);
 exit_d0:
@@ -3974,8 +3987,10 @@ static int hv_pci_bus_exit(struct hv_device *hdev, bool keep_devs)
 static void hv_pci_remove(struct hv_device *hdev)
 {
 	struct hv_pcibus_device *hbus;
+	int dom;
 
 	hbus = hv_get_drvdata(hdev);
+	dom = hbus->bridge->domain_nr;
 	if (hbus->state == hv_pcibus_installed) {
 		tasklet_disable(&hdev->channel->callback_event);
 		hbus->state = hv_pcibus_removing;
@@ -3994,6 +4009,8 @@ static void hv_pci_remove(struct hv_device *hdev)
 		hv_pci_remove_slots(hbus);
 		pci_remove_root_bus(hbus->bridge->bus);
 		pci_unlock_rescan_remove();
+
+		hv_iommu_unregister_pci_bus(dom);
 	}
 
 	hv_pci_bus_exit(hdev, false);
diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h
index bf601d67cecb..b71345c74568 100644
--- a/include/asm-generic/mshyperv.h
+++ b/include/asm-generic/mshyperv.h
@@ -73,6 +73,18 @@ extern enum hv_partition_type hv_curr_partition_type;
 extern void * __percpu *hyperv_pcpu_input_arg;
 extern void * __percpu *hyperv_pcpu_output_arg;
 
+#ifdef CONFIG_HYPERV_PVIOMMU
+int  hv_iommu_register_pci_bus(int pci_domain_nr, u32 logical_dev_id_prefix);
+void hv_iommu_unregister_pci_bus(int pci_domain_nr);
+#else
+static inline int hv_iommu_register_pci_bus(int pci_domain_nr,
+					    u32 logical_dev_id_prefix)
+{
+	return 0;
+}
+static inline void hv_iommu_unregister_pci_bus(int pci_domain_nr) { }
+#endif
+
 u64 hv_do_hypercall(u64 control, void *inputaddr, void *outputaddr);
 u64 hv_do_fast_hypercall8(u16 control, u64 input8);
 u64 hv_do_fast_hypercall16(u16 control, u64 input1, u64 input2);
-- 
2.52.0


^ permalink raw reply related

* [PATCH v1 2/4] hyperv: Introduce new hypercall interfaces used by Hyper-V guest IOMMU
From: Yu Zhang @ 2026-05-11 16:24 UTC (permalink / raw)
  To: linux-kernel, linux-hyperv, iommu, linux-pci, linux-arch
  Cc: wei.liu, kys, haiyangz, decui, longli, joro, will, robin.murphy,
	bhelgaas, kwilczynski, lpieralisi, mani, robh, arnd, jgg,
	mhklinux, jacob.pan, tgopinath, easwar.hariharan
In-Reply-To: <20260511162408.1180069-1-zhangyu1@linux.microsoft.com>

From: Wei Liu <wei.liu@kernel.org>

Hyper-V guest IOMMU is a para-virtualized IOMMU based on hypercalls.
Introduce the hypercalls used by the child partition to interact with
this facility.

These hypercalls fall into below categories:
- Detection and capability: HVCALL_GET_IOMMU_CAPABILITIES is used to
  detect the existence and capabilities of the guest IOMMU.

- Device management: HVCALL_GET_LOGICAL_DEVICE_PROPERTY is used to
  check whether an endpoint device is managed by the guest IOMMU.

- Domain management: A set of hypercalls is provided to handle the
  creation, configuration, and deletion of guest domains, as well as
  the attachment/detachment of endpoint devices to/from those domains.

- IOTLB flushing: HVCALL_FLUSH_DEVICE_DOMAIN is used to ask Hyper-V
  for a domain-selective IOTLB flush (which in its handler may flush
  the device TLB as well).

Signed-off-by: Wei Liu <wei.liu@kernel.org>
Co-developed-by: Easwar Hariharan <easwar.hariharan@linux.microsoft.com>
Signed-off-by: Easwar Hariharan <easwar.hariharan@linux.microsoft.com>
Co-developed-by: Yu Zhang <zhangyu1@linux.microsoft.com>
Signed-off-by: Yu Zhang <zhangyu1@linux.microsoft.com>
---
 include/hyperv/hvgdk_mini.h |   8 +++
 include/hyperv/hvhdk_mini.h | 124 ++++++++++++++++++++++++++++++++++++
 2 files changed, 132 insertions(+)

diff --git a/include/hyperv/hvgdk_mini.h b/include/hyperv/hvgdk_mini.h
index 6a4e8b9d570f..5bdbb44da112 100644
--- a/include/hyperv/hvgdk_mini.h
+++ b/include/hyperv/hvgdk_mini.h
@@ -486,10 +486,16 @@ union hv_vp_assist_msr_contents {	 /* HV_REGISTER_VP_ASSIST_PAGE */
 #define HVCALL_GET_VP_INDEX_FROM_APIC_ID		0x009a
 #define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_SPACE	0x00af
 #define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_LIST	0x00b0
+#define HVCALL_CREATE_DEVICE_DOMAIN			0x00b1
+#define HVCALL_ATTACH_DEVICE_DOMAIN			0x00b2
 #define HVCALL_SIGNAL_EVENT_DIRECT			0x00c0
 #define HVCALL_POST_MESSAGE_DIRECT			0x00c1
 #define HVCALL_DISPATCH_VP				0x00c2
+#define HVCALL_DETACH_DEVICE_DOMAIN			0x00c4
+#define HVCALL_DELETE_DEVICE_DOMAIN			0x00c5
 #define HVCALL_GET_GPA_PAGES_ACCESS_STATES		0x00c9
+#define HVCALL_CONFIGURE_DEVICE_DOMAIN			0x00ce
+#define HVCALL_FLUSH_DEVICE_DOMAIN			0x00d0
 #define HVCALL_ACQUIRE_SPARSE_SPA_PAGE_HOST_ACCESS	0x00d7
 #define HVCALL_RELEASE_SPARSE_SPA_PAGE_HOST_ACCESS	0x00d8
 #define HVCALL_MODIFY_SPARSE_GPA_PAGE_HOST_VISIBILITY	0x00db
@@ -502,6 +508,8 @@ union hv_vp_assist_msr_contents {	 /* HV_REGISTER_VP_ASSIST_PAGE */
 #define HVCALL_MMIO_READ				0x0106
 #define HVCALL_MMIO_WRITE				0x0107
 #define HVCALL_DISABLE_HYP_EX                           0x010f
+#define HVCALL_GET_IOMMU_CAPABILITIES			0x0125
+#define HVCALL_GET_LOGICAL_DEVICE_PROPERTY		0x0127
 #define HVCALL_MAP_STATS_PAGE2				0x0131
 
 /* HV_HYPERCALL_INPUT */
diff --git a/include/hyperv/hvhdk_mini.h b/include/hyperv/hvhdk_mini.h
index b4cb2fa26e9b..493608e791b4 100644
--- a/include/hyperv/hvhdk_mini.h
+++ b/include/hyperv/hvhdk_mini.h
@@ -547,4 +547,128 @@ union hv_device_id {		/* HV_DEVICE_ID */
 	} acpi;
 } __packed;
 
+/* Device domain types */
+#define HV_DEVICE_DOMAIN_TYPE_S1	1 /* Stage 1 domain */
+
+/* ID for default domain and NULL domain */
+#define HV_DEVICE_DOMAIN_ID_DEFAULT 0
+#define HV_DEVICE_DOMAIN_ID_NULL    0xFFFFFFFFULL
+
+union hv_device_domain_id {
+	u64 as_uint64;
+	struct {
+		u32 type: 4;
+		u32 reserved: 28;
+		u32 id;
+	} __packed;
+};
+
+struct hv_input_device_domain {
+	u64 partition_id;
+	union hv_input_vtl owner_vtl;
+	u8 padding[7];
+	union hv_device_domain_id domain_id;
+} __packed;
+
+union hv_create_device_domain_flags {
+	u32 as_uint32;
+	struct {
+		u32 forward_progress_required: 1;
+		u32 inherit_owning_vtl: 1;
+		u32 reserved: 30;
+	} __packed;
+};
+
+struct hv_input_create_device_domain {
+	struct hv_input_device_domain device_domain;
+	union hv_create_device_domain_flags create_device_domain_flags;
+} __packed;
+
+struct hv_input_delete_device_domain {
+	struct hv_input_device_domain device_domain;
+} __packed;
+
+struct hv_input_attach_device_domain {
+	struct hv_input_device_domain device_domain;
+	union hv_device_id device_id;
+} __packed;
+
+struct hv_input_detach_device_domain {
+	u64 partition_id;
+	union hv_device_id device_id;
+} __packed;
+
+struct hv_device_domain_settings {
+	struct {
+		/*
+		 * Enable translations. If not enabled, all transaction bypass
+		 * S1 translations.
+		 */
+		u64 translation_enabled: 1;
+		u64 blocked: 1;
+		/*
+		 * First stage address translation paging mode:
+		 * 0: 4-level paging (default)
+		 * 1: 5-level paging
+		 */
+		u64 first_stage_paging_mode: 1;
+		u64 reserved: 61;
+	} flags;
+
+	/* Address of translation table */
+	u64 page_table_root;
+} __packed;
+
+struct hv_input_configure_device_domain {
+	struct hv_input_device_domain device_domain;
+	struct hv_device_domain_settings settings;
+} __packed;
+
+struct hv_input_get_iommu_capabilities {
+	u64 partition_id;
+	u64 reserved;
+} __packed;
+
+struct hv_output_get_iommu_capabilities {
+	u32 size;
+	u16 reserved;
+	u8  max_iova_width;
+	u8  max_pasid_width;
+
+#define HV_IOMMU_CAP_PRESENT (1ULL << 0)
+#define HV_IOMMU_CAP_S2 (1ULL << 1)
+#define HV_IOMMU_CAP_S1 (1ULL << 2)
+#define HV_IOMMU_CAP_S1_5LVL (1ULL << 3)
+#define HV_IOMMU_CAP_PASID (1ULL << 4)
+#define HV_IOMMU_CAP_ATS (1ULL << 5)
+#define HV_IOMMU_CAP_PRI (1ULL << 6)
+
+	u64 iommu_cap;
+	u64 pgsize_bitmap;
+} __packed;
+
+enum hv_logical_device_property_code {
+	HV_LOGICAL_DEVICE_PROPERTY_PVIOMMU = 10,
+};
+
+struct hv_input_get_logical_device_property {
+	u64 partition_id;
+	u64 logical_device_id;
+	/* Takes values from enum hv_logical_device_property_code. */
+	u32 code;
+	u32 reserved;
+} __packed;
+
+struct hv_output_get_logical_device_property {
+#define HV_DEVICE_IOMMU_ENABLED (1ULL << 0)
+	u64 device_iommu;
+	u64 reserved;
+} __packed;
+
+struct hv_input_flush_device_domain {
+	struct hv_input_device_domain device_domain;
+	u32 flags;
+	u32 reserved;
+} __packed;
+
 #endif /* _HV_HVHDK_MINI_H */
-- 
2.52.0


^ permalink raw reply related

* [PATCH v1 1/4] iommu: Move Hyper-V IOMMU driver to its own subdirectory
From: Yu Zhang @ 2026-05-11 16:24 UTC (permalink / raw)
  To: linux-kernel, linux-hyperv, iommu, linux-pci, linux-arch
  Cc: wei.liu, kys, haiyangz, decui, longli, joro, will, robin.murphy,
	bhelgaas, kwilczynski, lpieralisi, mani, robh, arnd, jgg,
	mhklinux, jacob.pan, tgopinath, easwar.hariharan
In-Reply-To: <20260511162408.1180069-1-zhangyu1@linux.microsoft.com>

From: Easwar Hariharan <eahariha@linux.microsoft.com>

The Hyper-V IOMMU driver currently only supports IRQ remapping.
As it will be adding DMA remapping support, prepare a directory
to contain all the different feature files.

This is a simple rename commit and has no functional changes.

Signed-off-by: Easwar Hariharan <eahariha@linux.microsoft.com>
Signed-off-by: Yu Zhang <zhangyu1@linux.microsoft.com>
---
 MAINTAINERS                                            |  2 +-
 drivers/iommu/Kconfig                                  | 10 +---------
 drivers/iommu/Makefile                                 |  2 +-
 drivers/iommu/hyperv/Kconfig                           | 10 ++++++++++
 drivers/iommu/hyperv/Makefile                          |  2 ++
 .../iommu/{hyperv-iommu.c => hyperv/irq_remapping.c}   |  2 +-
 6 files changed, 16 insertions(+), 12 deletions(-)
 create mode 100644 drivers/iommu/hyperv/Kconfig
 create mode 100644 drivers/iommu/hyperv/Makefile
 rename drivers/iommu/{hyperv-iommu.c => hyperv/irq_remapping.c} (99%)

diff --git a/MAINTAINERS b/MAINTAINERS
index b2040011a386..1519de2a2a09 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -11979,7 +11979,7 @@ F:	drivers/clocksource/hyperv_timer.c
 F:	drivers/hid/hid-hyperv.c
 F:	drivers/hv/
 F:	drivers/input/serio/hyperv-keyboard.c
-F:	drivers/iommu/hyperv-iommu.c
+F:	drivers/iommu/hyperv/
 F:	drivers/net/ethernet/microsoft/
 F:	drivers/net/hyperv/
 F:	drivers/pci/controller/pci-hyperv-intf.c
diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index f86262b11416..0bdf2ac3d782 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -195,6 +195,7 @@ config MSM_IOMMU
 source "drivers/iommu/amd/Kconfig"
 source "drivers/iommu/arm/Kconfig"
 source "drivers/iommu/intel/Kconfig"
+source "drivers/iommu/hyperv/Kconfig"
 source "drivers/iommu/iommufd/Kconfig"
 source "drivers/iommu/riscv/Kconfig"
 
@@ -351,15 +352,6 @@ config MTK_IOMMU_V1
 
 	  if unsure, say N here.
 
-config HYPERV_IOMMU
-	bool "Hyper-V IRQ Handling"
-	depends on HYPERV && X86
-	select IOMMU_API
-	default HYPERV
-	help
-	  Stub IOMMU driver to handle IRQs to support Hyper-V Linux
-	  guest and root partitions.
-
 config VIRTIO_IOMMU
 	tristate "Virtio IOMMU driver"
 	depends on VIRTIO
diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile
index 0275821f4ef9..32f3ca758556 100644
--- a/drivers/iommu/Makefile
+++ b/drivers/iommu/Makefile
@@ -4,6 +4,7 @@ obj-$(CONFIG_AMD_IOMMU) += amd/
 obj-$(CONFIG_INTEL_IOMMU) += intel/
 obj-$(CONFIG_RISCV_IOMMU) += riscv/
 obj-$(CONFIG_GENERIC_PT) += generic_pt/fmt/
+obj-$(CONFIG_HYPERV_IOMMU) += hyperv/
 obj-$(CONFIG_IOMMU_API) += iommu.o
 obj-$(CONFIG_IOMMU_SUPPORT) += iommu-pages.o
 obj-$(CONFIG_IOMMU_API) += iommu-traces.o
@@ -30,7 +31,6 @@ obj-$(CONFIG_TEGRA_IOMMU_SMMU) += tegra-smmu.o
 obj-$(CONFIG_EXYNOS_IOMMU) += exynos-iommu.o
 obj-$(CONFIG_FSL_PAMU) += fsl_pamu.o fsl_pamu_domain.o
 obj-$(CONFIG_S390_IOMMU) += s390-iommu.o
-obj-$(CONFIG_HYPERV_IOMMU) += hyperv-iommu.o
 obj-$(CONFIG_VIRTIO_IOMMU) += virtio-iommu.o
 obj-$(CONFIG_IOMMU_SVA) += iommu-sva.o
 obj-$(CONFIG_IOMMU_IOPF) += io-pgfault.o
diff --git a/drivers/iommu/hyperv/Kconfig b/drivers/iommu/hyperv/Kconfig
new file mode 100644
index 000000000000..30f40d867036
--- /dev/null
+++ b/drivers/iommu/hyperv/Kconfig
@@ -0,0 +1,10 @@
+# SPDX-License-Identifier: GPL-2.0-only
+# HyperV paravirtualized IOMMU support
+config HYPERV_IOMMU
+	bool "Hyper-V IRQ Handling"
+	depends on HYPERV && X86
+	select IOMMU_API
+	default HYPERV
+	help
+	  Stub IOMMU driver to handle IRQs to support Hyper-V Linux
+	  guest and root partitions.
diff --git a/drivers/iommu/hyperv/Makefile b/drivers/iommu/hyperv/Makefile
new file mode 100644
index 000000000000..9f557bad94ff
--- /dev/null
+++ b/drivers/iommu/hyperv/Makefile
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0
+obj-$(CONFIG_HYPERV_IOMMU) += irq_remapping.o
diff --git a/drivers/iommu/hyperv-iommu.c b/drivers/iommu/hyperv/irq_remapping.c
similarity index 99%
rename from drivers/iommu/hyperv-iommu.c
rename to drivers/iommu/hyperv/irq_remapping.c
index 479103261ae6..62a94a8c9e95 100644
--- a/drivers/iommu/hyperv-iommu.c
+++ b/drivers/iommu/hyperv/irq_remapping.c
@@ -22,7 +22,7 @@
 #include <asm/hypervisor.h>
 #include <asm/mshyperv.h>
 
-#include "irq_remapping.h"
+#include "../irq_remapping.h"
 
 #ifdef CONFIG_IRQ_REMAP
 
-- 
2.52.0


^ permalink raw reply related

* [PATCH v1 0/4] Hyper-V: Add para-virtualized IOMMU support for Linux guests
From: Yu Zhang @ 2026-05-11 16:24 UTC (permalink / raw)
  To: linux-kernel, linux-hyperv, iommu, linux-pci, linux-arch
  Cc: wei.liu, kys, haiyangz, decui, longli, joro, will, robin.murphy,
	bhelgaas, kwilczynski, lpieralisi, mani, robh, arnd, jgg,
	mhklinux, jacob.pan, tgopinath, easwar.hariharan

This patch series introduces a para-virtualized IOMMU driver for
Linux guests running on Microsoft Hyper-V. The driver enables two
primary use cases:
  1) In-kernel DMA protection for devices assigned to the guest.
  2) Device assignment to guest user space (e.g., via VFIO).

The driver implements the following core functionality:
*   Hypercall-based Enumeration
    Unlike traditional ACPI-based discovery (e.g., DMAR/IVRS),
    this driver enumerates the Hyper-V IOMMU capabilities directly
    via hypercalls. This approach allows the guest to discover
    IOMMU presence and features without requiring specific virtual
    firmware extensions or modifications.

*   Domain Management
    The driver manages IOMMU domains through a new set of Hyper-V
    hypercall interfaces, handling domain allocation, attachment,
    and detachment for endpoint devices.

*   Nested Translation Support
    This implementation leverages guest-managed stage-1 I/O page
    tables nested with host stage-2 translations. It is built
    upon the consolidated IOMMU page table framework (IOMMU_PT).
    This design eliminates the need for emulating map operations.
    Both Intel VT-d and AMD IOMMU platforms are supported.

*   IOTLB Invalidation
    IOTLB invalidation requests are marshaled and issued to the
    hypervisor through the same hypercall mechanism. Both domain-
    selective and page-selective flushes are supported.

Implementation Notes:
*   Platform Support
    The current implementation targets x86 platforms with Intel
    VT-d and AMD IOMMU hardware.

*   MSI Region Handling
    The hardware MSI region is hard-coded to the standard x86
    interrupt range (0xfee00000 - 0xfeefffff). Future updates may
    allow this configuration to be queried via hypercalls if new
    hardware platforms are to be supported.

*   Reserved Regions (RMRR)
    There is currently no requirement to support assigned devices with
    ACPI RMRR limitations. Consequently, this patch series does not
    specify or query reserved memory regions.

Testing:
This series has been validated with the following configurations:
- Intel DSA devices assigned to the guest, tested with dmatest.
- NVMe devices assigned to the guest on AMD platforms, tested
  with fio.
- dma_map_benchmark for DMA mapping performance evaluation.

Changes since RFC v1 [1]:
- Scoped platform support to x86 only (Intel VT-d and AMD IOMMU);
  initialization now uses x86_init.iommu.iommu_init
- Added page-selective IOTLB flush support (new Patch 4)
- Disable device ATS in hv_iommu_release_device()
- Addressed review comments from Michael Kelley:
  - Reversed dependency: pvIOMMU exports registration API for
    pci-hyperv to call, instead of pci-hyperv exporting
    hv_build_logical_dev_id()
  - Dropped separate output page allocation patch; hypercall input
    and output now share the same per-CPU page
  - Cleaned up Kconfig (removed PCI_HYPERV dependency, unnecessary
    selects)
  - Removed dev_list, per-domain spinlock, and syscore_ops
  - Removed forward declarations by reordering functions
  - Fixed typos, cleaned up Kconfig selects, improved pr_info
    messages, etc.

[1] https://lore.kernel.org/linux-hyperv/20251209051128.76913-1-zhangyu1@linux.microsoft.com/

Easwar Hariharan (1):
  iommu: Move Hyper-V IOMMU driver to its own subdirectory

Wei Liu (1):
  hyperv: Introduce new hypercall interfaces used by Hyper-V guest IOMMU

Yu Zhang (2):
  iommu/hyperv: Add para-virtualized IOMMU support for Hyper-V guest
  iommu/hyperv: Add page-selective IOTLB flush support

 MAINTAINERS                                   |   2 +-
 arch/x86/hyperv/hv_init.c                     |   4 +
 arch/x86/include/asm/mshyperv.h               |   4 +
 drivers/iommu/Kconfig                         |  10 +-
 drivers/iommu/Makefile                        |   2 +-
 drivers/iommu/hyperv/Kconfig                  |  27 +
 drivers/iommu/hyperv/Makefile                 |   3 +
 drivers/iommu/hyperv/iommu.c                  | 794 ++++++++++++++++++
 drivers/iommu/hyperv/iommu.h                  |  54 ++
 .../irq_remapping.c}                          |   2 +-
 drivers/pci/controller/pci-hyperv.c           |  19 +-
 include/asm-generic/mshyperv.h                |  12 +
 include/hyperv/hvgdk_mini.h                   |   9 +
 include/hyperv/hvhdk_mini.h                   | 141 ++++
 14 files changed, 1070 insertions(+), 13 deletions(-)
 create mode 100644 drivers/iommu/hyperv/Kconfig
 create mode 100644 drivers/iommu/hyperv/Makefile
 create mode 100644 drivers/iommu/hyperv/iommu.c
 create mode 100644 drivers/iommu/hyperv/iommu.h
 rename drivers/iommu/{hyperv-iommu.c => hyperv/irq_remapping.c} (99%)

-- 
2.52.0


^ permalink raw reply

* Re: [PATCH v4 17/18] mshv: Publish VP to pt_vp_array before installing the file descriptor
From: Stanislav Kinsburskii @ 2026-05-11 15:29 UTC (permalink / raw)
  To: Anirudh Rayabharam
  Cc: kys, haiyangz, wei.liu, decui, longli, linux-hyperv, linux-kernel
In-Reply-To: <20260511-warping-determined-kagu-eb5fcd@anirudhrb>

On Mon, May 11, 2026 at 02:26:45PM +0000, Anirudh Rayabharam wrote:
> On Thu, May 07, 2026 at 03:44:32PM +0000, Stanislav Kinsburskii wrote:
> > mshv_partition_ioctl_create_vp() called anon_inode_getfd() before
> > publishing the new VP into partition->pt_vp_array.  anon_inode_getfd()
> > includes fd_install(), so the fd was live in current->files before the
> > publish ran.
> > 
> > A concurrent MSHV_RUN_VP ioctl on that fd does not serialise against the
> > in-progress MSHV_CREATE_VP — it takes vp->vp_mutex, not the partition
> > mutex.  Once the VP starts running and traps, mshv_intercept_isr() can look
> > up partition->pt_vp_array[vp_index] and observe NULL, silently dropping the
> > intercept message.
> > 
> > Split the fd creation: reserve an fd with get_unused_fd_flags(), create the
> > file with anon_inode_getfile(), publish the VP via smp_store_release(), and
> > finally call fd_install() as the userspace-visibility commit point.
> > 
> > Fixes: 621191d709b14 ("Drivers: hv: Introduce mshv_root module to expose /dev/mshv to VMMs")
> > Signed-off-by: Stanislav Kinsburskii <skinsburskii@linux.microsoft.com>
> > ---
> >  drivers/hv/mshv_root_main.c |   29 ++++++++++++++++++++++-------
> >  1 file changed, 22 insertions(+), 7 deletions(-)
> > 
> > diff --git a/drivers/hv/mshv_root_main.c b/drivers/hv/mshv_root_main.c
> > index e32f6e0f9f637..1c18d1c1f7947 100644
> > --- a/drivers/hv/mshv_root_main.c
> > +++ b/drivers/hv/mshv_root_main.c
> > @@ -1142,6 +1142,8 @@ mshv_partition_ioctl_create_vp(struct mshv_partition *partition,
> >  	struct mshv_vp *vp;
> >  	struct page *intercept_msg_page, *register_page, *ghcb_page;
> >  	struct hv_stats_page *stats_pages[2];
> > +	struct file *file;
> > +	int fd;
> >  	long ret;
> >  
> >  	if (copy_from_user(&args, arg, sizeof(args)))
> > @@ -1214,14 +1216,18 @@ mshv_partition_ioctl_create_vp(struct mshv_partition *partition,
> >  	if (ret)
> >  		goto put_partition;
> >  
> > -	/*
> > -	 * Keep anon_inode_getfd last: it installs fd in the file struct and
> > -	 * thus makes the state accessible in user space.
> > -	 */
> > -	ret = anon_inode_getfd("mshv_vp", &mshv_vp_fops, vp,
> > -			       O_RDWR | O_CLOEXEC);
> 
> Why not just move this anon_inode_getfd() after the smp_store_release()
> call?
> 

Because anon_inode_getfd() can still fail at the fd-table step after the
VP is already visible to lockless ISR readers, and unpublishing it would
require a grace-period wait under pt_mutex while ISRs may have already
observed and acted on the doomed VP. The split form keeps every failable
step before the publish, so failure paths stay simple and the publish
itself cannot fail.

Thanks,
Stanislav


> Thanks,
> Anirudh.

^ permalink raw reply

* Re: [PATCH v4 02/18] mshv: Fix mshv_prepare_pinned_region error path for unencrypted partitions
From: Stanislav Kinsburskii @ 2026-05-11 15:12 UTC (permalink / raw)
  To: Anirudh Rayabharam
  Cc: kys, haiyangz, wei.liu, decui, longli, linux-hyperv, linux-kernel
In-Reply-To: <20260511-ancient-naughty-bat-f58c2c@anirudhrb>

On Mon, May 11, 2026 at 01:48:56PM +0000, Anirudh Rayabharam wrote:
> On Thu, May 07, 2026 at 03:43:09PM +0000, Stanislav Kinsburskii wrote:
> > mshv_prepare_pinned_region() returns 0 (success) when mshv_region_map()
> > fails on an unencrypted partition. The condition on the error path:
> > 
> >     if (ret && mshv_partition_encrypted(partition))
> > 
> > only handles map failures for encrypted partitions — if the partition is
> > not encrypted and the map fails, execution falls through to 'return 0',
> > silently ignoring the error.
> > 
> > Additionally, calling mshv_region_invalidate() inline on map failure
> > zeroes the mreg_pages array before the caller's cleanup path
> > (mshv_region_destroy) can call mshv_region_unmap(). Since unmap skips
> > pages where mreg_pages[offset] is NULL, this can leave stale SLAT
> > mappings for partially-mapped pages.
> > 
> > Fix by returning immediately on success and falling through to error
> > return on failure. For unencrypted partitions, the caller's
> > mshv_region_destroy() handles unmap followed by invalidate in the
> > correct order. For encrypted partitions where re-sharing fails, zero
> > the page array without unpinning — the pages are inaccessible to the
> > host and must not be unpinned, but zeroing prevents
> > mshv_region_destroy() from attempting to unpin them.
> 
> mshv_region_destroy() calls mshv_region_invalidate() which calls
> mshv_region_invalidate_pages() which just does:
> 
> 	static void mshv_region_invalidate_pages(struct mshv_mem_region *region,
> 						 u64 page_offset, u64 page_count)
> 	{
> 		if (region->mreg_type == MSHV_REGION_TYPE_MEM_PINNED)
> 			unpin_user_pages(region->mreg_pages + page_offset, page_count);
> 
> 		memset(region->mreg_pages + page_offset, 0,
> 		       page_count * sizeof(struct page *));
> 	}
> 
> It doesn't checked for zeroed pages before unpinning.
> 

unpin_user_pages skips NULL pages.

Thanks,
Stanislav

> > 
> > Fixes: 621191d709b14 ("Drivers: hv: Introduce mshv_root module to expose /dev/mshv to VMMs")
> > Signed-off-by: Stanislav Kinsburskii <skinsburskii@linux.microsoft.com>
> > ---
> >  drivers/hv/mshv_root_main.c |   26 ++++++++++++++++----------
> >  1 file changed, 16 insertions(+), 10 deletions(-)
> > 
> > diff --git a/drivers/hv/mshv_root_main.c b/drivers/hv/mshv_root_main.c
> > index 665d565899c15..7e4252b6bc65c 100644
> > --- a/drivers/hv/mshv_root_main.c
> > +++ b/drivers/hv/mshv_root_main.c
> > @@ -1360,32 +1360,38 @@ static int mshv_prepare_pinned_region(struct mshv_mem_region *region)
> >  			pt_err(partition,
> >  			       "Failed to unshare memory region (guest_pfn: %llu): %d\n",
> >  			       region->start_gfn, ret);
> > -			goto invalidate_region;
> > +			goto err_out;
> >  		}
> >  	}
> >  
> >  	ret = mshv_region_map(region);
> > -	if (ret && mshv_partition_encrypted(partition)) {
> > +	if (ret)
> > +		goto share_region;
> > +
> > +	return 0;
> > +
> > +share_region:
> > +	if (mshv_partition_encrypted(partition)) {
> >  		int shrc;
> >  
> >  		shrc = mshv_region_share(region);
> >  		if (!shrc)
> > -			goto invalidate_region;
> > +			goto err_out;
> >  
> >  		pt_err(partition,
> >  		       "Failed to share memory region (guest_pfn: %llu): %d\n",
> >  		       region->start_gfn, shrc);
> >  		/*
> > -		 * Don't unpin if marking shared failed because pages are no
> > -		 * longer mapped in the host, ie root, anymore.
> > +		 * Re-sharing failed — the pages remain inaccessible to the
> > +		 * host.  Zero the page array so that mshv_region_destroy()
> > +		 * won't attempt to unpin them (leaking the page references
> > +		 * is intentional; unpinning host-inaccessible pages would be
> > +		 * unsafe).
> >  		 */
> 
> Actually, mshv_region_destroy() also does mshv_region_share(). Maybe we
> can remove it from here altogether. Either way, should this zeroing
> logic be added there too so as not to crash the host by unpinning pages
> that weren't marked shared?
> 
> >From mshv_region_destroy():
> 
> 	if (mshv_partition_encrypted(partition)) {
> 		ret = mshv_region_share(region);
> 		if (ret) {
> 			pt_err(partition,
> 			       "Failed to regain access to memory, unpinning user pages will fail and crash the host error: %d\n",
> 			       ret);
> 			return;
> 		}
> 	}
> 
> Thanks,
> Anirudh.

^ permalink raw reply

* Re: [PATCH v4 02/18] mshv: Fix mshv_prepare_pinned_region error path for unencrypted partitions
From: Stanislav Kinsburskii @ 2026-05-11 15:06 UTC (permalink / raw)
  To: Anirudh Rayabharam
  Cc: kys, haiyangz, wei.liu, decui, longli, linux-hyperv, linux-kernel
In-Reply-To: <20260511-ancient-naughty-bat-f58c2c@anirudhrb>

On Mon, May 11, 2026 at 01:48:56PM +0000, Anirudh Rayabharam wrote:
> On Thu, May 07, 2026 at 03:43:09PM +0000, Stanislav Kinsburskii wrote:
> > mshv_prepare_pinned_region() returns 0 (success) when mshv_region_map()
> > fails on an unencrypted partition. The condition on the error path:
> > 
> >     if (ret && mshv_partition_encrypted(partition))
> > 
> > only handles map failures for encrypted partitions — if the partition is
> > not encrypted and the map fails, execution falls through to 'return 0',
> > silently ignoring the error.
> > 
> > Additionally, calling mshv_region_invalidate() inline on map failure
> > zeroes the mreg_pages array before the caller's cleanup path
> > (mshv_region_destroy) can call mshv_region_unmap(). Since unmap skips
> > pages where mreg_pages[offset] is NULL, this can leave stale SLAT
> > mappings for partially-mapped pages.
> > 
> > Fix by returning immediately on success and falling through to error
> > return on failure. For unencrypted partitions, the caller's
> > mshv_region_destroy() handles unmap followed by invalidate in the
> > correct order. For encrypted partitions where re-sharing fails, zero
> > the page array without unpinning — the pages are inaccessible to the
> > host and must not be unpinned, but zeroing prevents
> > mshv_region_destroy() from attempting to unpin them.
> 
> mshv_region_destroy() calls mshv_region_invalidate() which calls
> mshv_region_invalidate_pages() which just does:
> 
> 	static void mshv_region_invalidate_pages(struct mshv_mem_region *region,
> 						 u64 page_offset, u64 page_count)
> 	{
> 		if (region->mreg_type == MSHV_REGION_TYPE_MEM_PINNED)
> 			unpin_user_pages(region->mreg_pages + page_offset, page_count);
> 
> 		memset(region->mreg_pages + page_offset, 0,
> 		       page_count * sizeof(struct page *));
> 	}
> 
> It doesn't checked for zeroed pages before unpinning.
> 
> > 
> > Fixes: 621191d709b14 ("Drivers: hv: Introduce mshv_root module to expose /dev/mshv to VMMs")
> > Signed-off-by: Stanislav Kinsburskii <skinsburskii@linux.microsoft.com>
> > ---
> >  drivers/hv/mshv_root_main.c |   26 ++++++++++++++++----------
> >  1 file changed, 16 insertions(+), 10 deletions(-)
> > 
> > diff --git a/drivers/hv/mshv_root_main.c b/drivers/hv/mshv_root_main.c
> > index 665d565899c15..7e4252b6bc65c 100644
> > --- a/drivers/hv/mshv_root_main.c
> > +++ b/drivers/hv/mshv_root_main.c
> > @@ -1360,32 +1360,38 @@ static int mshv_prepare_pinned_region(struct mshv_mem_region *region)
> >  			pt_err(partition,
> >  			       "Failed to unshare memory region (guest_pfn: %llu): %d\n",
> >  			       region->start_gfn, ret);
> > -			goto invalidate_region;
> > +			goto err_out;
> >  		}
> >  	}
> >  
> >  	ret = mshv_region_map(region);
> > -	if (ret && mshv_partition_encrypted(partition)) {
> > +	if (ret)
> > +		goto share_region;
> > +
> > +	return 0;
> > +
> > +share_region:
> > +	if (mshv_partition_encrypted(partition)) {
> >  		int shrc;
> >  
> >  		shrc = mshv_region_share(region);
> >  		if (!shrc)
> > -			goto invalidate_region;
> > +			goto err_out;
> >  
> >  		pt_err(partition,
> >  		       "Failed to share memory region (guest_pfn: %llu): %d\n",
> >  		       region->start_gfn, shrc);
> >  		/*
> > -		 * Don't unpin if marking shared failed because pages are no
> > -		 * longer mapped in the host, ie root, anymore.
> > +		 * Re-sharing failed — the pages remain inaccessible to the
> > +		 * host.  Zero the page array so that mshv_region_destroy()
> > +		 * won't attempt to unpin them (leaking the page references
> > +		 * is intentional; unpinning host-inaccessible pages would be
> > +		 * unsafe).
> >  		 */
> 
> Actually, mshv_region_destroy() also does mshv_region_share(). Maybe we
> can remove it from here altogether. Either way, should this zeroing
> logic be added there too so as not to crash the host by unpinning pages
> that weren't marked shared?
> 

Indeed.
The issue with all this code is that we are leaking state in
mshv_region_pin if we wimply return from it: we don't know if the pages
are pinned or unshared (or mapped) in the destruction callback.
We should either introduce a state variable to track this or just don't
call the generic mshv_region_put on case of region creation error.
The latter approch make mshv_map_user_memory idempotent and thus looks
like a better way forward.
What do you think?

Thanks,
Stanislav

> >From mshv_region_destroy():
> 
> 	if (mshv_partition_encrypted(partition)) {
> 		ret = mshv_region_share(region);
> 		if (ret) {
> 			pt_err(partition,
> 			       "Failed to regain access to memory, unpinning user pages will fail and crash the host error: %d\n",
> 			       ret);
> 			return;
> 		}
> 	}
> 
> Thanks,
> Anirudh.
> 

^ permalink raw reply

* Re: [PATCH v4 17/18] mshv: Publish VP to pt_vp_array before installing the file descriptor
From: Anirudh Rayabharam @ 2026-05-11 14:26 UTC (permalink / raw)
  To: Stanislav Kinsburskii
  Cc: kys, haiyangz, wei.liu, decui, longli, linux-hyperv, linux-kernel
In-Reply-To: <177816867231.21765.15171005242069873878.stgit@skinsburskii-cloud-desktop.internal.cloudapp.net>

On Thu, May 07, 2026 at 03:44:32PM +0000, Stanislav Kinsburskii wrote:
> mshv_partition_ioctl_create_vp() called anon_inode_getfd() before
> publishing the new VP into partition->pt_vp_array.  anon_inode_getfd()
> includes fd_install(), so the fd was live in current->files before the
> publish ran.
> 
> A concurrent MSHV_RUN_VP ioctl on that fd does not serialise against the
> in-progress MSHV_CREATE_VP — it takes vp->vp_mutex, not the partition
> mutex.  Once the VP starts running and traps, mshv_intercept_isr() can look
> up partition->pt_vp_array[vp_index] and observe NULL, silently dropping the
> intercept message.
> 
> Split the fd creation: reserve an fd with get_unused_fd_flags(), create the
> file with anon_inode_getfile(), publish the VP via smp_store_release(), and
> finally call fd_install() as the userspace-visibility commit point.
> 
> Fixes: 621191d709b14 ("Drivers: hv: Introduce mshv_root module to expose /dev/mshv to VMMs")
> Signed-off-by: Stanislav Kinsburskii <skinsburskii@linux.microsoft.com>
> ---
>  drivers/hv/mshv_root_main.c |   29 ++++++++++++++++++++++-------
>  1 file changed, 22 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/hv/mshv_root_main.c b/drivers/hv/mshv_root_main.c
> index e32f6e0f9f637..1c18d1c1f7947 100644
> --- a/drivers/hv/mshv_root_main.c
> +++ b/drivers/hv/mshv_root_main.c
> @@ -1142,6 +1142,8 @@ mshv_partition_ioctl_create_vp(struct mshv_partition *partition,
>  	struct mshv_vp *vp;
>  	struct page *intercept_msg_page, *register_page, *ghcb_page;
>  	struct hv_stats_page *stats_pages[2];
> +	struct file *file;
> +	int fd;
>  	long ret;
>  
>  	if (copy_from_user(&args, arg, sizeof(args)))
> @@ -1214,14 +1216,18 @@ mshv_partition_ioctl_create_vp(struct mshv_partition *partition,
>  	if (ret)
>  		goto put_partition;
>  
> -	/*
> -	 * Keep anon_inode_getfd last: it installs fd in the file struct and
> -	 * thus makes the state accessible in user space.
> -	 */
> -	ret = anon_inode_getfd("mshv_vp", &mshv_vp_fops, vp,
> -			       O_RDWR | O_CLOEXEC);

Why not just move this anon_inode_getfd() after the smp_store_release()
call?

Thanks,
Anirudh.


^ permalink raw reply

* Re: [PATCH v4 03/18] mshv: Fix race in mshv_irqfd_deassign
From: Anirudh Rayabharam @ 2026-05-11 13:57 UTC (permalink / raw)
  To: Stanislav Kinsburskii
  Cc: kys, haiyangz, wei.liu, decui, longli, linux-hyperv, linux-kernel
In-Reply-To: <177816859556.21765.6200058614819106223.stgit@skinsburskii-cloud-desktop.internal.cloudapp.net>

On Thu, May 07, 2026 at 03:43:15PM +0000, Stanislav Kinsburskii wrote:
> mshv_irqfd_deactivate() and the hlist traversal of pt_irqfds_list
> require pt->pt_irqfds_lock to be held, but mshv_irqfd_deassign()
> omits it. This races with the EPOLLHUP path in mshv_irqfd_wakeup(),
> which does take the lock before calling mshv_irqfd_deactivate().
> 
> Additionally, mshv_irqfd_deactivate() uses hlist_del() which poisons
> the node pointers rather than resetting them. Since
> mshv_irqfd_is_active() relies on hlist_unhashed() (checks pprev ==
> NULL), a poisoned node still appears active. If a concurrent path calls
> mshv_irqfd_deactivate() again on the same irqfd, the guard fails to
> prevent a double hlist_del() on poisoned pointers.
> 
> Fix both issues:
> - Add the missing spin_lock_irq/spin_unlock_irq around the list
>   traversal in mshv_irqfd_deassign(), matching mshv_irqfd_release().
> - Use hlist_del_init() instead of hlist_del() so the node is properly
>   marked as unhashed after removal, making the is_active guard reliable.
> 
> Fixes: 621191d709b14 ("Drivers: hv: Introduce mshv_root module to expose /dev/mshv to VMMs")
> Signed-off-by: Stanislav Kinsburskii <skinsburskii@linux.microsoft.com>
> ---
>  drivers/hv/mshv_eventfd.c |    5 +++--
>  1 file changed, 3 insertions(+), 2 deletions(-)

Reviewed-by: Anirudh Rayabharam (Microsoft) <anirudh@anirudhrb.com>


^ permalink raw reply

* Re: [PATCH v4 02/18] mshv: Fix mshv_prepare_pinned_region error path for unencrypted partitions
From: Anirudh Rayabharam @ 2026-05-11 13:48 UTC (permalink / raw)
  To: Stanislav Kinsburskii
  Cc: kys, haiyangz, wei.liu, decui, longli, linux-hyperv, linux-kernel
In-Reply-To: <177816858991.21765.2088226987194959542.stgit@skinsburskii-cloud-desktop.internal.cloudapp.net>

On Thu, May 07, 2026 at 03:43:09PM +0000, Stanislav Kinsburskii wrote:
> mshv_prepare_pinned_region() returns 0 (success) when mshv_region_map()
> fails on an unencrypted partition. The condition on the error path:
> 
>     if (ret && mshv_partition_encrypted(partition))
> 
> only handles map failures for encrypted partitions — if the partition is
> not encrypted and the map fails, execution falls through to 'return 0',
> silently ignoring the error.
> 
> Additionally, calling mshv_region_invalidate() inline on map failure
> zeroes the mreg_pages array before the caller's cleanup path
> (mshv_region_destroy) can call mshv_region_unmap(). Since unmap skips
> pages where mreg_pages[offset] is NULL, this can leave stale SLAT
> mappings for partially-mapped pages.
> 
> Fix by returning immediately on success and falling through to error
> return on failure. For unencrypted partitions, the caller's
> mshv_region_destroy() handles unmap followed by invalidate in the
> correct order. For encrypted partitions where re-sharing fails, zero
> the page array without unpinning — the pages are inaccessible to the
> host and must not be unpinned, but zeroing prevents
> mshv_region_destroy() from attempting to unpin them.

mshv_region_destroy() calls mshv_region_invalidate() which calls
mshv_region_invalidate_pages() which just does:

	static void mshv_region_invalidate_pages(struct mshv_mem_region *region,
						 u64 page_offset, u64 page_count)
	{
		if (region->mreg_type == MSHV_REGION_TYPE_MEM_PINNED)
			unpin_user_pages(region->mreg_pages + page_offset, page_count);

		memset(region->mreg_pages + page_offset, 0,
		       page_count * sizeof(struct page *));
	}

It doesn't checked for zeroed pages before unpinning.

> 
> Fixes: 621191d709b14 ("Drivers: hv: Introduce mshv_root module to expose /dev/mshv to VMMs")
> Signed-off-by: Stanislav Kinsburskii <skinsburskii@linux.microsoft.com>
> ---
>  drivers/hv/mshv_root_main.c |   26 ++++++++++++++++----------
>  1 file changed, 16 insertions(+), 10 deletions(-)
> 
> diff --git a/drivers/hv/mshv_root_main.c b/drivers/hv/mshv_root_main.c
> index 665d565899c15..7e4252b6bc65c 100644
> --- a/drivers/hv/mshv_root_main.c
> +++ b/drivers/hv/mshv_root_main.c
> @@ -1360,32 +1360,38 @@ static int mshv_prepare_pinned_region(struct mshv_mem_region *region)
>  			pt_err(partition,
>  			       "Failed to unshare memory region (guest_pfn: %llu): %d\n",
>  			       region->start_gfn, ret);
> -			goto invalidate_region;
> +			goto err_out;
>  		}
>  	}
>  
>  	ret = mshv_region_map(region);
> -	if (ret && mshv_partition_encrypted(partition)) {
> +	if (ret)
> +		goto share_region;
> +
> +	return 0;
> +
> +share_region:
> +	if (mshv_partition_encrypted(partition)) {
>  		int shrc;
>  
>  		shrc = mshv_region_share(region);
>  		if (!shrc)
> -			goto invalidate_region;
> +			goto err_out;
>  
>  		pt_err(partition,
>  		       "Failed to share memory region (guest_pfn: %llu): %d\n",
>  		       region->start_gfn, shrc);
>  		/*
> -		 * Don't unpin if marking shared failed because pages are no
> -		 * longer mapped in the host, ie root, anymore.
> +		 * Re-sharing failed — the pages remain inaccessible to the
> +		 * host.  Zero the page array so that mshv_region_destroy()
> +		 * won't attempt to unpin them (leaking the page references
> +		 * is intentional; unpinning host-inaccessible pages would be
> +		 * unsafe).
>  		 */

Actually, mshv_region_destroy() also does mshv_region_share(). Maybe we
can remove it from here altogether. Either way, should this zeroing
logic be added there too so as not to crash the host by unpinning pages
that weren't marked shared?

From mshv_region_destroy():

	if (mshv_partition_encrypted(partition)) {
		ret = mshv_region_share(region);
		if (ret) {
			pt_err(partition,
			       "Failed to regain access to memory, unpinning user pages will fail and crash the host error: %d\n",
			       ret);
			return;
		}
	}

Thanks,
Anirudh.


^ permalink raw reply

* [PATCH v2 10/10] drm/vmwgfx: Remove unused field struct vmwgfx_du_update_plane.old_state
From: Thomas Zimmermann @ 2026-05-11 12:22 UTC (permalink / raw)
  To: mripard, maarten.lankhorst, airlied, airlied, simona, admin,
	gargaditya08, paul, zack.rusin, bcm-kernel-feedback-list
  Cc: dri-devel, linux-hyperv, intel-gfx, intel-xe, linux-mips,
	virtualization, Thomas Zimmermann
In-Reply-To: <20260511122421.114014-1-tzimmermann@suse.de>

Plane updates no longer require the old plane state. Remove the field
from struct vmwgfx_du_update_plane and fix all callers.

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
---
 drivers/gpu/drm/vmwgfx/vmwgfx_kms.h  |  2 --
 drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c | 12 ++----------
 drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c | 11 ++---------
 3 files changed, 4 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h
index 2224d7d91d1b..8c2072b82062 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h
@@ -19,7 +19,6 @@
 /**
  * struct vmw_du_update_plane - Closure structure for vmw_du_helper_plane_update
  * @plane: Plane which is being updated.
- * @old_state: Old state of plane.
  * @dev_priv: Device private.
  * @du: Display unit on which to update the plane.
  * @vfb: Framebuffer which is blitted to display unit.
@@ -102,7 +101,6 @@ struct vmw_du_update_plane {
 				    struct drm_rect *bb);
 
 	struct drm_plane *plane;
-	struct drm_plane_state *old_state;
 	struct vmw_private *dev_priv;
 	struct vmw_display_unit *du;
 	struct vmw_framebuffer *vfb;
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c b/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c
index c83061cf7455..fa84bc7ab5bb 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c
@@ -530,7 +530,6 @@ static uint32_t vmw_stud_bo_post_clip(struct vmw_du_update_plane  *update,
  */
 static int vmw_sou_plane_update_bo(struct vmw_private *dev_priv,
 				   struct drm_plane *plane,
-				   struct drm_plane_state *old_state,
 				   struct vmw_framebuffer *vfb,
 				   struct vmw_fence_obj **out_fence)
 {
@@ -538,7 +537,6 @@ static int vmw_sou_plane_update_bo(struct vmw_private *dev_priv,
 
 	memset(&bo_update, 0, sizeof(struct vmw_du_update_plane_buffer));
 	bo_update.base.plane = plane;
-	bo_update.base.old_state = old_state;
 	bo_update.base.dev_priv = dev_priv;
 	bo_update.base.du = vmw_crtc_to_du(plane->state->crtc);
 	bo_update.base.vfb = vfb;
@@ -692,7 +690,6 @@ static uint32_t vmw_sou_surface_post_clip(struct vmw_du_update_plane *update,
  */
 static int vmw_sou_plane_update_surface(struct vmw_private *dev_priv,
 					struct drm_plane *plane,
-					struct drm_plane_state *old_state,
 					struct vmw_framebuffer *vfb,
 					struct vmw_fence_obj **out_fence)
 {
@@ -700,7 +697,6 @@ static int vmw_sou_plane_update_surface(struct vmw_private *dev_priv,
 
 	memset(&srf_update, 0, sizeof(struct vmw_du_update_plane_surface));
 	srf_update.base.plane = plane;
-	srf_update.base.old_state = old_state;
 	srf_update.base.dev_priv = dev_priv;
 	srf_update.base.du = vmw_crtc_to_du(plane->state->crtc);
 	srf_update.base.vfb = vfb;
@@ -721,7 +717,6 @@ static void
 vmw_sou_primary_plane_atomic_update(struct drm_plane *plane,
 				    struct drm_atomic_commit *state)
 {
-	struct drm_plane_state *old_state = drm_atomic_get_old_plane_state(state, plane);
 	struct drm_plane_state *new_state = drm_atomic_get_new_plane_state(state, plane);
 	struct drm_crtc *crtc = new_state->crtc;
 	struct vmw_fence_obj *fence = NULL;
@@ -734,12 +729,9 @@ vmw_sou_primary_plane_atomic_update(struct drm_plane *plane,
 			vmw_framebuffer_to_vfb(new_state->fb);
 
 		if (vfb->bo)
-			ret = vmw_sou_plane_update_bo(dev_priv, plane,
-						      old_state, vfb, &fence);
+			ret = vmw_sou_plane_update_bo(dev_priv, plane, vfb, &fence);
 		else
-			ret = vmw_sou_plane_update_surface(dev_priv, plane,
-							   old_state, vfb,
-							   &fence);
+			ret = vmw_sou_plane_update_surface(dev_priv, plane, vfb, &fence);
 		if (ret != 0)
 			DRM_ERROR("Failed to update screen.\n");
 	} else {
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c b/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c
index f0df2b1c8465..474e3badb80f 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c
@@ -1240,7 +1240,6 @@ vmw_stdu_bo_populate_update_cpu(struct vmw_du_update_plane  *update, void *cmd,
  */
 static int vmw_stdu_plane_update_bo(struct vmw_private *dev_priv,
 				    struct drm_plane *plane,
-				    struct drm_plane_state *old_state,
 				    struct vmw_framebuffer *vfb,
 				    struct vmw_fence_obj **out_fence)
 {
@@ -1248,7 +1247,6 @@ static int vmw_stdu_plane_update_bo(struct vmw_private *dev_priv,
 
 	memset(&bo_update, 0, sizeof(struct vmw_du_update_plane_buffer));
 	bo_update.base.plane = plane;
-	bo_update.base.old_state = old_state;
 	bo_update.base.dev_priv = dev_priv;
 	bo_update.base.du = vmw_crtc_to_du(plane->state->crtc);
 	bo_update.base.vfb = vfb;
@@ -1350,7 +1348,6 @@ vmw_stdu_surface_populate_update(struct vmw_du_update_plane  *update, void *cmd,
  */
 static int vmw_stdu_plane_update_surface(struct vmw_private *dev_priv,
 					 struct drm_plane *plane,
-					 struct drm_plane_state *old_state,
 					 struct vmw_framebuffer *vfb,
 					 struct vmw_fence_obj **out_fence)
 {
@@ -1363,7 +1360,6 @@ static int vmw_stdu_plane_update_surface(struct vmw_private *dev_priv,
 
 	memset(&srf_update, 0, sizeof(struct vmw_du_update_plane));
 	srf_update.plane = plane;
-	srf_update.old_state = old_state;
 	srf_update.dev_priv = dev_priv;
 	srf_update.du = vmw_crtc_to_du(plane->state->crtc);
 	srf_update.vfb = vfb;
@@ -1424,12 +1420,9 @@ vmw_stdu_primary_plane_atomic_update(struct drm_plane *plane,
 			DRM_ERROR("Failed to bind surface to STDU.\n");
 
 		if (vfb->bo)
-			ret = vmw_stdu_plane_update_bo(dev_priv, plane,
-						       old_state, vfb, &fence);
+			ret = vmw_stdu_plane_update_bo(dev_priv, plane, vfb, &fence);
 		else
-			ret = vmw_stdu_plane_update_surface(dev_priv, plane,
-							    old_state, vfb,
-							    &fence);
+			ret = vmw_stdu_plane_update_surface(dev_priv, plane, vfb, &fence);
 		if (ret)
 			DRM_ERROR("Failed to update STDU.\n");
 	} else {
-- 
2.54.0


^ permalink raw reply related

* [PATCH v2 09/10] drm/damage-helper: Rename state parameters in damage helpers
From: Thomas Zimmermann @ 2026-05-11 12:22 UTC (permalink / raw)
  To: mripard, maarten.lankhorst, airlied, airlied, simona, admin,
	gargaditya08, paul, zack.rusin, bcm-kernel-feedback-list
  Cc: dri-devel, linux-hyperv, intel-gfx, intel-xe, linux-mips,
	virtualization, Thomas Zimmermann
In-Reply-To: <20260511122421.114014-1-tzimmermann@suse.de>

Rename some of the state parameters of the damage-helper functions to
align them with each other and other helpers. No functional changes.

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
---
 drivers/gpu/drm/drm_damage_helper.c | 20 ++++++++++----------
 include/drm/drm_damage_helper.h     |  4 ++--
 2 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/drm_damage_helper.c b/drivers/gpu/drm/drm_damage_helper.c
index 28b847636253..23701e5c51b7 100644
--- a/drivers/gpu/drm/drm_damage_helper.c
+++ b/drivers/gpu/drm/drm_damage_helper.c
@@ -209,7 +209,7 @@ EXPORT_SYMBOL(drm_atomic_helper_dirtyfb);
 /**
  * drm_atomic_helper_damage_iter_init - Initialize the damage iterator.
  * @iter: The iterator to initialize.
- * @state: Plane state from which to iterate the damage clips.
+ * @plane_state: Plane state from which to iterate the damage clips.
  *
  * Initialize an iterator, which clips plane damage
  * &drm_plane_state.fb_damage_clips to plane &drm_plane_state.src. This iterator
@@ -225,26 +225,26 @@ EXPORT_SYMBOL(drm_atomic_helper_dirtyfb);
  */
 void
 drm_atomic_helper_damage_iter_init(struct drm_atomic_helper_damage_iter *iter,
-				   const struct drm_plane_state *state)
+				   const struct drm_plane_state *plane_state)
 {
 	struct drm_rect src;
 	memset(iter, 0, sizeof(*iter));
 
-	if (!state || !state->crtc || !state->fb || !state->visible)
+	if (!plane_state || !plane_state->crtc || !plane_state->fb || !plane_state->visible)
 		return;
 
-	iter->clips = (struct drm_rect *)drm_plane_get_damage_clips(state);
-	iter->num_clips = drm_plane_get_damage_clips_count(state);
+	iter->clips = (struct drm_rect *)drm_plane_get_damage_clips(plane_state);
+	iter->num_clips = drm_plane_get_damage_clips_count(plane_state);
 
 	/* Round down for x1/y1 and round up for x2/y2 to catch all pixels */
-	src = drm_plane_state_src(state);
+	src = drm_plane_state_src(plane_state);
 
 	iter->plane_src.x1 = src.x1 >> 16;
 	iter->plane_src.y1 = src.y1 >> 16;
 	iter->plane_src.x2 = (src.x2 >> 16) + !!(src.x2 & 0xFFFF);
 	iter->plane_src.y2 = (src.y2 >> 16) + !!(src.y2 & 0xFFFF);
 
-	if (!iter->clips || state->ignore_damage_clips) {
+	if (!iter->clips || plane_state->ignore_damage_clips) {
 		iter->clips = NULL;
 		iter->num_clips = 0;
 		iter->full_update = true;
@@ -296,7 +296,7 @@ EXPORT_SYMBOL(drm_atomic_helper_damage_iter_next);
 
 /**
  * drm_atomic_helper_damage_merged - Merged plane damage
- * @state: Plane state from which to iterate the damage clips.
+ * @plane_state: Plane state from which to iterate the damage clips.
  * @rect: Returns the merged damage rectangle
  *
  * This function merges any valid plane damage clips into one rectangle and
@@ -308,7 +308,7 @@ EXPORT_SYMBOL(drm_atomic_helper_damage_iter_next);
  * Returns:
  * True if there is valid plane damage otherwise false.
  */
-bool drm_atomic_helper_damage_merged(const struct drm_plane_state *state,
+bool drm_atomic_helper_damage_merged(const struct drm_plane_state *plane_state,
 				     struct drm_rect *rect)
 {
 	struct drm_atomic_helper_damage_iter iter;
@@ -320,7 +320,7 @@ bool drm_atomic_helper_damage_merged(const struct drm_plane_state *state,
 	rect->x2 = 0;
 	rect->y2 = 0;
 
-	drm_atomic_helper_damage_iter_init(&iter, state);
+	drm_atomic_helper_damage_iter_init(&iter, plane_state);
 	drm_atomic_for_each_plane_damage(&iter, &clip) {
 		rect->x1 = min(rect->x1, clip.x1);
 		rect->y1 = min(rect->y1, clip.y1);
diff --git a/include/drm/drm_damage_helper.h b/include/drm/drm_damage_helper.h
index b5a4de779db6..4a1ac47b9051 100644
--- a/include/drm/drm_damage_helper.h
+++ b/include/drm/drm_damage_helper.h
@@ -73,11 +73,11 @@ int drm_atomic_helper_dirtyfb(struct drm_framebuffer *fb,
 			      unsigned int num_clips);
 void
 drm_atomic_helper_damage_iter_init(struct drm_atomic_helper_damage_iter *iter,
-				   const struct drm_plane_state *state);
+				   const struct drm_plane_state *plane_state);
 bool
 drm_atomic_helper_damage_iter_next(struct drm_atomic_helper_damage_iter *iter,
 				   struct drm_rect *rect);
-bool drm_atomic_helper_damage_merged(const struct drm_plane_state *state,
+bool drm_atomic_helper_damage_merged(const struct drm_plane_state *plane_state,
 				     struct drm_rect *rect);
 
 #endif
-- 
2.54.0


^ permalink raw reply related

* [PATCH v2 08/10] drm/atomic_helper: Do not evaluate plane damage before atomic_check
From: Thomas Zimmermann @ 2026-05-11 12:22 UTC (permalink / raw)
  To: mripard, maarten.lankhorst, airlied, airlied, simona, admin,
	gargaditya08, paul, zack.rusin, bcm-kernel-feedback-list
  Cc: dri-devel, linux-hyperv, intel-gfx, intel-xe, linux-mips,
	virtualization, Thomas Zimmermann
In-Reply-To: <20260511122421.114014-1-tzimmermann@suse.de>

Remove the call to drm_atomic_helper_check_plane_damage() from before
calling the atomic_check helpers. The call has no longer any purpose,
as the actual evaluation happens after running atomic_check.

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
---
 drivers/gpu/drm/drm_atomic_helper.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c
index 823ce60d45b4..57ffa8c8d641 100644
--- a/drivers/gpu/drm/drm_atomic_helper.c
+++ b/drivers/gpu/drm/drm_atomic_helper.c
@@ -1034,8 +1034,6 @@ drm_atomic_helper_check_planes(struct drm_device *dev,
 
 		drm_atomic_helper_plane_changed(state, old_plane_state, new_plane_state, plane);
 
-		drm_atomic_helper_check_plane_damage(state, old_plane_state, new_plane_state);
-
 		if (!funcs || !funcs->atomic_check)
 			continue;
 
-- 
2.54.0


^ permalink raw reply related

* [PATCH v2 07/10] drm/damage-helper: Remove old state from drm_atomic_helper_damage_merged()
From: Thomas Zimmermann @ 2026-05-11 12:22 UTC (permalink / raw)
  To: mripard, maarten.lankhorst, airlied, airlied, simona, admin,
	gargaditya08, paul, zack.rusin, bcm-kernel-feedback-list
  Cc: dri-devel, linux-hyperv, intel-gfx, intel-xe, linux-mips,
	virtualization, Thomas Zimmermann
In-Reply-To: <20260511122421.114014-1-tzimmermann@suse.de>

Nothing in drm_atomic_helper_damage_merged() requires the old
plane state. Remove the parameter and mass-convert callers.

Most callers now no longer require the old plane state in their plane's
atomic_update helper. Remove it as well.

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
---
 drivers/gpu/drm/ast/ast_cursor.c           |  3 +--
 drivers/gpu/drm/drm_damage_helper.c        |  4 +---
 drivers/gpu/drm/drm_mipi_dbi.c             |  3 +--
 drivers/gpu/drm/i915/display/intel_plane.c | 11 ++---------
 drivers/gpu/drm/i915/display/intel_psr.c   |  3 +--
 drivers/gpu/drm/sitronix/st7586.c          |  3 +--
 drivers/gpu/drm/tiny/gm12u320.c            |  2 +-
 drivers/gpu/drm/tiny/ili9225.c             |  3 +--
 drivers/gpu/drm/tiny/repaper.c             |  2 +-
 drivers/gpu/drm/tiny/sharp-memory.c        |  3 +--
 drivers/gpu/drm/virtio/virtgpu_plane.c     |  2 +-
 drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c       |  4 +---
 include/drm/drm_damage_helper.h            |  3 +--
 13 files changed, 14 insertions(+), 32 deletions(-)

diff --git a/drivers/gpu/drm/ast/ast_cursor.c b/drivers/gpu/drm/ast/ast_cursor.c
index fd19c45f2abe..12d5f93eec5f 100644
--- a/drivers/gpu/drm/ast/ast_cursor.c
+++ b/drivers/gpu/drm/ast/ast_cursor.c
@@ -251,7 +251,6 @@ static void ast_cursor_plane_helper_atomic_update(struct drm_plane *plane,
 	struct drm_plane_state *plane_state = drm_atomic_get_new_plane_state(state, plane);
 	struct drm_shadow_plane_state *shadow_plane_state = to_drm_shadow_plane_state(plane_state);
 	struct drm_framebuffer *fb = plane_state->fb;
-	struct drm_plane_state *old_plane_state = drm_atomic_get_old_plane_state(state, plane);
 	struct ast_device *ast = to_ast_device(plane->dev);
 	struct drm_rect damage;
 	u64 dst_off = ast_plane->offset;
@@ -266,7 +265,7 @@ static void ast_cursor_plane_helper_atomic_update(struct drm_plane *plane,
 	 * engine to the offset.
 	 */
 
-	if (drm_atomic_helper_damage_merged(old_plane_state, plane_state, &damage)) {
+	if (drm_atomic_helper_damage_merged(plane_state, &damage)) {
 		const u8 *argb4444 = ast_cursor_plane_get_argb4444(ast_cursor_plane,
 								   shadow_plane_state,
 								   &damage);
diff --git a/drivers/gpu/drm/drm_damage_helper.c b/drivers/gpu/drm/drm_damage_helper.c
index 28f26234523d..28b847636253 100644
--- a/drivers/gpu/drm/drm_damage_helper.c
+++ b/drivers/gpu/drm/drm_damage_helper.c
@@ -296,7 +296,6 @@ EXPORT_SYMBOL(drm_atomic_helper_damage_iter_next);
 
 /**
  * drm_atomic_helper_damage_merged - Merged plane damage
- * @old_state: Old plane state for validation.
  * @state: Plane state from which to iterate the damage clips.
  * @rect: Returns the merged damage rectangle
  *
@@ -309,8 +308,7 @@ EXPORT_SYMBOL(drm_atomic_helper_damage_iter_next);
  * Returns:
  * True if there is valid plane damage otherwise false.
  */
-bool drm_atomic_helper_damage_merged(const struct drm_plane_state *old_state,
-				     const struct drm_plane_state *state,
+bool drm_atomic_helper_damage_merged(const struct drm_plane_state *state,
 				     struct drm_rect *rect)
 {
 	struct drm_atomic_helper_damage_iter iter;
diff --git a/drivers/gpu/drm/drm_mipi_dbi.c b/drivers/gpu/drm/drm_mipi_dbi.c
index 25cf04d029f7..4da201c38c93 100644
--- a/drivers/gpu/drm/drm_mipi_dbi.c
+++ b/drivers/gpu/drm/drm_mipi_dbi.c
@@ -380,7 +380,6 @@ void drm_mipi_dbi_plane_helper_atomic_update(struct drm_plane *plane,
 	struct drm_plane_state *plane_state = plane->state;
 	struct drm_shadow_plane_state *shadow_plane_state = to_drm_shadow_plane_state(plane_state);
 	struct drm_framebuffer *fb = plane_state->fb;
-	struct drm_plane_state *old_plane_state = drm_atomic_get_old_plane_state(state, plane);
 	struct drm_rect rect;
 	int idx;
 
@@ -388,7 +387,7 @@ void drm_mipi_dbi_plane_helper_atomic_update(struct drm_plane *plane,
 		return;
 
 	if (drm_dev_enter(plane->dev, &idx)) {
-		if (drm_atomic_helper_damage_merged(old_plane_state, plane_state, &rect))
+		if (drm_atomic_helper_damage_merged(plane_state, &rect))
 			mipi_dbi_fb_dirty(&shadow_plane_state->data[0], fb, &rect,
 					  &shadow_plane_state->fmtcnv_state);
 		drm_dev_exit(idx);
diff --git a/drivers/gpu/drm/i915/display/intel_plane.c b/drivers/gpu/drm/i915/display/intel_plane.c
index c181a7d063ec..57107cce9267 100644
--- a/drivers/gpu/drm/i915/display/intel_plane.c
+++ b/drivers/gpu/drm/i915/display/intel_plane.c
@@ -346,7 +346,6 @@ static void intel_plane_clear_hw_state(struct intel_plane_state *plane_state)
 
 static void
 intel_plane_copy_uapi_plane_damage(struct intel_plane_state *new_plane_state,
-				   const struct intel_plane_state *old_uapi_plane_state,
 				   const struct intel_plane_state *new_uapi_plane_state)
 {
 	struct intel_display *display = to_intel_display(new_plane_state);
@@ -356,10 +355,9 @@ intel_plane_copy_uapi_plane_damage(struct intel_plane_state *new_plane_state,
 	if (DISPLAY_VER(display) < 12)
 		return;
 
-	if (!drm_atomic_helper_damage_merged(&old_uapi_plane_state->uapi,
-					     &new_uapi_plane_state->uapi,
+	if (!drm_atomic_helper_damage_merged(&new_uapi_plane_state->uapi,
 					     damage))
-		/* Incase helper fails, mark whole plane region as damage */
+		/* In case the helper fails, mark whole plane region as damage */
 		*damage = drm_plane_state_src(&new_uapi_plane_state->uapi);
 }
 
@@ -815,7 +813,6 @@ static int plane_atomic_check(struct intel_atomic_state *state,
 	const struct intel_plane_state *old_plane_state =
 		intel_atomic_get_old_plane_state(state, plane);
 	const struct intel_plane_state *new_primary_crtc_plane_state;
-	const struct intel_plane_state *old_primary_crtc_plane_state;
 	struct intel_crtc *crtc = intel_crtc_for_pipe(display, plane->pipe);
 	const struct intel_crtc_state *old_crtc_state =
 		intel_atomic_get_old_crtc_state(state, crtc);
@@ -830,15 +827,11 @@ static int plane_atomic_check(struct intel_atomic_state *state,
 
 		new_primary_crtc_plane_state =
 			intel_atomic_get_new_plane_state(state, primary_crtc_plane);
-		old_primary_crtc_plane_state =
-			intel_atomic_get_old_plane_state(state, primary_crtc_plane);
 	} else {
 		new_primary_crtc_plane_state = new_plane_state;
-		old_primary_crtc_plane_state = old_plane_state;
 	}
 
 	intel_plane_copy_uapi_plane_damage(new_plane_state,
-					   old_primary_crtc_plane_state,
 					   new_primary_crtc_plane_state);
 
 	intel_plane_copy_uapi_to_hw_state(new_plane_state,
diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c
index 9958230a3dd9..adb936806488 100644
--- a/drivers/gpu/drm/i915/display/intel_psr.c
+++ b/drivers/gpu/drm/i915/display/intel_psr.c
@@ -2970,8 +2970,7 @@ int intel_psr2_sel_fetch_update(struct intel_atomic_state *state,
 		src = drm_plane_state_src(&new_plane_state->uapi);
 		drm_rect_fp_to_int(&src, &src);
 
-		if (!drm_atomic_helper_damage_merged(&old_plane_state->uapi,
-						     &new_plane_state->uapi, &damaged_area))
+		if (!drm_atomic_helper_damage_merged(&new_plane_state->uapi, &damaged_area))
 			continue;
 
 		damaged_area.y1 += new_plane_state->uapi.dst.y1 - src.y1;
diff --git a/drivers/gpu/drm/sitronix/st7586.c b/drivers/gpu/drm/sitronix/st7586.c
index 28b2245f6b79..2cc0312595a4 100644
--- a/drivers/gpu/drm/sitronix/st7586.c
+++ b/drivers/gpu/drm/sitronix/st7586.c
@@ -176,7 +176,6 @@ static void st7586_plane_helper_atomic_update(struct drm_plane *plane,
 	struct drm_plane_state *plane_state = plane->state;
 	struct drm_shadow_plane_state *shadow_plane_state = to_drm_shadow_plane_state(plane_state);
 	struct drm_framebuffer *fb = plane_state->fb;
-	struct drm_plane_state *old_plane_state = drm_atomic_get_old_plane_state(state, plane);
 	struct drm_rect rect;
 	int idx;
 
@@ -186,7 +185,7 @@ static void st7586_plane_helper_atomic_update(struct drm_plane *plane,
 	if (!drm_dev_enter(plane->dev, &idx))
 		return;
 
-	if (drm_atomic_helper_damage_merged(old_plane_state, plane_state, &rect))
+	if (drm_atomic_helper_damage_merged(plane_state, &rect))
 		st7586_fb_dirty(&shadow_plane_state->data[0], fb, &rect,
 				&shadow_plane_state->fmtcnv_state);
 
diff --git a/drivers/gpu/drm/tiny/gm12u320.c b/drivers/gpu/drm/tiny/gm12u320.c
index d73dfebb4353..880b965e283a 100644
--- a/drivers/gpu/drm/tiny/gm12u320.c
+++ b/drivers/gpu/drm/tiny/gm12u320.c
@@ -582,7 +582,7 @@ static void gm12u320_pipe_update(struct drm_simple_display_pipe *pipe,
 	struct drm_shadow_plane_state *shadow_plane_state = to_drm_shadow_plane_state(state);
 	struct drm_rect rect;
 
-	if (drm_atomic_helper_damage_merged(old_state, state, &rect))
+	if (drm_atomic_helper_damage_merged(state, &rect))
 		gm12u320_fb_mark_dirty(state->fb, &shadow_plane_state->data[0], &rect);
 }
 
diff --git a/drivers/gpu/drm/tiny/ili9225.c b/drivers/gpu/drm/tiny/ili9225.c
index 5bf52a8fd75b..d821a659a585 100644
--- a/drivers/gpu/drm/tiny/ili9225.c
+++ b/drivers/gpu/drm/tiny/ili9225.c
@@ -185,7 +185,6 @@ static void ili9225_plane_helper_atomic_update(struct drm_plane *plane,
 	struct drm_plane_state *plane_state = plane->state;
 	struct drm_shadow_plane_state *shadow_plane_state = to_drm_shadow_plane_state(plane_state);
 	struct drm_framebuffer *fb = plane_state->fb;
-	struct drm_plane_state *old_plane_state = drm_atomic_get_old_plane_state(state, plane);
 	struct drm_rect rect;
 	int idx;
 
@@ -195,7 +194,7 @@ static void ili9225_plane_helper_atomic_update(struct drm_plane *plane,
 	if (!drm_dev_enter(drm, &idx))
 		return;
 
-	if (drm_atomic_helper_damage_merged(old_plane_state, plane_state, &rect))
+	if (drm_atomic_helper_damage_merged(plane_state, &rect))
 		ili9225_fb_dirty(&shadow_plane_state->data[0], fb, &rect,
 				 &shadow_plane_state->fmtcnv_state);
 
diff --git a/drivers/gpu/drm/tiny/repaper.c b/drivers/gpu/drm/tiny/repaper.c
index c8270591afc7..531831d2b73f 100644
--- a/drivers/gpu/drm/tiny/repaper.c
+++ b/drivers/gpu/drm/tiny/repaper.c
@@ -837,7 +837,7 @@ static void repaper_pipe_update(struct drm_simple_display_pipe *pipe,
 	if (!pipe->crtc.state->active)
 		return;
 
-	if (drm_atomic_helper_damage_merged(old_state, state, &rect))
+	if (drm_atomic_helper_damage_merged(state, &rect))
 		repaper_fb_dirty(state->fb, shadow_plane_state->data,
 				 &shadow_plane_state->fmtcnv_state);
 }
diff --git a/drivers/gpu/drm/tiny/sharp-memory.c b/drivers/gpu/drm/tiny/sharp-memory.c
index 506e6432e70d..1dacd41ddbaa 100644
--- a/drivers/gpu/drm/tiny/sharp-memory.c
+++ b/drivers/gpu/drm/tiny/sharp-memory.c
@@ -241,7 +241,6 @@ static int sharp_memory_plane_atomic_check(struct drm_plane *plane,
 static void sharp_memory_plane_atomic_update(struct drm_plane *plane,
 					     struct drm_atomic_commit *state)
 {
-	struct drm_plane_state *old_state = drm_atomic_get_old_plane_state(state, plane);
 	struct drm_plane_state *plane_state = plane->state;
 	struct drm_shadow_plane_state *shadow_plane_state = to_drm_shadow_plane_state(plane_state);
 	struct sharp_memory_device *smd;
@@ -251,7 +250,7 @@ static void sharp_memory_plane_atomic_update(struct drm_plane *plane,
 	if (!smd->crtc.state->active)
 		return;
 
-	if (drm_atomic_helper_damage_merged(old_state, plane_state, &rect))
+	if (drm_atomic_helper_damage_merged(plane_state, &rect))
 		sharp_memory_fb_dirty(plane_state->fb, shadow_plane_state->data,
 				      &rect, &shadow_plane_state->fmtcnv_state);
 }
diff --git a/drivers/gpu/drm/virtio/virtgpu_plane.c b/drivers/gpu/drm/virtio/virtgpu_plane.c
index 73aa70bd4231..24b786e1555e 100644
--- a/drivers/gpu/drm/virtio/virtgpu_plane.c
+++ b/drivers/gpu/drm/virtio/virtgpu_plane.c
@@ -257,7 +257,7 @@ static void virtio_gpu_primary_plane_update(struct drm_plane *plane,
 		return;
 	}
 
-	if (!drm_atomic_helper_damage_merged(old_state, plane->state, &rect))
+	if (!drm_atomic_helper_damage_merged(plane->state, &rect))
 		return;
 
 	bo = gem_to_virtio_gpu_obj(plane->state->fb->obj[0]);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c b/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c
index 4139837f4caf..f0df2b1c8465 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c
@@ -977,7 +977,6 @@ vmw_stdu_primary_plane_prepare_fb(struct drm_plane *plane,
 	enum stdu_content_type new_content_type;
 	struct vmw_framebuffer_surface *new_vfbs;
 	uint32_t hdisplay = new_state->crtc_w, vdisplay = new_state->crtc_h;
-	struct drm_plane_state *old_state = plane->state;
 	struct drm_rect rect;
 	int ret;
 
@@ -1101,8 +1100,7 @@ vmw_stdu_primary_plane_prepare_fb(struct drm_plane *plane,
 		struct vmw_surface *surf = vmw_user_object_surface(&vps->uo);
 		struct vmw_resource *res = &surf->res;
 
-		if (!res->res_dirty && drm_atomic_helper_damage_merged(old_state,
-								       new_state,
+		if (!res->res_dirty && drm_atomic_helper_damage_merged(new_state,
 								       &rect)) {
 			/*
 			 * At some point it might be useful to actually translate
diff --git a/include/drm/drm_damage_helper.h b/include/drm/drm_damage_helper.h
index fafe29b50fc6..b5a4de779db6 100644
--- a/include/drm/drm_damage_helper.h
+++ b/include/drm/drm_damage_helper.h
@@ -77,8 +77,7 @@ drm_atomic_helper_damage_iter_init(struct drm_atomic_helper_damage_iter *iter,
 bool
 drm_atomic_helper_damage_iter_next(struct drm_atomic_helper_damage_iter *iter,
 				   struct drm_rect *rect);
-bool drm_atomic_helper_damage_merged(const struct drm_plane_state *old_state,
-				     const struct drm_plane_state *state,
+bool drm_atomic_helper_damage_merged(const struct drm_plane_state *state,
 				     struct drm_rect *rect);
 
 #endif
-- 
2.54.0


^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox