From: Joel Granados <j.granados@samsung.com>
To: Lu Baolu <baolu.lu@linux.intel.com>
Cc: Jason Gunthorpe <jgg@ziepe.ca>, Kevin Tian <kevin.tian@intel.com>,
Joerg Roedel <joro@8bytes.org>, Will Deacon <will@kernel.org>,
Robin Murphy <robin.murphy@arm.com>,
Jean-Philippe Brucker <jean-philippe@linaro.org>,
Nicolin Chen <nicolinc@nvidia.com>, Yi Liu <yi.l.liu@intel.com>,
Jacob Pan <jacob.jun.pan@linux.intel.com>,
<iommu@lists.linux.dev>, <linux-kselftest@vger.kernel.org>,
<virtualization@lists.linux-foundation.org>,
<linux-kernel@vger.kernel.org>
Subject: Re: [PATCH v2 4/6] iommufd: Deliver fault messages to user space
Date: Thu, 7 Dec 2023 17:34:10 +0100 [thread overview]
Message-ID: <20231207163410.ap3w4faii6wkgwed@localhost> (raw)
In-Reply-To: <20231026024930.382898-5-baolu.lu@linux.intel.com>
[-- Attachment #1: Type: text/plain, Size: 10080 bytes --]
On Thu, Oct 26, 2023 at 10:49:28AM +0800, Lu Baolu wrote:
> Add the file interface that provides a simple and efficient way for
> userspace to handle page faults. The file interface allows userspace
> to read fault messages sequentially, and to respond to the handling
> result by writing to the same file.
>
> Userspace applications are recommended to use io_uring to speed up read
> and write efficiency.
>
> With this done, allow userspace application to allocate a hw page table
> with IOMMU_HWPT_ALLOC_IOPF_CAPABLE flag set.
>
> Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
> ---
> drivers/iommu/iommufd/iommufd_private.h | 2 +
> drivers/iommu/iommufd/hw_pagetable.c | 204 +++++++++++++++++++++++-
> 2 files changed, 205 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h
> index 0dbaa2dc5b22..ff063bc48150 100644
> --- a/drivers/iommu/iommufd/iommufd_private.h
> +++ b/drivers/iommu/iommufd/iommufd_private.h
> @@ -237,6 +237,8 @@ struct hw_pgtable_fault {
> struct mutex mutex;
> struct list_head deliver;
> struct list_head response;
> + struct file *fault_file;
> + int fault_fd;
> };
>
> /*
> diff --git a/drivers/iommu/iommufd/hw_pagetable.c b/drivers/iommu/iommufd/hw_pagetable.c
> index 9f94c824cf86..f0aac1bb2d2d 100644
> --- a/drivers/iommu/iommufd/hw_pagetable.c
> +++ b/drivers/iommu/iommufd/hw_pagetable.c
> @@ -3,6 +3,8 @@
> * Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES
> */
> #include <linux/iommu.h>
> +#include <linux/file.h>
> +#include <linux/anon_inodes.h>
> #include <uapi/linux/iommufd.h>
>
> #include "../iommu-priv.h"
> @@ -38,9 +40,198 @@ static void iommufd_kernel_managed_hwpt_destroy(struct iommufd_object *obj)
> refcount_dec(&hwpt->ioas->obj.users);
> }
>
> +static int iommufd_compose_fault_message(struct iommu_fault *fault,
> + struct iommu_hwpt_pgfault *hwpt_fault,
> + struct device *dev)
> +{
> + struct iommufd_device *idev = iopf_pasid_cookie_get(dev, IOMMU_NO_PASID);
> +
> + if (!idev)
> + return -ENODEV;
> +
> + if (IS_ERR(idev))
> + return PTR_ERR(idev);
> +
> + hwpt_fault->size = sizeof(*hwpt_fault);
> + hwpt_fault->flags = fault->prm.flags;
> + hwpt_fault->dev_id = idev->obj.id;
> + hwpt_fault->pasid = fault->prm.pasid;
> + hwpt_fault->grpid = fault->prm.grpid;
> + hwpt_fault->perm = fault->prm.perm;
> + hwpt_fault->addr = fault->prm.addr;
> + hwpt_fault->private_data[0] = fault->prm.private_data[0];
> + hwpt_fault->private_data[1] = fault->prm.private_data[1];
> +
> + return 0;
> +}
> +
> +static ssize_t hwpt_fault_fops_read(struct file *filep, char __user *buf,
> + size_t count, loff_t *ppos)
> +{
> + size_t fault_size = sizeof(struct iommu_hwpt_pgfault);
> + struct hw_pgtable_fault *fault = filep->private_data;
> + struct iommu_hwpt_pgfault data;
> + struct iopf_group *group;
> + struct iopf_fault *iopf;
> + size_t done = 0;
> + int rc;
> +
> + if (*ppos || count % fault_size)
> + return -ESPIPE;
> +
> + mutex_lock(&fault->mutex);
> + while (!list_empty(&fault->deliver) && count > done) {
> + group = list_first_entry(&fault->deliver,
> + struct iopf_group, node);
> +
> + if (list_count_nodes(&group->faults) * fault_size > count - done)
> + break;
> +
> + list_for_each_entry(iopf, &group->faults, list) {
> + rc = iommufd_compose_fault_message(&iopf->fault,
> + &data, group->dev);
> + if (rc)
> + goto err_unlock;
> + rc = copy_to_user(buf + done, &data, fault_size);
> + if (rc)
> + goto err_unlock;
> + done += fault_size;
> + }
> +
> + list_move_tail(&group->node, &fault->response);
> + }
> + mutex_unlock(&fault->mutex);
> +
> + return done;
> +err_unlock:
> + mutex_unlock(&fault->mutex);
> + return rc;
> +}
> +
> +static ssize_t hwpt_fault_fops_write(struct file *filep,
> + const char __user *buf,
> + size_t count, loff_t *ppos)
> +{
> + size_t response_size = sizeof(struct iommu_hwpt_page_response);
> + struct hw_pgtable_fault *fault = filep->private_data;
> + struct iommu_hwpt_page_response response;
> + struct iommufd_hw_pagetable *hwpt;
> + struct iopf_group *iter, *group;
> + struct iommufd_device *idev;
> + size_t done = 0;
> + int rc = 0;
> +
> + if (*ppos || count % response_size)
> + return -ESPIPE;
> +
> + mutex_lock(&fault->mutex);
> + while (!list_empty(&fault->response) && count > done) {
> + rc = copy_from_user(&response, buf + done, response_size);
> + if (rc)
> + break;
> +
> + /* Get the device that this response targets at. */
> + idev = container_of(iommufd_get_object(fault->ictx,
> + response.dev_id,
> + IOMMUFD_OBJ_DEVICE),
> + struct iommufd_device, obj);
> + if (IS_ERR(idev)) {
> + rc = PTR_ERR(idev);
> + break;
> + }
> +
> + /*
> + * Get the hw page table that this response was generated for.
> + * It must match the one stored in the fault data.
> + */
> + hwpt = container_of(iommufd_get_object(fault->ictx,
> + response.hwpt_id,
> + IOMMUFD_OBJ_HW_PAGETABLE),
> + struct iommufd_hw_pagetable, obj);
> + if (IS_ERR(hwpt)) {
> + iommufd_put_object(&idev->obj);
> + rc = PTR_ERR(hwpt);
> + break;
> + }
> +
> + if (hwpt != fault->hwpt) {
> + rc = -EINVAL;
> + goto put_obj;
> + }
> +
> + group = NULL;
> + list_for_each_entry(iter, &fault->response, node) {
> + if (response.grpid != iter->last_fault.fault.prm.grpid)
> + continue;
> +
> + if (idev->dev != iter->dev)
> + continue;
> +
> + if ((iter->last_fault.fault.prm.flags &
> + IOMMU_FAULT_PAGE_REQUEST_PASID_VALID) &&
> + response.pasid != iter->last_fault.fault.prm.pasid)
> + continue;
> +
> + group = iter;
> + break;
> + }
> +
> + if (!group) {
> + rc = -ENODEV;
> + goto put_obj;
> + }
> +
> + rc = iopf_group_response(group, response.code);
> + if (rc)
> + goto put_obj;
> +
> + list_del(&group->node);
> + iopf_free_group(group);
> + done += response_size;
> +put_obj:
> + iommufd_put_object(&hwpt->obj);
> + iommufd_put_object(&idev->obj);
> + if (rc)
> + break;
> + }
> + mutex_unlock(&fault->mutex);
> +
> + return (rc < 0) ? rc : done;
> +}
> +
> +static const struct file_operations hwpt_fault_fops = {
> + .owner = THIS_MODULE,
> + .read = hwpt_fault_fops_read,
> + .write = hwpt_fault_fops_write,
> +};
> +
> +static int hw_pagetable_get_fault_fd(struct hw_pgtable_fault *fault)
> +{
> + struct file *filep;
> + int fdno;
> +
> + fdno = get_unused_fd_flags(O_CLOEXEC);
> + if (fdno < 0)
> + return fdno;
> +
> + filep = anon_inode_getfile("[iommufd-pgfault]", &hwpt_fault_fops,
> + fault, O_RDWR);
> + if (IS_ERR(filep)) {
> + put_unused_fd(fdno);
> + return PTR_ERR(filep);
> + }
> +
> + fd_install(fdno, filep);
> + fault->fault_file = filep;
> + fault->fault_fd = fdno;
> +
> + return 0;
> +}
> +
> static struct hw_pgtable_fault *hw_pagetable_fault_alloc(void)
> {
> struct hw_pgtable_fault *fault;
> + int rc;
>
> fault = kzalloc(sizeof(*fault), GFP_KERNEL);
> if (!fault)
> @@ -50,6 +241,12 @@ static struct hw_pgtable_fault *hw_pagetable_fault_alloc(void)
> INIT_LIST_HEAD(&fault->response);
> mutex_init(&fault->mutex);
>
> + rc = hw_pagetable_get_fault_fd(fault);
> + if (rc) {
> + kfree(fault);
> + return ERR_PTR(rc);
> + }
> +
> return fault;
> }
>
> @@ -58,6 +255,8 @@ static void hw_pagetable_fault_free(struct hw_pgtable_fault *fault)
> WARN_ON(!list_empty(&fault->deliver));
> WARN_ON(!list_empty(&fault->response));
>
> + fput(fault->fault_file);
> + put_unused_fd(fault->fault_fd);
I have been running your code and have run into an invalid memory access
on this line. When `put_unused_fd` is called, the files of the current task
are accessed through `current->files`. In my case this is 0x0.
The reason for it being 0x0 is that `do_exit` calls `exit_files` where
the task files get set to NULL; this call is made in `do_exit` before we
execute `exit_task_work`.
`exit_task_work` is the call that eventually arrives here at `hw_pagetable_fault_free`.
The way I arrived at this state is the following:
1. Version of linux kernel that I'm using : commit 357b5abcba0477f7f1391dd0fa3a919a6f06bdf0 (HEAD, lubaolu/iommufd-io-pgfault-delivery-v2)
2. Version of qemu that I'm using : commit 577ef478780597d3f449feb01e853b93fa5c5530 (HEAD, yiliu/zhenzhong/wip/iommufd_nesting_rfcv1)
3. This error happens when my user space app is exiting (hence the call
to `do_exit`).
4. I call the IOMMU_HWPT_ALLOC ioctl with
.flags = IOMMU_HWPT_ALLOC_IOPF_CAPABLE and
.hwpt_type = IOMMU_HWPT_TYPE_DEFAULT
.pt_id = the default ioas id.
I have resolved this in a naive way by just not calling the
put_unused_fd function.
Have you run into this? Is this a path that you were expecting?
Also, please get back to me if you need more information about how I got
to this place. I have provided what I think is enough info, but I might
be missing something obvious.
Best
> kfree(fault);
> }
>
> @@ -347,7 +546,9 @@ int iommufd_hwpt_alloc(struct iommufd_ucmd *ucmd)
> struct mutex *mutex;
> int rc;
>
> - if (cmd->flags & ~IOMMU_HWPT_ALLOC_NEST_PARENT || cmd->__reserved)
> + if ((cmd->flags & ~(IOMMU_HWPT_ALLOC_NEST_PARENT |
> + IOMMU_HWPT_ALLOC_IOPF_CAPABLE)) ||
> + cmd->__reserved)
> return -EOPNOTSUPP;
> if (!cmd->data_len && cmd->hwpt_type != IOMMU_HWPT_TYPE_DEFAULT)
> return -EINVAL;
> @@ -416,6 +617,7 @@ int iommufd_hwpt_alloc(struct iommufd_ucmd *ucmd)
> hwpt->fault->hwpt = hwpt;
> hwpt->domain->iopf_handler = iommufd_hw_pagetable_iopf_handler;
> hwpt->domain->fault_data = hwpt;
> + cmd->out_fault_fd = hwpt->fault->fault_fd;
> }
>
> cmd->out_hwpt_id = hwpt->obj.id;
> --
> 2.34.1
>
--
Joel Granados
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 659 bytes --]
next prev parent reply other threads:[~2023-12-07 16:34 UTC|newest]
Thread overview: 46+ messages / expand[flat|nested] mbox.gz Atom feed top
[not found] <CGME20231204150747eucas1p2365e92a7ac33ba99b801d7c800acaf6a@eucas1p2.samsung.com>
2023-10-26 2:49 ` [PATCH v2 0/6] IOMMUFD: Deliver IO page faults to user space Lu Baolu
2023-10-26 2:49 ` [PATCH v2 1/6] iommu: Add iommu page fault cookie helpers Lu Baolu
2023-12-01 14:38 ` Jason Gunthorpe
2023-12-08 6:24 ` Baolu Lu
2023-10-26 2:49 ` [PATCH v2 2/6] iommufd: Add iommu page fault uapi data Lu Baolu
2023-12-01 15:14 ` Jason Gunthorpe
2023-12-08 6:35 ` Baolu Lu
2023-10-26 2:49 ` [PATCH v2 3/6] iommufd: Initializing and releasing IO page fault data Lu Baolu
2023-12-12 13:10 ` Joel Granados
2023-12-12 14:12 ` Jason Gunthorpe
2023-12-13 2:04 ` Baolu Lu
2023-12-13 2:15 ` Tian, Kevin
2023-12-13 13:19 ` Jason Gunthorpe
2023-10-26 2:49 ` [PATCH v2 4/6] iommufd: Deliver fault messages to user space Lu Baolu
2023-12-01 15:24 ` Jason Gunthorpe
2023-12-08 11:43 ` Baolu Lu
2023-12-07 16:34 ` Joel Granados [this message]
2023-12-07 17:17 ` Jason Gunthorpe
2023-12-08 5:47 ` Baolu Lu
2023-12-08 13:41 ` Jason Gunthorpe
2024-01-12 17:46 ` Shameerali Kolothum Thodi
2024-01-15 16:47 ` Jason Gunthorpe
2024-01-15 17:44 ` Shameerali Kolothum Thodi
2024-01-15 17:58 ` Jason Gunthorpe
2023-10-26 2:49 ` [PATCH v2 5/6] iommufd/selftest: Add IOMMU_TEST_OP_TRIGGER_IOPF test support Lu Baolu
2023-10-26 2:49 ` [PATCH v2 6/6] iommufd/selftest: Add coverage for IOMMU_TEST_OP_TRIGGER_IOPF Lu Baolu
2023-11-02 12:47 ` [PATCH v2 0/6] IOMMUFD: Deliver IO page faults to user space Jason Gunthorpe
2023-11-07 8:35 ` Tian, Kevin
2023-11-07 17:54 ` Jason Gunthorpe
2023-11-08 8:53 ` Tian, Kevin
2023-11-08 17:39 ` Jason Gunthorpe
[not found] ` <c774e157-9b47-4fb8-80dd-37441c69b43d@linux.intel.com>
2023-11-15 13:58 ` Jason Gunthorpe
2023-11-16 1:42 ` Liu, Jing2
2023-11-21 0:14 ` Jason Gunthorpe
2023-11-29 9:08 ` Shameerali Kolothum Thodi
2023-11-30 3:44 ` Baolu Lu
2023-12-01 14:24 ` Jason Gunthorpe
2023-12-08 5:57 ` Baolu Lu
2023-12-08 13:43 ` Jason Gunthorpe
2023-12-04 15:07 ` Joel Granados
2023-12-04 15:32 ` Jason Gunthorpe
2023-12-08 5:10 ` Baolu Lu
2024-01-12 21:56 ` Joel Granados
2024-01-14 13:13 ` Baolu Lu
2024-01-14 17:18 ` Joel Granados
2024-01-15 1:25 ` Baolu Lu
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20231207163410.ap3w4faii6wkgwed@localhost \
--to=j.granados@samsung.com \
--cc=baolu.lu@linux.intel.com \
--cc=iommu@lists.linux.dev \
--cc=jacob.jun.pan@linux.intel.com \
--cc=jean-philippe@linaro.org \
--cc=jgg@ziepe.ca \
--cc=joro@8bytes.org \
--cc=kevin.tian@intel.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-kselftest@vger.kernel.org \
--cc=nicolinc@nvidia.com \
--cc=robin.murphy@arm.com \
--cc=virtualization@lists.linux-foundation.org \
--cc=will@kernel.org \
--cc=yi.l.liu@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox