From: Steven Sistare <steven.sistare@oracle.com>
To: Alexey Kardashevskiy <aik@amd.com>, iommu@lists.linux.dev
Cc: Jason Gunthorpe <jgg@nvidia.com>,
Kevin Tian <kevin.tian@intel.com>,
Nicolin Chen <nicolinc@nvidia.com>
Subject: Re: [PATCH V7 6/9] iommufd: pfn reader for file mappings
Date: Wed, 6 Nov 2024 08:19:35 -0500 [thread overview]
Message-ID: <2bbfb5fa-20e5-4935-b52d-73b5186c6123@oracle.com> (raw)
In-Reply-To: <7788773d-471d-48ad-9f9c-ddd5671b26e5@amd.com>
On 11/5/2024 10:18 PM, Alexey Kardashevskiy wrote:
> On 31/10/24 14:34, Alexey Kardashevskiy wrote:
>> On 26/10/24 00:11, Steve Sistare wrote:
>>> Extend pfn_reader_user to pin file mappings, by calling memfd_pin_folios.
>>> Repin at small page granularity, and fill the batch from folios. Expand
>>> folios to upages for the iopt_pages_fill path.
>>>
>>> Signed-off-by: Steve Sistare <steven.sistare@oracle.com>
>>> Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
>>> Reviewed-by: Kevin Tian <kevin.tian@intel.com>
>>> ---
>>> drivers/iommu/iommufd/io_pagetable.h | 5 ++
>>> drivers/iommu/iommufd/pages.c | 128 ++++++++++++++++++++++++++++++-----
>>> 2 files changed, 116 insertions(+), 17 deletions(-)
>>>
>>> diff --git a/drivers/iommu/iommufd/io_pagetable.h b/drivers/iommu/iommufd/io_pagetable.h
>>> index 8e48266..5ac4eed 100644
>>> --- a/drivers/iommu/iommufd/io_pagetable.h
>>> +++ b/drivers/iommu/iommufd/io_pagetable.h
>>> @@ -177,6 +177,7 @@ enum {
>>> enum iopt_address_type {
>>> IOPT_ADDRESS_USER = 0,
>>> + IOPT_ADDRESS_FILE = 1,
>>> };
>>> /*
>>> @@ -202,6 +203,10 @@ struct iopt_pages {
>>> enum iopt_address_type type;
>>> union {
>>> void __user *uptr; /* IOPT_ADDRESS_USER */
>>> + struct { /* IOPT_ADDRESS_FILE */
>>> + struct file *file;
>>> + unsigned long start;
>>> + };
>>> };
>>> bool writable:1;
>>> u8 account_mode;
>>> diff --git a/drivers/iommu/iommufd/pages.c b/drivers/iommu/iommufd/pages.c
>>> index aa79504..5f371fa 100644
>>> --- a/drivers/iommu/iommufd/pages.c
>>> +++ b/drivers/iommu/iommufd/pages.c
>>> @@ -752,19 +752,32 @@ struct pfn_reader_user {
>>> * neither
>>> */
>>> int locked;
>>> +
>>> + /* The following are only valid if file != NULL. */
>>
>>
>> Are "struct page **upages" and "size_t upages_len" still valid in this case? The code (kind of) suggests they are not... Thanks,
>
>
> Ping?
Sorry, I don't recall seeing your first email, but others asked this same question
and I answered. upages and upages_len are still used when file!=NULL for mediated
device access, to point to the pages array returned to the kernel client. Maybe
"u" for user is not a good description in that case, but changing that and still
using "u"pages elsewhere would complicate the code for little gain in clarity IMO.
> Also, does "user" in function names mean "userspace addresses" (as opposed to "file")?
Yes, except for mdev as noted above.
> If so, the patchset makes many names misleading then. Thanks,
The new stuff I added is prefixed with file. I left the existing upages names
as is. That made sense to me, and made the maintainers happy enough to accept it :)
- Steve
>>> + struct file *file;
>>> + struct folio **ufolios;
>>> + size_t ufolios_len;
>>> + unsigned long ufolios_offset;
>>> + struct folio **ufolios_next;
>>> };
>>> static void pfn_reader_user_init(struct pfn_reader_user *user,
>>> struct iopt_pages *pages)
>>> {
>>> user->upages = NULL;
>>> + user->upages_len = 0;
>>> user->upages_start = 0;
>>> user->upages_end = 0;
>>> user->locked = -1;
>>> -
>>> user->gup_flags = FOLL_LONGTERM;
>>> if (pages->writable)
>>> user->gup_flags |= FOLL_WRITE;
>>> +
>>> + user->file = (pages->type == IOPT_ADDRESS_FILE) ? pages->file : NULL;
>>> + user->ufolios = NULL;
>>> + user->ufolios_len = 0;
>>> + user->ufolios_next = NULL;
>>> + user->ufolios_offset = 0;
>>> }
>>> static void pfn_reader_user_destroy(struct pfn_reader_user *user,
>>> @@ -773,13 +786,67 @@ static void pfn_reader_user_destroy(struct pfn_reader_user *user,
>>> if (user->locked != -1) {
>>> if (user->locked)
>>> mmap_read_unlock(pages->source_mm);
>>> - if (pages->source_mm != current->mm)
>>> + if (!user->file && pages->source_mm != current->mm)
>>> mmput(pages->source_mm);
>>> user->locked = -1;
>>> }
>>> kfree(user->upages);
>>> user->upages = NULL;
>>> + kfree(user->ufolios);
>>> + user->ufolios = NULL;
>>> +}
>>> +
>>> +static long pin_memfd_pages(struct pfn_reader_user *user, unsigned long start,
>>> + unsigned long npages)
>>> +{
>>> + unsigned long i;
>>> + unsigned long offset;
>>> + unsigned long npages_out = 0;
>>> + struct page **upages = user->upages;
>>> + unsigned long end = start + (npages << PAGE_SHIFT) - 1;
>>> + long nfolios = user->ufolios_len / sizeof(*user->ufolios);
>>> +
>>> + /*
>>> + * todo: memfd_pin_folios should return the last pinned offset so
>>> + * we can compute npages pinned, and avoid looping over folios here
>>> + * if upages == NULL.
>>> + */
>>> + nfolios = memfd_pin_folios(user->file, start, end, user->ufolios,
>>> + nfolios, &offset);
>>> + if (nfolios <= 0)
>>> + return nfolios;
>>> +
>>> + offset >>= PAGE_SHIFT;
>>> + user->ufolios_next = user->ufolios;
>>> + user->ufolios_offset = offset;
>>> +
>>> + for (i = 0; i < nfolios; i++) {
>>> + struct folio *folio = user->ufolios[i];
>>> + unsigned long nr = folio_nr_pages(folio);
>>> + unsigned long npin = min(nr - offset, npages);
>>> +
>>> + npages -= npin;
>>> + npages_out += npin;
>>> +
>>> + if (upages) {
>>> + if (npin == 1) {
>>> + *upages++ = folio_page(folio, offset);
>>> + } else {
>>> + int rc = folio_add_pins(folio, npin - 1);
>>> +
>>> + if (rc)
>>> + return rc;
>>> +
>>> + while (npin--)
>>> + *upages++ = folio_page(folio, offset++);
>>> + }
>>> + }
>>> +
>>> + offset = 0;
>>> + }
>>> +
>>> + return npages_out;
>>> }
>>> static int pfn_reader_user_pin(struct pfn_reader_user *user,
>>> @@ -788,7 +855,9 @@ static int pfn_reader_user_pin(struct pfn_reader_user *user,
>>> unsigned long last_index)
>>> {
>>> bool remote_mm = pages->source_mm != current->mm;
>>> - unsigned long npages;
>>> + unsigned long npages = last_index - start_index + 1;
>>> + unsigned long start;
>>> + unsigned long unum;
>>> uintptr_t uptr;
>>> long rc;
>>> @@ -796,40 +865,50 @@ static int pfn_reader_user_pin(struct pfn_reader_user *user,
>>> WARN_ON(last_index < start_index))
>>> return -EINVAL;
>>> - if (!user->upages) {
>>> + if (!user->file && !user->upages) {
>>> /* All undone in pfn_reader_destroy() */
>>> - user->upages_len =
>>> - (last_index - start_index + 1) * sizeof(*user->upages);
>>> + user->upages_len = npages * sizeof(*user->upages);
>>> user->upages = temp_kmalloc(&user->upages_len, NULL, 0);
>>> if (!user->upages)
>>> return -ENOMEM;
>>> }
>>> + if (user->file && !user->ufolios) {
>>> + user->ufolios_len = npages * sizeof(*user->ufolios);
>>> + user->ufolios = temp_kmalloc(&user->ufolios_len, NULL, 0);
>>> + if (!user->ufolios)
>>> + return -ENOMEM;
>>> + }
>>> +
>>> if (user->locked == -1) {
>>> /*
>>> * The majority of usages will run the map task within the mm
>>> * providing the pages, so we can optimize into
>>> * get_user_pages_fast()
>>> */
>>> - if (remote_mm) {
>>> + if (!user->file && remote_mm) {
>>> if (!mmget_not_zero(pages->source_mm))
>>> return -EFAULT;
>>> }
>>> user->locked = 0;
>>> }
>>> - npages = min_t(unsigned long, last_index - start_index + 1,
>>> - user->upages_len / sizeof(*user->upages));
>>> -
>>> + unum = user->file ? user->ufolios_len / sizeof(*user->ufolios) :
>>> + user->upages_len / sizeof(*user->upages);
>>> + npages = min_t(unsigned long, npages, unum);
>>> if (iommufd_should_fail())
>>> return -EFAULT;
>>> - uptr = (uintptr_t)(pages->uptr + start_index * PAGE_SIZE);
>>> - if (!remote_mm)
>>> + if (user->file) {
>>> + start = pages->start + (start_index * PAGE_SIZE);
>>> + rc = pin_memfd_pages(user, start, npages);
>>> + } else if (!remote_mm) {
>>> + uptr = (uintptr_t)(pages->uptr + start_index * PAGE_SIZE);
>>> rc = pin_user_pages_fast(uptr, npages, user->gup_flags,
>>> user->upages);
>>> - else {
>>> + } else {
>>> + uptr = (uintptr_t)(pages->uptr + start_index * PAGE_SIZE);
>>> if (!user->locked) {
>>> mmap_read_lock(pages->source_mm);
>>> user->locked = 1;
>>> @@ -887,7 +966,8 @@ static int update_mm_locked_vm(struct iopt_pages *pages, unsigned long npages,
>>> mmap_read_unlock(pages->source_mm);
>>> user->locked = 0;
>>> /* If we had the lock then we also have a get */
>>> - } else if ((!user || !user->upages) &&
>>> +
>>> + } else if ((!user || (!user->upages && !user->ufolios)) &&
>>> pages->source_mm != current->mm) {
>>> if (!mmget_not_zero(pages->source_mm))
>>> return -EINVAL;
>>> @@ -1068,8 +1148,15 @@ static int pfn_reader_fill_span(struct pfn_reader *pfns)
>>> npages = user->upages_end - start_index;
>>> start_index -= user->upages_start;
>>> - batch_from_pages(&pfns->batch, user->upages + start_index, npages);
>>> - return 0;
>>> + rc = 0;
>>> +
>>> + if (!user->file)
>>> + batch_from_pages(&pfns->batch, user->upages + start_index,
>>> + npages);
>>> + else
>>> + rc = batch_from_folios(&pfns->batch, &user->ufolios_next,
>>> + &user->ufolios_offset, npages);
>>> + return rc;
>>> }
>>> static bool pfn_reader_done(struct pfn_reader *pfns)
>>> @@ -1151,7 +1238,14 @@ static void pfn_reader_release_pins(struct pfn_reader *pfns)
>>> unsigned long start_index = pfns->batch_end_index -
>>> user->upages_start;
>>> - unpin_user_pages(user->upages + start_index, npages);
>>> + if (!user->file) {
>>> + unpin_user_pages(user->upages + start_index, npages);
>>> + } else {
>>> + long n = user->ufolios_len / sizeof(*user->ufolios);
>>> +
>>> + unpin_folios(user->ufolios_next,
>>> + user->ufolios + n - user->ufolios_next);
>>> + }
>>> iopt_pages_sub_npinned(pages, npages);
>>> user->upages_end = pfns->batch_end_index;
>>> }
>>
>
next prev parent reply other threads:[~2024-11-06 13:19 UTC|newest]
Thread overview: 35+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-10-25 13:11 [PATCH V7 0/9] iommu_ioas_map_file Steve Sistare
2024-10-25 13:11 ` [PATCH V7 1/9] mm/gup: folio_add_pins Steve Sistare
2024-10-25 13:11 ` [PATCH V7 2/9] iommufd: rename uptr in iopt_alloc_iova Steve Sistare
2024-10-25 13:11 ` [PATCH V7 3/9] iommufd: generalize iopt_pages address Steve Sistare
2024-10-25 13:11 ` [PATCH V7 4/9] iommufd: pfn reader local variables Steve Sistare
2024-10-25 13:11 ` [PATCH V7 5/9] iommufd: folio subroutines Steve Sistare
2024-10-25 13:11 ` [PATCH V7 6/9] iommufd: pfn reader for file mappings Steve Sistare
2024-10-31 3:34 ` Alexey Kardashevskiy
2024-11-06 3:18 ` Alexey Kardashevskiy
2024-11-06 13:19 ` Steven Sistare [this message]
2024-11-07 10:00 ` Alexey Kardashevskiy
2024-11-07 13:21 ` Steven Sistare
2024-11-07 14:08 ` Jason Gunthorpe
2024-11-08 0:01 ` Alexey Kardashevskiy
2024-11-14 4:03 ` Alexey Kardashevskiy
2024-11-14 16:17 ` Jason Gunthorpe
2024-11-18 1:24 ` Alexey Kardashevskiy
2024-10-25 13:11 ` [PATCH V7 7/9] iommufd: IOMMU_IOAS_MAP_FILE Steve Sistare
2024-10-25 13:11 ` [PATCH V7 8/9] iommufd: file mappings for mdev Steve Sistare
2024-10-25 13:11 ` [PATCH V7 9/9] iommufd: map file selftest Steve Sistare
2024-10-25 13:14 ` Steven Sistare
2024-10-25 17:00 ` Nicolin Chen
2024-10-25 17:04 ` Nicolin Chen
2024-10-25 17:58 ` Steven Sistare
2024-10-25 18:39 ` Nicolin Chen
2024-10-25 23:58 ` Jason Gunthorpe
2024-10-26 19:13 ` Steven Sistare
2024-10-26 19:16 ` Steven Sistare
2024-10-26 23:09 ` Nicolin Chen
2024-10-27 14:38 ` Steven Sistare
2024-10-30 0:11 ` [PATCH V7 0/9] iommu_ioas_map_file Jason Gunthorpe
2024-10-30 12:43 ` Steven Sistare
2024-11-04 13:58 ` Steven Sistare
2024-11-04 14:18 ` Jason Gunthorpe
2024-11-04 14:24 ` Steven Sistare
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=2bbfb5fa-20e5-4935-b52d-73b5186c6123@oracle.com \
--to=steven.sistare@oracle.com \
--cc=aik@amd.com \
--cc=iommu@lists.linux.dev \
--cc=jgg@nvidia.com \
--cc=kevin.tian@intel.com \
--cc=nicolinc@nvidia.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox