From: Zhi Wang <zhi.wang.linux@gmail.com>
To: David Stevens <stevensd@chromium.org>
Cc: Sean Christopherson <seanjc@google.com>,
	Marc Zyngier <maz@kernel.org>,
	Michael Ellerman <mpe@ellerman.id.au>,
	Peter Xu <peterx@redhat.com>,
	linux-arm-kernel@lists.infradead.org, kvmarm@lists.linux.dev,
	linux-kernel@vger.kernel.org, linuxppc-dev@lists.ozlabs.org,
	kvm@vger.kernel.org
Subject: Re: [PATCH v7 2/8] KVM: Introduce __kvm_follow_pfn function
Date: Tue, 11 Jul 2023 20:37:25 +0300
Message-ID: <20230711203725.0000453c.zhi.wang.linux@gmail.com>
In-Reply-To: <CAD=HUj4+20vtQTKiE69vWV1QVhx1o0uzRqvM+PMGn_=KGpfjHg@mail.gmail.com>

On Wed, 5 Jul 2023 18:08:17 +0900
David Stevens <stevensd@chromium.org> wrote:

> On Wed, Jul 5, 2023 at 5:47 PM Zhi Wang <zhi.wang.linux@gmail.com> wrote:
> >
> > On Tue,  4 Jul 2023 16:50:47 +0900
> > David Stevens <stevensd@chromium.org> wrote:
> >  
> > > From: David Stevens <stevensd@chromium.org>
> > >
> > > Introduce __kvm_follow_pfn, which will replace __gfn_to_pfn_memslot.
> > > __kvm_follow_pfn refactors the old API's arguments into a struct and,
> > > where possible, combines the boolean arguments into a single flags
> > > argument.
> > >
> > > Signed-off-by: David Stevens <stevensd@chromium.org>
> > > ---
> > >  include/linux/kvm_host.h |  16 ++++
> > >  virt/kvm/kvm_main.c      | 171 ++++++++++++++++++++++-----------------
> > >  virt/kvm/kvm_mm.h        |   3 +-
> > >  virt/kvm/pfncache.c      |   8 +-
> > >  4 files changed, 122 insertions(+), 76 deletions(-)
> > >
> > > diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> > > index 9d3ac7720da9..ef2763c2b12e 100644
> > > --- a/include/linux/kvm_host.h
> > > +++ b/include/linux/kvm_host.h
> > > @@ -97,6 +97,7 @@
> > >  #define KVM_PFN_ERR_HWPOISON (KVM_PFN_ERR_MASK + 1)
> > >  #define KVM_PFN_ERR_RO_FAULT (KVM_PFN_ERR_MASK + 2)
> > >  #define KVM_PFN_ERR_SIGPENDING       (KVM_PFN_ERR_MASK + 3)
> > > +#define KVM_PFN_ERR_NEEDS_IO (KVM_PFN_ERR_MASK + 4)
> > >
> > >  /*
> > >   * error pfns indicate that the gfn is in slot but faild to
> > > @@ -1156,6 +1157,21 @@ unsigned long gfn_to_hva_memslot_prot(struct kvm_memory_slot *slot, gfn_t gfn,
> > >  void kvm_release_page_clean(struct page *page);
> > >  void kvm_release_page_dirty(struct page *page);
> > >
> > > +struct kvm_follow_pfn {
> > > +     const struct kvm_memory_slot *slot;
> > > +     gfn_t gfn;
> > > +     unsigned int flags;
> > > +     bool atomic;
> > > +     /* Allow a read fault to create a writeable mapping. */
> > > +     bool allow_write_mapping;
> > > +
> > > +     /* Outputs of __kvm_follow_pfn */
> > > +     hva_t hva;
> > > +     bool writable;
> > > +};
> > > +
> > > +kvm_pfn_t __kvm_follow_pfn(struct kvm_follow_pfn *foll);
> > > +
> > >  kvm_pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn);
> > >  kvm_pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault,
> > >                     bool *writable);
> > > diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> > > index 371bd783ff2b..b13f22861d2f 100644
> > > --- a/virt/kvm/kvm_main.c
> > > +++ b/virt/kvm/kvm_main.c
> > > @@ -2486,24 +2486,22 @@ static inline int check_user_page_hwpoison(unsigned long addr)
> > >   * true indicates success, otherwise false is returned.  It's also the
> > >   * only part that runs if we can in atomic context.
> > >   */
> > > -static bool hva_to_pfn_fast(unsigned long addr, bool write_fault,
> > > -                         bool *writable, kvm_pfn_t *pfn)
> > > +static bool hva_to_pfn_fast(struct kvm_follow_pfn *foll, kvm_pfn_t *pfn)
> > >  {
> > >       struct page *page[1];
> > > +     bool write_fault = foll->flags & FOLL_WRITE;
> > >
> > >       /*
> > >        * Fast pin a writable pfn only if it is a write fault request
> > >        * or the caller allows to map a writable pfn for a read fault
> > >        * request.
> > >        */
> > > -     if (!(write_fault || writable))
> > > +     if (!(write_fault || foll->allow_write_mapping))
> > >               return false;
> > >
> > > -     if (get_user_page_fast_only(addr, FOLL_WRITE, page)) {
> > > +     if (get_user_page_fast_only(foll->hva, FOLL_WRITE, page)) {
> > >               *pfn = page_to_pfn(page[0]);
> > > -
> > > -             if (writable)
> > > -                     *writable = true;
> > > +             foll->writable = foll->allow_write_mapping;
> > >               return true;
> > >       }
> > >
> > > @@ -2514,35 +2512,26 @@ static bool hva_to_pfn_fast(unsigned long addr, bool write_fault,
> > >   * The slow path to get the pfn of the specified host virtual address,
> > >   * 1 indicates success, -errno is returned if error is detected.
> > >   */
> > > -static int hva_to_pfn_slow(unsigned long addr, bool *async, bool write_fault,
> > > -                        bool interruptible, bool *writable, kvm_pfn_t *pfn)
> > > +static int hva_to_pfn_slow(struct kvm_follow_pfn *foll, kvm_pfn_t *pfn)
> > >  {
> > > -     unsigned int flags = FOLL_HWPOISON;
> > > +     unsigned int flags = FOLL_HWPOISON | FOLL_GET | foll->flags;
> > >       struct page *page;
> > >       int npages;
> > >
> > >       might_sleep();
> > >
> > > -     if (writable)
> > > -             *writable = write_fault;
> > > -
> > > -     if (write_fault)
> > > -             flags |= FOLL_WRITE;
> > > -     if (async)
> > > -             flags |= FOLL_NOWAIT;
> > > -     if (interruptible)
> > > -             flags |= FOLL_INTERRUPTIBLE;
> > > -
> > > -     npages = get_user_pages_unlocked(addr, 1, &page, flags);
> > > +     npages = get_user_pages_unlocked(foll->hva, 1, &page, flags);
> > >       if (npages != 1)
> > >               return npages;
> > >
> > > +     foll->writable = (foll->flags & FOLL_WRITE) && foll->allow_write_mapping;
> > > +
> > >       /* map read fault as writable if possible */
> > > -     if (unlikely(!write_fault) && writable) {
> > > +     if (unlikely(!foll->writable) && foll->allow_write_mapping) {
> > >               struct page *wpage;
> > >
> > > -             if (get_user_page_fast_only(addr, FOLL_WRITE, &wpage)) {
> > > -                     *writable = true;
> > > +             if (get_user_page_fast_only(foll->hva, FOLL_WRITE, &wpage)) {
> > > +                     foll->writable = true;
> > >                       put_page(page);
> > >                       page = wpage;
> > >               }
> > > @@ -2572,23 +2561,23 @@ static int kvm_try_get_pfn(kvm_pfn_t pfn)
> > >       return get_page_unless_zero(page);
> > >  }
> > >
> > > -static int hva_to_pfn_remapped(struct vm_area_struct *vma,
> > > -                            unsigned long addr, bool write_fault,
> > > -                            bool *writable, kvm_pfn_t *p_pfn)
> > > +static int hva_to_pfn_remapped(struct vm_area_struct *vma, struct kvm_follow_pfn *foll,
> > > +                            kvm_pfn_t *p_pfn)
> > >  {
> > >       kvm_pfn_t pfn;
> > >       pte_t *ptep;
> > >       spinlock_t *ptl;
> > > +     bool write_fault = foll->flags & FOLL_WRITE;
> > >       int r;
> > >
> > > -     r = follow_pte(vma->vm_mm, addr, &ptep, &ptl);
> > > +     r = follow_pte(vma->vm_mm, foll->hva, &ptep, &ptl);
> > >       if (r) {
> > >               /*
> > >                * get_user_pages fails for VM_IO and VM_PFNMAP vmas and does
> > >                * not call the fault handler, so do it here.
> > >                */
> > >               bool unlocked = false;
> > > -             r = fixup_user_fault(current->mm, addr,
> > > +             r = fixup_user_fault(current->mm, foll->hva,
> > >                                    (write_fault ? FAULT_FLAG_WRITE : 0),
> > >                                    &unlocked);
> > >               if (unlocked)
> > > @@ -2596,7 +2585,7 @@ static int hva_to_pfn_remapped(struct vm_area_struct *vma,
> > >               if (r)
> > >                       return r;
> > >
> > > -             r = follow_pte(vma->vm_mm, addr, &ptep, &ptl);
> > > +             r = follow_pte(vma->vm_mm, foll->hva, &ptep, &ptl);
> > >               if (r)
> > >                       return r;
> > >       }
> > > @@ -2606,8 +2595,7 @@ static int hva_to_pfn_remapped(struct vm_area_struct *vma,
> > >               goto out;
> > >       }
> > >
> > > -     if (writable)
> > > -             *writable = pte_write(*ptep);
> > > +     foll->writable = pte_write(*ptep) && foll->allow_write_mapping;
> > >       pfn = pte_pfn(*ptep);
> > >
> > >       /*
> > > @@ -2652,24 +2640,22 @@ static int hva_to_pfn_remapped(struct vm_area_struct *vma,
> > >   * 2): @write_fault = false && @writable, @writable will tell the caller
> > >   *     whether the mapping is writable.
> > >   */
> > > -kvm_pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool interruptible,
> > > -                  bool *async, bool write_fault, bool *writable)
> > > +kvm_pfn_t hva_to_pfn(struct kvm_follow_pfn *foll)
> > >  {
> > >       struct vm_area_struct *vma;
> > >       kvm_pfn_t pfn;
> > >       int npages, r;
> > >
> > >       /* we can do it either atomically or asynchronously, not both */
> > > -     BUG_ON(atomic && async);
> > > +     BUG_ON(foll->atomic && (foll->flags & FOLL_NOWAIT));
> > >
> > > -     if (hva_to_pfn_fast(addr, write_fault, writable, &pfn))
> > > +     if (hva_to_pfn_fast(foll, &pfn))
> > >               return pfn;
> > >
> > > -     if (atomic)
> > > +     if (foll->atomic)
> > >               return KVM_PFN_ERR_FAULT;
> > >
> > > -     npages = hva_to_pfn_slow(addr, async, write_fault, interruptible,
> > > -                              writable, &pfn);
> > > +     npages = hva_to_pfn_slow(foll, &pfn);
> > >       if (npages == 1)
> > >               return pfn;
> > >       if (npages == -EINTR)
> > > @@ -2677,83 +2663,122 @@ kvm_pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool interruptible,
> > >
> > >       mmap_read_lock(current->mm);
> > >       if (npages == -EHWPOISON ||
> > > -           (!async && check_user_page_hwpoison(addr))) {
> > > +           (!(foll->flags & FOLL_NOWAIT) && check_user_page_hwpoison(foll->hva))) {
> > >               pfn = KVM_PFN_ERR_HWPOISON;
> > >               goto exit;
> > >       }
> > >
> > >  retry:
> > > -     vma = vma_lookup(current->mm, addr);
> > > +     vma = vma_lookup(current->mm, foll->hva);
> > >
> > >       if (vma == NULL)
> > >               pfn = KVM_PFN_ERR_FAULT;
> > >       else if (vma->vm_flags & (VM_IO | VM_PFNMAP)) {
> > > -             r = hva_to_pfn_remapped(vma, addr, write_fault, writable, &pfn);
> > > +             r = hva_to_pfn_remapped(vma, foll, &pfn);
> > >               if (r == -EAGAIN)
> > >                       goto retry;
> > >               if (r < 0)
> > >                       pfn = KVM_PFN_ERR_FAULT;
> > >       } else {
> > > -             if (async && vma_is_valid(vma, write_fault))
> > > -                     *async = true;
> > > -             pfn = KVM_PFN_ERR_FAULT;
> > > +             if ((foll->flags & FOLL_NOWAIT) &&
> > > +                 vma_is_valid(vma, foll->flags & FOLL_WRITE))
> > > +                     pfn = KVM_PFN_ERR_NEEDS_IO;
> > > +             else
> > > +                     pfn = KVM_PFN_ERR_FAULT;
> > >       }
> > >  exit:
> > >       mmap_read_unlock(current->mm);
> > >       return pfn;
> > >  }
> > >
> > > -kvm_pfn_t __gfn_to_pfn_memslot(const struct kvm_memory_slot *slot, gfn_t gfn,
> > > -                            bool atomic, bool interruptible, bool *async,
> > > -                            bool write_fault, bool *writable, hva_t *hva)
> > > +kvm_pfn_t __kvm_follow_pfn(struct kvm_follow_pfn *foll)
> > >  {
> > > -     unsigned long addr = __gfn_to_hva_many(slot, gfn, NULL, write_fault);
> > > -
> > > -     if (hva)
> > > -             *hva = addr;
> > > +     foll->hva = __gfn_to_hva_many(foll->slot, foll->gfn, NULL,
> > > +                                   foll->flags & FOLL_WRITE);
> > >
> > > -     if (addr == KVM_HVA_ERR_RO_BAD) {
> > > -             if (writable)
> > > -                     *writable = false;
> > > +     if (foll->hva == KVM_HVA_ERR_RO_BAD)
> > >               return KVM_PFN_ERR_RO_FAULT;
> > > -     }
> > >  
> >
> > Can you explain why updating foll->writable = false (previously *writable
> > = false) is omitted here?
> >
> > In the callers where the struct kvm_follow_pfn is initialized, e.g.
> > __gfn_to_pfn_memslot()/gfn_to_pfn_prot(), .writable is not initialized.
> > IIUC, they expect __kvm_follow_pfn() to update it and return .writable to
> > the upper caller.
> >
> > As it is one of the outputs, it would be better to either initialize it
> > in the caller or update it in __kvm_follow_pfn(). Otherwise
> > __gfn_to_pfn_memslot()/gfn_to_pfn_prot() will return random stack data to
> > the caller via bool *writable. That doesn't sound nice.
> 
> Entries omitted from an initializer are initialized to zero, so
> .writable does get initialized in all of the patches in this series
> via designated initializers. That said, you're right that explicitly
> setting it to false is a good idea, in case someone someday adds a
> caller that doesn't use an initializer when declaring its
> kvm_follow_pfn.
>

Nice trick, and nice to know that. :) Agreed on improving readability and
guarding against that risk in future callers.

> -David




Thread overview: 55+ messages
2023-07-04  7:50 [PATCH v7 0/8] KVM: allow mapping non-refcounted pages David Stevens
2023-07-04  7:50 ` [PATCH v7 1/8] KVM: Assert that a page's refcount is elevated when marking accessed/dirty David Stevens
2023-07-04  7:50 ` [PATCH v7 2/8] KVM: Introduce __kvm_follow_pfn function David Stevens
2023-07-05  3:10   ` Yu Zhang
2023-07-05  9:22     ` David Stevens
2023-07-05 10:53       ` Yu Zhang
2023-07-06  5:29         ` David Stevens
2023-07-06 14:52           ` Yu Zhang
2023-08-04 22:03             ` Sean Christopherson
2023-07-05  8:47   ` Zhi Wang
2023-07-05  9:08     ` David Stevens
2023-07-11 17:37       ` Zhi Wang [this message]
2023-07-06  1:34   ` Isaku Yamahata
2023-07-06  5:52     ` David Stevens
2023-08-04 22:13   ` Sean Christopherson
2023-07-04  7:50 ` [PATCH v7 3/8] KVM: Make __kvm_follow_pfn not imply FOLL_GET David Stevens
2023-07-05  7:23   ` Yu Zhang
2023-07-05 11:56   ` Yu Zhang
2023-07-06  6:09     ` David Stevens
2023-07-05 13:19   ` Zhi Wang
2023-07-06  6:49     ` David Stevens
2023-07-11 17:33       ` Zhi Wang
2023-07-11 21:59         ` Sean Christopherson
2023-09-05  8:26           ` David Stevens
2023-09-06  0:45             ` Sean Christopherson
2023-09-06  3:24               ` David Stevens
2023-09-06 22:03                 ` Sean Christopherson
2023-07-04  7:50 ` [PATCH v7 4/8] KVM: x86/mmu: Migrate to __kvm_follow_pfn David Stevens
2023-07-05  8:07   ` Yu Zhang
2023-08-04 22:30     ` Sean Christopherson
2023-07-06  1:54   ` Isaku Yamahata
2023-08-24  8:03     ` David Stevens
2023-07-04  7:50 ` [PATCH v7 5/8] KVM: x86/mmu: Don't pass FOLL_GET " David Stevens
2023-07-05 10:18   ` Yu Zhang
2023-07-05 14:17     ` Yu Zhang
2023-07-06  4:52     ` David Stevens
2023-07-06  7:19       ` Yu Zhang
2023-07-06 15:58       ` Isaku Yamahata
2023-07-07  1:35         ` David Stevens
2023-07-10 16:34           ` Isaku Yamahata
2023-07-11  2:59             ` David Stevens
2023-08-04 22:45       ` Sean Christopherson
2023-07-05 10:25   ` Yu Zhang
2023-08-24  8:03     ` David Stevens
2023-08-24 15:15       ` Sean Christopherson
2023-08-25  1:38         ` David Stevens
2023-08-31 21:18           ` Sean Christopherson
2023-07-06  2:10   ` Isaku Yamahata
2023-07-06  5:18     ` David Stevens
2023-07-19  6:09   ` Yan Zhao
2023-07-19  7:16     ` David Stevens
2023-07-04  7:50 ` [PATCH v7 6/8] KVM: arm64: Migrate " David Stevens
2023-07-04  7:50 ` [PATCH v7 7/8] KVM: PPC: " David Stevens
2023-07-04  7:50 ` [PATCH v7 8/8] KVM: remove __gfn_to_pfn_memslot David Stevens
2023-08-04 22:47 ` [PATCH v7 0/8] KVM: allow mapping non-refcounted pages Sean Christopherson
