linuxppc-dev.lists.ozlabs.org archive mirror
 help / color / mirror / Atom feed
From: David Gibson <david@gibson.dropbear.id.au>
To: Alexey Kardashevskiy <aik@ozlabs.ru>
Cc: linuxppc-dev@lists.ozlabs.org, Paul Mackerras <paulus@samba.org>,
	Alex Williamson <alex.williamson@redhat.com>,
	kvm-ppc@vger.kernel.org, kvm@vger.kernel.org
Subject: Re: [PATCH kernel 6/9] KVM: PPC: Associate IOMMU group with guest view of TCE table
Date: Tue, 8 Mar 2016 15:55:08 +1100	[thread overview]
Message-ID: <20160308045508.GY22546@voom.fritz.box> (raw)
In-Reply-To: <56DD4C05.4060109@ozlabs.ru>

[-- Attachment #1: Type: text/plain, Size: 9192 bytes --]

On Mon, Mar 07, 2016 at 08:38:13PM +1100, Alexey Kardashevskiy wrote:
> On 03/07/2016 05:25 PM, David Gibson wrote:
> >On Mon, Mar 07, 2016 at 02:41:14PM +1100, Alexey Kardashevskiy wrote:
> >>The existing in-kernel TCE table for emulated devices contains
> >>guest physical addresses which are accesses by emulated devices.
> >>Since we need to keep this information for VFIO devices too
> >>in order to implement H_GET_TCE, we are reusing it.
> >>
> >>This adds IOMMU group list to kvmppc_spapr_tce_table. Each group
> >>will have an iommu_table pointer.
> >>
> >>This adds kvm_spapr_tce_attach_iommu_group() helper and its detach
> >>counterpart to manage the lists.
> >>
> >>This puts a group when:
> >>- guest copy of TCE table is destroyed when TCE table fd is closed;
> >>- kvm_spapr_tce_detach_iommu_group() is called from
> >>the KVM_DEV_VFIO_GROUP_DEL ioctl handler in the case vfio-pci hotunplug
> >>(will be added in the following patch).
> >>
> >>Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
> >>---
> >>  arch/powerpc/include/asm/kvm_host.h |   8 +++
> >>  arch/powerpc/include/asm/kvm_ppc.h  |   6 ++
> >>  arch/powerpc/kvm/book3s_64_vio.c    | 108 ++++++++++++++++++++++++++++++++++++
> >>  3 files changed, 122 insertions(+)
> >>
> >>diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
> >>index 2e7c791..2c5c823 100644
> >>--- a/arch/powerpc/include/asm/kvm_host.h
> >>+++ b/arch/powerpc/include/asm/kvm_host.h
> >>@@ -178,6 +178,13 @@ struct kvmppc_pginfo {
> >>  	atomic_t refcnt;
> >>  };
> >>
> >>+struct kvmppc_spapr_tce_group {
> >>+	struct list_head next;
> >>+	struct rcu_head rcu;
> >>+	struct iommu_group *refgrp;/* for reference counting only */
> >>+	struct iommu_table *tbl;
> >>+};
> >>+
> >>  struct kvmppc_spapr_tce_table {
> >>  	struct list_head list;
> >>  	struct kvm *kvm;
> >>@@ -186,6 +193,7 @@ struct kvmppc_spapr_tce_table {
> >>  	u32 page_shift;
> >>  	u64 offset;		/* in pages */
> >>  	u64 size;		/* window size in pages */
> >>+	struct list_head groups;
> >>  	struct page *pages[0];
> >>  };
> >>
> >>diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
> >>index 2544eda..d1482dc 100644
> >>--- a/arch/powerpc/include/asm/kvm_ppc.h
> >>+++ b/arch/powerpc/include/asm/kvm_ppc.h
> >>@@ -164,6 +164,12 @@ extern void kvmppc_map_vrma(struct kvm_vcpu *vcpu,
> >>  			struct kvm_memory_slot *memslot, unsigned long porder);
> >>  extern int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu);
> >>
> >>+extern long kvm_spapr_tce_attach_iommu_group(struct kvm *kvm,
> >>+				unsigned long liobn,
> >>+				phys_addr_t start_addr,
> >>+				struct iommu_group *grp);
> >>+extern void kvm_spapr_tce_detach_iommu_group(struct kvm *kvm,
> >>+				struct iommu_group *grp);
> >>  extern long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
> >>  				struct kvm_create_spapr_tce_64 *args);
> >>  extern struct kvmppc_spapr_tce_table *kvmppc_find_table(
> >>diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c
> >>index 2c2d103..846d16d 100644
> >>--- a/arch/powerpc/kvm/book3s_64_vio.c
> >>+++ b/arch/powerpc/kvm/book3s_64_vio.c
> >>@@ -27,6 +27,7 @@
> >>  #include <linux/hugetlb.h>
> >>  #include <linux/list.h>
> >>  #include <linux/anon_inodes.h>
> >>+#include <linux/iommu.h>
> >>
> >>  #include <asm/tlbflush.h>
> >>  #include <asm/kvm_ppc.h>
> >>@@ -95,10 +96,18 @@ static void release_spapr_tce_table(struct rcu_head *head)
> >>  	struct kvmppc_spapr_tce_table *stt = container_of(head,
> >>  			struct kvmppc_spapr_tce_table, rcu);
> >>  	unsigned long i, npages = kvmppc_tce_pages(stt->size);
> >>+	struct kvmppc_spapr_tce_group *kg;
> >>
> >>  	for (i = 0; i < npages; i++)
> >>  		__free_page(stt->pages[i]);
> >>
> >>+	while (!list_empty(&stt->groups)) {
> >>+		kg = list_first_entry(&stt->groups,
> >>+				struct kvmppc_spapr_tce_group, next);
> >>+		list_del(&kg->next);
> >>+		kfree(kg);
> >>+	}
> >>+
> >>  	kfree(stt);
> >>  }
> >>
> >>@@ -129,9 +138,15 @@ static int kvm_spapr_tce_mmap(struct file *file, struct vm_area_struct *vma)
> >>  static int kvm_spapr_tce_release(struct inode *inode, struct file *filp)
> >>  {
> >>  	struct kvmppc_spapr_tce_table *stt = filp->private_data;
> >>+	struct kvmppc_spapr_tce_group *kg;
> >>
> >>  	list_del_rcu(&stt->list);
> >>
> >>+	list_for_each_entry_rcu(kg, &stt->groups, next)	{
> >>+		iommu_group_put(kg->refgrp);
> >>+		kg->refgrp = NULL;
> >>+	}
> >
> >What's the reason for this kind of two-phase deletion?  Dereffing the
> >group here, and setting to NULL, then actually removing from the liast above.
> 
> Well, this way I have only one RCU-delayed release_spapr_tce_table(). The
> other option would be to call for each @kg:
> - list_del(&kg->next);
> - call_rcu()
> 
> as release_spapr_tce_table() won't be able to delete them - they are not in
> the list anymore.

Ah, ok, that makes sense.

> I suppose I can reuse kvm_spapr_tce_put_group(), this looks inaccurate...
> 
> 
> 
> >
> >>  	kvm_put_kvm(stt->kvm);
> >>
> >>  	kvmppc_account_memlimit(
> >>@@ -146,6 +161,98 @@ static const struct file_operations kvm_spapr_tce_fops = {
> >>  	.release	= kvm_spapr_tce_release,
> >>  };
> >>
> >>+extern long kvm_spapr_tce_attach_iommu_group(struct kvm *kvm,
> >>+				unsigned long liobn,
> >>+				phys_addr_t start_addr,
> >>+				struct iommu_group *grp)
> >>+{
> >>+	struct kvmppc_spapr_tce_table *stt = NULL;
> >>+	struct iommu_table_group *table_group;
> >>+	long i;
> >>+	bool found = false;
> >>+	struct kvmppc_spapr_tce_group *kg;
> >>+	struct iommu_table *tbltmp;
> >>+
> >>+	/* Check this LIOBN hasn't been previously allocated */
> >
> >This comment does not appear to be correct.
> >
> >>+	list_for_each_entry_rcu(stt, &kvm->arch.spapr_tce_tables, list) {
> >>+		if (stt->liobn == liobn) {
> >>+			if ((stt->offset << stt->page_shift) != start_addr)
> >>+				return -EINVAL;
> >>+
> >>+			found = true;
> >>+			break;
> >>+		}
> >>+	}
> >>+
> >>+	if (!found)
> >>+		return -ENODEV;
> >>+
> >>+	/* Find IOMMU group and table at @start_addr */
> >>+	table_group = iommu_group_get_iommudata(grp);
> >>+	if (!table_group)
> >>+		return -EFAULT;
> >>+
> >>+	tbltmp = NULL;
> >>+	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
> >>+		struct iommu_table *tbl = table_group->tables[i];
> >>+
> >>+		if (!tbl)
> >>+			continue;
> >>+
> >>+		if ((tbl->it_page_shift == stt->page_shift) &&
> >>+				(tbl->it_offset == stt->offset)) {
> >>+			tbltmp = tbl;
> >>+			break;
> >>+		}
> >>+	}
> >>+	if (!tbltmp)
> >>+		return -ENODEV;
> >>+
> >>+	list_for_each_entry_rcu(kg, &stt->groups, next) {
> >>+		if (kg->refgrp == grp)
> >>+			return -EBUSY;
> >>+	}
> >>+
> >>+	kg = kzalloc(sizeof(*kg), GFP_KERNEL);
> >>+	kg->refgrp = grp;
> >>+	kg->tbl = tbltmp;
> >>+	list_add_rcu(&kg->next, &stt->groups);
> >>+
> >>+	return 0;
> >>+}
> >>+
> >>+static void kvm_spapr_tce_put_group(struct rcu_head *head)
> >>+{
> >>+	struct kvmppc_spapr_tce_group *kg = container_of(head,
> >>+			struct kvmppc_spapr_tce_group, rcu);
> >>+
> >>+	iommu_group_put(kg->refgrp);
> >>+	kg->refgrp = NULL;
> >>+	kfree(kg);
> >>+}
> >>+
> >>+extern void kvm_spapr_tce_detach_iommu_group(struct kvm *kvm,
> >>+				struct iommu_group *grp)
> >
> >Hrm.  attach takes an explicit liobn, but this one iterates over all
> >liobns.  Why the asymmetry?
> 
> 
> For attach(), LIOBN is specified in an additional (to VFIO KVM device's "add
> group") ioctl(). There is no need for "detach" ioctl() as we only want this
> detach() to happen when a group is removed from a container, and in this
> case the usual KVM_DEV_VFIO_GROUP_DEL is good enough hint that we need to
> detach LIOBN. Since _DEL does not take LIOBN, here I have a loop.
> 
> I'll put this in the commit log next time.
> 
> 
> >
> >>+{
> >>+	struct kvmppc_spapr_tce_table *stt;
> >>+	struct iommu_table_group *table_group;
> >>+	struct kvmppc_spapr_tce_group *kg;
> >>+
> >>+	table_group = iommu_group_get_iommudata(grp);
> >>+	if (!table_group)
> >>+		return;
> >>+
> >>+	list_for_each_entry_rcu(stt, &kvm->arch.spapr_tce_tables, list) {
> >>+		list_for_each_entry_rcu(kg, &stt->groups, next) {
> >>+			if (kg->refgrp == grp) {
> >>+				list_del_rcu(&kg->next);
> >>+				call_rcu(&kg->rcu, kvm_spapr_tce_put_group);
> >>+				break;
> >>+			}
> >>+		}
> >>+	}
> >>+}
> >>+
> >>  long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
> >>  				   struct kvm_create_spapr_tce_64 *args)
> >>  {
> >>@@ -181,6 +288,7 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
> >>  	stt->offset = args->offset;
> >>  	stt->size = size;
> >>  	stt->kvm = kvm;
> >>+	INIT_LIST_HEAD_RCU(&stt->groups);
> >>
> >>  	for (i = 0; i < npages; i++) {
> >>  		stt->pages[i] = alloc_page(GFP_KERNEL | __GFP_ZERO);
> >
> 
> 

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

  reply	other threads:[~2016-03-08  5:10 UTC|newest]

Thread overview: 46+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-03-07  3:41 [PATCH kernel 0/9] KVM, PPC, VFIO: Enable in-kernel acceleration Alexey Kardashevskiy
2016-03-07  3:41 ` [PATCH kernel 1/9] KVM: PPC: Reserve KVM_CAP_SPAPR_TCE_VFIO capability number Alexey Kardashevskiy
2016-03-07  4:58   ` David Gibson
2016-03-07  3:41 ` [PATCH kernel 2/9] powerpc/mmu: Add real mode support for IOMMU preregistered memory Alexey Kardashevskiy
2016-03-07  5:30   ` David Gibson
2016-03-07  3:41 ` [PATCH kernel 3/9] KVM: PPC: Use preregistered memory API to access TCE list Alexey Kardashevskiy
2016-03-07  6:00   ` David Gibson
2016-03-08  5:47     ` Alexey Kardashevskiy
2016-03-08  6:30       ` David Gibson
2016-03-09  8:55         ` Alexey Kardashevskiy
2016-03-09 23:46           ` David Gibson
2016-03-10  8:33     ` Paul Mackerras
2016-03-10 23:42       ` David Gibson
2016-03-07  3:41 ` [PATCH kernel 4/9] powerpc/powernv/iommu: Add real mode version of xchg() Alexey Kardashevskiy
2016-03-07  6:05   ` David Gibson
2016-03-07  7:32     ` Alexey Kardashevskiy
2016-03-08  4:50       ` David Gibson
2016-03-10  8:43   ` Paul Mackerras
2016-03-10  8:46   ` Paul Mackerras
2016-03-07  3:41 ` [PATCH kernel 5/9] KVM: PPC: Enable IOMMU_API for KVM_BOOK3S_64 permanently Alexey Kardashevskiy
2016-03-07  3:41 ` [PATCH kernel 6/9] KVM: PPC: Associate IOMMU group with guest view of TCE table Alexey Kardashevskiy
2016-03-07  6:25   ` David Gibson
2016-03-07  9:38     ` Alexey Kardashevskiy
2016-03-08  4:55       ` David Gibson [this message]
2016-03-07  3:41 ` [PATCH kernel 7/9] KVM: PPC: Create a virtual-mode only TCE table handlers Alexey Kardashevskiy
2016-03-08  6:32   ` David Gibson
2016-03-07  3:41 ` [PATCH kernel 8/9] KVM: PPC: Add in-kernel handling for VFIO Alexey Kardashevskiy
2016-03-08 11:08   ` David Gibson
2016-03-09  8:46     ` Alexey Kardashevskiy
2016-03-10  5:18       ` David Gibson
2016-03-11  2:15         ` Alexey Kardashevskiy
2016-03-15  6:00           ` David Gibson
2016-03-07  3:41 ` [PATCH kernel 9/9] KVM: PPC: VFIO device: support SPAPR TCE Alexey Kardashevskiy
2016-03-09  5:45   ` David Gibson
2016-03-09  9:20     ` Alexey Kardashevskiy
2016-03-10  5:21       ` David Gibson
2016-03-10 23:09         ` Alexey Kardashevskiy
2016-03-15  6:04           ` David Gibson
     [not found]             ` <15389a41428.27cb.1ca38dd7e845b990cd13d431eb58563d@ozlabs.ru>
     [not found]               ` <20160321051932.GJ23586@voom.redhat.com>
2016-03-22  0:34                 ` Alexey Kardashevskiy
2016-03-23  3:03                   ` David Gibson
2016-06-09  6:47                     ` Alexey Kardashevskiy
2016-06-10  6:50                       ` David Gibson
2016-06-14  3:30                         ` Alexey Kardashevskiy
2016-06-15  4:43                           ` David Gibson
2016-04-08  9:13     ` Alexey Kardashevskiy
2016-04-11  3:36       ` David Gibson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20160308045508.GY22546@voom.fritz.box \
    --to=david@gibson.dropbear.id.au \
    --cc=aik@ozlabs.ru \
    --cc=alex.williamson@redhat.com \
    --cc=kvm-ppc@vger.kernel.org \
    --cc=kvm@vger.kernel.org \
    --cc=linuxppc-dev@lists.ozlabs.org \
    --cc=paulus@samba.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).