linuxppc-dev.lists.ozlabs.org archive mirror
 help / color / mirror / Atom feed
From: Alexey Kardashevskiy <aik@ozlabs.ru>
To: David Gibson <david@gibson.dropbear.id.au>
Cc: linuxppc-dev@lists.ozlabs.org, Paul Mackerras <paulus@samba.org>,
	Alex Williamson <alex.williamson@redhat.com>,
	kvm-ppc@vger.kernel.org, kvm@vger.kernel.org
Subject: Re: [PATCH kernel 6/9] KVM: PPC: Associate IOMMU group with guest view of TCE table
Date: Mon, 7 Mar 2016 20:38:13 +1100	[thread overview]
Message-ID: <56DD4C05.4060109@ozlabs.ru> (raw)
In-Reply-To: <20160307062523.GN22546@voom.fritz.box>

On 03/07/2016 05:25 PM, David Gibson wrote:
> On Mon, Mar 07, 2016 at 02:41:14PM +1100, Alexey Kardashevskiy wrote:
>> The existing in-kernel TCE table for emulated devices contains
>> guest physical addresses which are accesses by emulated devices.
>> Since we need to keep this information for VFIO devices too
>> in order to implement H_GET_TCE, we are reusing it.
>>
>> This adds IOMMU group list to kvmppc_spapr_tce_table. Each group
>> will have an iommu_table pointer.
>>
>> This adds kvm_spapr_tce_attach_iommu_group() helper and its detach
>> counterpart to manage the lists.
>>
>> This puts a group when:
>> - guest copy of TCE table is destroyed when TCE table fd is closed;
>> - kvm_spapr_tce_detach_iommu_group() is called from
>> the KVM_DEV_VFIO_GROUP_DEL ioctl handler in the case vfio-pci hotunplug
>> (will be added in the following patch).
>>
>> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
>> ---
>>   arch/powerpc/include/asm/kvm_host.h |   8 +++
>>   arch/powerpc/include/asm/kvm_ppc.h  |   6 ++
>>   arch/powerpc/kvm/book3s_64_vio.c    | 108 ++++++++++++++++++++++++++++++++++++
>>   3 files changed, 122 insertions(+)
>>
>> diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
>> index 2e7c791..2c5c823 100644
>> --- a/arch/powerpc/include/asm/kvm_host.h
>> +++ b/arch/powerpc/include/asm/kvm_host.h
>> @@ -178,6 +178,13 @@ struct kvmppc_pginfo {
>>   	atomic_t refcnt;
>>   };
>>
>> +struct kvmppc_spapr_tce_group {
>> +	struct list_head next;
>> +	struct rcu_head rcu;
>> +	struct iommu_group *refgrp;/* for reference counting only */
>> +	struct iommu_table *tbl;
>> +};
>> +
>>   struct kvmppc_spapr_tce_table {
>>   	struct list_head list;
>>   	struct kvm *kvm;
>> @@ -186,6 +193,7 @@ struct kvmppc_spapr_tce_table {
>>   	u32 page_shift;
>>   	u64 offset;		/* in pages */
>>   	u64 size;		/* window size in pages */
>> +	struct list_head groups;
>>   	struct page *pages[0];
>>   };
>>
>> diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
>> index 2544eda..d1482dc 100644
>> --- a/arch/powerpc/include/asm/kvm_ppc.h
>> +++ b/arch/powerpc/include/asm/kvm_ppc.h
>> @@ -164,6 +164,12 @@ extern void kvmppc_map_vrma(struct kvm_vcpu *vcpu,
>>   			struct kvm_memory_slot *memslot, unsigned long porder);
>>   extern int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu);
>>
>> +extern long kvm_spapr_tce_attach_iommu_group(struct kvm *kvm,
>> +				unsigned long liobn,
>> +				phys_addr_t start_addr,
>> +				struct iommu_group *grp);
>> +extern void kvm_spapr_tce_detach_iommu_group(struct kvm *kvm,
>> +				struct iommu_group *grp);
>>   extern long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
>>   				struct kvm_create_spapr_tce_64 *args);
>>   extern struct kvmppc_spapr_tce_table *kvmppc_find_table(
>> diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c
>> index 2c2d103..846d16d 100644
>> --- a/arch/powerpc/kvm/book3s_64_vio.c
>> +++ b/arch/powerpc/kvm/book3s_64_vio.c
>> @@ -27,6 +27,7 @@
>>   #include <linux/hugetlb.h>
>>   #include <linux/list.h>
>>   #include <linux/anon_inodes.h>
>> +#include <linux/iommu.h>
>>
>>   #include <asm/tlbflush.h>
>>   #include <asm/kvm_ppc.h>
>> @@ -95,10 +96,18 @@ static void release_spapr_tce_table(struct rcu_head *head)
>>   	struct kvmppc_spapr_tce_table *stt = container_of(head,
>>   			struct kvmppc_spapr_tce_table, rcu);
>>   	unsigned long i, npages = kvmppc_tce_pages(stt->size);
>> +	struct kvmppc_spapr_tce_group *kg;
>>
>>   	for (i = 0; i < npages; i++)
>>   		__free_page(stt->pages[i]);
>>
>> +	while (!list_empty(&stt->groups)) {
>> +		kg = list_first_entry(&stt->groups,
>> +				struct kvmppc_spapr_tce_group, next);
>> +		list_del(&kg->next);
>> +		kfree(kg);
>> +	}
>> +
>>   	kfree(stt);
>>   }
>>
>> @@ -129,9 +138,15 @@ static int kvm_spapr_tce_mmap(struct file *file, struct vm_area_struct *vma)
>>   static int kvm_spapr_tce_release(struct inode *inode, struct file *filp)
>>   {
>>   	struct kvmppc_spapr_tce_table *stt = filp->private_data;
>> +	struct kvmppc_spapr_tce_group *kg;
>>
>>   	list_del_rcu(&stt->list);
>>
>> +	list_for_each_entry_rcu(kg, &stt->groups, next)	{
>> +		iommu_group_put(kg->refgrp);
>> +		kg->refgrp = NULL;
>> +	}
>
> What's the reason for this kind of two-phase deletion?  Dereffing the
> group here, and setting to NULL, then actually removing from the liast above.

Well, this way I have only one RCU-delayed release_spapr_tce_table(). The 
other option would be to call for each @kg:
- list_del(&kg->next);
- call_rcu()

as release_spapr_tce_table() won't be able to delete them - they are not in 
the list anymore.

I suppose I can reuse kvm_spapr_tce_put_group(), this looks inaccurate...



>
>>   	kvm_put_kvm(stt->kvm);
>>
>>   	kvmppc_account_memlimit(
>> @@ -146,6 +161,98 @@ static const struct file_operations kvm_spapr_tce_fops = {
>>   	.release	= kvm_spapr_tce_release,
>>   };
>>
>> +extern long kvm_spapr_tce_attach_iommu_group(struct kvm *kvm,
>> +				unsigned long liobn,
>> +				phys_addr_t start_addr,
>> +				struct iommu_group *grp)
>> +{
>> +	struct kvmppc_spapr_tce_table *stt = NULL;
>> +	struct iommu_table_group *table_group;
>> +	long i;
>> +	bool found = false;
>> +	struct kvmppc_spapr_tce_group *kg;
>> +	struct iommu_table *tbltmp;
>> +
>> +	/* Check this LIOBN hasn't been previously allocated */
>
> This comment does not appear to be correct.
>
>> +	list_for_each_entry_rcu(stt, &kvm->arch.spapr_tce_tables, list) {
>> +		if (stt->liobn == liobn) {
>> +			if ((stt->offset << stt->page_shift) != start_addr)
>> +				return -EINVAL;
>> +
>> +			found = true;
>> +			break;
>> +		}
>> +	}
>> +
>> +	if (!found)
>> +		return -ENODEV;
>> +
>> +	/* Find IOMMU group and table at @start_addr */
>> +	table_group = iommu_group_get_iommudata(grp);
>> +	if (!table_group)
>> +		return -EFAULT;
>> +
>> +	tbltmp = NULL;
>> +	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
>> +		struct iommu_table *tbl = table_group->tables[i];
>> +
>> +		if (!tbl)
>> +			continue;
>> +
>> +		if ((tbl->it_page_shift == stt->page_shift) &&
>> +				(tbl->it_offset == stt->offset)) {
>> +			tbltmp = tbl;
>> +			break;
>> +		}
>> +	}
>> +	if (!tbltmp)
>> +		return -ENODEV;
>> +
>> +	list_for_each_entry_rcu(kg, &stt->groups, next) {
>> +		if (kg->refgrp == grp)
>> +			return -EBUSY;
>> +	}
>> +
>> +	kg = kzalloc(sizeof(*kg), GFP_KERNEL);
>> +	kg->refgrp = grp;
>> +	kg->tbl = tbltmp;
>> +	list_add_rcu(&kg->next, &stt->groups);
>> +
>> +	return 0;
>> +}
>> +
>> +static void kvm_spapr_tce_put_group(struct rcu_head *head)
>> +{
>> +	struct kvmppc_spapr_tce_group *kg = container_of(head,
>> +			struct kvmppc_spapr_tce_group, rcu);
>> +
>> +	iommu_group_put(kg->refgrp);
>> +	kg->refgrp = NULL;
>> +	kfree(kg);
>> +}
>> +
>> +extern void kvm_spapr_tce_detach_iommu_group(struct kvm *kvm,
>> +				struct iommu_group *grp)
>
> Hrm.  attach takes an explicit liobn, but this one iterates over all
> liobns.  Why the asymmetry?


For attach(), LIOBN is specified in an additional (to VFIO KVM device's 
"add group") ioctl(). There is no need for "detach" ioctl() as we only want 
this detach() to happen when a group is removed from a container, and in 
this case the usual KVM_DEV_VFIO_GROUP_DEL is good enough hint that we need 
to detach LIOBN. Since _DEL does not take LIOBN, here I have a loop.

I'll put this in the commit log next time.


>
>> +{
>> +	struct kvmppc_spapr_tce_table *stt;
>> +	struct iommu_table_group *table_group;
>> +	struct kvmppc_spapr_tce_group *kg;
>> +
>> +	table_group = iommu_group_get_iommudata(grp);
>> +	if (!table_group)
>> +		return;
>> +
>> +	list_for_each_entry_rcu(stt, &kvm->arch.spapr_tce_tables, list) {
>> +		list_for_each_entry_rcu(kg, &stt->groups, next) {
>> +			if (kg->refgrp == grp) {
>> +				list_del_rcu(&kg->next);
>> +				call_rcu(&kg->rcu, kvm_spapr_tce_put_group);
>> +				break;
>> +			}
>> +		}
>> +	}
>> +}
>> +
>>   long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
>>   				   struct kvm_create_spapr_tce_64 *args)
>>   {
>> @@ -181,6 +288,7 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
>>   	stt->offset = args->offset;
>>   	stt->size = size;
>>   	stt->kvm = kvm;
>> +	INIT_LIST_HEAD_RCU(&stt->groups);
>>
>>   	for (i = 0; i < npages; i++) {
>>   		stt->pages[i] = alloc_page(GFP_KERNEL | __GFP_ZERO);
>


-- 
Alexey

  reply	other threads:[~2016-03-07  9:38 UTC|newest]

Thread overview: 46+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-03-07  3:41 [PATCH kernel 0/9] KVM, PPC, VFIO: Enable in-kernel acceleration Alexey Kardashevskiy
2016-03-07  3:41 ` [PATCH kernel 1/9] KVM: PPC: Reserve KVM_CAP_SPAPR_TCE_VFIO capability number Alexey Kardashevskiy
2016-03-07  4:58   ` David Gibson
2016-03-07  3:41 ` [PATCH kernel 2/9] powerpc/mmu: Add real mode support for IOMMU preregistered memory Alexey Kardashevskiy
2016-03-07  5:30   ` David Gibson
2016-03-07  3:41 ` [PATCH kernel 3/9] KVM: PPC: Use preregistered memory API to access TCE list Alexey Kardashevskiy
2016-03-07  6:00   ` David Gibson
2016-03-08  5:47     ` Alexey Kardashevskiy
2016-03-08  6:30       ` David Gibson
2016-03-09  8:55         ` Alexey Kardashevskiy
2016-03-09 23:46           ` David Gibson
2016-03-10  8:33     ` Paul Mackerras
2016-03-10 23:42       ` David Gibson
2016-03-07  3:41 ` [PATCH kernel 4/9] powerpc/powernv/iommu: Add real mode version of xchg() Alexey Kardashevskiy
2016-03-07  6:05   ` David Gibson
2016-03-07  7:32     ` Alexey Kardashevskiy
2016-03-08  4:50       ` David Gibson
2016-03-10  8:43   ` Paul Mackerras
2016-03-10  8:46   ` Paul Mackerras
2016-03-07  3:41 ` [PATCH kernel 5/9] KVM: PPC: Enable IOMMU_API for KVM_BOOK3S_64 permanently Alexey Kardashevskiy
2016-03-07  3:41 ` [PATCH kernel 6/9] KVM: PPC: Associate IOMMU group with guest view of TCE table Alexey Kardashevskiy
2016-03-07  6:25   ` David Gibson
2016-03-07  9:38     ` Alexey Kardashevskiy [this message]
2016-03-08  4:55       ` David Gibson
2016-03-07  3:41 ` [PATCH kernel 7/9] KVM: PPC: Create a virtual-mode only TCE table handlers Alexey Kardashevskiy
2016-03-08  6:32   ` David Gibson
2016-03-07  3:41 ` [PATCH kernel 8/9] KVM: PPC: Add in-kernel handling for VFIO Alexey Kardashevskiy
2016-03-08 11:08   ` David Gibson
2016-03-09  8:46     ` Alexey Kardashevskiy
2016-03-10  5:18       ` David Gibson
2016-03-11  2:15         ` Alexey Kardashevskiy
2016-03-15  6:00           ` David Gibson
2016-03-07  3:41 ` [PATCH kernel 9/9] KVM: PPC: VFIO device: support SPAPR TCE Alexey Kardashevskiy
2016-03-09  5:45   ` David Gibson
2016-03-09  9:20     ` Alexey Kardashevskiy
2016-03-10  5:21       ` David Gibson
2016-03-10 23:09         ` Alexey Kardashevskiy
2016-03-15  6:04           ` David Gibson
     [not found]             ` <15389a41428.27cb.1ca38dd7e845b990cd13d431eb58563d@ozlabs.ru>
     [not found]               ` <20160321051932.GJ23586@voom.redhat.com>
2016-03-22  0:34                 ` Alexey Kardashevskiy
2016-03-23  3:03                   ` David Gibson
2016-06-09  6:47                     ` Alexey Kardashevskiy
2016-06-10  6:50                       ` David Gibson
2016-06-14  3:30                         ` Alexey Kardashevskiy
2016-06-15  4:43                           ` David Gibson
2016-04-08  9:13     ` Alexey Kardashevskiy
2016-04-11  3:36       ` David Gibson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=56DD4C05.4060109@ozlabs.ru \
    --to=aik@ozlabs.ru \
    --cc=alex.williamson@redhat.com \
    --cc=david@gibson.dropbear.id.au \
    --cc=kvm-ppc@vger.kernel.org \
    --cc=kvm@vger.kernel.org \
    --cc=linuxppc-dev@lists.ozlabs.org \
    --cc=paulus@samba.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).