Re: [PATCH 4/4] vfio powerpc: added real mode support

All of lore.kernel.org
 help / color / mirror / Atom feed

From: Paul Mackerras <paulus@samba.org>
To: aik@ozlabs.ru
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>,
	Alexander Graf <agraf@suse.de>,
	Michael Ellerman <michael@ellerman.id.au>,
	linuxppc-dev@lists.ozlabs.org, linux-kernel@vger.kernel.org,
	kvm-ppc@vger.kernel.org, kvm@vger.kernel.org,
	David Gibson <david@gibson.dropbear.id.au>
Subject: Re: [PATCH 4/4] vfio powerpc: added real mode support
Date: Fri, 15 Feb 2013 03:54:00 +0000	[thread overview]
Message-ID: <20130215035400.GD25015@drongo> (raw)
In-Reply-To: <5118e071.22ca320a.1f08.ffffe2f4@mx.google.com>

On Mon, Feb 11, 2013 at 11:12:43PM +1100, aik@ozlabs.ru wrote:
> From: Alexey Kardashevskiy <aik@ozlabs.ru>
> 
> The patch allows the host kernel to handle H_PUT_TCE request
> without involving QEMU in it what should save time on switching
> from the kernel to QEMU and back.
> 
> The patch adds an IOMMU ID parameter into the KVM_CAP_SPAPR_TCE ioctl,
> QEMU needs to be fixed to support that.
> 
> At the moment H_PUT_TCE is processed in the virtual mode as the page
> to be mapped may not be present in the RAM so paging may be involved as
> it can be done from the virtual mode only.
> 
> Tests show that this patch increases transmission speed from 220MB/s
> to 750..1020MB/s on 10Gb network (Chelsea CXGB3 10Gb ethernet card).
[snip]
> diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
> index b4fdabc..acb9cdc 100644
> --- a/arch/powerpc/kernel/iommu.c
> +++ b/arch/powerpc/kernel/iommu.c
> @@ -47,6 +47,8 @@
>  #include <asm/fadump.h>
>  #include <asm/vio.h>
>  #include <asm/tce.h>
> +#include <asm/kvm_book3s_64.h>
> +#include <asm/page.h>
>  
>  #define DBG(...)
>  
> @@ -727,6 +729,7 @@ void iommu_register_group(struct iommu_table * tbl,
>  		return;
>  	}
>  	tbl->it_group = grp;
> +	INIT_LIST_HEAD(&tbl->it_hugepages);
>  	iommu_group_set_iommudata(grp, tbl, group_release);
>  	iommu_group_set_name(grp, kasprintf(GFP_KERNEL, "domain%d-pe%lx",
>  			domain_number, pe_num));
> @@ -906,6 +909,83 @@ void kvm_iommu_unmap_pages(struct kvm *kvm, struct kvm_memory_slot *slot)
>  {
>  }
>  
> +/*
> + * The KVM guest can be backed with 16MB pages (qemu switch
> + * -mem-path /var/lib/hugetlbfs/global/pagesize-16MB/).
> + * In this case, we cannot do page counting from the real mode
> + * as the compound pages are used - they are linked in a list
> + * with pointers as virtual addresses which are inaccessible
> + * in real mode.
> + *
> + * The code below keeps a 16MB pages list and uses page struct
> + * in real mode if it is already locked in RAM and inserted into
> + * the list or switches to the virtual mode where it can be
> + * handled in a usual manner.
> + */
> +struct iommu_kvmppc_hugepages {
> +	struct list_head list;
> +	pte_t pte;		/* Huge page PTE */
> +	unsigned long pa;	/* Base phys address used as a real TCE */
> +	struct page *page;	/* page struct of the very first subpage */
> +	unsigned long size;	/* Huge page size (always 16MB at the moment) */
> +	bool dirty;		/* Dirty bit */
> +};
> +
> +static struct iommu_kvmppc_hugepages *find_hp_by_pte(struct iommu_table *tbl,
> +		pte_t pte)
> +{
> +	struct iommu_kvmppc_hugepages *hp;
> +
> +	list_for_each_entry(hp, &tbl->it_hugepages, list) {
> +		if (hp->pte == pte)
> +			return hp;
> +	}
> +
> +	return NULL;
> +}
> +
> +static struct iommu_kvmppc_hugepages *find_hp_by_pa(struct iommu_table *tbl,
> +		unsigned long pa)
> +{
> +	struct iommu_kvmppc_hugepages *hp;
> +
> +	list_for_each_entry(hp, &tbl->it_hugepages, list) {
> +		if ((hp->pa <= pa) && (pa < hp->pa + hp->size))
> +			return hp;
> +	}
> +
> +	return NULL;
> +}
> +
> +static struct iommu_kvmppc_hugepages *add_hp(struct iommu_table *tbl,
> +		pte_t pte, unsigned long va, unsigned long pg_size)
> +{
> +	int ret;
> +	struct iommu_kvmppc_hugepages *hp;
> +
> +	hp = kzalloc(sizeof(*hp), GFP_KERNEL);
> +	if (!hp)
> +		return NULL;
> +
> +	hp->pte = pte;
> +	va = va & ~(pg_size - 1);
> +	ret = get_user_pages_fast(va, 1, true/*write*/, &hp->page);
> +	if ((ret != 1) || !hp->page) {
> +		kfree(hp);
> +		return NULL;
> +	}
> +#if defined(HASHED_PAGE_VIRTUAL) || defined(WANT_PAGE_VIRTUAL)
> +#error TODO: fix to avoid page_address() here
> +#endif
> +	hp->pa = __pa((unsigned long) page_address(hp->page));
> +
> +	hp->size = pg_size;
> +
> +	list_add(&hp->list, &tbl->it_hugepages);
> +
> +	return hp;
> +}

I don't see any locking here.  What stops one cpu doing add_hp() from
racing with another doing find_hp_by_pte() or find_hp_by_pa()?

[snip]
> @@ -1021,6 +1123,24 @@ long iommu_clear_tce_user_mode(struct iommu_table *tbl, unsigned long ioba,
>  }
>  EXPORT_SYMBOL_GPL(iommu_clear_tce_user_mode);
>  
> +long iommu_clear_tce_real_mode(struct iommu_table *tbl, unsigned long ioba,
> +		unsigned long tce_value, unsigned long npages)
> +{
> +	long ret;
> +	unsigned long entry = ioba >> IOMMU_PAGE_SHIFT;
> +
> +	ret = tce_clear_param_check(tbl, ioba, tce_value, npages);
> +	if (!ret)
> +		ret = clear_tce(tbl, true, entry, npages);
> +
> +	if (ret < 0)
> +		pr_err("iommu_tce: %s failed ioba=%lx, tce_value=%lx ret=%ld\n",
> +				__func__, ioba, tce_value, ret);

Better to avoid printk in real mode if at all possible, particularly
if they're guest-triggerable.

[snip]
> @@ -195,15 +225,43 @@ long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu,
>  	if (!stt)
>  		return H_TOO_HARD;
>  
> +	if (stt->virtmode_only)
> +		return H_TOO_HARD;
> +
>  	tces = (void *) get_real_address(vcpu, tce_list, false, NULL, NULL);
>  	if (!tces)
>  		return H_TOO_HARD;
>  
>  	/* Emulated IO */
> -	for (i = 0; (i < npages) && !ret; ++i, ioba += IOMMU_PAGE_SIZE)
> -		ret = emulated_h_put_tce(stt, ioba, tces[i]);
> +	if (!stt->tbl) {
> +		for (i = 0; (i < npages) && !ret; ++i, ioba += IOMMU_PAGE_SIZE)
> +			ret = emulated_h_put_tce(stt, ioba, tces[i]);
> +
> +		return ret;
> +	}
> +
> +	/* VFIO IOMMU */
> +	for (i = 0; (i < npages) && !ret; ++i, ioba += IOMMU_PAGE_SIZE) {
> +		unsigned long hpa, pg_size = 0;
> +		pte_t pte = 0;
> +
> +		hpa = get_real_address(vcpu, tces[i], tces[i] & TCE_PCI_WRITE,
> +				&pte, &pg_size);
> +		if (!hpa)
> +			return H_TOO_HARD;
> +
> +		ret = iommu_put_tce_real_mode(stt->tbl,
> +				ioba, hpa, pte, pg_size);

If we get a failure part-way through, should we go back and remove the
entries we put in?

[snip]
> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> index 26e2b271..3727ea6 100644
> --- a/include/uapi/linux/kvm.h
> +++ b/include/uapi/linux/kvm.h
> @@ -863,6 +863,7 @@ struct kvm_s390_ucas_mapping {
>  #define KVM_ALLOCATE_RMA	  _IOR(KVMIO,  0xa9, struct kvm_allocate_rma)
>  /* Available with KVM_CAP_PPC_HTAB_FD */
>  #define KVM_PPC_GET_HTAB_FD	  _IOW(KVMIO,  0xaa, struct kvm_get_htab_fd)
> +#define KVM_CREATE_SPAPR_TCE_IOMMU _IOW(KVMIO,  0xaf, struct kvm_create_spapr_tce_iommu)

This needs an entry in Documentation/virtual/kvm/api.txt.

Paul.

WARNING: multiple messages have this Message-ID (diff)

From: Paul Mackerras <paulus@samba.org>
To: aik@ozlabs.ru
Cc: kvm@vger.kernel.org, Alexander Graf <agraf@suse.de>,
	kvm-ppc@vger.kernel.org, linux-kernel@vger.kernel.org,
	linuxppc-dev@lists.ozlabs.org,
	David Gibson <david@gibson.dropbear.id.au>
Subject: Re: [PATCH 4/4] vfio powerpc: added real mode support
Date: Fri, 15 Feb 2013 14:54:00 +1100	[thread overview]
Message-ID: <20130215035400.GD25015@drongo> (raw)
In-Reply-To: <5118e071.22ca320a.1f08.ffffe2f4@mx.google.com>

On Mon, Feb 11, 2013 at 11:12:43PM +1100, aik@ozlabs.ru wrote:
> From: Alexey Kardashevskiy <aik@ozlabs.ru>
> 
> The patch allows the host kernel to handle H_PUT_TCE request
> without involving QEMU in it what should save time on switching
> from the kernel to QEMU and back.
> 
> The patch adds an IOMMU ID parameter into the KVM_CAP_SPAPR_TCE ioctl,
> QEMU needs to be fixed to support that.
> 
> At the moment H_PUT_TCE is processed in the virtual mode as the page
> to be mapped may not be present in the RAM so paging may be involved as
> it can be done from the virtual mode only.
> 
> Tests show that this patch increases transmission speed from 220MB/s
> to 750..1020MB/s on 10Gb network (Chelsea CXGB3 10Gb ethernet card).
[snip]
> diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
> index b4fdabc..acb9cdc 100644
> --- a/arch/powerpc/kernel/iommu.c
> +++ b/arch/powerpc/kernel/iommu.c
> @@ -47,6 +47,8 @@
>  #include <asm/fadump.h>
>  #include <asm/vio.h>
>  #include <asm/tce.h>
> +#include <asm/kvm_book3s_64.h>
> +#include <asm/page.h>
>  
>  #define DBG(...)
>  
> @@ -727,6 +729,7 @@ void iommu_register_group(struct iommu_table * tbl,
>  		return;
>  	}
>  	tbl->it_group = grp;
> +	INIT_LIST_HEAD(&tbl->it_hugepages);
>  	iommu_group_set_iommudata(grp, tbl, group_release);
>  	iommu_group_set_name(grp, kasprintf(GFP_KERNEL, "domain%d-pe%lx",
>  			domain_number, pe_num));
> @@ -906,6 +909,83 @@ void kvm_iommu_unmap_pages(struct kvm *kvm, struct kvm_memory_slot *slot)
>  {
>  }
>  
> +/*
> + * The KVM guest can be backed with 16MB pages (qemu switch
> + * -mem-path /var/lib/hugetlbfs/global/pagesize-16MB/).
> + * In this case, we cannot do page counting from the real mode
> + * as the compound pages are used - they are linked in a list
> + * with pointers as virtual addresses which are inaccessible
> + * in real mode.
> + *
> + * The code below keeps a 16MB pages list and uses page struct
> + * in real mode if it is already locked in RAM and inserted into
> + * the list or switches to the virtual mode where it can be
> + * handled in a usual manner.
> + */
> +struct iommu_kvmppc_hugepages {
> +	struct list_head list;
> +	pte_t pte;		/* Huge page PTE */
> +	unsigned long pa;	/* Base phys address used as a real TCE */
> +	struct page *page;	/* page struct of the very first subpage */
> +	unsigned long size;	/* Huge page size (always 16MB at the moment) */
> +	bool dirty;		/* Dirty bit */
> +};
> +
> +static struct iommu_kvmppc_hugepages *find_hp_by_pte(struct iommu_table *tbl,
> +		pte_t pte)
> +{
> +	struct iommu_kvmppc_hugepages *hp;
> +
> +	list_for_each_entry(hp, &tbl->it_hugepages, list) {
> +		if (hp->pte == pte)
> +			return hp;
> +	}
> +
> +	return NULL;
> +}
> +
> +static struct iommu_kvmppc_hugepages *find_hp_by_pa(struct iommu_table *tbl,
> +		unsigned long pa)
> +{
> +	struct iommu_kvmppc_hugepages *hp;
> +
> +	list_for_each_entry(hp, &tbl->it_hugepages, list) {
> +		if ((hp->pa <= pa) && (pa < hp->pa + hp->size))
> +			return hp;
> +	}
> +
> +	return NULL;
> +}
> +
> +static struct iommu_kvmppc_hugepages *add_hp(struct iommu_table *tbl,
> +		pte_t pte, unsigned long va, unsigned long pg_size)
> +{
> +	int ret;
> +	struct iommu_kvmppc_hugepages *hp;
> +
> +	hp = kzalloc(sizeof(*hp), GFP_KERNEL);
> +	if (!hp)
> +		return NULL;
> +
> +	hp->pte = pte;
> +	va = va & ~(pg_size - 1);
> +	ret = get_user_pages_fast(va, 1, true/*write*/, &hp->page);
> +	if ((ret != 1) || !hp->page) {
> +		kfree(hp);
> +		return NULL;
> +	}
> +#if defined(HASHED_PAGE_VIRTUAL) || defined(WANT_PAGE_VIRTUAL)
> +#error TODO: fix to avoid page_address() here
> +#endif
> +	hp->pa = __pa((unsigned long) page_address(hp->page));
> +
> +	hp->size = pg_size;
> +
> +	list_add(&hp->list, &tbl->it_hugepages);
> +
> +	return hp;
> +}

I don't see any locking here.  What stops one cpu doing add_hp() from
racing with another doing find_hp_by_pte() or find_hp_by_pa()?

[snip]
> @@ -1021,6 +1123,24 @@ long iommu_clear_tce_user_mode(struct iommu_table *tbl, unsigned long ioba,
>  }
>  EXPORT_SYMBOL_GPL(iommu_clear_tce_user_mode);
>  
> +long iommu_clear_tce_real_mode(struct iommu_table *tbl, unsigned long ioba,
> +		unsigned long tce_value, unsigned long npages)
> +{
> +	long ret;
> +	unsigned long entry = ioba >> IOMMU_PAGE_SHIFT;
> +
> +	ret = tce_clear_param_check(tbl, ioba, tce_value, npages);
> +	if (!ret)
> +		ret = clear_tce(tbl, true, entry, npages);
> +
> +	if (ret < 0)
> +		pr_err("iommu_tce: %s failed ioba=%lx, tce_value=%lx ret=%ld\n",
> +				__func__, ioba, tce_value, ret);

Better to avoid printk in real mode if at all possible, particularly
if they're guest-triggerable.

[snip]
> @@ -195,15 +225,43 @@ long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu,
>  	if (!stt)
>  		return H_TOO_HARD;
>  
> +	if (stt->virtmode_only)
> +		return H_TOO_HARD;
> +
>  	tces = (void *) get_real_address(vcpu, tce_list, false, NULL, NULL);
>  	if (!tces)
>  		return H_TOO_HARD;
>  
>  	/* Emulated IO */
> -	for (i = 0; (i < npages) && !ret; ++i, ioba += IOMMU_PAGE_SIZE)
> -		ret = emulated_h_put_tce(stt, ioba, tces[i]);
> +	if (!stt->tbl) {
> +		for (i = 0; (i < npages) && !ret; ++i, ioba += IOMMU_PAGE_SIZE)
> +			ret = emulated_h_put_tce(stt, ioba, tces[i]);
> +
> +		return ret;
> +	}
> +
> +	/* VFIO IOMMU */
> +	for (i = 0; (i < npages) && !ret; ++i, ioba += IOMMU_PAGE_SIZE) {
> +		unsigned long hpa, pg_size = 0;
> +		pte_t pte = 0;
> +
> +		hpa = get_real_address(vcpu, tces[i], tces[i] & TCE_PCI_WRITE,
> +				&pte, &pg_size);
> +		if (!hpa)
> +			return H_TOO_HARD;
> +
> +		ret = iommu_put_tce_real_mode(stt->tbl,
> +				ioba, hpa, pte, pg_size);

If we get a failure part-way through, should we go back and remove the
entries we put in?

[snip]
> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> index 26e2b271..3727ea6 100644
> --- a/include/uapi/linux/kvm.h
> +++ b/include/uapi/linux/kvm.h
> @@ -863,6 +863,7 @@ struct kvm_s390_ucas_mapping {
>  #define KVM_ALLOCATE_RMA	  _IOR(KVMIO,  0xa9, struct kvm_allocate_rma)
>  /* Available with KVM_CAP_PPC_HTAB_FD */
>  #define KVM_PPC_GET_HTAB_FD	  _IOW(KVMIO,  0xaa, struct kvm_get_htab_fd)
> +#define KVM_CREATE_SPAPR_TCE_IOMMU _IOW(KVMIO,  0xaf, struct kvm_create_spapr_tce_iommu)

This needs an entry in Documentation/virtual/kvm/api.txt.

Paul.

WARNING: multiple messages have this Message-ID (diff)

From: Paul Mackerras <paulus@samba.org>
To: aik@ozlabs.ru
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>,
	Alexander Graf <agraf@suse.de>,
	Michael Ellerman <michael@ellerman.id.au>,
	linuxppc-dev@lists.ozlabs.org, linux-kernel@vger.kernel.org,
	kvm-ppc@vger.kernel.org, kvm@vger.kernel.org,
	David Gibson <david@gibson.dropbear.id.au>
Subject: Re: [PATCH 4/4] vfio powerpc: added real mode support
Date: Fri, 15 Feb 2013 14:54:00 +1100	[thread overview]
Message-ID: <20130215035400.GD25015@drongo> (raw)
In-Reply-To: <5118e071.22ca320a.1f08.ffffe2f4@mx.google.com>

On Mon, Feb 11, 2013 at 11:12:43PM +1100, aik@ozlabs.ru wrote:
> From: Alexey Kardashevskiy <aik@ozlabs.ru>
> 
> The patch allows the host kernel to handle H_PUT_TCE request
> without involving QEMU in it what should save time on switching
> from the kernel to QEMU and back.
> 
> The patch adds an IOMMU ID parameter into the KVM_CAP_SPAPR_TCE ioctl,
> QEMU needs to be fixed to support that.
> 
> At the moment H_PUT_TCE is processed in the virtual mode as the page
> to be mapped may not be present in the RAM so paging may be involved as
> it can be done from the virtual mode only.
> 
> Tests show that this patch increases transmission speed from 220MB/s
> to 750..1020MB/s on 10Gb network (Chelsea CXGB3 10Gb ethernet card).
[snip]
> diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
> index b4fdabc..acb9cdc 100644
> --- a/arch/powerpc/kernel/iommu.c
> +++ b/arch/powerpc/kernel/iommu.c
> @@ -47,6 +47,8 @@
>  #include <asm/fadump.h>
>  #include <asm/vio.h>
>  #include <asm/tce.h>
> +#include <asm/kvm_book3s_64.h>
> +#include <asm/page.h>
>  
>  #define DBG(...)
>  
> @@ -727,6 +729,7 @@ void iommu_register_group(struct iommu_table * tbl,
>  		return;
>  	}
>  	tbl->it_group = grp;
> +	INIT_LIST_HEAD(&tbl->it_hugepages);
>  	iommu_group_set_iommudata(grp, tbl, group_release);
>  	iommu_group_set_name(grp, kasprintf(GFP_KERNEL, "domain%d-pe%lx",
>  			domain_number, pe_num));
> @@ -906,6 +909,83 @@ void kvm_iommu_unmap_pages(struct kvm *kvm, struct kvm_memory_slot *slot)
>  {
>  }
>  
> +/*
> + * The KVM guest can be backed with 16MB pages (qemu switch
> + * -mem-path /var/lib/hugetlbfs/global/pagesize-16MB/).
> + * In this case, we cannot do page counting from the real mode
> + * as the compound pages are used - they are linked in a list
> + * with pointers as virtual addresses which are inaccessible
> + * in real mode.
> + *
> + * The code below keeps a 16MB pages list and uses page struct
> + * in real mode if it is already locked in RAM and inserted into
> + * the list or switches to the virtual mode where it can be
> + * handled in a usual manner.
> + */
> +struct iommu_kvmppc_hugepages {
> +	struct list_head list;
> +	pte_t pte;		/* Huge page PTE */
> +	unsigned long pa;	/* Base phys address used as a real TCE */
> +	struct page *page;	/* page struct of the very first subpage */
> +	unsigned long size;	/* Huge page size (always 16MB at the moment) */
> +	bool dirty;		/* Dirty bit */
> +};
> +
> +static struct iommu_kvmppc_hugepages *find_hp_by_pte(struct iommu_table *tbl,
> +		pte_t pte)
> +{
> +	struct iommu_kvmppc_hugepages *hp;
> +
> +	list_for_each_entry(hp, &tbl->it_hugepages, list) {
> +		if (hp->pte == pte)
> +			return hp;
> +	}
> +
> +	return NULL;
> +}
> +
> +static struct iommu_kvmppc_hugepages *find_hp_by_pa(struct iommu_table *tbl,
> +		unsigned long pa)
> +{
> +	struct iommu_kvmppc_hugepages *hp;
> +
> +	list_for_each_entry(hp, &tbl->it_hugepages, list) {
> +		if ((hp->pa <= pa) && (pa < hp->pa + hp->size))
> +			return hp;
> +	}
> +
> +	return NULL;
> +}
> +
> +static struct iommu_kvmppc_hugepages *add_hp(struct iommu_table *tbl,
> +		pte_t pte, unsigned long va, unsigned long pg_size)
> +{
> +	int ret;
> +	struct iommu_kvmppc_hugepages *hp;
> +
> +	hp = kzalloc(sizeof(*hp), GFP_KERNEL);
> +	if (!hp)
> +		return NULL;
> +
> +	hp->pte = pte;
> +	va = va & ~(pg_size - 1);
> +	ret = get_user_pages_fast(va, 1, true/*write*/, &hp->page);
> +	if ((ret != 1) || !hp->page) {
> +		kfree(hp);
> +		return NULL;
> +	}
> +#if defined(HASHED_PAGE_VIRTUAL) || defined(WANT_PAGE_VIRTUAL)
> +#error TODO: fix to avoid page_address() here
> +#endif
> +	hp->pa = __pa((unsigned long) page_address(hp->page));
> +
> +	hp->size = pg_size;
> +
> +	list_add(&hp->list, &tbl->it_hugepages);
> +
> +	return hp;
> +}

I don't see any locking here.  What stops one cpu doing add_hp() from
racing with another doing find_hp_by_pte() or find_hp_by_pa()?

[snip]
> @@ -1021,6 +1123,24 @@ long iommu_clear_tce_user_mode(struct iommu_table *tbl, unsigned long ioba,
>  }
>  EXPORT_SYMBOL_GPL(iommu_clear_tce_user_mode);
>  
> +long iommu_clear_tce_real_mode(struct iommu_table *tbl, unsigned long ioba,
> +		unsigned long tce_value, unsigned long npages)
> +{
> +	long ret;
> +	unsigned long entry = ioba >> IOMMU_PAGE_SHIFT;
> +
> +	ret = tce_clear_param_check(tbl, ioba, tce_value, npages);
> +	if (!ret)
> +		ret = clear_tce(tbl, true, entry, npages);
> +
> +	if (ret < 0)
> +		pr_err("iommu_tce: %s failed ioba=%lx, tce_value=%lx ret=%ld\n",
> +				__func__, ioba, tce_value, ret);

Better to avoid printk in real mode if at all possible, particularly
if they're guest-triggerable.

[snip]
> @@ -195,15 +225,43 @@ long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu,
>  	if (!stt)
>  		return H_TOO_HARD;
>  
> +	if (stt->virtmode_only)
> +		return H_TOO_HARD;
> +
>  	tces = (void *) get_real_address(vcpu, tce_list, false, NULL, NULL);
>  	if (!tces)
>  		return H_TOO_HARD;
>  
>  	/* Emulated IO */
> -	for (i = 0; (i < npages) && !ret; ++i, ioba += IOMMU_PAGE_SIZE)
> -		ret = emulated_h_put_tce(stt, ioba, tces[i]);
> +	if (!stt->tbl) {
> +		for (i = 0; (i < npages) && !ret; ++i, ioba += IOMMU_PAGE_SIZE)
> +			ret = emulated_h_put_tce(stt, ioba, tces[i]);
> +
> +		return ret;
> +	}
> +
> +	/* VFIO IOMMU */
> +	for (i = 0; (i < npages) && !ret; ++i, ioba += IOMMU_PAGE_SIZE) {
> +		unsigned long hpa, pg_size = 0;
> +		pte_t pte = 0;
> +
> +		hpa = get_real_address(vcpu, tces[i], tces[i] & TCE_PCI_WRITE,
> +				&pte, &pg_size);
> +		if (!hpa)
> +			return H_TOO_HARD;
> +
> +		ret = iommu_put_tce_real_mode(stt->tbl,
> +				ioba, hpa, pte, pg_size);

If we get a failure part-way through, should we go back and remove the
entries we put in?

[snip]
> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> index 26e2b271..3727ea6 100644
> --- a/include/uapi/linux/kvm.h
> +++ b/include/uapi/linux/kvm.h
> @@ -863,6 +863,7 @@ struct kvm_s390_ucas_mapping {
>  #define KVM_ALLOCATE_RMA	  _IOR(KVMIO,  0xa9, struct kvm_allocate_rma)
>  /* Available with KVM_CAP_PPC_HTAB_FD */
>  #define KVM_PPC_GET_HTAB_FD	  _IOW(KVMIO,  0xaa, struct kvm_get_htab_fd)
> +#define KVM_CREATE_SPAPR_TCE_IOMMU _IOW(KVMIO,  0xaf, struct kvm_create_spapr_tce_iommu)

This needs an entry in Documentation/virtual/kvm/api.txt.

Paul.

next prev parent reply	other threads:[~2013-02-15  3:54 UTC|newest]

Thread overview: 27+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
     [not found] <1360584763-21988-1-git-send-email-a>
2013-02-11 12:12 ` [PATCH 1/4] powerpc: lookup_linux_pte has been made public aik
2013-02-11 12:12   ` aik
2013-02-11 12:12   ` aik
2013-02-15  3:13   ` Paul Mackerras
2013-02-15  3:13     ` Paul Mackerras
2013-02-15  3:13     ` Paul Mackerras
2013-02-11 12:12 ` [PATCH 2/4] powerpc kvm: added multiple TCEs requests support aik
2013-02-11 12:12   ` aik
2013-02-11 12:12   ` aik
2013-02-15  3:24   ` Paul Mackerras
2013-02-15  3:24     ` Paul Mackerras
2013-02-15  3:24     ` Paul Mackerras
2013-02-18  8:14     ` Alexey Kardashevskiy
2013-02-18  8:14       ` Alexey Kardashevskiy
2013-02-18  8:14       ` Alexey Kardashevskiy
2013-02-11 12:12 ` [PATCH 3/4] powerpc: preparing to support real mode optimization aik
2013-02-11 12:12   ` aik
2013-02-11 12:12   ` aik
2013-02-15  3:37   ` Paul Mackerras
2013-02-15  3:37     ` Paul Mackerras
2013-02-15  3:37     ` Paul Mackerras
2013-02-11 12:12 ` [PATCH 4/4] vfio powerpc: added real mode support aik
2013-02-11 12:12   ` aik
2013-02-11 12:12   ` aik
2013-02-15  3:54   ` Paul Mackerras [this message]
2013-02-15  3:54     ` Paul Mackerras
2013-02-15  3:54     ` Paul Mackerras

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20130215035400.GD25015@drongo \
    --to=paulus@samba.org \
    --cc=agraf@suse.de \
    --cc=aik@ozlabs.ru \
    --cc=benh@kernel.crashing.org \
    --cc=david@gibson.dropbear.id.au \
    --cc=kvm-ppc@vger.kernel.org \
    --cc=kvm@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linuxppc-dev@lists.ozlabs.org \
    --cc=michael@ellerman.id.au \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.