All of lore.kernel.org
 help / color / mirror / Atom feed
From: Andrew Morton <akpm@linux-foundation.org>
To: anil.s.keshavamurthy@intel.com
Cc: linux-kernel@vger.kernel.org, ak@suse.de, gregkh@suse.de,
	muli@il.ibm.com, asit.k.mallick@intel.com,
	suresh.b.siddha@intel.com, arjan@linux.intel.com,
	ashok.raj@intel.com, shaohua.li@intel.com, davem@davemloft.net
Subject: Re: [Intel-IOMMU 06/10] Intel IOMMU driver
Date: Thu, 7 Jun 2007 16:57:39 -0700	[thread overview]
Message-ID: <20070607165739.dc437e96.akpm@linux-foundation.org> (raw)
In-Reply-To: <20070606190042.823168000@askeshav-devel.jf.intel.com>

On Wed, 06 Jun 2007 11:57:04 -0700
anil.s.keshavamurthy@intel.com wrote:

> 	Actual intel IOMMU driver. Hardware spec can be found at:
> http://www.intel.com/technology/virtualization
> 
> This driver sets X86_64 'dma_ops', so hook into standard DMA APIs. In this way,
> PCI driver will get virtual DMA address. This change is transparent to PCI
> drivers.
> 
> ...
>  
> +#ifdef CONFIG_DMAR
> +	detect_intel_iommu();
> +#endif
> +
>  #ifdef CONFIG_SWIOTLB
>  	pci_swiotlb_init();
>  #endif
> @@ -314,6 +319,10 @@
>  	calgary_iommu_init();
>  #endif
>  
> +#ifdef CONFIG_DMAR
> +	intel_iommu_init();
> +#endif

We'd prefer that the header file have suitable #ifndef CONFIG_DMAR stubs,
so the ifdefs here become unneeded.

> +/* context entry handling */
> +static struct context_entry * device_to_context_entry(struct intel_iommu *iommu,
> +		u8 bus, u8 devfn)
> +{
> +	struct root_entry *root;
> +	struct context_entry *context;
> +	unsigned long phy_addr;
> +	unsigned long flags;
> +
> +	spin_lock_irqsave(&iommu->lock, flags);
> +	root = &iommu->root_entry[bus];
> +	if (!(context = get_context_addr_from_root(*root))) {
> +		context = (struct context_entry *)alloc_pgtable_page();
> +		if (!context) {
> +			spin_unlock_irqrestore(&iommu->lock, flags);
> +			return NULL;
> +		}
> +		__iommu_flush_cache(iommu, (void *)context, PAGE_SIZE_4K);
> +		phy_addr = virt_to_phys((void *)context);
> +		set_root_value(*root, phy_addr);
> +		set_root_present(*root);
> +		__iommu_flush_cache(iommu, root, sizeof(*root));
> +	}
> +	spin_unlock_irqrestore(&iommu->lock, flags);
> +	return &context[devfn];
> +}

checkpatch.pl has lots of fun with this patch.

> +/* page table handling */
> +#define LEVEL_STRIDE		(9)
> +#define LEVEL_MASK		(((u64)1 << LEVEL_STRIDE) - 1)
> +#define agaw_to_level(val) ((val) + 2)
> +#define agaw_to_width(val) (30 + val * LEVEL_STRIDE)
> +#define width_to_agaw(w)  ((w - 30)/LEVEL_STRIDE)
> +#define level_to_offset_bits(l) (12 + (l - 1) * LEVEL_STRIDE)
> +#define address_level_offset(addr, level) \
> +	((addr >> level_to_offset_bits(level)) & LEVEL_MASK)
> +#define level_mask(l) (((u64)(-1)) << level_to_offset_bits(l))
> +#define level_size(l) ((u64)1 << level_to_offset_bits(l))
> +#define align_to_level(addr, l) ((addr + level_size(l) - 1) & level_mask(l))

static inlines are better than macros - please consider.

If you're going to stick with macros here then you'll find that many of the
above macro's arguments are insufficiently parenthesised.

> +#define IOMMU_WAIT_OP(iommu, offset, op, cond, sts) \
> +{\
> +	unsigned long start_time = jiffies;\
> +	while (1) {\
> +		sts = op (iommu->reg, offset);\
> +		if (cond)\
> +			break;\
> +		if (time_after(jiffies, start_time + DMAR_OPERATION_TIMEOUT))\
> +			panic("DMAR hardware is malfunctional, please disable IOMMU\n");\
> +		cpu_relax();\
> +	}\
> +}

wow, harsh treatment.

"malfunctioning" might parse better here.

> +static int inline get_alignment(u64 base, unsigned int size)
> +{
> +	int t = 0;
> +	u64 end;
> +
> +	end = base + size - 1;
> +	while (base != end) {
> +		t++;
> +		base >>= 1;
> +		end >>= 1;
> +	}
> +	return t;
> +}

What's this (too large to inline) function doing?  I suspect we might have
helper functions which already do it...  If not, perhasp we should.


> +static int inline iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
> +	u64 addr, unsigned int pages, int non_present_entry_flush)
> +{
> +	unsigned int align;
> +
> +	BUG_ON(addr & (~PAGE_MASK_4K));
> +	BUG_ON(pages == 0);
> +
> +	/* Fallback to domain selective flush if no PSI support */
> +	if (!cap_pgsel_inv(iommu->cap))
> +		return iommu_flush_iotlb_dsi(iommu, did,
> +			non_present_entry_flush);
> +
> +	/*
> +	 * PSI requires page size is 2 ^ x, and the base address is naturally
> +	 * aligned to the size
> +	 */
> +	align = get_alignment(addr >> PAGE_SHIFT_4K, pages);
> +	/* Fallback to domain selective flush if size is too big */
> +	if (align > cap_max_amask_val(iommu->cap))
> +		return iommu_flush_iotlb_dsi(iommu, did,
> +			non_present_entry_flush);
> +
> +	addr >>= PAGE_SHIFT_4K + align;
> +	addr <<= PAGE_SHIFT_4K + align;
> +
> +	return __iommu_flush_iotlb(iommu, did, addr, align,
> +		DMA_TLB_PSI_FLUSH, non_present_entry_flush);
> +}

too large for inlining.

> +static int iommu_enable_translation(struct intel_iommu *iommu)
> +{
> +	u32 sts;
> +	unsigned long flag;

we conventionally use "flags" for this.

> +	spin_lock_irqsave(&iommu->register_lock, flag);
> +	dmar_writel(iommu->reg, DMAR_GCMD_REG, iommu->gcmd|DMA_GCMD_TE);
> +
> +	/* Make sure hardware complete it */
> +	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, dmar_readl, (sts & DMA_GSTS_TES), sts);
> +
> +	iommu->gcmd |= DMA_GCMD_TE;
> +	spin_unlock_irqrestore(&iommu->register_lock, flag);
> +	return 0;
> +}
>
> ...
>
>
> +#define aligned_size(host_addr, size) \
> +	PAGE_ALIGN_4K((host_addr & (~PAGE_MASK_4K)) + size)

insufficiently parenthesized.  Consider usign a static inline.

> +struct dma_mapping_ops intel_dma_ops = {
> +	.alloc_coherent = intel_alloc_coherent,
> +	.free_coherent = intel_free_coherent,
> +	.map_single = intel_map_single,
> +	.unmap_single = intel_unmap_single,
> +	.map_sg = intel_map_sg,
> +	.unmap_sg = intel_unmap_sg,
> +};

can it be static?

> +void *iommu_rpool_alloc(unsigned int size, gfp_t flag)
> +{
> +	if (size == PAGE_SIZE_4K)
> +		return(void *)get_zeroed_page(flag);
> +	else
> +		return kzalloc(size, flag);
> +}

kmalloc(4k) is pretty efficient and (I think) is guaranteed to return a
page-aligned address.

iow: can we just use kmalloc here?

> +
> +static inline int
> +iommu_devinfo_pool_init(void)
> +{
> +	return init_resource_pool(&iommu_devinfo_pool, MIN_DEVINFO_REQ,
> +		sizeof(struct device_domain_info),
> +		GROW_DEVINFO_REQ, iommu_rpool_alloc,
> +		iommu_rpool_free);
> +}
> +
> +static inline int
> +iommu_iova_pool_init(void)
> +{
> +	return init_resource_pool(&iommu_iova_pool, MIN_IOVA_REQ,
> +		sizeof(struct iova),
> +		GROW_IOVA_REQ, iommu_rpool_alloc, iommu_rpool_free);
> +}
> +
> +static int iommu_init_mempool(void)
> +{
> +	int ret;
> +	ret = iommu_iova_pool_init();
> +	if (ret)
> +		return ret;
> +
> +	ret = iommu_pgtable_pool_init();
> +	if (ret)
> +		goto pgtable_error;
> +
> +	ret = iommu_domain_pool_init();
> +	if (ret)
> +		goto domain_error;
> +
> +	ret = iommu_devinfo_pool_init();
> +	if (!ret)
> +		return ret;
> +
> +	destroy_resource_pool(&iommu_domain_pool);
> +domain_error:
> +	destroy_resource_pool(&iommu_pgtable_pool);
> +pgtable_error:
> +	destroy_resource_pool(&iommu_iova_pool);
> +
> +	return -ENOMEM;
> +}

can be __init

> +static void iommu_exit_mempool(void)
> +{
> +	destroy_resource_pool(&iommu_devinfo_pool);
> +	destroy_resource_pool(&iommu_domain_pool);
> +	destroy_resource_pool(&iommu_pgtable_pool);
> +	destroy_resource_pool(&iommu_iova_pool);
> +}

ditto (unexpectedly)

> +void __init detect_intel_iommu(void)
> +{
> +	if (swiotlb || no_iommu || iommu_detected || dmar_disabled)
> +		return;
> +	if (early_dmar_detect()) {
> +		iommu_detected = 1;
> +	}
> +}
> +
> +static void __init init_no_remapping_devices(void)
> +{
> +	struct dmar_drhd_unit *drhd;
> +
> +	for_each_drhd_unit(drhd)
> +		if (!drhd->include_all) {
> +			int i;
> +			for (i=0; i < drhd->devices_cnt; i++)
> +				if (drhd->devices[i] != NULL)
> +					break;
> +			/* ignore DMAR unit if no pci devices exist */
> +			if (i == drhd->devices_cnt)
> +				drhd->ignored = 1;
> +		}

looks weird - I'd add the extra braces here.

> +	if (dmar_map_gfx)
> +		return;
> +
> +	for_each_drhd_unit(drhd) {
> +		int i;
> +		if (drhd->ignored || drhd->include_all)
> +			continue;
> +
> +		for (i = 0; i < drhd->devices_cnt; i++)
> +			if (drhd->devices[i] && !IS_GFX_DEVICE(drhd->devices[i]))
> +				break;
> +
> +		if (i < drhd->devices_cnt)
> +			continue;
> +
> +		/* bypass IOMMU if it is just for gfx devices */
> +		drhd->ignored = 1;
> +		for (i = 0; i < drhd->devices_cnt; i++) {
> +			if (!drhd->devices[i])
> +				continue;
> +			drhd->devices[i]->sysdata = DUMMY_DEVICE_DOMAIN_INFO;
> +		}
> +	}
> +}
> +
>
> ...
>
> +#define OFFSET_STRIDE		(9)
> +#define dmar_readl(dmar, reg) readl(dmar + reg)
> +#define dmar_writel(dmar, reg, val) writel((val), dmar + reg)

Is this pointless obfuscation?

> +#define dmar_readq(dmar, reg) ({ \
> +		u32 lo, hi; \
> +		lo = dmar_readl(dmar, reg); \
> +		hi = dmar_readl(dmar, reg + 4); \
> +		(((u64) hi) << 32) + lo; })
> +#define dmar_writeq(dmar, reg, val) do {\
> +		dmar_writel(dmar, reg, (u32)(val)); \
> +		dmar_writel(dmar, reg + 4, (u32)((val) >> 32)); \
> +	} while (0)

Do these need to be macros?  If not, a regular C function would be better. 
Not necessarily an inlined one, either.

> +#define VER_MAJOR(v)		(((v) & 0xf0) >> 4)
> +#define VER_MINOR(v)		((v) & 0x0f)

We already have several VER_MAJORs defined in the tree, so adding a new one
is asking for trouble.  Suggest the use of a more specific identifier.

> +#define set_root_value(root, value) \
> +	do {(root).val |= ((value) & PAGE_MASK_4K);} while(0)

Methinks that could be written in C too.

> +struct domain {

hm, "domain" is a pretty generic term.  I think there's a bit of namespace
hogging here..

> +	int	id;			/* domain id */
> +	struct intel_iommu *iommu;	/* back pointer to owning iommu */
> +
> +	struct list_head devices; 	/* all devices' list */
> +	struct iova_domain iovad;	/* iova's that belong to this domain */
> +
> +	struct dma_pte	*pgd;		/* virtual address */
> +	spinlock_t	mapping_lock;	/* page table lock */
> +	int		gaw;		/* max guest address width */
> +	int		agaw;		/* adjusted guest address width, 0 is level 2 30-bit */
> +
> +#define DOMAIN_FLAG_MULTIPLE_DEVICES 1
> +	int		flags;
> +};
> +
>
> ...
>
> +#define for_each_drhd_unit(drhd) \
> +	list_for_each_entry(drhd, &dmar_drhd_units, list)
> +#define for_each_rmrr_units(rmrr) \
> +	list_for_each_entry(rmrr, &dmar_rmrr_units, list)
> +#define begin_for_each_rmrr_device(rmrr, pdev) \
> +	for_each_rmrr_units(rmrr) { \
> +		int _i; \
> +		for (_i = 0; _i < rmrr->devices_cnt; _i++) { \
> +			pdev = rmrr->devices[_i]; \
> +			/* some BIOS lists non-exist devices in DMAR table */\
> +			if (!pdev) \
> +				continue;
> +#define end_for_each_rmrr_device(rmrr, pdev) \
> +		} \
> +	}
> +

Are these used often enough to justify their inclusion?

Would it be possible to implement these as regular C functions which are
passed the address of a callback function?


  reply	other threads:[~2007-06-07 23:58 UTC|newest]

Thread overview: 64+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2007-06-06 18:56 [Intel-IOMMU 00/10] Intel IOMMU Support anil.s.keshavamurthy
2007-06-06 18:56 ` [Intel-IOMMU 01/10] DMAR detection and parsing logic anil.s.keshavamurthy
2007-06-06 18:57 ` [Intel-IOMMU 02/10] Library routine for pre-allocat pool handling anil.s.keshavamurthy
2007-06-07 23:27   ` Andrew Morton
2007-06-08 18:21     ` Keshavamurthy, Anil S
2007-06-08 19:01       ` Andrew Morton
2007-06-08 20:12         ` Keshavamurthy, Anil S
2007-06-08 20:40           ` Siddha, Suresh B
2007-06-08 20:44           ` Andrew Morton
2007-06-08 22:33           ` Christoph Lameter
2007-06-08 22:49             ` Keshavamurthy, Anil S
2007-06-08 20:43         ` Andreas Kleen
2007-06-08 20:55           ` Andrew Morton
2007-06-08 22:31             ` Andi Kleen
2007-06-08 21:20           ` Keshavamurthy, Anil S
2007-06-08 21:42             ` Andrew Morton
2007-06-08 22:17               ` Arjan van de Ven
2007-06-08 22:18               ` Siddha, Suresh B
2007-06-08 22:38                 ` Christoph Lameter
2007-06-08 22:36           ` Christoph Lameter
2007-06-08 22:56             ` Andi Kleen
2007-06-08 22:59               ` Christoph Lameter
2007-06-09  9:47                 ` Andi Kleen
2007-06-11 20:44                   ` Keshavamurthy, Anil S
2007-06-11 21:14                     ` Andrew Morton
2007-06-11  9:46                       ` Ashok Raj
2007-06-11 22:16                       ` Andi Kleen
2007-06-11 23:28                         ` Christoph Lameter
2007-06-11 23:52                       ` Keshavamurthy, Anil S
2007-06-12  0:30                         ` Andrew Morton
2007-06-12  1:10                           ` Arjan van de Ven
2007-06-12  1:30                             ` Christoph Lameter
2007-06-12  1:35                             ` Andrew Morton
2007-06-12  1:55                               ` Arjan van de Ven
2007-06-12  2:08                                 ` Siddha, Suresh B
2007-06-13 18:40                                 ` Matt Mackall
2007-06-13 19:04                                   ` Andi Kleen
2007-06-12  0:38                         ` Christoph Lameter
2007-06-11 21:29                     ` Christoph Lameter
2007-06-11 21:40                       ` Keshavamurthy, Anil S
2007-06-11 22:25                     ` Andi Kleen
2007-06-11 11:29                       ` Ashok Raj
2007-06-11 23:15                       ` Keshavamurthy, Anil S
2007-06-08 22:32       ` Christoph Lameter
2007-06-08 22:45         ` Keshavamurthy, Anil S
2007-06-08 22:55           ` Christoph Lameter
2007-06-10 16:38             ` Arjan van de Ven
2007-06-11 16:10               ` Christoph Lameter
2007-06-06 18:57 ` [Intel-IOMMU 03/10] PCI generic helper function anil.s.keshavamurthy
2007-06-06 18:57 ` [Intel-IOMMU 04/10] clflush_cache_range now takes size param anil.s.keshavamurthy
2007-06-06 18:57 ` [Intel-IOMMU 05/10] IOVA allocation and management routines anil.s.keshavamurthy
2007-06-07 23:34   ` Andrew Morton
2007-06-08 18:25     ` Keshavamurthy, Anil S
2007-06-06 18:57 ` [Intel-IOMMU 06/10] Intel IOMMU driver anil.s.keshavamurthy
2007-06-07 23:57   ` Andrew Morton [this message]
2007-06-08 22:30     ` Christoph Lameter
2007-06-13 20:20     ` Keshavamurthy, Anil S
2007-06-06 18:57 ` [Intel-IOMMU 07/10] Intel iommu cmdline option - forcedac anil.s.keshavamurthy
2007-06-07 23:58   ` Andrew Morton
2007-06-06 18:57 ` [Intel-IOMMU 08/10] DMAR fault handling support anil.s.keshavamurthy
2007-06-06 18:57 ` [Intel-IOMMU 09/10] Iommu Gfx workaround anil.s.keshavamurthy
2007-06-08  0:01   ` Andrew Morton
2007-06-06 18:57 ` [Intel-IOMMU 10/10] Iommu floppy workaround anil.s.keshavamurthy
  -- strict thread matches above, loose matches on Subject: below --
2007-06-04 21:02 [Intel-IOMMU 00/10] Intel IOMMU support anil.s.keshavamurthy
2007-06-04 21:02 ` [Intel-IOMMU 06/10] Intel IOMMU driver anil.s.keshavamurthy

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20070607165739.dc437e96.akpm@linux-foundation.org \
    --to=akpm@linux-foundation.org \
    --cc=ak@suse.de \
    --cc=anil.s.keshavamurthy@intel.com \
    --cc=arjan@linux.intel.com \
    --cc=ashok.raj@intel.com \
    --cc=asit.k.mallick@intel.com \
    --cc=davem@davemloft.net \
    --cc=gregkh@suse.de \
    --cc=linux-kernel@vger.kernel.org \
    --cc=muli@il.ibm.com \
    --cc=shaohua.li@intel.com \
    --cc=suresh.b.siddha@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.