From: Andrew Morton <akpm@linux-foundation.org>
To: anil.s.keshavamurthy@intel.com
Cc: linux-kernel@vger.kernel.org, ak@suse.de, gregkh@suse.de,
muli@il.ibm.com, asit.k.mallick@intel.com,
suresh.b.siddha@intel.com, arjan@linux.intel.com,
ashok.raj@intel.com, shaohua.li@intel.com, davem@davemloft.net
Subject: Re: [Intel-IOMMU 06/10] Intel IOMMU driver
Date: Thu, 7 Jun 2007 16:57:39 -0700 [thread overview]
Message-ID: <20070607165739.dc437e96.akpm@linux-foundation.org> (raw)
In-Reply-To: <20070606190042.823168000@askeshav-devel.jf.intel.com>
On Wed, 06 Jun 2007 11:57:04 -0700
anil.s.keshavamurthy@intel.com wrote:
> Actual intel IOMMU driver. Hardware spec can be found at:
> http://www.intel.com/technology/virtualization
>
> This driver sets X86_64 'dma_ops', so hook into standard DMA APIs. In this way,
> PCI driver will get virtual DMA address. This change is transparent to PCI
> drivers.
>
> ...
>
> +#ifdef CONFIG_DMAR
> + detect_intel_iommu();
> +#endif
> +
> #ifdef CONFIG_SWIOTLB
> pci_swiotlb_init();
> #endif
> @@ -314,6 +319,10 @@
> calgary_iommu_init();
> #endif
>
> +#ifdef CONFIG_DMAR
> + intel_iommu_init();
> +#endif
We'd prefer that the header file have suitable #ifndef CONFIG_DMAR stubs,
so the ifdefs here become unneeded.
> +/* context entry handling */
> +static struct context_entry * device_to_context_entry(struct intel_iommu *iommu,
> + u8 bus, u8 devfn)
> +{
> + struct root_entry *root;
> + struct context_entry *context;
> + unsigned long phy_addr;
> + unsigned long flags;
> +
> + spin_lock_irqsave(&iommu->lock, flags);
> + root = &iommu->root_entry[bus];
> + if (!(context = get_context_addr_from_root(*root))) {
> + context = (struct context_entry *)alloc_pgtable_page();
> + if (!context) {
> + spin_unlock_irqrestore(&iommu->lock, flags);
> + return NULL;
> + }
> + __iommu_flush_cache(iommu, (void *)context, PAGE_SIZE_4K);
> + phy_addr = virt_to_phys((void *)context);
> + set_root_value(*root, phy_addr);
> + set_root_present(*root);
> + __iommu_flush_cache(iommu, root, sizeof(*root));
> + }
> + spin_unlock_irqrestore(&iommu->lock, flags);
> + return &context[devfn];
> +}
checkpatch.pl has lots of fun with this patch.
> +/* page table handling */
> +#define LEVEL_STRIDE (9)
> +#define LEVEL_MASK (((u64)1 << LEVEL_STRIDE) - 1)
> +#define agaw_to_level(val) ((val) + 2)
> +#define agaw_to_width(val) (30 + val * LEVEL_STRIDE)
> +#define width_to_agaw(w) ((w - 30)/LEVEL_STRIDE)
> +#define level_to_offset_bits(l) (12 + (l - 1) * LEVEL_STRIDE)
> +#define address_level_offset(addr, level) \
> + ((addr >> level_to_offset_bits(level)) & LEVEL_MASK)
> +#define level_mask(l) (((u64)(-1)) << level_to_offset_bits(l))
> +#define level_size(l) ((u64)1 << level_to_offset_bits(l))
> +#define align_to_level(addr, l) ((addr + level_size(l) - 1) & level_mask(l))
static inlines are better than macros - please consider.
If you're going to stick with macros here then you'll find that many of the
above macro's arguments are insufficiently parenthesised.
> +#define IOMMU_WAIT_OP(iommu, offset, op, cond, sts) \
> +{\
> + unsigned long start_time = jiffies;\
> + while (1) {\
> + sts = op (iommu->reg, offset);\
> + if (cond)\
> + break;\
> + if (time_after(jiffies, start_time + DMAR_OPERATION_TIMEOUT))\
> + panic("DMAR hardware is malfunctional, please disable IOMMU\n");\
> + cpu_relax();\
> + }\
> +}
wow, harsh treatment.
"malfunctioning" might parse better here.
> +static int inline get_alignment(u64 base, unsigned int size)
> +{
> + int t = 0;
> + u64 end;
> +
> + end = base + size - 1;
> + while (base != end) {
> + t++;
> + base >>= 1;
> + end >>= 1;
> + }
> + return t;
> +}
What's this (too large to inline) function doing? I suspect we might have
helper functions which already do it... If not, perhasp we should.
> +static int inline iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
> + u64 addr, unsigned int pages, int non_present_entry_flush)
> +{
> + unsigned int align;
> +
> + BUG_ON(addr & (~PAGE_MASK_4K));
> + BUG_ON(pages == 0);
> +
> + /* Fallback to domain selective flush if no PSI support */
> + if (!cap_pgsel_inv(iommu->cap))
> + return iommu_flush_iotlb_dsi(iommu, did,
> + non_present_entry_flush);
> +
> + /*
> + * PSI requires page size is 2 ^ x, and the base address is naturally
> + * aligned to the size
> + */
> + align = get_alignment(addr >> PAGE_SHIFT_4K, pages);
> + /* Fallback to domain selective flush if size is too big */
> + if (align > cap_max_amask_val(iommu->cap))
> + return iommu_flush_iotlb_dsi(iommu, did,
> + non_present_entry_flush);
> +
> + addr >>= PAGE_SHIFT_4K + align;
> + addr <<= PAGE_SHIFT_4K + align;
> +
> + return __iommu_flush_iotlb(iommu, did, addr, align,
> + DMA_TLB_PSI_FLUSH, non_present_entry_flush);
> +}
too large for inlining.
> +static int iommu_enable_translation(struct intel_iommu *iommu)
> +{
> + u32 sts;
> + unsigned long flag;
we conventionally use "flags" for this.
> + spin_lock_irqsave(&iommu->register_lock, flag);
> + dmar_writel(iommu->reg, DMAR_GCMD_REG, iommu->gcmd|DMA_GCMD_TE);
> +
> + /* Make sure hardware complete it */
> + IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, dmar_readl, (sts & DMA_GSTS_TES), sts);
> +
> + iommu->gcmd |= DMA_GCMD_TE;
> + spin_unlock_irqrestore(&iommu->register_lock, flag);
> + return 0;
> +}
>
> ...
>
>
> +#define aligned_size(host_addr, size) \
> + PAGE_ALIGN_4K((host_addr & (~PAGE_MASK_4K)) + size)
insufficiently parenthesized. Consider usign a static inline.
> +struct dma_mapping_ops intel_dma_ops = {
> + .alloc_coherent = intel_alloc_coherent,
> + .free_coherent = intel_free_coherent,
> + .map_single = intel_map_single,
> + .unmap_single = intel_unmap_single,
> + .map_sg = intel_map_sg,
> + .unmap_sg = intel_unmap_sg,
> +};
can it be static?
> +void *iommu_rpool_alloc(unsigned int size, gfp_t flag)
> +{
> + if (size == PAGE_SIZE_4K)
> + return(void *)get_zeroed_page(flag);
> + else
> + return kzalloc(size, flag);
> +}
kmalloc(4k) is pretty efficient and (I think) is guaranteed to return a
page-aligned address.
iow: can we just use kmalloc here?
> +
> +static inline int
> +iommu_devinfo_pool_init(void)
> +{
> + return init_resource_pool(&iommu_devinfo_pool, MIN_DEVINFO_REQ,
> + sizeof(struct device_domain_info),
> + GROW_DEVINFO_REQ, iommu_rpool_alloc,
> + iommu_rpool_free);
> +}
> +
> +static inline int
> +iommu_iova_pool_init(void)
> +{
> + return init_resource_pool(&iommu_iova_pool, MIN_IOVA_REQ,
> + sizeof(struct iova),
> + GROW_IOVA_REQ, iommu_rpool_alloc, iommu_rpool_free);
> +}
> +
> +static int iommu_init_mempool(void)
> +{
> + int ret;
> + ret = iommu_iova_pool_init();
> + if (ret)
> + return ret;
> +
> + ret = iommu_pgtable_pool_init();
> + if (ret)
> + goto pgtable_error;
> +
> + ret = iommu_domain_pool_init();
> + if (ret)
> + goto domain_error;
> +
> + ret = iommu_devinfo_pool_init();
> + if (!ret)
> + return ret;
> +
> + destroy_resource_pool(&iommu_domain_pool);
> +domain_error:
> + destroy_resource_pool(&iommu_pgtable_pool);
> +pgtable_error:
> + destroy_resource_pool(&iommu_iova_pool);
> +
> + return -ENOMEM;
> +}
can be __init
> +static void iommu_exit_mempool(void)
> +{
> + destroy_resource_pool(&iommu_devinfo_pool);
> + destroy_resource_pool(&iommu_domain_pool);
> + destroy_resource_pool(&iommu_pgtable_pool);
> + destroy_resource_pool(&iommu_iova_pool);
> +}
ditto (unexpectedly)
> +void __init detect_intel_iommu(void)
> +{
> + if (swiotlb || no_iommu || iommu_detected || dmar_disabled)
> + return;
> + if (early_dmar_detect()) {
> + iommu_detected = 1;
> + }
> +}
> +
> +static void __init init_no_remapping_devices(void)
> +{
> + struct dmar_drhd_unit *drhd;
> +
> + for_each_drhd_unit(drhd)
> + if (!drhd->include_all) {
> + int i;
> + for (i=0; i < drhd->devices_cnt; i++)
> + if (drhd->devices[i] != NULL)
> + break;
> + /* ignore DMAR unit if no pci devices exist */
> + if (i == drhd->devices_cnt)
> + drhd->ignored = 1;
> + }
looks weird - I'd add the extra braces here.
> + if (dmar_map_gfx)
> + return;
> +
> + for_each_drhd_unit(drhd) {
> + int i;
> + if (drhd->ignored || drhd->include_all)
> + continue;
> +
> + for (i = 0; i < drhd->devices_cnt; i++)
> + if (drhd->devices[i] && !IS_GFX_DEVICE(drhd->devices[i]))
> + break;
> +
> + if (i < drhd->devices_cnt)
> + continue;
> +
> + /* bypass IOMMU if it is just for gfx devices */
> + drhd->ignored = 1;
> + for (i = 0; i < drhd->devices_cnt; i++) {
> + if (!drhd->devices[i])
> + continue;
> + drhd->devices[i]->sysdata = DUMMY_DEVICE_DOMAIN_INFO;
> + }
> + }
> +}
> +
>
> ...
>
> +#define OFFSET_STRIDE (9)
> +#define dmar_readl(dmar, reg) readl(dmar + reg)
> +#define dmar_writel(dmar, reg, val) writel((val), dmar + reg)
Is this pointless obfuscation?
> +#define dmar_readq(dmar, reg) ({ \
> + u32 lo, hi; \
> + lo = dmar_readl(dmar, reg); \
> + hi = dmar_readl(dmar, reg + 4); \
> + (((u64) hi) << 32) + lo; })
> +#define dmar_writeq(dmar, reg, val) do {\
> + dmar_writel(dmar, reg, (u32)(val)); \
> + dmar_writel(dmar, reg + 4, (u32)((val) >> 32)); \
> + } while (0)
Do these need to be macros? If not, a regular C function would be better.
Not necessarily an inlined one, either.
> +#define VER_MAJOR(v) (((v) & 0xf0) >> 4)
> +#define VER_MINOR(v) ((v) & 0x0f)
We already have several VER_MAJORs defined in the tree, so adding a new one
is asking for trouble. Suggest the use of a more specific identifier.
> +#define set_root_value(root, value) \
> + do {(root).val |= ((value) & PAGE_MASK_4K);} while(0)
Methinks that could be written in C too.
> +struct domain {
hm, "domain" is a pretty generic term. I think there's a bit of namespace
hogging here..
> + int id; /* domain id */
> + struct intel_iommu *iommu; /* back pointer to owning iommu */
> +
> + struct list_head devices; /* all devices' list */
> + struct iova_domain iovad; /* iova's that belong to this domain */
> +
> + struct dma_pte *pgd; /* virtual address */
> + spinlock_t mapping_lock; /* page table lock */
> + int gaw; /* max guest address width */
> + int agaw; /* adjusted guest address width, 0 is level 2 30-bit */
> +
> +#define DOMAIN_FLAG_MULTIPLE_DEVICES 1
> + int flags;
> +};
> +
>
> ...
>
> +#define for_each_drhd_unit(drhd) \
> + list_for_each_entry(drhd, &dmar_drhd_units, list)
> +#define for_each_rmrr_units(rmrr) \
> + list_for_each_entry(rmrr, &dmar_rmrr_units, list)
> +#define begin_for_each_rmrr_device(rmrr, pdev) \
> + for_each_rmrr_units(rmrr) { \
> + int _i; \
> + for (_i = 0; _i < rmrr->devices_cnt; _i++) { \
> + pdev = rmrr->devices[_i]; \
> + /* some BIOS lists non-exist devices in DMAR table */\
> + if (!pdev) \
> + continue;
> +#define end_for_each_rmrr_device(rmrr, pdev) \
> + } \
> + }
> +
Are these used often enough to justify their inclusion?
Would it be possible to implement these as regular C functions which are
passed the address of a callback function?
next prev parent reply other threads:[~2007-06-07 23:58 UTC|newest]
Thread overview: 64+ messages / expand[flat|nested] mbox.gz Atom feed top
2007-06-06 18:56 [Intel-IOMMU 00/10] Intel IOMMU Support anil.s.keshavamurthy
2007-06-06 18:56 ` [Intel-IOMMU 01/10] DMAR detection and parsing logic anil.s.keshavamurthy
2007-06-06 18:57 ` [Intel-IOMMU 02/10] Library routine for pre-allocat pool handling anil.s.keshavamurthy
2007-06-07 23:27 ` Andrew Morton
2007-06-08 18:21 ` Keshavamurthy, Anil S
2007-06-08 19:01 ` Andrew Morton
2007-06-08 20:12 ` Keshavamurthy, Anil S
2007-06-08 20:40 ` Siddha, Suresh B
2007-06-08 20:44 ` Andrew Morton
2007-06-08 22:33 ` Christoph Lameter
2007-06-08 22:49 ` Keshavamurthy, Anil S
2007-06-08 20:43 ` Andreas Kleen
2007-06-08 20:55 ` Andrew Morton
2007-06-08 22:31 ` Andi Kleen
2007-06-08 21:20 ` Keshavamurthy, Anil S
2007-06-08 21:42 ` Andrew Morton
2007-06-08 22:17 ` Arjan van de Ven
2007-06-08 22:18 ` Siddha, Suresh B
2007-06-08 22:38 ` Christoph Lameter
2007-06-08 22:36 ` Christoph Lameter
2007-06-08 22:56 ` Andi Kleen
2007-06-08 22:59 ` Christoph Lameter
2007-06-09 9:47 ` Andi Kleen
2007-06-11 20:44 ` Keshavamurthy, Anil S
2007-06-11 21:14 ` Andrew Morton
2007-06-11 9:46 ` Ashok Raj
2007-06-11 22:16 ` Andi Kleen
2007-06-11 23:28 ` Christoph Lameter
2007-06-11 23:52 ` Keshavamurthy, Anil S
2007-06-12 0:30 ` Andrew Morton
2007-06-12 1:10 ` Arjan van de Ven
2007-06-12 1:30 ` Christoph Lameter
2007-06-12 1:35 ` Andrew Morton
2007-06-12 1:55 ` Arjan van de Ven
2007-06-12 2:08 ` Siddha, Suresh B
2007-06-13 18:40 ` Matt Mackall
2007-06-13 19:04 ` Andi Kleen
2007-06-12 0:38 ` Christoph Lameter
2007-06-11 21:29 ` Christoph Lameter
2007-06-11 21:40 ` Keshavamurthy, Anil S
2007-06-11 22:25 ` Andi Kleen
2007-06-11 11:29 ` Ashok Raj
2007-06-11 23:15 ` Keshavamurthy, Anil S
2007-06-08 22:32 ` Christoph Lameter
2007-06-08 22:45 ` Keshavamurthy, Anil S
2007-06-08 22:55 ` Christoph Lameter
2007-06-10 16:38 ` Arjan van de Ven
2007-06-11 16:10 ` Christoph Lameter
2007-06-06 18:57 ` [Intel-IOMMU 03/10] PCI generic helper function anil.s.keshavamurthy
2007-06-06 18:57 ` [Intel-IOMMU 04/10] clflush_cache_range now takes size param anil.s.keshavamurthy
2007-06-06 18:57 ` [Intel-IOMMU 05/10] IOVA allocation and management routines anil.s.keshavamurthy
2007-06-07 23:34 ` Andrew Morton
2007-06-08 18:25 ` Keshavamurthy, Anil S
2007-06-06 18:57 ` [Intel-IOMMU 06/10] Intel IOMMU driver anil.s.keshavamurthy
2007-06-07 23:57 ` Andrew Morton [this message]
2007-06-08 22:30 ` Christoph Lameter
2007-06-13 20:20 ` Keshavamurthy, Anil S
2007-06-06 18:57 ` [Intel-IOMMU 07/10] Intel iommu cmdline option - forcedac anil.s.keshavamurthy
2007-06-07 23:58 ` Andrew Morton
2007-06-06 18:57 ` [Intel-IOMMU 08/10] DMAR fault handling support anil.s.keshavamurthy
2007-06-06 18:57 ` [Intel-IOMMU 09/10] Iommu Gfx workaround anil.s.keshavamurthy
2007-06-08 0:01 ` Andrew Morton
2007-06-06 18:57 ` [Intel-IOMMU 10/10] Iommu floppy workaround anil.s.keshavamurthy
-- strict thread matches above, loose matches on Subject: below --
2007-06-04 21:02 [Intel-IOMMU 00/10] Intel IOMMU support anil.s.keshavamurthy
2007-06-04 21:02 ` [Intel-IOMMU 06/10] Intel IOMMU driver anil.s.keshavamurthy
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20070607165739.dc437e96.akpm@linux-foundation.org \
--to=akpm@linux-foundation.org \
--cc=ak@suse.de \
--cc=anil.s.keshavamurthy@intel.com \
--cc=arjan@linux.intel.com \
--cc=ashok.raj@intel.com \
--cc=asit.k.mallick@intel.com \
--cc=davem@davemloft.net \
--cc=gregkh@suse.de \
--cc=linux-kernel@vger.kernel.org \
--cc=muli@il.ibm.com \
--cc=shaohua.li@intel.com \
--cc=suresh.b.siddha@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox