From mboxrd@z Thu Jan 1 00:00:00 1970 From: Julien Grall Subject: Re: [RFC PATCH 03/19] xen/arm: its: Port ITS driver to xen Date: Fri, 13 Mar 2015 11:46:34 +0000 Message-ID: <5502CE1A.7090500@linaro.org> References: <1425299435-3278-1-git-send-email-vijay.kilari@gmail.com> <1425299435-3278-4-git-send-email-vijay.kilari@gmail.com> Mime-Version: 1.0 Content-Type: text/plain; charset="us-ascii"; Format="flowed" Content-Transfer-Encoding: 7bit Return-path: In-Reply-To: <1425299435-3278-4-git-send-email-vijay.kilari@gmail.com> List-Unsubscribe: , List-Post: List-Help: List-Subscribe: , Sender: xen-devel-bounces@lists.xen.org Errors-To: xen-devel-bounces@lists.xen.org To: vijay.kilari@gmail.com, Ian.Campbell@citrix.com, stefano.stabellini@eu.citrix.com, stefano.stabellini@citrix.com, tim@xen.org, xen-devel@lists.xen.org Cc: Prasun.Kapoor@caviumnetworks.com, vijaya.kumar@caviumnetworks.com, manish.jaggi@caviumnetworks.com List-Id: xen-devel@lists.xenproject.org Hello Vijay, On 02/03/2015 12:30, vijay.kilari@gmail.com wrote: > @@ -228,10 +242,10 @@ static struct its_collection *its_build_mapd_cmd(struct its_cmd_block *cmd, > struct its_cmd_desc *desc) > { > unsigned long itt_addr; > - u8 size = max(order_base_2(desc->its_mapd_cmd.dev->nr_ites), 1); > + u8 size = max(fls(desc->its_mapd_cmd.dev->nr_ites) - 1, 1); You may want to give a look to get_count_order. > > - itt_addr = virt_to_phys(desc->its_mapd_cmd.dev->itt); > - itt_addr = ALIGN(itt_addr, ITS_ITT_ALIGN); > + itt_addr = __pa(desc->its_mapd_cmd.dev->itt); > + itt_addr = ((itt_addr) + (ITS_ITT_ALIGN - 1)) & ~(ITS_ITT_ALIGN - 1); You can use ROUNDUP. [..] > @@ -343,17 +357,23 @@ static int its_queue_full(struct its_node *its) > static struct its_cmd_block *its_allocate_entry(struct its_node *its) > { > struct its_cmd_block *cmd; > - u32 count = 1000000; /* 1s! */ > + bool_t timeout = 0; > + s_time_t deadline = NOW() + MILLISECS(1000); > > while (its_queue_full(its)) { > - count--; > - if (!count) { > - pr_err_ratelimited("ITS queue not draining\n"); > - return NULL; > + if ( NOW() > deadline ) > + { > + timeout = 1; > + break; > } > cpu_relax(); > udelay(1); > } > + if ( timeout ) > + { > + its_err("ITS queue not draining\n"); > + return NULL; > + } Why do you need to modify the loop? It looks like to me it will work Xen too. > cmd = its->cmd_write++; > > @@ -380,7 +400,7 @@ static void its_flush_cmd(struct its_node *its, struct its_cmd_block *cmd) > * the ITS. > */ > if (its->flags & ITS_FLAGS_CMDQ_NEEDS_FLUSHING) > - __flush_dcache_area(cmd, sizeof(*cmd)); > + clean_dcache_va_range(cmd, sizeof(*cmd)); __flush_dcache_area perform a clean & invalidate while clean_dcache_va_range only clean. You may want to use clean_and_invalidate_va_range [..] > @@ -390,7 +410,8 @@ static void its_wait_for_range_completion(struct its_node *its, > struct its_cmd_block *to) > { > u64 rd_idx, from_idx, to_idx; > - u32 count = 1000000; /* 1s! */ > + bool_t timeout = 0; > + s_time_t deadline = NOW() + MILLISECS(1000); > > from_idx = its_cmd_ptr_to_offset(its, from); > to_idx = its_cmd_ptr_to_offset(its, to); > @@ -400,14 +421,16 @@ static void its_wait_for_range_completion(struct its_node *its, > if (rd_idx >= to_idx || rd_idx < from_idx) > break; > > - count--; > - if (!count) { > - pr_err_ratelimited("ITS queue timeout\n"); > - return; > + if ( NOW() > deadline ) > + { > + timeout = 1; > + break; > } > cpu_relax(); > udelay(1); > } > + if ( timeout ) > + printk("ITS queue timeout\n"); > } Same question for the loop. [..] > /* > - * irqchip functions - assumes MSI, mostly. > - */ > - > -static void lpi_set_config(struct its_device *its_dev, u32 hwirq, > - u32 id, int enable) > -{ > - u8 *cfg = page_address(gic_rdists->prop_page) + hwirq - 8192; > - > - if (enable) > - *cfg |= LPI_PROP_ENABLED; > - else > - *cfg &= ~LPI_PROP_ENABLED; > - > - /* > - * Make the above write visible to the redistributors. > - * And yes, we're flushing exactly: One. Single. Byte. > - * Humpf... > - */ > - if (gic_rdists->flags & RDIST_FLAGS_PROPBASE_NEEDS_FLUSHING) > - __flush_dcache_area(cfg, sizeof(*cfg)); > - else > - dsb(ishst); > - its_send_inv(its_dev, id); > -} > - > -static inline u16 its_msi_get_entry_nr(struct msi_desc *desc) > -{ > - return desc->msi_attrib.entry_nr; > -} > - > -static void its_mask_irq(struct irq_data *d) > -{ > - struct its_device *its_dev = irq_data_get_irq_handler_data(d); > - u32 id; > - > - /* If MSI, propagate the mask to the RC */ > - if (IS_ENABLED(CONFIG_PCI_MSI) && d->msi_desc) { > - id = its_msi_get_entry_nr(d->msi_desc); > - mask_msi_irq(d); > - } else { > - id = d->hwirq; > - } > - > - lpi_set_config(its_dev, d->hwirq, id, 0); > -} > - > -static void its_unmask_irq(struct irq_data *d) > -{ > - struct its_device *its_dev = irq_data_get_irq_handler_data(d); > - u32 id; > - > - /* If MSI, propagate the unmask to the RC */ > - if (IS_ENABLED(CONFIG_PCI_MSI) && d->msi_desc) { > - id = its_msi_get_entry_nr(d->msi_desc); > - unmask_msi_irq(d); > - } else { > - id = d->hwirq; > - } > - > - lpi_set_config(its_dev, d->hwirq, id, 1); > -} > - > -static void its_eoi_irq(struct irq_data *d) > -{ > - gic_write_eoir(d->hwirq); > -} > - > -static int its_set_affinity(struct irq_data *d, const struct cpumask *mask_val, > - bool force) > -{ > - unsigned int cpu = cpumask_any_and(mask_val, cpu_online_mask); > - struct its_device *its_dev = irq_data_get_irq_handler_data(d); > - struct its_collection *target_col; > - u32 id; > - > - if (cpu >= nr_cpu_ids) > - return -EINVAL; > - > - target_col = &its_dev->its->collections[cpu]; > - if (IS_ENABLED(CONFIG_PCI_MSI) && d->msi_desc) > - id = its_msi_get_entry_nr(d->msi_desc); > - else > - id = d->hwirq; > - its_send_movi(its_dev, target_col, id); > - its_dev->collection = target_col; > - > - return IRQ_SET_MASK_OK; > -} > - > -static struct irq_chip its_irq_chip = { > - .name = "ITS", > - .irq_mask = its_mask_irq, > - .irq_unmask = its_unmask_irq, > - .irq_eoi = its_eoi_irq, > - .irq_set_affinity = its_set_affinity, > -}; > - Most of those callbacks seems useful to me. Why did you drop them? [..] > -static unsigned long *its_lpi_alloc_chunks(int nr_irqs, int *base, int *nr_ids) > +static unsigned long *its_lpi_alloc_chunks(int nr_irq, int *base, int *nr_ids) > { > unsigned long *bitmap = NULL; > int chunk_id; > int nr_chunks; > int i; > > - nr_chunks = DIV_ROUND_UP(nr_irqs, IRQS_PER_CHUNK); > + nr_chunks = DIV_ROUND_UP(nr_irq, IRQS_PER_CHUNK); > > spin_lock(&lpi_lock); > > do { > - chunk_id = bitmap_find_next_zero_area(lpi_bitmap, lpi_chunks, > - 0, nr_chunks, 0); > + chunk_id = find_next_zero_bit(lpi_bitmap, lpi_chunks, 0); This is not the same. bitmap_find_next_zero_area looks for a contiguous region of nr_chunks while find_next_zero_bit looks for the first 0 bit. [..] > /* > @@ -732,31 +654,28 @@ static void its_lpi_free(unsigned long *bitmap, int base, int nr_ids) > /* > * This is how many bits of ID we need, including the useless ones. > */ > -#define LPI_NRBITS ilog2(LPI_PROPBASE_SZ + SZ_8K) > +#define LPI_NRBITS (fls(LPI_PROPBASE_SZ + SZ_8K) - 1) > > -#define LPI_PROP_DEFAULT_PRIO 0xa0 > +#define LPI_PROP_DEFAULT_PRIO 0xa2 It's more than a compilation fix... > static int __init its_alloc_lpi_tables(void) > { > - phys_addr_t paddr; > + gic_rdists->prop_page = alloc_xenheap_pages(get_order_from_bytes(LPI_PROPBASE_SZ), 0); > > - gic_rdists->prop_page = alloc_pages(GFP_NOWAIT, > - get_order(LPI_PROPBASE_SZ)); > if (!gic_rdists->prop_page) { > - pr_err("Failed to allocate PROPBASE\n"); > + its_err("Failed to allocate PROPBASE\n"); > return -ENOMEM; > } > > - paddr = page_to_phys(gic_rdists->prop_page); > - pr_info("GIC: using LPI property table @%pa\n", &paddr); > + its_info("GIC: using LPI property table @%pa\n", gic_rdists->prop_page); > > /* Priority 0xa0, Group-1, disabled */ > - memset(page_address(gic_rdists->prop_page), > + memset(gic_rdists->prop_page, > LPI_PROP_DEFAULT_PRIO | LPI_PROP_GROUP1, > LPI_PROPBASE_SZ); > > /* Make sure the GIC will observe the written configuration */ > - __flush_dcache_area(page_address(gic_rdists->prop_page), LPI_PROPBASE_SZ); > + clean_dcache_va_range(gic_rdists->prop_page, LPI_PROPBASE_SZ); Ditto for __flush_dcache_area [..] > @@ -806,17 +725,18 @@ static int its_alloc_tables(struct its_node *its) > if (type == GITS_BASER_TYPE_NONE) > continue; > > - base = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, get_order(max_ittsize)); > + base = alloc_xenheap_pages(get_order_from_bytes(max_ittsize), 0); > if (!base) { > err = -ENOMEM; > goto out_free; > } > - > + memset(base, 0, max_ittsize); > + clear_page(base); Why do you do both? memset should be enough. Although, you may need to align max_ittsize. [..] > @@ -909,32 +827,30 @@ static int its_alloc_collections(struct its_node *its) > static void its_cpu_init_lpis(void) > { > void __iomem *rbase = gic_data_rdist_rd_base(); > - struct page *pend_page; > + void *pend_page; > u64 val, tmp; > > /* If we didn't allocate the pending table yet, do it now */ > - pend_page = gic_data_rdist()->pend_page; > + pend_page = gic_data_rdist().pend_page; It would make sense to have gic_data_rdist returning a pointer. That would avoid a such change. [..] > + memset(pend_page, 0, max(LPI_PENDBASE_SZ, SZ_64K)); > /* Make sure the GIC will observe the zero-ed page */ > - __flush_dcache_area(page_address(pend_page), LPI_PENDBASE_SZ); > + clean_dcache_va_range(pend_page, LPI_PENDBASE_SZ); Ditto for clean_dcache_va_range [..] > > -static struct its_device *its_create_device(struct its_node *its, u32 dev_id, > - int nvecs) > +/* TODO: Removed static for compilation */ It's a bit strange to add a TODO but not on the same other changes. See its_lpi_free for instance. [..] > -static int its_alloc_device_irq(struct its_device *dev, u32 id, > - int *hwirq, unsigned int *irq) > +/* TODO: Removed static for compilation */ > +int its_alloc_device_irq(struct its_device *dev, u32 id, > + int *hwirq, unsigned int *irq) > { > int idx; > > @@ -1101,9 +1019,6 @@ static int its_alloc_device_irq(struct its_device *dev, u32 id, > return -ENOSPC; > > *hwirq = dev->lpi_base + idx; > - *irq = irq_create_mapping(lpi_domain, *hwirq); > - if (!*irq) > - return -ENOSPC; /* Don't kill the device, though */ With this change *irq is not set at all. Do you plan to fix it later? > set_bit(idx, dev->lpi_map); > > @@ -1113,134 +1028,52 @@ static int its_alloc_device_irq(struct its_device *dev, u32 id, > return 0; > } > > - > - > -static int its_msi_get_vec_count(struct pci_dev *pdev, struct msi_desc *desc) > -{ > -#ifdef CONFIG_PCI_MSI > - if (desc->msi_attrib.is_msix) > - return pci_msix_vec_count(pdev); > - else > - return pci_msi_vec_count(pdev); > -#else > - return -EINVAL; > -#endif > -} > - > -int pci_requester_id(struct pci_dev *dev); > -static int its_msi_setup_irq(struct msi_chip *chip, > - struct pci_dev *pdev, > - struct msi_desc *desc) > -{ > - struct its_node *its = container_of(chip, struct its_node, msi_chip); > - struct its_device *its_dev; > - struct msi_msg msg; > - unsigned int irq; > - u64 addr; > - int hwirq; > - int err; > - u32 dev_id = pci_requester_id(pdev); > - u32 vec_nr; > - > - its_dev = its_find_device(its, dev_id); > - if (!its_dev) { > - int nvec = its_msi_get_vec_count(pdev, desc); > - if (WARN_ON(nvec <= 0)) > - return nvec; > - its_dev = its_create_device(its, dev_id, nvec); > - } > - if (!its_dev) > - return -ENOMEM; > - vec_nr = its_msi_get_entry_nr(desc); > - err = its_alloc_device_irq(its_dev, vec_nr, &hwirq, &irq); > - if (err) > - return err; > - > - irq_set_msi_desc(irq, desc); > - irq_set_handler_data(irq, its_dev); > - > - addr = its->phys_base + GITS_TRANSLATER; > - > - msg.address_lo = (u32)addr; > - msg.address_hi = (u32)(addr >> 32); > - msg.data = vec_nr; > - > - write_msi_msg(irq, &msg); > - return 0; > -} > - > -static void its_msi_teardown_irq(struct msi_chip *chip, unsigned int irq) > -{ > - struct irq_data *d = irq_get_irq_data(irq); > - struct its_device *its_dev = irq_data_get_irq_handler_data(d); > - > - BUG_ON(d->hwirq < its_dev->lpi_base || /* OMG! */ > - d->hwirq > (its_dev->lpi_base + its_dev->nr_lpis)); > - > - /* Stop the delivery of interrupts */ > - its_send_discard(its_dev, its_msi_get_entry_nr(d->msi_desc)); > - > - /* Mark interrupt index as unused, and clear the mapping */ > - clear_bit(d->hwirq - its_dev->lpi_base, its_dev->lpi_map); > - irq_dispose_mapping(irq); > - > - /* If all interrupts have been freed, start mopping the floor */ > - if (bitmap_empty(its_dev->lpi_map, its_dev->nr_lpis)) { > - its_lpi_free(its_dev->lpi_map, > - its_dev->lpi_base, > - its_dev->nr_lpis); > - > - /* Unmap device/itt */ > - its_send_mapd(its_dev, 0); > - its_free_device(its_dev); > - } > -} > - Those 2 functions seems useful for me. Why did you drop them? > -static int its_probe(struct device_node *node) > +static int its_probe(struct dt_device_node *node) > { > - struct resource res; > + paddr_t its_addr, its_size; > struct its_node *its; > void __iomem *its_base; > u32 val; > u64 baser, tmp; > int err; > > - err = of_address_to_resource(node, 0, &res); > - if (err) { > - pr_warn("%s: no regs?\n", node->full_name); > + err = dt_device_get_address(node, 0, &its_addr, &its_size); > + if ( err || !its_addr ) Why the second check (!its_addr)? > + { > + its_warn("%s: cannot find GIC-ITS\n", node->full_name); > return -ENXIO; > } > > - its_base = ioremap(res.start, resource_size(&res)); > - if (!its_base) { > - pr_warn("%s: unable to map registers\n", node->full_name); > + its_base = ioremap_nocache(its_addr, its_size); > + if ( !its_base) > + { Please keep the Linux coding style. [..] > @@ -1255,19 +1088,19 @@ static int its_probe(struct device_node *node) [..] > writeq_relaxed(baser, its->base + GITS_CBASER); > tmp = readq_relaxed(its->base + GITS_CBASER); > -/* writeq_relaxed(0, its->base + GITS_CWRITER); */ > + writeq_relaxed(0, its->base + GITS_CWRITER); Care to explain why it was commented on the previous patch and you uncomment here? [..] > out_free_tables: > its_free_tables(its); > out_free_cmd: > - kfree(its->cmd_base); > + xfree(its->cmd_base); > out_free_its: > - kfree(its); > + xfree(its); > out_unmap: > - iounmap(its_base); > - pr_err("ITS: failed probing %s (%d)\n", node->full_name, err); > + //TODO: no call for iounmap in xen? The function exists since 2012 in xen/vmap.h ... [..] > -static struct of_device_id its_device_id[] = { > - { .compatible = "arm,gic-v3-its", }, > - {}, > -}; > - > -struct irq_chip *its_init(struct device_node *node, struct rdists *rdists, > - struct irq_domain *domain) > +int its_init(struct dt_device_node *node, struct rdist_prop *rdists) > { > - struct device_node *np; > + static const struct dt_device_match its_device_ids[] __initconst = > + { > + DT_MATCH_COMPATIBLE("arm,gic-v3-its"), > + { /* sentinel */ }, > + }; Just modifing the type of the its_device_id would have been work too. s/of_device_id/dt_device_match/ > + struct dt_device_node *np = NULL; > > - for (np = of_find_matching_node(node, its_device_id); np; > - np = of_find_matching_node(np, its_device_id)) { > - its_probe(np); > + while ( (np = dt_find_matching_node(np, its_device_ids)) ) > + { > + if ( !dt_find_property(np, "msi-controller", NULL) ) > + continue; > + > + if ( dt_get_parent(np) ) > + break; Linux doesn't do those check, why do you need them? [..] > diff --git a/xen/arch/arm/gic-v3.c b/xen/arch/arm/gic-v3.c > index 47452ca..5c35ac5 100644 > --- a/xen/arch/arm/gic-v3.c > +++ b/xen/arch/arm/gic-v3.c > @@ -63,6 +63,7 @@ static struct gic_info gicv3_info; > > /* per-cpu re-distributor base */ > static DEFINE_PER_CPU(void __iomem*, rbase); > +DEFINE_PER_CPU(struct rdist, rdist); It would have been better to create a separate patch before in order to implment rdist correctly. Regards, -- Julien Grall