All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH RFC] xen: arm: context switch vtimer PPI state.
@ 2015-01-26 15:55 Ian Campbell
  2015-01-27 13:16 ` Julien Grall
                   ` (2 more replies)
  0 siblings, 3 replies; 14+ messages in thread
From: Ian Campbell @ 2015-01-26 15:55 UTC (permalink / raw)
  To: stefano.stabellini; +Cc: julien.grall, tim, Ian Campbell, xen-devel

... instead of artificially masking the timer interrupt in the timer
state and relying on the guest to unmask (which it isn't required to
do per the h/w spec, although Linux does)

To do this introduce the concept of routing a PPI to the currently
running VCPU. For such interrupts irq_get_domain returns NULL.

Then we make use of the GICD I[SC]ACTIVER registers to save and
restore the active state of the interrupt, which prevents the nested
invocations which the current masking works around.

For edge triggered interrupts we also need to context switch the
pending bit via I[SC]PENDR. Note that for level triggered interrupts
SPENDR sets a latch which is only cleared by ICPENDR (and not by h/w
state changes), therefore we do not want to context switch the pending
state for level PPIs -- instead we rely on the context switch of the
peripheral to restore the correct level.

RFC Only:
 - Not implemented for GIC v3 yet.
 - Lightly tested with hackbench on systems with level and edge
   triggered vtimer PPI.
 - Is irq_get_domain == NULL to indicate route-to-current-vcpu the
   best idea? Any alternatives?

Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
---
 xen/arch/arm/gic-v2.c        |   84 ++++++++++++++++++++++++++++++++++++++++++
 xen/arch/arm/gic.c           |   32 +++++++++++++---
 xen/arch/arm/irq.c           |   48 ++++++++++++++++++++++--
 xen/arch/arm/time.c          |   26 +------------
 xen/arch/arm/vtimer.c        |   24 ++++++++++--
 xen/include/asm-arm/domain.h |   11 ++++++
 xen/include/asm-arm/gic.h    |   14 +++++++
 xen/include/asm-arm/irq.h    |    1 +
 8 files changed, 204 insertions(+), 36 deletions(-)

diff --git a/xen/arch/arm/gic-v2.c b/xen/arch/arm/gic-v2.c
index 31fb81a..9074aca 100644
--- a/xen/arch/arm/gic-v2.c
+++ b/xen/arch/arm/gic-v2.c
@@ -156,6 +156,45 @@ static void gicv2_save_state(struct vcpu *v)
     writel_gich(0, GICH_HCR);
 }
 
+static void gicv2_save_and_mask_hwppi(struct vcpu *v, const unsigned virq,
+                                      struct hwppi_state *s)
+{
+    struct pending_irq *p = irq_to_pending(v, virq);
+    const unsigned int offs = virq / 32;
+    const unsigned int mask = (1u << (virq % 32));
+    const unsigned int pendingr = readl_gicd(GICD_ISPENDR + offs*4);
+    const unsigned int activer = readl_gicd(GICD_ISACTIVER + offs*4);
+    const unsigned int enabler = readl_gicd(GICD_ISENABLER + offs*4);
+    const bool is_edge = !!(p->desc->arch.type & DT_IRQ_TYPE_EDGE_BOTH);
+
+    ASSERT(!is_idle_vcpu(v));
+
+    s->active = !!(activer & mask);
+    s->enabled = !!(enabler & mask);
+    s->pending = !!(pendingr & mask);
+    s->inprogress = test_and_clear_bit(_IRQ_INPROGRESS, &p->desc->status);
+
+    /* Write a 1 to IC...R to clear the corresponding bit of state */
+    if ( s->active )
+        writel_gicd(mask, GICD_ICACTIVER + offs*4);
+    /*
+     * For an edge interrupt clear the pending state, for a level interrupt
+     * this clears the latch there is no need since saving the peripheral state
+     * (and/or restoring the next VCPU) will cause the correct action.
+     */
+    if ( is_edge && s->pending )
+        writel_gicd(mask, GICD_ICPENDR + offs*4);
+
+    if ( s->enabled )
+    {
+        writel_gicd(mask, GICD_ICENABLER + offs*4);
+        set_bit(_IRQ_DISABLED, &p->desc->status);
+    }
+
+    ASSERT(!(readl_gicd(GICD_ISACTIVER + offs*4) & mask));
+    ASSERT(!(readl_gicd(GICD_ISENABLER + offs*4) & mask));
+}
+
 static void gicv2_restore_state(const struct vcpu *v)
 {
     int i;
@@ -168,6 +207,49 @@ static void gicv2_restore_state(const struct vcpu *v)
     writel_gich(GICH_HCR_EN, GICH_HCR);
 }
 
+static void gicv2_restore_hwppi(struct vcpu *v, const unsigned virq,
+                                const struct hwppi_state *s)
+{
+    struct pending_irq *p = irq_to_pending(v, virq);
+    const unsigned int offs = virq / 32;
+    const unsigned int mask = (1u << (virq % 32));
+    struct irq_desc *desc = irq_to_desc(virq);
+    const bool is_edge = !!(desc->arch.type & DT_IRQ_TYPE_EDGE_BOTH);
+    struct pending_irq *pending = irq_to_pending(v, virq);
+
+    pending->desc = desc; /* Migrate to new physical processor */
+
+    /*
+     * The IRQ must always have been set inactive and masked etc by
+     * the saving of the previous state via save_and_mask_hwppi.
+     */
+    ASSERT(!(readl_gicd(GICD_ISACTIVER + offs*4) & mask));
+    ASSERT(!(readl_gicd(GICD_ISENABLER + offs*4) & mask));
+
+    if ( s->active )
+        writel_gicd(mask, GICD_ICACTIVER + offs*4);
+
+    /*
+     * Restore pending state for edge triggered interrupts only. For
+     * level triggered interrupts the level will be restored as
+     * necessary by restoring the state of the relevant peripheral.
+     *
+     * For a level triggered interrupt ISPENDR acts as a *latch* which
+     * is only cleared by ICPENDR (i.e. the input level is no longer
+     * relevant). We certainly do not want that here.
+     */
+    if ( is_edge && s->pending )
+        writel_gicd(mask, GICD_ISPENDR + offs*4);
+    if ( s->enabled )
+    {
+        clear_bit(_IRQ_DISABLED, &p->desc->status);
+        dsb(sy);
+        writel_gicd(mask, GICD_ISENABLER + offs*4);
+    }
+    if ( s->inprogress )
+        set_bit(_IRQ_INPROGRESS, &p->desc->status);
+}
+
 static void gicv2_dump_state(const struct vcpu *v)
 {
     int i;
@@ -660,7 +742,9 @@ const static struct gic_hw_operations gicv2_ops = {
     .info                = &gicv2_info,
     .secondary_init      = gicv2_secondary_cpu_init,
     .save_state          = gicv2_save_state,
+    .save_and_mask_hwppi = gicv2_save_and_mask_hwppi,
     .restore_state       = gicv2_restore_state,
+    .restore_hwppi       = gicv2_restore_hwppi,
     .dump_state          = gicv2_dump_state,
     .gicv_setup          = gicv2v_setup,
     .gic_host_irq_type   = &gicv2_host_irq_type,
diff --git a/xen/arch/arm/gic.c b/xen/arch/arm/gic.c
index 7d4ee19..7ea980d 100644
--- a/xen/arch/arm/gic.c
+++ b/xen/arch/arm/gic.c
@@ -81,6 +81,12 @@ void gic_save_state(struct vcpu *v)
     isb();
 }
 
+void gic_save_and_mask_hwppi(struct vcpu *v, unsigned virq,
+                             struct hwppi_state *s)
+{
+    gic_hw_ops->save_and_mask_hwppi(v, virq, s);
+}
+
 void gic_restore_state(struct vcpu *v)
 {
     ASSERT(!local_irq_is_enabled());
@@ -94,6 +100,12 @@ void gic_restore_state(struct vcpu *v)
     gic_restore_pending_irqs(v);
 }
 
+void gic_restore_hwppi(struct vcpu *v, const unsigned virq,
+                       const struct hwppi_state *s)
+{
+    gic_hw_ops->restore_hwppi(v, virq, s);
+}
+
 /*
  * needs to be called with a valid cpu_mask, ie each cpu in the mask has
  * already called gic_cpu_init
@@ -125,11 +137,15 @@ void gic_route_irq_to_xen(struct irq_desc *desc, const cpumask_t *cpu_mask,
 
 /* Program the GIC to route an interrupt to a guest
  *   - desc.lock must be held
+ *   - d may be NULL, in which case interrupt *must* be a PPI and is routed to
+ *     the vcpu currently running when that PPI fires. In this case the code
+ *     responsible for the related hardware must save and restore the PPI with
+ *     gic_save_and_mask_hwppi/gic_restore_hwppi.
+ *   - if d is non-NULL then the interrupt *must* be an SPI.
  */
 void gic_route_irq_to_guest(struct domain *d, struct irq_desc *desc,
                             const cpumask_t *cpu_mask, unsigned int priority)
 {
-    struct pending_irq *p;
     ASSERT(spin_is_locked(&desc->lock));
 
     desc->handler = gic_hw_ops->gic_guest_irq_type;
@@ -137,10 +153,16 @@ void gic_route_irq_to_guest(struct domain *d, struct irq_desc *desc,
 
     gic_set_irq_properties(desc, cpumask_of(smp_processor_id()), GIC_PRI_IRQ);
 
-    /* Use vcpu0 to retrieve the pending_irq struct. Given that we only
-     * route SPIs to guests, it doesn't make any difference. */
-    p = irq_to_pending(d->vcpu[0], desc->irq);
-    p->desc = desc;
+    if ( d )
+    {
+        struct pending_irq *p;
+
+        /* Use vcpu0 to retrieve the pending_irq struct. Given that we only
+         * route SPIs to guests, it doesn't make any difference. */
+        p = irq_to_pending(d->vcpu[0], desc->irq);
+        p->desc = desc;
+    }
+    /* else p->desc init'd on ctxt restore in gic_restore_hwppi */
 }
 
 int gic_irq_xlate(const u32 *intspec, unsigned int intsize,
diff --git a/xen/arch/arm/irq.c b/xen/arch/arm/irq.c
index ebdf19a..93c38ff 100644
--- a/xen/arch/arm/irq.c
+++ b/xen/arch/arm/irq.c
@@ -202,6 +202,19 @@ void do_IRQ(struct cpu_user_regs *regs, unsigned int irq, int is_fiq)
         goto out;
     }
 
+    if ( irq == current->arch.virt_timer.irq )
+    {
+        ASSERT(!irq_get_domain(desc));
+
+        desc->handler->end(desc);
+
+        set_bit(_IRQ_INPROGRESS, &desc->status);
+        desc->arch.eoi_cpu = smp_processor_id();
+
+        vgic_vcpu_inject_irq(current, irq);
+        goto out_no_end;
+    }
+
     if ( test_bit(_IRQ_GUEST, &desc->status) )
     {
         struct domain *d = irq_get_domain(desc);
@@ -346,8 +359,12 @@ int setup_irq(unsigned int irq, unsigned int irqflags, struct irqaction *new)
         struct domain *d = irq_get_domain(desc);
 
         spin_unlock_irqrestore(&desc->lock, flags);
-        printk(XENLOG_ERR "ERROR: IRQ %u is already in use by the domain %u\n",
-               irq, d->domain_id);
+        if ( d )
+            printk(XENLOG_ERR "ERROR: IRQ %u is already in use by domain %u\n",
+                   irq, d->domain_id);
+        else
+            printk(XENLOG_ERR
+                   "ERROR: IRQ %u is already in use by <current-vcpu>\n", irq);
         return -EBUSY;
     }
 
@@ -378,6 +395,10 @@ err:
     return rc;
 }
 
+/*
+ * If d == NULL then IRQ is routed to current vcpu at time it is received and
+ * must be a PPI.
+ */
 int route_irq_to_guest(struct domain *d, unsigned int irq,
                        const char * devname)
 {
@@ -386,6 +407,8 @@ int route_irq_to_guest(struct domain *d, unsigned int irq,
     unsigned long flags;
     int retval = 0;
 
+    ASSERT( d || ( irq >=16 && irq < 32 ) );
+
     action = xmalloc(struct irqaction);
     if (!action)
         return -ENOMEM;
@@ -406,11 +429,23 @@ int route_irq_to_guest(struct domain *d, unsigned int irq,
         struct domain *ad = irq_get_domain(desc);
 
         if ( test_bit(_IRQ_GUEST, &desc->status) && d == ad )
+        {
+            printk(XENLOG_ERR
+                   "ERROR: IRQ %u is already routed to domain %p\n",
+                   irq, ad);
             goto out;
+        }
 
         if ( test_bit(_IRQ_GUEST, &desc->status) )
-            printk(XENLOG_ERR "ERROR: IRQ %u is already used by domain %u\n",
-                   irq, ad->domain_id);
+        {
+            if ( ad )
+                printk(XENLOG_ERR
+                       "ERROR: IRQ %u is already used by domain %u\n",
+                       irq, ad->domain_id);
+            else
+                printk(XENLOG_ERR
+                       "ERROR: IRQ %u is already used by <current-vcpu>\n", irq);
+        }
         else
             printk(XENLOG_ERR "ERROR: IRQ %u is already used by Xen\n", irq);
         retval = -EBUSY;
@@ -433,6 +468,11 @@ out:
     return retval;
 }
 
+int route_irq_to_current_vcpu(unsigned int irq, const char *devname)
+{
+    return route_irq_to_guest(NULL, irq, devname);
+}
+
 /*
  * pirq event channels. We don't use these on ARM, instead we use the
  * features of the GIC to inject virtualised normal interrupts.
diff --git a/xen/arch/arm/time.c b/xen/arch/arm/time.c
index 455f217..ffa5eef 100644
--- a/xen/arch/arm/time.c
+++ b/xen/arch/arm/time.c
@@ -169,28 +169,6 @@ static void timer_interrupt(int irq, void *dev_id, struct cpu_user_regs *regs)
     }
 }
 
-static void vtimer_interrupt(int irq, void *dev_id, struct cpu_user_regs *regs)
-{
-    /*
-     * Edge-triggered interrupts can be used for the virtual timer. Even
-     * if the timer output signal is masked in the context switch, the
-     * GIC will keep track that of any interrupts raised while IRQS are
-     * disabled. As soon as IRQs are re-enabled, the virtual interrupt
-     * will be injected to Xen.
-     *
-     * If an IDLE vCPU was scheduled next then we should ignore the
-     * interrupt.
-     */
-    if ( unlikely(is_idle_vcpu(current)) )
-        return;
-
-    perfc_incr(virt_timer_irqs);
-
-    current->arch.virt_timer.ctl = READ_SYSREG32(CNTV_CTL_EL0);
-    WRITE_SYSREG32(current->arch.virt_timer.ctl | CNTx_CTL_MASK, CNTV_CTL_EL0);
-    vgic_vcpu_inject_irq(current, current->arch.virt_timer.irq);
-}
-
 /* Set up the timer interrupt on this CPU */
 void __cpuinit init_timer_interrupt(void)
 {
@@ -204,8 +182,8 @@ void __cpuinit init_timer_interrupt(void)
 
     request_irq(timer_irq[TIMER_HYP_PPI], 0, timer_interrupt,
                 "hyptimer", NULL);
-    request_irq(timer_irq[TIMER_VIRT_PPI], 0, vtimer_interrupt,
-                   "virtimer", NULL);
+    route_irq_to_current_vcpu(timer_irq[TIMER_VIRT_PPI], "virtimer");
+
     request_irq(timer_irq[TIMER_PHYS_NONSECURE_PPI], 0, timer_interrupt,
                 "phytimer", NULL);
 }
diff --git a/xen/arch/arm/vtimer.c b/xen/arch/arm/vtimer.c
index 848e2c6..d1f21a1 100644
--- a/xen/arch/arm/vtimer.c
+++ b/xen/arch/arm/vtimer.c
@@ -47,9 +47,14 @@ static void phys_timer_expired(void *data)
 static void virt_timer_expired(void *data)
 {
     struct vtimer *t = data;
-    t->ctl |= CNTx_CTL_MASK;
-    vgic_vcpu_inject_irq(t->v, t->irq);
-    perfc_incr(vtimer_virt_inject);
+    t->ctl |= CNTx_CTL_PENDING;
+    if ( !(t->ctl & CNTx_CTL_MASK) )
+    {
+        /* An edge triggered interrupt should now be pending. */
+        t->ppi_state.pending = true;
+        vcpu_unblock(t->v);
+        perfc_incr(vtimer_virt_inject);
+    }
 }
 
 int domain_vtimer_init(struct domain *d)
@@ -112,6 +117,15 @@ int virt_timer_save(struct vcpu *v)
         set_timer(&v->arch.virt_timer.timer, ticks_to_ns(v->arch.virt_timer.cval +
                   v->domain->arch.virt_timer_base.offset - boot_count));
     }
+
+    /*
+     * Since the vtimer irq is a PPI we don't need to worry about
+     * racing against it becoming active while we are saving the
+     * state, since that requires the guest to be reading the IAR.
+     */
+    gic_save_and_mask_hwppi(v, v->arch.virt_timer.irq,
+                            &v->arch.virt_timer.ppi_state);
+
     return 0;
 }
 
@@ -126,6 +140,10 @@ int virt_timer_restore(struct vcpu *v)
     WRITE_SYSREG64(v->domain->arch.virt_timer_base.offset, CNTVOFF_EL2);
     WRITE_SYSREG64(v->arch.virt_timer.cval, CNTV_CVAL_EL0);
     WRITE_SYSREG32(v->arch.virt_timer.ctl, CNTV_CTL_EL0);
+
+    gic_restore_hwppi(v, v->arch.virt_timer.irq,
+                      &v->arch.virt_timer.ppi_state);
+
     return 0;
 }
 
diff --git a/xen/include/asm-arm/domain.h b/xen/include/asm-arm/domain.h
index 90ab9c3..dd50e2c 100644
--- a/xen/include/asm-arm/domain.h
+++ b/xen/include/asm-arm/domain.h
@@ -34,12 +34,23 @@ enum domain_type {
 extern int dom0_11_mapping;
 #define is_domain_direct_mapped(d) ((d) == hardware_domain && dom0_11_mapping)
 
+struct hwppi_state {
+    /* h/w state */
+    unsigned long enabled:1;
+    unsigned long pending:1;
+    unsigned long active:1;
+
+    /* Xen s/w state */
+    unsigned long inprogress:1;
+};
+
 struct vtimer {
     struct vcpu *v;
     int irq;
     struct timer timer;
     uint32_t ctl;
     uint64_t cval;
+    struct hwppi_state ppi_state;
 };
 
 struct arch_domain
diff --git a/xen/include/asm-arm/gic.h b/xen/include/asm-arm/gic.h
index 187dc46..aa8cbac 100644
--- a/xen/include/asm-arm/gic.h
+++ b/xen/include/asm-arm/gic.h
@@ -247,6 +247,16 @@ extern int gicv_setup(struct domain *d);
 extern void gic_save_state(struct vcpu *v);
 extern void gic_restore_state(struct vcpu *v);
 
+/*
+ * Save/restore the state of a single PPI which must be routed to
+ * <current-vcpu> (that is, defined to be injected to the current vcpu).
+ */
+struct hwppi_state;
+extern void gic_save_and_mask_hwppi(struct vcpu *v, unsigned virq,
+                                    struct hwppi_state *s);
+extern void gic_restore_hwppi(struct vcpu *v, unsigned virq,
+                              const struct hwppi_state *s);
+
 /* SGI (AKA IPIs) */
 enum gic_sgi {
     GIC_SGI_EVENT_CHECK = 0,
@@ -293,8 +303,12 @@ struct gic_hw_operations {
     const struct gic_info *info;
     /* Save GIC registers */
     void (*save_state)(struct vcpu *);
+    void (*save_and_mask_hwppi)(struct vcpu *v, const unsigned virq,
+                                struct hwppi_state *s);
     /* Restore GIC registers */
     void (*restore_state)(const struct vcpu *);
+    void (*restore_hwppi)(struct vcpu *v, const unsigned virq,
+                          const struct hwppi_state *s);
     /* Dump GIC LR register information */
     void (*dump_state)(const struct vcpu *);
     /* Map MMIO region of GIC */
diff --git a/xen/include/asm-arm/irq.h b/xen/include/asm-arm/irq.h
index 435dfcd..a08438e 100644
--- a/xen/include/asm-arm/irq.h
+++ b/xen/include/asm-arm/irq.h
@@ -42,6 +42,7 @@ void init_secondary_IRQ(void);
 
 int route_irq_to_guest(struct domain *d, unsigned int irq,
                        const char *devname);
+int route_irq_to_current_vcpu(unsigned int irq, const char *devname);
 void arch_move_irqs(struct vcpu *v);
 
 /* Set IRQ type for an SPI */
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* Re: [PATCH RFC] xen: arm: context switch vtimer PPI state.
  2015-01-26 15:55 [PATCH RFC] xen: arm: context switch vtimer PPI state Ian Campbell
@ 2015-01-27 13:16 ` Julien Grall
  2015-01-27 13:34   ` Ian Campbell
  2015-02-10  2:59 ` Ian Campbell
  2015-03-03 11:38 ` Stefano Stabellini
  2 siblings, 1 reply; 14+ messages in thread
From: Julien Grall @ 2015-01-27 13:16 UTC (permalink / raw)
  To: Ian Campbell, stefano.stabellini; +Cc: tim, xen-devel

Hi Ian,

On 26/01/15 15:55, Ian Campbell wrote:
> ... instead of artificially masking the timer interrupt in the timer
> state and relying on the guest to unmask (which it isn't required to
> do per the h/w spec, although Linux does)
> 
> To do this introduce the concept of routing a PPI to the currently
> running VCPU. For such interrupts irq_get_domain returns NULL.

> Then we make use of the GICD I[SC]ACTIVER registers to save and
> restore the active state of the interrupt, which prevents the nested
> invocations which the current masking works around.
> 
> For edge triggered interrupts we also need to context switch the
> pending bit via I[SC]PENDR. Note that for level triggered interrupts
> SPENDR sets a latch which is only cleared by ICPENDR (and not by h/w
> state changes), therefore we do not want to context switch the pending
> state for level PPIs -- instead we rely on the context switch of the
> peripheral to restore the correct level.
> 
> RFC Only:
>  - Not implemented for GIC v3 yet.
>  - Lightly tested with hackbench on systems with level and edge
>    triggered vtimer PPI.
>  - Is irq_get_domain == NULL to indicate route-to-current-vcpu the
>    best idea? Any alternatives?

I have introduced an irq_guest structure in my platform device
passthrough series (https://patches.linaro.org/43012/).

Maybe you could extend it to avoid things like irq_get_domain == NULL.

> Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
> ---
>  xen/arch/arm/gic-v2.c        |   84 ++++++++++++++++++++++++++++++++++++++++++
>  xen/arch/arm/gic.c           |   32 +++++++++++++---
>  xen/arch/arm/irq.c           |   48 ++++++++++++++++++++++--
>  xen/arch/arm/time.c          |   26 +------------
>  xen/arch/arm/vtimer.c        |   24 ++++++++++--
>  xen/include/asm-arm/domain.h |   11 ++++++
>  xen/include/asm-arm/gic.h    |   14 +++++++
>  xen/include/asm-arm/irq.h    |    1 +
>  8 files changed, 204 insertions(+), 36 deletions(-)
> 
> diff --git a/xen/arch/arm/gic-v2.c b/xen/arch/arm/gic-v2.c
> index 31fb81a..9074aca 100644
> --- a/xen/arch/arm/gic-v2.c
> +++ b/xen/arch/arm/gic-v2.c
> @@ -156,6 +156,45 @@ static void gicv2_save_state(struct vcpu *v)
>      writel_gich(0, GICH_HCR);
>  }
>  
> +static void gicv2_save_and_mask_hwppi(struct vcpu *v, const unsigned virq,
> +                                      struct hwppi_state *s)
> +{
> +    struct pending_irq *p = irq_to_pending(v, virq);
> +    const unsigned int offs = virq / 32;
> +    const unsigned int mask = (1u << (virq % 32));
> +    const unsigned int pendingr = readl_gicd(GICD_ISPENDR + offs*4);
> +    const unsigned int activer = readl_gicd(GICD_ISACTIVER + offs*4);
> +    const unsigned int enabler = readl_gicd(GICD_ISENABLER + offs*4);
> +    const bool is_edge = !!(p->desc->arch.type & DT_IRQ_TYPE_EDGE_BOTH);

"For each supported PPI, it is IMPLEMENTATION DEFINED whether software
can program the corresponding Int_config field."

So I would not rely on arch.type as it may not be in sync with the register.

It will be more problematic with the upcoming patch to let the guest
configure ICFGR0.

> +
> +    ASSERT(!is_idle_vcpu(v));
> +
> +    s->active = !!(activer & mask);
> +    s->enabled = !!(enabler & mask);
> +    s->pending = !!(pendingr & mask);
> +    s->inprogress = test_and_clear_bit(_IRQ_INPROGRESS, &p->desc->status);
> +
> +    /* Write a 1 to IC...R to clear the corresponding bit of state */
> +    if ( s->active )
> +        writel_gicd(mask, GICD_ICACTIVER + offs*4);
> +    /*
> +     * For an edge interrupt clear the pending state, for a level interrupt
> +     * this clears the latch there is no need since saving the peripheral state
> +     * (and/or restoring the next VCPU) will cause the correct action.
> +     */
> +    if ( is_edge && s->pending )
> +        writel_gicd(mask, GICD_ICPENDR + offs*4);
> +
> +    if ( s->enabled )
> +    {
> +        writel_gicd(mask, GICD_ICENABLER + offs*4);
> +        set_bit(_IRQ_DISABLED, &p->desc->status);

I would prefer if you use gicv2_irq_disable rather than open coding.

> +    }
> +
> +    ASSERT(!(readl_gicd(GICD_ISACTIVER + offs*4) & mask));
> +    ASSERT(!(readl_gicd(GICD_ISENABLER + offs*4) & mask));
> +}
> +
>  static void gicv2_restore_state(const struct vcpu *v)
>  {
>      int i;
> @@ -168,6 +207,49 @@ static void gicv2_restore_state(const struct vcpu *v)
>      writel_gich(GICH_HCR_EN, GICH_HCR);
>  }
>  
> +static void gicv2_restore_hwppi(struct vcpu *v, const unsigned virq,
> +                                const struct hwppi_state *s)
> +{
> +    struct pending_irq *p = irq_to_pending(v, virq);
> +    const unsigned int offs = virq / 32;
> +    const unsigned int mask = (1u << (virq % 32));
> +    struct irq_desc *desc = irq_to_desc(virq);
> +    const bool is_edge = !!(desc->arch.type & DT_IRQ_TYPE_EDGE_BOTH);
> +    struct pending_irq *pending = irq_to_pending(v, virq);
> +
> +    pending->desc = desc; /* Migrate to new physical processor */
> +
> +    /*
> +     * The IRQ must always have been set inactive and masked etc by
> +     * the saving of the previous state via save_and_mask_hwppi.
> +     */
> +    ASSERT(!(readl_gicd(GICD_ISACTIVER + offs*4) & mask));
> +    ASSERT(!(readl_gicd(GICD_ISENABLER + offs*4) & mask));
> +
> +    if ( s->active )
> +        writel_gicd(mask, GICD_ICACTIVER + offs*4);
> +
> +    /*
> +     * Restore pending state for edge triggered interrupts only. For
> +     * level triggered interrupts the level will be restored as
> +     * necessary by restoring the state of the relevant peripheral.
> +     *
> +     * For a level triggered interrupt ISPENDR acts as a *latch* which
> +     * is only cleared by ICPENDR (i.e. the input level is no longer
> +     * relevant). We certainly do not want that here.
> +     */
> +    if ( is_edge && s->pending )
> +        writel_gicd(mask, GICD_ISPENDR + offs*4);
> +    if ( s->enabled )
> +    {
> +        clear_bit(_IRQ_DISABLED, &p->desc->status);
> +        dsb(sy);
> +        writel_gicd(mask, GICD_ISENABLER + offs*4);
> +    }
> +    if ( s->inprogress )
> +        set_bit(_IRQ_INPROGRESS, &p->desc->status);
> +}
> +
>  static void gicv2_dump_state(const struct vcpu *v)
>  {
>      int i;
> @@ -660,7 +742,9 @@ const static struct gic_hw_operations gicv2_ops = {
>      .info                = &gicv2_info,
>      .secondary_init      = gicv2_secondary_cpu_init,
>      .save_state          = gicv2_save_state,
> +    .save_and_mask_hwppi = gicv2_save_and_mask_hwppi,
>      .restore_state       = gicv2_restore_state,
> +    .restore_hwppi       = gicv2_restore_hwppi,
>      .dump_state          = gicv2_dump_state,
>      .gicv_setup          = gicv2v_setup,
>      .gic_host_irq_type   = &gicv2_host_irq_type,
> diff --git a/xen/arch/arm/gic.c b/xen/arch/arm/gic.c
> index 7d4ee19..7ea980d 100644
> --- a/xen/arch/arm/gic.c
> +++ b/xen/arch/arm/gic.c
> @@ -81,6 +81,12 @@ void gic_save_state(struct vcpu *v)
>      isb();
>  }
>  
> +void gic_save_and_mask_hwppi(struct vcpu *v, unsigned virq,
> +                             struct hwppi_state *s)
> +{

ASSERT(virq >= 16 && virq < 32);

> +    gic_hw_ops->save_and_mask_hwppi(v, virq, s);
> +}
> +
>  void gic_restore_state(struct vcpu *v)
>  {
>      ASSERT(!local_irq_is_enabled());
> @@ -94,6 +100,12 @@ void gic_restore_state(struct vcpu *v)
>      gic_restore_pending_irqs(v);
>  }
>  
> +void gic_restore_hwppi(struct vcpu *v, const unsigned virq,
> +                       const struct hwppi_state *s)
> +{

Ditto

> +    gic_hw_ops->restore_hwppi(v, virq, s);
> +}
> +
>  /*
>   * needs to be called with a valid cpu_mask, ie each cpu in the mask has
>   * already called gic_cpu_init
> @@ -125,11 +137,15 @@ void gic_route_irq_to_xen(struct irq_desc *desc, const cpumask_t *cpu_mask,
>  
>  /* Program the GIC to route an interrupt to a guest
>   *   - desc.lock must be held
> + *   - d may be NULL, in which case interrupt *must* be a PPI and is routed to
> + *     the vcpu currently running when that PPI fires. In this case the code
> + *     responsible for the related hardware must save and restore the PPI with
> + *     gic_save_and_mask_hwppi/gic_restore_hwppi.
> + *   - if d is non-NULL then the interrupt *must* be an SPI.
>   */

the d == NULL sounds more an hackish way to specify this IRQ is routed
to any guest. I would prefer if you introduce a separate function and
duplicate the relevant bits.

>  void gic_route_irq_to_guest(struct domain *d, struct irq_desc *desc,
>                              const cpumask_t *cpu_mask, unsigned int priority)
>  {
> -    struct pending_irq *p;
>      ASSERT(spin_is_locked(&desc->lock));
>  
>      desc->handler = gic_hw_ops->gic_guest_irq_type;
> @@ -137,10 +153,16 @@ void gic_route_irq_to_guest(struct domain *d, struct irq_desc *desc,
>  
>      gic_set_irq_properties(desc, cpumask_of(smp_processor_id()), GIC_PRI_IRQ);
>  
> -    /* Use vcpu0 to retrieve the pending_irq struct. Given that we only
> -     * route SPIs to guests, it doesn't make any difference. */
> -    p = irq_to_pending(d->vcpu[0], desc->irq);
> -    p->desc = desc;
> +    if ( d )
> +    {
> +        struct pending_irq *p;
> +
> +        /* Use vcpu0 to retrieve the pending_irq struct. Given that we only
> +         * route SPIs to guests, it doesn't make any difference. */
> +        p = irq_to_pending(d->vcpu[0], desc->irq);
> +        p->desc = desc;
> +    }
> +    /* else p->desc init'd on ctxt restore in gic_restore_hwppi */
>  }
>  
>  int gic_irq_xlate(const u32 *intspec, unsigned int intsize,
> diff --git a/xen/arch/arm/irq.c b/xen/arch/arm/irq.c
> index ebdf19a..93c38ff 100644
> --- a/xen/arch/arm/irq.c
> +++ b/xen/arch/arm/irq.c
> @@ -202,6 +202,19 @@ void do_IRQ(struct cpu_user_regs *regs, unsigned int irq, int is_fiq)
>          goto out;
>      }
>  
> +    if ( irq == current->arch.virt_timer.irq )

You can't compare irq with current->arch.virt_timer.irq. The later is a
physical IRQ and the former a vIRQ.

For guest, the virtual timer IRQ may be different than the platform.

> +    {
> +        ASSERT(!irq_get_domain(desc));
> +
> +        desc->handler->end(desc);
> +
> +        set_bit(_IRQ_INPROGRESS, &desc->status);
> +        desc->arch.eoi_cpu = smp_processor_id();

This will become wrong when the vCPU will be migrated.

But it looks like nobody is using desc->arch.eoi_cpu. So we should drop it.

> +
> +        vgic_vcpu_inject_irq(current, irq);
> +        goto out_no_end;
> +    }
> +
>      if ( test_bit(_IRQ_GUEST, &desc->status) )
>      {
>          struct domain *d = irq_get_domain(desc);
> @@ -346,8 +359,12 @@ int setup_irq(unsigned int irq, unsigned int irqflags, struct irqaction *new)
>          struct domain *d = irq_get_domain(desc);
>  
>          spin_unlock_irqrestore(&desc->lock, flags);
> -        printk(XENLOG_ERR "ERROR: IRQ %u is already in use by the domain %u\n",
> -               irq, d->domain_id);
> +        if ( d )
> +            printk(XENLOG_ERR "ERROR: IRQ %u is already in use by domain %u\n",
> +                   irq, d->domain_id);
> +        else
> +            printk(XENLOG_ERR
> +                   "ERROR: IRQ %u is already in use by <current-vcpu>\n", irq);
>          return -EBUSY;
>      }
>  
> @@ -378,6 +395,10 @@ err:
>      return rc;
>  }
>  
> +/*
> + * If d == NULL then IRQ is routed to current vcpu at time it is received and
> + * must be a PPI.
> + */
>  int route_irq_to_guest(struct domain *d, unsigned int irq,
>                         const char * devname)
>  {
> @@ -386,6 +407,8 @@ int route_irq_to_guest(struct domain *d, unsigned int irq,
>      unsigned long flags;
>      int retval = 0;
>  
> +    ASSERT( d || ( irq >=16 && irq < 32 ) );
> +

Please no ASSERT in this function. This will be soon expose to the guest
(see my device passthrough series).

Regards,

-- 
Julien Grall

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH RFC] xen: arm: context switch vtimer PPI state.
  2015-01-27 13:16 ` Julien Grall
@ 2015-01-27 13:34   ` Ian Campbell
  2015-01-27 13:47     ` Julien Grall
  0 siblings, 1 reply; 14+ messages in thread
From: Ian Campbell @ 2015-01-27 13:34 UTC (permalink / raw)
  To: Julien Grall; +Cc: xen-devel, tim, stefano.stabellini

On Tue, 2015-01-27 at 13:16 +0000, Julien Grall wrote:
> Hi Ian,
> 
> On 26/01/15 15:55, Ian Campbell wrote:
> > ... instead of artificially masking the timer interrupt in the timer
> > state and relying on the guest to unmask (which it isn't required to
> > do per the h/w spec, although Linux does)
> > 
> > To do this introduce the concept of routing a PPI to the currently
> > running VCPU. For such interrupts irq_get_domain returns NULL.
> 
> > Then we make use of the GICD I[SC]ACTIVER registers to save and
> > restore the active state of the interrupt, which prevents the nested
> > invocations which the current masking works around.
> > 
> > For edge triggered interrupts we also need to context switch the
> > pending bit via I[SC]PENDR. Note that for level triggered interrupts
> > SPENDR sets a latch which is only cleared by ICPENDR (and not by h/w
> > state changes), therefore we do not want to context switch the pending
> > state for level PPIs -- instead we rely on the context switch of the
> > peripheral to restore the correct level.
> > 
> > RFC Only:
> >  - Not implemented for GIC v3 yet.
> >  - Lightly tested with hackbench on systems with level and edge
> >    triggered vtimer PPI.
> >  - Is irq_get_domain == NULL to indicate route-to-current-vcpu the
> >    best idea? Any alternatives?
> 
> I have introduced an irq_guest structure in my platform device
> passthrough series (https://patches.linaro.org/43012/).
> 
> Maybe you could extend it to avoid things like irq_get_domain == NULL.

I'll have a look.

> > diff --git a/xen/arch/arm/gic-v2.c b/xen/arch/arm/gic-v2.c
> > index 31fb81a..9074aca 100644
> > --- a/xen/arch/arm/gic-v2.c
> > +++ b/xen/arch/arm/gic-v2.c
> > @@ -156,6 +156,45 @@ static void gicv2_save_state(struct vcpu *v)
> >      writel_gich(0, GICH_HCR);
> >  }
> >  
> > +static void gicv2_save_and_mask_hwppi(struct vcpu *v, const unsigned virq,
> > +                                      struct hwppi_state *s)
> > +{
> > +    struct pending_irq *p = irq_to_pending(v, virq);
> > +    const unsigned int offs = virq / 32;
> > +    const unsigned int mask = (1u << (virq % 32));
> > +    const unsigned int pendingr = readl_gicd(GICD_ISPENDR + offs*4);
> > +    const unsigned int activer = readl_gicd(GICD_ISACTIVER + offs*4);
> > +    const unsigned int enabler = readl_gicd(GICD_ISENABLER + offs*4);
> > +    const bool is_edge = !!(p->desc->arch.type & DT_IRQ_TYPE_EDGE_BOTH);
> 
> "For each supported PPI, it is IMPLEMENTATION DEFINED whether software
> can program the corresponding Int_config field."
> 
> So I would not rely on arch.type as it may not be in sync with the register.
> 
> It will be more problematic with the upcoming patch to let the guest
> configure ICFGR0.

If anything is reprogramming ICFGR0 and not reflecting that in the
corresponding desc then it is buggy IMHO. Either arch.type should be
always valid or it should be removed.

For ease of implementation I think we should probably refuse to allow
any ppi used via this scheme to be reprogrammed, i.e. Xen should choose
at start of day (based on DTB, h/w capability) and just force it upon
the guest.

Context switching icfg and handling the subtleties of flipping mode just
sounds horrible.

> > +    if ( s->enabled )
> > +    {
> > +        writel_gicd(mask, GICD_ICENABLER + offs*4);
> > +        set_bit(_IRQ_DISABLED, &p->desc->status);
> 
> I would prefer if you use gicv2_irq_disable rather than open coding.

I'd like to hear from Stefano about whether this is the correct thing to
do in general before changing this. If context switching the status bits
is not required/wrong or there is some preferable way etc.

More generally the way pending_irq and the irq descriptor are related
after this patch needs some careful though IMHO.

> > @@ -125,11 +137,15 @@ void gic_route_irq_to_xen(struct irq_desc *desc, const cpumask_t *cpu_mask,
> >  
> >  /* Program the GIC to route an interrupt to a guest
> >   *   - desc.lock must be held
> > + *   - d may be NULL, in which case interrupt *must* be a PPI and is routed to
> > + *     the vcpu currently running when that PPI fires. In this case the code
> > + *     responsible for the related hardware must save and restore the PPI with
> > + *     gic_save_and_mask_hwppi/gic_restore_hwppi.
> > + *   - if d is non-NULL then the interrupt *must* be an SPI.
> >   */
> 
> the d == NULL sounds more an hackish way to specify this IRQ is routed
> to any guest. I would prefer if you introduce a separate function and
> duplicate the relevant bits.

That is route_irq_to_current_vcpu vs route_irq_to_guest which already
exist as the API the callers actually use.

> > +    ASSERT( d || ( irq >=16 && irq < 32 ) );
> > +
> 
> Please no ASSERT in this function. This will be soon expose to the guest
> (see my device passthrough series).

Then you should add range checking on the inputs at the hypercall level,
since irrespective of whether the ASSERT is there or that condition must
be true for things to function correctly.

Ian.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH RFC] xen: arm: context switch vtimer PPI state.
  2015-01-27 13:34   ` Ian Campbell
@ 2015-01-27 13:47     ` Julien Grall
  0 siblings, 0 replies; 14+ messages in thread
From: Julien Grall @ 2015-01-27 13:47 UTC (permalink / raw)
  To: Ian Campbell; +Cc: xen-devel, tim, stefano.stabellini

On 27/01/15 13:34, Ian Campbell wrote:
>>> diff --git a/xen/arch/arm/gic-v2.c b/xen/arch/arm/gic-v2.c
>>> index 31fb81a..9074aca 100644
>>> --- a/xen/arch/arm/gic-v2.c
>>> +++ b/xen/arch/arm/gic-v2.c
>>> @@ -156,6 +156,45 @@ static void gicv2_save_state(struct vcpu *v)
>>>      writel_gich(0, GICH_HCR);
>>>  }
>>>  
>>> +static void gicv2_save_and_mask_hwppi(struct vcpu *v, const unsigned virq,
>>> +                                      struct hwppi_state *s)
>>> +{
>>> +    struct pending_irq *p = irq_to_pending(v, virq);
>>> +    const unsigned int offs = virq / 32;
>>> +    const unsigned int mask = (1u << (virq % 32));
>>> +    const unsigned int pendingr = readl_gicd(GICD_ISPENDR + offs*4);
>>> +    const unsigned int activer = readl_gicd(GICD_ISACTIVER + offs*4);
>>> +    const unsigned int enabler = readl_gicd(GICD_ISENABLER + offs*4);
>>> +    const bool is_edge = !!(p->desc->arch.type & DT_IRQ_TYPE_EDGE_BOTH);
>>
>> "For each supported PPI, it is IMPLEMENTATION DEFINED whether software
>> can program the corresponding Int_config field."
>>
>> So I would not rely on arch.type as it may not be in sync with the register.
>>
>> It will be more problematic with the upcoming patch to let the guest
>> configure ICFGR0.
> 
> If anything is reprogramming ICFGR0 and not reflecting that in the
> corresponding desc then it is buggy IMHO. Either arch.type should be
> always valid or it should be removed.
> 
> For ease of implementation I think we should probably refuse to allow
> any ppi used via this scheme to be reprogrammed, i.e. Xen should choose
> at start of day (based on DTB, h/w capability) and just force it upon
> the guest.

So you will have to modify the guest (and even DOM0) device tree to
expose the correct type of the interrupt. For now we always expose as level.

> 
>>> +    if ( s->enabled )
>>> +    {
>>> +        writel_gicd(mask, GICD_ICENABLER + offs*4);
>>> +        set_bit(_IRQ_DISABLED, &p->desc->status);
>>
>> I would prefer if you use gicv2_irq_disable rather than open coding.
> 
> I'd like to hear from Stefano about whether this is the correct thing to
> do in general before changing this. If context switching the status bits
> is not required/wrong or there is some preferable way etc.

You have to context switch the enable bit. The guest may disable/enable
this IRQ.

> More generally the way pending_irq and the irq descriptor are related
> after this patch needs some careful though IMHO.
> 
>>> @@ -125,11 +137,15 @@ void gic_route_irq_to_xen(struct irq_desc *desc, const cpumask_t *cpu_mask,
>>>  
>>>  /* Program the GIC to route an interrupt to a guest
>>>   *   - desc.lock must be held
>>> + *   - d may be NULL, in which case interrupt *must* be a PPI and is routed to
>>> + *     the vcpu currently running when that PPI fires. In this case the code
>>> + *     responsible for the related hardware must save and restore the PPI with
>>> + *     gic_save_and_mask_hwppi/gic_restore_hwppi.
>>> + *   - if d is non-NULL then the interrupt *must* be an SPI.
>>>   */
>>
>> the d == NULL sounds more an hackish way to specify this IRQ is routed
>> to any guest. I would prefer if you introduce a separate function and
>> duplicate the relevant bits.
> 
> That is route_irq_to_current_vcpu vs route_irq_to_guest which already
> exist as the API the callers actually use.

And I should have wrote the same comment for route_irq_to_current.

>>> +    ASSERT( d || ( irq >=16 && irq < 32 ) );
>>> +
>>
>> Please no ASSERT in this function. This will be soon expose to the guest
>> (see my device passthrough series).
> 
> Then you should add range checking on the inputs at the hypercall level,
> since irrespective of whether the ASSERT is there or that condition must
> be true for things to function correctly.

No, because we also need those check when routing to DOM0. It's a safe
guard to prevent bad behavior. More check are coming soon.

We should not hack route_irq_to_guest for a corner case (i.e routing PPI
to the current vCPU). A separate function is better.

Regards,

-- 
Julien Grall

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH RFC] xen: arm: context switch vtimer PPI state.
  2015-01-26 15:55 [PATCH RFC] xen: arm: context switch vtimer PPI state Ian Campbell
  2015-01-27 13:16 ` Julien Grall
@ 2015-02-10  2:59 ` Ian Campbell
  2015-03-02 18:42   ` Stefano Stabellini
  2015-03-03 11:38 ` Stefano Stabellini
  2 siblings, 1 reply; 14+ messages in thread
From: Ian Campbell @ 2015-02-10  2:59 UTC (permalink / raw)
  To: stefano.stabellini; +Cc: julien.grall, tim, xen-devel

Stefano,

do you have any comments on the viability of the general approach here
before I go off and start addressing the review comments?

Cheers,
Ian.

On Mon, 2015-01-26 at 15:55 +0000, Ian Campbell wrote:
> ... instead of artificially masking the timer interrupt in the timer
> state and relying on the guest to unmask (which it isn't required to
> do per the h/w spec, although Linux does)
> 
> To do this introduce the concept of routing a PPI to the currently
> running VCPU. For such interrupts irq_get_domain returns NULL.
> 
> Then we make use of the GICD I[SC]ACTIVER registers to save and
> restore the active state of the interrupt, which prevents the nested
> invocations which the current masking works around.
> 
> For edge triggered interrupts we also need to context switch the
> pending bit via I[SC]PENDR. Note that for level triggered interrupts
> SPENDR sets a latch which is only cleared by ICPENDR (and not by h/w
> state changes), therefore we do not want to context switch the pending
> state for level PPIs -- instead we rely on the context switch of the
> peripheral to restore the correct level.
> 
> RFC Only:
>  - Not implemented for GIC v3 yet.
>  - Lightly tested with hackbench on systems with level and edge
>    triggered vtimer PPI.
>  - Is irq_get_domain == NULL to indicate route-to-current-vcpu the
>    best idea? Any alternatives?
> 
> Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
> ---
>  xen/arch/arm/gic-v2.c        |   84 ++++++++++++++++++++++++++++++++++++++++++
>  xen/arch/arm/gic.c           |   32 +++++++++++++---
>  xen/arch/arm/irq.c           |   48 ++++++++++++++++++++++--
>  xen/arch/arm/time.c          |   26 +------------
>  xen/arch/arm/vtimer.c        |   24 ++++++++++--
>  xen/include/asm-arm/domain.h |   11 ++++++
>  xen/include/asm-arm/gic.h    |   14 +++++++
>  xen/include/asm-arm/irq.h    |    1 +
>  8 files changed, 204 insertions(+), 36 deletions(-)
> 
> diff --git a/xen/arch/arm/gic-v2.c b/xen/arch/arm/gic-v2.c
> index 31fb81a..9074aca 100644
> --- a/xen/arch/arm/gic-v2.c
> +++ b/xen/arch/arm/gic-v2.c
> @@ -156,6 +156,45 @@ static void gicv2_save_state(struct vcpu *v)
>      writel_gich(0, GICH_HCR);
>  }
>  
> +static void gicv2_save_and_mask_hwppi(struct vcpu *v, const unsigned virq,
> +                                      struct hwppi_state *s)
> +{
> +    struct pending_irq *p = irq_to_pending(v, virq);
> +    const unsigned int offs = virq / 32;
> +    const unsigned int mask = (1u << (virq % 32));
> +    const unsigned int pendingr = readl_gicd(GICD_ISPENDR + offs*4);
> +    const unsigned int activer = readl_gicd(GICD_ISACTIVER + offs*4);
> +    const unsigned int enabler = readl_gicd(GICD_ISENABLER + offs*4);
> +    const bool is_edge = !!(p->desc->arch.type & DT_IRQ_TYPE_EDGE_BOTH);
> +
> +    ASSERT(!is_idle_vcpu(v));
> +
> +    s->active = !!(activer & mask);
> +    s->enabled = !!(enabler & mask);
> +    s->pending = !!(pendingr & mask);
> +    s->inprogress = test_and_clear_bit(_IRQ_INPROGRESS, &p->desc->status);
> +
> +    /* Write a 1 to IC...R to clear the corresponding bit of state */
> +    if ( s->active )
> +        writel_gicd(mask, GICD_ICACTIVER + offs*4);
> +    /*
> +     * For an edge interrupt clear the pending state, for a level interrupt
> +     * this clears the latch there is no need since saving the peripheral state
> +     * (and/or restoring the next VCPU) will cause the correct action.
> +     */
> +    if ( is_edge && s->pending )
> +        writel_gicd(mask, GICD_ICPENDR + offs*4);
> +
> +    if ( s->enabled )
> +    {
> +        writel_gicd(mask, GICD_ICENABLER + offs*4);
> +        set_bit(_IRQ_DISABLED, &p->desc->status);
> +    }
> +
> +    ASSERT(!(readl_gicd(GICD_ISACTIVER + offs*4) & mask));
> +    ASSERT(!(readl_gicd(GICD_ISENABLER + offs*4) & mask));
> +}
> +
>  static void gicv2_restore_state(const struct vcpu *v)
>  {
>      int i;
> @@ -168,6 +207,49 @@ static void gicv2_restore_state(const struct vcpu *v)
>      writel_gich(GICH_HCR_EN, GICH_HCR);
>  }
>  
> +static void gicv2_restore_hwppi(struct vcpu *v, const unsigned virq,
> +                                const struct hwppi_state *s)
> +{
> +    struct pending_irq *p = irq_to_pending(v, virq);
> +    const unsigned int offs = virq / 32;
> +    const unsigned int mask = (1u << (virq % 32));
> +    struct irq_desc *desc = irq_to_desc(virq);
> +    const bool is_edge = !!(desc->arch.type & DT_IRQ_TYPE_EDGE_BOTH);
> +    struct pending_irq *pending = irq_to_pending(v, virq);
> +
> +    pending->desc = desc; /* Migrate to new physical processor */
> +
> +    /*
> +     * The IRQ must always have been set inactive and masked etc by
> +     * the saving of the previous state via save_and_mask_hwppi.
> +     */
> +    ASSERT(!(readl_gicd(GICD_ISACTIVER + offs*4) & mask));
> +    ASSERT(!(readl_gicd(GICD_ISENABLER + offs*4) & mask));
> +
> +    if ( s->active )
> +        writel_gicd(mask, GICD_ICACTIVER + offs*4);
> +
> +    /*
> +     * Restore pending state for edge triggered interrupts only. For
> +     * level triggered interrupts the level will be restored as
> +     * necessary by restoring the state of the relevant peripheral.
> +     *
> +     * For a level triggered interrupt ISPENDR acts as a *latch* which
> +     * is only cleared by ICPENDR (i.e. the input level is no longer
> +     * relevant). We certainly do not want that here.
> +     */
> +    if ( is_edge && s->pending )
> +        writel_gicd(mask, GICD_ISPENDR + offs*4);
> +    if ( s->enabled )
> +    {
> +        clear_bit(_IRQ_DISABLED, &p->desc->status);
> +        dsb(sy);
> +        writel_gicd(mask, GICD_ISENABLER + offs*4);
> +    }
> +    if ( s->inprogress )
> +        set_bit(_IRQ_INPROGRESS, &p->desc->status);
> +}
> +
>  static void gicv2_dump_state(const struct vcpu *v)
>  {
>      int i;
> @@ -660,7 +742,9 @@ const static struct gic_hw_operations gicv2_ops = {
>      .info                = &gicv2_info,
>      .secondary_init      = gicv2_secondary_cpu_init,
>      .save_state          = gicv2_save_state,
> +    .save_and_mask_hwppi = gicv2_save_and_mask_hwppi,
>      .restore_state       = gicv2_restore_state,
> +    .restore_hwppi       = gicv2_restore_hwppi,
>      .dump_state          = gicv2_dump_state,
>      .gicv_setup          = gicv2v_setup,
>      .gic_host_irq_type   = &gicv2_host_irq_type,
> diff --git a/xen/arch/arm/gic.c b/xen/arch/arm/gic.c
> index 7d4ee19..7ea980d 100644
> --- a/xen/arch/arm/gic.c
> +++ b/xen/arch/arm/gic.c
> @@ -81,6 +81,12 @@ void gic_save_state(struct vcpu *v)
>      isb();
>  }
>  
> +void gic_save_and_mask_hwppi(struct vcpu *v, unsigned virq,
> +                             struct hwppi_state *s)
> +{
> +    gic_hw_ops->save_and_mask_hwppi(v, virq, s);
> +}
> +
>  void gic_restore_state(struct vcpu *v)
>  {
>      ASSERT(!local_irq_is_enabled());
> @@ -94,6 +100,12 @@ void gic_restore_state(struct vcpu *v)
>      gic_restore_pending_irqs(v);
>  }
>  
> +void gic_restore_hwppi(struct vcpu *v, const unsigned virq,
> +                       const struct hwppi_state *s)
> +{
> +    gic_hw_ops->restore_hwppi(v, virq, s);
> +}
> +
>  /*
>   * needs to be called with a valid cpu_mask, ie each cpu in the mask has
>   * already called gic_cpu_init
> @@ -125,11 +137,15 @@ void gic_route_irq_to_xen(struct irq_desc *desc, const cpumask_t *cpu_mask,
>  
>  /* Program the GIC to route an interrupt to a guest
>   *   - desc.lock must be held
> + *   - d may be NULL, in which case interrupt *must* be a PPI and is routed to
> + *     the vcpu currently running when that PPI fires. In this case the code
> + *     responsible for the related hardware must save and restore the PPI with
> + *     gic_save_and_mask_hwppi/gic_restore_hwppi.
> + *   - if d is non-NULL then the interrupt *must* be an SPI.
>   */
>  void gic_route_irq_to_guest(struct domain *d, struct irq_desc *desc,
>                              const cpumask_t *cpu_mask, unsigned int priority)
>  {
> -    struct pending_irq *p;
>      ASSERT(spin_is_locked(&desc->lock));
>  
>      desc->handler = gic_hw_ops->gic_guest_irq_type;
> @@ -137,10 +153,16 @@ void gic_route_irq_to_guest(struct domain *d, struct irq_desc *desc,
>  
>      gic_set_irq_properties(desc, cpumask_of(smp_processor_id()), GIC_PRI_IRQ);
>  
> -    /* Use vcpu0 to retrieve the pending_irq struct. Given that we only
> -     * route SPIs to guests, it doesn't make any difference. */
> -    p = irq_to_pending(d->vcpu[0], desc->irq);
> -    p->desc = desc;
> +    if ( d )
> +    {
> +        struct pending_irq *p;
> +
> +        /* Use vcpu0 to retrieve the pending_irq struct. Given that we only
> +         * route SPIs to guests, it doesn't make any difference. */
> +        p = irq_to_pending(d->vcpu[0], desc->irq);
> +        p->desc = desc;
> +    }
> +    /* else p->desc init'd on ctxt restore in gic_restore_hwppi */
>  }
>  
>  int gic_irq_xlate(const u32 *intspec, unsigned int intsize,
> diff --git a/xen/arch/arm/irq.c b/xen/arch/arm/irq.c
> index ebdf19a..93c38ff 100644
> --- a/xen/arch/arm/irq.c
> +++ b/xen/arch/arm/irq.c
> @@ -202,6 +202,19 @@ void do_IRQ(struct cpu_user_regs *regs, unsigned int irq, int is_fiq)
>          goto out;
>      }
>  
> +    if ( irq == current->arch.virt_timer.irq )
> +    {
> +        ASSERT(!irq_get_domain(desc));
> +
> +        desc->handler->end(desc);
> +
> +        set_bit(_IRQ_INPROGRESS, &desc->status);
> +        desc->arch.eoi_cpu = smp_processor_id();
> +
> +        vgic_vcpu_inject_irq(current, irq);
> +        goto out_no_end;
> +    }
> +
>      if ( test_bit(_IRQ_GUEST, &desc->status) )
>      {
>          struct domain *d = irq_get_domain(desc);
> @@ -346,8 +359,12 @@ int setup_irq(unsigned int irq, unsigned int irqflags, struct irqaction *new)
>          struct domain *d = irq_get_domain(desc);
>  
>          spin_unlock_irqrestore(&desc->lock, flags);
> -        printk(XENLOG_ERR "ERROR: IRQ %u is already in use by the domain %u\n",
> -               irq, d->domain_id);
> +        if ( d )
> +            printk(XENLOG_ERR "ERROR: IRQ %u is already in use by domain %u\n",
> +                   irq, d->domain_id);
> +        else
> +            printk(XENLOG_ERR
> +                   "ERROR: IRQ %u is already in use by <current-vcpu>\n", irq);
>          return -EBUSY;
>      }
>  
> @@ -378,6 +395,10 @@ err:
>      return rc;
>  }
>  
> +/*
> + * If d == NULL then IRQ is routed to current vcpu at time it is received and
> + * must be a PPI.
> + */
>  int route_irq_to_guest(struct domain *d, unsigned int irq,
>                         const char * devname)
>  {
> @@ -386,6 +407,8 @@ int route_irq_to_guest(struct domain *d, unsigned int irq,
>      unsigned long flags;
>      int retval = 0;
>  
> +    ASSERT( d || ( irq >=16 && irq < 32 ) );
> +
>      action = xmalloc(struct irqaction);
>      if (!action)
>          return -ENOMEM;
> @@ -406,11 +429,23 @@ int route_irq_to_guest(struct domain *d, unsigned int irq,
>          struct domain *ad = irq_get_domain(desc);
>  
>          if ( test_bit(_IRQ_GUEST, &desc->status) && d == ad )
> +        {
> +            printk(XENLOG_ERR
> +                   "ERROR: IRQ %u is already routed to domain %p\n",
> +                   irq, ad);
>              goto out;
> +        }
>  
>          if ( test_bit(_IRQ_GUEST, &desc->status) )
> -            printk(XENLOG_ERR "ERROR: IRQ %u is already used by domain %u\n",
> -                   irq, ad->domain_id);
> +        {
> +            if ( ad )
> +                printk(XENLOG_ERR
> +                       "ERROR: IRQ %u is already used by domain %u\n",
> +                       irq, ad->domain_id);
> +            else
> +                printk(XENLOG_ERR
> +                       "ERROR: IRQ %u is already used by <current-vcpu>\n", irq);
> +        }
>          else
>              printk(XENLOG_ERR "ERROR: IRQ %u is already used by Xen\n", irq);
>          retval = -EBUSY;
> @@ -433,6 +468,11 @@ out:
>      return retval;
>  }
>  
> +int route_irq_to_current_vcpu(unsigned int irq, const char *devname)
> +{
> +    return route_irq_to_guest(NULL, irq, devname);
> +}
> +
>  /*
>   * pirq event channels. We don't use these on ARM, instead we use the
>   * features of the GIC to inject virtualised normal interrupts.
> diff --git a/xen/arch/arm/time.c b/xen/arch/arm/time.c
> index 455f217..ffa5eef 100644
> --- a/xen/arch/arm/time.c
> +++ b/xen/arch/arm/time.c
> @@ -169,28 +169,6 @@ static void timer_interrupt(int irq, void *dev_id, struct cpu_user_regs *regs)
>      }
>  }
>  
> -static void vtimer_interrupt(int irq, void *dev_id, struct cpu_user_regs *regs)
> -{
> -    /*
> -     * Edge-triggered interrupts can be used for the virtual timer. Even
> -     * if the timer output signal is masked in the context switch, the
> -     * GIC will keep track that of any interrupts raised while IRQS are
> -     * disabled. As soon as IRQs are re-enabled, the virtual interrupt
> -     * will be injected to Xen.
> -     *
> -     * If an IDLE vCPU was scheduled next then we should ignore the
> -     * interrupt.
> -     */
> -    if ( unlikely(is_idle_vcpu(current)) )
> -        return;
> -
> -    perfc_incr(virt_timer_irqs);
> -
> -    current->arch.virt_timer.ctl = READ_SYSREG32(CNTV_CTL_EL0);
> -    WRITE_SYSREG32(current->arch.virt_timer.ctl | CNTx_CTL_MASK, CNTV_CTL_EL0);
> -    vgic_vcpu_inject_irq(current, current->arch.virt_timer.irq);
> -}
> -
>  /* Set up the timer interrupt on this CPU */
>  void __cpuinit init_timer_interrupt(void)
>  {
> @@ -204,8 +182,8 @@ void __cpuinit init_timer_interrupt(void)
>  
>      request_irq(timer_irq[TIMER_HYP_PPI], 0, timer_interrupt,
>                  "hyptimer", NULL);
> -    request_irq(timer_irq[TIMER_VIRT_PPI], 0, vtimer_interrupt,
> -                   "virtimer", NULL);
> +    route_irq_to_current_vcpu(timer_irq[TIMER_VIRT_PPI], "virtimer");
> +
>      request_irq(timer_irq[TIMER_PHYS_NONSECURE_PPI], 0, timer_interrupt,
>                  "phytimer", NULL);
>  }
> diff --git a/xen/arch/arm/vtimer.c b/xen/arch/arm/vtimer.c
> index 848e2c6..d1f21a1 100644
> --- a/xen/arch/arm/vtimer.c
> +++ b/xen/arch/arm/vtimer.c
> @@ -47,9 +47,14 @@ static void phys_timer_expired(void *data)
>  static void virt_timer_expired(void *data)
>  {
>      struct vtimer *t = data;
> -    t->ctl |= CNTx_CTL_MASK;
> -    vgic_vcpu_inject_irq(t->v, t->irq);
> -    perfc_incr(vtimer_virt_inject);
> +    t->ctl |= CNTx_CTL_PENDING;
> +    if ( !(t->ctl & CNTx_CTL_MASK) )
> +    {
> +        /* An edge triggered interrupt should now be pending. */
> +        t->ppi_state.pending = true;
> +        vcpu_unblock(t->v);
> +        perfc_incr(vtimer_virt_inject);
> +    }
>  }
>  
>  int domain_vtimer_init(struct domain *d)
> @@ -112,6 +117,15 @@ int virt_timer_save(struct vcpu *v)
>          set_timer(&v->arch.virt_timer.timer, ticks_to_ns(v->arch.virt_timer.cval +
>                    v->domain->arch.virt_timer_base.offset - boot_count));
>      }
> +
> +    /*
> +     * Since the vtimer irq is a PPI we don't need to worry about
> +     * racing against it becoming active while we are saving the
> +     * state, since that requires the guest to be reading the IAR.
> +     */
> +    gic_save_and_mask_hwppi(v, v->arch.virt_timer.irq,
> +                            &v->arch.virt_timer.ppi_state);
> +
>      return 0;
>  }
>  
> @@ -126,6 +140,10 @@ int virt_timer_restore(struct vcpu *v)
>      WRITE_SYSREG64(v->domain->arch.virt_timer_base.offset, CNTVOFF_EL2);
>      WRITE_SYSREG64(v->arch.virt_timer.cval, CNTV_CVAL_EL0);
>      WRITE_SYSREG32(v->arch.virt_timer.ctl, CNTV_CTL_EL0);
> +
> +    gic_restore_hwppi(v, v->arch.virt_timer.irq,
> +                      &v->arch.virt_timer.ppi_state);
> +
>      return 0;
>  }
>  
> diff --git a/xen/include/asm-arm/domain.h b/xen/include/asm-arm/domain.h
> index 90ab9c3..dd50e2c 100644
> --- a/xen/include/asm-arm/domain.h
> +++ b/xen/include/asm-arm/domain.h
> @@ -34,12 +34,23 @@ enum domain_type {
>  extern int dom0_11_mapping;
>  #define is_domain_direct_mapped(d) ((d) == hardware_domain && dom0_11_mapping)
>  
> +struct hwppi_state {
> +    /* h/w state */
> +    unsigned long enabled:1;
> +    unsigned long pending:1;
> +    unsigned long active:1;
> +
> +    /* Xen s/w state */
> +    unsigned long inprogress:1;
> +};
> +
>  struct vtimer {
>      struct vcpu *v;
>      int irq;
>      struct timer timer;
>      uint32_t ctl;
>      uint64_t cval;
> +    struct hwppi_state ppi_state;
>  };
>  
>  struct arch_domain
> diff --git a/xen/include/asm-arm/gic.h b/xen/include/asm-arm/gic.h
> index 187dc46..aa8cbac 100644
> --- a/xen/include/asm-arm/gic.h
> +++ b/xen/include/asm-arm/gic.h
> @@ -247,6 +247,16 @@ extern int gicv_setup(struct domain *d);
>  extern void gic_save_state(struct vcpu *v);
>  extern void gic_restore_state(struct vcpu *v);
>  
> +/*
> + * Save/restore the state of a single PPI which must be routed to
> + * <current-vcpu> (that is, defined to be injected to the current vcpu).
> + */
> +struct hwppi_state;
> +extern void gic_save_and_mask_hwppi(struct vcpu *v, unsigned virq,
> +                                    struct hwppi_state *s);
> +extern void gic_restore_hwppi(struct vcpu *v, unsigned virq,
> +                              const struct hwppi_state *s);
> +
>  /* SGI (AKA IPIs) */
>  enum gic_sgi {
>      GIC_SGI_EVENT_CHECK = 0,
> @@ -293,8 +303,12 @@ struct gic_hw_operations {
>      const struct gic_info *info;
>      /* Save GIC registers */
>      void (*save_state)(struct vcpu *);
> +    void (*save_and_mask_hwppi)(struct vcpu *v, const unsigned virq,
> +                                struct hwppi_state *s);
>      /* Restore GIC registers */
>      void (*restore_state)(const struct vcpu *);
> +    void (*restore_hwppi)(struct vcpu *v, const unsigned virq,
> +                          const struct hwppi_state *s);
>      /* Dump GIC LR register information */
>      void (*dump_state)(const struct vcpu *);
>      /* Map MMIO region of GIC */
> diff --git a/xen/include/asm-arm/irq.h b/xen/include/asm-arm/irq.h
> index 435dfcd..a08438e 100644
> --- a/xen/include/asm-arm/irq.h
> +++ b/xen/include/asm-arm/irq.h
> @@ -42,6 +42,7 @@ void init_secondary_IRQ(void);
>  
>  int route_irq_to_guest(struct domain *d, unsigned int irq,
>                         const char *devname);
> +int route_irq_to_current_vcpu(unsigned int irq, const char *devname);
>  void arch_move_irqs(struct vcpu *v);
>  
>  /* Set IRQ type for an SPI */

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH RFC] xen: arm: context switch vtimer PPI state.
  2015-02-10  2:59 ` Ian Campbell
@ 2015-03-02 18:42   ` Stefano Stabellini
  2015-03-03 11:56     ` Ian Campbell
  0 siblings, 1 reply; 14+ messages in thread
From: Stefano Stabellini @ 2015-03-02 18:42 UTC (permalink / raw)
  To: Ian Campbell; +Cc: xen-devel, julien.grall, tim, stefano.stabellini

On Tue, 10 Feb 2015, Ian Campbell wrote:
> Stefano,
> 
> do you have any comments on the viability of the general approach here
> before I go off and start addressing the review comments?

I think that the general approach is OK


> Cheers,
> Ian.
> 
> On Mon, 2015-01-26 at 15:55 +0000, Ian Campbell wrote:
> > ... instead of artificially masking the timer interrupt in the timer
> > state and relying on the guest to unmask (which it isn't required to
> > do per the h/w spec, although Linux does)
> > 
> > To do this introduce the concept of routing a PPI to the currently
> > running VCPU. For such interrupts irq_get_domain returns NULL.
> > 
> > Then we make use of the GICD I[SC]ACTIVER registers to save and
> > restore the active state of the interrupt, which prevents the nested
> > invocations which the current masking works around.
> > 
> > For edge triggered interrupts we also need to context switch the
> > pending bit via I[SC]PENDR. Note that for level triggered interrupts
> > SPENDR sets a latch which is only cleared by ICPENDR (and not by h/w
> > state changes), therefore we do not want to context switch the pending
> > state for level PPIs -- instead we rely on the context switch of the
> > peripheral to restore the correct level.
> > 
> > RFC Only:
> >  - Not implemented for GIC v3 yet.
> >  - Lightly tested with hackbench on systems with level and edge
> >    triggered vtimer PPI.
> >  - Is irq_get_domain == NULL to indicate route-to-current-vcpu the
> >    best idea? Any alternatives?
> > 
> > Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
> > ---
> >  xen/arch/arm/gic-v2.c        |   84 ++++++++++++++++++++++++++++++++++++++++++
> >  xen/arch/arm/gic.c           |   32 +++++++++++++---
> >  xen/arch/arm/irq.c           |   48 ++++++++++++++++++++++--
> >  xen/arch/arm/time.c          |   26 +------------
> >  xen/arch/arm/vtimer.c        |   24 ++++++++++--
> >  xen/include/asm-arm/domain.h |   11 ++++++
> >  xen/include/asm-arm/gic.h    |   14 +++++++
> >  xen/include/asm-arm/irq.h    |    1 +
> >  8 files changed, 204 insertions(+), 36 deletions(-)
> > 
> > diff --git a/xen/arch/arm/gic-v2.c b/xen/arch/arm/gic-v2.c
> > index 31fb81a..9074aca 100644
> > --- a/xen/arch/arm/gic-v2.c
> > +++ b/xen/arch/arm/gic-v2.c
> > @@ -156,6 +156,45 @@ static void gicv2_save_state(struct vcpu *v)
> >      writel_gich(0, GICH_HCR);
> >  }
> >  
> > +static void gicv2_save_and_mask_hwppi(struct vcpu *v, const unsigned virq,

Shouldn't the argument be a physical irq? Maybe irq_desc?


> > +                                      struct hwppi_state *s)
> > +{
> > +    struct pending_irq *p = irq_to_pending(v, virq);
> > +    const unsigned int offs = virq / 32;
> > +    const unsigned int mask = (1u << (virq % 32));
> > +    const unsigned int pendingr = readl_gicd(GICD_ISPENDR + offs*4);
> > +    const unsigned int activer = readl_gicd(GICD_ISACTIVER + offs*4);
> > +    const unsigned int enabler = readl_gicd(GICD_ISENABLER + offs*4);
> > +    const bool is_edge = !!(p->desc->arch.type & DT_IRQ_TYPE_EDGE_BOTH);
> > +
> > +    ASSERT(!is_idle_vcpu(v));
> > +
> > +    s->active = !!(activer & mask);
> > +    s->enabled = !!(enabler & mask);
> > +    s->pending = !!(pendingr & mask);
> > +    s->inprogress = test_and_clear_bit(_IRQ_INPROGRESS, &p->desc->status);
> > +
> > +    /* Write a 1 to IC...R to clear the corresponding bit of state */
> > +    if ( s->active )
> > +        writel_gicd(mask, GICD_ICACTIVER + offs*4);
> > +    /*
> > +     * For an edge interrupt clear the pending state, for a level interrupt
> > +     * this clears the latch there is no need since saving the peripheral state
> > +     * (and/or restoring the next VCPU) will cause the correct action.
> > +     */
> > +    if ( is_edge && s->pending )
> > +        writel_gicd(mask, GICD_ICPENDR + offs*4);
> > +
> > +    if ( s->enabled )
> > +    {
> > +        writel_gicd(mask, GICD_ICENABLER + offs*4);
> > +        set_bit(_IRQ_DISABLED, &p->desc->status);
> > +    }
> > +
> > +    ASSERT(!(readl_gicd(GICD_ISACTIVER + offs*4) & mask));
> > +    ASSERT(!(readl_gicd(GICD_ISENABLER + offs*4) & mask));
> > +}
> > +
> >  static void gicv2_restore_state(const struct vcpu *v)
> >  {
> >      int i;
> > @@ -168,6 +207,49 @@ static void gicv2_restore_state(const struct vcpu *v)
> >      writel_gich(GICH_HCR_EN, GICH_HCR);
> >  }
> >  
> > +static void gicv2_restore_hwppi(struct vcpu *v, const unsigned virq,

same here


> > +                                const struct hwppi_state *s)
> > +{
> > +    struct pending_irq *p = irq_to_pending(v, virq);
> > +    const unsigned int offs = virq / 32;
> > +    const unsigned int mask = (1u << (virq % 32));
> > +    struct irq_desc *desc = irq_to_desc(virq);
> > +    const bool is_edge = !!(desc->arch.type & DT_IRQ_TYPE_EDGE_BOTH);
> > +    struct pending_irq *pending = irq_to_pending(v, virq);
> > +
> > +    pending->desc = desc; /* Migrate to new physical processor */
> > +
> > +    /*
> > +     * The IRQ must always have been set inactive and masked etc by
> > +     * the saving of the previous state via save_and_mask_hwppi.
> > +     */
> > +    ASSERT(!(readl_gicd(GICD_ISACTIVER + offs*4) & mask));
> > +    ASSERT(!(readl_gicd(GICD_ISENABLER + offs*4) & mask));
> > +
> > +    if ( s->active )
> > +        writel_gicd(mask, GICD_ICACTIVER + offs*4);
> > +
> > +    /*
> > +     * Restore pending state for edge triggered interrupts only. For
> > +     * level triggered interrupts the level will be restored as
> > +     * necessary by restoring the state of the relevant peripheral.
> > +     *
> > +     * For a level triggered interrupt ISPENDR acts as a *latch* which
> > +     * is only cleared by ICPENDR (i.e. the input level is no longer
> > +     * relevant). We certainly do not want that here.
> > +     */
> > +    if ( is_edge && s->pending )
> > +        writel_gicd(mask, GICD_ISPENDR + offs*4);
> > +    if ( s->enabled )
> > +    {
> > +        clear_bit(_IRQ_DISABLED, &p->desc->status);
> > +        dsb(sy);
> > +        writel_gicd(mask, GICD_ISENABLER + offs*4);
> > +    }
> > +    if ( s->inprogress )
> > +        set_bit(_IRQ_INPROGRESS, &p->desc->status);
> > +}
> > +
> >  static void gicv2_dump_state(const struct vcpu *v)
> >  {
> >      int i;
> > @@ -660,7 +742,9 @@ const static struct gic_hw_operations gicv2_ops = {
> >      .info                = &gicv2_info,
> >      .secondary_init      = gicv2_secondary_cpu_init,
> >      .save_state          = gicv2_save_state,
> > +    .save_and_mask_hwppi = gicv2_save_and_mask_hwppi,
> >      .restore_state       = gicv2_restore_state,
> > +    .restore_hwppi       = gicv2_restore_hwppi,
> >      .dump_state          = gicv2_dump_state,
> >      .gicv_setup          = gicv2v_setup,
> >      .gic_host_irq_type   = &gicv2_host_irq_type,
> > diff --git a/xen/arch/arm/gic.c b/xen/arch/arm/gic.c
> > index 7d4ee19..7ea980d 100644
> > --- a/xen/arch/arm/gic.c
> > +++ b/xen/arch/arm/gic.c
> > @@ -81,6 +81,12 @@ void gic_save_state(struct vcpu *v)
> >      isb();
> >  }
> >  
> > +void gic_save_and_mask_hwppi(struct vcpu *v, unsigned virq,
> > +                             struct hwppi_state *s)
> > +{
> > +    gic_hw_ops->save_and_mask_hwppi(v, virq, s);
> > +}
> > +
> >  void gic_restore_state(struct vcpu *v)
> >  {
> >      ASSERT(!local_irq_is_enabled());
> > @@ -94,6 +100,12 @@ void gic_restore_state(struct vcpu *v)
> >      gic_restore_pending_irqs(v);
> >  }
> >  
> > +void gic_restore_hwppi(struct vcpu *v, const unsigned virq,
> > +                       const struct hwppi_state *s)
> > +{
> > +    gic_hw_ops->restore_hwppi(v, virq, s);
> > +}
> > +
> >  /*
> >   * needs to be called with a valid cpu_mask, ie each cpu in the mask has
> >   * already called gic_cpu_init
> > @@ -125,11 +137,15 @@ void gic_route_irq_to_xen(struct irq_desc *desc, const cpumask_t *cpu_mask,
> >  
> >  /* Program the GIC to route an interrupt to a guest
> >   *   - desc.lock must be held
> > + *   - d may be NULL, in which case interrupt *must* be a PPI and is routed to
> > + *     the vcpu currently running when that PPI fires. In this case the code
> > + *     responsible for the related hardware must save and restore the PPI with
> > + *     gic_save_and_mask_hwppi/gic_restore_hwppi.
> > + *   - if d is non-NULL then the interrupt *must* be an SPI.
> >   */
> >  void gic_route_irq_to_guest(struct domain *d, struct irq_desc *desc,
> >                              const cpumask_t *cpu_mask, unsigned int priority)
> >  {
> > -    struct pending_irq *p;
> >      ASSERT(spin_is_locked(&desc->lock));
> >  
> >      desc->handler = gic_hw_ops->gic_guest_irq_type;
> > @@ -137,10 +153,16 @@ void gic_route_irq_to_guest(struct domain *d, struct irq_desc *desc,
> >  
> >      gic_set_irq_properties(desc, cpumask_of(smp_processor_id()), GIC_PRI_IRQ);
> >  
> > -    /* Use vcpu0 to retrieve the pending_irq struct. Given that we only
> > -     * route SPIs to guests, it doesn't make any difference. */
> > -    p = irq_to_pending(d->vcpu[0], desc->irq);
> > -    p->desc = desc;
> > +    if ( d )
> > +    {
> > +        struct pending_irq *p;
> > +
> > +        /* Use vcpu0 to retrieve the pending_irq struct. Given that we only
> > +         * route SPIs to guests, it doesn't make any difference. */
> > +        p = irq_to_pending(d->vcpu[0], desc->irq);
> > +        p->desc = desc;
> > +    }
> > +    /* else p->desc init'd on ctxt restore in gic_restore_hwppi */
> >  }
> >  
> >  int gic_irq_xlate(const u32 *intspec, unsigned int intsize,
> > diff --git a/xen/arch/arm/irq.c b/xen/arch/arm/irq.c
> > index ebdf19a..93c38ff 100644
> > --- a/xen/arch/arm/irq.c
> > +++ b/xen/arch/arm/irq.c
> > @@ -202,6 +202,19 @@ void do_IRQ(struct cpu_user_regs *regs, unsigned int irq, int is_fiq)
> >          goto out;
> >      }
> >  
> > +    if ( irq == current->arch.virt_timer.irq )
> > +    {
> > +        ASSERT(!irq_get_domain(desc));
> > +
> > +        desc->handler->end(desc);
> > +
> > +        set_bit(_IRQ_INPROGRESS, &desc->status);
> > +        desc->arch.eoi_cpu = smp_processor_id();
> > +
> > +        vgic_vcpu_inject_irq(current, irq);
> > +        goto out_no_end;
> > +    }

I don't think we should special case virt_timer.irq here. I would try to
reuse the normal _IRQ_GUEST path or make this if generic for any PPIs
routed to guests.


> >      if ( test_bit(_IRQ_GUEST, &desc->status) )
> >      {
> >          struct domain *d = irq_get_domain(desc);
> > @@ -346,8 +359,12 @@ int setup_irq(unsigned int irq, unsigned int irqflags, struct irqaction *new)
> >          struct domain *d = irq_get_domain(desc);
> >  
> >          spin_unlock_irqrestore(&desc->lock, flags);
> > -        printk(XENLOG_ERR "ERROR: IRQ %u is already in use by the domain %u\n",
> > -               irq, d->domain_id);
> > +        if ( d )
> > +            printk(XENLOG_ERR "ERROR: IRQ %u is already in use by domain %u\n",
> > +                   irq, d->domain_id);
> > +        else
> > +            printk(XENLOG_ERR
> > +                   "ERROR: IRQ %u is already in use by <current-vcpu>\n", irq);
> >          return -EBUSY;
> >      }
> >  
> > @@ -378,6 +395,10 @@ err:
> >      return rc;
> >  }
> >  
> > +/*
> > + * If d == NULL then IRQ is routed to current vcpu at time it is received and
> > + * must be a PPI.
> > + */
> >  int route_irq_to_guest(struct domain *d, unsigned int irq,
> >                         const char * devname)
> >  {
> > @@ -386,6 +407,8 @@ int route_irq_to_guest(struct domain *d, unsigned int irq,
> >      unsigned long flags;
> >      int retval = 0;
> >  
> > +    ASSERT( d || ( irq >=16 && irq < 32 ) );
> > +
> >      action = xmalloc(struct irqaction);
> >      if (!action)
> >          return -ENOMEM;
> > @@ -406,11 +429,23 @@ int route_irq_to_guest(struct domain *d, unsigned int irq,
> >          struct domain *ad = irq_get_domain(desc);
> >  
> >          if ( test_bit(_IRQ_GUEST, &desc->status) && d == ad )
> > +        {
> > +            printk(XENLOG_ERR
> > +                   "ERROR: IRQ %u is already routed to domain %p\n",
> > +                   irq, ad);
> >              goto out;
> > +        }
> >  
> >          if ( test_bit(_IRQ_GUEST, &desc->status) )
> > -            printk(XENLOG_ERR "ERROR: IRQ %u is already used by domain %u\n",
> > -                   irq, ad->domain_id);
> > +        {
> > +            if ( ad )
> > +                printk(XENLOG_ERR
> > +                       "ERROR: IRQ %u is already used by domain %u\n",
> > +                       irq, ad->domain_id);
> > +            else
> > +                printk(XENLOG_ERR
> > +                       "ERROR: IRQ %u is already used by <current-vcpu>\n", irq);
> > +        }
> >          else
> >              printk(XENLOG_ERR "ERROR: IRQ %u is already used by Xen\n", irq);
> >          retval = -EBUSY;
> > @@ -433,6 +468,11 @@ out:
> >      return retval;
> >  }
> >  
> > +int route_irq_to_current_vcpu(unsigned int irq, const char *devname)
> > +{
> > +    return route_irq_to_guest(NULL, irq, devname);
> > +}
> > +
> >  /*
> >   * pirq event channels. We don't use these on ARM, instead we use the
> >   * features of the GIC to inject virtualised normal interrupts.
> > diff --git a/xen/arch/arm/time.c b/xen/arch/arm/time.c
> > index 455f217..ffa5eef 100644
> > --- a/xen/arch/arm/time.c
> > +++ b/xen/arch/arm/time.c
> > @@ -169,28 +169,6 @@ static void timer_interrupt(int irq, void *dev_id, struct cpu_user_regs *regs)
> >      }
> >  }
> >  
> > -static void vtimer_interrupt(int irq, void *dev_id, struct cpu_user_regs *regs)
> > -{
> > -    /*
> > -     * Edge-triggered interrupts can be used for the virtual timer. Even
> > -     * if the timer output signal is masked in the context switch, the
> > -     * GIC will keep track that of any interrupts raised while IRQS are
> > -     * disabled. As soon as IRQs are re-enabled, the virtual interrupt
> > -     * will be injected to Xen.
> > -     *
> > -     * If an IDLE vCPU was scheduled next then we should ignore the
> > -     * interrupt.
> > -     */
> > -    if ( unlikely(is_idle_vcpu(current)) )
> > -        return;
> > -
> > -    perfc_incr(virt_timer_irqs);
> > -
> > -    current->arch.virt_timer.ctl = READ_SYSREG32(CNTV_CTL_EL0);
> > -    WRITE_SYSREG32(current->arch.virt_timer.ctl | CNTx_CTL_MASK, CNTV_CTL_EL0);
> > -    vgic_vcpu_inject_irq(current, current->arch.virt_timer.irq);
> > -}
> > -
> >  /* Set up the timer interrupt on this CPU */
> >  void __cpuinit init_timer_interrupt(void)
> >  {
> > @@ -204,8 +182,8 @@ void __cpuinit init_timer_interrupt(void)
> >  
> >      request_irq(timer_irq[TIMER_HYP_PPI], 0, timer_interrupt,
> >                  "hyptimer", NULL);
> > -    request_irq(timer_irq[TIMER_VIRT_PPI], 0, vtimer_interrupt,
> > -                   "virtimer", NULL);
> > +    route_irq_to_current_vcpu(timer_irq[TIMER_VIRT_PPI], "virtimer");
> > +
> >      request_irq(timer_irq[TIMER_PHYS_NONSECURE_PPI], 0, timer_interrupt,
> >                  "phytimer", NULL);
> >  }
> > diff --git a/xen/arch/arm/vtimer.c b/xen/arch/arm/vtimer.c
> > index 848e2c6..d1f21a1 100644
> > --- a/xen/arch/arm/vtimer.c
> > +++ b/xen/arch/arm/vtimer.c
> > @@ -47,9 +47,14 @@ static void phys_timer_expired(void *data)
> >  static void virt_timer_expired(void *data)
> >  {
> >      struct vtimer *t = data;
> > -    t->ctl |= CNTx_CTL_MASK;
> > -    vgic_vcpu_inject_irq(t->v, t->irq);
> > -    perfc_incr(vtimer_virt_inject);
> > +    t->ctl |= CNTx_CTL_PENDING;
> > +    if ( !(t->ctl & CNTx_CTL_MASK) )
> > +    {
> > +        /* An edge triggered interrupt should now be pending. */
> > +        t->ppi_state.pending = true;
> > +        vcpu_unblock(t->v);

I was going to say that this is trouble because
local_events_need_delivery wouldn't recognize that we actually have an
event pending. But it doesn't matter because local_events_need_delivery
only works with the current vcpu and if this code trigger then the vcpu
that needs to receive the event cannot be current. So I think that is OK
but for clarity it might be better to add a check or a comment in
local_events_need_delivery_nomask anyway.


> > +        perfc_incr(vtimer_virt_inject);
> > +    }
> >  }
> >  
> >  int domain_vtimer_init(struct domain *d)
> > @@ -112,6 +117,15 @@ int virt_timer_save(struct vcpu *v)
> >          set_timer(&v->arch.virt_timer.timer, ticks_to_ns(v->arch.virt_timer.cval +
> >                    v->domain->arch.virt_timer_base.offset - boot_count));
> >      }
> > +
> > +    /*
> > +     * Since the vtimer irq is a PPI we don't need to worry about
> > +     * racing against it becoming active while we are saving the
> > +     * state, since that requires the guest to be reading the IAR.
> > +     */
> > +    gic_save_and_mask_hwppi(v, v->arch.virt_timer.irq,
> > +                            &v->arch.virt_timer.ppi_state);
> > +
> >      return 0;
> >  }
> >  
> > @@ -126,6 +140,10 @@ int virt_timer_restore(struct vcpu *v)
> >      WRITE_SYSREG64(v->domain->arch.virt_timer_base.offset, CNTVOFF_EL2);
> >      WRITE_SYSREG64(v->arch.virt_timer.cval, CNTV_CVAL_EL0);
> >      WRITE_SYSREG32(v->arch.virt_timer.ctl, CNTV_CTL_EL0);
> > +
> > +    gic_restore_hwppi(v, v->arch.virt_timer.irq,
> > +                      &v->arch.virt_timer.ppi_state);
> > +
> >      return 0;
> >  }
> >  
> > diff --git a/xen/include/asm-arm/domain.h b/xen/include/asm-arm/domain.h
> > index 90ab9c3..dd50e2c 100644
> > --- a/xen/include/asm-arm/domain.h
> > +++ b/xen/include/asm-arm/domain.h
> > @@ -34,12 +34,23 @@ enum domain_type {
> >  extern int dom0_11_mapping;
> >  #define is_domain_direct_mapped(d) ((d) == hardware_domain && dom0_11_mapping)
> >  
> > +struct hwppi_state {
> > +    /* h/w state */
> > +    unsigned long enabled:1;
> > +    unsigned long pending:1;
> > +    unsigned long active:1;
> > +
> > +    /* Xen s/w state */
> > +    unsigned long inprogress:1;
> > +};

Using a uint32_t bitmask would be more in line the rest of the code
style, but it is just a matter of taste.


> >  struct vtimer {
> >      struct vcpu *v;
> >      int irq;
> >      struct timer timer;
> >      uint32_t ctl;
> >      uint64_t cval;
> > +    struct hwppi_state ppi_state;
> >  };
> >
> >  struct arch_domain
> > diff --git a/xen/include/asm-arm/gic.h b/xen/include/asm-arm/gic.h
> > index 187dc46..aa8cbac 100644
> > --- a/xen/include/asm-arm/gic.h
> > +++ b/xen/include/asm-arm/gic.h
> > @@ -247,6 +247,16 @@ extern int gicv_setup(struct domain *d);
> >  extern void gic_save_state(struct vcpu *v);
> >  extern void gic_restore_state(struct vcpu *v);
> >  
> > +/*
> > + * Save/restore the state of a single PPI which must be routed to
> > + * <current-vcpu> (that is, defined to be injected to the current vcpu).
> > + */
> > +struct hwppi_state;
> > +extern void gic_save_and_mask_hwppi(struct vcpu *v, unsigned virq,
> > +                                    struct hwppi_state *s);
> > +extern void gic_restore_hwppi(struct vcpu *v, unsigned virq,
> > +                              const struct hwppi_state *s);
> > +
> >  /* SGI (AKA IPIs) */
> >  enum gic_sgi {
> >      GIC_SGI_EVENT_CHECK = 0,
> > @@ -293,8 +303,12 @@ struct gic_hw_operations {
> >      const struct gic_info *info;
> >      /* Save GIC registers */
> >      void (*save_state)(struct vcpu *);
> > +    void (*save_and_mask_hwppi)(struct vcpu *v, const unsigned virq,
> > +                                struct hwppi_state *s);
> >      /* Restore GIC registers */
> >      void (*restore_state)(const struct vcpu *);
> > +    void (*restore_hwppi)(struct vcpu *v, const unsigned virq,
> > +                          const struct hwppi_state *s);
> >      /* Dump GIC LR register information */
> >      void (*dump_state)(const struct vcpu *);
> >      /* Map MMIO region of GIC */
> > diff --git a/xen/include/asm-arm/irq.h b/xen/include/asm-arm/irq.h
> > index 435dfcd..a08438e 100644
> > --- a/xen/include/asm-arm/irq.h
> > +++ b/xen/include/asm-arm/irq.h
> > @@ -42,6 +42,7 @@ void init_secondary_IRQ(void);
> >  
> >  int route_irq_to_guest(struct domain *d, unsigned int irq,
> >                         const char *devname);
> > +int route_irq_to_current_vcpu(unsigned int irq, const char *devname);
> >  void arch_move_irqs(struct vcpu *v);
> >  
> >  /* Set IRQ type for an SPI */
> 
> 

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH RFC] xen: arm: context switch vtimer PPI state.
  2015-01-26 15:55 [PATCH RFC] xen: arm: context switch vtimer PPI state Ian Campbell
  2015-01-27 13:16 ` Julien Grall
  2015-02-10  2:59 ` Ian Campbell
@ 2015-03-03 11:38 ` Stefano Stabellini
  2015-03-03 11:51   ` Ian Campbell
  2 siblings, 1 reply; 14+ messages in thread
From: Stefano Stabellini @ 2015-03-03 11:38 UTC (permalink / raw)
  To: Ian Campbell; +Cc: xen-devel, julien.grall, tim, stefano.stabellini

On Mon, 26 Jan 2015, Ian Campbell wrote:
> ... instead of artificially masking the timer interrupt in the timer
> state and relying on the guest to unmask (which it isn't required to
> do per the h/w spec, although Linux does)
> 
> To do this introduce the concept of routing a PPI to the currently
> running VCPU. For such interrupts irq_get_domain returns NULL.
> 
> Then we make use of the GICD I[SC]ACTIVER registers to save and
> restore the active state of the interrupt, which prevents the nested
> invocations which the current masking works around.
> 
> For edge triggered interrupts we also need to context switch the
> pending bit via I[SC]PENDR. Note that for level triggered interrupts
> SPENDR sets a latch which is only cleared by ICPENDR (and not by h/w
> state changes), therefore we do not want to context switch the pending
> state for level PPIs -- instead we rely on the context switch of the
> peripheral to restore the correct level.
> 
> RFC Only:
>  - Not implemented for GIC v3 yet.
>  - Lightly tested with hackbench on systems with level and edge
>    triggered vtimer PPI.
>  - Is irq_get_domain == NULL to indicate route-to-current-vcpu the
>    best idea? Any alternatives?
> 
> Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
> ---
>  xen/arch/arm/gic-v2.c        |   84 ++++++++++++++++++++++++++++++++++++++++++
>  xen/arch/arm/gic.c           |   32 +++++++++++++---
>  xen/arch/arm/irq.c           |   48 ++++++++++++++++++++++--
>  xen/arch/arm/time.c          |   26 +------------
>  xen/arch/arm/vtimer.c        |   24 ++++++++++--
>  xen/include/asm-arm/domain.h |   11 ++++++
>  xen/include/asm-arm/gic.h    |   14 +++++++
>  xen/include/asm-arm/irq.h    |    1 +
>  8 files changed, 204 insertions(+), 36 deletions(-)
> 
> diff --git a/xen/arch/arm/gic-v2.c b/xen/arch/arm/gic-v2.c
> index 31fb81a..9074aca 100644
> --- a/xen/arch/arm/gic-v2.c
> +++ b/xen/arch/arm/gic-v2.c
> @@ -156,6 +156,45 @@ static void gicv2_save_state(struct vcpu *v)
>      writel_gich(0, GICH_HCR);
>  }
>  
> +static void gicv2_save_and_mask_hwppi(struct vcpu *v, const unsigned virq,
> +                                      struct hwppi_state *s)
> +{
> +    struct pending_irq *p = irq_to_pending(v, virq);
> +    const unsigned int offs = virq / 32;
> +    const unsigned int mask = (1u << (virq % 32));
> +    const unsigned int pendingr = readl_gicd(GICD_ISPENDR + offs*4);
> +    const unsigned int activer = readl_gicd(GICD_ISACTIVER + offs*4);
> +    const unsigned int enabler = readl_gicd(GICD_ISENABLER + offs*4);
> +    const bool is_edge = !!(p->desc->arch.type & DT_IRQ_TYPE_EDGE_BOTH);
> +
> +    ASSERT(!is_idle_vcpu(v));
> +
> +    s->active = !!(activer & mask);
> +    s->enabled = !!(enabler & mask);
> +    s->pending = !!(pendingr & mask);
> +    s->inprogress = test_and_clear_bit(_IRQ_INPROGRESS, &p->desc->status);
> +
> +    /* Write a 1 to IC...R to clear the corresponding bit of state */
> +    if ( s->active )
> +        writel_gicd(mask, GICD_ICACTIVER + offs*4);
> +    /*
> +     * For an edge interrupt clear the pending state, for a level interrupt
> +     * this clears the latch there is no need since saving the peripheral state
> +     * (and/or restoring the next VCPU) will cause the correct action.
> +     */
> +    if ( is_edge && s->pending )
> +        writel_gicd(mask, GICD_ICPENDR + offs*4);
> +
> +    if ( s->enabled )
> +    {
> +        writel_gicd(mask, GICD_ICENABLER + offs*4);
> +        set_bit(_IRQ_DISABLED, &p->desc->status);
> +    }
> +
> +    ASSERT(!(readl_gicd(GICD_ISACTIVER + offs*4) & mask));
> +    ASSERT(!(readl_gicd(GICD_ISENABLER + offs*4) & mask));
> +}
> +
>  static void gicv2_restore_state(const struct vcpu *v)
>  {
>      int i;
> @@ -168,6 +207,49 @@ static void gicv2_restore_state(const struct vcpu *v)
>      writel_gich(GICH_HCR_EN, GICH_HCR);
>  }
>  
> +static void gicv2_restore_hwppi(struct vcpu *v, const unsigned virq,
> +                                const struct hwppi_state *s)
> +{
> +    struct pending_irq *p = irq_to_pending(v, virq);
> +    const unsigned int offs = virq / 32;
> +    const unsigned int mask = (1u << (virq % 32));
> +    struct irq_desc *desc = irq_to_desc(virq);
> +    const bool is_edge = !!(desc->arch.type & DT_IRQ_TYPE_EDGE_BOTH);
> +    struct pending_irq *pending = irq_to_pending(v, virq);
> +
> +    pending->desc = desc; /* Migrate to new physical processor */
> +
> +    /*
> +     * The IRQ must always have been set inactive and masked etc by
> +     * the saving of the previous state via save_and_mask_hwppi.
> +     */
> +    ASSERT(!(readl_gicd(GICD_ISACTIVER + offs*4) & mask));
> +    ASSERT(!(readl_gicd(GICD_ISENABLER + offs*4) & mask));
> +
> +    if ( s->active )
> +        writel_gicd(mask, GICD_ICACTIVER + offs*4);
> +
> +    /*
> +     * Restore pending state for edge triggered interrupts only. For
> +     * level triggered interrupts the level will be restored as
> +     * necessary by restoring the state of the relevant peripheral.
> +     *
> +     * For a level triggered interrupt ISPENDR acts as a *latch* which
> +     * is only cleared by ICPENDR (i.e. the input level is no longer
> +     * relevant). We certainly do not want that here.
> +     */
> +    if ( is_edge && s->pending )
> +        writel_gicd(mask, GICD_ISPENDR + offs*4);
> +    if ( s->enabled )
> +    {
> +        clear_bit(_IRQ_DISABLED, &p->desc->status);
> +        dsb(sy);
> +        writel_gicd(mask, GICD_ISENABLER + offs*4);
> +    }
> +    if ( s->inprogress )
> +        set_bit(_IRQ_INPROGRESS, &p->desc->status);
> +}
> +
>  static void gicv2_dump_state(const struct vcpu *v)
>  {
>      int i;
> @@ -660,7 +742,9 @@ const static struct gic_hw_operations gicv2_ops = {
>      .info                = &gicv2_info,
>      .secondary_init      = gicv2_secondary_cpu_init,
>      .save_state          = gicv2_save_state,
> +    .save_and_mask_hwppi = gicv2_save_and_mask_hwppi,
>      .restore_state       = gicv2_restore_state,
> +    .restore_hwppi       = gicv2_restore_hwppi,
>      .dump_state          = gicv2_dump_state,
>      .gicv_setup          = gicv2v_setup,
>      .gic_host_irq_type   = &gicv2_host_irq_type,
> diff --git a/xen/arch/arm/gic.c b/xen/arch/arm/gic.c
> index 7d4ee19..7ea980d 100644
> --- a/xen/arch/arm/gic.c
> +++ b/xen/arch/arm/gic.c
> @@ -81,6 +81,12 @@ void gic_save_state(struct vcpu *v)
>      isb();
>  }
>  
> +void gic_save_and_mask_hwppi(struct vcpu *v, unsigned virq,
> +                             struct hwppi_state *s)
> +{
> +    gic_hw_ops->save_and_mask_hwppi(v, virq, s);
> +}
> +
>  void gic_restore_state(struct vcpu *v)
>  {
>      ASSERT(!local_irq_is_enabled());
> @@ -94,6 +100,12 @@ void gic_restore_state(struct vcpu *v)
>      gic_restore_pending_irqs(v);
>  }
>  
> +void gic_restore_hwppi(struct vcpu *v, const unsigned virq,
> +                       const struct hwppi_state *s)
> +{
> +    gic_hw_ops->restore_hwppi(v, virq, s);
> +}
> +
>  /*
>   * needs to be called with a valid cpu_mask, ie each cpu in the mask has
>   * already called gic_cpu_init
> @@ -125,11 +137,15 @@ void gic_route_irq_to_xen(struct irq_desc *desc, const cpumask_t *cpu_mask,
>  
>  /* Program the GIC to route an interrupt to a guest
>   *   - desc.lock must be held
> + *   - d may be NULL, in which case interrupt *must* be a PPI and is routed to
> + *     the vcpu currently running when that PPI fires. In this case the code
> + *     responsible for the related hardware must save and restore the PPI with
> + *     gic_save_and_mask_hwppi/gic_restore_hwppi.
> + *   - if d is non-NULL then the interrupt *must* be an SPI.
>   */
>  void gic_route_irq_to_guest(struct domain *d, struct irq_desc *desc,
>                              const cpumask_t *cpu_mask, unsigned int priority)
>  {
> -    struct pending_irq *p;
>      ASSERT(spin_is_locked(&desc->lock));
>  
>      desc->handler = gic_hw_ops->gic_guest_irq_type;
> @@ -137,10 +153,16 @@ void gic_route_irq_to_guest(struct domain *d, struct irq_desc *desc,
>  
>      gic_set_irq_properties(desc, cpumask_of(smp_processor_id()), GIC_PRI_IRQ);
>  
> -    /* Use vcpu0 to retrieve the pending_irq struct. Given that we only
> -     * route SPIs to guests, it doesn't make any difference. */
> -    p = irq_to_pending(d->vcpu[0], desc->irq);
> -    p->desc = desc;
> +    if ( d )
> +    {
> +        struct pending_irq *p;
> +
> +        /* Use vcpu0 to retrieve the pending_irq struct. Given that we only
> +         * route SPIs to guests, it doesn't make any difference. */
> +        p = irq_to_pending(d->vcpu[0], desc->irq);
> +        p->desc = desc;
> +    }

I think you need to assign p->desc = desc in this case too, otherwise
p->desc == NULL checks in gic.c and related code will succeed (and we
want them to fail as this is an hardware interrupt). We should be able
to use GICH_V2_LR_HW for it, right?

If you don't set p->desc, it is going to be treated as a virtual irq
with no corresponding physical irq.

In fact I am not sure how the code can work as is given that
desc->handler is set to gic_hw_ops->gic_guest_irq_type but desc = NULL:
who is doing the EOI of the physical interrupt? Who is calling
gicv2_dir_irq?


> +    /* else p->desc init'd on ctxt restore in gic_restore_hwppi */
>  }
>  
>  int gic_irq_xlate(const u32 *intspec, unsigned int intsize,
> diff --git a/xen/arch/arm/irq.c b/xen/arch/arm/irq.c
> index ebdf19a..93c38ff 100644
> --- a/xen/arch/arm/irq.c
> +++ b/xen/arch/arm/irq.c
> @@ -202,6 +202,19 @@ void do_IRQ(struct cpu_user_regs *regs, unsigned int irq, int is_fiq)
>          goto out;
>      }
>  
> +    if ( irq == current->arch.virt_timer.irq )
> +    {
> +        ASSERT(!irq_get_domain(desc));
> +
> +        desc->handler->end(desc);
> +
> +        set_bit(_IRQ_INPROGRESS, &desc->status);
> +        desc->arch.eoi_cpu = smp_processor_id();
> +
> +        vgic_vcpu_inject_irq(current, irq);
> +        goto out_no_end;
> +    }
> +
>      if ( test_bit(_IRQ_GUEST, &desc->status) )
>      {
>          struct domain *d = irq_get_domain(desc);
> @@ -346,8 +359,12 @@ int setup_irq(unsigned int irq, unsigned int irqflags, struct irqaction *new)
>          struct domain *d = irq_get_domain(desc);
>  
>          spin_unlock_irqrestore(&desc->lock, flags);
> -        printk(XENLOG_ERR "ERROR: IRQ %u is already in use by the domain %u\n",
> -               irq, d->domain_id);
> +        if ( d )
> +            printk(XENLOG_ERR "ERROR: IRQ %u is already in use by domain %u\n",
> +                   irq, d->domain_id);
> +        else
> +            printk(XENLOG_ERR
> +                   "ERROR: IRQ %u is already in use by <current-vcpu>\n", irq);
>          return -EBUSY;
>      }
>  
> @@ -378,6 +395,10 @@ err:
>      return rc;
>  }
>  
> +/*
> + * If d == NULL then IRQ is routed to current vcpu at time it is received and
> + * must be a PPI.
> + */
>  int route_irq_to_guest(struct domain *d, unsigned int irq,
>                         const char * devname)
>  {
> @@ -386,6 +407,8 @@ int route_irq_to_guest(struct domain *d, unsigned int irq,
>      unsigned long flags;
>      int retval = 0;
>  
> +    ASSERT( d || ( irq >=16 && irq < 32 ) );
> +
>      action = xmalloc(struct irqaction);
>      if (!action)
>          return -ENOMEM;
> @@ -406,11 +429,23 @@ int route_irq_to_guest(struct domain *d, unsigned int irq,
>          struct domain *ad = irq_get_domain(desc);
>  
>          if ( test_bit(_IRQ_GUEST, &desc->status) && d == ad )
> +        {
> +            printk(XENLOG_ERR
> +                   "ERROR: IRQ %u is already routed to domain %p\n",
> +                   irq, ad);
>              goto out;
> +        }
>  
>          if ( test_bit(_IRQ_GUEST, &desc->status) )
> -            printk(XENLOG_ERR "ERROR: IRQ %u is already used by domain %u\n",
> -                   irq, ad->domain_id);
> +        {
> +            if ( ad )
> +                printk(XENLOG_ERR
> +                       "ERROR: IRQ %u is already used by domain %u\n",
> +                       irq, ad->domain_id);
> +            else
> +                printk(XENLOG_ERR
> +                       "ERROR: IRQ %u is already used by <current-vcpu>\n", irq);
> +        }
>          else
>              printk(XENLOG_ERR "ERROR: IRQ %u is already used by Xen\n", irq);
>          retval = -EBUSY;
> @@ -433,6 +468,11 @@ out:
>      return retval;
>  }
>  
> +int route_irq_to_current_vcpu(unsigned int irq, const char *devname)
> +{
> +    return route_irq_to_guest(NULL, irq, devname);
> +}
> +
>  /*
>   * pirq event channels. We don't use these on ARM, instead we use the
>   * features of the GIC to inject virtualised normal interrupts.
> diff --git a/xen/arch/arm/time.c b/xen/arch/arm/time.c
> index 455f217..ffa5eef 100644
> --- a/xen/arch/arm/time.c
> +++ b/xen/arch/arm/time.c
> @@ -169,28 +169,6 @@ static void timer_interrupt(int irq, void *dev_id, struct cpu_user_regs *regs)
>      }
>  }
>  
> -static void vtimer_interrupt(int irq, void *dev_id, struct cpu_user_regs *regs)
> -{
> -    /*
> -     * Edge-triggered interrupts can be used for the virtual timer. Even
> -     * if the timer output signal is masked in the context switch, the
> -     * GIC will keep track that of any interrupts raised while IRQS are
> -     * disabled. As soon as IRQs are re-enabled, the virtual interrupt
> -     * will be injected to Xen.
> -     *
> -     * If an IDLE vCPU was scheduled next then we should ignore the
> -     * interrupt.
> -     */
> -    if ( unlikely(is_idle_vcpu(current)) )
> -        return;
> -
> -    perfc_incr(virt_timer_irqs);
> -
> -    current->arch.virt_timer.ctl = READ_SYSREG32(CNTV_CTL_EL0);
> -    WRITE_SYSREG32(current->arch.virt_timer.ctl | CNTx_CTL_MASK, CNTV_CTL_EL0);
> -    vgic_vcpu_inject_irq(current, current->arch.virt_timer.irq);
> -}
> -
>  /* Set up the timer interrupt on this CPU */
>  void __cpuinit init_timer_interrupt(void)
>  {
> @@ -204,8 +182,8 @@ void __cpuinit init_timer_interrupt(void)
>  
>      request_irq(timer_irq[TIMER_HYP_PPI], 0, timer_interrupt,
>                  "hyptimer", NULL);
> -    request_irq(timer_irq[TIMER_VIRT_PPI], 0, vtimer_interrupt,
> -                   "virtimer", NULL);
> +    route_irq_to_current_vcpu(timer_irq[TIMER_VIRT_PPI], "virtimer");
> +
>      request_irq(timer_irq[TIMER_PHYS_NONSECURE_PPI], 0, timer_interrupt,
>                  "phytimer", NULL);
>  }
> diff --git a/xen/arch/arm/vtimer.c b/xen/arch/arm/vtimer.c
> index 848e2c6..d1f21a1 100644
> --- a/xen/arch/arm/vtimer.c
> +++ b/xen/arch/arm/vtimer.c
> @@ -47,9 +47,14 @@ static void phys_timer_expired(void *data)
>  static void virt_timer_expired(void *data)
>  {
>      struct vtimer *t = data;
> -    t->ctl |= CNTx_CTL_MASK;
> -    vgic_vcpu_inject_irq(t->v, t->irq);
> -    perfc_incr(vtimer_virt_inject);
> +    t->ctl |= CNTx_CTL_PENDING;
> +    if ( !(t->ctl & CNTx_CTL_MASK) )
> +    {
> +        /* An edge triggered interrupt should now be pending. */
> +        t->ppi_state.pending = true;
> +        vcpu_unblock(t->v);
> +        perfc_incr(vtimer_virt_inject);
> +    }
>  }
>  
>  int domain_vtimer_init(struct domain *d)
> @@ -112,6 +117,15 @@ int virt_timer_save(struct vcpu *v)
>          set_timer(&v->arch.virt_timer.timer, ticks_to_ns(v->arch.virt_timer.cval +
>                    v->domain->arch.virt_timer_base.offset - boot_count));
>      }
> +
> +    /*
> +     * Since the vtimer irq is a PPI we don't need to worry about
> +     * racing against it becoming active while we are saving the
> +     * state, since that requires the guest to be reading the IAR.
> +     */
> +    gic_save_and_mask_hwppi(v, v->arch.virt_timer.irq,
> +                            &v->arch.virt_timer.ppi_state);
> +
>      return 0;
>  }
>  
> @@ -126,6 +140,10 @@ int virt_timer_restore(struct vcpu *v)
>      WRITE_SYSREG64(v->domain->arch.virt_timer_base.offset, CNTVOFF_EL2);
>      WRITE_SYSREG64(v->arch.virt_timer.cval, CNTV_CVAL_EL0);
>      WRITE_SYSREG32(v->arch.virt_timer.ctl, CNTV_CTL_EL0);
> +
> +    gic_restore_hwppi(v, v->arch.virt_timer.irq,
> +                      &v->arch.virt_timer.ppi_state);
> +
>      return 0;
>  }
>  
> diff --git a/xen/include/asm-arm/domain.h b/xen/include/asm-arm/domain.h
> index 90ab9c3..dd50e2c 100644
> --- a/xen/include/asm-arm/domain.h
> +++ b/xen/include/asm-arm/domain.h
> @@ -34,12 +34,23 @@ enum domain_type {
>  extern int dom0_11_mapping;
>  #define is_domain_direct_mapped(d) ((d) == hardware_domain && dom0_11_mapping)
>  
> +struct hwppi_state {
> +    /* h/w state */
> +    unsigned long enabled:1;
> +    unsigned long pending:1;
> +    unsigned long active:1;
> +
> +    /* Xen s/w state */
> +    unsigned long inprogress:1;
> +};
> +
>  struct vtimer {
>      struct vcpu *v;
>      int irq;
>      struct timer timer;
>      uint32_t ctl;
>      uint64_t cval;
> +    struct hwppi_state ppi_state;
>  };
>  
>  struct arch_domain
> diff --git a/xen/include/asm-arm/gic.h b/xen/include/asm-arm/gic.h
> index 187dc46..aa8cbac 100644
> --- a/xen/include/asm-arm/gic.h
> +++ b/xen/include/asm-arm/gic.h
> @@ -247,6 +247,16 @@ extern int gicv_setup(struct domain *d);
>  extern void gic_save_state(struct vcpu *v);
>  extern void gic_restore_state(struct vcpu *v);
>  
> +/*
> + * Save/restore the state of a single PPI which must be routed to
> + * <current-vcpu> (that is, defined to be injected to the current vcpu).
> + */
> +struct hwppi_state;
> +extern void gic_save_and_mask_hwppi(struct vcpu *v, unsigned virq,
> +                                    struct hwppi_state *s);
> +extern void gic_restore_hwppi(struct vcpu *v, unsigned virq,
> +                              const struct hwppi_state *s);
> +
>  /* SGI (AKA IPIs) */
>  enum gic_sgi {
>      GIC_SGI_EVENT_CHECK = 0,
> @@ -293,8 +303,12 @@ struct gic_hw_operations {
>      const struct gic_info *info;
>      /* Save GIC registers */
>      void (*save_state)(struct vcpu *);
> +    void (*save_and_mask_hwppi)(struct vcpu *v, const unsigned virq,
> +                                struct hwppi_state *s);
>      /* Restore GIC registers */
>      void (*restore_state)(const struct vcpu *);
> +    void (*restore_hwppi)(struct vcpu *v, const unsigned virq,
> +                          const struct hwppi_state *s);
>      /* Dump GIC LR register information */
>      void (*dump_state)(const struct vcpu *);
>      /* Map MMIO region of GIC */
> diff --git a/xen/include/asm-arm/irq.h b/xen/include/asm-arm/irq.h
> index 435dfcd..a08438e 100644
> --- a/xen/include/asm-arm/irq.h
> +++ b/xen/include/asm-arm/irq.h
> @@ -42,6 +42,7 @@ void init_secondary_IRQ(void);
>  
>  int route_irq_to_guest(struct domain *d, unsigned int irq,
>                         const char *devname);
> +int route_irq_to_current_vcpu(unsigned int irq, const char *devname);
>  void arch_move_irqs(struct vcpu *v);
>  
>  /* Set IRQ type for an SPI */
> -- 
> 1.7.10.4
> 

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH RFC] xen: arm: context switch vtimer PPI state.
  2015-03-03 11:38 ` Stefano Stabellini
@ 2015-03-03 11:51   ` Ian Campbell
  2015-03-03 11:55     ` Stefano Stabellini
  0 siblings, 1 reply; 14+ messages in thread
From: Ian Campbell @ 2015-03-03 11:51 UTC (permalink / raw)
  To: Stefano Stabellini; +Cc: julien.grall, tim, xen-devel

On Tue, 2015-03-03 at 11:38 +0000, Stefano Stabellini wrote:
> >      gic_set_irq_properties(desc, cpumask_of(smp_processor_id()), GIC_PRI_IRQ);
> >  
> > -    /* Use vcpu0 to retrieve the pending_irq struct. Given that we only
> > -     * route SPIs to guests, it doesn't make any difference. */
> > -    p = irq_to_pending(d->vcpu[0], desc->irq);
> > -    p->desc = desc;
> > +    if ( d )
> > +    {
> > +        struct pending_irq *p;
> > +
> > +        /* Use vcpu0 to retrieve the pending_irq struct. Given that we only
> > +         * route SPIs to guests, it doesn't make any difference. */
> > +        p = irq_to_pending(d->vcpu[0], desc->irq);
> > +        p->desc = desc;
> > +    }
> 
> I think you need to assign p->desc = desc in this case too, otherwise
> p->desc == NULL checks in gic.c and related code will succeed (and we
> want them to fail as this is an hardware interrupt). We should be able
> to use GICH_V2_LR_HW for it, right?
> 
> If you don't set p->desc, it is going to be treated as a virtual irq
> with no corresponding physical irq.

How can we lookup p without a d?

> In fact I am not sure how the code can work as is given that
> desc->handler is set to gic_hw_ops->gic_guest_irq_type but desc = NULL:
> who is doing the EOI of the physical interrupt? Who is calling
> gicv2_dir_irq?

p->desc is set in gicv2_restore_hwppi for interrupts such as these, so
that it always correctly points to the running domain.

Ian.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH RFC] xen: arm: context switch vtimer PPI state.
  2015-03-03 11:51   ` Ian Campbell
@ 2015-03-03 11:55     ` Stefano Stabellini
  0 siblings, 0 replies; 14+ messages in thread
From: Stefano Stabellini @ 2015-03-03 11:55 UTC (permalink / raw)
  To: Ian Campbell; +Cc: xen-devel, julien.grall, tim, Stefano Stabellini

On Tue, 3 Mar 2015, Ian Campbell wrote:
> On Tue, 2015-03-03 at 11:38 +0000, Stefano Stabellini wrote:
> > >      gic_set_irq_properties(desc, cpumask_of(smp_processor_id()), GIC_PRI_IRQ);
> > >  
> > > -    /* Use vcpu0 to retrieve the pending_irq struct. Given that we only
> > > -     * route SPIs to guests, it doesn't make any difference. */
> > > -    p = irq_to_pending(d->vcpu[0], desc->irq);
> > > -    p->desc = desc;
> > > +    if ( d )
> > > +    {
> > > +        struct pending_irq *p;
> > > +
> > > +        /* Use vcpu0 to retrieve the pending_irq struct. Given that we only
> > > +         * route SPIs to guests, it doesn't make any difference. */
> > > +        p = irq_to_pending(d->vcpu[0], desc->irq);
> > > +        p->desc = desc;
> > > +    }
> > 
> > I think you need to assign p->desc = desc in this case too, otherwise
> > p->desc == NULL checks in gic.c and related code will succeed (and we
> > want them to fail as this is an hardware interrupt). We should be able
> > to use GICH_V2_LR_HW for it, right?
> > 
> > If you don't set p->desc, it is going to be treated as a virtual irq
> > with no corresponding physical irq.
> 
> How can we lookup p without a d?
> 
> > In fact I am not sure how the code can work as is given that
> > desc->handler is set to gic_hw_ops->gic_guest_irq_type but desc = NULL:
> > who is doing the EOI of the physical interrupt? Who is calling
> > gicv2_dir_irq?
> 
> p->desc is set in gicv2_restore_hwppi for interrupts such as these, so
> that it always correctly points to the running domain.
 
Ah, right! OK then. 

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH RFC] xen: arm: context switch vtimer PPI state.
  2015-03-02 18:42   ` Stefano Stabellini
@ 2015-03-03 11:56     ` Ian Campbell
  2015-03-03 12:00       ` Stefano Stabellini
  0 siblings, 1 reply; 14+ messages in thread
From: Ian Campbell @ 2015-03-03 11:56 UTC (permalink / raw)
  To: Stefano Stabellini; +Cc: julien.grall, tim, xen-devel

On Mon, 2015-03-02 at 18:42 +0000, Stefano Stabellini wrote:
> On Tue, 10 Feb 2015, Ian Campbell wrote:
> > Stefano,
> > 
> > do you have any comments on the viability of the general approach here
> > before I go off and start addressing the review comments?
> 
> I think that the general approach is OK
> 
> 
> > Cheers,
> > Ian.
> > 
> > On Mon, 2015-01-26 at 15:55 +0000, Ian Campbell wrote:
> > > ... instead of artificially masking the timer interrupt in the timer
> > > state and relying on the guest to unmask (which it isn't required to
> > > do per the h/w spec, although Linux does)
> > > 
> > > To do this introduce the concept of routing a PPI to the currently
> > > running VCPU. For such interrupts irq_get_domain returns NULL.
> > > 
> > > Then we make use of the GICD I[SC]ACTIVER registers to save and
> > > restore the active state of the interrupt, which prevents the nested
> > > invocations which the current masking works around.
> > > 
> > > For edge triggered interrupts we also need to context switch the
> > > pending bit via I[SC]PENDR. Note that for level triggered interrupts
> > > SPENDR sets a latch which is only cleared by ICPENDR (and not by h/w
> > > state changes), therefore we do not want to context switch the pending
> > > state for level PPIs -- instead we rely on the context switch of the
> > > peripheral to restore the correct level.
> > > 
> > > RFC Only:
> > >  - Not implemented for GIC v3 yet.
> > >  - Lightly tested with hackbench on systems with level and edge
> > >    triggered vtimer PPI.
> > >  - Is irq_get_domain == NULL to indicate route-to-current-vcpu the
> > >    best idea? Any alternatives?
> > > 
> > > Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
> > > ---
> > >  xen/arch/arm/gic-v2.c        |   84 ++++++++++++++++++++++++++++++++++++++++++
> > >  xen/arch/arm/gic.c           |   32 +++++++++++++---
> > >  xen/arch/arm/irq.c           |   48 ++++++++++++++++++++++--
> > >  xen/arch/arm/time.c          |   26 +------------
> > >  xen/arch/arm/vtimer.c        |   24 ++++++++++--
> > >  xen/include/asm-arm/domain.h |   11 ++++++
> > >  xen/include/asm-arm/gic.h    |   14 +++++++
> > >  xen/include/asm-arm/irq.h    |    1 +
> > >  8 files changed, 204 insertions(+), 36 deletions(-)
> > > 
> > > diff --git a/xen/arch/arm/gic-v2.c b/xen/arch/arm/gic-v2.c
> > > index 31fb81a..9074aca 100644
> > > --- a/xen/arch/arm/gic-v2.c
> > > +++ b/xen/arch/arm/gic-v2.c
> > > @@ -156,6 +156,45 @@ static void gicv2_save_state(struct vcpu *v)
> > >      writel_gich(0, GICH_HCR);
> > >  }
> > >  
> > > +static void gicv2_save_and_mask_hwppi(struct vcpu *v, const unsigned virq,
> 
> Shouldn't the argument be a physical irq? Maybe irq_desc?

I think it needs to be a virq in the namespace of the given v, since it
may not be 1:1 and we would want to look it up to get the correct p.
> 
> 
> > > +                                      struct hwppi_state *s)
> > > +{
> > > +    struct pending_irq *p = irq_to_pending(v, virq);

which we do here. I think such lookups are normally done within the
(v)gic  code rather than the callers, aren't they?

> > >  int gic_irq_xlate(const u32 *intspec, unsigned int intsize,
> > > diff --git a/xen/arch/arm/irq.c b/xen/arch/arm/irq.c
> > > index ebdf19a..93c38ff 100644
> > > --- a/xen/arch/arm/irq.c
> > > +++ b/xen/arch/arm/irq.c
> > > @@ -202,6 +202,19 @@ void do_IRQ(struct cpu_user_regs *regs, unsigned int irq, int is_fiq)
> > >          goto out;
> > >      }
> > >  
> > > +    if ( irq == current->arch.virt_timer.irq )
> > > +    {
> > > +        ASSERT(!irq_get_domain(desc));
> > > +
> > > +        desc->handler->end(desc);
> > > +
> > > +        set_bit(_IRQ_INPROGRESS, &desc->status);
> > > +        desc->arch.eoi_cpu = smp_processor_id();
> > > +
> > > +        vgic_vcpu_inject_irq(current, irq);
> > > +        goto out_no_end;
> > > +    }
> 
> I don't think we should special case virt_timer.irq here. I would try to
> reuse the normal _IRQ_GUEST path or make this if generic for any PPIs
> routed to guests.

Yes, I think I thought I had done that after making this quick hack the
first time around but obviously I must have forgotten!

> > > diff --git a/xen/arch/arm/vtimer.c b/xen/arch/arm/vtimer.c
> > > index 848e2c6..d1f21a1 100644
> > > --- a/xen/arch/arm/vtimer.c
> > > +++ b/xen/arch/arm/vtimer.c
> > > @@ -47,9 +47,14 @@ static void phys_timer_expired(void *data)
> > >  static void virt_timer_expired(void *data)
> > >  {
> > >      struct vtimer *t = data;
> > > -    t->ctl |= CNTx_CTL_MASK;
> > > -    vgic_vcpu_inject_irq(t->v, t->irq);
> > > -    perfc_incr(vtimer_virt_inject);
> > > +    t->ctl |= CNTx_CTL_PENDING;
> > > +    if ( !(t->ctl & CNTx_CTL_MASK) )
> > > +    {
> > > +        /* An edge triggered interrupt should now be pending. */
> > > +        t->ppi_state.pending = true;
> > > +        vcpu_unblock(t->v);
> 
> I was going to say that this is trouble because
> local_events_need_delivery wouldn't recognize that we actually have an
> event pending. But it doesn't matter because local_events_need_delivery
> only works with the current vcpu and if this code trigger then the vcpu
> that needs to receive the event cannot be current. So I think that is OK
> but for clarity it might be better to add a check or a comment in
> local_events_need_delivery_nomask anyway.

i.e. a BUG_ON(t->v == current) + a comment to the affect that the timer
must never expire for the current vcpu?

> > > diff --git a/xen/include/asm-arm/domain.h b/xen/include/asm-arm/domain.h
> > > index 90ab9c3..dd50e2c 100644
> > > --- a/xen/include/asm-arm/domain.h
> > > +++ b/xen/include/asm-arm/domain.h
> > > @@ -34,12 +34,23 @@ enum domain_type {
> > >  extern int dom0_11_mapping;
> > >  #define is_domain_direct_mapped(d) ((d) == hardware_domain && dom0_11_mapping)
> > >  
> > > +struct hwppi_state {
> > > +    /* h/w state */
> > > +    unsigned long enabled:1;
> > > +    unsigned long pending:1;
> > > +    unsigned long active:1;
> > > +
> > > +    /* Xen s/w state */
> > > +    unsigned long inprogress:1;
> > > +};
> 
> Using a uint32_t bitmask would be more in line the rest of the code
> style, but it is just a matter of taste.

You mean "uint32_t inprogress:1" for each? Or
#define ENAVBLED 1
#define PENDING 2
etc
and test_set_bit stuff?

Ian.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH RFC] xen: arm: context switch vtimer PPI state.
  2015-03-03 11:56     ` Ian Campbell
@ 2015-03-03 12:00       ` Stefano Stabellini
  2015-03-03 12:18         ` Ian Campbell
  0 siblings, 1 reply; 14+ messages in thread
From: Stefano Stabellini @ 2015-03-03 12:00 UTC (permalink / raw)
  To: Ian Campbell; +Cc: xen-devel, julien.grall, tim, Stefano Stabellini

On Tue, 3 Mar 2015, Ian Campbell wrote:
> On Mon, 2015-03-02 at 18:42 +0000, Stefano Stabellini wrote:
> > On Tue, 10 Feb 2015, Ian Campbell wrote:
> > > Stefano,
> > > 
> > > do you have any comments on the viability of the general approach here
> > > before I go off and start addressing the review comments?
> > 
> > I think that the general approach is OK
> > 
> > 
> > > Cheers,
> > > Ian.
> > > 
> > > On Mon, 2015-01-26 at 15:55 +0000, Ian Campbell wrote:
> > > > ... instead of artificially masking the timer interrupt in the timer
> > > > state and relying on the guest to unmask (which it isn't required to
> > > > do per the h/w spec, although Linux does)
> > > > 
> > > > To do this introduce the concept of routing a PPI to the currently
> > > > running VCPU. For such interrupts irq_get_domain returns NULL.
> > > > 
> > > > Then we make use of the GICD I[SC]ACTIVER registers to save and
> > > > restore the active state of the interrupt, which prevents the nested
> > > > invocations which the current masking works around.
> > > > 
> > > > For edge triggered interrupts we also need to context switch the
> > > > pending bit via I[SC]PENDR. Note that for level triggered interrupts
> > > > SPENDR sets a latch which is only cleared by ICPENDR (and not by h/w
> > > > state changes), therefore we do not want to context switch the pending
> > > > state for level PPIs -- instead we rely on the context switch of the
> > > > peripheral to restore the correct level.
> > > > 
> > > > RFC Only:
> > > >  - Not implemented for GIC v3 yet.
> > > >  - Lightly tested with hackbench on systems with level and edge
> > > >    triggered vtimer PPI.
> > > >  - Is irq_get_domain == NULL to indicate route-to-current-vcpu the
> > > >    best idea? Any alternatives?
> > > > 
> > > > Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
> > > > ---
> > > >  xen/arch/arm/gic-v2.c        |   84 ++++++++++++++++++++++++++++++++++++++++++
> > > >  xen/arch/arm/gic.c           |   32 +++++++++++++---
> > > >  xen/arch/arm/irq.c           |   48 ++++++++++++++++++++++--
> > > >  xen/arch/arm/time.c          |   26 +------------
> > > >  xen/arch/arm/vtimer.c        |   24 ++++++++++--
> > > >  xen/include/asm-arm/domain.h |   11 ++++++
> > > >  xen/include/asm-arm/gic.h    |   14 +++++++
> > > >  xen/include/asm-arm/irq.h    |    1 +
> > > >  8 files changed, 204 insertions(+), 36 deletions(-)
> > > > 
> > > > diff --git a/xen/arch/arm/gic-v2.c b/xen/arch/arm/gic-v2.c
> > > > index 31fb81a..9074aca 100644
> > > > --- a/xen/arch/arm/gic-v2.c
> > > > +++ b/xen/arch/arm/gic-v2.c
> > > > @@ -156,6 +156,45 @@ static void gicv2_save_state(struct vcpu *v)
> > > >      writel_gich(0, GICH_HCR);
> > > >  }
> > > >  
> > > > +static void gicv2_save_and_mask_hwppi(struct vcpu *v, const unsigned virq,
> > 
> > Shouldn't the argument be a physical irq? Maybe irq_desc?
> 
> I think it needs to be a virq in the namespace of the given v, since it
> may not be 1:1 and we would want to look it up to get the correct p.
> > 
> > 
> > > > +                                      struct hwppi_state *s)
> > > > +{
> > > > +    struct pending_irq *p = irq_to_pending(v, virq);
> 
> which we do here. I think such lookups are normally done within the
> (v)gic  code rather than the callers, aren't they?
> 
> > > >  int gic_irq_xlate(const u32 *intspec, unsigned int intsize,
> > > > diff --git a/xen/arch/arm/irq.c b/xen/arch/arm/irq.c
> > > > index ebdf19a..93c38ff 100644
> > > > --- a/xen/arch/arm/irq.c
> > > > +++ b/xen/arch/arm/irq.c
> > > > @@ -202,6 +202,19 @@ void do_IRQ(struct cpu_user_regs *regs, unsigned int irq, int is_fiq)
> > > >          goto out;
> > > >      }
> > > >  
> > > > +    if ( irq == current->arch.virt_timer.irq )
> > > > +    {
> > > > +        ASSERT(!irq_get_domain(desc));
> > > > +
> > > > +        desc->handler->end(desc);
> > > > +
> > > > +        set_bit(_IRQ_INPROGRESS, &desc->status);
> > > > +        desc->arch.eoi_cpu = smp_processor_id();
> > > > +
> > > > +        vgic_vcpu_inject_irq(current, irq);
> > > > +        goto out_no_end;
> > > > +    }
> > 
> > I don't think we should special case virt_timer.irq here. I would try to
> > reuse the normal _IRQ_GUEST path or make this if generic for any PPIs
> > routed to guests.
> 
> Yes, I think I thought I had done that after making this quick hack the
> first time around but obviously I must have forgotten!
> 
> > > > diff --git a/xen/arch/arm/vtimer.c b/xen/arch/arm/vtimer.c
> > > > index 848e2c6..d1f21a1 100644
> > > > --- a/xen/arch/arm/vtimer.c
> > > > +++ b/xen/arch/arm/vtimer.c
> > > > @@ -47,9 +47,14 @@ static void phys_timer_expired(void *data)
> > > >  static void virt_timer_expired(void *data)
> > > >  {
> > > >      struct vtimer *t = data;
> > > > -    t->ctl |= CNTx_CTL_MASK;
> > > > -    vgic_vcpu_inject_irq(t->v, t->irq);
> > > > -    perfc_incr(vtimer_virt_inject);
> > > > +    t->ctl |= CNTx_CTL_PENDING;
> > > > +    if ( !(t->ctl & CNTx_CTL_MASK) )
> > > > +    {
> > > > +        /* An edge triggered interrupt should now be pending. */
> > > > +        t->ppi_state.pending = true;
> > > > +        vcpu_unblock(t->v);
> > 
> > I was going to say that this is trouble because
> > local_events_need_delivery wouldn't recognize that we actually have an
> > event pending. But it doesn't matter because local_events_need_delivery
> > only works with the current vcpu and if this code trigger then the vcpu
> > that needs to receive the event cannot be current. So I think that is OK
> > but for clarity it might be better to add a check or a comment in
> > local_events_need_delivery_nomask anyway.
> 
> i.e. a BUG_ON(t->v == current) + a comment to the affect that the timer
> must never expire for the current vcpu?

I think it would be best just to add a comment in
local_events_need_delivery to remember these events are not listed
there.


> > > > diff --git a/xen/include/asm-arm/domain.h b/xen/include/asm-arm/domain.h
> > > > index 90ab9c3..dd50e2c 100644
> > > > --- a/xen/include/asm-arm/domain.h
> > > > +++ b/xen/include/asm-arm/domain.h
> > > > @@ -34,12 +34,23 @@ enum domain_type {
> > > >  extern int dom0_11_mapping;
> > > >  #define is_domain_direct_mapped(d) ((d) == hardware_domain && dom0_11_mapping)
> > > >  
> > > > +struct hwppi_state {
> > > > +    /* h/w state */
> > > > +    unsigned long enabled:1;
> > > > +    unsigned long pending:1;
> > > > +    unsigned long active:1;
> > > > +
> > > > +    /* Xen s/w state */
> > > > +    unsigned long inprogress:1;
> > > > +};
> > 
> > Using a uint32_t bitmask would be more in line the rest of the code
> > style, but it is just a matter of taste.
> 
> You mean "uint32_t inprogress:1" for each? Or
> #define ENAVBLED 1
> #define PENDING 2
> etc
> and test_set_bit stuff?
 
Something like status in xen/include/asm-arm/vgic.h:struct pending_irq

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH RFC] xen: arm: context switch vtimer PPI state.
  2015-03-03 12:00       ` Stefano Stabellini
@ 2015-03-03 12:18         ` Ian Campbell
  2015-03-03 12:26           ` Ian Campbell
  2015-03-09 17:48           ` Stefano Stabellini
  0 siblings, 2 replies; 14+ messages in thread
From: Ian Campbell @ 2015-03-03 12:18 UTC (permalink / raw)
  To: Stefano Stabellini; +Cc: julien.grall, tim, xen-devel

On Tue, 2015-03-03 at 12:00 +0000, Stefano Stabellini wrote:
> > > > > +        /* An edge triggered interrupt should now be pending. */
> > > > > +        t->ppi_state.pending = true;
> > > > > +        vcpu_unblock(t->v);
> > > 
> > > I was going to say that this is trouble because
> > > local_events_need_delivery wouldn't recognize that we actually have an
> > > event pending. But it doesn't matter because local_events_need_delivery
> > > only works with the current vcpu and if this code trigger then the vcpu
> > > that needs to receive the event cannot be current. So I think that is OK
> > > but for clarity it might be better to add a check or a comment in
> > > local_events_need_delivery_nomask anyway.
> > 
> > i.e. a BUG_ON(t->v == current) + a comment to the affect that the timer
> > must never expire for the current vcpu?
> 
> I think it would be best just to add a comment in
> local_events_need_delivery to remember these events are not listed
> there.

So something like "events generated by XXX are not accounted for here".
But what should XXX be?

"Events which are queued to be delivered when the vcpu is next
scheduled" perhaps?

BTW: note that setting t->ppi_state.pending here means we will take the
IRQ on context restore, at which point local_events_need_delivery (via
gic_events_need_delivery)will do the right thing.


> > > > > diff --git a/xen/include/asm-arm/domain.h b/xen/include/asm-arm/domain.h
> > > > > index 90ab9c3..dd50e2c 100644
> > > > > --- a/xen/include/asm-arm/domain.h
> > > > > +++ b/xen/include/asm-arm/domain.h
> > > > > @@ -34,12 +34,23 @@ enum domain_type {
> > > > >  extern int dom0_11_mapping;
> > > > >  #define is_domain_direct_mapped(d) ((d) == hardware_domain && dom0_11_mapping)
> > > > >  
> > > > > +struct hwppi_state {
> > > > > +    /* h/w state */
> > > > > +    unsigned long enabled:1;
> > > > > +    unsigned long pending:1;
> > > > > +    unsigned long active:1;
> > > > > +
> > > > > +    /* Xen s/w state */
> > > > > +    unsigned long inprogress:1;
> > > > > +};
> > > 
> > > Using a uint32_t bitmask would be more in line the rest of the code
> > > style, but it is just a matter of taste.
> > 
> > You mean "uint32_t inprogress:1" for each? Or
> > #define ENAVBLED 1
> > #define PENDING 2
> > etc
> > and test_set_bit stuff?
>  
> Something like status in xen/include/asm-arm/vgic.h:struct pending_irq

OK.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH RFC] xen: arm: context switch vtimer PPI state.
  2015-03-03 12:18         ` Ian Campbell
@ 2015-03-03 12:26           ` Ian Campbell
  2015-03-09 17:48           ` Stefano Stabellini
  1 sibling, 0 replies; 14+ messages in thread
From: Ian Campbell @ 2015-03-03 12:26 UTC (permalink / raw)
  To: Stefano Stabellini; +Cc: julien.grall, tim, xen-devel

On Tue, 2015-03-03 at 12:18 +0000, Ian Campbell wrote:
> > > > > > +    /* Xen s/w state */
> > > > > > +    unsigned long inprogress:1;
> > > > > > +};
> > > > 
> > > > Using a uint32_t bitmask would be more in line the rest of the code
> > > > style, but it is just a matter of taste.
> > > 
> > > You mean "uint32_t inprogress:1" for each? Or
> > > #define ENAVBLED 1
> > > #define PENDING 2
> > > etc
> > > and test_set_bit stuff?
> >  
> > Something like status in xen/include/asm-arm/vgic.h:struct pending_irq
> 
> OK.

Actually, what is the benefit? I think pending_irq.status is done this
way because it is used lock free from multiple contexts and this allows
for test_and_set_bit etc to be used.

The hwppi state isn't accessed like that, so I think using the normal
appropriate types like we do in stuct arch_{domain,vcpu} is the
appropriate style. struct hwppi_state is even defined in domain.h.

Ian.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH RFC] xen: arm: context switch vtimer PPI state.
  2015-03-03 12:18         ` Ian Campbell
  2015-03-03 12:26           ` Ian Campbell
@ 2015-03-09 17:48           ` Stefano Stabellini
  1 sibling, 0 replies; 14+ messages in thread
From: Stefano Stabellini @ 2015-03-09 17:48 UTC (permalink / raw)
  To: Ian Campbell; +Cc: xen-devel, julien.grall, tim, Stefano Stabellini

On Tue, 3 Mar 2015, Ian Campbell wrote:
> On Tue, 2015-03-03 at 12:00 +0000, Stefano Stabellini wrote:
> > > > > > +        /* An edge triggered interrupt should now be pending. */
> > > > > > +        t->ppi_state.pending = true;
> > > > > > +        vcpu_unblock(t->v);
> > > > 
> > > > I was going to say that this is trouble because
> > > > local_events_need_delivery wouldn't recognize that we actually have an
> > > > event pending. But it doesn't matter because local_events_need_delivery
> > > > only works with the current vcpu and if this code trigger then the vcpu
> > > > that needs to receive the event cannot be current. So I think that is OK
> > > > but for clarity it might be better to add a check or a comment in
> > > > local_events_need_delivery_nomask anyway.
> > > 
> > > i.e. a BUG_ON(t->v == current) + a comment to the affect that the timer
> > > must never expire for the current vcpu?
> > 
> > I think it would be best just to add a comment in
> > local_events_need_delivery to remember these events are not listed
> > there.
> 
> So something like "events generated by XXX are not accounted for here".
> But what should XXX be?
> 
> "Events which are queued to be delivered when the vcpu is next
> scheduled" perhaps?
> 
> BTW: note that setting t->ppi_state.pending here means we will take the
> IRQ on context restore, at which point local_events_need_delivery (via
> gic_events_need_delivery)will do the right thing.

Yeah, you are right. Simply expand a bit more the comment you are
already adding to virt_timer_expired.


> > > > > > > +    /* Xen s/w state */
> > > > > > > +    unsigned long inprogress:1;
> > > > > > > +};
> > > > > 
> > > > > Using a uint32_t bitmask would be more in line the rest of the code
> > > > > style, but it is just a matter of taste.
> > > > 
> > > > You mean "uint32_t inprogress:1" for each? Or
> > > > #define ENAVBLED 1
> > > > #define PENDING 2
> > > > etc
> > > > and test_set_bit stuff?
> > >  
> > > Something like status in xen/include/asm-arm/vgic.h:struct pending_irq
> > 
> > OK.
> 
> Actually, what is the benefit? I think pending_irq.status is done this
> way because it is used lock free from multiple contexts and this allows
> for test_and_set_bit etc to be used.
> 
> The hwppi state isn't accessed like that, so I think using the normal
> appropriate types like we do in stuct arch_{domain,vcpu} is the
> appropriate style. struct hwppi_state is even defined in domain.h.

OK, feel free to keep it as is.

^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2015-03-09 17:48 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2015-01-26 15:55 [PATCH RFC] xen: arm: context switch vtimer PPI state Ian Campbell
2015-01-27 13:16 ` Julien Grall
2015-01-27 13:34   ` Ian Campbell
2015-01-27 13:47     ` Julien Grall
2015-02-10  2:59 ` Ian Campbell
2015-03-02 18:42   ` Stefano Stabellini
2015-03-03 11:56     ` Ian Campbell
2015-03-03 12:00       ` Stefano Stabellini
2015-03-03 12:18         ` Ian Campbell
2015-03-03 12:26           ` Ian Campbell
2015-03-09 17:48           ` Stefano Stabellini
2015-03-03 11:38 ` Stefano Stabellini
2015-03-03 11:51   ` Ian Campbell
2015-03-03 11:55     ` Stefano Stabellini

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.