From: Thomas Gleixner <tglx@linutronix.de>
To: LKML <linux-kernel@vger.kernel.org>
Cc: Ingo Molnar <mingo@kernel.org>, Peter Anvin <hpa@zytor.com>,
Marc Zyngier <marc.zyngier@arm.com>,
Peter Zijlstra <peterz@infradead.org>,
Borislav Petkov <bp@alien8.de>, Chen Yu <yu.c.chen@intel.com>,
Rui Zhang <rui.zhang@intel.com>,
"Rafael J. Wysocki" <rjw@rjwysocki.net>,
Len Brown <lenb@kernel.org>,
Dan Williams <dan.j.williams@intel.com>,
Christoph Hellwig <hch@lst.de>,
Paolo Bonzini <pbonzini@redhat.com>,
Joerg Roedel <joro@8bytes.org>,
Boris Ostrovsky <boris.ostrovsky@oracle.com>,
Juergen Gross <jgross@suse.com>, Tony Luck <tony.luck@intel.com>,
"K. Y. Srinivasan" <kys@microsoft.com>,
Alok Kataria <akataria@vmware.com>,
Steven Rostedt <rostedt@goodmis.org>,
Arjan van de Ven <arjan@linux.intel.com>
Subject: [patch 30/52] x86/vector: Simplify vector move cleanup
Date: Wed, 13 Sep 2017 23:29:32 +0200 [thread overview]
Message-ID: <20170913213154.622727892@linutronix.de> (raw)
In-Reply-To: 20170913212902.530704676@linutronix.de
[-- Attachment #1: x86-vector--Simplify-vector-move-cleanup.patch --]
[-- Type: text/plain, Size: 11525 bytes --]
The vector move cleanup needs to walk the vector space and do a lot of
sanity checks to find a vector to cleanup.
With single CPU affinities this can be simplified and made more robust by
queueing the vector configuration which needs to be cleaned up in a hlist
on the CPU which was the previous target.
That removes all the race conditions because the cleanup either finds a
valid list entry or not. The latter happens when the interrupt was torn
down before the cleanup handler was able to run.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
arch/x86/kernel/apic/vector.c | 221 ++++++++++++++----------------------------
1 file changed, 77 insertions(+), 144 deletions(-)
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -25,6 +25,7 @@ struct apic_chip_data {
struct irq_cfg cfg;
unsigned int cpu;
unsigned int prev_cpu;
+ struct hlist_node clist;
cpumask_var_t domain;
cpumask_var_t old_domain;
u8 move_in_progress : 1;
@@ -38,6 +39,9 @@ static struct irq_chip lapic_controller;
#ifdef CONFIG_X86_IO_APIC
static struct apic_chip_data *legacy_irq_data[NR_IRQS_LEGACY];
#endif
+#ifdef CONFIG_SMP
+static DEFINE_PER_CPU(struct hlist_head, cleanup_list);
+#endif
void lock_vector_lock(void)
{
@@ -87,6 +91,7 @@ static struct apic_chip_data *alloc_apic
goto out_data;
if (!zalloc_cpumask_var_node(&apicd->old_domain, GFP_KERNEL, node))
goto out_domain;
+ INIT_HLIST_NODE(&apicd->clist);
return apicd;
out_domain:
free_cpumask_var(apicd->domain);
@@ -127,8 +132,7 @@ static int __assign_irq_vector(int irq,
* If there is still a move in progress or the previous move has not
* been cleaned up completely, tell the caller to come back later.
*/
- if (d->move_in_progress ||
- cpumask_intersects(d->old_domain, cpu_online_mask))
+ if (d->cfg.old_vector)
return -EBUSY;
/* Only try and allocate irqs on cpus that are present */
@@ -263,38 +267,22 @@ static int assign_irq_vector_policy(int
static void clear_irq_vector(int irq, struct apic_chip_data *apicd)
{
- struct irq_desc *desc;
- int cpu, vector;
+ unsigned int vector = apicd->cfg.vector;
- if (!apicd->cfg.vector)
+ if (!vector)
return;
- vector = apicd->cfg.vector;
- for_each_cpu_and(cpu, apicd->domain, cpu_online_mask)
- per_cpu(vector_irq, cpu)[vector] = VECTOR_UNUSED;
-
+ per_cpu(vector_irq, apicd->cpu)[vector] = VECTOR_UNUSED;
apicd->cfg.vector = 0;
- cpumask_clear(apicd->domain);
- /*
- * If move is in progress or the old_domain mask is not empty,
- * i.e. the cleanup IPI has not been processed yet, we need to remove
- * the old references to desc from all cpus vector tables.
- */
- if (!apicd->move_in_progress && cpumask_empty(apicd->old_domain))
+ /* Clean up move in progress */
+ vector = apicd->cfg.old_vector;
+ if (!vector)
return;
- desc = irq_to_desc(irq);
- for_each_cpu_and(cpu, apicd->old_domain, cpu_online_mask) {
- for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS;
- vector++) {
- if (per_cpu(vector_irq, cpu)[vector] != desc)
- continue;
- per_cpu(vector_irq, cpu)[vector] = VECTOR_UNUSED;
- break;
- }
- }
+ per_cpu(vector_irq, apicd->prev_cpu)[vector] = VECTOR_UNUSED;
apicd->move_in_progress = 0;
+ hlist_del_init(&apicd->clist);
}
void init_irq_alloc_info(struct irq_alloc_info *info,
@@ -474,7 +462,7 @@ static void vector_update_shutdown_irqs(
struct irq_data *irqd = irq_desc_get_irq_data(desc);
struct apic_chip_data *ad = apic_chip_data(irqd);
- if (ad && cpumask_test_cpu(cpu, ad->domain) && ad->cfg.vector)
+ if (ad && ad->cfg.vector && ad->cpu == smp_processor_id())
this_cpu_write(vector_irq[ad->cfg.vector], desc);
}
}
@@ -524,11 +512,9 @@ static int apic_retrigger_irq(struct irq
{
struct apic_chip_data *apicd = apic_chip_data(irqd);
unsigned long flags;
- int cpu;
raw_spin_lock_irqsave(&vector_lock, flags);
- cpu = cpumask_first_and(apicd->domain, cpu_online_mask);
- apic->send_IPI_mask(cpumask_of(cpu), apicd->cfg.vector);
+ apic->send_IPI(apicd->cpu, apicd->cfg.vector);
raw_spin_unlock_irqrestore(&vector_lock, flags);
return 1;
@@ -565,114 +551,77 @@ static struct irq_chip lapic_controller
};
#ifdef CONFIG_SMP
-static void __send_cleanup_vector(struct apic_chip_data *apicd)
-{
- raw_spin_lock(&vector_lock);
- cpumask_and(apicd->old_domain, apicd->old_domain, cpu_online_mask);
- apicd->move_in_progress = 0;
- if (!cpumask_empty(apicd->old_domain))
- apic->send_IPI_mask(apicd->old_domain, IRQ_MOVE_CLEANUP_VECTOR);
- raw_spin_unlock(&vector_lock);
-}
-
-void send_cleanup_vector(struct irq_cfg *cfg)
-{
- struct apic_chip_data *apicd;
-
- apicd = container_of(cfg, struct apic_chip_data, cfg);
- if (apicd->move_in_progress)
- __send_cleanup_vector(apicd);
-}
asmlinkage __visible void __irq_entry smp_irq_move_cleanup_interrupt(void)
{
- unsigned vector, me;
+ struct hlist_head *clhead = this_cpu_ptr(&cleanup_list);
+ struct apic_chip_data *apicd;
+ struct hlist_node *tmp;
entering_ack_irq();
-
/* Prevent vectors vanishing under us */
raw_spin_lock(&vector_lock);
- me = smp_processor_id();
- for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) {
- struct apic_chip_data *apicd;
- struct irq_desc *desc;
- unsigned int irr;
-
- retry:
- desc = __this_cpu_read(vector_irq[vector]);
- if (IS_ERR_OR_NULL(desc))
- continue;
-
- if (!raw_spin_trylock(&desc->lock)) {
- raw_spin_unlock(&vector_lock);
- cpu_relax();
- raw_spin_lock(&vector_lock);
- goto retry;
- }
-
- apicd = apic_chip_data(irq_desc_get_irq_data(desc));
- if (!apicd)
- goto unlock;
-
- /*
- * Nothing to cleanup if irq migration is in progress
- * or this cpu is not set in the cleanup mask.
- */
- if (apicd->move_in_progress ||
- !cpumask_test_cpu(me, apicd->old_domain))
- goto unlock;
+ hlist_for_each_entry_safe(apicd, tmp, clhead, clist) {
+ unsigned int irr, vector = apicd->cfg.old_vector;
/*
- * We have two cases to handle here:
- * 1) vector is unchanged but the target mask got reduced
- * 2) vector and the target mask has changed
- *
- * #1 is obvious, but in #2 we have two vectors with the same
- * irq descriptor: the old and the new vector. So we need to
- * make sure that we only cleanup the old vector. The new
- * vector has the current @vector number in the config and
- * this cpu is part of the target mask. We better leave that
- * one alone.
- */
- if (vector == apicd->cfg.vector &&
- cpumask_test_cpu(me, apicd->domain))
- goto unlock;
-
- irr = apic_read(APIC_IRR + (vector / 32 * 0x10));
- /*
- * Check if the vector that needs to be cleanedup is
- * registered at the cpu's IRR. If so, then this is not
- * the best time to clean it up. Lets clean it up in the
+ * Paranoia: Check if the vector that needs to be cleaned
+ * up is registered at the APICs IRR. If so, then this is
+ * not the best time to clean it up. Clean it up in the
* next attempt by sending another IRQ_MOVE_CLEANUP_VECTOR
- * to myself.
+ * to this CPU. IRQ_MOVE_CLEANUP_VECTOR is the lowest
+ * priority external vector, so on return from this
+ * interrupt the device interrupt will happen first.
*/
- if (irr & (1 << (vector % 32))) {
+ irr = apic_read(APIC_IRR + (vector / 32 * 0x10));
+ if (irr & (1U << (vector % 32))) {
apic->send_IPI_self(IRQ_MOVE_CLEANUP_VECTOR);
- goto unlock;
+ continue;
}
+ hlist_del_init(&apicd->clist);
__this_cpu_write(vector_irq[vector], VECTOR_UNUSED);
- cpumask_clear_cpu(me, apicd->old_domain);
-unlock:
- raw_spin_unlock(&desc->lock);
+ apicd->cfg.old_vector = 0;
}
raw_spin_unlock(&vector_lock);
-
exiting_irq();
}
+static void __send_cleanup_vector(struct apic_chip_data *apicd)
+{
+ unsigned int cpu;
+
+ raw_spin_lock(&vector_lock);
+ apicd->move_in_progress = 0;
+ cpu = apicd->prev_cpu;
+ if (cpu_online(cpu)) {
+ hlist_add_head(&apicd->clist, per_cpu_ptr(&cleanup_list, cpu));
+ apic->send_IPI(cpu, IRQ_MOVE_CLEANUP_VECTOR);
+ } else {
+ apicd->cfg.old_vector = 0;
+ }
+ raw_spin_unlock(&vector_lock);
+}
+
+void send_cleanup_vector(struct irq_cfg *cfg)
+{
+ struct apic_chip_data *apicd;
+
+ apicd = container_of(cfg, struct apic_chip_data, cfg);
+ if (apicd->move_in_progress)
+ __send_cleanup_vector(apicd);
+}
+
static void __irq_complete_move(struct irq_cfg *cfg, unsigned vector)
{
- unsigned me;
struct apic_chip_data *apicd;
apicd = container_of(cfg, struct apic_chip_data, cfg);
if (likely(!apicd->move_in_progress))
return;
- me = smp_processor_id();
- if (vector == apicd->cfg.vector && cpumask_test_cpu(me, apicd->domain))
+ if (vector == apicd->cfg.vector && apicd->cpu == smp_processor_id())
__send_cleanup_vector(apicd);
}
@@ -686,10 +635,9 @@ void irq_complete_move(struct irq_cfg *c
*/
void irq_force_complete_move(struct irq_desc *desc)
{
- struct irq_data *irqd;
struct apic_chip_data *apicd;
- struct irq_cfg *cfg;
- unsigned int cpu;
+ struct irq_data *irqd;
+ unsigned int vector;
/*
* The function is called for all descriptors regardless of which
@@ -701,42 +649,30 @@ void irq_force_complete_move(struct irq_
* (apic_chip_data) before touching it any further.
*/
irqd = irq_domain_get_irq_data(x86_vector_domain,
- irq_desc_get_irq(desc));
+ irq_desc_get_irq(desc));
if (!irqd)
return;
+ raw_spin_lock(&vector_lock);
apicd = apic_chip_data(irqd);
- cfg = apicd ? &apicd->cfg : NULL;
+ if (!apicd)
+ goto unlock;
- if (!cfg)
- return;
+ /*
+ * If old_vector is empty, no action required.
+ */
+ vector = apicd->cfg.old_vector;
+ if (!vector)
+ goto unlock;
/*
- * This is tricky. If the cleanup of @data->old_domain has not been
+ * This is tricky. If the cleanup of the old vector has not been
* done yet, then the following setaffinity call will fail with
* -EBUSY. This can leave the interrupt in a stale state.
*
* All CPUs are stuck in stop machine with interrupts disabled so
* calling __irq_complete_move() would be completely pointless.
- */
- raw_spin_lock(&vector_lock);
- /*
- * Clean out all offline cpus (including the outgoing one) from the
- * old_domain mask.
- */
- cpumask_and(apicd->old_domain, apicd->old_domain, cpu_online_mask);
-
- /*
- * If move_in_progress is cleared and the old_domain mask is empty,
- * then there is nothing to cleanup. fixup_irqs() will take care of
- * the stale vectors on the outgoing cpu.
- */
- if (!apicd->move_in_progress && cpumask_empty(apicd->old_domain)) {
- raw_spin_unlock(&vector_lock);
- return;
- }
-
- /*
+ *
* 1) The interrupt is in move_in_progress state. That means that we
* have not seen an interrupt since the io_apic was reprogrammed to
* the new vector.
@@ -778,18 +714,15 @@ void irq_force_complete_move(struct irq_
* area arises.
*/
pr_warn("IRQ fixup: irq %d move in progress, old vector %d\n",
- irqd->irq, cfg->old_vector);
+ irqd->irq, vector);
}
- /*
- * If old_domain is not empty, then other cpus still have the irq
- * descriptor set in their vector array. Clean it up.
- */
- for_each_cpu(cpu, apicd->old_domain)
- per_cpu(vector_irq, cpu)[cfg->old_vector] = VECTOR_UNUSED;
-
+ per_cpu(vector_irq, apicd->prev_cpu)[vector] = VECTOR_UNUSED;
/* Cleanup the left overs of the (half finished) move */
cpumask_clear(apicd->old_domain);
+ apicd->cfg.old_vector = 0;
apicd->move_in_progress = 0;
+ hlist_del_init(&apicd->clist);
+unlock:
raw_spin_unlock(&vector_lock);
}
#endif
next prev parent reply other threads:[~2017-09-13 21:41 UTC|newest]
Thread overview: 57+ messages / expand[flat|nested] mbox.gz Atom feed top
2017-09-13 21:29 [patch 00/52] x86: Rework the vector management Thomas Gleixner
2017-09-13 21:29 ` [patch 01/52] genirq: Fix cpumask check in __irq_startup_managed() Thomas Gleixner
2017-09-16 18:24 ` [tip:irq/urgent] " tip-bot for Thomas Gleixner
2017-09-13 21:29 ` [patch 02/52] genirq/debugfs: Show debug information for all irq descriptors Thomas Gleixner
2017-09-13 21:29 ` [patch 03/52] genirq/msi: Capture device name for debugfs Thomas Gleixner
2017-09-13 21:29 ` [patch 04/52] irqdomain/debugfs: Provide domain specific debug callback Thomas Gleixner
2017-09-13 21:29 ` [patch 05/52] genirq: Make state consistent for !IRQ_DOMAIN_HIERARCHY Thomas Gleixner
2017-09-13 21:29 ` [patch 06/52] genirq: Set managed shut down flag at init Thomas Gleixner
2017-09-13 21:29 ` [patch 07/52] genirq: Separate activation and startup Thomas Gleixner
2017-09-13 21:29 ` [patch 08/52] genirq/irqdomain: Update irq_domain_ops.activate() signature Thomas Gleixner
2017-09-13 21:29 ` [patch 09/52] genirq/irqdomain: Allow irq_domain_activate_irq() to fail Thomas Gleixner
2017-09-13 21:29 ` [patch 10/52] genirq/irqdomain: Propagate early activation Thomas Gleixner
2017-09-13 21:29 ` [patch 11/52] genirq/irqdomain: Add force reactivation flag to irq domains Thomas Gleixner
2017-09-13 21:29 ` [patch 12/52] genirq: Implement bitmap matrix allocator Thomas Gleixner
2017-09-13 21:29 ` [patch 13/52] genirq/matrix: Add tracepoints Thomas Gleixner
2017-09-13 21:29 ` [patch 14/52] x86/apic: Deinline x2apic functions Thomas Gleixner
2017-09-13 21:29 ` [patch 15/52] x86/apic: Sanitize return value of apic.set_apic_id() Thomas Gleixner
2017-09-13 21:29 ` [patch 16/52] x86/apic: Sanitize return value of check_apicid_used() Thomas Gleixner
2017-09-13 21:29 ` [patch 17/52] x86/apic: Move probe32 specific APIC functions Thomas Gleixner
2017-09-13 21:29 ` [patch 18/52] x86/apic: Move APIC noop specific functions Thomas Gleixner
2017-09-13 21:29 ` [patch 19/52] x86/apic: Sanitize 32/64bit APIC callbacks Thomas Gleixner
2017-09-13 21:29 ` [patch 20/52] x86/apic: Move common " Thomas Gleixner
2017-09-13 21:29 ` [patch 21/52] x86/apic: Reorganize struct apic Thomas Gleixner
2017-09-13 21:29 ` [patch 22/52] x86/apic/x2apic: Simplify cluster management Thomas Gleixner
2017-09-13 21:29 ` [patch 23/52] x86/apic: Get rid of apic->target_cpus Thomas Gleixner
2017-09-13 21:29 ` [patch 24/52] x86/vector: Rename used_vectors to system_vectors Thomas Gleixner
2017-09-13 21:29 ` [patch 25/52] x86/apic: Get rid of multi CPU affinity Thomas Gleixner
2017-09-13 21:29 ` [patch 26/52] x86/ioapic: Remove obsolete post hotplug update Thomas Gleixner
2017-09-13 21:29 ` [patch 27/52] x86/vector: Simplify the CPU hotplug vector update Thomas Gleixner
2017-09-13 21:29 ` [patch 28/52] x86/vector: Cleanup variable names Thomas Gleixner
2017-09-13 21:29 ` [patch 29/52] x86/vector: Store the single CPU targets in apic data Thomas Gleixner
2017-09-13 21:29 ` Thomas Gleixner [this message]
2017-09-13 21:29 ` [patch 31/52] x86/ioapic: Mark legacy vectors at reallocation time Thomas Gleixner
2017-09-13 21:29 ` [patch 32/52] x86/apic: Get rid of the legacy irq data storage Thomas Gleixner
2017-09-13 21:29 ` [patch 33/52] x86/vector: Remove pointless pointer checks Thomas Gleixner
2017-09-13 21:29 ` [patch 34/52] x86/vector: Move helper functions around Thomas Gleixner
2017-09-13 21:29 ` [patch 35/52] x86/apic: Add replacement for cpu_mask_to_apicid() Thomas Gleixner
2017-09-13 21:29 ` [patch 36/52] x86/irq/vector: Initialize matrix allocator Thomas Gleixner
2017-09-13 21:29 ` [patch 37/52] x86/vector: Add vector domain debugfs support Thomas Gleixner
2017-09-13 21:29 ` [patch 38/52] x86/smpboot: Set online before setting up vectors Thomas Gleixner
2017-09-13 21:29 ` [patch 39/52] x86/vector: Add tracepoints for vector management Thomas Gleixner
2017-09-13 21:29 ` [patch 40/52] x86/vector: Use matrix allocator for vector assignment Thomas Gleixner
2017-09-13 21:29 ` [patch 41/52] x86/apic: Remove unused callbacks Thomas Gleixner
2017-09-13 21:29 ` [patch 42/52] x86/vector: Compile SMP only code conditionally Thomas Gleixner
2017-09-13 21:29 ` [patch 43/52] x86/vector: Untangle internal state from irq_cfg Thomas Gleixner
2017-09-13 21:29 ` [patch 44/52] x86/apic/msi: Force reactivation of interrupts at startup time Thomas Gleixner
2017-09-13 21:29 ` [patch 45/52] iommu/vt-d: Reevaluate vector configuration on activate() Thomas Gleixner
2017-09-13 21:29 ` [patch 46/52] iommu/amd: " Thomas Gleixner
2017-09-13 21:29 ` [patch 47/52] x86/io_apic: " Thomas Gleixner
2017-09-13 21:29 ` [patch 48/52] x86/vector: Handle managed interrupts proper Thomas Gleixner
2017-09-13 21:29 ` [patch 49/52] x86/vector/msi: Switch to global reservation mode Thomas Gleixner
2017-09-13 21:29 ` [patch 50/52] x86/vector: Switch IOAPIC " Thomas Gleixner
2017-09-13 21:29 ` [patch 51/52] x86/irq: Simplify hotplug vector accounting Thomas Gleixner
2017-09-13 21:29 ` [patch 52/52] x86/vector: Respect affinity mask in irq descriptor Thomas Gleixner
2017-09-14 11:21 ` [patch 00/52] x86: Rework the vector management Juergen Gross
2017-09-20 10:21 ` Paolo Bonzini
2017-09-19 9:12 ` Yu Chen
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20170913213154.622727892@linutronix.de \
--to=tglx@linutronix.de \
--cc=akataria@vmware.com \
--cc=arjan@linux.intel.com \
--cc=boris.ostrovsky@oracle.com \
--cc=bp@alien8.de \
--cc=dan.j.williams@intel.com \
--cc=hch@lst.de \
--cc=hpa@zytor.com \
--cc=jgross@suse.com \
--cc=joro@8bytes.org \
--cc=kys@microsoft.com \
--cc=lenb@kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=marc.zyngier@arm.com \
--cc=mingo@kernel.org \
--cc=pbonzini@redhat.com \
--cc=peterz@infradead.org \
--cc=rjw@rjwysocki.net \
--cc=rostedt@goodmis.org \
--cc=rui.zhang@intel.com \
--cc=tony.luck@intel.com \
--cc=yu.c.chen@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox