From: Thomas Gleixner <tglx@linutronix.de>
To: "H. Peter Anvin" <hpa@zytor.com>, Xin Li <xin3.li@intel.com>,
linux-kernel@vger.kernel.org, x86@kernel.org,
kvm@vger.kernel.org
Cc: mingo@redhat.com, bp@alien8.de, dave.hansen@linux.intel.com,
peterz@infradead.org, andrew.cooper3@citrix.com,
seanjc@google.com, pbonzini@redhat.com, ravi.v.shankar@intel.com,
jiangshanlai@gmail.com, shan.kang@intel.com
Subject: Re: [PATCH v8 01/33] x86/traps: let common_interrupt() handle IRQ_MOVE_CLEANUP_VECTOR
Date: Tue, 06 Jun 2023 22:09:03 +0200 [thread overview]
Message-ID: <877csgl5eo.ffs@tglx> (raw)
In-Reply-To: <70ef07f1-e3b7-7c4e-01ac-11f159a87a6b@zytor.com>
On Mon, Jun 05 2023 at 10:09, H. Peter Anvin wrote:
> On 6/5/23 10:07, Thomas Gleixner wrote:
>> There is zero reason for this to be an IPI. This can be delegated to a
>> worker or whatever delayed mechanism.
>>
> As we discussed offline, I agree that this is a better solution (and
> should be a separate changeset before the FRED one.)
The untested below should do the trick. Wants to be split in several
patches, but you get the idea.
Thanks,
tglx
---
Subject: x86/vector: Get rid of IRQ_MOVE_CLEANUP_VECTOR
From: Thomas Gleixner <tglx@linutronix.de>
No point to waste a vector for cleaning up the leftovers of a moved
interrupt. Aside of that this must be the lowest priority of all vectors
which makes FRED systems utilizing vectors 0x10-0x1f more complicated
than necessary.
Schedule a timer instead.
Not-Yet-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
arch/x86/include/asm/hw_irq.h | 4 -
arch/x86/include/asm/idtentry.h | 1
arch/x86/include/asm/irq_vectors.h | 7 ---
arch/x86/kernel/apic/vector.c | 83 ++++++++++++++++++++++++++----------
arch/x86/kernel/idt.c | 1
arch/x86/platform/uv/uv_irq.c | 2
drivers/iommu/amd/iommu.c | 2
drivers/iommu/hyperv-iommu.c | 4 -
drivers/iommu/intel/irq_remapping.c | 2
9 files changed, 68 insertions(+), 38 deletions(-)
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -97,10 +97,10 @@ extern struct irq_cfg *irqd_cfg(struct i
extern void lock_vector_lock(void);
extern void unlock_vector_lock(void);
#ifdef CONFIG_SMP
-extern void send_cleanup_vector(struct irq_cfg *);
+extern void vector_schedule_cleanup(struct irq_cfg *);
extern void irq_complete_move(struct irq_cfg *cfg);
#else
-static inline void send_cleanup_vector(struct irq_cfg *c) { }
+static inline void vector_schedule_cleanup(struct irq_cfg *c) { }
static inline void irq_complete_move(struct irq_cfg *c) { }
#endif
--- a/arch/x86/include/asm/idtentry.h
+++ b/arch/x86/include/asm/idtentry.h
@@ -648,7 +648,6 @@ DECLARE_IDTENTRY_SYSVEC(X86_PLATFORM_IPI
#ifdef CONFIG_SMP
DECLARE_IDTENTRY(RESCHEDULE_VECTOR, sysvec_reschedule_ipi);
-DECLARE_IDTENTRY_SYSVEC(IRQ_MOVE_CLEANUP_VECTOR, sysvec_irq_move_cleanup);
DECLARE_IDTENTRY_SYSVEC(REBOOT_VECTOR, sysvec_reboot);
DECLARE_IDTENTRY_SYSVEC(CALL_FUNCTION_SINGLE_VECTOR, sysvec_call_function_single);
DECLARE_IDTENTRY_SYSVEC(CALL_FUNCTION_VECTOR, sysvec_call_function);
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -35,13 +35,6 @@
*/
#define FIRST_EXTERNAL_VECTOR 0x20
-/*
- * Reserve the lowest usable vector (and hence lowest priority) 0x20 for
- * triggering cleanup after irq migration. 0x21-0x2f will still be used
- * for device interrupts.
- */
-#define IRQ_MOVE_CLEANUP_VECTOR FIRST_EXTERNAL_VECTOR
-
#define IA32_SYSCALL_VECTOR 0x80
/*
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -44,7 +44,18 @@ static cpumask_var_t vector_searchmask;
static struct irq_chip lapic_controller;
static struct irq_matrix *vector_matrix;
#ifdef CONFIG_SMP
-static DEFINE_PER_CPU(struct hlist_head, cleanup_list);
+
+static void vector_cleanup_callback(struct timer_list *tmr);
+
+struct vector_cleanup {
+ struct hlist_head head;
+ struct timer_list timer;
+};
+
+static DEFINE_PER_CPU(struct vector_cleanup, vector_cleanup) = {
+ .head = HLIST_HEAD_INIT,
+ .timer = __TIMER_INITIALIZER(vector_cleanup_callback, TIMER_PINNED),
+};
#endif
void lock_vector_lock(void)
@@ -843,8 +854,12 @@ void lapic_online(void)
void lapic_offline(void)
{
+ struct vector_cleanup *cl = this_cpu_ptr(&vector_cleanup);
+
lock_vector_lock();
irq_matrix_offline(vector_matrix);
+ WARN_ON_ONCE(try_to_del_timer_sync(&cl->timer) < 0);
+ WARN_ON_ONCE(!hlist_empty(&cl->head));
unlock_vector_lock();
}
@@ -934,62 +949,86 @@ static void free_moved_vector(struct api
apicd->move_in_progress = 0;
}
-DEFINE_IDTENTRY_SYSVEC(sysvec_irq_move_cleanup)
+static void vector_cleanup_callback(struct timer_list *tmr)
{
- struct hlist_head *clhead = this_cpu_ptr(&cleanup_list);
+ struct vector_cleanup *cl = container_of(tmr, typeof(*cl), timer);
struct apic_chip_data *apicd;
struct hlist_node *tmp;
+ bool rearm = false;
- ack_APIC_irq();
/* Prevent vectors vanishing under us */
- raw_spin_lock(&vector_lock);
+ raw_spin_lock_irq(&vector_lock);
- hlist_for_each_entry_safe(apicd, tmp, clhead, clist) {
+ hlist_for_each_entry_safe(apicd, tmp, &cl->head, clist) {
unsigned int irr, vector = apicd->prev_vector;
/*
* Paranoia: Check if the vector that needs to be cleaned
- * up is registered at the APICs IRR. If so, then this is
- * not the best time to clean it up. Clean it up in the
- * next attempt by sending another IRQ_MOVE_CLEANUP_VECTOR
- * to this CPU. IRQ_MOVE_CLEANUP_VECTOR is the lowest
- * priority external vector, so on return from this
- * interrupt the device interrupt will happen first.
+ * up is registered at the APICs IRR. That's clearly a
+ * hardware issue if the vector arrived on the old target
+ * _after_ interrupts were disabled above. Keep @apicd
+ * on the list and schedule the timer again to give the CPU
+ * a chance to handle the pending interrupt.
*/
irr = apic_read(APIC_IRR + (vector / 32 * 0x10));
if (irr & (1U << (vector % 32))) {
- apic->send_IPI_self(IRQ_MOVE_CLEANUP_VECTOR);
+ pr_warn_once("Moved interrupt pending in old target APIC %u\n", apicd->irq);
+ rearm = true;
continue;
}
free_moved_vector(apicd);
}
- raw_spin_unlock(&vector_lock);
+ /*
+ * Must happen under vector_lock to make the timer_pending() check
+ * in __vector_schedule_cleanup() race free against the rearm here.
+ */
+ if (rearm)
+ mod_timer(tmr, jiffies + 1);
+
+ raw_spin_unlock_irq(&vector_lock);
}
-static void __send_cleanup_vector(struct apic_chip_data *apicd)
+static void __vector_schedule_cleanup(struct apic_chip_data *apicd)
{
- unsigned int cpu;
+ unsigned int cpu = apicd->prev_cpu;
raw_spin_lock(&vector_lock);
apicd->move_in_progress = 0;
- cpu = apicd->prev_cpu;
if (cpu_online(cpu)) {
- hlist_add_head(&apicd->clist, per_cpu_ptr(&cleanup_list, cpu));
- apic->send_IPI(cpu, IRQ_MOVE_CLEANUP_VECTOR);
+ struct vector_cleanup *cl = per_cpu_ptr(&vector_cleanup, cpu);
+
+ /*
+ * The lockless timer_pending() check is safe here. If it
+ * returns true, then the callback will observe this new
+ * apic data in the hlist as everything is serialized by
+ * vector lock.
+ *
+ * If it returns false then the timer is either not armed
+ * or the other CPU executes the callback, which again
+ * would be blocked on vector lock. Rearming it in the
+ * latter case makes it fire for nothing.
+ *
+ * This is also safe against the callback rearming the timer
+ * because that's serialized via vector lock too.
+ */
+ if (!timer_pending(&cl->timer)) {
+ cl->timer.expires = jiffies + 1;
+ add_timer_on(&cl->timer, cpu);
+ }
} else {
apicd->prev_vector = 0;
}
raw_spin_unlock(&vector_lock);
}
-void send_cleanup_vector(struct irq_cfg *cfg)
+void vector_schedule_cleanup(struct irq_cfg *cfg)
{
struct apic_chip_data *apicd;
apicd = container_of(cfg, struct apic_chip_data, hw_irq_cfg);
if (apicd->move_in_progress)
- __send_cleanup_vector(apicd);
+ __vector_schedule_cleanup(apicd);
}
void irq_complete_move(struct irq_cfg *cfg)
@@ -1007,7 +1046,7 @@ void irq_complete_move(struct irq_cfg *c
* on the same CPU.
*/
if (apicd->cpu == smp_processor_id())
- __send_cleanup_vector(apicd);
+ __vector_schedule_cleanup(apicd);
}
/*
--- a/arch/x86/kernel/idt.c
+++ b/arch/x86/kernel/idt.c
@@ -131,7 +131,6 @@ static const __initconst struct idt_data
INTG(RESCHEDULE_VECTOR, asm_sysvec_reschedule_ipi),
INTG(CALL_FUNCTION_VECTOR, asm_sysvec_call_function),
INTG(CALL_FUNCTION_SINGLE_VECTOR, asm_sysvec_call_function_single),
- INTG(IRQ_MOVE_CLEANUP_VECTOR, asm_sysvec_irq_move_cleanup),
INTG(REBOOT_VECTOR, asm_sysvec_reboot),
#endif
--- a/arch/x86/platform/uv/uv_irq.c
+++ b/arch/x86/platform/uv/uv_irq.c
@@ -58,7 +58,7 @@ uv_set_irq_affinity(struct irq_data *dat
ret = parent->chip->irq_set_affinity(parent, mask, force);
if (ret >= 0) {
uv_program_mmr(cfg, data->chip_data);
- send_cleanup_vector(cfg);
+ vector_schedule_cleanup(cfg);
}
return ret;
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -3639,7 +3639,7 @@ static int amd_ir_set_affinity(struct ir
* at the new destination. So, time to cleanup the previous
* vector allocation.
*/
- send_cleanup_vector(cfg);
+ vector_schedule_cleanup(cfg);
return IRQ_SET_MASK_OK_DONE;
}
--- a/drivers/iommu/hyperv-iommu.c
+++ b/drivers/iommu/hyperv-iommu.c
@@ -51,7 +51,7 @@ static int hyperv_ir_set_affinity(struct
if (ret < 0 || ret == IRQ_SET_MASK_OK_DONE)
return ret;
- send_cleanup_vector(cfg);
+ vector_schedule_cleanup(cfg);
return 0;
}
@@ -257,7 +257,7 @@ static int hyperv_root_ir_set_affinity(s
if (ret < 0 || ret == IRQ_SET_MASK_OK_DONE)
return ret;
- send_cleanup_vector(cfg);
+ vector_schedule_cleanup(cfg);
return 0;
}
--- a/drivers/iommu/intel/irq_remapping.c
+++ b/drivers/iommu/intel/irq_remapping.c
@@ -1180,7 +1180,7 @@ intel_ir_set_affinity(struct irq_data *d
* at the new destination. So, time to cleanup the previous
* vector allocation.
*/
- send_cleanup_vector(cfg);
+ vector_schedule_cleanup(cfg);
return IRQ_SET_MASK_OK_DONE;
}
next prev parent reply other threads:[~2023-06-06 20:09 UTC|newest]
Thread overview: 96+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-04-10 8:14 [PATCH v8 00/33] x86: enable FRED for x86-64 Xin Li
2023-04-10 8:14 ` [PATCH v8 01/33] x86/traps: let common_interrupt() handle IRQ_MOVE_CLEANUP_VECTOR Xin Li
2023-05-07 11:59 ` Borislav Petkov
2023-06-03 19:19 ` Li, Xin3
2023-06-03 20:51 ` Thomas Gleixner
2023-06-05 17:07 ` Thomas Gleixner
2023-06-05 17:09 ` H. Peter Anvin
2023-06-06 20:09 ` Thomas Gleixner [this message]
2023-06-06 23:16 ` Li, Xin3
2023-06-19 8:00 ` Li, Xin3
2023-06-19 14:22 ` Thomas Gleixner
2023-06-19 18:47 ` Li, Xin3
2023-06-19 19:16 ` H. Peter Anvin
2023-06-20 0:04 ` Li, Xin3
2023-04-10 8:14 ` [PATCH v8 02/33] x86/fred: make unions for the cs and ss fields in struct pt_regs Xin Li
2023-06-03 9:48 ` Borislav Petkov
2023-06-05 12:07 ` Thomas Gleixner
2023-06-05 17:12 ` H. Peter Anvin
2023-06-05 17:29 ` Thomas Gleixner
2023-04-10 8:14 ` [PATCH v8 03/33] x86/traps: add a system interrupt table for system interrupt dispatch Xin Li
2023-06-05 8:34 ` Thomas Gleixner
2023-06-06 8:05 ` Li, Xin3
2023-06-05 8:38 ` Thomas Gleixner
2023-06-05 8:39 ` Thomas Gleixner
2023-04-10 8:14 ` [PATCH v8 04/33] x86/traps: add install_system_interrupt_handler() Xin Li
2023-06-05 8:57 ` Thomas Gleixner
2023-06-06 5:46 ` Li, Xin3
2023-04-10 8:14 ` [PATCH v8 05/33] x86/traps: add external_interrupt() to dispatch external interrupts Xin Li
2023-06-05 11:56 ` Thomas Gleixner
2023-06-05 17:52 ` Thomas Gleixner
2023-06-19 19:16 ` Li, Xin3
2023-06-19 21:13 ` Thomas Gleixner
2023-06-20 0:16 ` Li, Xin3
2023-04-10 8:14 ` [PATCH v8 06/33] x86/cpufeature: add the cpu feature bit for FRED Xin Li
2023-04-10 8:14 ` [PATCH v8 07/33] x86/opcode: add ERETU, ERETS instructions to x86-opcode-map Xin Li
2023-04-10 8:14 ` [PATCH v8 08/33] x86/objtool: teach objtool about ERETU and ERETS Xin Li
2023-04-10 8:14 ` [PATCH v8 09/33] x86/cpu: add X86_CR4_FRED macro Xin Li
2023-06-05 12:01 ` Thomas Gleixner
2023-06-05 17:06 ` H. Peter Anvin
2023-06-05 17:19 ` H. Peter Anvin
2023-04-10 8:14 ` [PATCH v8 10/33] x86/fred: add Kconfig option for FRED (CONFIG_X86_FRED) Xin Li
2023-04-10 8:14 ` [PATCH v8 11/33] x86/fred: if CONFIG_X86_FRED is disabled, disable FRED support Xin Li
2023-04-10 8:14 ` [PATCH v8 12/33] x86/cpu: add MSR numbers for FRED configuration Xin Li
2023-04-10 8:14 ` [PATCH v8 13/33] x86/fred: header file for event types Xin Li
2023-04-10 8:14 ` [PATCH v8 14/33] x86/fred: header file with FRED definitions Xin Li
2023-04-10 8:14 ` [PATCH v8 15/33] x86/fred: reserve space for the FRED stack frame Xin Li
2023-04-10 8:14 ` [PATCH v8 16/33] x86/fred: add a page fault entry stub for FRED Xin Li
2023-04-10 8:14 ` [PATCH v8 17/33] x86/fred: add a debug " Xin Li
2023-04-10 8:14 ` [PATCH v8 18/33] x86/fred: add a NMI " Xin Li
2023-04-10 8:14 ` [PATCH v8 19/33] x86/fred: add a machine check " Xin Li
2023-04-10 8:14 ` [PATCH v8 20/33] x86/fred: FRED entry/exit and dispatch code Xin Li
2023-06-05 13:21 ` Thomas Gleixner
2023-04-10 8:14 ` [PATCH v8 21/33] x86/fred: FRED initialization code Xin Li
2023-06-05 12:15 ` Thomas Gleixner
2023-06-05 13:41 ` Thomas Gleixner
2023-04-10 8:14 ` [PATCH v8 22/33] x86/fred: update MSR_IA32_FRED_RSP0 during task switch Xin Li
2023-04-10 8:14 ` [PATCH v8 23/33] x86/fred: let ret_from_fork() jmp to fred_exit_user when FRED is enabled Xin Li
2023-04-10 8:14 ` [PATCH v8 24/33] x86/fred: disallow the swapgs instruction " Xin Li
2023-06-05 13:47 ` Thomas Gleixner
2023-04-10 8:14 ` [PATCH v8 25/33] x86/fred: no ESPFIX needed " Xin Li
2023-04-10 8:14 ` [PATCH v8 26/33] x86/fred: allow single-step trap and NMI when starting a new thread Xin Li
2023-06-05 13:50 ` Thomas Gleixner
2023-06-05 13:52 ` Thomas Gleixner
2023-04-10 8:14 ` [PATCH v8 27/33] x86/fred: fixup fault on ERETU by jumping to fred_entrypoint_user Xin Li
2023-04-10 8:14 ` [PATCH v8 28/33] x86/ia32: do not modify the DPL bits for a null selector Xin Li
2023-04-10 8:14 ` [PATCH v8 29/33] x86/fred: allow FRED systems to use interrupt vectors 0x10-0x1f Xin Li
2023-06-05 14:06 ` Thomas Gleixner
2023-06-05 16:55 ` H. Peter Anvin
2023-04-10 8:14 ` [PATCH v8 30/33] x86/fred: allow dynamic stack frame size Xin Li
2023-06-05 14:11 ` Thomas Gleixner
2023-06-06 6:18 ` Li, Xin3
2023-06-06 13:27 ` Thomas Gleixner
2023-06-06 23:08 ` H. Peter Anvin
2023-04-10 8:14 ` [PATCH v8 31/33] x86/fred: BUG() when ERETU with %rsp not equal to that when the ring 3 event was just delivered Xin Li
2023-06-05 14:15 ` Thomas Gleixner
2023-06-05 16:42 ` H. Peter Anvin
2023-06-05 17:16 ` Thomas Gleixner
2023-04-10 8:14 ` [PATCH v8 32/33] x86/fred: disable FRED by default in its early stage Xin Li
2023-04-10 8:14 ` [PATCH v8 33/33] KVM: x86/vmx: refactor VMX_DO_EVENT_IRQOFF to generate FRED stack frames Xin Li
2023-04-10 21:50 ` Sean Christopherson
2023-04-11 5:06 ` Li, Xin3
2023-04-11 18:34 ` Sean Christopherson
2023-04-11 22:50 ` Li, Xin3
2023-04-12 18:26 ` Li, Xin3
2023-04-12 19:37 ` Sean Christopherson
2023-04-10 18:37 ` [PATCH v8 00/33] x86: enable FRED for x86-64 Dave Hansen
2023-04-10 19:14 ` Li, Xin3
2023-04-10 19:32 ` Borislav Petkov
2023-04-10 19:38 ` Dave Hansen
2023-04-10 20:52 ` Li, Xin3
2023-04-11 4:14 ` Li, Xin3
2023-04-10 18:49 ` Dave Hansen
2023-04-10 19:16 ` Li, Xin3
2023-06-05 17:11 ` Thomas Gleixner
2023-06-05 17:22 ` H. Peter Anvin
2023-06-05 17:32 ` Thomas Gleixner
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=877csgl5eo.ffs@tglx \
--to=tglx@linutronix.de \
--cc=andrew.cooper3@citrix.com \
--cc=bp@alien8.de \
--cc=dave.hansen@linux.intel.com \
--cc=hpa@zytor.com \
--cc=jiangshanlai@gmail.com \
--cc=kvm@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=mingo@redhat.com \
--cc=pbonzini@redhat.com \
--cc=peterz@infradead.org \
--cc=ravi.v.shankar@intel.com \
--cc=seanjc@google.com \
--cc=shan.kang@intel.com \
--cc=x86@kernel.org \
--cc=xin3.li@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox