public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Alexander Graf <graf@amazon.com>
To: X86 ML <x86@kernel.org>
Cc: Andy Lutomirski <luto@kernel.org>,
	Thomas Gleixner <tglx@linutronix.de>,
	LKML <linux-kernel@vger.kernel.org>,
	Andrew Cooper <andrew.cooper3@citrix.com>,
	"Paul E. McKenney" <paulmck@kernel.org>,
	Alexandre Chartre <alexandre.chartre@oracle.com>,
	Frederic Weisbecker <frederic@kernel.org>,
	Paolo Bonzini <pbonzini@redhat.com>,
	"Sean Christopherson" <sean.j.christopherson@intel.com>,
	Masami Hiramatsu <mhiramat@kernel.org>,
	Petr Mladek <pmladek@suse.com>,
	Steven Rostedt <rostedt@goodmis.org>,
	Joel Fernandes <joel@joelfernandes.org>,
	"Boris Ostrovsky" <boris.ostrovsky@oracle.com>,
	Juergen Gross <jgross@suse.com>,
	Mathieu Desnoyers <mathieu.desnoyers@efficios.com>,
	Josh Poimboeuf <jpoimboe@redhat.com>,
	Will Deacon <will@kernel.org>,
	Tom Lendacky <thomas.lendacky@amd.com>,
	Wei Liu <wei.liu@kernel.org>,
	Michael Kelley <mikelley@microsoft.com>,
	Jason Chen CJ <jason.cj.chen@intel.com>,
	Zhao Yakui <yakui.zhao@intel.com>,
	"Peter Zijlstra (Intel)" <peterz@infradead.org>,
	"Avi Kivity" <avi@scylladb.com>,
	"Herrenschmidt, Benjamin" <benh@amazon.com>, <robketr@amazon.de>,
	<amos@scylladb.com>, Brian Gerst <brgerst@gmail.com>,
	<stable@vger.kernel.org>
Subject: [PATCH] x86/irq: Preserve vector in orig_ax for APIC code
Date: Wed, 26 Aug 2020 13:53:57 +0200	[thread overview]
Message-ID: <20200826115357.3049-1-graf@amazon.com> (raw)

Commit 633260fa143 ("x86/irq: Convey vector as argument and not in ptregs")
changed the handover logic of the vector identifier from ~vector in orig_ax
to purely register based. Unfortunately, this field has another consumer
in the APIC code which the commit did not touch. The net result was that
IRQ balancing did not work and instead resulted in interrupt storms, slowing
down the system.

This patch restores the original semantics that orig_ax contains the vector.
When we receive an interrupt now, the actual vector number stays stored in
the orig_ax field which then gets consumed by the APIC code.

To ensure that nobody else trips over this in the future, the patch also adds
comments at strategic places to warn anyone who would refactor the code that
there is another consumer of the field.

With this patch in place, IRQ balancing works as expected and performance
levels are restored to previous levels.

Reported-by: Alex bykov <alex.bykov@scylladb.com>
Reported-by: Avi Kivity <avi@scylladb.com>
Fixes: 633260fa143 ("x86/irq: Convey vector as argument and not in ptregs")
Cc: stable@vger.kernel.org
Signed-off-by: Alexander Graf <graf@amazon.com>
---
 arch/x86/entry/entry_32.S     |  2 +-
 arch/x86/entry/entry_64.S     | 17 +++++++++++------
 arch/x86/kernel/apic/vector.c |  2 +-
 3 files changed, 13 insertions(+), 8 deletions(-)

diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index df8c017..22e829c 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -727,7 +727,7 @@ SYM_CODE_START_LOCAL(asm_\cfunc)
 	ENCODE_FRAME_POINTER
 	movl	%esp, %eax
 	movl	PT_ORIG_EAX(%esp), %edx		/* get the vector from stack */
-	movl	$-1, PT_ORIG_EAX(%esp)		/* no syscall to restart */
+	/* keep vector on stack for APIC's irq_complete_move() */
 	call	\cfunc
 	jmp	handle_exception_return
 SYM_CODE_END(asm_\cfunc)
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 70dea93..d78fb1c 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -319,7 +319,7 @@ SYM_CODE_END(ret_from_fork)
  * @cfunc:		C function to be called
  * @has_error_code:	Hardware pushed error code on stack
  */
-.macro idtentry_body cfunc has_error_code:req
+.macro idtentry_body cfunc has_error_code:req preserve_error_code:req
 
 	call	error_entry
 	UNWIND_HINT_REGS
@@ -328,7 +328,9 @@ SYM_CODE_END(ret_from_fork)
 
 	.if \has_error_code == 1
 		movq	ORIG_RAX(%rsp), %rsi	/* get error code into 2nd argument*/
-		movq	$-1, ORIG_RAX(%rsp)	/* no syscall to restart */
+		.if \preserve_error_code == 0
+			movq	$-1, ORIG_RAX(%rsp)	/* no syscall to restart */
+		.endif
 	.endif
 
 	call	\cfunc
@@ -346,7 +348,7 @@ SYM_CODE_END(ret_from_fork)
  * The macro emits code to set up the kernel context for straight forward
  * and simple IDT entries. No IST stack, no paranoid entry checks.
  */
-.macro idtentry vector asmsym cfunc has_error_code:req
+.macro idtentry vector asmsym cfunc has_error_code:req preserve_error_code=0
 SYM_CODE_START(\asmsym)
 	UNWIND_HINT_IRET_REGS offset=\has_error_code*8
 	ASM_CLAC
@@ -369,7 +371,7 @@ SYM_CODE_START(\asmsym)
 .Lfrom_usermode_no_gap_\@:
 	.endif
 
-	idtentry_body \cfunc \has_error_code
+	idtentry_body \cfunc \has_error_code \preserve_error_code
 
 _ASM_NOKPROBE(\asmsym)
 SYM_CODE_END(\asmsym)
@@ -382,11 +384,14 @@ SYM_CODE_END(\asmsym)
  * position of idtentry exceptions, and jump to one of the two idtentry points
  * (common/spurious).
  *
+ * The original vector number on the stack has to stay untouched, so that the
+ * APIC irq_complete_move() code can access it later on IRQ ack.
+ *
  * common_interrupt is a hotpath, align it to a cache line
  */
 .macro idtentry_irq vector cfunc
 	.p2align CONFIG_X86_L1_CACHE_SHIFT
-	idtentry \vector asm_\cfunc \cfunc has_error_code=1
+	idtentry \vector asm_\cfunc \cfunc has_error_code=1 preserve_error_code=1
 .endm
 
 /*
@@ -440,7 +445,7 @@ SYM_CODE_START(\asmsym)
 
 	/* Switch to the regular task stack and use the noist entry point */
 .Lfrom_usermode_switch_stack_\@:
-	idtentry_body noist_\cfunc, has_error_code=0
+	idtentry_body noist_\cfunc, has_error_code=0, preserve_error_code=0
 
 _ASM_NOKPROBE(\asmsym)
 SYM_CODE_END(\asmsym)
diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index dae32d9..e81b835 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -924,7 +924,7 @@ static void __irq_complete_move(struct irq_cfg *cfg, unsigned vector)
 
 void irq_complete_move(struct irq_cfg *cfg)
 {
-	__irq_complete_move(cfg, ~get_irq_regs()->orig_ax);
+	__irq_complete_move(cfg, (u8)get_irq_regs()->orig_ax);
 }
 
 /*
-- 
1.8.3.1




Amazon Development Center Germany GmbH
Krausenstr. 38
10117 Berlin
Geschaeftsfuehrung: Christian Schlaeger, Jonathan Weiss
Eingetragen am Amtsgericht Charlottenburg unter HRB 149173 B
Sitz: Berlin
Ust-ID: DE 289 237 879




             reply	other threads:[~2020-08-26 11:54 UTC|newest]

Thread overview: 20+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-08-26 11:53 Alexander Graf [this message]
2020-08-26 13:22 ` [PATCH] x86/irq: Preserve vector in orig_ax for APIC code Josh Poimboeuf
2020-08-26 13:51   ` Alexander Graf
2020-08-26 14:27 ` Thomas Gleixner
2020-08-26 16:13   ` Andy Lutomirski
2020-08-26 17:47     ` Thomas Gleixner
2020-08-26 18:00       ` Andy Lutomirski
2020-08-26 18:22         ` Graf (AWS), Alexander
2020-08-26 16:33   ` Alexander Graf
2020-08-26 18:30     ` Thomas Gleixner
2020-08-26 18:53       ` Thomas Gleixner
2020-08-26 20:09         ` Alexander Graf
2020-08-26 20:21         ` x86/irq: Unbreak interrupt affinity setting Thomas Gleixner
2020-08-26 21:37           ` David Laight
2020-08-26 21:47             ` David Laight
2020-08-26 22:52               ` Alexander Graf
2020-08-27  8:31                 ` David Laight
2020-08-26 22:07             ` Thomas Gleixner
2020-08-27  8:28               ` David Laight
2020-08-27  7:32           ` [tip: x86/urgent] " tip-bot2 for Thomas Gleixner

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200826115357.3049-1-graf@amazon.com \
    --to=graf@amazon.com \
    --cc=alexandre.chartre@oracle.com \
    --cc=amos@scylladb.com \
    --cc=andrew.cooper3@citrix.com \
    --cc=avi@scylladb.com \
    --cc=benh@amazon.com \
    --cc=boris.ostrovsky@oracle.com \
    --cc=brgerst@gmail.com \
    --cc=frederic@kernel.org \
    --cc=jason.cj.chen@intel.com \
    --cc=jgross@suse.com \
    --cc=joel@joelfernandes.org \
    --cc=jpoimboe@redhat.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=luto@kernel.org \
    --cc=mathieu.desnoyers@efficios.com \
    --cc=mhiramat@kernel.org \
    --cc=mikelley@microsoft.com \
    --cc=paulmck@kernel.org \
    --cc=pbonzini@redhat.com \
    --cc=peterz@infradead.org \
    --cc=pmladek@suse.com \
    --cc=robketr@amazon.de \
    --cc=rostedt@goodmis.org \
    --cc=sean.j.christopherson@intel.com \
    --cc=stable@vger.kernel.org \
    --cc=tglx@linutronix.de \
    --cc=thomas.lendacky@amd.com \
    --cc=wei.liu@kernel.org \
    --cc=will@kernel.org \
    --cc=x86@kernel.org \
    --cc=yakui.zhao@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox