* [PATCH v3 1/8] alpha: enable regset-based ptrace and core dumps
2026-06-12 20:26 [PATCH v3 0/8] alpha: enable generic entry infrastructure Magnus Lindholm
@ 2026-06-12 20:26 ` Magnus Lindholm
2026-06-12 20:26 ` [PATCH v3 2/8] alpha: add ARCH_STACKWALK-based stacktrace support Magnus Lindholm
` (7 subsequent siblings)
8 siblings, 0 replies; 12+ messages in thread
From: Magnus Lindholm @ 2026-06-12 20:26 UTC (permalink / raw)
To: richard.henderson, mattst88, linux-kernel, linux-alpha
Cc: glaubitz, mcree, ink, macro, Magnus Lindholm
Add a user_regset_view for Alpha and switch ELF core dumping to
CORE_DUMP_USE_REGSET. General-purpose registers are exported in
ELF gregs layout, including callee-saved registers and a correct
user stack pointer.
The user stack pointer is not preserved in pt_regs on Alpha, so expose
it from the PCB, or via rdusp() for the current task, when building the
ELF register image. This makes the user stack pointer consistent for
core dumps, ptrace regsets, and PTRACE_GET_SYSCALL_INFO.
Implement regset get/set callbacks for both NT_PRSTATUS and NT_PRFPREG.
The callbacks translate between Alpha's pt_regs/thread state and the
ELF-visible register layouts, while the common ptrace regset code handles
PTRACE_GETREGSET and PTRACE_SETREGSET iovec semantics. This avoids
duplicating subtle short-buffer and oversized-buffer behavior in
arch_ptrace().
With these changes Alpha satisfies the requirements for
HAVE_ARCH_TRACEHOOK and selects it, enabling generic tracehook and
ptrace syscall-info code paths without changing the existing syscall
entry ABI.
Reviewed-by: Matt Turner <mattst88@gmail.com>
Tested-by: Matt Turner <mattst88@gmail.com>
Signed-off-by: Magnus Lindholm <linmag7@gmail.com>
---
.../features/core/tracehook/arch-support.txt | 2 +-
arch/alpha/Kconfig | 1 +
arch/alpha/include/asm/elf.h | 1 +
arch/alpha/include/asm/ptrace.h | 7 +
arch/alpha/include/asm/syscall.h | 7 +
arch/alpha/include/asm/thread_info.h | 7 +-
arch/alpha/include/uapi/asm/ptrace.h | 2 +-
arch/alpha/kernel/asm-offsets.c | 1 +
arch/alpha/kernel/entry.S | 15 +-
arch/alpha/kernel/ptrace.c | 320 +++++++++++++++---
arch/alpha/kernel/traps.c | 8 +
11 files changed, 312 insertions(+), 59 deletions(-)
diff --git a/Documentation/features/core/tracehook/arch-support.txt b/Documentation/features/core/tracehook/arch-support.txt
index 4f36fcbfb6d5..654f38413d16 100644
--- a/Documentation/features/core/tracehook/arch-support.txt
+++ b/Documentation/features/core/tracehook/arch-support.txt
@@ -6,7 +6,7 @@
-----------------------
| arch |status|
-----------------------
- | alpha: | TODO |
+ | alpha: | ok |
| arc: | ok |
| arm: | ok |
| arm64: | ok |
diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig
index 7b7dafe7d9df..f3b882835617 100644
--- a/arch/alpha/Kconfig
+++ b/arch/alpha/Kconfig
@@ -33,6 +33,7 @@ config ALPHA
select HAVE_ARCH_AUDITSYSCALL
select HAVE_ARCH_SECCOMP
select HAVE_ARCH_SECCOMP_FILTER
+ select HAVE_ARCH_TRACEHOOK
select HAVE_MOD_ARCH_SPECIFIC
select LOCK_MM_AND_FIND_VMA
select MODULES_USE_ELF_RELA
diff --git a/arch/alpha/include/asm/elf.h b/arch/alpha/include/asm/elf.h
index 50c82187e60e..b15946621d57 100644
--- a/arch/alpha/include/asm/elf.h
+++ b/arch/alpha/include/asm/elf.h
@@ -53,6 +53,7 @@
#define EF_ALPHA_32BIT 1 /* All addresses are below 2GB */
+#define CORE_DUMP_USE_REGSET 1
/*
* ELF register definitions..
*/
diff --git a/arch/alpha/include/asm/ptrace.h b/arch/alpha/include/asm/ptrace.h
index 3557ce64ed21..8e0a589e2d15 100644
--- a/arch/alpha/include/asm/ptrace.h
+++ b/arch/alpha/include/asm/ptrace.h
@@ -24,4 +24,11 @@ static inline unsigned long regs_return_value(struct pt_regs *regs)
return regs->r0;
}
+/* Return the user stack pointer value stored in the usp slot of @regs. */
+static inline unsigned long user_stack_pointer(struct pt_regs *regs)
+{
+ /* Only valid for user-mode frames whose entry path snapshotted USP into regs->usp */
+ return regs->usp;
+}
+
#endif
diff --git a/arch/alpha/include/asm/syscall.h b/arch/alpha/include/asm/syscall.h
index 584b1ab2e325..1e78cbd46faf 100644
--- a/arch/alpha/include/asm/syscall.h
+++ b/arch/alpha/include/asm/syscall.h
@@ -19,6 +19,13 @@ static inline long syscall_get_return_value(struct task_struct *task,
return regs->r19 ? -(long)regs->r0 : (long)regs->r0;
}
+/*
+ * Return the error of a completed syscall as a negative errno, or 0 when
+ * the syscall succeeded.  A non-zero r19 (a3) marks failure, in which case
+ * r0 holds the positive error number.
+ */
+static inline long syscall_get_error(struct task_struct *task,
+				     struct pt_regs *regs)
+{
+	if (!regs->r19)
+		return 0;
+
+	return -(long)regs->r0;
+}
+
+
/*
* Alpha syscall ABI / kernel conventions:
* - PAL provides syscall number in r0 on entry.
diff --git a/arch/alpha/include/asm/thread_info.h b/arch/alpha/include/asm/thread_info.h
index 94ef9cfa30f5..1552ecca8520 100644
--- a/arch/alpha/include/asm/thread_info.h
+++ b/arch/alpha/include/asm/thread_info.h
@@ -66,6 +66,7 @@ register unsigned long *current_stack_pointer __asm__ ("$30");
#define TIF_SYSCALL_AUDIT 4 /* syscall audit active */
#define TIF_NOTIFY_SIGNAL 5 /* signal notifications exist */
#define TIF_SECCOMP 6 /* seccomp syscall filtering active */
+#define TIF_SYSCALL_TRACEPOINT 7 /* syscall tracepoint instrumentation */
#define TIF_DIE_IF_KERNEL 9 /* dik recursion lock */
#define TIF_MEMDIE 13 /* is terminating due to OOM killer */
#define TIF_POLLING_NRFLAG 14 /* idle is polling for TIF_NEED_RESCHED */
@@ -78,6 +79,7 @@ register unsigned long *current_stack_pointer __asm__ ("$30");
#define _TIF_NOTIFY_SIGNAL (1<<TIF_NOTIFY_SIGNAL)
#define _TIF_SECCOMP (1<<TIF_SECCOMP)
#define _TIF_POLLING_NRFLAG (1<<TIF_POLLING_NRFLAG)
+#define _TIF_SYSCALL_TRACEPOINT (1<<TIF_SYSCALL_TRACEPOINT)
/*
* Work to do on syscall entry (in entry.S).
@@ -85,9 +87,10 @@ register unsigned long *current_stack_pointer __asm__ ("$30");
* with the mask used before branching to syscall_trace_enter().
*/
#ifdef CONFIG_AUDITSYSCALL
-# define _TIF_SYSCALL_WORK (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP)
+# define _TIF_SYSCALL_WORK (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP \
+ | _TIF_SYSCALL_TRACEPOINT)
#else
-# define _TIF_SYSCALL_WORK (_TIF_SYSCALL_TRACE | _TIF_SECCOMP)
+# define _TIF_SYSCALL_WORK (_TIF_SYSCALL_TRACE | _TIF_SECCOMP | _TIF_SYSCALL_TRACEPOINT)
#endif
/* Work to do on interrupt/exception return. */
diff --git a/arch/alpha/include/uapi/asm/ptrace.h b/arch/alpha/include/uapi/asm/ptrace.h
index 72ed913a910f..9d86b2a1526e 100644
--- a/arch/alpha/include/uapi/asm/ptrace.h
+++ b/arch/alpha/include/uapi/asm/ptrace.h
@@ -43,7 +43,7 @@ struct pt_regs {
unsigned long trap_a1;
unsigned long trap_a2;
/* This makes the stack 16-byte aligned as GCC expects */
- unsigned long __pad0;
+ unsigned long usp;
/* These are saved by PAL-code: */
unsigned long ps;
unsigned long pc;
diff --git a/arch/alpha/kernel/asm-offsets.c b/arch/alpha/kernel/asm-offsets.c
index 1ebb05890499..1d3bfca319ae 100644
--- a/arch/alpha/kernel/asm-offsets.c
+++ b/arch/alpha/kernel/asm-offsets.c
@@ -29,4 +29,5 @@ static void __used foo(void)
DEFINE(HAE_CACHE, offsetof(struct alpha_machine_vector, hae_cache));
DEFINE(HAE_REG, offsetof(struct alpha_machine_vector, hae_register));
+ DEFINE(PT_REGS_USP, offsetof(struct pt_regs, usp));
}
diff --git a/arch/alpha/kernel/entry.S b/arch/alpha/kernel/entry.S
index fcfd06529b12..449092a31eef 100644
--- a/arch/alpha/kernel/entry.S
+++ b/arch/alpha/kernel/entry.S
@@ -520,6 +520,12 @@ entSys:
ldq $1, 0($sp) /* syscall nr from saved r0 */
stq $1, 8($sp) /* regs->r1 = shadow syscall nr */
stq $1, 16($sp) /* regs->r2 = restart syscall nr */
+ /* Syscalls always enter from user mode: snapshot USP into pt_regs->usp */
+ mov $0, $8
+ call_pal PAL_rdusp
+ stq $0, PT_REGS_USP($sp)
+ mov $8, $0
+
lda $8, 0x3fff
bic $sp, $8, $8
@@ -535,15 +541,10 @@ entSys:
.cfi_rel_offset $16, SP_OFF+24
.cfi_rel_offset $17, SP_OFF+32
.cfi_rel_offset $18, SP_OFF+40
-#ifdef CONFIG_AUDITSYSCALL
- lda $6, _TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP
- and $3, $6, $3
- bne $3, strace
-#else
- lda $6, _TIF_SYSCALL_TRACE | _TIF_SECCOMP
+ lda $6, _TIF_SYSCALL_WORK
and $3, $6, $3
bne $3, strace
-#endif
+
beq $4, 1f
ldq $27, 0($5)
1: ldq $0, 8($sp) /* syscall nr shadow (regs->r1) */
diff --git a/arch/alpha/kernel/ptrace.c b/arch/alpha/kernel/ptrace.c
index 0687760ea466..69eb337347df 100644
--- a/arch/alpha/kernel/ptrace.c
+++ b/arch/alpha/kernel/ptrace.c
@@ -24,10 +24,15 @@
#include "proto.h"
#include <linux/uio.h>
+#include <linux/regset.h>
#define DEBUG DBG_MEM
#undef DEBUG
+#ifndef NT_FPREGSET
+#define NT_FPREGSET NT_PRFPREG
+#endif
+
#ifdef DEBUG
enum {
DBG_MEM = (1<<0),
@@ -143,19 +148,163 @@ get_reg(struct task_struct * task, unsigned long regno)
return *get_reg_addr(task, regno);
}
+/*
+ * Copy the saved floating-point image of @target into @fpregs.
+ * @fpregs must have room for a complete elf_fpregset_t.
+ */
+static void alpha_elf_fpregs_get(struct task_struct *target,
+				 elf_fpreg_t *fpregs)
+{
+	struct thread_info *ti = task_thread_info(target);
+
+	memcpy(fpregs, ti->fp, sizeof(elf_fpregset_t));
+}
+
+/*
+ * Store the first @nwords entries of @fpregs into the saved floating-point
+ * image of @target.  At most ELF_NFPREG entries are copied; any excess in
+ * @nwords is ignored.
+ */
+static void alpha_elf_fpregs_set(struct task_struct *target,
+				 const elf_fpreg_t *fpregs,
+				 size_t nwords)
+{
+	struct thread_info *ti = task_thread_info(target);
+	size_t bytes;
+
+	bytes = min_t(size_t, nwords, ELF_NFPREG) * sizeof(elf_fpreg_t);
+	memcpy(ti->fp, fpregs, bytes);
+}
+
+/*
+ * Write an ELF gregs image back into the saved register state of @child.
+ *
+ * @src holds the registers in ELF gregs order.  Only the first @nwords
+ * entries are consumed, so a short image leaves the remaining registers
+ * untouched.  Slots 0..8 and 16..28 map to pt_regs, slots 9..15 map to the
+ * switch_stack that sits directly below pt_regs, slot 29 is gp, slot 30 the
+ * user stack pointer, slot 31 the pc and slot 32 the PCB "unique" value.
+ */
+static void alpha_elf_gregs_set(struct task_struct *child,
+				const elf_greg_t *src,
+				size_t nwords)
+{
+	struct pt_regs *pt = task_pt_regs(child);
+	struct thread_info *ti = task_thread_info(child);
+	struct switch_stack *sw = ((struct switch_stack *)pt) - 1;
+
+	/* GPRs r0..r8 live in pt_regs */
+	if (nwords > 0)
+		pt->r0 = src[0];
+	if (nwords > 1)
+		pt->r1 = src[1];
+	if (nwords > 2)
+		pt->r2 = src[2];
+	if (nwords > 3)
+		pt->r3 = src[3];
+	if (nwords > 4)
+		pt->r4 = src[4];
+	if (nwords > 5)
+		pt->r5 = src[5];
+	if (nwords > 6)
+		pt->r6 = src[6];
+	if (nwords > 7)
+		pt->r7 = src[7];
+	if (nwords > 8)
+		pt->r8 = src[8];
+
+	/* r9..r15 live in switch_stack */
+	if (nwords > 9)
+		sw->r9 = src[9];
+	if (nwords > 10)
+		sw->r10 = src[10];
+	if (nwords > 11)
+		sw->r11 = src[11];
+	if (nwords > 12)
+		sw->r12 = src[12];
+	if (nwords > 13)
+		sw->r13 = src[13];
+	if (nwords > 14)
+		sw->r14 = src[14];
+	if (nwords > 15)
+		sw->r15 = src[15];
+
+	/* r16..r28 live in pt_regs */
+	if (nwords > 16)
+		pt->r16 = src[16];
+	if (nwords > 17)
+		pt->r17 = src[17];
+	if (nwords > 18)
+		pt->r18 = src[18];
+	if (nwords > 19)
+		pt->r19 = src[19];
+	if (nwords > 20)
+		pt->r20 = src[20];
+	if (nwords > 21)
+		pt->r21 = src[21];
+	if (nwords > 22)
+		pt->r22 = src[22];
+	if (nwords > 23)
+		pt->r23 = src[23];
+	if (nwords > 24)
+		pt->r24 = src[24];
+	if (nwords > 25)
+		pt->r25 = src[25];
+	if (nwords > 26)
+		pt->r26 = src[26];
+	if (nwords > 27)
+		pt->r27 = src[27];
+	if (nwords > 28)
+		pt->r28 = src[28];
+
+	/* gp, usp, pc, unique */
+	if (nwords > 29)
+		pt->gp = src[29];
+
+	if (nwords > 30) {
+		ti->pcb.usp = src[30];
+		/*
+		 * Keep the pt_regs copy returned by user_stack_pointer() in
+		 * step with the PCB, so a later read does not report the
+		 * stack pointer from before this write.
+		 */
+		pt->usp = src[30];
+		/*
+		 * If someone ever does this to current (rare), keep the
+		 * hardware usp consistent.
+		 */
+		if (child == current)
+			wrusp(src[30]);
+	}
+
+	if (nwords > 31)
+		pt->pc = src[31];
+
+	if (nwords > 32)
+		ti->pcb.unique = src[32];
+
+	/*
+	 * PTRACE_SETREGSET can be used at a syscall-entry stop to skip the
+	 * syscall by setting the syscall number to -1.  The seccomp/ptrace
+	 * selftests use this to synthesize errno returns.
+	 *
+	 * Alpha uses r19/a3 as the error flag, so a skipped syscall with a
+	 * small positive r0 and a clear r19 must be normalized to an error
+	 * return.  Errno values run up to and including MAX_ERRNO, hence
+	 * the inclusive upper bound.
+	 */
+	if (pt->r1 == (unsigned long)-1 &&
+	    pt->r19 == 0 &&
+	    pt->r0 > 0 &&
+	    pt->r0 <= MAX_ERRNO)
+		pt->r19 = 1;
+}
+
+
/*
* Write contents of register REGNO in task TASK.
*/
static int
put_reg(struct task_struct *task, unsigned long regno, unsigned long data)
{
+ struct pt_regs *regs = task_pt_regs(task);
+
if (regno == 63) {
task_thread_info(task)->ieee_state
= ((task_thread_info(task)->ieee_state & ~IEEE_SW_MASK)
| (data & IEEE_SW_MASK));
data = (data & FPCR_DYN_MASK) | ieee_swcr_to_fpcr(data);
}
+
*get_reg_addr(task, regno) = data;
+
+ /*
+ * Alpha historically exposes r0/v0 as the syscall number at a
+ * syscall-entry stop, while the mutable syscall number lives in
+ * regs->r1. Old ptrace users such as strace skip a syscall by
+ * poking r0 to -1, so mirror exactly that value into r1 (and clear
+ * r19). Other r0 writes are not mirrored: strace later pokes r0 to
+ * the injected return value, e.g. 42, while r1 must remain -1.
+ * NOTE(review): this is not gated on a syscall-entry stop, so any
+ * poke of -1 into r0 also rewrites r1/r19 -- confirm intended.
+ */
+
+ if (regno == 0 && data == (unsigned long)-1) {
+ regs->r1 = data;
+ regs->r19 = 0;
+ }
+
return 0;
}
@@ -315,54 +464,6 @@ long arch_ptrace(struct task_struct *child, long request,
DBG(DBG_MEM, ("poke $%lu<-%#lx\n", addr, data));
ret = put_reg(child, addr, data);
break;
- case PTRACE_GETREGSET:
- case PTRACE_SETREGSET: {
- struct iovec __user *uiov = (struct iovec __user *)data;
- struct iovec iov;
- struct pt_regs *regs;
- size_t len;
-
- /* Only support NT_PRSTATUS (general registers) for now. */
- if (addr != NT_PRSTATUS) {
- ret = -EIO;
- break;
- }
-
- if (copy_from_user(&iov, uiov, sizeof(iov))) {
- ret = -EFAULT;
- break;
- }
-
- regs = task_pt_regs(child);
- len = min_t(size_t, iov.iov_len, sizeof(*regs));
-
- if (request == PTRACE_GETREGSET) {
- if (copy_to_user(iov.iov_base, regs, len)) {
- ret = -EFAULT;
- break;
- }
- } else {
- /*
- * Allow writing back regs. This is needed by the TRACE_syscall
- * tests (they change PC/syscall nr/retval).
- */
- if (copy_from_user(regs, iov.iov_base, len)) {
- ret = -EFAULT;
- break;
- }
- }
-
- /* Per API, update iov_len with amount transferred. */
- iov.iov_len = len;
- if (copy_to_user(uiov, &iov, sizeof(iov))) {
- ret = -EFAULT;
- break;
- }
-
- ret = 0;
- break;
- }
-
default:
ret = ptrace_request(child, request, addr, data);
break;
@@ -410,3 +511,126 @@ syscall_trace_leave(void)
if (test_thread_flag(TIF_SYSCALL_TRACE))
ptrace_report_syscall_exit(current_pt_regs(), 0);
}
+
+/*
+ * Minimal regset support for Alpha.
+ *
+ * Alpha-specific notes:
+ * - Do NOT use ELF_CORE_COPY_REGS(): it uses current_thread_info(),
+ * which is wrong for non-current tasks.
+ * - dump_elf_task() returns 1 unconditionally in this tree, while
+ * regset_get should return 0 on success. So call dump_elf_thread()
+ * directly and return membuf_write()'s result.
+ */
+
+/*
+ * regset ->set handler for the general-purpose registers (NT_PRSTATUS).
+ *
+ * Builds the current ELF gregs image of @target, overlays the bytes
+ * [@pos, @pos + @count) supplied through @kbuf or @ubuf, and writes the
+ * merged image back.  Returns 0 on success, -EIO when the requested range
+ * does not fit the image, or the error reported by user_regset_copyin().
+ */
+static int alpha_regset_set(struct task_struct *target,
+			    const struct user_regset *regset,
+			    unsigned int pos, unsigned int count,
+			    const void *kbuf,
+			    const void __user *ubuf)
+{
+	elf_gregset_t gregs;
+	int ret;
+
+	/* Same test as "pos + count > sizeof(gregs)" but immune to wrap-around. */
+	if (pos > sizeof(gregs) || count > sizeof(gregs) - pos)
+		return -EIO;
+
+	/*
+	 * Preserve registers outside the written range.
+	 */
+	dump_elf_thread(gregs, task_pt_regs(target),
+			task_thread_info(target));
+
+	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				 gregs, 0, sizeof(gregs));
+	if (ret)
+		return ret;
+
+	alpha_elf_gregs_set(target, gregs, sizeof(gregs) / sizeof(elf_greg_t));
+
+	return 0;
+}
+
+/*
+ * regset ->set handler for the floating-point registers (NT_PRFPREG).
+ *
+ * Builds the current FP image of @target, overlays the bytes
+ * [@pos, @pos + @count) supplied through @kbuf or @ubuf, and writes the
+ * merged image back.  Returns 0 on success, -EIO when the requested range
+ * does not fit the image, or the error reported by user_regset_copyin().
+ */
+static int alpha_fpregset_set(struct task_struct *target,
+			      const struct user_regset *regset,
+			      unsigned int pos, unsigned int count,
+			      const void *kbuf,
+			      const void __user *ubuf)
+{
+	elf_fpregset_t fpregs;
+	int ret;
+
+	/* Same test as "pos + count > sizeof(fpregs)" but immune to wrap-around. */
+	if (pos > sizeof(fpregs) || count > sizeof(fpregs) - pos)
+		return -EIO;
+
+	/* Start from the current image so untouched registers keep their value. */
+	alpha_elf_fpregs_get(target, fpregs);
+
+	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				 fpregs, 0, sizeof(fpregs));
+	if (ret)
+		return ret;
+
+	alpha_elf_fpregs_set(target, fpregs, sizeof(fpregs) / sizeof(elf_fpreg_t));
+
+	return 0;
+}
+
+/*
+ * regset ->regset_get handler for the general-purpose registers: build the
+ * ELF gregs image of @target and emit it into @to.  Returns the result of
+ * membuf_write().
+ */
+static int alpha_regset_get(struct task_struct *target,
+			    const struct user_regset *regset,
+			    struct membuf to)
+{
+	elf_gregset_t image;
+
+	dump_elf_thread(image, task_pt_regs(target), task_thread_info(target));
+
+	return membuf_write(&to, image, sizeof(image));
+}
+
+/*
+ * regset ->regset_get handler for the floating-point registers: copy the
+ * saved FP image of @target and emit it into @to.  Returns the result of
+ * membuf_write().
+ */
+static int alpha_fpregset_get(struct task_struct *target,
+			      const struct user_regset *regset,
+			      struct membuf to)
+{
+	elf_fpregset_t image;
+
+	alpha_elf_fpregs_get(target, image);
+
+	return membuf_write(&to, image, sizeof(image));
+}
+
+enum alpha_regset { /* indices into alpha_user_regsets[] */
+ REGSET_GPR, /* general-purpose registers (NT_PRSTATUS entry) */
+ REGSET_FPR, /* floating-point registers (NT_PRFPREG entry) */
+};
+
+static const struct user_regset alpha_user_regsets[] = { /* regsets exported by user_alpha_view */
+ [REGSET_GPR] = {
+ .core_note_type = NT_PRSTATUS,
+ .n = ELF_NGREG, /* one slot per elf_greg_t in elf_gregset_t */
+ .size = sizeof(elf_greg_t),
+ .align = sizeof(elf_greg_t),
+ .regset_get = alpha_regset_get,
+ .set = alpha_regset_set,
+ },
+ [REGSET_FPR] = {
+ .core_note_type = NT_PRFPREG,
+ .core_note_name = "CORE",
+ .n = ELF_NFPREG, /* one slot per elf_fpreg_t in elf_fpregset_t */
+ .size = sizeof(elf_fpreg_t),
+ .align = sizeof(elf_fpreg_t),
+ .regset_get = alpha_fpregset_get,
+ .set = alpha_fpregset_set,
+ },
+};
+
+static const struct user_regset_view user_alpha_view = { /* the only regset view defined here */
+ .name = "alpha",
+ .e_machine = EM_ALPHA,
+ .ei_osabi = ELF_OSABI,
+ .regsets = alpha_user_regsets,
+ .n = ARRAY_SIZE(alpha_user_regsets), /* number of entries in alpha_user_regsets[] */
+};
+
+const struct user_regset_view *task_user_regset_view(struct task_struct *task)
+{
+ return &user_alpha_view; /* same view for every task; @task is not consulted */
+}
diff --git a/arch/alpha/kernel/traps.c b/arch/alpha/kernel/traps.c
index 7004397937cf..7631129ac914 100644
--- a/arch/alpha/kernel/traps.c
+++ b/arch/alpha/kernel/traps.c
@@ -30,6 +30,12 @@
#include "proto.h"
+/*
+ * Record the value returned by rdusp() in regs->usp, but only when @regs
+ * describes a user-mode frame; kernel-mode frames are left untouched.
+ */
+static __always_inline void alpha_snapshot_usp(struct pt_regs *regs)
+{
+	if (!user_mode(regs))
+		return;
+
+	regs->usp = rdusp();
+}
+
void
dik_show_regs(struct pt_regs *regs, unsigned long *r9_15)
{
@@ -180,6 +186,7 @@ do_entArith(unsigned long summary, unsigned long write_mask,
{
long si_code = FPE_FLTINV;
+ alpha_snapshot_usp(regs);
if (summary & 1) {
/* Software-completion summary bit is set, so try to
emulate the instruction. If the processor supports
@@ -201,6 +208,7 @@ do_entIF(unsigned long type, struct pt_regs *regs)
{
int signo, code;
+ alpha_snapshot_usp(regs);
if (type == 3) { /* FEN fault */
/* Irritating users can call PAL_clrfen to disable the
FPU for the process. The kernel will then trap in
--
2.53.0
^ permalink raw reply related [flat|nested] 12+ messages in thread* [PATCH v3 8/8] alpha: enable GENERIC_ENTRY and GENERIC_IRQ_ENTRY
2026-06-12 20:26 [PATCH v3 0/8] alpha: enable generic entry infrastructure Magnus Lindholm
` (6 preceding siblings ...)
2026-06-12 20:26 ` [PATCH v3 7/8] alpha: enable lockdep hardirq state tracking Magnus Lindholm
@ 2026-06-12 20:26 ` Magnus Lindholm
2026-06-12 21:38 ` Matt Turner
2026-06-13 12:25 ` Julian Braha
2026-06-14 1:38 ` [PATCH v3 0/8] alpha: enable generic entry infrastructure Michael Cree
8 siblings, 2 replies; 12+ messages in thread
From: Magnus Lindholm @ 2026-06-12 20:26 UTC (permalink / raw)
To: richard.henderson, mattst88, linux-kernel, linux-alpha
Cc: glaubitz, mcree, ink, macro, Magnus Lindholm
Wire Alpha into the generic entry code for syscall entry/exit and
return-to-user handling, while keeping the low-level PALcode return paths
Alpha-specific.
Move most of the syscall entry/exit logic out of entSys and into C helpers
built around the generic entry API. Syscall entry now uses
syscall_enter_from_user_mode(), records Alpha-local syscall metadata in
thread_info, handles the ptrace/seccomp skip decision, and selects the
syscall table target in C. The final target call remains in entry.S so
Alpha can preserve its existing syscall ABI and assembly syscall-table
wrappers.
On return from syscalls, finish Alpha's r0/r19 result encoding and
skipped-syscall restart handling in C before calling
syscall_exit_to_user_mode(). Non-syscall returns to user mode use a
separate alpha_exit_to_user_mode() helper, which disables interrupts,
runs irqentry_exit_to_user_mode_prepare(), and then enters the common
exit_to_user_mode() path.
Keep the remaining PALcode restore handling in assembly. In particular,
kernel-mode returns still need Alpha-specific lockdep IRQ-state annotation
based on the saved processor status, while user-mode returns are handed to
the generic exit-to-user code.
Add the generic-entry support bits needed by common code, including
thread_info.syscall_work, syscall trace support, ptrace sysemu request
numbers, and arch_syscall_is_vdso_sigreturn().
This has been tested by booting Alpha with GENERIC_ENTRY enabled, checking
lockdep IRQ-state accounting, running fork/clone-heavy package builds, and
running the seccomp as well as strace test suites.
Signed-off-by: Magnus Lindholm <linmag7@gmail.com>
---
arch/alpha/Kconfig | 3 +
arch/alpha/include/asm/entry-common.h | 14 ++
arch/alpha/include/asm/ptrace.h | 14 +-
arch/alpha/include/asm/stacktrace.h | 20 ++
arch/alpha/include/asm/syscall.h | 11 +-
arch/alpha/include/asm/thread_info.h | 30 +--
arch/alpha/kernel/asm-offsets.c | 5 +
arch/alpha/kernel/entry.S | 322 +++++++-------------------
arch/alpha/kernel/irq_alpha.c | 14 +-
arch/alpha/kernel/proto.h | 9 +-
arch/alpha/kernel/ptrace.c | 130 +++++------
arch/alpha/kernel/signal.c | 155 ++++++++++---
12 files changed, 362 insertions(+), 365 deletions(-)
create mode 100644 arch/alpha/include/asm/entry-common.h
create mode 100644 arch/alpha/include/asm/stacktrace.h
diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig
index e53ef2d88463..74795e22aafa 100644
--- a/arch/alpha/Kconfig
+++ b/arch/alpha/Kconfig
@@ -2,6 +2,9 @@
config ALPHA
bool
default y
+ select GENERIC_IRQ_ENTRY
+ select GENERIC_ENTRY
+ select HAVE_SYSCALL_TRACEPOINTS
select ARCH_32BIT_USTAT_F_TINODE
select ARCH_HAS_CURRENT_STACK_POINTER
select ARCH_HAS_DMA_OPS if PCI
diff --git a/arch/alpha/include/asm/entry-common.h b/arch/alpha/include/asm/entry-common.h
new file mode 100644
index 000000000000..a811c73454d2
--- /dev/null
+++ b/arch/alpha/include/asm/entry-common.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef ARCH_ALPHA_ENTRY_COMMON_H
+#define ARCH_ALPHA_ENTRY_COMMON_H
+
+#include <asm/stacktrace.h> /* For on_thread_stack() */
+#include <asm/syscall.h>
+
+#define arch_exit_to_user_mode_work arch_exit_to_user_mode_work
+
+static __always_inline void arch_exit_to_user_mode_work(struct pt_regs *regs,
+ unsigned long ti_work)
+{
+}
+#endif
diff --git a/arch/alpha/include/asm/ptrace.h b/arch/alpha/include/asm/ptrace.h
index 8e0a589e2d15..430e8dc27ff7 100644
--- a/arch/alpha/include/asm/ptrace.h
+++ b/arch/alpha/include/asm/ptrace.h
@@ -3,7 +3,7 @@
#define _ASMAXP_PTRACE_H
#include <uapi/asm/ptrace.h>
-
+#include <asm/irqflags.h>
#define arch_has_single_step() (1)
#define user_mode(regs) (((regs)->ps & 8) != 0)
@@ -17,7 +17,9 @@
#define current_pt_regs() \
((struct pt_regs *) ((char *)current_thread_info() + 2*PAGE_SIZE) - 1)
-#define force_successful_syscall_return() (current_pt_regs()->r0 = 0)
+#define force_successful_syscall_return() \
+ (current_thread_info()->syscall_meta \
+ |= ALPHA_SYSCALL_META_FORCE_SUCCESS)
static inline unsigned long regs_return_value(struct pt_regs *regs)
{
@@ -31,4 +33,12 @@ static inline unsigned long user_stack_pointer(struct pt_regs *regs)
return regs->usp;
}
+static __always_inline bool regs_irqs_disabled(struct pt_regs *regs)
+{
+ return arch_irqs_disabled_flags(regs->ps);
+}
+
+/* Syscall emulation defines */
+#define PTRACE_SYSEMU 0x1d
+#define PTRACE_SYSEMU_SINGLESTEP 0x1e
#endif
diff --git a/arch/alpha/include/asm/stacktrace.h b/arch/alpha/include/asm/stacktrace.h
new file mode 100644
index 000000000000..f006d6f00fd0
--- /dev/null
+++ b/arch/alpha/include/asm/stacktrace.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_ALPHA_STACKTRACE_H
+#define _ASM_ALPHA_STACKTRACE_H
+
+#include <linux/compiler_attributes.h>
+#include <linux/types.h>
+
+#include <asm/current.h>
+#include <asm/processor.h>
+#include <asm/thread_info.h>
+
+/*
+ * Report whether the current stack pointer lies in the same
+ * THREAD_SIZE-aligned region as current->stack.
+ */
+static __always_inline bool on_thread_stack(void)
+{
+	const unsigned long region_mask = ~(THREAD_SIZE - 1);
+	unsigned long stack_base = (unsigned long)current->stack;
+	unsigned long cur_sp = (unsigned long)current_stack_pointer;
+
+	/* Equal high bits <=> both addresses fall in the same aligned region. */
+	return (stack_base & region_mask) == (cur_sp & region_mask);
+}
+
+#endif /* _ASM_ALPHA_STACKTRACE_H */
diff --git a/arch/alpha/include/asm/syscall.h b/arch/alpha/include/asm/syscall.h
index 1e78cbd46faf..c1394910f584 100644
--- a/arch/alpha/include/asm/syscall.h
+++ b/arch/alpha/include/asm/syscall.h
@@ -8,6 +8,8 @@
#include <linux/types.h>
#include <asm/ptrace.h>
+extern void *sys_call_table[];
+
static inline int syscall_get_arch(struct task_struct *task)
{
return AUDIT_ARCH_ALPHA;
@@ -104,10 +106,17 @@ static inline void syscall_set_return_value(struct task_struct *task,
}
/* Restore the original syscall nr after seccomp/ptrace modified regs->r1. */
+
static inline void syscall_rollback(struct task_struct *task,
struct pt_regs *regs)
{
- regs->r1 = regs->r2;
+ unsigned long nr = task_thread_info(task)->syscall_saved_nr;
+
+ regs->r1 = nr;
}
+static inline bool arch_syscall_is_vdso_sigreturn(struct pt_regs *regs)
+{
+ return false;
+}
#endif /* _ASM_ALPHA_SYSCALL_H */
diff --git a/arch/alpha/include/asm/thread_info.h b/arch/alpha/include/asm/thread_info.h
index 1552ecca8520..d781ac79106d 100644
--- a/arch/alpha/include/asm/thread_info.h
+++ b/arch/alpha/include/asm/thread_info.h
@@ -12,16 +12,21 @@
#endif
#ifndef __ASSEMBLER__
+
struct thread_info {
struct pcb_struct pcb; /* palcode state */
struct task_struct *task; /* main task structure */
- unsigned int flags; /* low level flags */
+ unsigned long flags; /* low level flags */
+ unsigned long syscall_work; /* SYSCALL_WORK_* flags */
unsigned int ieee_state; /* see fpu.h */
unsigned cpu; /* current CPU */
- int preempt_count; /* 0 => preemptable, <0 => BUG */
+ int preempt_count; /* 0 => preemptable, <0 => BUG */
unsigned int status; /* thread-synchronous flags */
+ unsigned long syscall_saved_r19;
+ unsigned long syscall_meta;
+ unsigned long syscall_saved_nr;
int bpt_nsaved;
unsigned long bpt_addr[2]; /* breakpoint handling */
@@ -50,6 +55,9 @@ register unsigned long *current_stack_pointer __asm__ ("$30");
#define THREAD_SIZE_ORDER 1
#define THREAD_SIZE (2*PAGE_SIZE)
+#define ALPHA_SYSCALL_META_SKIP 0x2
+#define ALPHA_SYSCALL_META_FORCE_SUCCESS 0x4
+
/*
* Thread information flags:
* - these are process state flags and used from assembly
@@ -68,6 +76,7 @@ register unsigned long *current_stack_pointer __asm__ ("$30");
#define TIF_SECCOMP 6 /* seccomp syscall filtering active */
#define TIF_SYSCALL_TRACEPOINT 7 /* syscall tracepoint instrumentation */
#define TIF_DIE_IF_KERNEL 9 /* dik recursion lock */
+#define TIF_UPROBE 10 /* uprobe breakpoint or singlestep */
#define TIF_MEMDIE 13 /* is terminating due to OOM killer */
#define TIF_POLLING_NRFLAG 14 /* idle is polling for TIF_NEED_RESCHED */
@@ -80,22 +89,7 @@ register unsigned long *current_stack_pointer __asm__ ("$30");
#define _TIF_SECCOMP (1<<TIF_SECCOMP)
#define _TIF_POLLING_NRFLAG (1<<TIF_POLLING_NRFLAG)
#define _TIF_SYSCALL_TRACEPOINT (1<<TIF_SYSCALL_TRACEPOINT)
-
-/*
- * Work to do on syscall entry (in entry.S).
- * If you want this to exactly mirror what entry.S checks, keep it aligned
- * with the mask used before branching to syscall_trace_enter().
- */
-#ifdef CONFIG_AUDITSYSCALL
-# define _TIF_SYSCALL_WORK (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP \
- | _TIF_SYSCALL_TRACEPOINT)
-#else
-# define _TIF_SYSCALL_WORK (_TIF_SYSCALL_TRACE | _TIF_SECCOMP | _TIF_SYSCALL_TRACEPOINT)
-#endif
-
-/* Work to do on interrupt/exception return. */
-#define _TIF_WORK_MASK (_TIF_SIGPENDING | _TIF_NEED_RESCHED | \
- _TIF_NOTIFY_RESUME | _TIF_NOTIFY_SIGNAL)
+#define _TIF_UPROBE (1 << TIF_UPROBE)
#define TS_UAC_NOPRINT 0x0001 /* ! Preserve the following three */
#define TS_UAC_NOFIX 0x0002 /* ! flags as they match */
diff --git a/arch/alpha/kernel/asm-offsets.c b/arch/alpha/kernel/asm-offsets.c
index 1d3bfca319ae..c89792c86044 100644
--- a/arch/alpha/kernel/asm-offsets.c
+++ b/arch/alpha/kernel/asm-offsets.c
@@ -22,6 +22,7 @@ static void __used foo(void)
DEFINE(SP_OFF, offsetof(struct pt_regs, ps));
DEFINE(SIZEOF_PT_REGS, sizeof(struct pt_regs));
+ DEFINE(TI_SYSCALL_META, offsetof(struct thread_info, syscall_meta));
BLANK();
DEFINE(SWITCH_STACK_SIZE, sizeof(struct switch_stack));
@@ -30,4 +31,8 @@ static void __used foo(void)
DEFINE(HAE_CACHE, offsetof(struct alpha_machine_vector, hae_cache));
DEFINE(HAE_REG, offsetof(struct alpha_machine_vector, hae_register));
DEFINE(PT_REGS_USP, offsetof(struct pt_regs, usp));
+
+ DEFINE(TI_FLAGS, offsetof(struct thread_info, flags));
+ DEFINE(TI_SYSCALL_WORK, offsetof(struct thread_info, syscall_work));
+ DEFINE(TI_STATUS, offsetof(struct thread_info, status));
}
diff --git a/arch/alpha/kernel/entry.S b/arch/alpha/kernel/entry.S
index 9f2608de2544..bfa116d455fb 100644
--- a/arch/alpha/kernel/entry.S
+++ b/arch/alpha/kernel/entry.S
@@ -36,67 +36,10 @@
.size \func, . - \func
.endm
-/*
- * SYSCALL_SKIP_RETURN_RESTART_GATE
- *
- * Used when syscall dispatch is skipped (seccomp/ptrace injected nr=-1).
- * - Ensure we never return r0==-1 with a3==0 (success); convert to ENOSYS.
- * - Gate whether syscall restart is allowed by preserving restart context
- * only for ERESTART* returns. Result:
- * $26 = 0 => restart allowed
- * $26 = 1 => restart NOT allowed
- * $18 = preserved syscall nr (regs->r2) if restart allowed, else 0
- */
-.macro SYSCALL_SKIP_RETURN_RESTART_GATE
- /* Fix up invalid "-1 success" return state. */
- ldq $19, 72($sp) /* a3 */
- bne $19, 1f /* already error => skip fixup */
-
- ldq $20, 0($sp) /* r0 */
- lda $21, -1($31)
- cmpeq $20, $21, $22
- beq $22, 1f /* r0 != -1 => skip fixup */
-
-
- lda $20, ENOSYS($31)
- stq $20, 0($sp) /* r0 = ENOSYS */
- lda $19, 1($31)
- stq $19, 72($sp) /* a3 = 1 */
-1:
- /* Restart gating: success is never restartable here. */
- ldq $19, 72($sp) /* a3 */
- beq $19, 3f /* success => not restartable */
-
- ldq $20, 0($sp) /* r0 (positive errno if a3==1) */
- lda $21, ERESTARTSYS($31)
- cmpeq $20, $21, $22
- bne $22, 2f
- lda $21, ERESTARTNOINTR($31)
- cmpeq $20, $21, $22
- bne $22, 2f
- lda $21, ERESTARTNOHAND($31)
- cmpeq $20, $21, $22
- bne $22, 2f
- lda $21, ERESTART_RESTARTBLOCK($31)
- cmpeq $20, $21, $22
- bne $22, 2f
-
-3: /* Not a restart code (or success) => restart NOT allowed. */
- addq $31, 1, $26 /* $26=1 => restart NOT allowed */
- mov 0, $18
- br 4f
-
-2: /* Restart allowed. */
- ldq $18, 16($sp) /* preserved syscall nr (regs->r2) */
- mov $31, $26 /* $26=0 => restart allowed */
- br 4f
-4:
-.endm
-
-.macro LOCKDEP_HARDIRQS_ON_RESTORE
+.macro LOCKDEP_HARDIRQS_ON_RESTORE psreg
#ifdef CONFIG_PROVE_LOCKING
/* a0 = saved PS */
- ldq $16, SP_OFF($sp)
+ ldq $16, \psreg
/* a1 = callsite IP for lockdep */
lda $17, 1f
@@ -248,7 +191,7 @@
CFI_START_OSF_FRAME entInt
SAVE_ALL
lda $8, 0x3fff
- lda $26, ret_from_sys_call
+ lda $26, ret_from_exception
bic $sp, $8, $8
mov $sp, $19
jsr $31, do_entInt
@@ -257,7 +200,7 @@ CFI_END_OSF_FRAME entInt
CFI_START_OSF_FRAME entArith
SAVE_ALL
lda $8, 0x3fff
- lda $26, ret_from_sys_call
+ lda $26, ret_from_exception
bic $sp, $8, $8
mov $sp, $18
jsr $31, do_entArith
@@ -305,13 +248,13 @@ CFI_START_OSF_FRAME entMM
.cfi_restore $15
.cfi_adjust_cfa_offset -64
/* finish up the syscall as normal. */
- br ret_from_sys_call
+ br ret_from_exception
CFI_END_OSF_FRAME entMM
CFI_START_OSF_FRAME entIF
SAVE_ALL
lda $8, 0x3fff
- lda $26, ret_from_sys_call
+ lda $26, ret_from_exception
bic $sp, $8, $8
mov $sp, $17
jsr $31, do_entIF
@@ -440,7 +383,7 @@ CFI_START_OSF_FRAME entUna
.cfi_restore $28
.cfi_restore $29
.cfi_adjust_cfa_offset -256
- LOCKDEP_HARDIRQS_ON_RESTORE
+ LOCKDEP_HARDIRQS_ON_RESTORE SP_OFF($sp)
call_pal PAL_rti
.align 4
@@ -487,18 +430,19 @@ entUnaUser:
.cfi_restore $14
.cfi_restore $15
.cfi_adjust_cfa_offset -64
- br ret_from_sys_call
+ br ret_from_exception
CFI_END_OSF_FRAME entUna
CFI_START_OSF_FRAME entDbg
SAVE_ALL
lda $8, 0x3fff
- lda $26, ret_from_sys_call
+ lda $26, ret_from_exception
bic $sp, $8, $8
mov $sp, $16
jsr $31, do_entDbg
CFI_END_OSF_FRAME entDbg
+
/*
* The system call entry point is special. Most importantly, it looks
* like a function call to userspace as far as clobbered registers. We
@@ -516,9 +460,7 @@ CFI_END_OSF_FRAME entDbg
* For seccomp/ptrace/generic syscall helpers we track the syscall
* number separately:
* - regs->r1: current (mutable) syscall number (may be changed or set to -1)
- * - regs->r2: original syscall number for restart/rollback
*
- * On entry PAL provides the syscall number in r0; copy it into r1/r2.
*/
.align 4
@@ -531,203 +473,117 @@ CFI_END_OSF_FRAME entDbg
.cfi_rel_offset $gp, 16
entSys:
SAVE_ALL
- ldq $1, 0($sp) /* syscall nr from saved r0 */
- stq $1, 8($sp) /* regs->r1 = shadow syscall nr */
- stq $1, 16($sp) /* regs->r2 = restart syscall nr */
- /* Syscalls always enter from user mode: snapshot USP into pt_regs->usp */
+ ldq $1, 0($sp) /* syscall nr from saved r0 */
+ stq $1, 8($sp) /* regs->r1 = shadow syscall nr */
+
mov $0, $8
call_pal PAL_rdusp
- stq $0, PT_REGS_USP($sp)
+ stq $0, PT_REGS_USP($sp)
mov $8, $0
-
lda $8, 0x3fff
bic $sp, $8, $8
- lda $4, NR_syscalls($31)
+
stq $16, SP_OFF+24($sp)
- lda $5, sys_call_table
- lda $27, sys_ni_syscall
- cmpult $0, $4, $4
- ldl $3, TI_FLAGS($8)
stq $17, SP_OFF+32($sp)
- s8addq $0, $5, $5
stq $18, SP_OFF+40($sp)
- .cfi_rel_offset $16, SP_OFF+24
- .cfi_rel_offset $17, SP_OFF+32
- .cfi_rel_offset $18, SP_OFF+40
- lda $6, _TIF_SYSCALL_WORK
- and $3, $6, $3
- bne $3, strace
- beq $4, 1f
- ldq $27, 0($5)
-1: ldq $0, 8($sp) /* syscall nr shadow (regs->r1) */
+ mov $0, $1
+ lda $16, 7
+ call_pal PAL_swpipl
+ mov $1, $0
+ mov $sp, $16
+ mov $0, $17 /* a1 = syscall number */
+
+ DO_SWITCH_STACK
+ jsr $26, alpha_syscall_enter_select
+ ldgp $gp, 0($26)
+ UNDO_SWITCH_STACK
+
+ /*
+ * alpha_syscall_enter_select() returned the handler address in $0 and
+ * set ALPHA_SYSCALL_META_SKIP in TI_SYSCALL_META if dispatch is skipped.
+ */
+ lda $8, 0x3fff
+ bic $sp, $8, $8
+ ldq $3, TI_SYSCALL_META($8)
+ lda $6, ALPHA_SYSCALL_META_SKIP
+ and $3, $6, $6
+ bne $6, skip_dispatch
+
+
+ mov $0, $27
+ ldq $16, SP_OFF+24($sp)
+ ldq $17, SP_OFF+32($sp)
+ ldq $18, SP_OFF+40($sp)
+ ldq $19, 72($sp)
+ ldq $20, 80($sp)
+ ldq $21, 88($sp)
jsr $26, ($27), sys_ni_syscall
ldgp $gp, 0($26)
- blt $0, $syscall_error /* the call failed */
-$ret_success:
- stq $0, 0($sp)
- stq $31, 72($sp) /* a3=0 => no error */
- .align 4
- .globl ret_from_sys_call
-ret_from_sys_call:
- cmovne $26, 0, $18 /* $18 = 0 => non-restartable */
- ldq $0, SP_OFF($sp)
- and $0, 8, $0
- beq $0, ret_to_kernel
-ret_to_user:
- /* Make sure need_resched and sigpending don't change between
- sampling and the rti. */
- lda $16, 7
- call_pal PAL_swpipl
- ldl $17, TI_FLAGS($8)
- and $17, _TIF_WORK_MASK, $2
- bne $2, work_pending
+skip_dispatch:
+ mov $0, $17 /* raw ret; ignored if SKIP is set */
+ mov $sp, $16 /* regs */
+
+ DO_SWITCH_STACK
+ jsr $26, alpha_finish_syscall_to_user_mode
+ ldgp $gp, 0($26)
+ UNDO_SWITCH_STACK
+
restore_all:
+ lda $8, 0x3fff
+ bic $sp, $8, $8
ldl $2, TI_STATUS($8)
and $2, TS_SAVED_FP | TS_RESTORE_FP, $3
bne $3, restore_fpu
restore_other:
.cfi_remember_state
- LOCKDEP_HARDIRQS_ON_RESTORE
RESTORE_ALL
call_pal PAL_rti
-ret_to_kernel:
- .cfi_restore_state
- lda $16, 7
- call_pal PAL_swpipl
- br restore_other
-
- .align 3
-$syscall_error:
- /* Restart syscall nr comes from saved r2 (preserved even if r0 overwritten). */
- ldq $18, 16($sp) /* old syscall nr for restart */
-
- ldq $19, 72($sp) /* .. and this a3 */
- subq $31, $0, $0 /* with error in v0 */
- addq $31, 1, $1 /* set a3 for errno return */
- stq $0, 0($sp)
- mov $31, $26 /* tell "ret_from_sys_call" we can restart */
- stq $1, 72($sp) /* a3 for return */
- br ret_from_sys_call
-
-/*
- * Do all cleanup when returning from all interrupts and system calls.
- *
- * Arguments:
- * $8: current.
- * $17: TI_FLAGS.
- * $18: The old syscall number, or zero if this is not a return
- * from a syscall that errored and is possibly restartable.
- * $19: The old a3 value
- */
-
- .align 4
- .type work_pending, @function
-work_pending:
- and $17, _TIF_NOTIFY_RESUME | _TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL, $2
- bne $2, $work_notifysig
-
-$work_resched:
- /*
- * We can get here only if we returned from syscall without SIGPENDING
- * or got through work_notifysig already. Either case means no syscall
- * restarts for us, so let $18 and $19 burn.
- */
- jsr $26, alpha_schedule_user_work
- mov 0, $18
- br ret_to_user
+ret_to_user_from_syscall:
+ lda $8, 0x3fff
+ bic $sp, $8, $8
-$work_notifysig:
mov $sp, $16
DO_SWITCH_STACK
- jsr $26, do_work_pending
+ jsr $26, alpha_syscall_exit_to_user_mode
+ ldgp $gp, 0($26)
UNDO_SWITCH_STACK
br restore_all
-/*
- * PTRACE syscall handler
- */
-
.align 4
- .type strace, @function
-strace:
- /* set up signal stack, call syscall_trace */
- // NB: if anyone adds preemption, this block will need to be protected
- ldl $1, TI_STATUS($8)
- and $1, TS_SAVED_FP, $3
- or $1, TS_SAVED_FP, $2
- bne $3, 1f
- stl $2, TI_STATUS($8)
- bsr $26, __save_fpu
-1:
- DO_SWITCH_STACK
- jsr $26, syscall_trace_enter /* returns the syscall number */
- UNDO_SWITCH_STACK
-
- stq $0, 8($sp) /* regs->r1 = shadow syscall nr */
-
- /* get the arguments back.. */
- ldq $16, SP_OFF+24($sp)
- ldq $17, SP_OFF+32($sp)
- ldq $18, SP_OFF+40($sp)
- ldq $19, 72($sp)
- ldq $20, 80($sp)
- ldq $21, 88($sp)
+ .globl ret_from_sys_call
+ret_from_sys_call:
+ ldq $0, SP_OFF($sp)
+ and $0, 8, $0
+ beq $0, ret_to_kernel
+ br ret_to_user_from_syscall
- /* nr == -1: internal skip-dispatch or userspace syscall(-1)? */
- lda $6, -1($31)
- cmpeq $0, $6, $6
- bne $6, $strace_skip_call /* nr == -1 => dispatch */
-
- /* get the system call pointer.. */
- lda $1, NR_syscalls($31)
- lda $2, sys_call_table
- lda $27, sys_ni_syscall
- cmpult $0, $1, $1
- s8addq $0, $2, $2
- beq $1, 1f
- ldq $27, 0($2)
-1: jsr $26, ($27), sys_gettimeofday
-ret_from_straced:
- ldgp $gp, 0($26)
+ret_from_exception:
+ ldq $0, SP_OFF($sp)
+ and $0, 8, $0
+ beq $0, ret_to_kernel
+ br ret_to_user_from_exception
- /* check return.. */
- blt $0, $strace_error /* the call failed */
-$strace_success:
- stq $31, 72($sp) /* a3=0 => no error */
- stq $0, 0($sp) /* save return value */
+ret_to_user_from_exception:
-$strace_skip_call:
- SYSCALL_SKIP_RETURN_RESTART_GATE
+ mov $sp, $16
DO_SWITCH_STACK
- jsr $26, syscall_trace_leave
+ jsr $26, alpha_exit_to_user_mode
+ ldgp $gp, 0($26)
UNDO_SWITCH_STACK
- br $31, ret_from_sys_call
-
- .align 3
-$strace_error:
- ldq $18, 16($sp) /* restart syscall nr */
- ldq $19, 72($sp) /* .. and this a3 */
-
- subq $31, $0, $0 /* with error in v0 */
- addq $31, 1, $1 /* set a3 for errno return */
- stq $0, 0($sp)
- stq $1, 72($sp) /* a3 for return */
+ br restore_all
- DO_SWITCH_STACK
- mov $18, $9 /* save old syscall number */
- mov $19, $10 /* save old a3 */
- jsr $26, syscall_trace_leave
- mov $9, $18
- mov $10, $19
- UNDO_SWITCH_STACK
+ret_to_kernel:
+ .cfi_restore_state
+ lda $16, 7
+ call_pal PAL_swpipl
+ LOCKDEP_HARDIRQS_ON_RESTORE SP_OFF($sp)
+ br restore_other
- mov $31, $26 /* tell "ret_from_sys_call" we can restart */
- br ret_from_sys_call
CFI_END_OSF_FRAME entSys
/*
@@ -815,7 +671,6 @@ restore_fpu:
br restore_other
#undef V
-\f
/*
* The meat of the context switch code.
*/
@@ -851,7 +706,7 @@ alpha_switch_to:
.align 4
.ent ret_from_fork
ret_from_fork:
- lda $26, ret_to_user
+ lda $26, ret_to_user_from_exception
mov $17, $16
jmp $31, schedule_tail
.end ret_from_fork
@@ -868,7 +723,7 @@ ret_from_kernel_thread:
mov $9, $27
mov $10, $16
jsr $26, ($9)
- br $31, ret_to_user
+ br $31, ret_to_user_from_exception
.end ret_from_kernel_thread
\f
@@ -910,12 +765,9 @@ fork_like clone3
.ent sys_\name
sys_\name:
.prologue 0
- lda $9, ret_from_straced
- cmpult $26, $9, $9
+ mov $sp, $10
lda $sp, -SWITCH_STACK_SIZE($sp)
jsr $26, do_\name
- bne $9, 1f
- jsr $26, syscall_trace_leave
1: br $1, undo_switch_stack
br ret_from_sys_call
.end sys_\name
diff --git a/arch/alpha/kernel/irq_alpha.c b/arch/alpha/kernel/irq_alpha.c
index 736294d3dd51..ac941172ae66 100644
--- a/arch/alpha/kernel/irq_alpha.c
+++ b/arch/alpha/kernel/irq_alpha.c
@@ -105,20 +105,24 @@ void notrace lockdep_on_restore(unsigned long ps,
unsigned long ip)
{
#ifdef CONFIG_PROVE_LOCKING
- /* Restoring IPL==7 means interrupts remain disabled. */
+ /*
+ * If PAL_rti will restore IPL == 7, IRQs remain disabled.
+ * There is no hardirqs-on transition to annotate.
+ */
if ((ps & 7) == 7)
return;
/*
- * If hardware IRQs are already enabled here, then emitting a
- * hardirqs-on transition is redundant.
+ * This helper is meant to run before PAL_rti, after entry.S has
+ * forced IPL to 7. If IRQs are already enabled, do not emit a
+ * fake transition.
*/
if (!irqs_disabled())
return;
/*
- * Only emit the transition if lockdep currently believes
- * hardirqs are off.
+ * Only emit an ON transition if lockdep currently tracks hardirqs
+ * as off.
*/
if (lockdep_hardirqs_enabled())
return;
diff --git a/arch/alpha/kernel/proto.h b/arch/alpha/kernel/proto.h
index 9b262ef09a3a..f138bd494628 100644
--- a/arch/alpha/kernel/proto.h
+++ b/arch/alpha/kernel/proto.h
@@ -2,6 +2,7 @@
#include <linux/interrupt.h>
#include <linux/screen_info.h>
#include <linux/io.h>
+#include <asm/ptrace.h>
/* Prototypes of functions used across modules here in this directory. */
@@ -164,16 +165,18 @@ extern void pcibios_claim_one_bus(struct pci_bus *);
/* ptrace.c */
extern int ptrace_set_bpt (struct task_struct *child);
extern int ptrace_cancel_bpt (struct task_struct *child);
-extern void syscall_trace_leave(void);
-extern unsigned long syscall_trace_enter(void);
/* signal.c */
struct sigcontext;
extern void do_sigreturn(struct sigcontext __user *);
struct rt_sigframe;
extern void do_rt_sigreturn(struct rt_sigframe __user *);
-extern void do_work_pending(struct pt_regs *, unsigned long, unsigned long, unsigned long);
extern void alpha_schedule_user_work(void);
+extern void do_signal(struct pt_regs *regs, unsigned long r0, unsigned long r19);
+extern void alpha_syscall_exit_to_user_mode(struct pt_regs *regs);
+extern void alpha_exit_to_user_mode(struct pt_regs *regs);
+extern void alpha_finish_syscall_to_user_mode(struct pt_regs *regs, long ret);
+extern unsigned long alpha_syscall_enter_select(struct pt_regs *regs, long syscall);
/* traps.c */
extern void dik_show_regs(struct pt_regs *regs, unsigned long *r9_15);
diff --git a/arch/alpha/kernel/ptrace.c b/arch/alpha/kernel/ptrace.c
index 69eb337347df..e1d8c2f69688 100644
--- a/arch/alpha/kernel/ptrace.c
+++ b/arch/alpha/kernel/ptrace.c
@@ -134,18 +134,44 @@ get_reg_addr(struct task_struct * task, unsigned long regno)
/*
* Get contents of register REGNO in task TASK.
*/
-static unsigned long
-get_reg(struct task_struct * task, unsigned long regno)
+
+/*
+ * Return true when REGNO lies in the register-number range accepted by
+ * get_reg() and put_reg(), i.e. 0..65 inclusive.
+ * NOTE(review): the meaning of the upper bound 65 is not visible here;
+ * presumably it is the highest ptrace register index -- confirm against
+ * get_reg_addr().
+ */
+static bool
+valid_regno(unsigned long regno)
+{
+ return regno <= 65;
+}
+
+static long
+get_reg(struct task_struct *task, unsigned long regno)
{
- /* Special hack for fpcr -- combine hardware and software bits. */
+ unsigned long *addr;
+
+ if (!valid_regno(regno))
+ return -EIO;
+
+ /*
+ * Special hack for fpcr -- combine hardware and software bits.
+ */
if (regno == 63) {
- unsigned long fpcr = *get_reg_addr(task, regno);
- unsigned long swcr
- = task_thread_info(task)->ieee_state & IEEE_SW_MASK;
+ unsigned long fpcr;
+ unsigned long swcr;
+
+ addr = get_reg_addr(task, regno);
+ if (!addr)
+ return -EIO;
+
+ fpcr = *addr;
+ swcr = task_thread_info(task)->ieee_state & IEEE_SW_MASK;
swcr = swcr_update_status(swcr, fpcr);
+
return fpcr | swcr;
}
- return *get_reg_addr(task, regno);
+
+ addr = get_reg_addr(task, regno);
+ if (!addr)
+ return -EIO;
+
+ return *addr;
}
static void alpha_elf_fpregs_get(struct task_struct *target,
@@ -271,14 +297,17 @@ static void alpha_elf_gregs_set(struct task_struct *child,
pt->r19 = 1;
}
-
-/*
- * Write contents of register REGNO in task TASK.
- */
static int
put_reg(struct task_struct *task, unsigned long regno, unsigned long data)
{
struct pt_regs *regs = task_pt_regs(task);
+ unsigned long *addr;
+ unsigned long old_r0 = regs->r0;
+
+ if (regno == 31)
+ return 0;
+ if (!valid_regno(regno))
+ return -EIO;
if (regno == 63) {
task_thread_info(task)->ieee_state
@@ -287,24 +316,30 @@ put_reg(struct task_struct *task, unsigned long regno, unsigned long data)
data = (data & FPCR_DYN_MASK) | ieee_swcr_to_fpcr(data);
}
- *get_reg_addr(task, regno) = data;
+ addr = get_reg_addr(task, regno);
+ if (!addr)
+ return -EIO;
+
+ *addr = data;
/*
* Alpha historically exposes r0/v0 as the syscall number at a
* syscall-entry stop. The generic-entry conversion keeps the
- * mutable syscall number in regs->r1, so old ptrace users such
- * as strace that skip a syscall by poking r0 to -1 must also
- * update the internal shadow syscall number.
- *
- * Do not mirror other r0 writes. strace later pokes r0 to the
- * injected return value, e.g. 42, while r1 must remain -1.
+ * mutable syscall number in regs->r1.
*/
- if (regno == 0 && data == (unsigned long)-1) {
+ if (regno == 0 && regs->r1 == old_r0 &&
+ (data == (unsigned long)-1 ||
+ (regs->r19 == 0 && data < NR_syscalls))) {
regs->r1 = data;
- regs->r19 = 0;
- }
+ /*
+ * Keep the skip path looking like a clean entry-side syscall
+ * rewrite. Do not touch r19 for ordinary syscall substitution.
+ */
+ if (data == (unsigned long)-1)
+ regs->r19 = 0;
+ }
return 0;
}
@@ -435,25 +470,24 @@ long arch_ptrace(struct task_struct *child, long request,
switch (request) {
/* When I and D space are separate, these will need to be fixed. */
- case PTRACE_PEEKTEXT: /* read word at location addr. */
+ case PTRACE_PEEKTEXT:
case PTRACE_PEEKDATA:
copied = ptrace_access_vm(child, addr, &tmp, sizeof(tmp),
FOLL_FORCE);
ret = -EIO;
if (copied != sizeof(tmp))
break;
-
force_successful_syscall_return();
ret = tmp;
break;
- /* Read register number ADDR. */
case PTRACE_PEEKUSR:
- force_successful_syscall_return();
ret = get_reg(child, addr);
- DBG(DBG_MEM, ("peek $%lu->%#lx\n", addr, ret));
- break;
+ if (ret == -EIO)
+ break;
+ force_successful_syscall_return();
+ break;
/* When I and D space are separate, this will have to be fixed. */
case PTRACE_POKETEXT: /* write the word at location addr. */
case PTRACE_POKEDATA:
@@ -471,47 +505,6 @@ long arch_ptrace(struct task_struct *child, long request,
return ret;
}
-asmlinkage unsigned long syscall_trace_enter(void)
-{
- struct pt_regs *regs = current_pt_regs();
-
- if (test_thread_flag(TIF_SYSCALL_TRACE) &&
- ptrace_report_syscall_entry(regs)) {
- syscall_set_nr(current, regs, -1);
- if (regs->r19 == 0 && regs->r0 == (unsigned long)-1)
- syscall_set_return_value(current, regs, -ENOSYS, 0);
- return -1UL;
- }
-
- /*
- * Do the secure computing after ptrace; failures should be fast.
- * If this fails, seccomp may already have set up the return value
- * (e.g. SECCOMP_RET_ERRNO / TRACE).
- */
- if (secure_computing() == -1) {
- if (regs->r19 == 0 && regs->r0 == (unsigned long)-1)
- syscall_set_return_value(current, regs, -ENOSYS, 0);
- syscall_set_nr(current, regs, -1);
- return -1UL;
- }
-
-#ifdef CONFIG_AUDITSYSCALL
- audit_syscall_entry(syscall_get_nr(current, regs),
- regs->r16, regs->r17, regs->r18, regs->r19);
-#endif
- return syscall_get_nr(current, regs);
-}
-
-
-
-asmlinkage void
-syscall_trace_leave(void)
-{
- audit_syscall_exit(current_pt_regs());
- if (test_thread_flag(TIF_SYSCALL_TRACE))
- ptrace_report_syscall_exit(current_pt_regs(), 0);
-}
-
/*
* Minimal regset support for Alpha.
*
@@ -522,7 +515,6 @@ syscall_trace_leave(void)
* regset_get should return 0 on success. So call dump_elf_thread()
* directly and return membuf_write()'s result.
*/
-
static int alpha_regset_set(struct task_struct *target,
const struct user_regset *regset,
unsigned int pos, unsigned int count,
diff --git a/arch/alpha/kernel/signal.c b/arch/alpha/kernel/signal.c
index ce40a49b8496..9dae17f288c4 100644
--- a/arch/alpha/kernel/signal.c
+++ b/arch/alpha/kernel/signal.c
@@ -27,10 +27,9 @@
#include <linux/uaccess.h>
#include <asm/sigcontext.h>
#include <asm/ucontext.h>
-
+#include <linux/entry-common.h>
#include "proto.h"
-
#define DEBUG_SIG 0
#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
@@ -41,14 +40,6 @@ asmlinkage void ret_from_sys_call(void);
* The OSF/1 sigprocmask calling sequence is different from the
* C sigprocmask() sequence..
*/
-
-asmlinkage void alpha_schedule_user_work(void)
-{
- local_irq_enable();
- schedule();
- local_irq_disable();
-}
-
SYSCALL_DEFINE2(osf_sigprocmask, int, how, unsigned long, newmask)
{
sigset_t oldmask;
@@ -465,6 +456,7 @@ syscall_restart(unsigned long r0, unsigned long r19,
fallthrough;
case ERESTARTNOINTR:
regs->r0 = r0; /* reset v0 and a3 and replay syscall */
+ regs->r1 = r0;
regs->r19 = r19;
regs->pc -= 4;
break;
@@ -488,7 +480,7 @@ syscall_restart(unsigned long r0, unsigned long r19,
* restart. "r0" is also used as an indicator whether we can restart at
* all (if we get here from anything but a syscall return, it will be 0)
*/
-static void
+void
do_signal(struct pt_regs *regs, unsigned long r0, unsigned long r19)
{
unsigned long single_stepping = ptrace_cancel_bpt(current);
@@ -511,12 +503,14 @@ do_signal(struct pt_regs *regs, unsigned long r0, unsigned long r19)
case ERESTARTNOINTR:
/* Reset v0 and a3 and replay syscall. */
regs->r0 = r0;
+ regs->r1 = r0;
regs->r19 = r19;
regs->pc -= 4;
break;
case ERESTART_RESTARTBLOCK:
/* Set v0 to the restart_syscall and replay */
regs->r0 = __NR_restart_syscall;
+ regs->r1 = __NR_restart_syscall;
regs->pc -= 4;
break;
}
@@ -527,27 +521,124 @@ do_signal(struct pt_regs *regs, unsigned long r0, unsigned long r19)
ptrace_set_bpt(current); /* re-set breakpoint */
}
-void
-do_work_pending(struct pt_regs *regs, unsigned long thread_flags,
- unsigned long r0, unsigned long r19)
+asmlinkage void alpha_exit_to_user_mode(struct pt_regs *regs)
{
- do {
- if (thread_flags & _TIF_NEED_RESCHED) {
- local_irq_enable();
- schedule();
- } else {
- local_irq_enable();
- if (thread_flags & (_TIF_SIGPENDING|_TIF_NOTIFY_SIGNAL)) {
- preempt_disable();
- save_fpu();
- preempt_enable();
- do_signal(regs, r0, r19);
- r0 = 0;
- } else {
- resume_user_mode_work(regs);
- }
+ local_irq_disable();
+ irqentry_exit_to_user_mode_prepare(regs);
+ exit_to_user_mode();
+}
+
+/*
+ * Called from the assembly label ret_to_user_from_syscall with the saved
+ * pt_regs in a0, for user-mode returns that arrive via ret_from_sys_call.
+ * The Alpha-specific r0/a3 result encoding has already been done by the
+ * time we get here. Delegate syscall-exit work and the final exit-to-user
+ * handling to generic entry code; the low-level PAL restore remains in
+ * assembly.
+ */
+asmlinkage void alpha_syscall_exit_to_user_mode(struct pt_regs *regs)
+{
+ syscall_exit_to_user_mode(regs);
+}
+
+/*
+ * Generic-entry hook: deliver pending signals or arrange a syscall restart.
+ *
+ * do_signal() uses a non-zero r0 argument as the indication that a
+ * syscall restart may be applied. The generic exit-to-user loop can call
+ * this hook more than once before returning to user mode, so the saved
+ * syscall number is consumed on first use; later calls pass 0 and cannot
+ * apply restart handling a second time.
+ */
+void arch_do_signal_or_restart(struct pt_regs *regs)
+{
+	struct thread_info *ti = current_thread_info();
+	unsigned long nr = ti->syscall_saved_nr;
+
+	ti->syscall_saved_nr = 0;
+	do_signal(regs, nr, ti->syscall_saved_r19);
+}
+
+/*
+ * Syscall-entry helper, called from entSys with the saved pt_regs in a0
+ * and the syscall number in a1.
+ *
+ * Resets ti->syscall_meta, records the entry syscall number in
+ * ti->syscall_saved_nr, saves FP state if TS_SAVED_FP is not already set,
+ * runs syscall_enter_from_user_mode() and stores the number it returns
+ * via syscall_set_nr().
+ *
+ * Returns the address of the handler to dispatch to: the sys_call_table
+ * entry when the number is below NR_syscalls, sys_ni_syscall otherwise.
+ * When entry work was pending and the number came back as -1,
+ * ALPHA_SYSCALL_META_SKIP is set in ti->syscall_meta and the caller does
+ * not dispatch to the returned address.
+ */
+asmlinkage unsigned long
+alpha_syscall_enter_select(struct pt_regs *regs, long syscall)
+{
+ struct thread_info *ti = current_thread_info();
+ unsigned long work;
+ unsigned long nr;
+ unsigned long fn = (unsigned long)sys_ni_syscall;
+
+ ti->syscall_meta = 0;
+ ti->syscall_saved_nr = syscall;
+
+ /* Save FP state only once; TS_SAVED_FP marks that it is already saved. */
+ if (!(ti->status & TS_SAVED_FP)) {
+ ti->status |= TS_SAVED_FP;
+ __save_fpu();
+ }
+
+ /*
+ * Sample the entry-work bits before the entry hooks run; the value is
+ * used below to tell a skip marker from a plain syscall(-1).
+ */
+ work = READ_ONCE(ti->syscall_work) & SYSCALL_WORK_ENTER;
+
+ nr = syscall_enter_from_user_mode(regs, syscall);
+
+ syscall_set_nr(current, regs, nr);
+ /*
+ * In the unified path, nr == -1 is ambiguous:
+ * - without syscall work: syscall(-1), dispatch to sys_ni_syscall
+ * - with syscall work: ptrace/seccomp skip marker
+ */
+ if (work && (long)nr == -1L) {
+ ti->syscall_meta = ALPHA_SYSCALL_META_SKIP;
+ return fn; /* ignored by asm when SKIP is set */
+ }
+
+ instrumentation_begin();
+ /* Out-of-range numbers keep the sys_ni_syscall default in fn. */
+ if (likely(nr < (unsigned long)NR_syscalls)) {
+ nr = array_index_nospec(nr, NR_syscalls);
+ fn = (unsigned long)sys_call_table[nr];
+ }
+ instrumentation_end();
+
+ return fn;
+}
+
+asmlinkage noinstr void
+alpha_finish_syscall_to_user_mode(struct pt_regs *regs, long ret)
+{
+ struct thread_info *ti = current_thread_info();
+ unsigned long meta = ti->syscall_meta;
+
+ ti->syscall_meta = 0;
+ ti->syscall_saved_r19 = regs->r19;
+
+ instrumentation_begin();
+
+ if (meta & ALPHA_SYSCALL_META_SKIP) {
+ /*
+ * Skip-dispatch path: ptrace/seccomp may already have installed
+ * the return state in r0/r19. Preserve it unless the syscall was
+ * skipped with no explicit return value.
+ *
+ * Generic PTRACE_SET_SYSCALL_INFO changes only the syscall-number
+ * shadow, so r1 == -1 while r0 still contains the original Alpha
+ * syscall number. Legacy PTRACE_POKEUSR based skipping can leave
+ * r0 == -1 with a3/r19 still indicating success. Both represent
+ * an unhandled skipped syscall and should become ENOSYS/a3=1.
+ */
+ if (regs->r1 == (unsigned long)-1 &&
+ (regs->r0 == ti->syscall_saved_nr ||
+ regs->r0 == (unsigned long)-1)) {
+ regs->r0 = ENOSYS;
+ regs->r19 = 1;
}
- local_irq_disable();
- thread_flags = read_thread_flags();
- } while (thread_flags & _TIF_WORK_MASK);
+
+ instrumentation_end();
+
+ syscall_exit_to_user_mode(regs);
+ return;
+ }
+
+ /*
+ * Some successful syscalls, notably legacy ptrace PEEK requests,
+ * return arbitrary data in r0. That data may have the bit pattern
+ * of a negative errno, so do not infer failure from ret < 0 when
+ * arch code explicitly requested a successful Alpha return.
+ */
+ if (meta & ALPHA_SYSCALL_META_FORCE_SUCCESS) {
+ regs->r0 = ret;
+ regs->r19 = 0;
+ } else if (ret < 0) {
+ regs->r0 = -ret;
+ regs->r19 = 1;
+ } else {
+ regs->r0 = ret;
+ regs->r19 = 0;
+ }
+
+ instrumentation_end();
+ syscall_exit_to_user_mode(regs);
}
--
2.53.0
^ permalink raw reply related [flat|nested] 12+ messages in thread