* [PATCH v3 1/4] Make anon_inodes unconditional
From: Christian Brauner @ 2019-04-19 12:09 UTC (permalink / raw)
To: torvalds, viro, jannh, dhowells, oleg, linux-api, linux-kernel
Cc: serge, luto, arnd, ebiederm, keescook, tglx, mtk.manpages, akpm,
cyphar, joel, dancol, Christian Brauner
In-Reply-To: <20190419120904.27502-1-christian@brauner.io>
From: David Howells <dhowells@redhat.com>
Make the anon_inodes facility unconditional so that it can be used by core
VFS code and pidfd code.
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
[christian@brauner.io: adapt commit message to mention pidfds]
Signed-off-by: Christian Brauner <christian@brauner.io>
---
/* changelog */
v1: patch unchanged
v2: patch unchanged
v3: patch unchanged
---
arch/arm/kvm/Kconfig | 1 -
arch/arm64/kvm/Kconfig | 1 -
arch/mips/kvm/Kconfig | 1 -
arch/powerpc/kvm/Kconfig | 1 -
arch/s390/kvm/Kconfig | 1 -
arch/x86/Kconfig | 1 -
arch/x86/kvm/Kconfig | 1 -
drivers/base/Kconfig | 1 -
drivers/char/tpm/Kconfig | 1 -
drivers/dma-buf/Kconfig | 1 -
drivers/gpio/Kconfig | 1 -
drivers/iio/Kconfig | 1 -
drivers/infiniband/Kconfig | 1 -
drivers/vfio/Kconfig | 1 -
fs/Makefile | 2 +-
fs/notify/fanotify/Kconfig | 1 -
fs/notify/inotify/Kconfig | 1 -
init/Kconfig | 10 ----------
18 files changed, 1 insertion(+), 27 deletions(-)
diff --git a/arch/arm/kvm/Kconfig b/arch/arm/kvm/Kconfig
index 3f5320f46de2..f591026347a5 100644
--- a/arch/arm/kvm/Kconfig
+++ b/arch/arm/kvm/Kconfig
@@ -22,7 +22,6 @@ config KVM
bool "Kernel-based Virtual Machine (KVM) support"
depends on MMU && OF
select PREEMPT_NOTIFIERS
- select ANON_INODES
select ARM_GIC
select ARM_GIC_V3
select ARM_GIC_V3_ITS
diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
index a3f85624313e..a67121d419a2 100644
--- a/arch/arm64/kvm/Kconfig
+++ b/arch/arm64/kvm/Kconfig
@@ -23,7 +23,6 @@ config KVM
depends on OF
select MMU_NOTIFIER
select PREEMPT_NOTIFIERS
- select ANON_INODES
select HAVE_KVM_CPU_RELAX_INTERCEPT
select HAVE_KVM_ARCH_TLB_FLUSH_ALL
select KVM_MMIO
diff --git a/arch/mips/kvm/Kconfig b/arch/mips/kvm/Kconfig
index 4528bc9c3cb1..eac25aef21e0 100644
--- a/arch/mips/kvm/Kconfig
+++ b/arch/mips/kvm/Kconfig
@@ -21,7 +21,6 @@ config KVM
depends on MIPS_FP_SUPPORT
select EXPORT_UASM
select PREEMPT_NOTIFIERS
- select ANON_INODES
select KVM_GENERIC_DIRTYLOG_READ_PROTECT
select HAVE_KVM_VCPU_ASYNC_IOCTL
select KVM_MMIO
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index bfdde04e4905..f53997a8ca62 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -20,7 +20,6 @@ if VIRTUALIZATION
config KVM
bool
select PREEMPT_NOTIFIERS
- select ANON_INODES
select HAVE_KVM_EVENTFD
select HAVE_KVM_VCPU_ASYNC_IOCTL
select SRCU
diff --git a/arch/s390/kvm/Kconfig b/arch/s390/kvm/Kconfig
index 767453faacfc..1816ee48eadd 100644
--- a/arch/s390/kvm/Kconfig
+++ b/arch/s390/kvm/Kconfig
@@ -21,7 +21,6 @@ config KVM
prompt "Kernel-based Virtual Machine (KVM) support"
depends on HAVE_KVM
select PREEMPT_NOTIFIERS
- select ANON_INODES
select HAVE_KVM_CPU_RELAX_INTERCEPT
select HAVE_KVM_VCPU_ASYNC_IOCTL
select HAVE_KVM_EVENTFD
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 5ad92419be19..7a70fb58b2d0 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -44,7 +44,6 @@ config X86
#
select ACPI_LEGACY_TABLES_LOOKUP if ACPI
select ACPI_SYSTEM_POWER_STATES_SUPPORT if ACPI
- select ANON_INODES
select ARCH_32BIT_OFF_T if X86_32
select ARCH_CLOCKSOURCE_DATA
select ARCH_CLOCKSOURCE_INIT
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index 72fa955f4a15..fc042419e670 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -27,7 +27,6 @@ config KVM
depends on X86_LOCAL_APIC
select PREEMPT_NOTIFIERS
select MMU_NOTIFIER
- select ANON_INODES
select HAVE_KVM_IRQCHIP
select HAVE_KVM_IRQFD
select IRQ_BYPASS_MANAGER
diff --git a/drivers/base/Kconfig b/drivers/base/Kconfig
index 059700ea3521..03f067da12ee 100644
--- a/drivers/base/Kconfig
+++ b/drivers/base/Kconfig
@@ -174,7 +174,6 @@ source "drivers/base/regmap/Kconfig"
config DMA_SHARED_BUFFER
bool
default n
- select ANON_INODES
select IRQ_WORK
help
This option enables the framework for buffer-sharing between
diff --git a/drivers/char/tpm/Kconfig b/drivers/char/tpm/Kconfig
index 536e55d3919f..f3e4bc490cf0 100644
--- a/drivers/char/tpm/Kconfig
+++ b/drivers/char/tpm/Kconfig
@@ -157,7 +157,6 @@ config TCG_CRB
config TCG_VTPM_PROXY
tristate "VTPM Proxy Interface"
depends on TCG_TPM
- select ANON_INODES
---help---
This driver proxies for an emulated TPM (vTPM) running in userspace.
A device /dev/vtpmx is provided that creates a device pair
diff --git a/drivers/dma-buf/Kconfig b/drivers/dma-buf/Kconfig
index 2e5a0faa2cb1..3fc9c2efc583 100644
--- a/drivers/dma-buf/Kconfig
+++ b/drivers/dma-buf/Kconfig
@@ -3,7 +3,6 @@ menu "DMABUF options"
config SYNC_FILE
bool "Explicit Synchronization Framework"
default n
- select ANON_INODES
select DMA_SHARED_BUFFER
---help---
The Sync File Framework adds explicit syncronization via
diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig
index 3f50526a771f..0f91600c27ae 100644
--- a/drivers/gpio/Kconfig
+++ b/drivers/gpio/Kconfig
@@ -12,7 +12,6 @@ config ARCH_HAVE_CUSTOM_GPIO_H
menuconfig GPIOLIB
bool "GPIO Support"
- select ANON_INODES
help
This enables GPIO support through the generic GPIO library.
You only need to enable this, if you also want to enable
diff --git a/drivers/iio/Kconfig b/drivers/iio/Kconfig
index d08aeb41cd07..1dec0fecb6ef 100644
--- a/drivers/iio/Kconfig
+++ b/drivers/iio/Kconfig
@@ -4,7 +4,6 @@
menuconfig IIO
tristate "Industrial I/O support"
- select ANON_INODES
help
The industrial I/O subsystem provides a unified framework for
drivers for many different types of embedded sensors using a
diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig
index a1fb840de45d..d318bab25860 100644
--- a/drivers/infiniband/Kconfig
+++ b/drivers/infiniband/Kconfig
@@ -25,7 +25,6 @@ config INFINIBAND_USER_MAD
config INFINIBAND_USER_ACCESS
tristate "InfiniBand userspace access (verbs and CM)"
- select ANON_INODES
depends on MMU
---help---
Userspace InfiniBand access support. This enables the
diff --git a/drivers/vfio/Kconfig b/drivers/vfio/Kconfig
index 9de5ed38da83..3798d77d131c 100644
--- a/drivers/vfio/Kconfig
+++ b/drivers/vfio/Kconfig
@@ -22,7 +22,6 @@ menuconfig VFIO
tristate "VFIO Non-Privileged userspace driver framework"
depends on IOMMU_API
select VFIO_IOMMU_TYPE1 if (X86 || S390 || ARM || ARM64)
- select ANON_INODES
help
VFIO provides a framework for secure userspace device drivers.
See Documentation/vfio.txt for more details.
diff --git a/fs/Makefile b/fs/Makefile
index 427fec226fae..35945f8139e6 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -25,7 +25,7 @@ obj-$(CONFIG_PROC_FS) += proc_namespace.o
obj-y += notify/
obj-$(CONFIG_EPOLL) += eventpoll.o
-obj-$(CONFIG_ANON_INODES) += anon_inodes.o
+obj-y += anon_inodes.o
obj-$(CONFIG_SIGNALFD) += signalfd.o
obj-$(CONFIG_TIMERFD) += timerfd.o
obj-$(CONFIG_EVENTFD) += eventfd.o
diff --git a/fs/notify/fanotify/Kconfig b/fs/notify/fanotify/Kconfig
index 735bfb2e9190..521dc91d2cb5 100644
--- a/fs/notify/fanotify/Kconfig
+++ b/fs/notify/fanotify/Kconfig
@@ -1,7 +1,6 @@
config FANOTIFY
bool "Filesystem wide access notification"
select FSNOTIFY
- select ANON_INODES
select EXPORTFS
default n
---help---
diff --git a/fs/notify/inotify/Kconfig b/fs/notify/inotify/Kconfig
index b981fc0c8379..0161c74e76e2 100644
--- a/fs/notify/inotify/Kconfig
+++ b/fs/notify/inotify/Kconfig
@@ -1,6 +1,5 @@
config INOTIFY_USER
bool "Inotify support for userspace"
- select ANON_INODES
select FSNOTIFY
default y
---help---
diff --git a/init/Kconfig b/init/Kconfig
index 4592bf7997c0..be8f97e37a76 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1171,9 +1171,6 @@ config LD_DEAD_CODE_DATA_ELIMINATION
config SYSCTL
bool
-config ANON_INODES
- bool
-
config HAVE_UID16
bool
@@ -1378,14 +1375,12 @@ config HAVE_FUTEX_CMPXCHG
config EPOLL
bool "Enable eventpoll support" if EXPERT
default y
- select ANON_INODES
help
Disabling this option will cause the kernel to be built without
support for epoll family of system calls.
config SIGNALFD
bool "Enable signalfd() system call" if EXPERT
- select ANON_INODES
default y
help
Enable the signalfd() system call that allows to receive signals
@@ -1395,7 +1390,6 @@ config SIGNALFD
config TIMERFD
bool "Enable timerfd() system call" if EXPERT
- select ANON_INODES
default y
help
Enable the timerfd() system call that allows to receive timer
@@ -1405,7 +1399,6 @@ config TIMERFD
config EVENTFD
bool "Enable eventfd() system call" if EXPERT
- select ANON_INODES
default y
help
Enable the eventfd() system call that allows to receive both
@@ -1516,7 +1509,6 @@ config KALLSYMS_BASE_RELATIVE
# syscall, maps, verifier
config BPF_SYSCALL
bool "Enable bpf() system call"
- select ANON_INODES
select BPF
select IRQ_WORK
default n
@@ -1533,7 +1525,6 @@ config BPF_JIT_ALWAYS_ON
config USERFAULTFD
bool "Enable userfaultfd() system call"
- select ANON_INODES
depends on MMU
help
Enable the userfaultfd() system call that allows to intercept and
@@ -1600,7 +1591,6 @@ config PERF_EVENTS
bool "Kernel performance events and counters"
default y if PROFILING
depends on HAVE_PERF_EVENTS
- select ANON_INODES
select IRQ_WORK
select SRCU
help
--
2.21.0
^ permalink raw reply related
* [PATCH v3 2/4] clone: add CLONE_PIDFD
From: Christian Brauner @ 2019-04-19 12:09 UTC (permalink / raw)
To: torvalds, viro, jannh, dhowells, oleg, linux-api, linux-kernel
Cc: serge, luto, arnd, ebiederm, keescook, tglx, mtk.manpages, akpm,
cyphar, joel, dancol, Christian Brauner, Jann Horn
In-Reply-To: <20190419120904.27502-1-christian@brauner.io>
This patchset makes it possible to retrieve pid file descriptors at
process creation time by introducing the new flag CLONE_PIDFD to the
clone() system call. Linus originally suggested to implement this as a
new flag to clone() instead of making it a separate system call. As
spotted by Linus, there is exactly one bit for clone() left.
CLONE_PIDFD creates file descriptors based on the anonymous inode
implementation in the kernel that will also be used to implement the new
mount api. They serve as a simple opaque handle on pids. Logically,
this makes it possible to interpret a pidfd differently, narrowing or
widening the scope of various operations (e.g. signal sending). Thus, a
pidfd cannot just refer to a tgid, but also a tid, or in theory - given
appropriate flag arguments in relevant syscalls - a process group or
session. A pidfd does not represent a privilege. This does not imply it
cannot ever be that way but for now this is not the case.
A pidfd comes with additional information in fdinfo if the kernel supports
procfs. The fdinfo file contains the pid of the process in the caller's
pid namespace in the same format as the procfs status file, i.e. "Pid:\t%d".
As suggested by Oleg, with CLONE_PIDFD the pidfd is returned in the
parent_tidptr argument of clone. This has the advantage that we can
give back the associated pid and the pidfd at the same time.
To remove worries about missing metadata access this patchset comes with
a sample program that illustrates how a combination of CLONE_PIDFD, and
pidfd_send_signal() can be used to gain race-free access to process
metadata through /proc/<pid>. The sample program can easily be
translated into a helper that would be suitable for inclusion in libc so
that users don't have to worry about writing it themselves.
Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Christian Brauner <christian@brauner.io>
Co-developed-by: Jann Horn <jann@thejh.net>
Signed-off-by: Jann Horn <jann@thejh.net>
Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: David Howells <dhowells@redhat.com>
Cc: "Michael Kerrisk (man-pages)" <mtk.manpages@gmail.com>
Cc: Andy Lutomirsky <luto@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Aleksa Sarai <cyphar@cyphar.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
---
/* changelog */
v1:
- Oleg Nesterov <oleg@redhat.com>:
- return pidfd in parent_tidptr argument of clone
This way we can return the pid and the pidfd at the same time to the
caller and can also start pid file descriptor numbering at 0 as is
customary for file descriptors.
- Christian Brauner <christian@brauner.io>:
- update comments to reflect changes based on Oleg's idea
v2:
- Oleg Nesterov <oleg@redhat.com>:
- move put_user() before clone()'s point of no return so we can handle
put_user() errors
- Christian Brauner <christian@brauner.io>:
- change pidfd_create() to also fd_install()
With Oleg's change it makes sense to do the fd_install() right before
the moved put_user().
v3:
- Oleg Nesterov <oleg@redhat.com>:
- block CLONE_PIDFD with CLONE_THREAD until someone really needs this
feature
- ensure that parent_tidptr is pristine so we can use with CLONE_PIDFD
for additional features
- Christian Brauner <christian@brauner.io>:
- return EFAULT on get_user() error, EINVAL on non-zero value
Oleg had originally sketched a piece of code that returned EINVAL when
get_user() fails. However, it is clearer for userspace if EFAULT is
returned when get_user() fails and EINVAL when the retrieved value is
not 0.
---
include/linux/pid.h | 2 +
include/uapi/linux/sched.h | 1 +
kernel/fork.c | 108 +++++++++++++++++++++++++++++++++++--
3 files changed, 107 insertions(+), 4 deletions(-)
diff --git a/include/linux/pid.h b/include/linux/pid.h
index b6f4ba16065a..3c8ef5a199ca 100644
--- a/include/linux/pid.h
+++ b/include/linux/pid.h
@@ -66,6 +66,8 @@ struct pid
extern struct pid init_struct_pid;
+extern const struct file_operations pidfd_fops;
+
static inline struct pid *get_pid(struct pid *pid)
{
if (pid)
diff --git a/include/uapi/linux/sched.h b/include/uapi/linux/sched.h
index 22627f80063e..ed4ee170bee2 100644
--- a/include/uapi/linux/sched.h
+++ b/include/uapi/linux/sched.h
@@ -10,6 +10,7 @@
#define CLONE_FS 0x00000200 /* set if fs info shared between processes */
#define CLONE_FILES 0x00000400 /* set if open files shared between processes */
#define CLONE_SIGHAND 0x00000800 /* set if signal handlers and blocked signals shared */
+#define CLONE_PIDFD 0x00001000 /* set if a pidfd should be placed in parent */
#define CLONE_PTRACE 0x00002000 /* set if we want to let tracing continue on the child too */
#define CLONE_VFORK 0x00004000 /* set if the parent wants the child to wake it up on mm_release */
#define CLONE_PARENT 0x00008000 /* set if we want to have the same parent as the cloner */
diff --git a/kernel/fork.c b/kernel/fork.c
index 9dcd18aa210b..5525837ed80e 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -11,6 +11,7 @@
* management can be a bitch. See 'mm/memory.c': 'copy_page_range()'
*/
+#include <linux/anon_inodes.h>
#include <linux/slab.h>
#include <linux/sched/autogroup.h>
#include <linux/sched/mm.h>
@@ -21,8 +22,10 @@
#include <linux/sched/task.h>
#include <linux/sched/task_stack.h>
#include <linux/sched/cputime.h>
+#include <linux/seq_file.h>
#include <linux/rtmutex.h>
#include <linux/init.h>
+#include <linux/fsnotify.h>
#include <linux/unistd.h>
#include <linux/module.h>
#include <linux/vmalloc.h>
@@ -1662,6 +1665,58 @@ static inline void rcu_copy_process(struct task_struct *p)
#endif /* #ifdef CONFIG_TASKS_RCU */
}
+/*
+ * ->release() handler for pidfd files: detach the struct pid stored in the
+ * file's private data and drop the reference that was held on it.
+ */
+static int pidfd_release(struct inode *inode, struct file *file)
+{
+	struct pid *held = file->private_data;
+
+	/* Clear the pointer before the reference goes away. */
+	file->private_data = NULL;
+	put_pid(held);
+
+	return 0;
+}
+
+#ifdef CONFIG_PROC_FS
+/*
+ * ->show_fdinfo() handler: print "Pid:\t<nr>\n", where <nr> is the pid's
+ * number in the pid namespace that proc_pid_ns() reports for the inode of
+ * the fdinfo file being read.
+ */
+static void pidfd_show_fdinfo(struct seq_file *m, struct file *f)
+{
+	struct pid *pid = f->private_data;
+	struct pid_namespace *ns;
+
+	ns = proc_pid_ns(file_inode(m->file));
+	seq_put_decimal_ull(m, "Pid:\t", pid_nr_ns(pid, ns));
+	seq_putc(m, '\n');
+}
+#endif
+
+/*
+ * File operations of the anonymous-inode files created by pidfd_create().
+ * The fdinfo hook is only wired up when procfs is built in.
+ */
+const struct file_operations pidfd_fops = {
+	.release	= pidfd_release,
+#ifdef CONFIG_PROC_FS
+	.show_fdinfo	= pidfd_show_fdinfo,
+#endif
+};
+
+/**
+ * pidfd_create() - Create a new pid file descriptor.
+ *
+ * @pid: struct pid that the pidfd will reference
+ *
+ * This creates a new pid file descriptor with the O_CLOEXEC flag set.
+ * The file is an anonymous inode file named "pidfd" that uses pidfd_fops.
+ *
+ * A reference on @pid is taken with get_pid() and passed along when the
+ * file is created. If creating the file descriptor fails, that reference
+ * is dropped again with put_pid() before returning, so the caller's own
+ * reference on @pid is left untouched either way. On success the
+ * reference is dropped by pidfd_release() via the file's private data.
+ *
+ * Note, that this function can only be called after the fd table has
+ * been unshared to avoid leaking the pidfd to the new process.
+ *
+ * Return: On success, a cloexec pidfd is returned.
+ * On error, a negative errno number will be returned.
+ */
+static int pidfd_create(struct pid *pid)
+{
+ int fd;
+
+ fd = anon_inode_getfd("pidfd", &pidfd_fops, get_pid(pid),
+ O_RDWR | O_CLOEXEC);
+ if (fd < 0)
+ put_pid(pid);
+
+ return fd;
+}
+
/*
* This creates a new process as a copy of the old one,
* but does not actually start it yet.
@@ -1674,13 +1729,14 @@ static __latent_entropy struct task_struct *copy_process(
unsigned long clone_flags,
unsigned long stack_start,
unsigned long stack_size,
+ int __user *parent_tidptr,
int __user *child_tidptr,
struct pid *pid,
int trace,
unsigned long tls,
int node)
{
- int retval;
+ int pidfd = -1, retval;
struct task_struct *p;
struct multiprocess_signals delayed;
@@ -1730,6 +1786,31 @@ static __latent_entropy struct task_struct *copy_process(
return ERR_PTR(-EINVAL);
}
+ if (clone_flags & CLONE_PIDFD) {
+ int reserved;
+
+ /*
+ * - CLONE_PARENT_SETTID is useless for pidfds and also
+ * parent_tidptr is used to return pidfds.
+ * - CLONE_DETACHED is blocked so that we can potentially
+ * reuse it later for CLONE_PIDFD.
+ * - CLONE_THREAD is blocked until someone really needs it.
+ */
+ if (clone_flags &
+ (CLONE_DETACHED | CLONE_PARENT_SETTID | CLONE_THREAD))
+ return ERR_PTR(-EINVAL);
+
+ /*
+ * Verify that parent_tidptr is sane so we can potentially
+ * reuse it later.
+ */
+ if (get_user(reserved, parent_tidptr))
+ return ERR_PTR(-EFAULT);
+
+ if (reserved != 0)
+ return ERR_PTR(-EINVAL);
+ }
+
/*
* Force any signals received before this point to be delivered
* before the fork happens. Collect up signals sent to multiple
@@ -1936,6 +2017,22 @@ static __latent_entropy struct task_struct *copy_process(
}
}
+ /*
+ * This has to happen after we've potentially unshared the file
+ * descriptor table (so that the pidfd doesn't leak into the child
+ * if the fd table isn't shared).
+ */
+ if (clone_flags & CLONE_PIDFD) {
+ retval = pidfd_create(pid);
+ if (retval < 0)
+ goto bad_fork_free_pid;
+
+ pidfd = retval;
+ retval = put_user(pidfd, parent_tidptr);
+ if (retval)
+ goto bad_fork_put_pidfd;
+ }
+
#ifdef CONFIG_BLOCK
p->plug = NULL;
#endif
@@ -1996,7 +2093,7 @@ static __latent_entropy struct task_struct *copy_process(
*/
retval = cgroup_can_fork(p);
if (retval)
- goto bad_fork_free_pid;
+ goto bad_fork_put_pidfd;
/*
* From this point on we must avoid any synchronous user-space
@@ -2111,6 +2208,9 @@ static __latent_entropy struct task_struct *copy_process(
spin_unlock(¤t->sighand->siglock);
write_unlock_irq(&tasklist_lock);
cgroup_cancel_fork(p);
+bad_fork_put_pidfd:
+ if (clone_flags & CLONE_PIDFD)
+ ksys_close(pidfd);
bad_fork_free_pid:
cgroup_threadgroup_change_end(current);
if (pid != &init_struct_pid)
@@ -2176,7 +2276,7 @@ static inline void init_idle_pids(struct task_struct *idle)
struct task_struct *fork_idle(int cpu)
{
struct task_struct *task;
- task = copy_process(CLONE_VM, 0, 0, NULL, &init_struct_pid, 0, 0,
+ task = copy_process(CLONE_VM, 0, 0, NULL, NULL, &init_struct_pid, 0, 0,
cpu_to_node(cpu));
if (!IS_ERR(task)) {
init_idle_pids(task);
@@ -2223,7 +2323,7 @@ long _do_fork(unsigned long clone_flags,
trace = 0;
}
- p = copy_process(clone_flags, stack_start, stack_size,
+ p = copy_process(clone_flags, stack_start, stack_size, parent_tidptr,
child_tidptr, NULL, trace, tls, NUMA_NO_NODE);
add_latent_entropy();
--
2.21.0
^ permalink raw reply related
* [PATCH v3 3/4] signal: support CLONE_PIDFD with pidfd_send_signal
From: Christian Brauner @ 2019-04-19 12:09 UTC (permalink / raw)
To: torvalds, viro, jannh, dhowells, oleg, linux-api, linux-kernel
Cc: serge, luto, arnd, ebiederm, keescook, tglx, mtk.manpages, akpm,
cyphar, joel, dancol, Christian Brauner, Jann Horn
In-Reply-To: <20190419120904.27502-1-christian@brauner.io>
Let pidfd_send_signal() use pidfds retrieved via CLONE_PIDFD. With this
patch pidfd_send_signal() becomes independent of procfs. This fulfills
the request made when we merged the pidfd_send_signal() patchset. The
pidfd_send_signal() syscall is now always available allowing for it to
be used by users without procfs mounted or even users without procfs
support compiled into the kernel.
Signed-off-by: Christian Brauner <christian@brauner.io>
Co-developed-by: Jann Horn <jann@thejh.net>
Signed-off-by: Jann Horn <jann@thejh.net>
Acked-by: Oleg Nesterov <oleg@redhat.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: David Howells <dhowells@redhat.com>
Cc: "Michael Kerrisk (man-pages)" <mtk.manpages@gmail.com>
Cc: Andy Lutomirsky <luto@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Aleksa Sarai <cyphar@cyphar.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
---
/* changelog */
v1: patch unchanged
v2:
- Oleg Nesterov <oleg@redhat.com>:
- split s/fdget_raw()/fdget()/ into separate patch as it has nothing to
do with supporting CLONE_PIDFD
v3:
- Christian Brauner <christian@brauner.io>:
- Linus has applied the s/fdget_raw()/fdget()/ patch right away so the
patch is gone from the series
---
kernel/signal.c | 12 +++++++++---
kernel/sys_ni.c | 3 ---
2 files changed, 9 insertions(+), 6 deletions(-)
diff --git a/kernel/signal.c b/kernel/signal.c
index f98448cf2def..1581140f2d99 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -3513,7 +3513,6 @@ SYSCALL_DEFINE2(kill, pid_t, pid, int, sig)
return kill_something_info(sig, &info, pid);
}
-#ifdef CONFIG_PROC_FS
/*
* Verify that the signaler and signalee either are in the same pid namespace
* or that the signaler's pid namespace is an ancestor of the signalee's pid
@@ -3550,6 +3549,14 @@ static int copy_siginfo_from_user_any(kernel_siginfo_t *kinfo, siginfo_t *info)
return copy_siginfo_from_user(kinfo, info);
}
+/*
+ * Map a file to the struct pid it stands for. Files that use pidfd_fops
+ * carry the struct pid in their private data; every other file is passed
+ * on to tgid_pidfd_to_pid().
+ */
+static struct pid *pidfd_to_pid(const struct file *file)
+{
+	if (file->f_op != &pidfd_fops)
+		return tgid_pidfd_to_pid(file);
+
+	return file->private_data;
+}
+
/**
* sys_pidfd_send_signal - send a signal to a process through a task file
* descriptor
@@ -3586,7 +3593,7 @@ SYSCALL_DEFINE4(pidfd_send_signal, int, pidfd, int, sig,
return -EBADF;
/* Is this a pidfd? */
- pid = tgid_pidfd_to_pid(f.file);
+ pid = pidfd_to_pid(f.file);
if (IS_ERR(pid)) {
ret = PTR_ERR(pid);
goto err;
@@ -3620,7 +3627,6 @@ SYSCALL_DEFINE4(pidfd_send_signal, int, pidfd, int, sig,
fdput(f);
return ret;
}
-#endif /* CONFIG_PROC_FS */
static int
do_send_specific(pid_t tgid, pid_t pid, int sig, struct kernel_siginfo *info)
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index d21f4befaea4..4d9ae5ea6caf 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -167,9 +167,6 @@ COND_SYSCALL(syslog);
/* kernel/sched/core.c */
-/* kernel/signal.c */
-COND_SYSCALL(pidfd_send_signal);
-
/* kernel/sys.c */
COND_SYSCALL(setregid);
COND_SYSCALL(setgid);
--
2.21.0
^ permalink raw reply related
* [PATCH v3 4/4] samples: show race-free pidfd metadata access
From: Christian Brauner @ 2019-04-19 12:09 UTC (permalink / raw)
To: torvalds, viro, jannh, dhowells, oleg, linux-api, linux-kernel
Cc: serge, luto, arnd, ebiederm, keescook, tglx, mtk.manpages, akpm,
cyphar, joel, dancol, Christian Brauner, Jann Horn
In-Reply-To: <20190419120904.27502-1-christian@brauner.io>
This is a sample program showing userspace how to get race-free access
to process metadata from a pidfd. It is rather easy to do and userspace
can actually simply reuse code that currently parses a process's status
file in procfs.
The program can easily be extended into a generic helper suitable for
inclusion in a libc to make it even easier for userspace to gain metadata
access.
Since this came up in a discussion because this API is going to be used
in various service managers: A lot of programs will have a whitelist
seccomp filter that returns <some-errno> for all new syscalls. This
means that programs might get confused if CLONE_PIDFD works but the
later pidfd_send_signal() syscall doesn't. Hence, here's an ahead-of-time
check that pidfd_send_signal() is supported:
bool pidfd_send_signal_supported(void)
{
	bool supported;
	int procfd = open("/proc/self", O_DIRECTORY | O_RDONLY | O_CLOEXEC);

	if (procfd < 0)
		return false;

	/*
	 * A process is always allowed to signal itself so
	 * pidfd_send_signal() should never fail this test. If it does
	 * it must mean it is not available, blocked by an LSM, seccomp,
	 * or other.
	 */
	supported = pidfd_send_signal(procfd, 0, NULL, 0) == 0;

	/* Only the result of the probe is needed; don't leak the fd. */
	close(procfd);

	return supported;
}
Signed-off-by: Christian Brauner <christian@brauner.io>
Co-developed-by: Jann Horn <jann@thejh.net>
Signed-off-by: Jann Horn <jann@thejh.net>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: David Howells <dhowells@redhat.com>
Cc: "Michael Kerrisk (man-pages)" <mtk.manpages@gmail.com>
Cc: Andy Lutomirsky <luto@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Aleksa Sarai <cyphar@cyphar.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
---
/* changelog */
v1:
- Christian Brauner <christian@brauner.io>:
- adapt sample program to changes in how CLONE_PIDFD returns the pidfd
With Oleg's suggestion we can simplify the program even more.
v2: patch unchanged
v3: patch unchanged
---
samples/Makefile | 2 +-
samples/pidfd/Makefile | 6 ++
samples/pidfd/pidfd-metadata.c | 112 +++++++++++++++++++++++++++++++++
3 files changed, 119 insertions(+), 1 deletion(-)
create mode 100644 samples/pidfd/Makefile
create mode 100644 samples/pidfd/pidfd-metadata.c
diff --git a/samples/Makefile b/samples/Makefile
index b1142a958811..fadadb1c3b05 100644
--- a/samples/Makefile
+++ b/samples/Makefile
@@ -3,4 +3,4 @@
obj-$(CONFIG_SAMPLES) += kobject/ kprobes/ trace_events/ livepatch/ \
hw_breakpoint/ kfifo/ kdb/ hidraw/ rpmsg/ seccomp/ \
configfs/ connector/ v4l/ trace_printk/ \
- vfio-mdev/ statx/ qmi/ binderfs/
+ vfio-mdev/ statx/ qmi/ binderfs/ pidfd/
diff --git a/samples/pidfd/Makefile b/samples/pidfd/Makefile
new file mode 100644
index 000000000000..0ff97784177a
--- /dev/null
+++ b/samples/pidfd/Makefile
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0
+
+hostprogs-y := pidfd-metadata
+always := $(hostprogs-y)
+HOSTCFLAGS_pidfd-metadata.o += -I$(objtree)/usr/include
+all: pidfd-metadata
diff --git a/samples/pidfd/pidfd-metadata.c b/samples/pidfd/pidfd-metadata.c
new file mode 100644
index 000000000000..bd8456fc4c0e
--- /dev/null
+++ b/samples/pidfd/pidfd-metadata.c
@@ -0,0 +1,112 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+#include <err.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <limits.h>
+#include <sched.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#ifndef CLONE_PIDFD
+#define CLONE_PIDFD 0x00001000
+#endif
+
+/*
+ * Entry point of the cloned child: print its own pid and terminate.
+ *
+ * _exit() does not flush stdio buffers, so stdout is flushed explicitly.
+ * Without that the line is lost whenever stdout is not line-buffered,
+ * e.g. when the output is redirected to a file or a pipe.
+ */
+static int do_child(void *args)
+{
+	(void)args;
+
+	printf("%d\n", getpid());
+	fflush(stdout);
+	_exit(EXIT_SUCCESS);
+}
+
+/*
+ * Clone a child that runs do_child() and ask for a pidfd for it.
+ *
+ * @flags: clone flags; SIGCHLD is always added as the exit signal.
+ * @pidfd: passed as clone()'s parent_tid argument, which is where
+ *         CLONE_PIDFD stores the new pidfd. The kernel side of this
+ *         series requires the int to be 0 on entry.
+ *
+ * Returns whatever clone() returns: the child's pid, or -1 on error.
+ */
+static pid_t pidfd_clone(int flags, int *pidfd)
+{
+	/*
+	 * A byte array (not an array of pointers), so that sizeof(stack) is
+	 * the real stack size in bytes on every architecture.
+	 */
+	char stack[16 * 1024] __attribute__((aligned(16))) = { 0 };
+
+#ifdef __ia64__
+	return __clone2(do_child, stack, sizeof(stack), flags | SIGCHLD, NULL, pidfd);
+#else
+	/* The stack grows down, so hand over the address one past the end. */
+	return clone(do_child, stack + sizeof(stack), flags | SIGCHLD, NULL, pidfd);
+#endif
+}
+
+/* Thin wrapper that issues pidfd_send_signal directly through syscall(). */
+static inline int sys_pidfd_send_signal(int pidfd, int sig, siginfo_t *info,
+					unsigned int flags)
+{
+	long rc = syscall(__NR_pidfd_send_signal, pidfd, sig, info, flags);
+
+	return rc;
+}
+
+/*
+ * Open /proc/<pid> and confirm through the pidfd that the process it was
+ * opened for still exists.
+ *
+ * @pid:   pid used to build the /proc/<pid> path
+ * @pidfd: pidfd of the same process, probed with signal 0
+ *
+ * Returns a directory fd for /proc/<pid>, or -1 if the directory could not
+ * be opened or the probe failed with anything other than EPERM.
+ */
+static int pidfd_metadata_fd(pid_t pid, int pidfd)
+{
+	int procfd, ret;
+	char path[100];
+
+	snprintf(path, sizeof(path), "/proc/%d", pid);
+	procfd = open(path, O_DIRECTORY | O_RDONLY | O_CLOEXEC);
+	if (procfd < 0) {
+		/* warn() appends ": <error string>" and the newline itself. */
+		warn("Failed to open %s", path);
+		return -1;
+	}
+
+	/*
+	 * Verify that the pid has not been recycled and our /proc/<pid> handle
+	 * is still valid.
+	 */
+	ret = sys_pidfd_send_signal(pidfd, 0, NULL, 0);
+	if (ret < 0) {
+		switch (errno) {
+		case EPERM:
+			/* Process exists, just not allowed to signal it. */
+			break;
+		default:
+			warn("Failed to signal process");
+			close(procfd);
+			procfd = -1;
+			break;
+		}
+	}
+
+	return procfd;
+}
+
+/*
+ * Clone a child with CLONE_PIDFD, use the pidfd to obtain a verified
+ * /proc/<pid> handle, and copy the child's "status" file to stdout.
+ *
+ * Exits with EXIT_SUCCESS once the status file could be opened, and with
+ * EXIT_FAILURE otherwise.
+ */
+int main(int argc, char *argv[])
+{
+	int ret = EXIT_FAILURE;
+	char buf[4096] = { 0 };
+	pid_t pid;
+	/*
+	 * CLONE_PIDFD requires the int that receives the pidfd to be 0 on
+	 * entry: the kernel reads it back and fails the clone with EINVAL
+	 * for any other value, so it must not be left uninitialized.
+	 */
+	int pidfd = 0, procfd, statusfd;
+	ssize_t bytes;
+
+	pid = pidfd_clone(CLONE_PIDFD, &pidfd);
+	if (pid < 0)
+		err(ret, "Failed to clone with CLONE_PIDFD");
+
+	procfd = pidfd_metadata_fd(pid, pidfd);
+	close(pidfd);
+	if (procfd < 0)
+		goto out;
+
+	statusfd = openat(procfd, "status", O_RDONLY | O_CLOEXEC);
+	close(procfd);
+	if (statusfd < 0)
+		goto out;
+
+	bytes = read(statusfd, buf, sizeof(buf));
+	if (bytes > 0)
+		bytes = write(STDOUT_FILENO, buf, bytes);
+	close(statusfd);
+	ret = EXIT_SUCCESS;
+
+out:
+	/* Reap the child so that it does not linger as a zombie. */
+	(void)wait(NULL);
+
+	exit(ret);
+}
--
2.21.0
^ permalink raw reply related
* Re: [PATCH for 5.1 3/3] rseq/selftests: Adapt number of threads to the number of detected cpus
From: Mathieu Desnoyers @ 2019-04-19 12:41 UTC (permalink / raw)
To: Ingo Molnar
Cc: Thomas Gleixner, linux-kernel, linux-api, Peter Zijlstra,
Paul E . McKenney, Boqun Feng, Andy Lutomirski, Dave Watson,
Paul Turner, Andrew Morton, Russell King, Ingo Molnar,
H. Peter Anvin, Andi Kleen, Chris Lameter, Ben Maurer, rostedt,
Josh Triplett, Linus Torvalds, Catalin Marinas, Will
In-Reply-To: <20190419103847.GA111210@gmail.com>
----- On Apr 19, 2019, at 6:38 AM, Ingo Molnar mingo@kernel.org wrote:
> * Mathieu Desnoyers <mathieu.desnoyers@efficios.com> wrote:
>
>> On smaller systems, running a test with 200 threads can take a long
>> time on machines with smaller number of CPUs.
>>
>> Detect the number of online cpus at test runtime, and multiply that
>> by 6 to have 6 rseq threads per cpu preempting each other.
>>
>> Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
>> Cc: Shuah Khan <shuah@kernel.org>
>> Cc: Thomas Gleixner <tglx@linutronix.de>
>> Cc: Joel Fernandes <joelaf@google.com>
>> Cc: Peter Zijlstra <peterz@infradead.org>
>> Cc: Catalin Marinas <catalin.marinas@arm.com>
>> Cc: Dave Watson <davejwatson@fb.com>
>> Cc: Will Deacon <will.deacon@arm.com>
>> Cc: Andi Kleen <andi@firstfloor.org>
>> Cc: linux-kselftest@vger.kernel.org
>> Cc: "H . Peter Anvin" <hpa@zytor.com>
>> Cc: Chris Lameter <cl@linux.com>
>> Cc: Russell King <linux@arm.linux.org.uk>
>> Cc: Michael Kerrisk <mtk.manpages@gmail.com>
>> Cc: "Paul E . McKenney" <paulmck@linux.vnet.ibm.com>
>> Cc: Paul Turner <pjt@google.com>
>> Cc: Boqun Feng <boqun.feng@gmail.com>
>> Cc: Josh Triplett <josh@joshtriplett.org>
>> Cc: Steven Rostedt <rostedt@goodmis.org>
>> Cc: Ben Maurer <bmaurer@fb.com>
>> Cc: Andy Lutomirski <luto@amacapital.net>
>> Cc: Andrew Morton <akpm@linux-foundation.org>
>> Cc: Linus Torvalds <torvalds@linux-foundation.org>
>> ---
>> tools/testing/selftests/rseq/run_param_test.sh | 7 +++++--
>> 1 file changed, 5 insertions(+), 2 deletions(-)
>>
>> diff --git a/tools/testing/selftests/rseq/run_param_test.sh
>> b/tools/testing/selftests/rseq/run_param_test.sh
>> index 3acd6d75ff9f..e426304fd4a0 100755
>> --- a/tools/testing/selftests/rseq/run_param_test.sh
>> +++ b/tools/testing/selftests/rseq/run_param_test.sh
>> @@ -1,6 +1,8 @@
>> #!/bin/bash
>> # SPDX-License-Identifier: GPL-2.0+ or MIT
>>
>> +NR_CPUS=`grep '^processor' /proc/cpuinfo | wc -l`
>> +
>> EXTRA_ARGS=${@}
>>
>> OLDIFS="$IFS"
>> @@ -28,15 +30,16 @@ IFS="$OLDIFS"
>>
>> REPS=1000
>> SLOW_REPS=100
>> +NR_THREADS=$((6*${NR_CPUS}))
>>
>> function do_tests()
>> {
>> local i=0
>> while [ "$i" -lt "${#TEST_LIST[@]}" ]; do
>> echo "Running test ${TEST_NAME[$i]}"
>> - ./param_test ${TEST_LIST[$i]} -r ${REPS} ${@} ${EXTRA_ARGS} || exit 1
>> + ./param_test ${TEST_LIST[$i]} -r ${REPS} -t ${NR_THREADS} ${@} ${EXTRA_ARGS}
>> || exit 1
>> echo "Running compare-twice test ${TEST_NAME[$i]}"
>> - ./param_test_compare_twice ${TEST_LIST[$i]} -r ${REPS} ${@} ${EXTRA_ARGS} ||
>> exit 1
>> + ./param_test_compare_twice ${TEST_LIST[$i]} -r ${REPS} -t ${NR_THREADS} ${@}
>> ${EXTRA_ARGS} || exit 1
>> let "i++"
>> done
>> }
>
> BTW., when trying to build the rseq self-tests I get this build failure:
>
> dagon:~/tip/tools/testing/selftests/rseq> make
> gcc -O2 -Wall -g -I./ -I../../../../usr/include/ -L./ -Wl,-rpath=./ -shared
> -fPIC rseq.c -lpthread -o
> /home/mingo/tip/tools/testing/selftests/rseq/librseq.so
> gcc -O2 -Wall -g -I./ -I../../../../usr/include/ -L./ -Wl,-rpath=./ basic_test.c
> -lpthread -lrseq -o /home/mingo/tip/tools/testing/selftests/rseq/basic_test
> gcc -O2 -Wall -g -I./ -I../../../../usr/include/ -L./ -Wl,-rpath=./
> basic_percpu_ops_test.c -lpthread -lrseq -o
> /home/mingo/tip/tools/testing/selftests/rseq/basic_percpu_ops_test
> /usr/bin/ld: /tmp/ccuHTWnZ.o: in function `rseq_cmpeqv_storev':
> /home/mingo/tip/tools/testing/selftests/rseq/./rseq-x86.h:84: undefined
> reference to `.L8'
> /usr/bin/ld: /home/mingo/tip/tools/testing/selftests/rseq/./rseq-x86.h:84:
> undefined reference to `.L49'
> /usr/bin/ld: /tmp/ccuHTWnZ.o: in function `rseq_cmpnev_storeoffp_load':
> /home/mingo/tip/tools/testing/selftests/rseq/./rseq-x86.h:141: undefined
> reference to `.L57'
> /usr/bin/ld: /tmp/ccuHTWnZ.o:(__rseq_failure+0x8): undefined reference to `.L8'
> /usr/bin/ld: /tmp/ccuHTWnZ.o:(__rseq_failure+0x14): undefined reference to
> `.L49'
> /usr/bin/ld: /tmp/ccuHTWnZ.o:(__rseq_failure+0x20): undefined reference to
> `.L55'
> collect2: error: ld returned 1 exit status
> make: *** [Makefile:22:
> /home/mingo/tip/tools/testing/selftests/rseq/basic_percpu_ops_test] Error 1
>
> Is this a known problem, or do I miss something from my build environment
> perhaps? Vanilla 64-bit Ubuntu 18.10 (Cosmic).
It works fine with gcc-7 (gcc version 7.3.0 (Ubuntu 7.3.0-16ubuntu3))
but indeed I get the same failure with gcc-8 (gcc version 8.0.1 20180414
(experimental) [trunk revision 259383] (Ubuntu 8-20180414-1ubuntu2)).
Thanks for reporting! I will investigate.
Mathieu
>
> Thanks,
>
> Ingo
--
Mathieu Desnoyers
EfficiOS Inc.
http://www.efficios.com
^ permalink raw reply
* Re: [PATCH for 5.1 0/3] Restartable Sequences updates for 5.1
From: Mathieu Desnoyers @ 2019-04-19 12:42 UTC (permalink / raw)
To: Ingo Molnar
Cc: Thomas Gleixner, linux-kernel, linux-api, Peter Zijlstra,
Paul E . McKenney, Boqun Feng, Andy Lutomirski, Dave Watson,
Paul Turner, Andrew Morton, Russell King, Ingo Molnar,
H. Peter Anvin, Andi Kleen, Chris Lameter, Ben Maurer, rostedt,
Josh Triplett, Linus Torvalds, Catalin Marinas, Will
In-Reply-To: <20190419104123.GB111210@gmail.com>
----- On Apr 19, 2019, at 6:41 AM, Ingo Molnar mingo@kernel.org wrote:
> * Mathieu Desnoyers <mathieu.desnoyers@efficios.com> wrote:
>
>> Those changes aiming at 5.1 include one comment cleanup, the removal of
>> the rseq_len field from the task struct which serves no purpose
>> considering that the struct size is fixed by the ABI, and a selftest
>> improvement adapting the number of threads to the number of detected
>> CPUs, which is nicer for smaller systems.
>>
>> Thanks,
>>
>> Mathieu
>>
>> Mathieu Desnoyers (3):
>> rseq: cleanup: Reflect removal of event counter in comments
>> rseq: cleanup: remove rseq_len from task_struct
>> rseq/selftests: Adapt number of threads to the number of detected cpus
>>
>> arch/arm/kernel/signal.c | 3 +--
>> arch/x86/kernel/signal.c | 5 +----
>> include/linux/sched.h | 4 ----
>> kernel/rseq.c | 9 +++------
>> tools/testing/selftests/rseq/run_param_test.sh | 7 +++++--
>> 5 files changed, 10 insertions(+), 18 deletions(-)
>
> Looks good, I've applied these to tip:core/rseq to make sure they don't
> miss the v5.2 merge window.
>
> (Let me know if you wanted to handle this differently.)
That's fine by me!
Thanks,
Mathieu
>
> Thanks,
>
> Ingo
--
Mathieu Desnoyers
EfficiOS Inc.
http://www.efficios.com
^ permalink raw reply
* Re: [PATCH for 5.1 3/3] rseq/selftests: Adapt number of threads to the number of detected cpus
From: Mathieu Desnoyers @ 2019-04-19 12:55 UTC (permalink / raw)
To: Ingo Molnar
Cc: Thomas Gleixner, linux-kernel, linux-api, Peter Zijlstra,
Paul E . McKenney, Boqun Feng, Andy Lutomirski, Dave Watson,
Paul Turner, Andrew Morton, Russell King, Ingo Molnar,
H. Peter Anvin, Andi Kleen, Chris Lameter, Ben Maurer, rostedt,
Josh Triplett, Linus Torvalds, Catalin Marinas, Will
In-Reply-To: <1444419838.71.1555677682502.JavaMail.zimbra@efficios.com>
----- On Apr 19, 2019, at 8:41 AM, Mathieu Desnoyers mathieu.desnoyers@efficios.com wrote:
> ----- On Apr 19, 2019, at 6:38 AM, Ingo Molnar mingo@kernel.org wrote:
>
>> * Mathieu Desnoyers <mathieu.desnoyers@efficios.com> wrote:
>>
>>> On smaller systems, running a test with 200 threads can take a long
>>> time on machines with smaller number of CPUs.
>>>
>>> Detect the number of online cpus at test runtime, and multiply that
>>> by 6 to have 6 rseq threads per cpu preempting each other.
>>>
>>> Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
>>> Cc: Shuah Khan <shuah@kernel.org>
>>> Cc: Thomas Gleixner <tglx@linutronix.de>
>>> Cc: Joel Fernandes <joelaf@google.com>
>>> Cc: Peter Zijlstra <peterz@infradead.org>
>>> Cc: Catalin Marinas <catalin.marinas@arm.com>
>>> Cc: Dave Watson <davejwatson@fb.com>
>>> Cc: Will Deacon <will.deacon@arm.com>
>>> Cc: Andi Kleen <andi@firstfloor.org>
>>> Cc: linux-kselftest@vger.kernel.org
>>> Cc: "H . Peter Anvin" <hpa@zytor.com>
>>> Cc: Chris Lameter <cl@linux.com>
>>> Cc: Russell King <linux@arm.linux.org.uk>
>>> Cc: Michael Kerrisk <mtk.manpages@gmail.com>
>>> Cc: "Paul E . McKenney" <paulmck@linux.vnet.ibm.com>
>>> Cc: Paul Turner <pjt@google.com>
>>> Cc: Boqun Feng <boqun.feng@gmail.com>
>>> Cc: Josh Triplett <josh@joshtriplett.org>
>>> Cc: Steven Rostedt <rostedt@goodmis.org>
>>> Cc: Ben Maurer <bmaurer@fb.com>
>>> Cc: Andy Lutomirski <luto@amacapital.net>
>>> Cc: Andrew Morton <akpm@linux-foundation.org>
>>> Cc: Linus Torvalds <torvalds@linux-foundation.org>
>>> ---
>>> tools/testing/selftests/rseq/run_param_test.sh | 7 +++++--
>>> 1 file changed, 5 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/tools/testing/selftests/rseq/run_param_test.sh
>>> b/tools/testing/selftests/rseq/run_param_test.sh
>>> index 3acd6d75ff9f..e426304fd4a0 100755
>>> --- a/tools/testing/selftests/rseq/run_param_test.sh
>>> +++ b/tools/testing/selftests/rseq/run_param_test.sh
>>> @@ -1,6 +1,8 @@
>>> #!/bin/bash
>>> # SPDX-License-Identifier: GPL-2.0+ or MIT
>>>
>>> +NR_CPUS=`grep '^processor' /proc/cpuinfo | wc -l`
>>> +
>>> EXTRA_ARGS=${@}
>>>
>>> OLDIFS="$IFS"
>>> @@ -28,15 +30,16 @@ IFS="$OLDIFS"
>>>
>>> REPS=1000
>>> SLOW_REPS=100
>>> +NR_THREADS=$((6*${NR_CPUS}))
>>>
>>> function do_tests()
>>> {
>>> local i=0
>>> while [ "$i" -lt "${#TEST_LIST[@]}" ]; do
>>> echo "Running test ${TEST_NAME[$i]}"
>>> - ./param_test ${TEST_LIST[$i]} -r ${REPS} ${@} ${EXTRA_ARGS} || exit 1
>>> + ./param_test ${TEST_LIST[$i]} -r ${REPS} -t ${NR_THREADS} ${@} ${EXTRA_ARGS}
>>> || exit 1
>>> echo "Running compare-twice test ${TEST_NAME[$i]}"
>>> - ./param_test_compare_twice ${TEST_LIST[$i]} -r ${REPS} ${@} ${EXTRA_ARGS} ||
>>> exit 1
>>> + ./param_test_compare_twice ${TEST_LIST[$i]} -r ${REPS} -t ${NR_THREADS} ${@}
>>> ${EXTRA_ARGS} || exit 1
>>> let "i++"
>>> done
>>> }
>>
>> BTW., when trying to build the rseq self-tests I get this build failure:
>>
>> dagon:~/tip/tools/testing/selftests/rseq> make
>> gcc -O2 -Wall -g -I./ -I../../../../usr/include/ -L./ -Wl,-rpath=./ -shared
>> -fPIC rseq.c -lpthread -o
>> /home/mingo/tip/tools/testing/selftests/rseq/librseq.so
>> gcc -O2 -Wall -g -I./ -I../../../../usr/include/ -L./ -Wl,-rpath=./ basic_test.c
>> -lpthread -lrseq -o /home/mingo/tip/tools/testing/selftests/rseq/basic_test
>> gcc -O2 -Wall -g -I./ -I../../../../usr/include/ -L./ -Wl,-rpath=./
>> basic_percpu_ops_test.c -lpthread -lrseq -o
>> /home/mingo/tip/tools/testing/selftests/rseq/basic_percpu_ops_test
>> /usr/bin/ld: /tmp/ccuHTWnZ.o: in function `rseq_cmpeqv_storev':
>> /home/mingo/tip/tools/testing/selftests/rseq/./rseq-x86.h:84: undefined
>> reference to `.L8'
>> /usr/bin/ld: /home/mingo/tip/tools/testing/selftests/rseq/./rseq-x86.h:84:
>> undefined reference to `.L49'
>> /usr/bin/ld: /tmp/ccuHTWnZ.o: in function `rseq_cmpnev_storeoffp_load':
>> /home/mingo/tip/tools/testing/selftests/rseq/./rseq-x86.h:141: undefined
>> reference to `.L57'
>> /usr/bin/ld: /tmp/ccuHTWnZ.o:(__rseq_failure+0x8): undefined reference to `.L8'
>> /usr/bin/ld: /tmp/ccuHTWnZ.o:(__rseq_failure+0x14): undefined reference to
>> `.L49'
>> /usr/bin/ld: /tmp/ccuHTWnZ.o:(__rseq_failure+0x20): undefined reference to
>> `.L55'
>> collect2: error: ld returned 1 exit status
>> make: *** [Makefile:22:
>> /home/mingo/tip/tools/testing/selftests/rseq/basic_percpu_ops_test] Error 1
>>
>> Is this a known problem, or do I miss something from my build environment
>> perhaps? Vanilla 64-bit Ubuntu 18.10 (Cosmic).
>
> It works fine with gcc-7 (gcc version 7.3.0 (Ubuntu 7.3.0-16ubuntu3))
> but indeed I get the same failure with gcc-8 (gcc version 8.0.1 20180414
> (experimental) [trunk revision 259383] (Ubuntu 8-20180414-1ubuntu2)).
>
> Thanks for reporting! I will investigate.
It looks like gcc-8 optimizes away the target of asm goto labels when
there is more than one of them on x86-64. I'll try to come up with
a simpler reproducer.
Thanks,
Mathieu
--
Mathieu Desnoyers
EfficiOS Inc.
http://www.efficios.com
^ permalink raw reply
* Re: [PATCH v2 2/5] clone: add CLONE_PIDFD
From: Aleksa Sarai @ 2019-04-19 13:39 UTC (permalink / raw)
To: Christian Brauner
Cc: Oleg Nesterov, torvalds, viro, jannh, dhowells, linux-api,
linux-kernel, serge, luto, arnd, ebiederm, keescook, tglx,
mtk.manpages, akpm, joel, dancol
In-Reply-To: <20190418132822.untjt7erfvbbiz7a@brauner.io>
[-- Attachment #1: Type: text/plain, Size: 1552 bytes --]
On 2019-04-18, Christian Brauner <christian@brauner.io> wrote:
> > Why O_CLOEXEC? I am just curious, I do not really care.
>
> I think that having the file descriptor O_CLOEXEC by default is a good
> security measure in general. Most of the time you do not want to pass a
> file descriptor through exec() (apart from 0,1,2) and it is usually more
> of an issue when you accidentally do it than when you accidentally don't. So
> if users really care about passing a pidfd they should do so by removing
> the O_CLOEXEC flag explicitly.
> (New file descriptors should probably all default to that but that's just
> my opinion.)
> Another thing is that for pidfds it makes even more sense to have them
> cloexec by default. You don't want to *unintentionally* leak an fd that
> can be used to operate on a process.
There is another factor as well -- if you want to set O_CLOEXEC in a
multi-threaded process you can't be sure that another thread didn't fork
in between the fd being fd_install'd and the userspace process
setting O_CLOEXEC (leading to the fd leaking outside the current
process). This is why a lot of syscalls have a way to get an O_CLOEXEC
fd from the outset.
So I'm +1 on doing O_CLOEXEC by default -- you can always disable it
safely but enabling it safely isn't so simple (and I don't think it
makes much sense to add the mechanism to pass PIDFD_CLOEXEC as well,
given how tight the flags are getting).
--
Aleksa Sarai
Senior Software Engineer (Containers)
SUSE Linux GmbH
<https://www.cyphar.com/>
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]
^ permalink raw reply
* Re: [PATCH for 5.1 3/3] rseq/selftests: Adapt number of threads to the number of detected cpus
From: Mathieu Desnoyers @ 2019-04-19 13:42 UTC (permalink / raw)
To: Ingo Molnar
Cc: Thomas Gleixner, linux-kernel, linux-api, Peter Zijlstra,
Paul E . McKenney, Boqun Feng, Andy Lutomirski, Dave Watson,
Paul Turner, Andrew Morton, Russell King, Ingo Molnar,
H. Peter Anvin, Andi Kleen, Chris Lameter, Ben Maurer, rostedt,
Josh Triplett, Linus Torvalds, Catalin Marinas, Will
In-Reply-To: <1266612341.87.1555678507226.JavaMail.zimbra@efficios.com>
----- On Apr 19, 2019, at 8:55 AM, Mathieu Desnoyers mathieu.desnoyers@efficios.com wrote:
> ----- On Apr 19, 2019, at 8:41 AM, Mathieu Desnoyers
> mathieu.desnoyers@efficios.com wrote:
>
>> ----- On Apr 19, 2019, at 6:38 AM, Ingo Molnar mingo@kernel.org wrote:
>>
>>> * Mathieu Desnoyers <mathieu.desnoyers@efficios.com> wrote:
>>>
>>>> On smaller systems, running a test with 200 threads can take a long
>>>> time on machines with smaller number of CPUs.
>>>>
>>>> Detect the number of online cpus at test runtime, and multiply that
>>>> by 6 to have 6 rseq threads per cpu preempting each other.
>>>>
>>>> Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
>>>> Cc: Shuah Khan <shuah@kernel.org>
>>>> Cc: Thomas Gleixner <tglx@linutronix.de>
>>>> Cc: Joel Fernandes <joelaf@google.com>
>>>> Cc: Peter Zijlstra <peterz@infradead.org>
>>>> Cc: Catalin Marinas <catalin.marinas@arm.com>
>>>> Cc: Dave Watson <davejwatson@fb.com>
>>>> Cc: Will Deacon <will.deacon@arm.com>
>>>> Cc: Andi Kleen <andi@firstfloor.org>
>>>> Cc: linux-kselftest@vger.kernel.org
>>>> Cc: "H . Peter Anvin" <hpa@zytor.com>
>>>> Cc: Chris Lameter <cl@linux.com>
>>>> Cc: Russell King <linux@arm.linux.org.uk>
>>>> Cc: Michael Kerrisk <mtk.manpages@gmail.com>
>>>> Cc: "Paul E . McKenney" <paulmck@linux.vnet.ibm.com>
>>>> Cc: Paul Turner <pjt@google.com>
>>>> Cc: Boqun Feng <boqun.feng@gmail.com>
>>>> Cc: Josh Triplett <josh@joshtriplett.org>
>>>> Cc: Steven Rostedt <rostedt@goodmis.org>
>>>> Cc: Ben Maurer <bmaurer@fb.com>
>>>> Cc: Andy Lutomirski <luto@amacapital.net>
>>>> Cc: Andrew Morton <akpm@linux-foundation.org>
>>>> Cc: Linus Torvalds <torvalds@linux-foundation.org>
>>>> ---
>>>> tools/testing/selftests/rseq/run_param_test.sh | 7 +++++--
>>>> 1 file changed, 5 insertions(+), 2 deletions(-)
>>>>
>>>> diff --git a/tools/testing/selftests/rseq/run_param_test.sh
>>>> b/tools/testing/selftests/rseq/run_param_test.sh
>>>> index 3acd6d75ff9f..e426304fd4a0 100755
>>>> --- a/tools/testing/selftests/rseq/run_param_test.sh
>>>> +++ b/tools/testing/selftests/rseq/run_param_test.sh
>>>> @@ -1,6 +1,8 @@
>>>> #!/bin/bash
>>>> # SPDX-License-Identifier: GPL-2.0+ or MIT
>>>>
>>>> +NR_CPUS=`grep '^processor' /proc/cpuinfo | wc -l`
>>>> +
>>>> EXTRA_ARGS=${@}
>>>>
>>>> OLDIFS="$IFS"
>>>> @@ -28,15 +30,16 @@ IFS="$OLDIFS"
>>>>
>>>> REPS=1000
>>>> SLOW_REPS=100
>>>> +NR_THREADS=$((6*${NR_CPUS}))
>>>>
>>>> function do_tests()
>>>> {
>>>> local i=0
>>>> while [ "$i" -lt "${#TEST_LIST[@]}" ]; do
>>>> echo "Running test ${TEST_NAME[$i]}"
>>>> - ./param_test ${TEST_LIST[$i]} -r ${REPS} ${@} ${EXTRA_ARGS} || exit 1
>>>> + ./param_test ${TEST_LIST[$i]} -r ${REPS} -t ${NR_THREADS} ${@} ${EXTRA_ARGS}
>>>> || exit 1
>>>> echo "Running compare-twice test ${TEST_NAME[$i]}"
>>>> - ./param_test_compare_twice ${TEST_LIST[$i]} -r ${REPS} ${@} ${EXTRA_ARGS} ||
>>>> exit 1
>>>> + ./param_test_compare_twice ${TEST_LIST[$i]} -r ${REPS} -t ${NR_THREADS} ${@}
>>>> ${EXTRA_ARGS} || exit 1
>>>> let "i++"
>>>> done
>>>> }
>>>
>>> BTW., when trying to build the rseq self-tests I get this build failure:
>>>
>>> dagon:~/tip/tools/testing/selftests/rseq> make
>>> gcc -O2 -Wall -g -I./ -I../../../../usr/include/ -L./ -Wl,-rpath=./ -shared
>>> -fPIC rseq.c -lpthread -o
>>> /home/mingo/tip/tools/testing/selftests/rseq/librseq.so
>>> gcc -O2 -Wall -g -I./ -I../../../../usr/include/ -L./ -Wl,-rpath=./ basic_test.c
>>> -lpthread -lrseq -o /home/mingo/tip/tools/testing/selftests/rseq/basic_test
>>> gcc -O2 -Wall -g -I./ -I../../../../usr/include/ -L./ -Wl,-rpath=./
>>> basic_percpu_ops_test.c -lpthread -lrseq -o
>>> /home/mingo/tip/tools/testing/selftests/rseq/basic_percpu_ops_test
>>> /usr/bin/ld: /tmp/ccuHTWnZ.o: in function `rseq_cmpeqv_storev':
>>> /home/mingo/tip/tools/testing/selftests/rseq/./rseq-x86.h:84: undefined
>>> reference to `.L8'
>>> /usr/bin/ld: /home/mingo/tip/tools/testing/selftests/rseq/./rseq-x86.h:84:
>>> undefined reference to `.L49'
>>> /usr/bin/ld: /tmp/ccuHTWnZ.o: in function `rseq_cmpnev_storeoffp_load':
>>> /home/mingo/tip/tools/testing/selftests/rseq/./rseq-x86.h:141: undefined
>>> reference to `.L57'
>>> /usr/bin/ld: /tmp/ccuHTWnZ.o:(__rseq_failure+0x8): undefined reference to `.L8'
>>> /usr/bin/ld: /tmp/ccuHTWnZ.o:(__rseq_failure+0x14): undefined reference to
>>> `.L49'
>>> /usr/bin/ld: /tmp/ccuHTWnZ.o:(__rseq_failure+0x20): undefined reference to
>>> `.L55'
>>> collect2: error: ld returned 1 exit status
>>> make: *** [Makefile:22:
>>> /home/mingo/tip/tools/testing/selftests/rseq/basic_percpu_ops_test] Error 1
>>>
>>> Is this a known problem, or do I miss something from my build environment
>>> perhaps? Vanilla 64-bit Ubuntu 18.10 (Cosmic).
>>
>> It works fine with gcc-7 (gcc version 7.3.0 (Ubuntu 7.3.0-16ubuntu3))
>> but indeed I get the same failure with gcc-8 (gcc version 8.0.1 20180414
>> (experimental) [trunk revision 259383] (Ubuntu 8-20180414-1ubuntu2)).
>>
>> Thanks for reporting! I will investigate.
>
> It looks like gcc-8 optimize away the target of asm goto labels when
> there are more than one of them on x86-64. I'll try to come up with
> a simpler reproducer.
It appears to be related to gcc-8 mishandling the combination of
asm goto and thread-local storage input operands on x86-64.
Here is a simple reproducer:
__thread int var;
static int fct(void)
{
asm goto ( "jmp %l[testlabel]\n\t"
: : [var] "m" (var) : : testlabel);
return 0;
testlabel:
return 1;
}
int main()
{
return fct();
}
Building with gcc-7 -O2 is fine. Building with gcc-8 -O0 is
fine too. Building with gcc-8 -O1 or -O2 fails with:
/tmp/ccuXTFfs.o: In function `main':
test-asm-goto.c:(.text.startup+0x1): undefined reference to `.L2'
collect2: error: ld returned 1 exit status
With gcc-7 -O2, the assembly of main has the .L2 label:
main:
.LFB1:
.cfi_startproc
#APP
# 5 "test-asm-goto.c" 1
jmp .L2
# 0 "" 2
#NO_APP
.L4:
.L3:
xorl %eax, %eax
ret
.L2:
movl $1, %eax
ret
.cfi_endproc
However, with gcc-8 -O2, it's missing:
main:
.LFB1:
.cfi_startproc
.L3:
#APP
# 5 "test-asm-goto.c" 1
jmp .L2
# 0 "" 2
#NO_APP
xorl %eax, %eax
ret
.cfi_endproc
It looks like we have a compiler issue. :-/
Thanks,
Mathieu
--
Mathieu Desnoyers
EfficiOS Inc.
http://www.efficios.com
^ permalink raw reply
* Re: [PATCH for 5.1 3/3] rseq/selftests: Adapt number of threads to the number of detected cpus
From: Mathieu Desnoyers @ 2019-04-19 13:48 UTC (permalink / raw)
To: Ingo Molnar
Cc: Thomas Gleixner, linux-kernel, linux-api, Peter Zijlstra,
Paul E . McKenney, Boqun Feng, Andy Lutomirski, Dave Watson,
Paul Turner, Andrew Morton, Russell King, Ingo Molnar,
H. Peter Anvin, Andi Kleen, Chris Lameter, Ben Maurer, rostedt,
Josh Triplett, Linus Torvalds, Catalin Marinas, Will
In-Reply-To: <614774674.134.1555681346941.JavaMail.zimbra@efficios.com>
----- On Apr 19, 2019, at 9:42 AM, Mathieu Desnoyers mathieu.desnoyers@efficios.com wrote:
> ----- On Apr 19, 2019, at 8:55 AM, Mathieu Desnoyers
> mathieu.desnoyers@efficios.com wrote:
>
>> ----- On Apr 19, 2019, at 8:41 AM, Mathieu Desnoyers
>> mathieu.desnoyers@efficios.com wrote:
>>
>>> ----- On Apr 19, 2019, at 6:38 AM, Ingo Molnar mingo@kernel.org wrote:
>>>
>>>> * Mathieu Desnoyers <mathieu.desnoyers@efficios.com> wrote:
>>>>
>>>>> On smaller systems, running a test with 200 threads can take a long
>>>>> time on machines with smaller number of CPUs.
>>>>>
>>>>> Detect the number of online cpus at test runtime, and multiply that
>>>>> by 6 to have 6 rseq threads per cpu preempting each other.
>>>>>
>>>>> Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
>>>>> Cc: Shuah Khan <shuah@kernel.org>
>>>>> Cc: Thomas Gleixner <tglx@linutronix.de>
>>>>> Cc: Joel Fernandes <joelaf@google.com>
>>>>> Cc: Peter Zijlstra <peterz@infradead.org>
>>>>> Cc: Catalin Marinas <catalin.marinas@arm.com>
>>>>> Cc: Dave Watson <davejwatson@fb.com>
>>>>> Cc: Will Deacon <will.deacon@arm.com>
>>>>> Cc: Andi Kleen <andi@firstfloor.org>
>>>>> Cc: linux-kselftest@vger.kernel.org
>>>>> Cc: "H . Peter Anvin" <hpa@zytor.com>
>>>>> Cc: Chris Lameter <cl@linux.com>
>>>>> Cc: Russell King <linux@arm.linux.org.uk>
>>>>> Cc: Michael Kerrisk <mtk.manpages@gmail.com>
>>>>> Cc: "Paul E . McKenney" <paulmck@linux.vnet.ibm.com>
>>>>> Cc: Paul Turner <pjt@google.com>
>>>>> Cc: Boqun Feng <boqun.feng@gmail.com>
>>>>> Cc: Josh Triplett <josh@joshtriplett.org>
>>>>> Cc: Steven Rostedt <rostedt@goodmis.org>
>>>>> Cc: Ben Maurer <bmaurer@fb.com>
>>>>> Cc: Andy Lutomirski <luto@amacapital.net>
>>>>> Cc: Andrew Morton <akpm@linux-foundation.org>
>>>>> Cc: Linus Torvalds <torvalds@linux-foundation.org>
>>>>> ---
>>>>> tools/testing/selftests/rseq/run_param_test.sh | 7 +++++--
>>>>> 1 file changed, 5 insertions(+), 2 deletions(-)
>>>>>
>>>>> diff --git a/tools/testing/selftests/rseq/run_param_test.sh
>>>>> b/tools/testing/selftests/rseq/run_param_test.sh
>>>>> index 3acd6d75ff9f..e426304fd4a0 100755
>>>>> --- a/tools/testing/selftests/rseq/run_param_test.sh
>>>>> +++ b/tools/testing/selftests/rseq/run_param_test.sh
>>>>> @@ -1,6 +1,8 @@
>>>>> #!/bin/bash
>>>>> # SPDX-License-Identifier: GPL-2.0+ or MIT
>>>>>
>>>>> +NR_CPUS=`grep '^processor' /proc/cpuinfo | wc -l`
>>>>> +
>>>>> EXTRA_ARGS=${@}
>>>>>
>>>>> OLDIFS="$IFS"
>>>>> @@ -28,15 +30,16 @@ IFS="$OLDIFS"
>>>>>
>>>>> REPS=1000
>>>>> SLOW_REPS=100
>>>>> +NR_THREADS=$((6*${NR_CPUS}))
>>>>>
>>>>> function do_tests()
>>>>> {
>>>>> local i=0
>>>>> while [ "$i" -lt "${#TEST_LIST[@]}" ]; do
>>>>> echo "Running test ${TEST_NAME[$i]}"
>>>>> - ./param_test ${TEST_LIST[$i]} -r ${REPS} ${@} ${EXTRA_ARGS} || exit 1
>>>>> + ./param_test ${TEST_LIST[$i]} -r ${REPS} -t ${NR_THREADS} ${@} ${EXTRA_ARGS}
>>>>> || exit 1
>>>>> echo "Running compare-twice test ${TEST_NAME[$i]}"
>>>>> - ./param_test_compare_twice ${TEST_LIST[$i]} -r ${REPS} ${@} ${EXTRA_ARGS} ||
>>>>> exit 1
>>>>> + ./param_test_compare_twice ${TEST_LIST[$i]} -r ${REPS} -t ${NR_THREADS} ${@}
>>>>> ${EXTRA_ARGS} || exit 1
>>>>> let "i++"
>>>>> done
>>>>> }
>>>>
>>>> BTW., when trying to build the rseq self-tests I get this build failure:
>>>>
>>>> dagon:~/tip/tools/testing/selftests/rseq> make
>>>> gcc -O2 -Wall -g -I./ -I../../../../usr/include/ -L./ -Wl,-rpath=./ -shared
>>>> -fPIC rseq.c -lpthread -o
>>>> /home/mingo/tip/tools/testing/selftests/rseq/librseq.so
>>>> gcc -O2 -Wall -g -I./ -I../../../../usr/include/ -L./ -Wl,-rpath=./ basic_test.c
>>>> -lpthread -lrseq -o /home/mingo/tip/tools/testing/selftests/rseq/basic_test
>>>> gcc -O2 -Wall -g -I./ -I../../../../usr/include/ -L./ -Wl,-rpath=./
>>>> basic_percpu_ops_test.c -lpthread -lrseq -o
>>>> /home/mingo/tip/tools/testing/selftests/rseq/basic_percpu_ops_test
>>>> /usr/bin/ld: /tmp/ccuHTWnZ.o: in function `rseq_cmpeqv_storev':
>>>> /home/mingo/tip/tools/testing/selftests/rseq/./rseq-x86.h:84: undefined
>>>> reference to `.L8'
>>>> /usr/bin/ld: /home/mingo/tip/tools/testing/selftests/rseq/./rseq-x86.h:84:
>>>> undefined reference to `.L49'
>>>> /usr/bin/ld: /tmp/ccuHTWnZ.o: in function `rseq_cmpnev_storeoffp_load':
>>>> /home/mingo/tip/tools/testing/selftests/rseq/./rseq-x86.h:141: undefined
>>>> reference to `.L57'
>>>> /usr/bin/ld: /tmp/ccuHTWnZ.o:(__rseq_failure+0x8): undefined reference to `.L8'
>>>> /usr/bin/ld: /tmp/ccuHTWnZ.o:(__rseq_failure+0x14): undefined reference to
>>>> `.L49'
>>>> /usr/bin/ld: /tmp/ccuHTWnZ.o:(__rseq_failure+0x20): undefined reference to
>>>> `.L55'
>>>> collect2: error: ld returned 1 exit status
>>>> make: *** [Makefile:22:
>>>> /home/mingo/tip/tools/testing/selftests/rseq/basic_percpu_ops_test] Error 1
>>>>
>>>> Is this a known problem, or do I miss something from my build environment
>>>> perhaps? Vanilla 64-bit Ubuntu 18.10 (Cosmic).
>>>
>>> It works fine with gcc-7 (gcc version 7.3.0 (Ubuntu 7.3.0-16ubuntu3))
>>> but indeed I get the same failure with gcc-8 (gcc version 8.0.1 20180414
>>> (experimental) [trunk revision 259383] (Ubuntu 8-20180414-1ubuntu2)).
>>>
>>> Thanks for reporting! I will investigate.
>>
>> It looks like gcc-8 optimize away the target of asm goto labels when
>> there are more than one of them on x86-64. I'll try to come up with
>> a simpler reproducer.
>
> It appears to be related to gcc-8 mishandling combination of
> asm goto and thread-local storage input operands on x86-64.
> Here is a simple reproducer:
>
> __thread int var;
>
> static int fct(void)
> {
> asm goto ( "jmp %l[testlabel]\n\t"
> : : [var] "m" (var) : : testlabel);
> return 0;
> testlabel:
FWIW, if I add an empty
asm volatile ("");
here after the label, gcc-8 -O2 builds "something", but the result is
bogus assembly (an endless loop):
main:
.LFB24:
.cfi_startproc
.L2:
subq $8, %rsp
.cfi_def_cfa_offset 16
#APP
# 6 "test-asm-goto.c" 1
jmp .L2
# 0 "" 2
#NO_APP
movl %fs:var@tpoff, %edx
leaq .LC0(%rip), %rsi
movl $1, %edi
xorl %eax, %eax
call __printf_chk@PLT
xorl %eax, %eax
addq $8, %rsp
.cfi_def_cfa_offset 8
ret
.cfi_endproc
Thoughts ?
Thanks,
Mathieu
> return 1;
> }
>
> int main()
> {
> return fct();
> }
>
> building with gcc-7 -O2 is fine. Building with gcc-8 -O0 is
> fine too. Building with gcc-8 -O1 and -O2 fails with:
>
> /tmp/ccuXTFfs.o: In function `main':
> test-asm-goto.c:(.text.startup+0x1): undefined reference to `.L2'
> collect2: error: ld returned 1 exit status
>
> With gcc-7 -O2, the assembly of main has the .L2 label:
>
> main:
> .LFB1:
> .cfi_startproc
> #APP
> # 5 "test-asm-goto.c" 1
> jmp .L2
>
> # 0 "" 2
> #NO_APP
> .L4:
> .L3:
> xorl %eax, %eax
> ret
> .L2:
> movl $1, %eax
> ret
> .cfi_endproc
>
> However, with gcc-8 -O2, it's missing:
>
> main:
> .LFB1:
> .cfi_startproc
> .L3:
> #APP
> # 5 "test-asm-goto.c" 1
> jmp .L2
>
> # 0 "" 2
> #NO_APP
> xorl %eax, %eax
> ret
> .cfi_endproc
>
> It looks like we have a compiler issue. :-/
>
> Thanks,
>
> Mathieu
>
> --
> Mathieu Desnoyers
> EfficiOS Inc.
> http://www.efficios.com
--
Mathieu Desnoyers
EfficiOS Inc.
http://www.efficios.com
^ permalink raw reply
* Re: [PATCH for 5.1 3/3] rseq/selftests: Adapt number of threads to the number of detected cpus
From: shuah @ 2019-04-19 14:17 UTC (permalink / raw)
To: Mathieu Desnoyers, Ingo Molnar
Cc: Thomas Gleixner, linux-kernel, linux-api, Peter Zijlstra,
Paul E . McKenney, Boqun Feng, Andy Lutomirski, Dave Watson,
Paul Turner, Andrew Morton, Russell King, Ingo Molnar,
H. Peter Anvin, Andi Kleen, Chris Lameter, Ben Maurer, rostedt,
Josh Triplett, Linus Torvalds, Catalin Marinas, Will
In-Reply-To: <1863599735.141.1555681723685.JavaMail.zimbra@efficios.com>
On 4/19/19 7:48 AM, Mathieu Desnoyers wrote:
> ----- On Apr 19, 2019, at 9:42 AM, Mathieu Desnoyers mathieu.desnoyers@efficios.com wrote:
>
>> ----- On Apr 19, 2019, at 8:55 AM, Mathieu Desnoyers
>> mathieu.desnoyers@efficios.com wrote:
>>
>>> ----- On Apr 19, 2019, at 8:41 AM, Mathieu Desnoyers
>>> mathieu.desnoyers@efficios.com wrote:
>>>
>>>> ----- On Apr 19, 2019, at 6:38 AM, Ingo Molnar mingo@kernel.org wrote:
>>>>
>>>>> * Mathieu Desnoyers <mathieu.desnoyers@efficios.com> wrote:
>>>>>
>>>>>> On smaller systems, running a test with 200 threads can take a long
>>>>>> time on machines with smaller number of CPUs.
>>>>>>
>>>>>> Detect the number of online cpus at test runtime, and multiply that
>>>>>> by 6 to have 6 rseq threads per cpu preempting each other.
>>>>>>
>>>>>> Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
>>>>>> Cc: Shuah Khan <shuah@kernel.org>
>>>>>> Cc: Thomas Gleixner <tglx@linutronix.de>
>>>>>> Cc: Joel Fernandes <joelaf@google.com>
>>>>>> Cc: Peter Zijlstra <peterz@infradead.org>
>>>>>> Cc: Catalin Marinas <catalin.marinas@arm.com>
>>>>>> Cc: Dave Watson <davejwatson@fb.com>
>>>>>> Cc: Will Deacon <will.deacon@arm.com>
>>>>>> Cc: Andi Kleen <andi@firstfloor.org>
>>>>>> Cc: linux-kselftest@vger.kernel.org
>>>>>> Cc: "H . Peter Anvin" <hpa@zytor.com>
>>>>>> Cc: Chris Lameter <cl@linux.com>
>>>>>> Cc: Russell King <linux@arm.linux.org.uk>
>>>>>> Cc: Michael Kerrisk <mtk.manpages@gmail.com>
>>>>>> Cc: "Paul E . McKenney" <paulmck@linux.vnet.ibm.com>
>>>>>> Cc: Paul Turner <pjt@google.com>
>>>>>> Cc: Boqun Feng <boqun.feng@gmail.com>
>>>>>> Cc: Josh Triplett <josh@joshtriplett.org>
>>>>>> Cc: Steven Rostedt <rostedt@goodmis.org>
>>>>>> Cc: Ben Maurer <bmaurer@fb.com>
>>>>>> Cc: Andy Lutomirski <luto@amacapital.net>
>>>>>> Cc: Andrew Morton <akpm@linux-foundation.org>
>>>>>> Cc: Linus Torvalds <torvalds@linux-foundation.org>
>>>>>> ---
>>>>>> tools/testing/selftests/rseq/run_param_test.sh | 7 +++++--
>>>>>> 1 file changed, 5 insertions(+), 2 deletions(-)
>>>>>>
>>>>>> diff --git a/tools/testing/selftests/rseq/run_param_test.sh
>>>>>> b/tools/testing/selftests/rseq/run_param_test.sh
>>>>>> index 3acd6d75ff9f..e426304fd4a0 100755
>>>>>> --- a/tools/testing/selftests/rseq/run_param_test.sh
>>>>>> +++ b/tools/testing/selftests/rseq/run_param_test.sh
>>>>>> @@ -1,6 +1,8 @@
>>>>>> #!/bin/bash
>>>>>> # SPDX-License-Identifier: GPL-2.0+ or MIT
>>>>>>
>>>>>> +NR_CPUS=`grep '^processor' /proc/cpuinfo | wc -l`
>>>>>> +
>>>>>> EXTRA_ARGS=${@}
>>>>>>
>>>>>> OLDIFS="$IFS"
>>>>>> @@ -28,15 +30,16 @@ IFS="$OLDIFS"
>>>>>>
>>>>>> REPS=1000
>>>>>> SLOW_REPS=100
>>>>>> +NR_THREADS=$((6*${NR_CPUS}))
>>>>>>
>>>>>> function do_tests()
>>>>>> {
>>>>>> local i=0
>>>>>> while [ "$i" -lt "${#TEST_LIST[@]}" ]; do
>>>>>> echo "Running test ${TEST_NAME[$i]}"
>>>>>> - ./param_test ${TEST_LIST[$i]} -r ${REPS} ${@} ${EXTRA_ARGS} || exit 1
>>>>>> + ./param_test ${TEST_LIST[$i]} -r ${REPS} -t ${NR_THREADS} ${@} ${EXTRA_ARGS}
>>>>>> || exit 1
>>>>>> echo "Running compare-twice test ${TEST_NAME[$i]}"
>>>>>> - ./param_test_compare_twice ${TEST_LIST[$i]} -r ${REPS} ${@} ${EXTRA_ARGS} ||
>>>>>> exit 1
>>>>>> + ./param_test_compare_twice ${TEST_LIST[$i]} -r ${REPS} -t ${NR_THREADS} ${@}
>>>>>> ${EXTRA_ARGS} || exit 1
>>>>>> let "i++"
>>>>>> done
>>>>>> }
>>>>>
>>>>> BTW., when trying to build the rseq self-tests I get this build failure:
>>>>>
>>>>> dagon:~/tip/tools/testing/selftests/rseq> make
>>>>> gcc -O2 -Wall -g -I./ -I../../../../usr/include/ -L./ -Wl,-rpath=./ -shared
>>>>> -fPIC rseq.c -lpthread -o
>>>>> /home/mingo/tip/tools/testing/selftests/rseq/librseq.so
>>>>> gcc -O2 -Wall -g -I./ -I../../../../usr/include/ -L./ -Wl,-rpath=./ basic_test.c
>>>>> -lpthread -lrseq -o /home/mingo/tip/tools/testing/selftests/rseq/basic_test
>>>>> gcc -O2 -Wall -g -I./ -I../../../../usr/include/ -L./ -Wl,-rpath=./
>>>>> basic_percpu_ops_test.c -lpthread -lrseq -o
>>>>> /home/mingo/tip/tools/testing/selftests/rseq/basic_percpu_ops_test
>>>>> /usr/bin/ld: /tmp/ccuHTWnZ.o: in function `rseq_cmpeqv_storev':
>>>>> /home/mingo/tip/tools/testing/selftests/rseq/./rseq-x86.h:84: undefined
>>>>> reference to `.L8'
>>>>> /usr/bin/ld: /home/mingo/tip/tools/testing/selftests/rseq/./rseq-x86.h:84:
>>>>> undefined reference to `.L49'
>>>>> /usr/bin/ld: /tmp/ccuHTWnZ.o: in function `rseq_cmpnev_storeoffp_load':
>>>>> /home/mingo/tip/tools/testing/selftests/rseq/./rseq-x86.h:141: undefined
>>>>> reference to `.L57'
>>>>> /usr/bin/ld: /tmp/ccuHTWnZ.o:(__rseq_failure+0x8): undefined reference to `.L8'
>>>>> /usr/bin/ld: /tmp/ccuHTWnZ.o:(__rseq_failure+0x14): undefined reference to
>>>>> `.L49'
>>>>> /usr/bin/ld: /tmp/ccuHTWnZ.o:(__rseq_failure+0x20): undefined reference to
>>>>> `.L55'
>>>>> collect2: error: ld returned 1 exit status
>>>>> make: *** [Makefile:22:
>>>>> /home/mingo/tip/tools/testing/selftests/rseq/basic_percpu_ops_test] Error 1
>>>>>
>>>>> Is this a known problem, or do I miss something from my build environment
>>>>> perhaps? Vanilla 64-bit Ubuntu 18.10 (Cosmic).
>>>>
>>>> It works fine with gcc-7 (gcc version 7.3.0 (Ubuntu 7.3.0-16ubuntu3))
>>>> but indeed I get the same failure with gcc-8 (gcc version 8.0.1 20180414
>>>> (experimental) [trunk revision 259383] (Ubuntu 8-20180414-1ubuntu2)).
>>>>
>>>> Thanks for reporting! I will investigate.
>>>
>>> It looks like gcc-8 optimize away the target of asm goto labels when
>>> there are more than one of them on x86-64. I'll try to come up with
>>> a simpler reproducer.
>>
>> It appears to be related to gcc-8 mishandling combination of
>> asm goto and thread-local storage input operands on x86-64.
>> Here is a simple reproducer:
>>
>> __thread int var;
>>
>> static int fct(void)
>> {
>> asm goto ( "jmp %l[testlabel]\n\t"
>> : : [var] "m" (var) : : testlabel);
>> return 0;
>> testlabel:
>
> FWIW, if I add an empty
>
> asm volatile ("");
>
> here after the label, gcc-8 -O2 builds "something" which is
> a bogus assembler (an endless loop) :
>
> main:
> .LFB24:
> .cfi_startproc
> .L2:
> subq $8, %rsp
> .cfi_def_cfa_offset 16
> #APP
> # 6 "test-asm-goto.c" 1
> jmp .L2
>
> # 0 "" 2
> #NO_APP
> movl %fs:var@tpoff, %edx
> leaq .LC0(%rip), %rsi
> movl $1, %edi
> xorl %eax, %eax
> call __printf_chk@PLT
> xorl %eax, %eax
> addq $8, %rsp
> .cfi_def_cfa_offset 8
> ret
> .cfi_endproc
>
> Thoughts ?
>
Didn't see problems when I tested it before applying it to
linux-kselftest next.
I have gcc version 7.3.0 (Ubuntu 7.3.0-27ubuntu1~18.04)
thanks,
-- Shuah
^ permalink raw reply
* Re: [PATCH for 5.1 3/3] rseq/selftests: Adapt number of threads to the number of detected cpus
From: Mathieu Desnoyers @ 2019-04-19 14:40 UTC (permalink / raw)
To: shuah, Ingo Molnar
Cc: Thomas Gleixner, linux-kernel, linux-api, Peter Zijlstra,
Paul E . McKenney, Boqun Feng, Andy Lutomirski, Dave Watson,
Paul Turner, Andrew Morton, Russell King, Ingo Molnar,
H. Peter Anvin, Andi Kleen, Chris Lameter, Ben Maurer, rostedt,
Josh Triplett, Linus Torvalds, Catalin Marinas, Will
In-Reply-To: <6ba0796c-8a96-f797-265c-37bfb9b4bb71@kernel.org>
----- On Apr 19, 2019, at 10:17 AM, shuah shuah@kernel.org wrote:
> On 4/19/19 7:48 AM, Mathieu Desnoyers wrote:
>> ----- On Apr 19, 2019, at 9:42 AM, Mathieu Desnoyers
>> mathieu.desnoyers@efficios.com wrote:
>>
>>> ----- On Apr 19, 2019, at 8:55 AM, Mathieu Desnoyers
>>> mathieu.desnoyers@efficios.com wrote:
>>>
>>>> ----- On Apr 19, 2019, at 8:41 AM, Mathieu Desnoyers
>>>> mathieu.desnoyers@efficios.com wrote:
>>>>
>>>>> ----- On Apr 19, 2019, at 6:38 AM, Ingo Molnar mingo@kernel.org wrote:
>>>>>
>>>>>> * Mathieu Desnoyers <mathieu.desnoyers@efficios.com> wrote:
>>>>>>
>>>>>>> On smaller systems, running a test with 200 threads can take a long
>>>>>>> time on machines with smaller number of CPUs.
>>>>>>>
>>>>>>> Detect the number of online cpus at test runtime, and multiply that
>>>>>>> by 6 to have 6 rseq threads per cpu preempting each other.
>>>>>>>
>>>>>>> Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
>>>>>>> Cc: Shuah Khan <shuah@kernel.org>
>>>>>>> Cc: Thomas Gleixner <tglx@linutronix.de>
>>>>>>> Cc: Joel Fernandes <joelaf@google.com>
>>>>>>> Cc: Peter Zijlstra <peterz@infradead.org>
>>>>>>> Cc: Catalin Marinas <catalin.marinas@arm.com>
>>>>>>> Cc: Dave Watson <davejwatson@fb.com>
>>>>>>> Cc: Will Deacon <will.deacon@arm.com>
>>>>>>> Cc: Andi Kleen <andi@firstfloor.org>
>>>>>>> Cc: linux-kselftest@vger.kernel.org
>>>>>>> Cc: "H . Peter Anvin" <hpa@zytor.com>
>>>>>>> Cc: Chris Lameter <cl@linux.com>
>>>>>>> Cc: Russell King <linux@arm.linux.org.uk>
>>>>>>> Cc: Michael Kerrisk <mtk.manpages@gmail.com>
>>>>>>> Cc: "Paul E . McKenney" <paulmck@linux.vnet.ibm.com>
>>>>>>> Cc: Paul Turner <pjt@google.com>
>>>>>>> Cc: Boqun Feng <boqun.feng@gmail.com>
>>>>>>> Cc: Josh Triplett <josh@joshtriplett.org>
>>>>>>> Cc: Steven Rostedt <rostedt@goodmis.org>
>>>>>>> Cc: Ben Maurer <bmaurer@fb.com>
>>>>>>> Cc: Andy Lutomirski <luto@amacapital.net>
>>>>>>> Cc: Andrew Morton <akpm@linux-foundation.org>
>>>>>>> Cc: Linus Torvalds <torvalds@linux-foundation.org>
>>>>>>> ---
>>>>>>> tools/testing/selftests/rseq/run_param_test.sh | 7 +++++--
>>>>>>> 1 file changed, 5 insertions(+), 2 deletions(-)
>>>>>>>
>>>>>>> diff --git a/tools/testing/selftests/rseq/run_param_test.sh
>>>>>>> b/tools/testing/selftests/rseq/run_param_test.sh
>>>>>>> index 3acd6d75ff9f..e426304fd4a0 100755
>>>>>>> --- a/tools/testing/selftests/rseq/run_param_test.sh
>>>>>>> +++ b/tools/testing/selftests/rseq/run_param_test.sh
>>>>>>> @@ -1,6 +1,8 @@
>>>>>>> #!/bin/bash
>>>>>>> # SPDX-License-Identifier: GPL-2.0+ or MIT
>>>>>>>
>>>>>>> +NR_CPUS=`grep '^processor' /proc/cpuinfo | wc -l`
>>>>>>> +
>>>>>>> EXTRA_ARGS=${@}
>>>>>>>
>>>>>>> OLDIFS="$IFS"
>>>>>>> @@ -28,15 +30,16 @@ IFS="$OLDIFS"
>>>>>>>
>>>>>>> REPS=1000
>>>>>>> SLOW_REPS=100
>>>>>>> +NR_THREADS=$((6*${NR_CPUS}))
>>>>>>>
>>>>>>> function do_tests()
>>>>>>> {
>>>>>>> local i=0
>>>>>>> while [ "$i" -lt "${#TEST_LIST[@]}" ]; do
>>>>>>> echo "Running test ${TEST_NAME[$i]}"
>>>>>>> - ./param_test ${TEST_LIST[$i]} -r ${REPS} ${@} ${EXTRA_ARGS} || exit 1
>>>>>>> + ./param_test ${TEST_LIST[$i]} -r ${REPS} -t ${NR_THREADS} ${@} ${EXTRA_ARGS}
>>>>>>> || exit 1
>>>>>>> echo "Running compare-twice test ${TEST_NAME[$i]}"
>>>>>>> - ./param_test_compare_twice ${TEST_LIST[$i]} -r ${REPS} ${@} ${EXTRA_ARGS} ||
>>>>>>> exit 1
>>>>>>> + ./param_test_compare_twice ${TEST_LIST[$i]} -r ${REPS} -t ${NR_THREADS} ${@}
>>>>>>> ${EXTRA_ARGS} || exit 1
>>>>>>> let "i++"
>>>>>>> done
>>>>>>> }
>>>>>>
>>>>>> BTW., when trying to build the rseq self-tests I get this build failure:
>>>>>>
>>>>>> dagon:~/tip/tools/testing/selftests/rseq> make
>>>>>> gcc -O2 -Wall -g -I./ -I../../../../usr/include/ -L./ -Wl,-rpath=./ -shared
>>>>>> -fPIC rseq.c -lpthread -o
>>>>>> /home/mingo/tip/tools/testing/selftests/rseq/librseq.so
>>>>>> gcc -O2 -Wall -g -I./ -I../../../../usr/include/ -L./ -Wl,-rpath=./ basic_test.c
>>>>>> -lpthread -lrseq -o /home/mingo/tip/tools/testing/selftests/rseq/basic_test
>>>>>> gcc -O2 -Wall -g -I./ -I../../../../usr/include/ -L./ -Wl,-rpath=./
>>>>>> basic_percpu_ops_test.c -lpthread -lrseq -o
>>>>>> /home/mingo/tip/tools/testing/selftests/rseq/basic_percpu_ops_test
>>>>>> /usr/bin/ld: /tmp/ccuHTWnZ.o: in function `rseq_cmpeqv_storev':
>>>>>> /home/mingo/tip/tools/testing/selftests/rseq/./rseq-x86.h:84: undefined
>>>>>> reference to `.L8'
>>>>>> /usr/bin/ld: /home/mingo/tip/tools/testing/selftests/rseq/./rseq-x86.h:84:
>>>>>> undefined reference to `.L49'
>>>>>> /usr/bin/ld: /tmp/ccuHTWnZ.o: in function `rseq_cmpnev_storeoffp_load':
>>>>>> /home/mingo/tip/tools/testing/selftests/rseq/./rseq-x86.h:141: undefined
>>>>>> reference to `.L57'
>>>>>> /usr/bin/ld: /tmp/ccuHTWnZ.o:(__rseq_failure+0x8): undefined reference to `.L8'
>>>>>> /usr/bin/ld: /tmp/ccuHTWnZ.o:(__rseq_failure+0x14): undefined reference to
>>>>>> `.L49'
>>>>>> /usr/bin/ld: /tmp/ccuHTWnZ.o:(__rseq_failure+0x20): undefined reference to
>>>>>> `.L55'
>>>>>> collect2: error: ld returned 1 exit status
>>>>>> make: *** [Makefile:22:
>>>>>> /home/mingo/tip/tools/testing/selftests/rseq/basic_percpu_ops_test] Error 1
>>>>>>
>>>>>> Is this a known problem, or do I miss something from my build environment
>>>>>> perhaps? Vanilla 64-bit Ubuntu 18.10 (Cosmic).
>>>>>
>>>>> It works fine with gcc-7 (gcc version 7.3.0 (Ubuntu 7.3.0-16ubuntu3))
>>>>> but indeed I get the same failure with gcc-8 (gcc version 8.0.1 20180414
>>>>> (experimental) [trunk revision 259383] (Ubuntu 8-20180414-1ubuntu2)).
>>>>>
>>>>> Thanks for reporting! I will investigate.
>>>>
>>>> It looks like gcc-8 optimize away the target of asm goto labels when
>>>> there are more than one of them on x86-64. I'll try to come up with
>>>> a simpler reproducer.
>>>
>>> It appears to be related to gcc-8 mishandling combination of
>>> asm goto and thread-local storage input operands on x86-64.
>>> Here is a simple reproducer:
>>>
>>> __thread int var;
>>>
>>> static int fct(void)
>>> {
>>> asm goto ( "jmp %l[testlabel]\n\t"
>>> : : [var] "m" (var) : : testlabel);
>>> return 0;
>>> testlabel:
>>
>> FWIW, if I add an empty
>>
>> asm volatile ("");
>>
>> here after the label, gcc-8 -O2 builds "something" which is
>> a bogus assembler (an endless loop) :
>>
>> main:
>> .LFB24:
>> .cfi_startproc
>> .L2:
>> subq $8, %rsp
>> .cfi_def_cfa_offset 16
>> #APP
>> # 6 "test-asm-goto.c" 1
>> jmp .L2
>>
>> # 0 "" 2
>> #NO_APP
>> movl %fs:var@tpoff, %edx
>> leaq .LC0(%rip), %rsi
>> movl $1, %edi
>> xorl %eax, %eax
>> call __printf_chk@PLT
>> xorl %eax, %eax
>> addq $8, %rsp
>> .cfi_def_cfa_offset 8
>> ret
>> .cfi_endproc
>>
>> Thoughts ?
>>
>
> Didn't see problems when I tested it before applying it to
> linux-kselftest next.
>
> I have gcc version 7.3.0 (Ubuntu 7.3.0-27ubuntu1~18.04)
It really appears to be an optimization bug in gcc-8. Considering that
bogus compilers are released in the wild, we can hardly justify using
the compiler feature that triggers the bogus behavior, even if it gets
fixed in the future.
I've prepared a patch that changes the way the __rseq_abi fields are
passed to the inline asm. I pass the address of the __rseq_abi TLS
variable as a register input operand rather than passing each individual
field as an "m" operand.
I will submit it in a separate thread.
By the way, the bug affects both x86-32 (building with gcc-8 -m32) and x86-64.
Thanks,
Mathieu
--
Mathieu Desnoyers
EfficiOS Inc.
http://www.efficios.com
^ permalink raw reply
* [PATCH] rseq/selftests: x86: Work-around bogus gcc-8 optimisation
From: Mathieu Desnoyers @ 2019-04-19 14:53 UTC (permalink / raw)
To: Ingo Molnar
Cc: linux-kernel, Mathieu Desnoyers, Peter Zijlstra, Thomas Gleixner,
Joel Fernandes, Catalin Marinas, Dave Watson, Will Deacon,
Shuah Khan, Andi Kleen, linux-kselftest, H . Peter Anvin,
Chris Lameter, Russell King, Michael Kerrisk, Paul E . McKenney,
Paul Turner, Boqun Feng, Josh Triplett, Steven Rostedt
At least the following versions of gcc-8:
- gcc version 8.0.1 20180414 (experimental) [trunk revision 259383] (Ubuntu 8-20180414-1ubuntu2)
- gcc 8.2.0-7ubuntu1 (Ubuntu 18.10 (Cosmic)),
generate broken assembler for asm goto statements that have a thread-local
storage "m" input operand, on both x86-32 and x86-64. For instance:
__thread int var;
static int fct(void)
{
asm goto ( "jmp %l[testlabel]\n\t"
: : [var] "m" (var) : : testlabel);
return 0;
testlabel:
return 1;
}
int main()
{
return fct();
}
% gcc-8 -O2 -o test-asm-goto test-asm-goto.c
/tmp/ccAdHJbe.o: In function `main':
test-asm-goto.c:(.text.startup+0x1): undefined reference to `.L2'
collect2: error: ld returned 1 exit status
% gcc-8 -m32 -O2 -o test-asm-goto test-asm-goto.c
/tmp/ccREsVXA.o: In function `main':
test-asm-goto.c:(.text.startup+0x1): undefined reference to `.L2'
collect2: error: ld returned 1 exit status
Work around this compiler bug in the rseq-x86.h header by passing the
address of the __rseq_abi TLS variable as a register operand rather than
passing its fields as "m" input operands.
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
CC: Ingo Molnar <mingo@redhat.com>
CC: Peter Zijlstra <peterz@infradead.org>
CC: Thomas Gleixner <tglx@linutronix.de>
CC: Joel Fernandes <joelaf@google.com>
CC: Catalin Marinas <catalin.marinas@arm.com>
CC: Dave Watson <davejwatson@fb.com>
CC: Will Deacon <will.deacon@arm.com>
CC: Shuah Khan <shuah@kernel.org>
CC: Andi Kleen <andi@firstfloor.org>
CC: linux-kselftest@vger.kernel.org
CC: "H . Peter Anvin" <hpa@zytor.com>
CC: Chris Lameter <cl@linux.com>
CC: Russell King <linux@arm.linux.org.uk>
CC: Michael Kerrisk <mtk.manpages@gmail.com>
CC: "Paul E . McKenney" <paulmck@linux.vnet.ibm.com>
CC: Paul Turner <pjt@google.com>
CC: Boqun Feng <boqun.feng@gmail.com>
CC: Josh Triplett <josh@joshtriplett.org>
CC: Steven Rostedt <rostedt@goodmis.org>
CC: Ben Maurer <bmaurer@fb.com>
CC: linux-api@vger.kernel.org
CC: Andy Lutomirski <luto@amacapital.net>
CC: Andrew Morton <akpm@linux-foundation.org>
CC: Linus Torvalds <torvalds@linux-foundation.org>
---
tools/testing/selftests/rseq/rseq-x86.h | 144 ++++++++++++------------
1 file changed, 70 insertions(+), 74 deletions(-)
diff --git a/tools/testing/selftests/rseq/rseq-x86.h b/tools/testing/selftests/rseq/rseq-x86.h
index 1780faf30f28..b2da6004fe30 100644
--- a/tools/testing/selftests/rseq/rseq-x86.h
+++ b/tools/testing/selftests/rseq/rseq-x86.h
@@ -16,6 +16,16 @@
*/
#define RSEQ_SIG 0x53053053
+/*
+ * Due to a compiler optimization bug in gcc-8 with asm goto and TLS asm input
+ * operands, we cannot use "m" input operands, and rather pass the __rseq_abi
+ * address through a "r" input operand.
+ */
+
+/* Offset of cpu_id and rseq_cs fields in struct rseq. */
+#define RSEQ_CPU_ID_OFFSET 4
+#define RSEQ_CS_OFFSET 8
+
#ifdef __x86_64__
#define rseq_smp_mb() \
@@ -75,12 +85,12 @@ do { \
#define RSEQ_ASM_STORE_RSEQ_CS(label, cs_label, rseq_cs) \
RSEQ_INJECT_ASM(1) \
"leaq " __rseq_str(cs_label) "(%%rip), %%rax\n\t" \
- "movq %%rax, %[" __rseq_str(rseq_cs) "]\n\t" \
+ "movq %%rax, " __rseq_str(rseq_cs) "\n\t" \
__rseq_str(label) ":\n\t"
#define RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, label) \
RSEQ_INJECT_ASM(2) \
- "cmpl %[" __rseq_str(cpu_id) "], %[" __rseq_str(current_cpu_id) "]\n\t" \
+ "cmpl %[" __rseq_str(cpu_id) "], " __rseq_str(current_cpu_id) "\n\t" \
"jnz " __rseq_str(label) "\n\t"
#define RSEQ_ASM_DEFINE_ABORT(label, teardown, abort_label) \
@@ -113,14 +123,14 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
#endif
/* Start rseq by storing table entry pointer into rseq_cs. */
- RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi]))
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f)
RSEQ_INJECT_ASM(3)
"cmpq %[v], %[expect]\n\t"
"jnz %l[cmpfail]\n\t"
RSEQ_INJECT_ASM(4)
#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), %l[error1])
"cmpq %[v], %[expect]\n\t"
"jnz %l[error2]\n\t"
#endif
@@ -131,8 +141,7 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
RSEQ_ASM_DEFINE_ABORT(4, "", abort)
: /* gcc asm goto does not allow outputs */
: [cpu_id] "r" (cpu),
- [current_cpu_id] "m" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [rseq_abi] "r" (&__rseq_abi),
[v] "m" (*v),
[expect] "r" (expect),
[newv] "r" (newv)
@@ -175,15 +184,15 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
#endif
/* Start rseq by storing table entry pointer into rseq_cs. */
- RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi]))
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f)
RSEQ_INJECT_ASM(3)
"movq %[v], %%rbx\n\t"
"cmpq %%rbx, %[expectnot]\n\t"
"je %l[cmpfail]\n\t"
RSEQ_INJECT_ASM(4)
#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), %l[error1])
"movq %[v], %%rbx\n\t"
"cmpq %%rbx, %[expectnot]\n\t"
"je %l[error2]\n\t"
@@ -198,8 +207,7 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
RSEQ_ASM_DEFINE_ABORT(4, "", abort)
: /* gcc asm goto does not allow outputs */
: [cpu_id] "r" (cpu),
- [current_cpu_id] "m" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [rseq_abi] "r" (&__rseq_abi),
/* final store input */
[v] "m" (*v),
[expectnot] "r" (expectnot),
@@ -237,11 +245,11 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu)
RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
#endif
/* Start rseq by storing table entry pointer into rseq_cs. */
- RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi]))
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f)
RSEQ_INJECT_ASM(3)
#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), %l[error1])
#endif
/* final store */
"addq %[count], %[v]\n\t"
@@ -250,8 +258,7 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu)
RSEQ_ASM_DEFINE_ABORT(4, "", abort)
: /* gcc asm goto does not allow outputs */
: [cpu_id] "r" (cpu),
- [current_cpu_id] "m" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [rseq_abi] "r" (&__rseq_abi),
/* final store input */
[v] "m" (*v),
[count] "er" (count)
@@ -287,14 +294,14 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
#endif
/* Start rseq by storing table entry pointer into rseq_cs. */
- RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi]))
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f)
RSEQ_INJECT_ASM(3)
"cmpq %[v], %[expect]\n\t"
"jnz %l[cmpfail]\n\t"
RSEQ_INJECT_ASM(4)
#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), %l[error1])
"cmpq %[v], %[expect]\n\t"
"jnz %l[error2]\n\t"
#endif
@@ -308,8 +315,7 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
RSEQ_ASM_DEFINE_ABORT(4, "", abort)
: /* gcc asm goto does not allow outputs */
: [cpu_id] "r" (cpu),
- [current_cpu_id] "m" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [rseq_abi] "r" (&__rseq_abi),
/* try store input */
[v2] "m" (*v2),
[newv2] "r" (newv2),
@@ -363,8 +369,8 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error3])
#endif
/* Start rseq by storing table entry pointer into rseq_cs. */
- RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi]))
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f)
RSEQ_INJECT_ASM(3)
"cmpq %[v], %[expect]\n\t"
"jnz %l[cmpfail]\n\t"
@@ -373,7 +379,7 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
"jnz %l[cmpfail]\n\t"
RSEQ_INJECT_ASM(5)
#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), %l[error1])
"cmpq %[v], %[expect]\n\t"
"jnz %l[error2]\n\t"
"cmpq %[v2], %[expect2]\n\t"
@@ -386,8 +392,7 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
RSEQ_ASM_DEFINE_ABORT(4, "", abort)
: /* gcc asm goto does not allow outputs */
: [cpu_id] "r" (cpu),
- [current_cpu_id] "m" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [rseq_abi] "r" (&__rseq_abi),
/* cmp2 input */
[v2] "m" (*v2),
[expect2] "r" (expect2),
@@ -438,14 +443,14 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect,
"movq %[dst], %[rseq_scratch1]\n\t"
"movq %[len], %[rseq_scratch2]\n\t"
/* Start rseq by storing table entry pointer into rseq_cs. */
- RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi]))
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f)
RSEQ_INJECT_ASM(3)
"cmpq %[v], %[expect]\n\t"
"jnz 5f\n\t"
RSEQ_INJECT_ASM(4)
#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 6f)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 6f)
"cmpq %[v], %[expect]\n\t"
"jnz 7f\n\t"
#endif
@@ -493,8 +498,7 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect,
#endif
: /* gcc asm goto does not allow outputs */
: [cpu_id] "r" (cpu),
- [current_cpu_id] "m" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [rseq_abi] "r" (&__rseq_abi),
/* final store input */
[v] "m" (*v),
[expect] "r" (expect),
@@ -602,12 +606,12 @@ do { \
#define RSEQ_ASM_STORE_RSEQ_CS(label, cs_label, rseq_cs) \
RSEQ_INJECT_ASM(1) \
- "movl $" __rseq_str(cs_label) ", %[rseq_cs]\n\t" \
+ "movl $" __rseq_str(cs_label) ", " __rseq_str(rseq_cs) "\n\t" \
__rseq_str(label) ":\n\t"
#define RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, label) \
RSEQ_INJECT_ASM(2) \
- "cmpl %[" __rseq_str(cpu_id) "], %[" __rseq_str(current_cpu_id) "]\n\t" \
+ "cmpl %[" __rseq_str(cpu_id) "], " __rseq_str(current_cpu_id) "\n\t" \
"jnz " __rseq_str(label) "\n\t"
#define RSEQ_ASM_DEFINE_ABORT(label, teardown, abort_label) \
@@ -640,14 +644,14 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
#endif
/* Start rseq by storing table entry pointer into rseq_cs. */
- RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi]))
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f)
RSEQ_INJECT_ASM(3)
"cmpl %[v], %[expect]\n\t"
"jnz %l[cmpfail]\n\t"
RSEQ_INJECT_ASM(4)
#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), %l[error1])
"cmpl %[v], %[expect]\n\t"
"jnz %l[error2]\n\t"
#endif
@@ -658,8 +662,7 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
RSEQ_ASM_DEFINE_ABORT(4, "", abort)
: /* gcc asm goto does not allow outputs */
: [cpu_id] "r" (cpu),
- [current_cpu_id] "m" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [rseq_abi] "r" (&__rseq_abi),
[v] "m" (*v),
[expect] "r" (expect),
[newv] "r" (newv)
@@ -702,15 +705,15 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
#endif
/* Start rseq by storing table entry pointer into rseq_cs. */
- RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi]))
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f)
RSEQ_INJECT_ASM(3)
"movl %[v], %%ebx\n\t"
"cmpl %%ebx, %[expectnot]\n\t"
"je %l[cmpfail]\n\t"
RSEQ_INJECT_ASM(4)
#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), %l[error1])
"movl %[v], %%ebx\n\t"
"cmpl %%ebx, %[expectnot]\n\t"
"je %l[error2]\n\t"
@@ -725,8 +728,7 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
RSEQ_ASM_DEFINE_ABORT(4, "", abort)
: /* gcc asm goto does not allow outputs */
: [cpu_id] "r" (cpu),
- [current_cpu_id] "m" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [rseq_abi] "r" (&__rseq_abi),
/* final store input */
[v] "m" (*v),
[expectnot] "r" (expectnot),
@@ -764,11 +766,11 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu)
RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
#endif
/* Start rseq by storing table entry pointer into rseq_cs. */
- RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi]))
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f)
RSEQ_INJECT_ASM(3)
#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), %l[error1])
#endif
/* final store */
"addl %[count], %[v]\n\t"
@@ -777,8 +779,7 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu)
RSEQ_ASM_DEFINE_ABORT(4, "", abort)
: /* gcc asm goto does not allow outputs */
: [cpu_id] "r" (cpu),
- [current_cpu_id] "m" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [rseq_abi] "r" (&__rseq_abi),
/* final store input */
[v] "m" (*v),
[count] "ir" (count)
@@ -814,14 +815,14 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
#endif
/* Start rseq by storing table entry pointer into rseq_cs. */
- RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi]))
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f)
RSEQ_INJECT_ASM(3)
"cmpl %[v], %[expect]\n\t"
"jnz %l[cmpfail]\n\t"
RSEQ_INJECT_ASM(4)
#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), %l[error1])
"cmpl %[v], %[expect]\n\t"
"jnz %l[error2]\n\t"
#endif
@@ -836,8 +837,7 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
RSEQ_ASM_DEFINE_ABORT(4, "", abort)
: /* gcc asm goto does not allow outputs */
: [cpu_id] "r" (cpu),
- [current_cpu_id] "m" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [rseq_abi] "r" (&__rseq_abi),
/* try store input */
[v2] "m" (*v2),
[newv2] "m" (newv2),
@@ -881,15 +881,15 @@ int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect,
RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
#endif
/* Start rseq by storing table entry pointer into rseq_cs. */
- RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi]))
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f)
RSEQ_INJECT_ASM(3)
"movl %[expect], %%eax\n\t"
"cmpl %[v], %%eax\n\t"
"jnz %l[cmpfail]\n\t"
RSEQ_INJECT_ASM(4)
#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), %l[error1])
"movl %[expect], %%eax\n\t"
"cmpl %[v], %%eax\n\t"
"jnz %l[error2]\n\t"
@@ -905,8 +905,7 @@ int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect,
RSEQ_ASM_DEFINE_ABORT(4, "", abort)
: /* gcc asm goto does not allow outputs */
: [cpu_id] "r" (cpu),
- [current_cpu_id] "m" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [rseq_abi] "r" (&__rseq_abi),
/* try store input */
[v2] "m" (*v2),
[newv2] "r" (newv2),
@@ -952,8 +951,8 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error3])
#endif
/* Start rseq by storing table entry pointer into rseq_cs. */
- RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi]))
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f)
RSEQ_INJECT_ASM(3)
"cmpl %[v], %[expect]\n\t"
"jnz %l[cmpfail]\n\t"
@@ -962,7 +961,7 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
"jnz %l[cmpfail]\n\t"
RSEQ_INJECT_ASM(5)
#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), %l[error1])
"cmpl %[v], %[expect]\n\t"
"jnz %l[error2]\n\t"
"cmpl %[expect2], %[v2]\n\t"
@@ -976,8 +975,7 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
RSEQ_ASM_DEFINE_ABORT(4, "", abort)
: /* gcc asm goto does not allow outputs */
: [cpu_id] "r" (cpu),
- [current_cpu_id] "m" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [rseq_abi] "r" (&__rseq_abi),
/* cmp2 input */
[v2] "m" (*v2),
[expect2] "r" (expect2),
@@ -1029,15 +1027,15 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect,
"movl %[dst], %[rseq_scratch1]\n\t"
"movl %[len], %[rseq_scratch2]\n\t"
/* Start rseq by storing table entry pointer into rseq_cs. */
- RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi]))
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f)
RSEQ_INJECT_ASM(3)
"movl %[expect], %%eax\n\t"
"cmpl %%eax, %[v]\n\t"
"jnz 5f\n\t"
RSEQ_INJECT_ASM(4)
#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 6f)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 6f)
"movl %[expect], %%eax\n\t"
"cmpl %%eax, %[v]\n\t"
"jnz 7f\n\t"
@@ -1087,8 +1085,7 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect,
#endif
: /* gcc asm goto does not allow outputs */
: [cpu_id] "r" (cpu),
- [current_cpu_id] "m" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [rseq_abi] "r" (&__rseq_abi),
/* final store input */
[v] "m" (*v),
[expect] "m" (expect),
@@ -1142,15 +1139,15 @@ int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect,
"movl %[dst], %[rseq_scratch1]\n\t"
"movl %[len], %[rseq_scratch2]\n\t"
/* Start rseq by storing table entry pointer into rseq_cs. */
- RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi]))
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f)
RSEQ_INJECT_ASM(3)
"movl %[expect], %%eax\n\t"
"cmpl %%eax, %[v]\n\t"
"jnz 5f\n\t"
RSEQ_INJECT_ASM(4)
#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 6f)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 6f)
"movl %[expect], %%eax\n\t"
"cmpl %%eax, %[v]\n\t"
"jnz 7f\n\t"
@@ -1201,8 +1198,7 @@ int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect,
#endif
: /* gcc asm goto does not allow outputs */
: [cpu_id] "r" (cpu),
- [current_cpu_id] "m" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [rseq_abi] "r" (&__rseq_abi),
/* final store input */
[v] "m" (*v),
[expect] "m" (expect),
--
2.17.1
^ permalink raw reply related
* Re: [PATCH v3 4/4] samples: show race-free pidfd metadata access
From: Oleg Nesterov @ 2019-04-19 15:30 UTC (permalink / raw)
To: Christian Brauner
Cc: torvalds, viro, jannh, dhowells, linux-api, linux-kernel, serge,
luto, arnd, ebiederm, keescook, tglx, mtk.manpages, akpm, cyphar,
joel, dancol, Jann Horn
In-Reply-To: <20190419120904.27502-5-christian@brauner.io>
On 04/19, Christian Brauner wrote:
>
> +int main(int argc, char *argv[])
> +{
> + int ret = EXIT_FAILURE;
> + char buf[4096] = { 0 };
> + pid_t pid;
> + int pidfd, procfd, statusfd;
I think you need to initialize pidfd = 0 in this version ;) Otherwise,
Reviewed-by: Oleg Nesterov <oleg@redhat.com>
^ permalink raw reply
* Re: [PATCH v3 4/4] samples: show race-free pidfd metadata access
From: Christian Brauner @ 2019-04-19 16:38 UTC (permalink / raw)
To: Oleg Nesterov
Cc: torvalds, viro, jannh, dhowells, linux-api, linux-kernel, serge,
luto, arnd, ebiederm, keescook, tglx, mtk.manpages, akpm, cyphar,
joel, dancol, Jann Horn
In-Reply-To: <20190419153047.GA12228@redhat.com>
On Fri, Apr 19, 2019 at 05:30:48PM +0200, Oleg Nesterov wrote:
> On 04/19, Christian Brauner wrote:
> >
> > +int main(int argc, char *argv[])
> > +{
> > + int ret = EXIT_FAILURE;
> > + char buf[4096] = { 0 };
> > + pid_t pid;
> > + int pidfd, procfd, statusfd;
>
> I think you need to initialize pidfd = 0 in this version ;) Otherwise,
Yes, you are right. Seems I got saved by the compiler since I compile a
kernel and run a test even when I just change the commit message. :)
Will fix that up and add your Reviewed-by, but I think I'll spare everyone
a v4 for this if that's ok.
>
> Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Thank you!
Christian
^ permalink raw reply
* Re: [PATCH for 5.1 3/3] rseq/selftests: Adapt number of threads to the number of detected cpus
From: shuah @ 2019-04-19 18:57 UTC (permalink / raw)
To: Mathieu Desnoyers, Ingo Molnar
Cc: Thomas Gleixner, linux-kernel, linux-api, Peter Zijlstra,
Paul E . McKenney, Boqun Feng, Andy Lutomirski, Dave Watson,
Paul Turner, Andrew Morton, Russell King, Ingo Molnar,
H. Peter Anvin, Andi Kleen, Chris Lameter, Ben Maurer, rostedt,
Josh Triplett, Linus Torvalds, Catalin Marinas, Will
In-Reply-To: <580328197.148.1555684824260.JavaMail.zimbra@efficios.com>
On 4/19/19 8:40 AM, Mathieu Desnoyers wrote:
> ----- On Apr 19, 2019, at 10:17 AM, shuah shuah@kernel.org wrote:
>
>> On 4/19/19 7:48 AM, Mathieu Desnoyers wrote:
>>> ----- On Apr 19, 2019, at 9:42 AM, Mathieu Desnoyers
>>> mathieu.desnoyers@efficios.com wrote:
>>>
>>>> ----- On Apr 19, 2019, at 8:55 AM, Mathieu Desnoyers
>>>> mathieu.desnoyers@efficios.com wrote:
>>>>
>>>>> ----- On Apr 19, 2019, at 8:41 AM, Mathieu Desnoyers
>>>>> mathieu.desnoyers@efficios.com wrote:
>>>>>
>>>>>> ----- On Apr 19, 2019, at 6:38 AM, Ingo Molnar mingo@kernel.org wrote:
>>>>>>
>>>>>>> * Mathieu Desnoyers <mathieu.desnoyers@efficios.com> wrote:
>>>>>>>
>>>>>>>> On smaller systems, running a test with 200 threads can take a long
>>>>>>>> time on machines with smaller number of CPUs.
>>>>>>>>
>>>>>>>> Detect the number of online cpus at test runtime, and multiply that
>>>>>>>> by 6 to have 6 rseq threads per cpu preempting each other.
>>>>>>>>
>>>>>>>> Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
>>>>>>>> Cc: Shuah Khan <shuah@kernel.org>
>>>>>>>> Cc: Thomas Gleixner <tglx@linutronix.de>
>>>>>>>> Cc: Joel Fernandes <joelaf@google.com>
>>>>>>>> Cc: Peter Zijlstra <peterz@infradead.org>
>>>>>>>> Cc: Catalin Marinas <catalin.marinas@arm.com>
>>>>>>>> Cc: Dave Watson <davejwatson@fb.com>
>>>>>>>> Cc: Will Deacon <will.deacon@arm.com>
>>>>>>>> Cc: Andi Kleen <andi@firstfloor.org>
>>>>>>>> Cc: linux-kselftest@vger.kernel.org
>>>>>>>> Cc: "H . Peter Anvin" <hpa@zytor.com>
>>>>>>>> Cc: Chris Lameter <cl@linux.com>
>>>>>>>> Cc: Russell King <linux@arm.linux.org.uk>
>>>>>>>> Cc: Michael Kerrisk <mtk.manpages@gmail.com>
>>>>>>>> Cc: "Paul E . McKenney" <paulmck@linux.vnet.ibm.com>
>>>>>>>> Cc: Paul Turner <pjt@google.com>
>>>>>>>> Cc: Boqun Feng <boqun.feng@gmail.com>
>>>>>>>> Cc: Josh Triplett <josh@joshtriplett.org>
>>>>>>>> Cc: Steven Rostedt <rostedt@goodmis.org>
>>>>>>>> Cc: Ben Maurer <bmaurer@fb.com>
>>>>>>>> Cc: Andy Lutomirski <luto@amacapital.net>
>>>>>>>> Cc: Andrew Morton <akpm@linux-foundation.org>
>>>>>>>> Cc: Linus Torvalds <torvalds@linux-foundation.org>
>>>>>>>> ---
>>>>>>>> tools/testing/selftests/rseq/run_param_test.sh | 7 +++++--
>>>>>>>> 1 file changed, 5 insertions(+), 2 deletions(-)
>>>>>>>>
>>>>>>>> diff --git a/tools/testing/selftests/rseq/run_param_test.sh
>>>>>>>> b/tools/testing/selftests/rseq/run_param_test.sh
>>>>>>>> index 3acd6d75ff9f..e426304fd4a0 100755
>>>>>>>> --- a/tools/testing/selftests/rseq/run_param_test.sh
>>>>>>>> +++ b/tools/testing/selftests/rseq/run_param_test.sh
>>>>>>>> @@ -1,6 +1,8 @@
>>>>>>>> #!/bin/bash
>>>>>>>> # SPDX-License-Identifier: GPL-2.0+ or MIT
>>>>>>>>
>>>>>>>> +NR_CPUS=`grep '^processor' /proc/cpuinfo | wc -l`
>>>>>>>> +
>>>>>>>> EXTRA_ARGS=${@}
>>>>>>>>
>>>>>>>> OLDIFS="$IFS"
>>>>>>>> @@ -28,15 +30,16 @@ IFS="$OLDIFS"
>>>>>>>>
>>>>>>>> REPS=1000
>>>>>>>> SLOW_REPS=100
>>>>>>>> +NR_THREADS=$((6*${NR_CPUS}))
>>>>>>>>
>>>>>>>> function do_tests()
>>>>>>>> {
>>>>>>>> local i=0
>>>>>>>> while [ "$i" -lt "${#TEST_LIST[@]}" ]; do
>>>>>>>> echo "Running test ${TEST_NAME[$i]}"
>>>>>>>> - ./param_test ${TEST_LIST[$i]} -r ${REPS} ${@} ${EXTRA_ARGS} || exit 1
>>>>>>>> + ./param_test ${TEST_LIST[$i]} -r ${REPS} -t ${NR_THREADS} ${@} ${EXTRA_ARGS}
>>>>>>>> || exit 1
>>>>>>>> echo "Running compare-twice test ${TEST_NAME[$i]}"
>>>>>>>> - ./param_test_compare_twice ${TEST_LIST[$i]} -r ${REPS} ${@} ${EXTRA_ARGS} ||
>>>>>>>> exit 1
>>>>>>>> + ./param_test_compare_twice ${TEST_LIST[$i]} -r ${REPS} -t ${NR_THREADS} ${@}
>>>>>>>> ${EXTRA_ARGS} || exit 1
>>>>>>>> let "i++"
>>>>>>>> done
>>>>>>>> }
>>>>>>>
>>>>>>> BTW., when trying to build the rseq self-tests I get this build failure:
>>>>>>>
>>>>>>> dagon:~/tip/tools/testing/selftests/rseq> make
>>>>>>> gcc -O2 -Wall -g -I./ -I../../../../usr/include/ -L./ -Wl,-rpath=./ -shared
>>>>>>> -fPIC rseq.c -lpthread -o
>>>>>>> /home/mingo/tip/tools/testing/selftests/rseq/librseq.so
>>>>>>> gcc -O2 -Wall -g -I./ -I../../../../usr/include/ -L./ -Wl,-rpath=./ basic_test.c
>>>>>>> -lpthread -lrseq -o /home/mingo/tip/tools/testing/selftests/rseq/basic_test
>>>>>>> gcc -O2 -Wall -g -I./ -I../../../../usr/include/ -L./ -Wl,-rpath=./
>>>>>>> basic_percpu_ops_test.c -lpthread -lrseq -o
>>>>>>> /home/mingo/tip/tools/testing/selftests/rseq/basic_percpu_ops_test
>>>>>>> /usr/bin/ld: /tmp/ccuHTWnZ.o: in function `rseq_cmpeqv_storev':
>>>>>>> /home/mingo/tip/tools/testing/selftests/rseq/./rseq-x86.h:84: undefined
>>>>>>> reference to `.L8'
>>>>>>> /usr/bin/ld: /home/mingo/tip/tools/testing/selftests/rseq/./rseq-x86.h:84:
>>>>>>> undefined reference to `.L49'
>>>>>>> /usr/bin/ld: /tmp/ccuHTWnZ.o: in function `rseq_cmpnev_storeoffp_load':
>>>>>>> /home/mingo/tip/tools/testing/selftests/rseq/./rseq-x86.h:141: undefined
>>>>>>> reference to `.L57'
>>>>>>> /usr/bin/ld: /tmp/ccuHTWnZ.o:(__rseq_failure+0x8): undefined reference to `.L8'
>>>>>>> /usr/bin/ld: /tmp/ccuHTWnZ.o:(__rseq_failure+0x14): undefined reference to
>>>>>>> `.L49'
>>>>>>> /usr/bin/ld: /tmp/ccuHTWnZ.o:(__rseq_failure+0x20): undefined reference to
>>>>>>> `.L55'
>>>>>>> collect2: error: ld returned 1 exit status
>>>>>>> make: *** [Makefile:22:
>>>>>>> /home/mingo/tip/tools/testing/selftests/rseq/basic_percpu_ops_test] Error 1
>>>>>>>
>>>>>>> Is this a known problem, or do I miss something from my build environment
>>>>>>> perhaps? Vanilla 64-bit Ubuntu 18.10 (Cosmic).
>>>>>>
>>>>>> It works fine with gcc-7 (gcc version 7.3.0 (Ubuntu 7.3.0-16ubuntu3))
>>>>>> but indeed I get the same failure with gcc-8 (gcc version 8.0.1 20180414
>>>>>> (experimental) [trunk revision 259383] (Ubuntu 8-20180414-1ubuntu2)).
>>>>>>
>>>>>> Thanks for reporting! I will investigate.
>>>>>
>>>>> It looks like gcc-8 optimize away the target of asm goto labels when
>>>>> there are more than one of them on x86-64. I'll try to come up with
>>>>> a simpler reproducer.
>>>>
>>>> It appears to be related to gcc-8 mishandling combination of
>>>> asm goto and thread-local storage input operands on x86-64.
>>>> Here is a simple reproducer:
>>>>
>>>> __thread int var;
>>>>
>>>> static int fct(void)
>>>> {
>>>> asm goto ( "jmp %l[testlabel]\n\t"
>>>> : : [var] "m" (var) : : testlabel);
>>>> return 0;
>>>> testlabel:
>>>
>>> FWIW, if I add an empty
>>>
>>> asm volatile ("");
>>>
>>> here after the label, gcc-8 -O2 builds "something" which is
>>> a bogus assembler (an endless loop) :
>>>
>>> main:
>>> .LFB24:
>>> .cfi_startproc
>>> .L2:
>>> subq $8, %rsp
>>> .cfi_def_cfa_offset 16
>>> #APP
>>> # 6 "test-asm-goto.c" 1
>>> jmp .L2
>>>
>>> # 0 "" 2
>>> #NO_APP
>>> movl %fs:var@tpoff, %edx
>>> leaq .LC0(%rip), %rsi
>>> movl $1, %edi
>>> xorl %eax, %eax
>>> call __printf_chk@PLT
>>> xorl %eax, %eax
>>> addq $8, %rsp
>>> .cfi_def_cfa_offset 8
>>> ret
>>> .cfi_endproc
>>>
>>> Thoughts ?
>>>
>>
>> Didn't see problems when I tested it before applying it to
>> linux-kselftest next.
>>
>> I have gcc version 7.3.0 (Ubuntu 7.3.0-27ubuntu1~18.04)
>
> It really appears to be an optimization bug in gcc-8. Considering that
> bogus compilers are released in the wild, we can hardly justify using
> the compiler feature that triggers the bogus behavior, even if it gets
> fixed in the future.
>
> I've prepared a patch that changes the way the __rseq_abi fields are
> passed to the inline asm. I pass the address of the __rseq_abi TLS
> as a register input operand rather than each individual field as "m"
> operand.
>
> I will submit it in a separate thread.
>
> By the way, it affects both x86-32 (building with gcc-8 -m32) and x86-64.
>
Should I drop this patch that is currently in linux-kselftest next? Just
confirming if your new patch is supposed to be applied on top of this
one or not?
thanks,
-- Shuah
^ permalink raw reply
* Re: [PATCH v3 0/4] clone: add CLONE_PIDFD
From: Christian Brauner @ 2019-04-19 20:05 UTC (permalink / raw)
To: torvalds, viro, jannh, dhowells, oleg, linux-api, linux-kernel
Cc: serge, luto, arnd, ebiederm, keescook, tglx, mtk.manpages, akpm,
cyphar, joel, dancol, Jann Horn
In-Reply-To: <20190419120904.27502-1-christian@brauner.io>
On Fri, Apr 19, 2019 at 02:09:00PM +0200, Christian Brauner wrote:
> Hey,
>
> /* v3 summary */
> After a brief discussion we decided to block CLONE_PIDFD with
> CLONE_THREAD for now. Not because it is not possible but because we
> don't have a use-case yet and blocking it makes the initial work for
> pidfd polling easier. However, it is possible to simply flick the
> switch later.
I have moved v3 into my for-next branch which means it should show up in
linux-next for some testing soon. All versions of the CLONE_PIDFD
patches are on top of v5.1-rc4.
Thanks!
Christian
^ permalink raw reply
* Re: [PATCH for 5.1 3/3] rseq/selftests: Adapt number of threads to the number of detected cpus
From: Mathieu Desnoyers @ 2019-04-19 20:59 UTC (permalink / raw)
To: shuah
Cc: Ingo Molnar, Thomas Gleixner, linux-kernel, linux-api,
Peter Zijlstra, Paul E . McKenney, Boqun Feng, Andy Lutomirski,
Dave Watson, Paul Turner, Andrew Morton, Russell King,
Ingo Molnar, H. Peter Anvin, Andi Kleen, Chris Lameter,
Ben Maurer, rostedt, Josh Triplett, Linus Torvalds,
Catalin Marinas <cat>
In-Reply-To: <cfcbace7-363d-e45e-11da-8efb200783b7@kernel.org>
----- On Apr 19, 2019, at 2:57 PM, shuah shuah@kernel.org wrote:
[...]
> Should I drop this patch that is currently in linux-kseltest next? Just
> confirming if your new patch is supposed to be applied on top of this
> one or not?
We should keep this patch in linux-kselftest next. The fix applies on top
of it.
Thanks,
Mathieu
--
Mathieu Desnoyers
EfficiOS Inc.
http://www.efficios.com
^ permalink raw reply
* Re: [PATCH for 5.1 3/3] rseq/selftests: Adapt number of threads to the number of detected cpus
From: shuah @ 2019-04-19 21:03 UTC (permalink / raw)
To: Mathieu Desnoyers
Cc: Ingo Molnar, Thomas Gleixner, linux-kernel, linux-api,
Peter Zijlstra, Paul E . McKenney, Boqun Feng, Andy Lutomirski,
Dave Watson, Paul Turner, Andrew Morton, Russell King,
Ingo Molnar, H. Peter Anvin, Andi Kleen, Chris Lameter,
Ben Maurer, rostedt, Josh Triplett, Linus Torvalds,
Catalin Marinas <cat>
In-Reply-To: <1136505826.203.1555707541777.JavaMail.zimbra@efficios.com>
On 4/19/19 2:59 PM, Mathieu Desnoyers wrote:
> ----- On Apr 19, 2019, at 2:57 PM, shuah shuah@kernel.org wrote:
> [...]
>> Should I drop this patch that is currently in linux-kseltest next? Just
>> confirming if your new patch is supposed to be applied on top of this
>> one or not?
>
> We should keep this patch in linux-kselftest next. The fix applies on top
> of it.
>
Will do. Thanks for a quick response.
-- Shuah
^ permalink raw reply
* Re: RFC: on adding new CLONE_* flags [WAS Re: [PATCH 0/4] clone: add CLONE_PIDFD]
From: Kevin Easton @ 2019-04-20 7:14 UTC (permalink / raw)
To: Andy Lutomirski
Cc: Aleksa Sarai, Enrico Weigelt, metux IT consult, Christian Brauner,
Linus Torvalds, Al Viro, Jann Horn, David Howells, Linux API,
LKML, Serge E. Hallyn, Arnd Bergmann, Eric W. Biederman,
Kees Cook, Thomas Gleixner, Michael Kerrisk, Andrew Morton,
Oleg Nesterov, Joel Fernandes, Daniel Colascione
In-Reply-To: <CALCETrWxMnaPvwicqkMLswMynWvJVteazD-bFv3ZnBKWp-1joQ@mail.gmail.com>
On Mon, Apr 15, 2019 at 01:29:23PM -0700, Andy Lutomirski wrote:
> On Mon, Apr 15, 2019 at 12:59 PM Aleksa Sarai <cyphar@cyphar.com> wrote:
> >
> > On 2019-04-15, Enrico Weigelt, metux IT consult <lkml@metux.net> wrote:
> > > > This patchset makes it possible to retrieve pid file descriptors at
> > > > process creation time by introducing the new flag CLONE_PIDFD to the
> > > > clone() system call as previously discussed.
> > >
> > > Sorry, for highjacking this thread, but I'm curious on what things to
> > > consider when introducing new CLONE_* flags.
> > >
> > > The reason I'm asking is:
> > >
> > > I'm working on implementing plan9-like fs namespaces, where unprivileged
> > > processes can change their own namespace at will. For that, certain
> > > traditional unix'ish things have to be disabled, most notably suid.
> > > As forbidding suid can be helpful in other scenarios, too, I thought
> > > about making this its own feature. Doing that switch on clone() seems
> > > a nice place for that, IMHO.
> >
> > Just spit-balling -- is no_new_privs not sufficient for this usecase?
> > Not granting privileges such as setuid during execve(2) is the main
> > point of that flag.
> >
>
> I would personally *love* it if distros started setting no_new_privs
> for basically all processes. And pidfd actually gets us part of the
> way toward a straightforward way to make sudo and su still work in a
> no_new_privs world: su could call into a daemon that would spawn the
> privileged task, and su would get a (read-only!) pidfd back and then
> wait for the fd and exit. I suppose that, done naively, this might
> cause some odd effects with respect to tty handling, but I bet it's
> solveable. I suppose it would be nifty if there were a way for a
> process, by mutual agreement, to reparent itself to an unrelated
> process.
>
> Anyway, clone(2) is an enormous mess. Surely the right solution here
> is to have a whole new process creation API that takes a big,
> extensible struct as an argument, and supports *at least* the full
> abilities of posix_spawn() and ideally covers all the use cases for
> fork() + do stuff + exec(). It would be nifty if this API also had a
> way to say "add no_new_privs and therefore enable extra functionality
> that doesn't work without no_new_privs". This functionality would
> include things like returning a future extra-privileged pidfd that
> gives ptrace-like access.
>
> As basic examples, the improved process creation API should take a
> list of dup2() operations to perform, fds to remove the O_CLOEXEC flag
> from, fds to close (or, maybe even better, a list of fds to *not*
> close), a list of rlimit changes to make, a list of signal changes to
> make, the ability to set sid, pgrp, uid, gid (as in
> setresuid/setresgid), the ability to do capset() operations, etc. The
> posix_spawn() API, for all that it's rather complicated, covers a
> bunch of the basics pretty well.
The idea of a system call that takes an infinitely-extendable laundry
list of operations to perform in kernel space seems quite inelegant, if
only for the error-reporting reason.
Instead, I suggest that what you'd want is a way to create a new
embryonic process that has no address space and isn't yet schedulable.
You then just need other-process-directed variants of all the normal
setup functions - so pr_openat(pidfd, dirfd, pathname, flags, mode),
pr_sigaction(pidfd, signum, act, oldact), pr_dup2(pidfd, oldfd, newfd)
etc.
Then when it's all set up you pr_execve() to kick it off.
- Kevin
^ permalink raw reply
* Re: RFC: on adding new CLONE_* flags [WAS Re: [PATCH 0/4] clone: add CLONE_PIDFD]
From: Christian Brauner @ 2019-04-20 11:15 UTC (permalink / raw)
To: Kevin Easton, Andy Lutomirski
Cc: Aleksa Sarai, Enrico Weigelt, metux IT consult, Linus Torvalds,
Al Viro, Jann Horn, David Howells, Linux API, LKML,
Serge E. Hallyn, Arnd Bergmann, Eric W. Biederman, Kees Cook,
Thomas Gleixner, Michael Kerrisk, Andrew Morton, Oleg Nesterov,
Joel Fernandes, Daniel Colascione
In-Reply-To: <20190420071406.GA22257@ip-172-31-15-78>
On April 20, 2019 9:14:06 AM GMT+02:00, Kevin Easton <kevin@guarana.org> wrote:
>On Mon, Apr 15, 2019 at 01:29:23PM -0700, Andy Lutomirski wrote:
>> On Mon, Apr 15, 2019 at 12:59 PM Aleksa Sarai <cyphar@cyphar.com>
>wrote:
>> >
>> > On 2019-04-15, Enrico Weigelt, metux IT consult <lkml@metux.net>
>wrote:
>> > > > This patchset makes it possible to retrieve pid file
>descriptors at
>> > > > process creation time by introducing the new flag CLONE_PIDFD
>to the
>> > > > clone() system call as previously discussed.
>> > >
>> > > Sorry, for highjacking this thread, but I'm curious on what
>things to
>> > > consider when introducing new CLONE_* flags.
>> > >
>> > > The reason I'm asking is:
>> > >
>> > > I'm working on implementing plan9-like fs namespaces, where
>unprivileged
>> > > processes can change their own namespace at will. For that,
>certain
>> > > traditional unix'ish things have to be disabled, most notably
>suid.
>> > > As forbidding suid can be helpful in other scenarios, too, I
>thought
>> > > about making this its own feature. Doing that switch on clone()
>seems
>> > > a nice place for that, IMHO.
>> >
>> > Just spit-balling -- is no_new_privs not sufficient for this
>usecase?
>> > Not granting privileges such as setuid during execve(2) is the main
>> > point of that flag.
>> >
>>
>> I would personally *love* it if distros started setting no_new_privs
>> for basically all processes. And pidfd actually gets us part of the
>> way toward a straightforward way to make sudo and su still work in a
>> no_new_privs world: su could call into a daemon that would spawn the
>> privileged task, and su would get a (read-only!) pidfd back and then
>> wait for the fd and exit. I suppose that, done naively, this might
>> cause some odd effects with respect to tty handling, but I bet it's
>> solveable. I suppose it would be nifty if there were a way for a
>> process, by mutual agreement, to reparent itself to an unrelated
>> process.
>>
>> Anyway, clone(2) is an enormous mess. Surely the right solution here
>> is to have a whole new process creation API that takes a big,
>> extensible struct as an argument, and supports *at least* the full
>> abilities of posix_spawn() and ideally covers all the use cases for
>> fork() + do stuff + exec(). It would be nifty if this API also had a
>> way to say "add no_new_privs and therefore enable extra functionality
>> that doesn't work without no_new_privs". This functionality would
>> include things like returning a future extra-privileged pidfd that
>> gives ptrace-like access.
>>
>> As basic examples, the improved process creation API should take a
>> list of dup2() operations to perform, fds to remove the O_CLOEXEC
>flag
>> from, fds to close (or, maybe even better, a list of fds to *not*
>> close), a list of rlimit changes to make, a list of signal changes to
>> make, the ability to set sid, pgrp, uid, gid (as in
>> setresuid/setresgid), the ability to do capset() operations, etc.
>The
>> posix_spawn() API, for all that it's rather complicated, covers a
>> bunch of the basics pretty well.
>
>The idea of a system call that takes an infinitely-extendable laundry
>list of operations to perform in kernel space seems quite inelegant, if
>only for the error-reporting reason.
>
>Instead, I suggest that what you'd want is a way to create a new
>embryonic process that has no address space and isn't yet schedulable.
>You then just need other-process-directed variants of all the normal
>setup functions - so pr_openat(pidfd, dirfd, pathname, flags, mode),
>pr_sigaction(pidfd, signum, act, oldact), pr_dup2(pidfd, oldfd, newfd)
>etc.
>
>Then when it's all set up you pr_execve() to kick it off.
>
> - Kevin
I proposed a version of this a while back when we first started talking about this.
^ permalink raw reply
* Re: RFC: on adding new CLONE_* flags [WAS Re: [PATCH 0/4] clone: add CLONE_PIDFD]
From: Daniel Colascione @ 2019-04-20 15:06 UTC (permalink / raw)
To: Kevin Easton
Cc: Andy Lutomirski, Aleksa Sarai, Enrico Weigelt, metux IT consult,
Christian Brauner, Linus Torvalds, Al Viro, Jann Horn,
David Howells, Linux API, LKML, Serge E. Hallyn, Arnd Bergmann,
Eric W. Biederman, Kees Cook, Thomas Gleixner, Michael Kerrisk,
Andrew Morton, Oleg Nesterov, Joel Fernandes
In-Reply-To: <20190420071406.GA22257@ip-172-31-15-78>
On Sat, Apr 20, 2019 at 12:14 AM Kevin Easton <kevin@guarana.org> wrote:
> On Mon, Apr 15, 2019 at 01:29:23PM -0700, Andy Lutomirski wrote:
> > On Mon, Apr 15, 2019 at 12:59 PM Aleksa Sarai <cyphar@cyphar.com> wrote:
> > >
> > > On 2019-04-15, Enrico Weigelt, metux IT consult <lkml@metux.net> wrote:
> > > > > This patchset makes it possible to retrieve pid file descriptors at
> > > > > process creation time by introducing the new flag CLONE_PIDFD to the
> > > > > clone() system call as previously discussed.
> > > >
> > > > Sorry, for highjacking this thread, but I'm curious on what things to
> > > > consider when introducing new CLONE_* flags.
> > > >
> > > > The reason I'm asking is:
> > > >
> > > > I'm working on implementing plan9-like fs namespaces, where unprivileged
> > > > processes can change their own namespace at will. For that, certain
> > > > traditional unix'ish things have to be disabled, most notably suid.
> > > > As forbidding suid can be helpful in other scenarios, too, I thought
> > > > about making this its own feature. Doing that switch on clone() seems
> > > > a nice place for that, IMHO.
> > >
> > > Just spit-balling -- is no_new_privs not sufficient for this usecase?
> > > Not granting privileges such as setuid during execve(2) is the main
> > > point of that flag.
> > >
> >
> > I would personally *love* it if distros started setting no_new_privs
> > for basically all processes. And pidfd actually gets us part of the
> > way toward a straightforward way to make sudo and su still work in a
> > no_new_privs world: su could call into a daemon that would spawn the
> > privileged task, and su would get a (read-only!) pidfd back and then
> > wait for the fd and exit. I suppose that, done naively, this might
> > cause some odd effects with respect to tty handling, but I bet it's
> > solveable. I suppose it would be nifty if there were a way for a
> > process, by mutual agreement, to reparent itself to an unrelated
> > process.
> >
> > Anyway, clone(2) is an enormous mess. Surely the right solution here
> > is to have a whole new process creation API that takes a big,
> > extensible struct as an argument, and supports *at least* the full
> > abilities of posix_spawn() and ideally covers all the use cases for
> > fork() + do stuff + exec(). It would be nifty if this API also had a
> > way to say "add no_new_privs and therefore enable extra functionality
> > that doesn't work without no_new_privs". This functionality would
> > include things like returning a future extra-privileged pidfd that
> > gives ptrace-like access.
> >
> > As basic examples, the improved process creation API should take a
> > list of dup2() operations to perform, fds to remove the O_CLOEXEC flag
> > from, fds to close (or, maybe even better, a list of fds to *not*
> > close), a list of rlimit changes to make, a list of signal changes to
> > make, the ability to set sid, pgrp, uid, gid (as in
> > setresuid/setresgid), the ability to do capset() operations, etc. The
> > posix_spawn() API, for all that it's rather complicated, covers a
> > bunch of the basics pretty well.
>
> The idea of a system call that takes an infinitely-extendable laundry
> list of operations to perform in kernel space seems quite inelegant, if
> only for the error-reporting reason.
>
> Instead, I suggest that what you'd want is a way to create a new
> embryonic process that has no address space and isn't yet schedulable.
> You then just need other-process-directed variants of all the normal
> setup functions - so pr_openat(pidfd, dirfd, pathname, flags, mode),
> pr_sigaction(pidfd, signum, act, oldact), pr_dup2(pidfd, oldfd, newfd)
> etc.
Providing process-directed versions of these functions would be useful
for a variety of management tasks anyway.
> Then when it's all set up you pr_execve() to kick it off.
Yes. That's the right general approach.
^ permalink raw reply
* Re: RFC: on adding new CLONE_* flags [WAS Re: [PATCH 0/4] clone: add CLONE_PIDFD]
From: Al Viro @ 2019-04-20 15:28 UTC (permalink / raw)
To: Andy Lutomirski
Cc: Aleksa Sarai, Enrico Weigelt, metux IT consult, Christian Brauner,
Linus Torvalds, Jann Horn, David Howells, Linux API, LKML,
Serge E. Hallyn, Arnd Bergmann, Eric W. Biederman, Kees Cook,
Thomas Gleixner, Michael Kerrisk, Andrew Morton, Oleg Nesterov,
Joel Fernandes, Daniel Colascione
In-Reply-To: <CALCETrWxMnaPvwicqkMLswMynWvJVteazD-bFv3ZnBKWp-1joQ@mail.gmail.com>
On Mon, Apr 15, 2019 at 01:29:23PM -0700, Andy Lutomirski wrote:
> Anyway, clone(2) is an enormous mess. Surely the right solution here
> is to have a whole new process creation API that takes a big,
> extensible struct as an argument, and supports *at least* the full
> abilities of posix_spawn() and ideally covers all the use cases for
> fork() + do stuff + exec(). It would be nifty if this API also had a
> way to say "add no_new_privs and therefore enable extra functionality
> that doesn't work without no_new_privs". This functionality would
> include things like returning a future extra-privileged pidfd that
> gives ptrace-like access.
You had been two weeks too late with that, and a bit too obvious with the use
of "surely" too close to the beginning...
If it was _not_ a belated AFD posting, alt.tasteless is over -> that way...
^ permalink raw reply
* [PATCH v17 1/3] proc: add /proc/<pid>/arch_status
From: Aubrey Li @ 2019-04-21 18:35 UTC (permalink / raw)
To: tglx, mingo, peterz, hpa
Cc: ak, tim.c.chen, dave.hansen, arjan, adobriyan, akpm, aubrey.li,
linux-api, linux-kernel, Aubrey Li, Andy Lutomirski
Architecture-specific information about running processes could be
useful to userland. Add support for a /proc/<pid>/arch_status
interface to examine a process's architecture-specific information
externally.
Signed-off-by: Aubrey Li <aubrey.li@linux.intel.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Tim Chen <tim.c.chen@linux.intel.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Linux API <linux-api@vger.kernel.org>
---
arch/x86/Kconfig | 1 +
fs/proc/Kconfig | 10 ++++++++++
fs/proc/base.c | 23 +++++++++++++++++++++++
3 files changed, 34 insertions(+)
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 5ad92419be19..d5a9c5ddd453 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -208,6 +208,7 @@ config X86
select USER_STACKTRACE_SUPPORT
select VIRT_TO_BUS
select X86_FEATURE_NAMES if PROC_FS
+ select PROC_PID_ARCH_STATUS if PROC_FS
config INSTRUCTION_DECODER
def_bool y
diff --git a/fs/proc/Kconfig b/fs/proc/Kconfig
index 817c02b13b1d..101bf5054e81 100644
--- a/fs/proc/Kconfig
+++ b/fs/proc/Kconfig
@@ -97,3 +97,13 @@ config PROC_CHILDREN
Say Y if you are running any user-space software which takes benefit from
this interface. For example, rkt is such a piece of software.
+
+config PROC_PID_ARCH_STATUS
+ bool "Enable /proc/<pid>/arch_status file"
+ default n
+ help
+ Provides a way to examine process architecture specific information.
+ See <file:Documentation/filesystems/proc.txt> for more information.
+
+ Say Y if you are running any user-space software which wants to obtain
+ process architecture specific information from this interface.
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 6a803a0b75df..a890d9f12851 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -94,6 +94,7 @@
#include <linux/sched/debug.h>
#include <linux/sched/stat.h>
#include <linux/posix-timers.h>
+#include <linux/processor.h>
#include <trace/events/oom.h>
#include "internal.h"
#include "fd.h"
@@ -2957,6 +2958,22 @@ static int proc_stack_depth(struct seq_file *m, struct pid_namespace *ns,
}
#endif /* CONFIG_STACKLEAK_METRICS */
+/*
+ * Add support for task architecture specific output in /proc/pid/arch_status.
+ * task_arch_status() must be defined in asm/processor.h
+ */
+#ifdef CONFIG_PROC_PID_ARCH_STATUS
+# ifndef task_arch_status
+# define task_arch_status(m, task)
+# endif
+static int proc_pid_arch_status(struct seq_file *m, struct pid_namespace *ns,
+ struct pid *pid, struct task_struct *task)
+{
+ task_arch_status(m, task);
+ return 0;
+}
+#endif /* CONFIG_PROC_PID_ARCH_STATUS */
+
/*
* Thread groups
*/
@@ -3061,6 +3078,9 @@ static const struct pid_entry tgid_base_stuff[] = {
#ifdef CONFIG_STACKLEAK_METRICS
ONE("stack_depth", S_IRUGO, proc_stack_depth),
#endif
+#ifdef CONFIG_PROC_PID_ARCH_STATUS
+ ONE("arch_status", S_IRUGO, proc_pid_arch_status),
+#endif
};
static int proc_tgid_base_readdir(struct file *file, struct dir_context *ctx)
@@ -3449,6 +3469,9 @@ static const struct pid_entry tid_base_stuff[] = {
#ifdef CONFIG_LIVEPATCH
ONE("patch_state", S_IRUSR, proc_pid_patch_state),
#endif
+#ifdef CONFIG_PROC_PID_ARCH_STATUS
+ ONE("arch_status", S_IRUGO, proc_pid_arch_status),
+#endif
};
static int proc_tid_base_readdir(struct file *file, struct dir_context *ctx)
--
2.17.1
^ permalink raw reply related
* [PATCH v17 2/3] /proc/pid/arch_status: Add AVX-512 usage elapsed time
From: Aubrey Li @ 2019-04-21 18:35 UTC (permalink / raw)
To: tglx, mingo, peterz, hpa
Cc: ak, tim.c.chen, dave.hansen, arjan, adobriyan, akpm, aubrey.li,
linux-api, linux-kernel, Aubrey Li, Andy Lutomirski
In-Reply-To: <20190421183529.9141-1-aubrey.li@linux.intel.com>
The use of AVX-512 components can cause the core turbo frequency to drop,
so it is useful to expose the time elapsed since the last AVX-512 usage as
a heuristic hint that lets a user space job scheduler cluster the tasks
using AVX-512 together.
Tensorflow example:
$ while [ 1 ]; do cat /proc/tid/arch_status | grep AVX512; sleep 1; done
AVX512_elapsed_ms: 4
AVX512_elapsed_ms: 8
AVX512_elapsed_ms: 4
This means that 4 milliseconds have elapsed since the AVX512 usage
of the tensorflow task was detected when the task was scheduled out.
Or:
$ cat /proc/tid/arch_status | grep AVX512
AVX512_elapsed_ms: -1
The number '-1' indicates that no AVX512 usage has been recorded so far,
thus the task is unlikely to have a frequency drop issue.
User space tools may want to further check by:
$ perf stat --pid <pid> -e core_power.lvl2_turbo_license -- sleep 1
Performance counter stats for process id '3558':
3,251,565,961 core_power.lvl2_turbo_license
1.004031387 seconds time elapsed
A non-zero counter value confirms that the task causes a frequency drop.
Signed-off-by: Aubrey Li <aubrey.li@linux.intel.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Tim Chen <tim.c.chen@linux.intel.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Linux API <linux-api@vger.kernel.org>
---
arch/x86/include/asm/processor.h | 6 +++++
arch/x86/kernel/fpu/xstate.c | 43 ++++++++++++++++++++++++++++++++
2 files changed, 49 insertions(+)
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 2bb3a648fc12..0728848473a2 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -991,4 +991,10 @@ enum l1tf_mitigations {
extern enum l1tf_mitigations l1tf_mitigation;
+#ifdef CONFIG_PROC_PID_ARCH_STATUS
+/* Add support for task architecture specific output in /proc/pid/arch_status */
+void task_arch_status(struct seq_file *m, struct task_struct *task);
+#define task_arch_status task_arch_status
+#endif /* CONFIG_PROC_PID_ARCH_STATUS */
+
#endif /* _ASM_X86_PROCESSOR_H */
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index d7432c2b1051..a0dda11ab72e 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -7,6 +7,7 @@
#include <linux/cpu.h>
#include <linux/mman.h>
#include <linux/pkeys.h>
+#include <linux/seq_file.h>
#include <asm/fpu/api.h>
#include <asm/fpu/internal.h>
@@ -1243,3 +1244,45 @@ int copy_user_to_xstate(struct xregs_state *xsave, const void __user *ubuf)
return 0;
}
+
+#ifdef CONFIG_PROC_PID_ARCH_STATUS
+/*
+ * Report the amount of time elapsed in milliseconds since the last AVX512
+ * use in the task.
+ */
+static void avx512_status(struct seq_file *m, struct task_struct *task)
+{
+ unsigned long timestamp = READ_ONCE(task->thread.fpu.avx512_timestamp);
+ long delta;
+
+ if (!timestamp) {
+ /*
+ * Report -1 if no AVX512 usage
+ */
+ delta = -1;
+ } else {
+ delta = (long)(jiffies - timestamp);
+ /*
+ * Cap to LONG_MAX if time difference > LONG_MAX
+ */
+ if (delta < 0)
+ delta = LONG_MAX;
+ delta = jiffies_to_msecs(delta);
+ }
+
+ seq_put_decimal_ll(m, "AVX512_elapsed_ms:\t", delta);
+ seq_putc(m, '\n');
+}
+
+/*
+ * Report architecture specific information
+ */
+void task_arch_status(struct seq_file *m, struct task_struct *task)
+{
+ /*
+ * Report AVX512 state if the processor and the build options support it.
+ */
+ if (cpu_feature_enabled(X86_FEATURE_AVX512F))
+ avx512_status(m, task);
+}
+#endif /* CONFIG_PROC_PID_ARCH_STATUS */
--
2.17.1
^ permalink raw reply related
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox