* [PATCHv5 32/37] selftest/timens: Add a test for timerfd
From: Dmitry Safonov @ 2019-07-29 21:57 UTC (permalink / raw)
To: linux-kernel
Cc: Dmitry Safonov, Andrei Vagin, Dmitry Safonov, Adrian Reber,
Andrei Vagin, Andy Lutomirski, Arnd Bergmann, Christian Brauner,
Cyrill Gorcunov, Eric W. Biederman, H. Peter Anvin, Ingo Molnar,
Jann Horn, Jeff Dike, Oleg Nesterov, Pavel Emelyanov, Shuah Khan,
Thomas Gleixner, Vincenzo Frascino, containers, criu, linux-api
In-Reply-To: <20190729215758.28405-1-dima@arista.com>
From: Andrei Vagin <avagin@gmail.com>
Check that timerfd_create() takes into account clock offsets.
Signed-off-by: Andrei Vagin <avagin@gmail.com>
Co-developed-by: Dmitry Safonov <dima@arista.com>
Signed-off-by: Dmitry Safonov <dima@arista.com>
---
tools/testing/selftests/timens/.gitignore | 1 +
tools/testing/selftests/timens/Makefile | 2 +-
tools/testing/selftests/timens/timerfd.c | 129 ++++++++++++++++++++++
3 files changed, 131 insertions(+), 1 deletion(-)
create mode 100644 tools/testing/selftests/timens/timerfd.c
diff --git a/tools/testing/selftests/timens/.gitignore b/tools/testing/selftests/timens/.gitignore
index 27a693229ce1..b609f6ee9fb9 100644
--- a/tools/testing/selftests/timens/.gitignore
+++ b/tools/testing/selftests/timens/.gitignore
@@ -1 +1,2 @@
timens
+timerfd
diff --git a/tools/testing/selftests/timens/Makefile b/tools/testing/selftests/timens/Makefile
index b877efb78974..66b90cd28e5c 100644
--- a/tools/testing/selftests/timens/Makefile
+++ b/tools/testing/selftests/timens/Makefile
@@ -1,4 +1,4 @@
-TEST_GEN_PROGS := timens
+TEST_GEN_PROGS := timens timerfd
CFLAGS := -Wall -Werror
diff --git a/tools/testing/selftests/timens/timerfd.c b/tools/testing/selftests/timens/timerfd.c
new file mode 100644
index 000000000000..b87076aae040
--- /dev/null
+++ b/tools/testing/selftests/timens/timerfd.c
@@ -0,0 +1,129 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <sched.h>
+
+#include <sys/timerfd.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <time.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdint.h>
+
+#include "log.h"
+#include "timens.h"
+
+static int tclock_gettime(clock_t clockid, struct timespec *now)
+{
+ if (clockid == CLOCK_BOOTTIME_ALARM)
+ clockid = CLOCK_BOOTTIME;
+ return clock_gettime(clockid, now);
+}
+
+int run_test(int clockid, struct timespec now)
+{
+ struct itimerspec new_value;
+ long long elapsed;
+ int fd, i;
+
+ if (tclock_gettime(clockid, &now))
+ return pr_perror("clock_gettime(%d)", clockid);
+
+ for (i = 0; i < 2; i++) {
+ int flags = 0;
+
+ new_value.it_value.tv_sec = 3600;
+ new_value.it_value.tv_nsec = 0;
+ new_value.it_interval.tv_sec = 1;
+ new_value.it_interval.tv_nsec = 0;
+
+ if (i == 1) {
+ new_value.it_value.tv_sec += now.tv_sec;
+ new_value.it_value.tv_nsec += now.tv_nsec;
+ }
+
+ fd = timerfd_create(clockid, 0);
+ if (fd == -1)
+ return pr_perror("timerfd_create(%d)", clockid);
+
+ if (i == 1)
+ flags |= TFD_TIMER_ABSTIME;
+
+ if (timerfd_settime(fd, flags, &new_value, NULL))
+ return pr_perror("timerfd_settime(%d)", clockid);
+
+ if (timerfd_gettime(fd, &new_value))
+ return pr_perror("timerfd_gettime(%d)", clockid);
+
+ elapsed = new_value.it_value.tv_sec;
+ if (abs(elapsed - 3600) > 60) {
+ ksft_test_result_fail("clockid: %d elapsed: %lld\n",
+ clockid, elapsed);
+ return 1;
+ }
+
+ close(fd);
+ }
+
+ ksft_test_result_pass("clockid=%d\n", clockid);
+
+ return 0;
+}
+
+int main(int argc, char *argv[])
+{
+ int ret, status, len, fd;
+ char buf[4096];
+ pid_t pid;
+ struct timespec btime_now, mtime_now;
+
+ nscheck();
+
+ ksft_set_plan(3);
+
+ clock_gettime(CLOCK_MONOTONIC, &mtime_now);
+ clock_gettime(CLOCK_BOOTTIME, &btime_now);
+
+ if (unshare(CLONE_NEWTIME))
+ return pr_perror("unshare");
+
+ len = snprintf(buf, sizeof(buf), "%d %d 0\n%d %d 0",
+ CLOCK_MONOTONIC, 70 * 24 * 3600,
+ CLOCK_BOOTTIME, 9 * 24 * 3600);
+ fd = open("/proc/self/timens_offsets", O_WRONLY);
+ if (fd < 0)
+ return pr_perror("/proc/self/timens_offsets");
+
+ if (write(fd, buf, len) != len)
+ return pr_perror("/proc/self/timens_offsets");
+
+ close(fd);
+ mtime_now.tv_sec += 70 * 24 * 3600;
+ btime_now.tv_sec += 9 * 24 * 3600;
+
+ pid = fork();
+ if (pid < 0)
+ return pr_perror("Unable to fork");
+ if (pid == 0) {
+ ret = 0;
+ ret |= run_test(CLOCK_BOOTTIME, btime_now);
+ ret |= run_test(CLOCK_MONOTONIC, mtime_now);
+ ret |= run_test(CLOCK_BOOTTIME_ALARM, btime_now);
+
+ if (ret)
+ ksft_exit_fail();
+ ksft_exit_pass();
+ return ret;
+ }
+
+ if (waitpid(pid, &status, 0) != pid)
+ return pr_perror("Unable to wait the child process");
+
+ if (WIFEXITED(status))
+ return WEXITSTATUS(status);
+
+ return 1;
+}
+
--
2.22.0
^ permalink raw reply related
* [PATCHv5 31/37] selftest/timens: Add Time Namespace test for supported clocks
From: Dmitry Safonov @ 2019-07-29 21:57 UTC (permalink / raw)
To: linux-kernel
Cc: Dmitry Safonov, Dmitry Safonov, Adrian Reber, Andrei Vagin,
Andy Lutomirski, Arnd Bergmann, Christian Brauner,
Cyrill Gorcunov, Eric W. Biederman, H. Peter Anvin, Ingo Molnar,
Jann Horn, Jeff Dike, Oleg Nesterov, Pavel Emelyanov, Shuah Khan,
Thomas Gleixner, Vincenzo Frascino, containers, criu, linux-api,
x86
In-Reply-To: <20190729215758.28405-1-dima@arista.com>
A test to check that all supported clocks work on host and inside
a new time namespace. Use both ways to get time: through VDSO and
by entering the kernel with implicit syscall.
Introduce a new timens directory in selftests framework for
the next timens tests.
Co-developed-by: Andrei Vagin <avagin@openvz.org>
Signed-off-by: Andrei Vagin <avagin@openvz.org>
Signed-off-by: Dmitry Safonov <dima@arista.com>
---
tools/testing/selftests/Makefile | 1 +
tools/testing/selftests/timens/.gitignore | 1 +
tools/testing/selftests/timens/Makefile | 5 +
tools/testing/selftests/timens/config | 1 +
tools/testing/selftests/timens/log.h | 26 +++
tools/testing/selftests/timens/timens.c | 185 ++++++++++++++++++++++
tools/testing/selftests/timens/timens.h | 63 ++++++++
7 files changed, 282 insertions(+)
create mode 100644 tools/testing/selftests/timens/.gitignore
create mode 100644 tools/testing/selftests/timens/Makefile
create mode 100644 tools/testing/selftests/timens/config
create mode 100644 tools/testing/selftests/timens/log.h
create mode 100644 tools/testing/selftests/timens/timens.c
create mode 100644 tools/testing/selftests/timens/timens.h
diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index 25b43a8c2b15..6fc63b84a857 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -47,6 +47,7 @@ TARGETS += splice
TARGETS += static_keys
TARGETS += sync
TARGETS += sysctl
+TARGETS += timens
ifneq (1, $(quicktest))
TARGETS += timers
endif
diff --git a/tools/testing/selftests/timens/.gitignore b/tools/testing/selftests/timens/.gitignore
new file mode 100644
index 000000000000..27a693229ce1
--- /dev/null
+++ b/tools/testing/selftests/timens/.gitignore
@@ -0,0 +1 @@
+timens
diff --git a/tools/testing/selftests/timens/Makefile b/tools/testing/selftests/timens/Makefile
new file mode 100644
index 000000000000..b877efb78974
--- /dev/null
+++ b/tools/testing/selftests/timens/Makefile
@@ -0,0 +1,5 @@
+TEST_GEN_PROGS := timens
+
+CFLAGS := -Wall -Werror
+
+include ../lib.mk
diff --git a/tools/testing/selftests/timens/config b/tools/testing/selftests/timens/config
new file mode 100644
index 000000000000..4480620f6f49
--- /dev/null
+++ b/tools/testing/selftests/timens/config
@@ -0,0 +1 @@
+CONFIG_TIME_NS=y
diff --git a/tools/testing/selftests/timens/log.h b/tools/testing/selftests/timens/log.h
new file mode 100644
index 000000000000..db64df2a8483
--- /dev/null
+++ b/tools/testing/selftests/timens/log.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __SELFTEST_TIMENS_LOG_H__
+#define __SELFTEST_TIMENS_LOG_H__
+
+#define pr_msg(fmt, lvl, ...) \
+ ksft_print_msg("[%s] (%s:%d)\t" fmt "\n", \
+ lvl, __FILE__, __LINE__, ##__VA_ARGS__)
+
+#define pr_p(func, fmt, ...) func(fmt ": %m", ##__VA_ARGS__)
+
+#define pr_err(fmt, ...) \
+ ({ \
+ ksft_test_result_error(fmt "\n", ##__VA_ARGS__); \
+ -1; \
+ })
+
+#define pr_fail(fmt, ...) \
+ ({ \
+ ksft_test_result_fail(fmt, ##__VA_ARGS__); \
+ -1; \
+ })
+
+#define pr_perror(fmt, ...) pr_p(pr_err, fmt, ##__VA_ARGS__)
+
+#endif
diff --git a/tools/testing/selftests/timens/timens.c b/tools/testing/selftests/timens/timens.c
new file mode 100644
index 000000000000..55d38f02eb64
--- /dev/null
+++ b/tools/testing/selftests/timens/timens.c
@@ -0,0 +1,185 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <errno.h>
+#include <fcntl.h>
+#include <sched.h>
+#include <stdio.h>
+#include <stdbool.h>
+#include <sys/stat.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <time.h>
+#include <unistd.h>
+#include <time.h>
+#include <string.h>
+
+#include "log.h"
+#include "timens.h"
+
+/*
+ * Test shouldn't be run for a day, so add 10 days to child
+ * time and check parent's time to be in the same day.
+ */
+#define DAY_IN_SEC (60*60*24)
+#define TEN_DAYS_IN_SEC (10*DAY_IN_SEC)
+
+#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
+
+struct test_clock {
+ clockid_t id;
+ char *name;
+ /*
+ * off_id is -1 if a clock has own offset, or it contains an index
+ * which contains a right offset of this clock.
+ */
+ int off_id;
+ time_t offset;
+};
+
+#define ct(clock, off_id) { clock, #clock, off_id }
+static struct test_clock clocks[] = {
+ ct(CLOCK_BOOTTIME, -1),
+ ct(CLOCK_BOOTTIME_ALARM, 1),
+ ct(CLOCK_MONOTONIC, -1),
+ ct(CLOCK_MONOTONIC_COARSE, 1),
+ ct(CLOCK_MONOTONIC_RAW, 1),
+};
+#undef ct
+
+static int child_ns, parent_ns = -1;
+
+static int switch_ns(int fd)
+{
+ if (setns(fd, CLONE_NEWTIME)) {
+ pr_perror("setns()");
+ return -1;
+ }
+
+ return 0;
+}
+
+static int init_namespaces(void)
+{
+ char path[] = "/proc/self/ns/time_for_children";
+ struct stat st1, st2;
+
+ if (parent_ns == -1) {
+ parent_ns = open(path, O_RDONLY);
+ if (parent_ns <= 0)
+ return pr_perror("Unable to open %s", path);
+ }
+
+ if (fstat(parent_ns, &st1))
+ return pr_perror("Unable to stat the parent timens");
+
+ if (unshare(CLONE_NEWTIME))
+ return pr_perror("Can't unshare() timens");
+
+ child_ns = open(path, O_RDONLY);
+ if (child_ns <= 0)
+ return pr_perror("Unable to open %s", path);
+
+ if (fstat(child_ns, &st2))
+ return pr_perror("Unable to stat the timens");
+
+ if (st1.st_ino == st2.st_ino)
+ return pr_perror("The same child_ns after CLONE_NEWTIME");
+
+ return 0;
+}
+
+static int test_gettime(clockid_t clock_index, bool raw_syscall, time_t offset)
+{
+ struct timespec child_ts_new, parent_ts_old, cur_ts;
+ char *entry = raw_syscall ? "syscall" : "vdso";
+ double precision = 0.0;
+
+ switch (clocks[clock_index].id) {
+ case CLOCK_MONOTONIC_COARSE:
+ case CLOCK_MONOTONIC_RAW:
+ precision = -2.0;
+ break;
+ }
+
+ if (switch_ns(parent_ns))
+ return pr_err("switch_ns(%d)", child_ns);
+
+ if (_gettime(clocks[clock_index].id, &parent_ts_old, raw_syscall))
+ return -1;
+
+ child_ts_new.tv_nsec = parent_ts_old.tv_nsec;
+ child_ts_new.tv_sec = parent_ts_old.tv_sec + offset;
+
+ if (switch_ns(child_ns))
+ return pr_err("switch_ns(%d)", child_ns);
+
+ if (_gettime(clocks[clock_index].id, &cur_ts, raw_syscall))
+ return -1;
+
+ if (difftime(cur_ts.tv_sec, child_ts_new.tv_sec) < precision) {
+ ksft_test_result_fail(
+ "Child's %s (%s) time has not changed: %lu -> %lu [%lu]\n",
+ clocks[clock_index].name, entry, parent_ts_old.tv_sec,
+ child_ts_new.tv_sec, cur_ts.tv_sec);
+ return -1;
+ }
+
+ if (switch_ns(parent_ns))
+ return pr_err("switch_ns(%d)", parent_ns);
+
+ if (_gettime(clocks[clock_index].id, &cur_ts, raw_syscall))
+ return -1;
+
+ if (difftime(cur_ts.tv_sec, parent_ts_old.tv_sec) > DAY_IN_SEC) {
+ ksft_test_result_fail(
+ "Parent's %s (%s) time has changed: %lu -> %lu [%lu]\n",
+ clocks[clock_index].name, entry, parent_ts_old.tv_sec,
+ child_ts_new.tv_sec, cur_ts.tv_sec);
+ /* Let's play nice and put it closer to original */
+ clock_settime(clocks[clock_index].id, &cur_ts);
+ return -1;
+ }
+
+ ksft_test_result_pass("Passed for %s (%s)\n",
+ clocks[clock_index].name, entry);
+ return 0;
+}
+
+int main(int argc, char *argv[])
+{
+ unsigned int i;
+ time_t offset;
+ int ret = 0;
+
+ nscheck();
+
+ ksft_set_plan(ARRAY_SIZE(clocks) * 2);
+
+ if (init_namespaces())
+ return 1;
+
+ /* Offsets have to be set before tasks enter the namespace. */
+ for (i = 0; i < ARRAY_SIZE(clocks); i++) {
+ if (clocks[i].off_id != -1)
+ continue;
+ offset = TEN_DAYS_IN_SEC + i * 1000;
+ clocks[i].offset = offset;
+ if (_settime(clocks[i].id, offset))
+ return 1;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(clocks); i++) {
+ if (clocks[i].off_id != -1)
+ offset = clocks[clocks[i].off_id].offset;
+ else
+ offset = clocks[i].offset;
+ ret |= test_gettime(i, true, offset);
+ ret |= test_gettime(i, false, offset);
+ }
+
+ if (ret)
+ ksft_exit_fail();
+
+ ksft_exit_pass();
+ return !!ret;
+}
diff --git a/tools/testing/selftests/timens/timens.h b/tools/testing/selftests/timens/timens.h
new file mode 100644
index 000000000000..77c127384810
--- /dev/null
+++ b/tools/testing/selftests/timens/timens.h
@@ -0,0 +1,63 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __TIMENS_H__
+#define __TIMENS_H__
+
+#include <fcntl.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdbool.h>
+
+#include "../kselftest.h"
+
+#ifndef CLONE_NEWTIME
+# define CLONE_NEWTIME 0x00000080
+#endif
+
+static inline int _settime(clockid_t clk_id, time_t offset)
+{
+ int fd, len;
+ char buf[4096];
+
+ if (clk_id == CLOCK_MONOTONIC_COARSE || clk_id == CLOCK_MONOTONIC_RAW)
+ clk_id = CLOCK_MONOTONIC;
+
+ len = snprintf(buf, sizeof(buf), "%d %ld 0", clk_id, offset);
+
+ fd = open("/proc/self/timens_offsets", O_WRONLY);
+ if (fd < 0)
+ return pr_perror("/proc/self/timens_offsets");
+
+ if (write(fd, buf, len) != len)
+ return pr_perror("/proc/self/timens_offsets");
+
+ close(fd);
+
+ return 0;
+}
+
+static inline int _gettime(clockid_t clk_id, struct timespec *res, bool raw_syscall)
+{
+ int err;
+
+ if (!raw_syscall) {
+ if (clock_gettime(clk_id, res)) {
+ pr_perror("clock_gettime(%d)", (int)clk_id);
+ return -1;
+ }
+ return 0;
+ }
+
+ err = syscall(SYS_clock_gettime, clk_id, res);
+ if (err)
+ pr_perror("syscall(SYS_clock_gettime(%d))", (int)clk_id);
+
+ return err;
+}
+
+static inline void nscheck(void)
+{
+ if (access("/proc/self/ns/time", F_OK) < 0)
+ ksft_exit_skip("Time namespaces are not supported\n");
+}
+
+#endif
--
2.22.0
^ permalink raw reply related
* [PATCHv5 30/37] fs/proc: Introduce /proc/pid/timens_offsets
From: Dmitry Safonov @ 2019-07-29 21:57 UTC (permalink / raw)
To: linux-kernel
Cc: Dmitry Safonov, Andrei Vagin, Dmitry Safonov, Adrian Reber,
Andrei Vagin, Andy Lutomirski, Arnd Bergmann, Christian Brauner,
Cyrill Gorcunov, Eric W. Biederman, H. Peter Anvin, Ingo Molnar,
Jann Horn, Jeff Dike, Oleg Nesterov, Pavel Emelyanov, Shuah Khan,
Thomas Gleixner, Vincenzo Frascino, containers, criu, linux-api
In-Reply-To: <20190729215758.28405-1-dima@arista.com>
From: Andrei Vagin <avagin@gmail.com>
API to set time namespace offsets for children processes, i.e.:
echo "clockid off_ses off_nsec" > /proc/self/timens_offsets
Signed-off-by: Andrei Vagin <avagin@gmail.com>
Co-developed-by: Dmitry Safonov <dima@arista.com>
Signed-off-by: Dmitry Safonov <dima@arista.com>
---
fs/proc/base.c | 95 ++++++++++++++++++++++++++++++
include/linux/time_namespace.h | 10 ++++
kernel/time_namespace.c | 104 +++++++++++++++++++++++++++++++++
3 files changed, 209 insertions(+)
diff --git a/fs/proc/base.c b/fs/proc/base.c
index ebea9501afb8..1d2007365e87 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -94,6 +94,7 @@
#include <linux/sched/debug.h>
#include <linux/sched/stat.h>
#include <linux/posix-timers.h>
+#include <linux/time_namespace.h>
#include <trace/events/oom.h>
#include "internal.h"
#include "fd.h"
@@ -1533,6 +1534,97 @@ static const struct file_operations proc_pid_sched_autogroup_operations = {
#endif /* CONFIG_SCHED_AUTOGROUP */
+#ifdef CONFIG_TIME_NS
+static int timens_offsets_show(struct seq_file *m, void *v)
+{
+ struct task_struct *p;
+
+ p = get_proc_task(file_inode(m->file));
+ if (!p)
+ return -ESRCH;
+ proc_timens_show_offsets(p, m);
+
+ put_task_struct(p);
+
+ return 0;
+}
+
+static ssize_t
+timens_offsets_write(struct file *file, const char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct inode *inode = file_inode(file);
+ struct proc_timens_offset offsets[2];
+ char *kbuf = NULL, *pos, *next_line;
+ struct task_struct *p;
+ int ret, noffsets;
+
+ /* Only allow < page size writes at the beginning of the file */
+ if ((*ppos != 0) || (count >= PAGE_SIZE))
+ return -EINVAL;
+
+ /* Slurp in the user data */
+ kbuf = memdup_user_nul(buf, count);
+ if (IS_ERR(kbuf))
+ return PTR_ERR(kbuf);
+
+ /* Parse the user data */
+ ret = -EINVAL;
+ noffsets = 0;
+ for (pos = kbuf; pos; pos = next_line) {
+ struct proc_timens_offset *off = &offsets[noffsets];
+ int err;
+
+ /* Find the end of line and ensure we don't look past it */
+ next_line = strchr(pos, '\n');
+ if (next_line) {
+ *next_line = '\0';
+ next_line++;
+ if (*next_line == '\0')
+ next_line = NULL;
+ }
+
+ err = sscanf(pos, "%u %lld %lu", &off->clockid,
+ &off->val.tv_sec, &off->val.tv_nsec);
+ if (err != 3 || off->val.tv_nsec >= NSEC_PER_SEC)
+ goto out;
+ noffsets++;
+ if (noffsets == ARRAY_SIZE(offsets)) {
+ if (next_line)
+ count = next_line - kbuf;
+ break;
+ }
+ }
+
+ ret = -ESRCH;
+ p = get_proc_task(inode);
+ if (!p)
+ goto out;
+ ret = proc_timens_set_offset(file, p, offsets, noffsets);
+ put_task_struct(p);
+ if (ret)
+ goto out;
+
+ ret = count;
+out:
+ kfree(kbuf);
+ return ret;
+}
+
+static int timens_offsets_open(struct inode *inode, struct file *filp)
+{
+ return single_open(filp, timens_offsets_show, inode);
+}
+
+static const struct file_operations proc_timens_offsets_operations = {
+ .open = timens_offsets_open,
+ .read = seq_read,
+ .write = timens_offsets_write,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+#endif /* CONFIG_TIME_NS */
+
static ssize_t comm_write(struct file *file, const char __user *buf,
size_t count, loff_t *offset)
{
@@ -3015,6 +3107,9 @@ static const struct pid_entry tgid_base_stuff[] = {
#endif
#ifdef CONFIG_SCHED_AUTOGROUP
REG("autogroup", S_IRUGO|S_IWUSR, proc_pid_sched_autogroup_operations),
+#endif
+#ifdef CONFIG_TIME_NS
+ REG("timens_offsets", S_IRUGO|S_IWUSR, proc_timens_offsets_operations),
#endif
REG("comm", S_IRUGO|S_IWUSR, proc_pid_set_comm_operations),
#ifdef CONFIG_HAVE_ARCH_TRACEHOOK
diff --git a/include/linux/time_namespace.h b/include/linux/time_namespace.h
index 9ba9664ff0ab..3f4d457ff0dc 100644
--- a/include/linux/time_namespace.h
+++ b/include/linux/time_namespace.h
@@ -40,6 +40,16 @@ static inline void put_time_ns(struct time_namespace *ns)
kref_put(&ns->kref, free_time_ns);
}
+extern void proc_timens_show_offsets(struct task_struct *p, struct seq_file *m);
+
+struct proc_timens_offset {
+ int clockid;
+ struct timespec64 val;
+};
+
+extern int proc_timens_set_offset(struct file *file, struct task_struct *p,
+ struct proc_timens_offset *offsets, int n);
+
static inline void timens_add_monotonic(struct timespec64 *ts)
{
struct timens_offsets *ns_offsets = current->nsproxy->time_ns->offsets;
diff --git a/kernel/time_namespace.c b/kernel/time_namespace.c
index 4b2eb92ad595..e17b8569bead 100644
--- a/kernel/time_namespace.c
+++ b/kernel/time_namespace.c
@@ -8,6 +8,7 @@
#include <linux/user_namespace.h>
#include <linux/sched/signal.h>
#include <linux/sched/task.h>
+#include <linux/seq_file.h>
#include <linux/proc_ns.h>
#include <linux/export.h>
#include <linux/time.h>
@@ -249,6 +250,109 @@ static struct user_namespace *timens_owner(struct ns_common *ns)
return to_time_ns(ns)->user_ns;
}
+static void show_offset(struct seq_file *m, int clockid, struct timespec64 *ts)
+{
+ seq_printf(m, "%d %lld %ld\n", clockid, ts->tv_sec, ts->tv_nsec);
+}
+
+void proc_timens_show_offsets(struct task_struct *p, struct seq_file *m)
+{
+ struct ns_common *ns;
+ struct time_namespace *time_ns;
+ struct timens_offsets *ns_offsets;
+
+ ns = timens_for_children_get(p);
+ if (!ns)
+ return;
+ time_ns = to_time_ns(ns);
+
+ if (!time_ns->offsets) {
+ put_time_ns(time_ns);
+ return;
+ }
+ ns_offsets = time_ns->offsets;
+
+ show_offset(m, CLOCK_MONOTONIC, &ns_offsets->monotonic);
+ show_offset(m, CLOCK_BOOTTIME, &ns_offsets->boottime);
+ put_time_ns(time_ns);
+}
+
+int proc_timens_set_offset(struct file *file, struct task_struct *p,
+ struct proc_timens_offset *offsets, int noffsets)
+{
+ struct ns_common *ns;
+ struct time_namespace *time_ns;
+ struct timens_offsets *ns_offsets;
+ struct timespec64 *offset;
+ struct timespec64 tp;
+ int i, err;
+
+ ns = timens_for_children_get(p);
+ if (!ns)
+ return -ESRCH;
+ time_ns = to_time_ns(ns);
+
+ if (!time_ns->offsets || time_ns->initialized ||
+ !file_ns_capable(file, time_ns->user_ns, CAP_SYS_TIME)) {
+ put_time_ns(time_ns);
+ return -EPERM;
+ }
+ ns_offsets = time_ns->offsets;
+
+ for (i = 0; i < noffsets; i++) {
+ struct proc_timens_offset *off = &offsets[i];
+
+ switch (off->clockid) {
+ case CLOCK_MONOTONIC:
+ ktime_get_ts64(&tp);
+ break;
+ case CLOCK_BOOTTIME:
+ ktime_get_boottime_ts64(&tp);
+ break;
+ default:
+ err = -EINVAL;
+ goto out;
+ }
+
+ err = -ERANGE;
+
+ if (off->val.tv_sec > KTIME_SEC_MAX || off->val.tv_sec < -KTIME_SEC_MAX)
+ goto out;
+
+ tp = timespec64_add(tp, off->val);
+ /*
+ * KTIME_SEC_MAX is divided by 2 to be sure that KTIME_MAX is
+ * still unreachable.
+ */
+ if (tp.tv_sec < 0 || tp.tv_sec > KTIME_SEC_MAX / 2)
+ goto out;
+ }
+
+ err = 0;
+ /* don't report errors after this line */
+ for (i = 0; i < noffsets; i++) {
+ struct proc_timens_offset *off = &offsets[i];
+
+ switch (off->clockid) {
+ case CLOCK_MONOTONIC:
+ offset = &ns_offsets->monotonic;
+ break;
+ case CLOCK_BOOTTIME:
+ offset = &ns_offsets->boottime;
+ break;
+ default:
+ goto out;
+ }
+
+ *offset = off->val;
+ }
+
+out:
+ put_time_ns(time_ns);
+
+ return err;
+}
+
const struct proc_ns_operations timens_operations = {
.name = "time",
.type = CLONE_NEWTIME,
--
2.22.0
^ permalink raw reply related
* [PATCHv5 29/37] posix-clocks: Add align for timens_offsets
From: Dmitry Safonov @ 2019-07-29 21:57 UTC (permalink / raw)
To: linux-kernel
Cc: Dmitry Safonov, Dmitry Safonov, Adrian Reber, Andrei Vagin,
Andy Lutomirski, Arnd Bergmann, Christian Brauner,
Cyrill Gorcunov, Eric W. Biederman, H. Peter Anvin, Ingo Molnar,
Jann Horn, Jeff Dike, Oleg Nesterov, Pavel Emelyanov, Shuah Khan,
Thomas Gleixner, Vincenzo Frascino, containers, criu, linux-api,
x86
In-Reply-To: <20190729215758.28405-1-dima@arista.com>
Align offsets so that time namespace will work for ia32 applications on
x86_64 host.
Co-developed-by: Andrei Vagin <avagin@openvz.org>
Signed-off-by: Andrei Vagin <avagin@openvz.org>
Signed-off-by: Dmitry Safonov <dima@arista.com>
---
include/linux/timens_offsets.h | 12 ++++++++++--
1 file changed, 10 insertions(+), 2 deletions(-)
diff --git a/include/linux/timens_offsets.h b/include/linux/timens_offsets.h
index e93aabaa5e45..05da1b0563ce 100644
--- a/include/linux/timens_offsets.h
+++ b/include/linux/timens_offsets.h
@@ -2,9 +2,17 @@
#ifndef _LINUX_TIME_OFFSETS_H
#define _LINUX_TIME_OFFSETS_H
+/*
+ * Time offsets need align as they're placed on VVAR page,
+ * which is used by x86_64 and ia32 VDSO code.
+ * On ia32 offset::tv_sec (u64) has align(4), so re-align offsets
+ * to the same positions as 64-bit offsets.
+ * On 64-bit big-endian systems VDSO should convert to timespec64
+ * to timespec because of a padding occurring between the fields.
+ */
struct timens_offsets {
- struct timespec64 monotonic;
- struct timespec64 boottime;
+ struct timespec64 monotonic __aligned(8);
+ struct timespec64 boottime __aligned(8);
};
#endif
--
2.22.0
^ permalink raw reply related
* [PATCHv5 28/37] x86/vdso: Enable static branches for the timens vdso
From: Dmitry Safonov @ 2019-07-29 21:57 UTC (permalink / raw)
To: linux-kernel
Cc: Dmitry Safonov, Andrei Vagin, Dmitry Safonov, Adrian Reber,
Andrei Vagin, Andy Lutomirski, Arnd Bergmann, Christian Brauner,
Cyrill Gorcunov, Eric W. Biederman, H. Peter Anvin, Ingo Molnar,
Jann Horn, Jeff Dike, Oleg Nesterov, Pavel Emelyanov, Shuah Khan,
Thomas Gleixner, Vincenzo Frascino, containers, criu, linux-api
In-Reply-To: <20190729215758.28405-1-dima@arista.com>
From: Andrei Vagin <avagin@gmail.com>
As it has been discussed on timens RFC, adding a new conditional branch
`if (inside_time_ns)` on VDSO for all processes is undesirable.
Addressing those problems, there are two versions of VDSO's .so:
for host tasks (without any penalty) and for processes inside of time
namespace with clk_to_ns() that subtracts offsets from host's time.
The timens code in vdso looks like this:
if (timens_static_branch_unlikely()) {
clk_to_ns(clk, ts);
}
This static branch is disabled by default. And the code generated consist
of a single atomic 'no-op' instruction, in the straight-line code path.
Enable static branches in the timens vdso: the 'no-op' instruction
gets replaced with a 'jump' instruction to the out-of-line true branch.
Signed-off-by: Andrei Vagin <avagin@gmail.com>
Co-developed-by: Dmitry Safonov <dima@arista.com>
Signed-off-by: Dmitry Safonov <dima@arista.com>
---
arch/x86/entry/vdso/vma.c | 30 ++++++++++++++++++++++++------
arch/x86/kernel/jump_label.c | 14 ++++++++++++++
include/linux/jump_label.h | 8 ++++++++
init/Kconfig | 1 +
4 files changed, 47 insertions(+), 6 deletions(-)
diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c
index 91cf5a5c8c9e..1a3eb4656eb6 100644
--- a/arch/x86/entry/vdso/vma.c
+++ b/arch/x86/entry/vdso/vma.c
@@ -15,6 +15,7 @@
#include <linux/cpu.h>
#include <linux/ptrace.h>
#include <linux/time_namespace.h>
+#include <linux/jump_label.h>
#include <asm/pvclock.h>
#include <asm/vgtod.h>
#include <asm/proto.h>
@@ -31,20 +32,37 @@
unsigned int __read_mostly vdso64_enabled = 1;
#endif
-void __init init_vdso_image(struct vdso_image *image)
+#ifdef CONFIG_TIME_NS
+static __init void init_timens(struct vdso_image *image)
{
- BUG_ON(image->size % PAGE_SIZE != 0);
+ struct vdso_jump_entry *entries;
+ unsigned long entries_nr;
+
+ if (WARN_ON(image->jump_table == -1UL))
+ return;
- apply_alternatives((struct alt_instr *)(image->text + image->alt),
- (struct alt_instr *)(image->text + image->alt +
- image->alt_len));
-#ifdef CONFIG_TIME_NS
image->text_timens = vmalloc_32(image->size);
if (WARN_ON(image->text_timens == NULL))
return;
memcpy(image->text_timens, image->text, image->size);
+
+ entries = image->text_timens + image->jump_table;
+ entries_nr = image->jump_table_len / sizeof(struct vdso_jump_entry);
+ apply_vdso_jump_labels(entries, entries_nr);
+}
+#else
+static inline void init_timens(struct vdso_image *image) {}
#endif
+
+void __init init_vdso_image(struct vdso_image *image)
+{
+ BUG_ON(image->size % PAGE_SIZE != 0);
+
+ apply_alternatives((struct alt_instr *)(image->text + image->alt),
+ (struct alt_instr *)(image->text + image->alt +
+ image->alt_len));
+ init_timens(image);
}
struct linux_binprm;
diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c
index 044053235302..7820ac61b688 100644
--- a/arch/x86/kernel/jump_label.c
+++ b/arch/x86/kernel/jump_label.c
@@ -24,6 +24,20 @@ union jump_code_union {
} __attribute__((packed));
};
+__init void apply_vdso_jump_labels(struct vdso_jump_entry *ent, unsigned long nr)
+{
+ while (nr--) {
+ void *code_addr = (void *)ent + ent->code;
+ union jump_code_union jmp;
+
+ jmp.jump = 0xe9; /* JMP rel32 */
+ jmp.offset = ent->target - ent->code - JUMP_LABEL_NOP_SIZE;
+ memcpy(code_addr, &jmp, JUMP_LABEL_NOP_SIZE);
+
+ ent++;
+ }
+}
+
static void bug_at(unsigned char *ip, int line)
{
/*
diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h
index 3526c0aee954..bb9d828ee49a 100644
--- a/include/linux/jump_label.h
+++ b/include/linux/jump_label.h
@@ -125,6 +125,11 @@ struct jump_entry {
long key; // key may be far away from the core kernel under KASLR
};
+struct vdso_jump_entry {
+ u16 code;
+ u16 target;
+};
+
static inline unsigned long jump_entry_code(const struct jump_entry *entry)
{
return (unsigned long)&entry->code + entry->code;
@@ -229,6 +234,9 @@ extern void static_key_enable(struct static_key *key);
extern void static_key_disable(struct static_key *key);
extern void static_key_enable_cpuslocked(struct static_key *key);
extern void static_key_disable_cpuslocked(struct static_key *key);
+extern void apply_vdso_jump_labels(struct vdso_jump_entry *ent,
+ unsigned long nr);
+
/*
* We should be using ATOMIC_INIT() for initializing .enabled, but
diff --git a/init/Kconfig b/init/Kconfig
index 9e40c07da4e1..be8bd41774f6 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1072,6 +1072,7 @@ config UTS_NS
config TIME_NS
bool "TIME namespace"
depends on ARCH_HAS_VDSO_TIME_NS
+ depends on JUMP_LABEL
default y
help
In this namespace boottime and monotonic clocks can be set.
--
2.22.0
^ permalink raw reply related
* [PATCHv5 27/37] x86/vdso2c: Process jump tables
From: Dmitry Safonov @ 2019-07-29 21:57 UTC (permalink / raw)
To: linux-kernel
Cc: Dmitry Safonov, Dmitry Safonov, Adrian Reber, Andrei Vagin,
Andy Lutomirski, Arnd Bergmann, Christian Brauner,
Cyrill Gorcunov, Eric W. Biederman, H. Peter Anvin, Ingo Molnar,
Jann Horn, Jeff Dike, Oleg Nesterov, Pavel Emelyanov, Shuah Khan,
Thomas Gleixner, Vincenzo Frascino, containers, criu, linux-api,
x86
In-Reply-To: <20190729215758.28405-1-dima@arista.com>
As it has been discussed on timens RFC, adding a new conditional branch
`if (inside_time_ns)` on VDSO for all processes is undesirable.
Addressing those problems, there are two versions of VDSO's .so:
for host tasks (without any penalty) and for processes inside time
namespace with clk_to_ns() that subtracts offsets from host's time.
The timens code in vdso looks like this:
if (timens_static_branch()) {
clk_to_ns(clk, ts);
}
Static branch mechanism adds a __jump_table section into vdso.
Vdso's linker script drops all unwanted sections in compile time.
Preserve __jump_table section and add it into (struct vdso_image),
as it's needed for enabling (patching) static branches that are
present on vdso.
Co-developed-by: Andrei Vagin <avagin@gmail.com>
Signed-off-by: Andrei Vagin <avagin@gmail.com>
Signed-off-by: Dmitry Safonov <dima@arista.com>
---
arch/x86/entry/vdso/vdso-layout.lds.S | 1 +
arch/x86/entry/vdso/vdso2c.h | 9 ++++++++-
arch/x86/include/asm/vdso.h | 1 +
3 files changed, 10 insertions(+), 1 deletion(-)
diff --git a/arch/x86/entry/vdso/vdso-layout.lds.S b/arch/x86/entry/vdso/vdso-layout.lds.S
index ba216527e59f..69dbe4821aa5 100644
--- a/arch/x86/entry/vdso/vdso-layout.lds.S
+++ b/arch/x86/entry/vdso/vdso-layout.lds.S
@@ -45,6 +45,7 @@ SECTIONS
.gnu.version : { *(.gnu.version) }
.gnu.version_d : { *(.gnu.version_d) }
.gnu.version_r : { *(.gnu.version_r) }
+ __jump_table : { *(__jump_table) } :text
.dynamic : { *(.dynamic) } :text :dynamic
diff --git a/arch/x86/entry/vdso/vdso2c.h b/arch/x86/entry/vdso/vdso2c.h
index 885b988aea19..318b278ca396 100644
--- a/arch/x86/entry/vdso/vdso2c.h
+++ b/arch/x86/entry/vdso/vdso2c.h
@@ -14,7 +14,7 @@ static void BITSFUNC(go)(void *raw_addr, size_t raw_len,
unsigned long mapping_size;
ELF(Ehdr) *hdr = (ELF(Ehdr) *)raw_addr;
unsigned int i, syms_nr;
- unsigned long j;
+ unsigned long j, jump_table_addr = -1UL, jump_table_size = -1UL;
ELF(Shdr) *symtab_hdr = NULL, *strtab_hdr, *secstrings_hdr,
*alt_sec = NULL;
ELF(Dyn) *dyn = 0, *dyn_end = 0;
@@ -78,6 +78,10 @@ static void BITSFUNC(go)(void *raw_addr, size_t raw_len,
if (!strcmp(secstrings + GET_LE(&sh->sh_name),
".altinstructions"))
alt_sec = sh;
+ if (!strcmp(secstrings + GET_LE(&sh->sh_name), "__jump_table")) {
+ jump_table_addr = GET_LE(&sh->sh_offset);
+ jump_table_size = GET_LE(&sh->sh_size);
+ }
}
if (!symtab_hdr)
@@ -166,6 +170,9 @@ static void BITSFUNC(go)(void *raw_addr, size_t raw_len,
fprintf(outfile, "\t.alt_len = %lu,\n",
(unsigned long)GET_LE(&alt_sec->sh_size));
}
+ fprintf(outfile, "\t.jump_table = %luUL,\n", jump_table_addr);
+ fprintf(outfile, "\t.jump_table_len = %luUL,\n", jump_table_size);
+
for (i = 0; i < NSYMS; i++) {
if (required_syms[i].export && syms[i])
fprintf(outfile, "\t.sym_%s = %" PRIi64 ",\n",
diff --git a/arch/x86/include/asm/vdso.h b/arch/x86/include/asm/vdso.h
index ccf89dedd04f..5e83bd3cda22 100644
--- a/arch/x86/include/asm/vdso.h
+++ b/arch/x86/include/asm/vdso.h
@@ -16,6 +16,7 @@ struct vdso_image {
unsigned long size; /* Always a multiple of PAGE_SIZE */
unsigned long alt, alt_len;
+ unsigned long jump_table, jump_table_len;
long sym_vvar_start; /* Negative offset to the vvar area */
--
2.22.0
^ permalink raw reply related
* [PATCHv5 26/37] vdso: Introduce vdso_static_branch_unlikely()
From: Dmitry Safonov @ 2019-07-29 21:57 UTC (permalink / raw)
To: linux-kernel
Cc: Dmitry Safonov, Andrei Vagin, Dmitry Safonov, Adrian Reber,
Andrei Vagin, Andy Lutomirski, Arnd Bergmann, Christian Brauner,
Cyrill Gorcunov, Eric W. Biederman, H. Peter Anvin, Ingo Molnar,
Jann Horn, Jeff Dike, Oleg Nesterov, Pavel Emelyanov, Shuah Khan,
Thomas Gleixner, Vincenzo Frascino, containers, criu, linux-api
In-Reply-To: <20190729215758.28405-1-dima@arista.com>
From: Andrei Vagin <avagin@gmail.com>
As it has been discussed on timens RFC, adding a new conditional branch
`if (inside_time_ns)` on VDSO for all processes is undesirable.
Addressing those problems, there are two versions of VDSO's .so:
for host tasks (without any penalty) and for processes inside of time
namespace with clk_to_ns() that subtracts offsets from host's time.
Introduce vdso_static_branch_unlikely(), which is similar to
static_branch_unlikely(); alias it with timens_static_branch_unlikely()
under CONFIG_TIME_NS.
The timens code in vdso will look like this:
if (timens_static_branch_unlikely()) {
clk_to_ns(clk, ts);
}
The version of vdso which is compiled from sources will never execute
clk_to_ns(). And then we can patch the 'no-op' in the straight-line
codepath with a 'jump' instruction to the out-of-line true branch and
get the timens version of the vdso library.
Signed-off-by: Andrei Vagin <avagin@gmail.com>
Co-developed-by: Dmitry Safonov <dima@arista.com>
Signed-off-by: Dmitry Safonov <dima@arista.com>
---
arch/x86/include/asm/jump_label.h | 14 ++++++++++++++
lib/vdso/gettimeofday.c | 10 ++++++++--
2 files changed, 22 insertions(+), 2 deletions(-)
diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h
index 06c3cc22a058..376efb53183b 100644
--- a/arch/x86/include/asm/jump_label.h
+++ b/arch/x86/include/asm/jump_label.h
@@ -53,6 +53,20 @@ static __always_inline bool arch_static_branch_jump(struct static_key *key, bool
return true;
}
+static __always_inline bool vdso_static_branch_unlikely(void)
+{
+ asm_volatile_goto("1:\n\t"
+ ".byte " __stringify(STATIC_KEY_INIT_NOP) "\n\t"
+ ".pushsection __jump_table, \"aw\"\n\t"
+ "2: .word 1b - 2b, %l[l_yes] - 2b\n\t"
+ ".popsection\n\t"
+ : : : : l_yes);
+
+ return false;
+l_yes:
+ return true;
+}
+
#else /* __ASSEMBLY__ */
.macro STATIC_JUMP_IF_TRUE target, key, def
diff --git a/lib/vdso/gettimeofday.c b/lib/vdso/gettimeofday.c
index 7525433f8ba4..605bdb92055d 100644
--- a/lib/vdso/gettimeofday.c
+++ b/lib/vdso/gettimeofday.c
@@ -8,6 +8,7 @@
#include <linux/kernel.h>
#include <linux/hrtimer_defs.h>
#include <linux/timens_offsets.h>
+#include <linux/jump_label.h>
#include <vdso/datapage.h>
#include <vdso/helpers.h>
@@ -43,6 +44,8 @@ u64 vdso_calc_delta(u64 cycles, u64 last, u64 mask, u32 mult)
extern u8 timens_page
__attribute__((visibility("hidden")));
+#define timens_static_branch_unlikely vdso_static_branch_unlikely
+
notrace static __always_inline void clk_to_ns(clockid_t clk, struct __kernel_timespec *ts)
{
struct timens_offsets *timens = (struct timens_offsets *) &timens_page;
@@ -79,6 +82,7 @@ notrace static __always_inline void clk_to_ns(clockid_t clk, struct __kernel_tim
}
#else
notrace static __always_inline void clk_to_ns(clockid_t clk, struct __kernel_timespec *ts) {}
+notrace static __always_inline bool timens_static_branch_unlikely(void) { return false; }
#endif
static int do_hres(const struct vdso_data *vd, clockid_t clk,
@@ -108,7 +112,8 @@ static int do_hres(const struct vdso_data *vd, clockid_t clk,
ts->tv_sec = sec + __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
ts->tv_nsec = ns;
- clk_to_ns(clk, ts);
+ if (timens_static_branch_unlikely())
+ clk_to_ns(clk, ts);
return 0;
}
@@ -125,7 +130,8 @@ static void do_coarse(const struct vdso_data *vd, clockid_t clk,
ts->tv_nsec = vdso_ts->nsec;
} while (unlikely(vdso_read_retry(vd, seq)));
- clk_to_ns(clk, ts);
+ if (timens_static_branch_unlikely())
+ clk_to_ns(clk, ts);
}
static __maybe_unused int
--
2.22.0
^ permalink raw reply related
* [PATCHv5 25/37] x86/vdso: Switch image on setns()/clone()
From: Dmitry Safonov @ 2019-07-29 21:57 UTC (permalink / raw)
To: linux-kernel
Cc: Dmitry Safonov, Dmitry Safonov, Adrian Reber, Andrei Vagin,
Andy Lutomirski, Arnd Bergmann, Christian Brauner,
Cyrill Gorcunov, Eric W. Biederman, H. Peter Anvin, Ingo Molnar,
Jann Horn, Jeff Dike, Oleg Nesterov, Pavel Emelyanov, Shuah Khan,
Thomas Gleixner, Vincenzo Frascino, containers, criu, linux-api,
x86
In-Reply-To: <20190729215758.28405-1-dima@arista.com>
As it has been discussed on timens RFC, adding a new conditional branch
`if (inside_time_ns)` on VDSO for all processes is undesirable.
It will add a penalty for everybody as branch predictor may mispredict
the jump. Also there are instruction cache lines wasted on cmp/jmp.
Those effects of introducing time namespace are very much unwanted
having in mind how much work have been spent on micro-optimisation
vdso code.
Addressing those problems, there are two versions of VDSO's .so:
for host tasks (without any penalty) and for processes inside of time
namespace with clk_to_ns() that subtracts offsets from host's time.
Whenever a user does setns() or unshare(CLONE_TIMENS) followed
by clone(), change VDSO image in mm and zap VVAR/VDSO page tables.
They will be re-faulted with corresponding image and VVAR offsets.
Co-developed-by: Andrei Vagin <avagin@gmail.com>
Signed-off-by: Andrei Vagin <avagin@gmail.com>
Signed-off-by: Dmitry Safonov <dima@arista.com>
---
arch/x86/entry/vdso/vma.c | 23 +++++++++++++++++++++++
arch/x86/include/asm/vdso.h | 1 +
kernel/time_namespace.c | 11 +++++++++++
3 files changed, 35 insertions(+)
diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c
index 8a8211fd4cfc..91cf5a5c8c9e 100644
--- a/arch/x86/entry/vdso/vma.c
+++ b/arch/x86/entry/vdso/vma.c
@@ -25,6 +25,7 @@
#include <asm/cpufeature.h>
#include <clocksource/hyperv_timer.h>
#include <asm/page.h>
+#include <asm/tlb.h>
#if defined(CONFIG_X86_64)
unsigned int __read_mostly vdso64_enabled = 1;
@@ -266,6 +267,28 @@ static const struct vm_special_mapping vvar_mapping = {
.mremap = vvar_mremap,
};
+#ifdef CONFIG_TIME_NS
+int vdso_join_timens(struct task_struct *task)
+{
+ struct mm_struct *mm = task->mm;
+ struct vm_area_struct *vma;
+
+ if (down_write_killable(&mm->mmap_sem))
+ return -EINTR;
+
+ for (vma = mm->mmap; vma; vma = vma->vm_next) {
+ unsigned long size = vma->vm_end - vma->vm_start;
+
+ if (vma_is_special_mapping(vma, &vvar_mapping) ||
+ vma_is_special_mapping(vma, &vdso_mapping))
+ zap_page_range(vma, vma->vm_start, size);
+ }
+
+ up_write(&mm->mmap_sem);
+ return 0;
+}
+#endif
+
/*
* Add vdso and vvar mappings to current process.
* @image - blob to map
diff --git a/arch/x86/include/asm/vdso.h b/arch/x86/include/asm/vdso.h
index 03f468c63a24..ccf89dedd04f 100644
--- a/arch/x86/include/asm/vdso.h
+++ b/arch/x86/include/asm/vdso.h
@@ -45,6 +45,7 @@ extern struct vdso_image vdso_image_32;
extern void __init init_vdso_image(struct vdso_image *image);
extern int map_vdso_once(const struct vdso_image *image, unsigned long addr);
+extern int vdso_join_timens(struct task_struct *task);
#endif /* __ASSEMBLER__ */
diff --git a/kernel/time_namespace.c b/kernel/time_namespace.c
index 9807c5c90cb2..4b2eb92ad595 100644
--- a/kernel/time_namespace.c
+++ b/kernel/time_namespace.c
@@ -15,6 +15,7 @@
#include <linux/cred.h>
#include <linux/err.h>
#include <linux/mm.h>
+#include <asm/vdso.h>
ktime_t do_timens_ktime_to_host(clockid_t clockid, ktime_t tim,
struct timens_offsets *ns_offsets)
@@ -199,6 +200,7 @@ static void timens_put(struct ns_common *ns)
static int timens_install(struct nsproxy *nsproxy, struct ns_common *new)
{
struct time_namespace *ns = to_time_ns(new);
+ int ret;
if (!thread_group_empty(current))
return -EINVAL;
@@ -207,6 +209,10 @@ static int timens_install(struct nsproxy *nsproxy, struct ns_common *new)
!ns_capable(current_user_ns(), CAP_SYS_ADMIN))
return -EPERM;
+ ret = vdso_join_timens(current);
+ if (ret)
+ return ret;
+
get_time_ns(ns);
get_time_ns(ns);
put_time_ns(nsproxy->time_ns);
@@ -221,10 +227,15 @@ int timens_on_fork(struct nsproxy *nsproxy, struct task_struct *tsk)
{
struct ns_common *nsc = &nsproxy->time_ns_for_children->ns;
struct time_namespace *ns = to_time_ns(nsc);
+ int ret;
if (nsproxy->time_ns == nsproxy->time_ns_for_children)
return 0;
+ ret = vdso_join_timens(tsk);
+ if (ret)
+ return ret;
+
get_time_ns(ns);
put_time_ns(nsproxy->time_ns);
nsproxy->time_ns = ns;
--
2.22.0
^ permalink raw reply related
* [PATCHv5 24/37] x86/vdso: Allocate timens vdso
From: Dmitry Safonov @ 2019-07-29 21:57 UTC (permalink / raw)
To: linux-kernel
Cc: Dmitry Safonov, Dmitry Safonov, Adrian Reber, Andrei Vagin,
Andy Lutomirski, Arnd Bergmann, Christian Brauner,
Cyrill Gorcunov, Eric W. Biederman, H. Peter Anvin, Ingo Molnar,
Jann Horn, Jeff Dike, Oleg Nesterov, Pavel Emelyanov, Shuah Khan,
Thomas Gleixner, Vincenzo Frascino, containers, criu, linux-api,
x86
In-Reply-To: <20190729215758.28405-1-dima@arista.com>
As it has been discussed on timens RFC, adding a new conditional branch
`if (inside_time_ns)` on VDSO for all processes is undesirable.
It will add a penalty for everybody as branch predictor may mispredict
the jump. Also there are instruction cache lines wasted on cmp/jmp.
Those effects of introducing time namespace are very much unwanted
having in mind how much work have been spent on micro-optimisation
vdso code.
The propose is to allocate a second vdso code with dynamically
patched out (disabled by static_branch) timens code on boot time.
Allocate another vdso and copy original code.
Co-developed-by: Andrei Vagin <avagin@openvz.org>
Signed-off-by: Andrei Vagin <avagin@openvz.org>
Signed-off-by: Dmitry Safonov <dima@arista.com>
---
arch/x86/entry/vdso/vdso2c.h | 2 +-
arch/x86/entry/vdso/vma.c | 113 +++++++++++++++++++++++++++++++++--
arch/x86/include/asm/vdso.h | 9 +--
3 files changed, 114 insertions(+), 10 deletions(-)
diff --git a/arch/x86/entry/vdso/vdso2c.h b/arch/x86/entry/vdso/vdso2c.h
index 7556bb70ed8b..885b988aea19 100644
--- a/arch/x86/entry/vdso/vdso2c.h
+++ b/arch/x86/entry/vdso/vdso2c.h
@@ -157,7 +157,7 @@ static void BITSFUNC(go)(void *raw_addr, size_t raw_len,
}
fprintf(outfile, "\n};\n\n");
- fprintf(outfile, "const struct vdso_image %s = {\n", image_name);
+ fprintf(outfile, "struct vdso_image %s __ro_after_init = {\n", image_name);
fprintf(outfile, "\t.text = raw_data,\n");
fprintf(outfile, "\t.size = %lu,\n", mapping_size);
if (alt_sec) {
diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c
index 9bd66f84db5e..8a8211fd4cfc 100644
--- a/arch/x86/entry/vdso/vma.c
+++ b/arch/x86/entry/vdso/vma.c
@@ -30,26 +30,128 @@
unsigned int __read_mostly vdso64_enabled = 1;
#endif
-void __init init_vdso_image(const struct vdso_image *image)
+void __init init_vdso_image(struct vdso_image *image)
{
BUG_ON(image->size % PAGE_SIZE != 0);
apply_alternatives((struct alt_instr *)(image->text + image->alt),
(struct alt_instr *)(image->text + image->alt +
image->alt_len));
+#ifdef CONFIG_TIME_NS
+ image->text_timens = vmalloc_32(image->size);
+ if (WARN_ON(image->text_timens == NULL))
+ return;
+
+ memcpy(image->text_timens, image->text, image->size);
+#endif
}
struct linux_binprm;
+#ifdef CONFIG_TIME_NS
+static inline struct timens_offsets *current_timens_offsets(void)
+{
+ return current->nsproxy->time_ns->offsets;
+}
+
+static int vdso_check_timens(struct vm_area_struct *vma, bool *in_timens)
+{
+ struct task_struct *tsk;
+
+ if (likely(vma->vm_mm == current->mm)) {
+ *in_timens = !!current_timens_offsets();
+ return 0;
+ }
+
+ /*
+ * .fault() handler can be called over remote process through
+ * interfaces like /proc/$pid/mem or process_vm_{readv,writev}()
+ * Considering such access to vdso as a slow-path.
+ */
+
+#ifdef CONFIG_MEMCG
+ rcu_read_lock();
+
+ tsk = rcu_dereference(vma->vm_mm->owner);
+ if (tsk) {
+ task_lock(tsk);
+ /*
+ * Shouldn't happen: nsproxy is unset in exit_mm().
+ * Before that exit_mm() holds mmap_sem to set (mm = NULL).
+ * It's impossible to have a fault in task without mm
+ * and mmap_sem is taken during the fault.
+ */
+ if (WARN_ON_ONCE(tsk->nsproxy == NULL)) {
+ task_unlock(tsk);
+ rcu_read_unlock();
+ return -EIO;
+ }
+ *in_timens = !!tsk->nsproxy->time_ns->offsets;
+ task_unlock(tsk);
+ rcu_read_unlock();
+ return 0;
+ }
+ rcu_read_unlock();
+#endif
+
+ read_lock(&tasklist_lock);
+ for_each_process(tsk) {
+ struct task_struct *c;
+
+ if (tsk->flags & PF_KTHREAD)
+ continue;
+ for_each_thread(tsk, c) {
+ if (c->mm == vma->vm_mm)
+ goto found;
+ if (c->mm)
+ break;
+ }
+ }
+ read_unlock(&tasklist_lock);
+ return -ESRCH;
+
+found:
+ task_lock(tsk);
+ read_unlock(&tasklist_lock);
+ *in_timens = !!tsk->nsproxy->time_ns->offsets;
+ task_unlock(tsk);
+
+ return 0;
+}
+#else /* CONFIG_TIME_NS */
+static inline int vdso_check_timens(struct vm_area_struct *vma, bool *in_timens)
+{
+ *in_timens = false;
+ return 0;
+}
+static inline struct timens_offsets *current_timens_offsets(void)
+{
+ return NULL;
+}
+#endif /* CONFIG_TIME_NS */
+
static vm_fault_t vdso_fault(const struct vm_special_mapping *sm,
struct vm_area_struct *vma, struct vm_fault *vmf)
{
const struct vdso_image *image = vma->vm_mm->context.vdso_image;
+ unsigned long offset = vmf->pgoff << PAGE_SHIFT;
+ bool in_timens;
+ int err;
if (!image || (vmf->pgoff << PAGE_SHIFT) >= image->size)
return VM_FAULT_SIGBUS;
- vmf->page = virt_to_page(image->text + (vmf->pgoff << PAGE_SHIFT));
+ err = vdso_check_timens(vma, &in_timens);
+ if (err)
+ return VM_FAULT_SIGBUS;
+
+ WARN_ON_ONCE(in_timens && !image->text_timens);
+
+ if (in_timens && image->text_timens)
+ vmf->page = vmalloc_to_page(image->text_timens + offset);
+ else
+ vmf->page = virt_to_page(image->text + offset);
+
get_page(vmf->page);
return 0;
}
@@ -138,13 +240,14 @@ static vm_fault_t vvar_fault(const struct vm_special_mapping *sm,
return vmf_insert_pfn(vma, vmf->address,
vmalloc_to_pfn(tsc_pg));
} else if (sym_offset == image->sym_timens_page) {
- struct time_namespace *ns = current->nsproxy->time_ns;
+ /* We can fault only in current context for VM_PFNMAP mapping */
+ struct timens_offsets *offsets = current_timens_offsets();
unsigned long pfn;
- if (!ns->offsets)
+ if (!offsets)
pfn = page_to_pfn(ZERO_PAGE(0));
else
- pfn = page_to_pfn(virt_to_page(ns->offsets));
+ pfn = page_to_pfn(virt_to_page(offsets));
return vmf_insert_pfn(vma, vmf->address, pfn);
}
diff --git a/arch/x86/include/asm/vdso.h b/arch/x86/include/asm/vdso.h
index 9d420c545607..03f468c63a24 100644
--- a/arch/x86/include/asm/vdso.h
+++ b/arch/x86/include/asm/vdso.h
@@ -12,6 +12,7 @@
struct vdso_image {
void *text;
+ void *text_timens;
unsigned long size; /* Always a multiple of PAGE_SIZE */
unsigned long alt, alt_len;
@@ -30,18 +31,18 @@ struct vdso_image {
};
#ifdef CONFIG_X86_64
-extern const struct vdso_image vdso_image_64;
+extern struct vdso_image vdso_image_64;
#endif
#ifdef CONFIG_X86_X32
-extern const struct vdso_image vdso_image_x32;
+extern struct vdso_image vdso_image_x32;
#endif
#if defined CONFIG_X86_32 || defined CONFIG_COMPAT
-extern const struct vdso_image vdso_image_32;
+extern struct vdso_image vdso_image_32;
#endif
-extern void __init init_vdso_image(const struct vdso_image *image);
+extern void __init init_vdso_image(struct vdso_image *image);
extern int map_vdso_once(const struct vdso_image *image, unsigned long addr);
--
2.22.0
^ permalink raw reply related
* [PATCHv5 23/37] x86/vdso: Add offsets page in vvar
From: Dmitry Safonov @ 2019-07-29 21:57 UTC (permalink / raw)
To: linux-kernel
Cc: Dmitry Safonov, Andrei Vagin, Dmitry Safonov, Adrian Reber,
Andy Lutomirski, Arnd Bergmann, Christian Brauner,
Cyrill Gorcunov, Eric W. Biederman, H. Peter Anvin, Ingo Molnar,
Jann Horn, Jeff Dike, Oleg Nesterov, Pavel Emelyanov, Shuah Khan,
Thomas Gleixner, Vincenzo Frascino, containers, criu, linux-api,
x86
In-Reply-To: <20190729215758.28405-1-dima@arista.com>
From: Andrei Vagin <avagin@openvz.org>
As modern applications fetch time from VDSO without entering the kernel,
it's needed to provide offsets for userspace code inside time namespace.
A page for timens offsets is allocated on time namespace construction.
Put that page into VVAR for tasks inside timens and zero page for
host processes.
As VDSO code is already optimized as much as possible in terms of speed,
any new if-condition in VDSO code is undesirable; the goal is to provide
two .so(s), as was originally suggested by Andy and Thomas:
- for host tasks with optimized-out clk_to_ns() without any penalty
- for processes inside timens with clk_to_ns()
For this purpose, define clk_to_ns() under CONFIG_TIME_NS.
To eliminate any performance regression, clk_to_ns() will be called
under static_branch with follow-up patches, that adds support for
patching vdso.
VDSO mappings are platform-specific, add Kconfig dependency for arch.
Signed-off-by: Andrei Vagin <avagin@gmail.com>
Co-developed-by: Dmitry Safonov <dima@arista.com>
Signed-off-by: Dmitry Safonov <dima@arista.com>
---
arch/Kconfig | 5 +++
arch/x86/Kconfig | 1 +
arch/x86/entry/vdso/vdso-layout.lds.S | 9 ++++-
arch/x86/entry/vdso/vdso2c.c | 3 ++
arch/x86/entry/vdso/vma.c | 12 +++++++
arch/x86/include/asm/vdso.h | 1 +
init/Kconfig | 1 +
lib/vdso/gettimeofday.c | 47 +++++++++++++++++++++++++++
8 files changed, 78 insertions(+), 1 deletion(-)
diff --git a/arch/Kconfig b/arch/Kconfig
index a7b57dd42c26..e43d27f510ec 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -729,6 +729,11 @@ config HAVE_ARCH_NVRAM_OPS
config ISA_BUS_API
def_bool ISA
+config ARCH_HAS_VDSO_TIME_NS
+ bool
+ help
+ VDSO can add time-ns offsets without entering kernel.
+
#
# ABI hall of shame
#
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 222855cc0158..91615938b470 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -81,6 +81,7 @@ config X86
select ARCH_HAS_STRICT_MODULE_RWX
select ARCH_HAS_SYNC_CORE_BEFORE_USERMODE
select ARCH_HAS_UBSAN_SANITIZE_ALL
+ select ARCH_HAS_VDSO_TIME_NS
select ARCH_HAVE_NMI_SAFE_CMPXCHG
select ARCH_MIGHT_HAVE_ACPI_PDC if ACPI
select ARCH_MIGHT_HAVE_PC_PARPORT
diff --git a/arch/x86/entry/vdso/vdso-layout.lds.S b/arch/x86/entry/vdso/vdso-layout.lds.S
index 93c6dc7812d0..ba216527e59f 100644
--- a/arch/x86/entry/vdso/vdso-layout.lds.S
+++ b/arch/x86/entry/vdso/vdso-layout.lds.S
@@ -7,6 +7,12 @@
* This script controls its layout.
*/
+#ifdef CONFIG_TIME_NS
+# define TIMENS_SZ PAGE_SIZE
+#else
+# define TIMENS_SZ 0
+#endif
+
SECTIONS
{
/*
@@ -16,7 +22,7 @@ SECTIONS
* segment.
*/
- vvar_start = . - 3 * PAGE_SIZE;
+ vvar_start = . - (3 * PAGE_SIZE + TIMENS_SZ);
vvar_page = vvar_start;
/* Place all vvars at the offsets in asm/vvar.h. */
@@ -28,6 +34,7 @@ SECTIONS
pvclock_page = vvar_start + PAGE_SIZE;
hvclock_page = vvar_start + 2 * PAGE_SIZE;
+ timens_page = vvar_start + 3 * PAGE_SIZE;
. = SIZEOF_HEADERS;
diff --git a/arch/x86/entry/vdso/vdso2c.c b/arch/x86/entry/vdso/vdso2c.c
index ce67370d14e5..7380908045c7 100644
--- a/arch/x86/entry/vdso/vdso2c.c
+++ b/arch/x86/entry/vdso/vdso2c.c
@@ -75,12 +75,14 @@ enum {
sym_vvar_page,
sym_pvclock_page,
sym_hvclock_page,
+ sym_timens_page,
};
const int special_pages[] = {
sym_vvar_page,
sym_pvclock_page,
sym_hvclock_page,
+ sym_timens_page,
};
struct vdso_sym {
@@ -93,6 +95,7 @@ struct vdso_sym required_syms[] = {
[sym_vvar_page] = {"vvar_page", true},
[sym_pvclock_page] = {"pvclock_page", true},
[sym_hvclock_page] = {"hvclock_page", true},
+ [sym_timens_page] = {"timens_page", true},
{"VDSO32_NOTE_MASK", true},
{"__kernel_vsyscall", true},
{"__kernel_sigreturn", true},
diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c
index 2dc4f0b5481c..9bd66f84db5e 100644
--- a/arch/x86/entry/vdso/vma.c
+++ b/arch/x86/entry/vdso/vma.c
@@ -14,6 +14,7 @@
#include <linux/elf.h>
#include <linux/cpu.h>
#include <linux/ptrace.h>
+#include <linux/time_namespace.h>
#include <asm/pvclock.h>
#include <asm/vgtod.h>
#include <asm/proto.h>
@@ -23,6 +24,7 @@
#include <asm/desc.h>
#include <asm/cpufeature.h>
#include <clocksource/hyperv_timer.h>
+#include <asm/page.h>
#if defined(CONFIG_X86_64)
unsigned int __read_mostly vdso64_enabled = 1;
@@ -135,6 +137,16 @@ static vm_fault_t vvar_fault(const struct vm_special_mapping *sm,
if (tsc_pg && vclock_was_used(VCLOCK_HVCLOCK))
return vmf_insert_pfn(vma, vmf->address,
vmalloc_to_pfn(tsc_pg));
+ } else if (sym_offset == image->sym_timens_page) {
+ struct time_namespace *ns = current->nsproxy->time_ns;
+ unsigned long pfn;
+
+ if (!ns->offsets)
+ pfn = page_to_pfn(ZERO_PAGE(0));
+ else
+ pfn = page_to_pfn(virt_to_page(ns->offsets));
+
+ return vmf_insert_pfn(vma, vmf->address, pfn);
}
return VM_FAULT_SIGBUS;
diff --git a/arch/x86/include/asm/vdso.h b/arch/x86/include/asm/vdso.h
index dffdc12cc7d6..9d420c545607 100644
--- a/arch/x86/include/asm/vdso.h
+++ b/arch/x86/include/asm/vdso.h
@@ -21,6 +21,7 @@ struct vdso_image {
long sym_vvar_page;
long sym_pvclock_page;
long sym_hvclock_page;
+ long sym_timens_page;
long sym_VDSO32_NOTE_MASK;
long sym___kernel_sigreturn;
long sym___kernel_rt_sigreturn;
diff --git a/init/Kconfig b/init/Kconfig
index a7cbc9b470c7..9e40c07da4e1 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1071,6 +1071,7 @@ config UTS_NS
config TIME_NS
bool "TIME namespace"
+ depends on ARCH_HAS_VDSO_TIME_NS
default y
help
In this namespace boottime and monotonic clocks can be set.
diff --git a/lib/vdso/gettimeofday.c b/lib/vdso/gettimeofday.c
index 2d1c1f241fd9..7525433f8ba4 100644
--- a/lib/vdso/gettimeofday.c
+++ b/lib/vdso/gettimeofday.c
@@ -7,6 +7,7 @@
#include <linux/time.h>
#include <linux/kernel.h>
#include <linux/hrtimer_defs.h>
+#include <linux/timens_offsets.h>
#include <vdso/datapage.h>
#include <vdso/helpers.h>
@@ -38,6 +39,48 @@ u64 vdso_calc_delta(u64 cycles, u64 last, u64 mask, u32 mult)
}
#endif
+#ifdef CONFIG_TIME_NS
+extern u8 timens_page
+ __attribute__((visibility("hidden")));
+
+notrace static __always_inline void clk_to_ns(clockid_t clk, struct __kernel_timespec *ts)
+{
+ struct timens_offsets *timens = (struct timens_offsets *) &timens_page;
+ struct timespec64 *offset64;
+
+ switch (clk) {
+ case CLOCK_MONOTONIC:
+ case CLOCK_MONOTONIC_COARSE:
+ case CLOCK_MONOTONIC_RAW:
+ offset64 = &timens->monotonic;
+ break;
+ case CLOCK_BOOTTIME:
+ offset64 = &timens->boottime;
+ break;
+ default:
+ return;
+ }
+
+ /*
+ * The kernel allows to set a negative offset only if the current clock
+ * value in a namespace is positive, so the result tv_sec can't be
+ * negative here.
+ */
+ ts->tv_nsec += offset64->tv_nsec;
+ ts->tv_sec += offset64->tv_sec;
+ if (ts->tv_nsec >= NSEC_PER_SEC) {
+ ts->tv_nsec -= NSEC_PER_SEC;
+ ts->tv_sec++;
+ }
+ if (ts->tv_nsec < 0) {
+ ts->tv_nsec += NSEC_PER_SEC;
+ ts->tv_sec--;
+ }
+}
+#else
+notrace static __always_inline void clk_to_ns(clockid_t clk, struct __kernel_timespec *ts) {}
+#endif
+
static int do_hres(const struct vdso_data *vd, clockid_t clk,
struct __kernel_timespec *ts)
{
@@ -65,6 +108,8 @@ static int do_hres(const struct vdso_data *vd, clockid_t clk,
ts->tv_sec = sec + __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
ts->tv_nsec = ns;
+ clk_to_ns(clk, ts);
+
return 0;
}
@@ -79,6 +124,8 @@ static void do_coarse(const struct vdso_data *vd, clockid_t clk,
ts->tv_sec = vdso_ts->sec;
ts->tv_nsec = vdso_ts->nsec;
} while (unlikely(vdso_read_retry(vd, seq)));
+
+ clk_to_ns(clk, ts);
}
static __maybe_unused int
--
2.22.0
^ permalink raw reply related
* [PATCHv5 22/37] x86/vdso: Rename vdso_image {.data=>.text}
From: Dmitry Safonov @ 2019-07-29 21:57 UTC (permalink / raw)
To: linux-kernel
Cc: Dmitry Safonov, Dmitry Safonov, Adrian Reber, Andrei Vagin,
Andy Lutomirski, Arnd Bergmann, Christian Brauner,
Cyrill Gorcunov, Eric W. Biederman, H. Peter Anvin, Ingo Molnar,
Jann Horn, Jeff Dike, Oleg Nesterov, Pavel Emelyanov, Shuah Khan,
Thomas Gleixner, Vincenzo Frascino, containers, criu, linux-api,
x86
In-Reply-To: <20190729215758.28405-1-dima@arista.com>
To avoid any confusion with VVAR.
Co-developed-by: Andrei Vagin <avagin@openvz.org>
Signed-off-by: Andrei Vagin <avagin@openvz.org>
Signed-off-by: Dmitry Safonov <dima@arista.com>
---
arch/x86/entry/vdso/vdso2c.h | 2 +-
arch/x86/entry/vdso/vma.c | 6 +++---
arch/x86/include/asm/vdso.h | 2 +-
3 files changed, 5 insertions(+), 5 deletions(-)
diff --git a/arch/x86/entry/vdso/vdso2c.h b/arch/x86/entry/vdso/vdso2c.h
index 80be339ee93e..7556bb70ed8b 100644
--- a/arch/x86/entry/vdso/vdso2c.h
+++ b/arch/x86/entry/vdso/vdso2c.h
@@ -158,7 +158,7 @@ static void BITSFUNC(go)(void *raw_addr, size_t raw_len,
fprintf(outfile, "\n};\n\n");
fprintf(outfile, "const struct vdso_image %s = {\n", image_name);
- fprintf(outfile, "\t.data = raw_data,\n");
+ fprintf(outfile, "\t.text = raw_data,\n");
fprintf(outfile, "\t.size = %lu,\n", mapping_size);
if (alt_sec) {
fprintf(outfile, "\t.alt = %lu,\n",
diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c
index 3f05418642a8..2dc4f0b5481c 100644
--- a/arch/x86/entry/vdso/vma.c
+++ b/arch/x86/entry/vdso/vma.c
@@ -32,8 +32,8 @@ void __init init_vdso_image(const struct vdso_image *image)
{
BUG_ON(image->size % PAGE_SIZE != 0);
- apply_alternatives((struct alt_instr *)(image->data + image->alt),
- (struct alt_instr *)(image->data + image->alt +
+ apply_alternatives((struct alt_instr *)(image->text + image->alt),
+ (struct alt_instr *)(image->text + image->alt +
image->alt_len));
}
@@ -47,7 +47,7 @@ static vm_fault_t vdso_fault(const struct vm_special_mapping *sm,
if (!image || (vmf->pgoff << PAGE_SHIFT) >= image->size)
return VM_FAULT_SIGBUS;
- vmf->page = virt_to_page(image->data + (vmf->pgoff << PAGE_SHIFT));
+ vmf->page = virt_to_page(image->text + (vmf->pgoff << PAGE_SHIFT));
get_page(vmf->page);
return 0;
}
diff --git a/arch/x86/include/asm/vdso.h b/arch/x86/include/asm/vdso.h
index 230474e2ddb5..dffdc12cc7d6 100644
--- a/arch/x86/include/asm/vdso.h
+++ b/arch/x86/include/asm/vdso.h
@@ -11,7 +11,7 @@
#include <linux/mm_types.h>
struct vdso_image {
- void *data;
+ void *text;
unsigned long size; /* Always a multiple of PAGE_SIZE */
unsigned long alt, alt_len;
--
2.22.0
^ permalink raw reply related
* [PATCHv5 21/37] x86/vdso: Restrict splitting VVAR VMA
From: Dmitry Safonov @ 2019-07-29 21:57 UTC (permalink / raw)
To: linux-kernel
Cc: Dmitry Safonov, Dmitry Safonov, Adrian Reber, Andrei Vagin,
Andy Lutomirski, Arnd Bergmann, Christian Brauner,
Cyrill Gorcunov, Eric W. Biederman, H. Peter Anvin, Ingo Molnar,
Jann Horn, Jeff Dike, Oleg Nesterov, Pavel Emelyanov, Shuah Khan,
Thomas Gleixner, Vincenzo Frascino, containers, criu, linux-api,
x86
In-Reply-To: <20190729215758.28405-1-dima@arista.com>
Although, time namespace can work with VVAR VMA split, it seems worth
to forbid splitting VVAR resulting in stricter ABI and reducing amount
of corner-cases to consider while working further on VDSO.
I don't think there is any use-case for partial mremap() of vvar,
but if there is any - this patch can be easily reverted.
Co-developed-by: Andrei Vagin <avagin@openvz.org>
Signed-off-by: Andrei Vagin <avagin@openvz.org>
Signed-off-by: Dmitry Safonov <dima@arista.com>
---
arch/x86/entry/vdso/vma.c | 13 +++++++++++++
1 file changed, 13 insertions(+)
diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c
index 349a61d8bf34..3f05418642a8 100644
--- a/arch/x86/entry/vdso/vma.c
+++ b/arch/x86/entry/vdso/vma.c
@@ -84,6 +84,18 @@ static int vdso_mremap(const struct vm_special_mapping *sm,
return 0;
}
+static int vvar_mremap(const struct vm_special_mapping *sm,
+ struct vm_area_struct *new_vma)
+{
+ unsigned long new_size = new_vma->vm_end - new_vma->vm_start;
+ const struct vdso_image *image = new_vma->vm_mm->context.vdso_image;
+
+ if (new_size != -image->sym_vvar_start)
+ return -EINVAL;
+
+ return 0;
+}
+
static vm_fault_t vvar_fault(const struct vm_special_mapping *sm,
struct vm_area_struct *vma, struct vm_fault *vmf)
{
@@ -136,6 +148,7 @@ static const struct vm_special_mapping vdso_mapping = {
static const struct vm_special_mapping vvar_mapping = {
.name = "[vvar]",
.fault = vvar_fault,
+ .mremap = vvar_mremap,
};
/*
--
2.22.0
^ permalink raw reply related
* [PATCHv5 20/37] x86/vdso/Makefile: Add vobjs32
From: Dmitry Safonov @ 2019-07-29 21:57 UTC (permalink / raw)
To: linux-kernel
Cc: Dmitry Safonov, Dmitry Safonov, Adrian Reber, Andrei Vagin,
Andy Lutomirski, Arnd Bergmann, Christian Brauner,
Cyrill Gorcunov, Eric W. Biederman, H. Peter Anvin, Ingo Molnar,
Jann Horn, Jeff Dike, Oleg Nesterov, Pavel Emelyanov, Shuah Khan,
Thomas Gleixner, Vincenzo Frascino, containers, criu, linux-api,
x86
In-Reply-To: <20190729215758.28405-1-dima@arista.com>
Treat ia32/i386 objects in array the same As for 64-bit vdso objects.
This is a preparation ground to avoid code duplication on introduction
timens vdso.
Co-developed-by: Andrei Vagin <avagin@openvz.org>
Signed-off-by: Andrei Vagin <avagin@openvz.org>
Signed-off-by: Dmitry Safonov <dima@arista.com>
---
arch/x86/entry/vdso/Makefile | 15 +++++----------
1 file changed, 5 insertions(+), 10 deletions(-)
diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile
index 8df549138193..d4bffc4cabd1 100644
--- a/arch/x86/entry/vdso/Makefile
+++ b/arch/x86/entry/vdso/Makefile
@@ -24,6 +24,8 @@ VDSO32-$(CONFIG_IA32_EMULATION) := y
# files to link into the vdso
vobjs-y := vdso-note.o vclock_gettime.o vgetcpu.o
+vobjs32-y := vdso32/note.o vdso32/system_call.o vdso32/sigreturn.o
+vobjs32-y += vdso32/vclock_gettime.o
# files to link into kernel
obj-y += vma.o
@@ -37,10 +39,12 @@ vdso_img-$(VDSO32-y) += 32
obj-$(VDSO32-y) += vdso32-setup.o
vobjs := $(foreach F,$(vobjs-y),$(obj)/$F)
+vobjs32 := $(foreach F,$(vobjs32-y),$(obj)/$F)
$(obj)/vdso.o: $(obj)/vdso.so
targets += vdso.lds $(vobjs-y)
+targets += vdso32/vdso32.lds $(vobjs32-y)
# Build the vDSO image C files and link them in.
vdso_img_objs := $(vdso_img-y:%=vdso-image-%.o)
@@ -131,10 +135,6 @@ $(obj)/vdsox32.so.dbg: $(obj)/vdsox32.lds $(vobjx32s) FORCE
CPPFLAGS_vdso32.lds = $(CPPFLAGS_vdso.lds)
VDSO_LDFLAGS_vdso32.lds = -m elf_i386 -soname linux-gate.so.1
-targets += vdso32/vdso32.lds
-targets += vdso32/note.o vdso32/system_call.o vdso32/sigreturn.o
-targets += vdso32/vclock_gettime.o
-
KBUILD_AFLAGS_32 := $(filter-out -m64,$(KBUILD_AFLAGS)) -DBUILD_VDSO
$(obj)/vdso32.so.dbg: KBUILD_AFLAGS = $(KBUILD_AFLAGS_32)
$(obj)/vdso32.so.dbg: asflags-$(CONFIG_X86_64) += -m32
@@ -159,12 +159,7 @@ endif
$(obj)/vdso32.so.dbg: KBUILD_CFLAGS = $(KBUILD_CFLAGS_32)
-$(obj)/vdso32.so.dbg: FORCE \
- $(obj)/vdso32/vdso32.lds \
- $(obj)/vdso32/vclock_gettime.o \
- $(obj)/vdso32/note.o \
- $(obj)/vdso32/system_call.o \
- $(obj)/vdso32/sigreturn.o
+$(obj)/vdso32.so.dbg: $(obj)/vdso32/vdso32.lds $(vobjs32) FORCE
$(call if_changed,vdso_and_check)
#
--
2.22.0
^ permalink raw reply related
* [PATCHv5 19/37] x86/vdso2c: Convert iterator to unsigned
From: Dmitry Safonov @ 2019-07-29 21:57 UTC (permalink / raw)
To: linux-kernel
Cc: Dmitry Safonov, Dmitry Safonov, Adrian Reber, Andrei Vagin,
Andy Lutomirski, Arnd Bergmann, Christian Brauner,
Cyrill Gorcunov, Eric W. Biederman, H. Peter Anvin, Ingo Molnar,
Jann Horn, Jeff Dike, Oleg Nesterov, Pavel Emelyanov, Shuah Khan,
Thomas Gleixner, Vincenzo Frascino, containers, criu, linux-api,
x86
In-Reply-To: <20190729215758.28405-1-dima@arista.com>
i and j are used everywhere with unsigned types.
Cleanup and prettify the code a bit.
Introduce syms_nr for readability and as a preparation for allocating an
array of vDSO entries that will be needed for creating two vdso .so's:
one for host tasks and another for processes inside time namespace.
Co-developed-by: Andrei Vagin <avagin@openvz.org>
Signed-off-by: Andrei Vagin <avagin@openvz.org>
Signed-off-by: Dmitry Safonov <dima@arista.com>
---
arch/x86/entry/vdso/vdso2c.h | 9 ++++-----
1 file changed, 4 insertions(+), 5 deletions(-)
diff --git a/arch/x86/entry/vdso/vdso2c.h b/arch/x86/entry/vdso/vdso2c.h
index a20b134de2a8..80be339ee93e 100644
--- a/arch/x86/entry/vdso/vdso2c.h
+++ b/arch/x86/entry/vdso/vdso2c.h
@@ -13,7 +13,7 @@ static void BITSFUNC(go)(void *raw_addr, size_t raw_len,
unsigned long load_size = -1; /* Work around bogus warning */
unsigned long mapping_size;
ELF(Ehdr) *hdr = (ELF(Ehdr) *)raw_addr;
- int i;
+ unsigned int i, syms_nr;
unsigned long j;
ELF(Shdr) *symtab_hdr = NULL, *strtab_hdr, *secstrings_hdr,
*alt_sec = NULL;
@@ -86,11 +86,10 @@ static void BITSFUNC(go)(void *raw_addr, size_t raw_len,
strtab_hdr = raw_addr + GET_LE(&hdr->e_shoff) +
GET_LE(&hdr->e_shentsize) * GET_LE(&symtab_hdr->sh_link);
+ syms_nr = GET_LE(&symtab_hdr->sh_size) / GET_LE(&symtab_hdr->sh_entsize);
/* Walk the symbol table */
- for (i = 0;
- i < GET_LE(&symtab_hdr->sh_size) / GET_LE(&symtab_hdr->sh_entsize);
- i++) {
- int k;
+ for (i = 0; i < syms_nr; i++) {
+ unsigned int k;
ELF(Sym) *sym = raw_addr + GET_LE(&symtab_hdr->sh_offset) +
GET_LE(&symtab_hdr->sh_entsize) * i;
const char *sym_name = raw_addr +
--
2.22.0
^ permalink raw reply related
* [PATCHv5 18/37] x86/vdso2c: Correct err messages on file opening
From: Dmitry Safonov @ 2019-07-29 21:57 UTC (permalink / raw)
To: linux-kernel
Cc: Dmitry Safonov, Dmitry Safonov, Adrian Reber, Andrei Vagin,
Andy Lutomirski, Arnd Bergmann, Christian Brauner,
Cyrill Gorcunov, Eric W. Biederman, H. Peter Anvin, Ingo Molnar,
Jann Horn, Jeff Dike, Oleg Nesterov, Pavel Emelyanov, Shuah Khan,
Thomas Gleixner, Vincenzo Frascino, containers, criu, linux-api,
x86
In-Reply-To: <20190729215758.28405-1-dima@arista.com>
err() message in main() is misleading: it should print `outfilename`,
which is argv[3], not argv[2].
Correct error messages to be more precise about what failed and for
which file.
Co-developed-by: Andrei Vagin <avagin@openvz.org>
Signed-off-by: Andrei Vagin <avagin@openvz.org>
Signed-off-by: Dmitry Safonov <dima@arista.com>
---
arch/x86/entry/vdso/vdso2c.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/arch/x86/entry/vdso/vdso2c.c b/arch/x86/entry/vdso/vdso2c.c
index 3a4d8d4d39f8..ce67370d14e5 100644
--- a/arch/x86/entry/vdso/vdso2c.c
+++ b/arch/x86/entry/vdso/vdso2c.c
@@ -184,7 +184,7 @@ static void map_input(const char *name, void **addr, size_t *len, int prot)
int fd = open(name, O_RDONLY);
if (fd == -1)
- err(1, "%s", name);
+ err(1, "open(%s)", name);
tmp_len = lseek(fd, 0, SEEK_END);
if (tmp_len == (off_t)-1)
@@ -237,7 +237,7 @@ int main(int argc, char **argv)
outfilename = argv[3];
outfile = fopen(outfilename, "w");
if (!outfile)
- err(1, "%s", argv[2]);
+ err(1, "fopen(%s)", outfilename);
go(raw_addr, raw_len, stripped_addr, stripped_len, outfile, name);
--
2.22.0
^ permalink raw reply related
* [PATCHv5 17/37] fd/proc: Respect boottime inside time namespace for /proc/uptime
From: Dmitry Safonov @ 2019-07-29 21:56 UTC (permalink / raw)
To: linux-kernel
Cc: Dmitry Safonov, Dmitry Safonov, Adrian Reber, Andrei Vagin,
Andy Lutomirski, Arnd Bergmann, Christian Brauner,
Cyrill Gorcunov, Eric W. Biederman, H. Peter Anvin, Ingo Molnar,
Jann Horn, Jeff Dike, Oleg Nesterov, Pavel Emelyanov, Shuah Khan,
Thomas Gleixner, Vincenzo Frascino, containers, criu, linux-api,
x86
In-Reply-To: <20190729215758.28405-1-dima@arista.com>
Co-developed-by: Andrei Vagin <avagin@openvz.org>
Signed-off-by: Andrei Vagin <avagin@openvz.org>
Signed-off-by: Dmitry Safonov <dima@arista.com>
---
fs/proc/uptime.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/fs/proc/uptime.c b/fs/proc/uptime.c
index a4c2791ab70b..5a1b228964fb 100644
--- a/fs/proc/uptime.c
+++ b/fs/proc/uptime.c
@@ -5,6 +5,7 @@
#include <linux/sched.h>
#include <linux/seq_file.h>
#include <linux/time.h>
+#include <linux/time_namespace.h>
#include <linux/kernel_stat.h>
static int uptime_proc_show(struct seq_file *m, void *v)
@@ -20,6 +21,8 @@ static int uptime_proc_show(struct seq_file *m, void *v)
nsec += (__force u64) kcpustat_cpu(i).cpustat[CPUTIME_IDLE];
ktime_get_boottime_ts64(&uptime);
+ timens_add_boottime(&uptime);
+
idle.tv_sec = div_u64_rem(nsec, NSEC_PER_SEC, &rem);
idle.tv_nsec = rem;
seq_printf(m, "%lu.%02lu %lu.%02lu\n",
--
2.22.0
^ permalink raw reply related
* [PATCHv5 16/37] posix-timers: Make clock_nanosleep() time namespace aware
From: Dmitry Safonov @ 2019-07-29 21:56 UTC (permalink / raw)
To: linux-kernel
Cc: Dmitry Safonov, Andrei Vagin, Dmitry Safonov, Adrian Reber,
Andrei Vagin, Andy Lutomirski, Arnd Bergmann, Christian Brauner,
Cyrill Gorcunov, Eric W. Biederman, H. Peter Anvin, Ingo Molnar,
Jann Horn, Jeff Dike, Oleg Nesterov, Pavel Emelyanov, Shuah Khan,
Thomas Gleixner, Vincenzo Frascino, containers, criu, linux-api
In-Reply-To: <20190729215758.28405-1-dima@arista.com>
From: Andrei Vagin <avagin@gmail.com>
clock_nanosleep() accepts absolute values of expiration time, if the
TIMER_ABSTIME flag is set. This value is in the task time namespace,
which has to be converted to the host time namespace.
Signed-off-by: Andrei Vagin <avagin@openvz.org>
Co-developed-by: Dmitry Safonov <dima@arista.com>
Signed-off-by: Dmitry Safonov <dima@arista.com>
---
kernel/time/posix-stubs.c | 12 ++++++++++--
kernel/time/posix-timers.c | 21 ++++++++++++++++++---
2 files changed, 28 insertions(+), 5 deletions(-)
diff --git a/kernel/time/posix-stubs.c b/kernel/time/posix-stubs.c
index 2ccefc9ce184..47ee2684d250 100644
--- a/kernel/time/posix-stubs.c
+++ b/kernel/time/posix-stubs.c
@@ -129,6 +129,7 @@ SYSCALL_DEFINE4(clock_nanosleep, const clockid_t, which_clock, int, flags,
struct __kernel_timespec __user *, rmtp)
{
struct timespec64 t;
+ ktime_t texp;
switch (which_clock) {
case CLOCK_REALTIME:
@@ -147,7 +148,10 @@ SYSCALL_DEFINE4(clock_nanosleep, const clockid_t, which_clock, int, flags,
rmtp = NULL;
current->restart_block.nanosleep.type = rmtp ? TT_NATIVE : TT_NONE;
current->restart_block.nanosleep.rmtp = rmtp;
- return hrtimer_nanosleep(timespec64_to_ktime(t), flags & TIMER_ABSTIME ?
+ texp = timespec64_to_ktime(t);
+ if (flags & TIMER_ABSTIME)
+ texp = timens_ktime_to_host(clockid, texp);
+ return hrtimer_nanosleep(texp, flags & TIMER_ABSTIME ?
HRTIMER_MODE_ABS : HRTIMER_MODE_REL,
which_clock);
}
@@ -215,6 +219,7 @@ SYSCALL_DEFINE4(clock_nanosleep_time32, clockid_t, which_clock, int, flags,
struct old_timespec32 __user *, rmtp)
{
struct timespec64 t;
+ ktime texp;
switch (which_clock) {
case CLOCK_REALTIME:
@@ -233,7 +238,10 @@ SYSCALL_DEFINE4(clock_nanosleep_time32, clockid_t, which_clock, int, flags,
rmtp = NULL;
current->restart_block.nanosleep.type = rmtp ? TT_COMPAT : TT_NONE;
current->restart_block.nanosleep.compat_rmtp = rmtp;
- return hrtimer_nanosleep(timespec64_to_ktime(t), flags & TIMER_ABSTIME ?
+ texp = timespec64_to_ktime(t);
+ if (flags & TIMER_ABSTIME)
+ texp = timens_ktime_to_host(clockid, texp);
+ return hrtimer_nanosleep(texp, flags & TIMER_ABSTIME ?
HRTIMER_MODE_ABS : HRTIMER_MODE_REL,
which_clock);
}
diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c
index c979e720a5a1..4dc46d697144 100644
--- a/kernel/time/posix-timers.c
+++ b/kernel/time/posix-timers.c
@@ -1187,7 +1187,22 @@ SYSCALL_DEFINE2(clock_getres_time32, clockid_t, which_clock,
static int common_nsleep(const clockid_t which_clock, int flags,
const struct timespec64 *rqtp)
{
- return hrtimer_nanosleep(rqtp, flags & TIMER_ABSTIME ?
+ ktime_t texp = timespec64_to_ktime(*rqtp);
+
+ return hrtimer_nanosleep(texp, flags & TIMER_ABSTIME ?
+ HRTIMER_MODE_ABS : HRTIMER_MODE_REL,
+ which_clock);
+}
+
+static int common_nsleep_timens(const clockid_t which_clock, int flags,
+ const struct timespec64 *rqtp)
+{
+ ktime_t texp = timespec64_to_ktime(*rqtp);
+
+ if (flags & TIMER_ABSTIME)
+ texp = timens_ktime_to_host(which_clock, texp);
+
+ return hrtimer_nanosleep(texp, flags & TIMER_ABSTIME ?
HRTIMER_MODE_ABS : HRTIMER_MODE_REL,
which_clock);
}
@@ -1268,7 +1283,7 @@ static const struct k_clock clock_monotonic = {
.clock_getres = posix_get_hrtimer_res,
.clock_get_timespec = posix_get_timespec,
.clock_get_ktime = posix_get_ktime,
- .nsleep = common_nsleep,
+ .nsleep = common_nsleep_timens,
.timer_create = common_timer_create,
.timer_set = common_timer_set,
.timer_get = common_timer_get,
@@ -1315,7 +1330,7 @@ static const struct k_clock clock_boottime = {
.clock_getres = posix_get_hrtimer_res,
.clock_get_ktime = posix_get_boottime_ktime,
.clock_get_timespec = posix_get_boottime_timespec,
- .nsleep = common_nsleep,
+ .nsleep = common_nsleep_timens,
.timer_create = common_timer_create,
.timer_set = common_timer_set,
.timer_get = common_timer_get,
--
2.22.0
^ permalink raw reply related
* [PATCHv5 15/37] hrtimers: Prepare hrtimer_nanosleep() for time namespaces
From: Dmitry Safonov @ 2019-07-29 21:56 UTC (permalink / raw)
To: linux-kernel
Cc: Dmitry Safonov, Andrei Vagin, Dmitry Safonov, Adrian Reber,
Andrei Vagin, Andy Lutomirski, Arnd Bergmann, Christian Brauner,
Cyrill Gorcunov, Eric W. Biederman, H. Peter Anvin, Ingo Molnar,
Jann Horn, Jeff Dike, Oleg Nesterov, Pavel Emelyanov, Shuah Khan,
Thomas Gleixner, Vincenzo Frascino, containers, criu, linux-api
In-Reply-To: <20190729215758.28405-1-dima@arista.com>
From: Andrei Vagin <avagin@gmail.com>
clock_nanosleep() accepts absolute values of expiration time when
TIMER_ABSTIME flag is set. This absolute value is inside the task's
time namespace, and has to be converted to the host's time.
There is timens_ktime_to_host() helper for converting time, but
it accepts ktime argument.
As a preparation, make hrtimer_nanosleep() accept a clock value in ktime
instead of timespec64.
Signed-off-by: Andrei Vagin <avagin@openvz.org>
Co-developed-by: Dmitry Safonov <dima@arista.com>
Signed-off-by: Dmitry Safonov <dima@arista.com>
---
include/linux/hrtimer.h | 2 +-
kernel/time/hrtimer.c | 8 ++++----
kernel/time/posix-stubs.c | 4 ++--
3 files changed, 7 insertions(+), 7 deletions(-)
diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index 4971100a8cab..3285d75b5a0f 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -459,7 +459,7 @@ static inline u64 hrtimer_forward_now(struct hrtimer *timer,
/* Precise sleep: */
extern int nanosleep_copyout(struct restart_block *, struct timespec64 *);
-extern long hrtimer_nanosleep(const struct timespec64 *rqtp,
+extern long hrtimer_nanosleep(ktime_t rqtp,
const enum hrtimer_mode mode,
const clockid_t clockid);
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index 5ee77f1a8a92..b67927c65410 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -1716,7 +1716,7 @@ static long __sched hrtimer_nanosleep_restart(struct restart_block *restart)
return ret;
}
-long hrtimer_nanosleep(const struct timespec64 *rqtp,
+long hrtimer_nanosleep(ktime_t rqtp,
const enum hrtimer_mode mode, const clockid_t clockid)
{
struct restart_block *restart;
@@ -1729,7 +1729,7 @@ long hrtimer_nanosleep(const struct timespec64 *rqtp,
slack = 0;
hrtimer_init_on_stack(&t.timer, clockid, mode);
- hrtimer_set_expires_range_ns(&t.timer, timespec64_to_ktime(*rqtp), slack);
+ hrtimer_set_expires_range_ns(&t.timer, rqtp, slack);
ret = do_nanosleep(&t, mode);
if (ret != -ERESTART_RESTARTBLOCK)
goto out;
@@ -1764,7 +1764,7 @@ SYSCALL_DEFINE2(nanosleep, struct __kernel_timespec __user *, rqtp,
current->restart_block.nanosleep.type = rmtp ? TT_NATIVE : TT_NONE;
current->restart_block.nanosleep.rmtp = rmtp;
- return hrtimer_nanosleep(&tu, HRTIMER_MODE_REL, CLOCK_MONOTONIC);
+ return hrtimer_nanosleep(timespec64_to_ktime(tu), HRTIMER_MODE_REL, CLOCK_MONOTONIC);
}
#endif
@@ -1784,7 +1784,7 @@ SYSCALL_DEFINE2(nanosleep_time32, struct old_timespec32 __user *, rqtp,
current->restart_block.nanosleep.type = rmtp ? TT_COMPAT : TT_NONE;
current->restart_block.nanosleep.compat_rmtp = rmtp;
- return hrtimer_nanosleep(&tu, HRTIMER_MODE_REL, CLOCK_MONOTONIC);
+ return hrtimer_nanosleep(timespec64_to_ktime(tu), HRTIMER_MODE_REL, CLOCK_MONOTONIC);
}
#endif
diff --git a/kernel/time/posix-stubs.c b/kernel/time/posix-stubs.c
index edaf075d1ee4..2ccefc9ce184 100644
--- a/kernel/time/posix-stubs.c
+++ b/kernel/time/posix-stubs.c
@@ -147,7 +147,7 @@ SYSCALL_DEFINE4(clock_nanosleep, const clockid_t, which_clock, int, flags,
rmtp = NULL;
current->restart_block.nanosleep.type = rmtp ? TT_NATIVE : TT_NONE;
current->restart_block.nanosleep.rmtp = rmtp;
- return hrtimer_nanosleep(&t, flags & TIMER_ABSTIME ?
+ return hrtimer_nanosleep(timespec64_to_ktime(t), flags & TIMER_ABSTIME ?
HRTIMER_MODE_ABS : HRTIMER_MODE_REL,
which_clock);
}
@@ -233,7 +233,7 @@ SYSCALL_DEFINE4(clock_nanosleep_time32, clockid_t, which_clock, int, flags,
rmtp = NULL;
current->restart_block.nanosleep.type = rmtp ? TT_COMPAT : TT_NONE;
current->restart_block.nanosleep.compat_rmtp = rmtp;
- return hrtimer_nanosleep(&t, flags & TIMER_ABSTIME ?
+ return hrtimer_nanosleep(timespec64_to_ktime(t), flags & TIMER_ABSTIME ?
HRTIMER_MODE_ABS : HRTIMER_MODE_REL,
which_clock);
}
--
2.22.0
^ permalink raw reply related
* [PATCHv5 14/37] alarmtimer: Make nanosleep time namespace aware
From: Dmitry Safonov @ 2019-07-29 21:56 UTC (permalink / raw)
To: linux-kernel
Cc: Dmitry Safonov, Andrei Vagin, Dmitry Safonov, Adrian Reber,
Andrei Vagin, Andy Lutomirski, Arnd Bergmann, Christian Brauner,
Cyrill Gorcunov, Eric W. Biederman, H. Peter Anvin, Ingo Molnar,
Jann Horn, Jeff Dike, Oleg Nesterov, Pavel Emelyanov, Shuah Khan,
Thomas Gleixner, Vincenzo Frascino, containers, criu, linux-api
In-Reply-To: <20190729215758.28405-1-dima@arista.com>
From: Andrei Vagin <avagin@gmail.com>
clock_nanosleep() accepts absolute values of expiration time when
TIMER_ABSTIME flag is set. This absolute value is inside the task's
time namespace, and has to be converted to the host's time.
Signed-off-by: Andrei Vagin <avagin@openvz.org>
Co-developed-by: Dmitry Safonov <dima@arista.com>
Signed-off-by: Dmitry Safonov <dima@arista.com>
---
kernel/time/alarmtimer.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c
index b1e82bb6cc6b..f21c743f6ede 100644
--- a/kernel/time/alarmtimer.c
+++ b/kernel/time/alarmtimer.c
@@ -825,6 +825,8 @@ static int alarm_timer_nsleep(const clockid_t which_clock, int flags,
ktime_t now = alarm_bases[type].get_ktime();
exp = ktime_add_safe(now, exp);
+ } else {
+ exp = timens_ktime_to_host(which_clock, exp);
}
ret = alarmtimer_do_nsleep(&alarm, exp, type);
--
2.22.0
^ permalink raw reply related
* [PATCHv5 13/37] posix-timers: Make timer_settime() time namespace aware
From: Dmitry Safonov @ 2019-07-29 21:56 UTC (permalink / raw)
To: linux-kernel
Cc: Dmitry Safonov, Andrei Vagin, Dmitry Safonov, Adrian Reber,
Andrei Vagin, Andy Lutomirski, Arnd Bergmann, Christian Brauner,
Cyrill Gorcunov, Eric W. Biederman, H. Peter Anvin, Ingo Molnar,
Jann Horn, Jeff Dike, Oleg Nesterov, Pavel Emelyanov, Shuah Khan,
Thomas Gleixner, Vincenzo Frascino, containers, criu, linux-api
In-Reply-To: <20190729215758.28405-1-dima@arista.com>
From: Andrei Vagin <avagin@gmail.com>
Wire timer_settime() syscall into time namespace virtualization.
sys_timer_settime() calls the ktime->timer_set() callback. Right now,
common_timer_set() is the only implementation for the callback.
There user-supplied timer's value is converted from timespec64 to ktime
and then timens_ktime_to_host() can be used to convert namespace's time
to the host time.
Inside a time namespace kernel's time differ on a fixed offset from
a user-supplied, but only absolute values (TIMER_ABSTIME) must
be converted.
Signed-off-by: Andrei Vagin <avagin@openvz.org>
Co-developed-by: Dmitry Safonov <dima@arista.com>
Signed-off-by: Dmitry Safonov <dima@arista.com>
---
kernel/time/posix-timers.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c
index 265fbc816520..c979e720a5a1 100644
--- a/kernel/time/posix-timers.c
+++ b/kernel/time/posix-timers.c
@@ -857,6 +857,8 @@ int common_timer_set(struct k_itimer *timr, int flags,
timr->it_interval = timespec64_to_ktime(new_setting->it_interval);
expires = timespec64_to_ktime(new_setting->it_value);
+ if (flags & TIMER_ABSTIME)
+ expires = timens_ktime_to_host(timr->it_clock, expires);
sigev_none = timr->it_sigev_notify == SIGEV_NONE;
kc->timer_arm(timr, expires, flags & TIMER_ABSTIME, sigev_none);
--
2.22.0
^ permalink raw reply related
* [PATCHv5 12/37] timerfd: Make timerfd_settime() time namespace aware
From: Dmitry Safonov @ 2019-07-29 21:56 UTC (permalink / raw)
To: linux-kernel
Cc: Dmitry Safonov, Andrei Vagin, Dmitry Safonov, Adrian Reber,
Andrei Vagin, Andy Lutomirski, Arnd Bergmann, Christian Brauner,
Cyrill Gorcunov, Eric W. Biederman, H. Peter Anvin, Ingo Molnar,
Jann Horn, Jeff Dike, Oleg Nesterov, Pavel Emelyanov, Shuah Khan,
Thomas Gleixner, Vincenzo Frascino, containers, criu, linux-api
In-Reply-To: <20190729215758.28405-1-dima@arista.com>
From: Andrei Vagin <avagin@gmail.com>
timerfd_settime() accepts an absolute value of the experation time if
TFD_TIMER_ABSTIME is specified. This value is in task's time namespace
and has to be converted to the host's time namespace.
Signed-off-by: Andrei Vagin <avagin@gmail.com>
Co-developed-by: Dmitry Safonov <dima@arista.com>
Signed-off-by: Dmitry Safonov <dima@arista.com>
---
fs/timerfd.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/fs/timerfd.c b/fs/timerfd.c
index 6a6fc8aa1de7..9b0c2f65e7e8 100644
--- a/fs/timerfd.c
+++ b/fs/timerfd.c
@@ -26,6 +26,7 @@
#include <linux/syscalls.h>
#include <linux/compat.h>
#include <linux/rcupdate.h>
+#include <linux/time_namespace.h>
struct timerfd_ctx {
union {
@@ -196,6 +197,8 @@ static int timerfd_setup(struct timerfd_ctx *ctx, int flags,
}
if (texp != 0) {
+ if (flags & TFD_TIMER_ABSTIME)
+ texp = timens_ktime_to_host(clockid, texp);
if (isalarm(ctx)) {
if (flags & TFD_TIMER_ABSTIME)
alarm_start(&ctx->t.alarm, texp);
--
2.22.0
^ permalink raw reply related
* [PATCHv5 11/37] kernel: Add do_timens_ktime_to_host() helper
From: Dmitry Safonov @ 2019-07-29 21:56 UTC (permalink / raw)
To: linux-kernel
Cc: Dmitry Safonov, Andrei Vagin, Dmitry Safonov, Adrian Reber,
Andrei Vagin, Andy Lutomirski, Arnd Bergmann, Christian Brauner,
Cyrill Gorcunov, Eric W. Biederman, H. Peter Anvin, Ingo Molnar,
Jann Horn, Jeff Dike, Oleg Nesterov, Pavel Emelyanov, Shuah Khan,
Thomas Gleixner, Vincenzo Frascino, containers, criu, linux-api
In-Reply-To: <20190729215758.28405-1-dima@arista.com>
From: Andrei Vagin <avagin@gmail.com>
The helper subtracts namespace's clock offset from the given time
and checks that the result is in [0, KTIME_MAX].
Signed-off-by: Andrei Vagin <avagin@gmail.com>
Co-developed-by: Dmitry Safonov <dima@arista.com>
Signed-off-by: Dmitry Safonov <dima@arista.com>
---
include/linux/time_namespace.h | 17 ++++++++++++++
kernel/time_namespace.c | 43 ++++++++++++++++++++++++++++++++++
2 files changed, 60 insertions(+)
diff --git a/include/linux/time_namespace.h b/include/linux/time_namespace.h
index 334c1a1c6607..9ba9664ff0ab 100644
--- a/include/linux/time_namespace.h
+++ b/include/linux/time_namespace.h
@@ -56,6 +56,18 @@ static inline void timens_add_boottime(struct timespec64 *ts)
*ts = timespec64_add(*ts, ns_offsets->boottime);
}
+ktime_t do_timens_ktime_to_host(clockid_t clockid, ktime_t tim,
+ struct timens_offsets *offsets);
+static inline ktime_t timens_ktime_to_host(clockid_t clockid, ktime_t tim)
+{
+ struct timens_offsets *offsets = current->nsproxy->time_ns->offsets;
+
+ if (!offsets)
+ return tim;
+
+ return do_timens_ktime_to_host(clockid, tim, offsets);
+}
+
#else
static inline struct time_namespace *get_time_ns(struct time_namespace *ns)
{
@@ -82,6 +94,11 @@ static inline int timens_on_fork(struct nsproxy *nsproxy, struct task_struct *ts
static inline void timens_add_monotonic(struct timespec64 *ts) {}
static inline void timens_add_boottime(struct timespec64 *ts) {}
+
+static inline ktime_t timens_ktime_to_host(clockid_t clockid, ktime_t tim)
+{
+ return tim;
+}
#endif
#endif /* _LINUX_TIMENS_H */
diff --git a/kernel/time_namespace.c b/kernel/time_namespace.c
index f849c59f1108..9807c5c90cb2 100644
--- a/kernel/time_namespace.c
+++ b/kernel/time_namespace.c
@@ -16,6 +16,49 @@
#include <linux/err.h>
#include <linux/mm.h>
+ktime_t do_timens_ktime_to_host(clockid_t clockid, ktime_t tim,
+ struct timens_offsets *ns_offsets)
+{
+ ktime_t offset;
+
+ switch (clockid) {
+ case CLOCK_MONOTONIC:
+ offset = timespec64_to_ktime(ns_offsets->monotonic);
+ break;
+ case CLOCK_BOOTTIME:
+ case CLOCK_BOOTTIME_ALARM:
+ offset = timespec64_to_ktime(ns_offsets->boottime);
+ break;
+ default:
+ return tim;
+ }
+
+ /*
+ * Check that @tim value is in [offset, KTIME_MAX + offset]
+ * and subtract offset.
+ */
+ if (tim < offset) {
+ /*
+ * User can specify @tim *absolute* value - if it's lesser than
+ * the time namespace's offset - it's already expired.
+ */
+ tim = 0;
+ } else if (KTIME_MAX - tim < -offset) {
+ /*
+ * User-supplied @tim may be close or even equal KTIME_MAX
+ * and time namespace offset can be negative.
+ * Let's check (tim - offset) for an overflow.
+ * Return KTIME_MAX in such case, as the time value is
+ * thousands *years* in future anyway.
+ */
+ tim = KTIME_MAX;
+ } else {
+ tim = ktime_sub(tim, offset);
+ }
+
+ return tim;
+}
+
static struct ucounts *inc_time_namespaces(struct user_namespace *ns)
{
return inc_ucount(ns, current_euid(), UCOUNT_TIME_NAMESPACES);
--
2.22.0
^ permalink raw reply related
* [PATCHv5 10/37] posix-clocks: Introduce CLOCK_BOOTTIME time namespace offset
From: Dmitry Safonov @ 2019-07-29 21:56 UTC (permalink / raw)
To: linux-kernel
Cc: Dmitry Safonov, Andrei Vagin, Dmitry Safonov, Adrian Reber,
Andrei Vagin, Andy Lutomirski, Arnd Bergmann, Christian Brauner,
Cyrill Gorcunov, Eric W. Biederman, H. Peter Anvin, Ingo Molnar,
Jann Horn, Jeff Dike, Oleg Nesterov, Pavel Emelyanov, Shuah Khan,
Thomas Gleixner, Vincenzo Frascino, containers, criu, linux-api
In-Reply-To: <20190729215758.28405-1-dima@arista.com>
From: Andrei Vagin <avagin@gmail.com>
Adds boottime virtualisation for time namespace.
Introduce timespec for boottime clock into timens offsets and wire
clock_gettime() syscall.
Signed-off-by: Andrei Vagin <avagin@gmail.com>
Co-developed-by: Dmitry Safonov <dima@arista.com>
Signed-off-by: Dmitry Safonov <dima@arista.com>
---
include/linux/time_namespace.h | 9 +++++++++
include/linux/timens_offsets.h | 1 +
kernel/time/alarmtimer.c | 1 +
kernel/time/posix-stubs.c | 1 +
kernel/time/posix-timers.c | 1 +
5 files changed, 13 insertions(+)
diff --git a/include/linux/time_namespace.h b/include/linux/time_namespace.h
index 186c134fe222..334c1a1c6607 100644
--- a/include/linux/time_namespace.h
+++ b/include/linux/time_namespace.h
@@ -48,6 +48,14 @@ static inline void timens_add_monotonic(struct timespec64 *ts)
*ts = timespec64_add(*ts, ns_offsets->monotonic);
}
+static inline void timens_add_boottime(struct timespec64 *ts)
+{
+ struct timens_offsets *ns_offsets = current->nsproxy->time_ns->offsets;
+
+ if (ns_offsets)
+ *ts = timespec64_add(*ts, ns_offsets->boottime);
+}
+
#else
static inline struct time_namespace *get_time_ns(struct time_namespace *ns)
{
@@ -73,6 +81,7 @@ static inline int timens_on_fork(struct nsproxy *nsproxy, struct task_struct *ts
}
static inline void timens_add_monotonic(struct timespec64 *ts) {}
+static inline void timens_add_boottime(struct timespec64 *ts) {}
#endif
#endif /* _LINUX_TIMENS_H */
diff --git a/include/linux/timens_offsets.h b/include/linux/timens_offsets.h
index eaac2c82be5c..e93aabaa5e45 100644
--- a/include/linux/timens_offsets.h
+++ b/include/linux/timens_offsets.h
@@ -4,6 +4,7 @@
struct timens_offsets {
struct timespec64 monotonic;
+ struct timespec64 boottime;
};
#endif
diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c
index 55cb6e78d082..b1e82bb6cc6b 100644
--- a/kernel/time/alarmtimer.c
+++ b/kernel/time/alarmtimer.c
@@ -26,6 +26,7 @@
#include <linux/freezer.h>
#include <linux/compat.h>
#include <linux/module.h>
+#include <linux/time_namespace.h>
#include "posix-timers.h"
diff --git a/kernel/time/posix-stubs.c b/kernel/time/posix-stubs.c
index 17c67e0aecd8..edaf075d1ee4 100644
--- a/kernel/time/posix-stubs.c
+++ b/kernel/time/posix-stubs.c
@@ -82,6 +82,7 @@ int do_clock_gettime(clockid_t which_clock, struct timespec64 *tp)
break;
case CLOCK_BOOTTIME:
ktime_get_boottime_ts64(tp);
+ timens_add_boottime(tp);
break;
default:
return -EINVAL;
diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c
index cfeb0477d1f9..265fbc816520 100644
--- a/kernel/time/posix-timers.c
+++ b/kernel/time/posix-timers.c
@@ -240,6 +240,7 @@ static int posix_get_coarse_res(const clockid_t which_clock, struct timespec64 *
int posix_get_boottime_timespec(const clockid_t which_clock, struct timespec64 *tp)
{
ktime_get_boottime_ts64(tp);
+ timens_add_boottime(tp);
return 0;
}
--
2.22.0
^ permalink raw reply related
* [PATCHv5 09/37] posix-clocks: Introduce CLOCK_MONOTONIC time namespace offsets
From: Dmitry Safonov @ 2019-07-29 21:56 UTC (permalink / raw)
To: linux-kernel
Cc: Dmitry Safonov, Andrei Vagin, Dmitry Safonov, Adrian Reber,
Andy Lutomirski, Arnd Bergmann, Christian Brauner,
Cyrill Gorcunov, Eric W. Biederman, H. Peter Anvin, Ingo Molnar,
Jann Horn, Jeff Dike, Oleg Nesterov, Pavel Emelyanov, Shuah Khan,
Thomas Gleixner, Vincenzo Frascino, containers, criu, linux-api,
x86
In-Reply-To: <20190729215758.28405-1-dima@arista.com>
From: Andrei Vagin <avagin@openvz.org>
Add monotonic time virtualisation for time namespace.
Introduce timespec for monotionic clock into timens offsets and wire
clock_gettime() syscall.
Signed-off-by: Andrei Vagin <avagin@openvz.org>
Co-developed-by: Dmitry Safonov <dima@arista.com>
Signed-off-by: Dmitry Safonov <dima@arista.com>
---
include/linux/time_namespace.h | 8 ++++++++
include/linux/timens_offsets.h | 1 +
kernel/time/posix-stubs.c | 2 ++
kernel/time/posix-timers.c | 4 ++++
4 files changed, 15 insertions(+)
diff --git a/include/linux/time_namespace.h b/include/linux/time_namespace.h
index b6985aa87479..186c134fe222 100644
--- a/include/linux/time_namespace.h
+++ b/include/linux/time_namespace.h
@@ -40,6 +40,13 @@ static inline void put_time_ns(struct time_namespace *ns)
kref_put(&ns->kref, free_time_ns);
}
+static inline void timens_add_monotonic(struct timespec64 *ts)
+{
+ struct timens_offsets *ns_offsets = current->nsproxy->time_ns->offsets;
+
+ if (ns_offsets)
+ *ts = timespec64_add(*ts, ns_offsets->monotonic);
+}
#else
static inline struct time_namespace *get_time_ns(struct time_namespace *ns)
@@ -65,6 +72,7 @@ static inline int timens_on_fork(struct nsproxy *nsproxy, struct task_struct *ts
return 0;
}
+static inline void timens_add_monotonic(struct timespec64 *ts) {}
#endif
#endif /* _LINUX_TIMENS_H */
diff --git a/include/linux/timens_offsets.h b/include/linux/timens_offsets.h
index 7d7cb68ea778..eaac2c82be5c 100644
--- a/include/linux/timens_offsets.h
+++ b/include/linux/timens_offsets.h
@@ -3,6 +3,7 @@
#define _LINUX_TIME_OFFSETS_H
struct timens_offsets {
+ struct timespec64 monotonic;
};
#endif
diff --git a/kernel/time/posix-stubs.c b/kernel/time/posix-stubs.c
index 67df65f887ac..17c67e0aecd8 100644
--- a/kernel/time/posix-stubs.c
+++ b/kernel/time/posix-stubs.c
@@ -14,6 +14,7 @@
#include <linux/ktime.h>
#include <linux/timekeeping.h>
#include <linux/posix-timers.h>
+#include <linux/time_namespace.h>
#include <linux/compat.h>
#ifdef CONFIG_ARCH_HAS_SYSCALL_WRAPPER
@@ -77,6 +78,7 @@ int do_clock_gettime(clockid_t which_clock, struct timespec64 *tp)
break;
case CLOCK_MONOTONIC:
ktime_get_ts64(tp);
+ timens_add_monotonic(tp);
break;
case CLOCK_BOOTTIME:
ktime_get_boottime_ts64(tp);
diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c
index aae7ab53790d..cfeb0477d1f9 100644
--- a/kernel/time/posix-timers.c
+++ b/kernel/time/posix-timers.c
@@ -30,6 +30,7 @@
#include <linux/hashtable.h>
#include <linux/compat.h>
#include <linux/nospec.h>
+#include <linux/time_namespace.h>
#include "timekeeping.h"
#include "posix-timers.h"
@@ -196,6 +197,7 @@ static int posix_clock_realtime_adj(const clockid_t which_clock,
int posix_get_timespec(clockid_t which_clock, struct timespec64 *tp)
{
ktime_get_ts64(tp);
+ timens_add_monotonic(tp);
return 0;
}
@@ -210,6 +212,7 @@ static ktime_t posix_get_ktime(clockid_t which_clock)
static int posix_get_monotonic_raw(clockid_t which_clock, struct timespec64 *tp)
{
ktime_get_raw_ts64(tp);
+ timens_add_monotonic(tp);
return 0;
}
@@ -224,6 +227,7 @@ static int posix_get_monotonic_coarse(clockid_t which_clock,
struct timespec64 *tp)
{
ktime_get_coarse_ts64(tp);
+ timens_add_monotonic(tp);
return 0;
}
--
2.22.0
^ permalink raw reply related
* [PATCHv5 08/37] posix-timers: Use clock_get_ktime() in common_timer_get()
From: Dmitry Safonov @ 2019-07-29 21:56 UTC (permalink / raw)
To: linux-kernel
Cc: Dmitry Safonov, Andrei Vagin, Dmitry Safonov, Adrian Reber,
Andrei Vagin, Andy Lutomirski, Arnd Bergmann, Christian Brauner,
Cyrill Gorcunov, Eric W. Biederman, H. Peter Anvin, Ingo Molnar,
Jann Horn, Jeff Dike, Oleg Nesterov, Pavel Emelyanov, Shuah Khan,
Thomas Gleixner, Vincenzo Frascino, containers, criu, linux-api
In-Reply-To: <20190729215758.28405-1-dima@arista.com>
From: Andrei Vagin <avagin@gmail.com>
Now, when the clock_get_ktime() callback exists, the suboptimal
timespec64-based conversion can be removed from common_timer_get().
Suggested-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrei Vagin <avagin@gmail.com>
Co-developed-by: Dmitry Safonov <dima@arista.com>
Signed-off-by: Dmitry Safonov <dima@arista.com>
---
kernel/time/posix-timers.c | 8 +-------
1 file changed, 1 insertion(+), 7 deletions(-)
diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c
index fb1848c84241..aae7ab53790d 100644
--- a/kernel/time/posix-timers.c
+++ b/kernel/time/posix-timers.c
@@ -666,7 +666,6 @@ void common_timer_get(struct k_itimer *timr, struct itimerspec64 *cur_setting)
{
const struct k_clock *kc = timr->kclock;
ktime_t now, remaining, iv;
- struct timespec64 ts64;
bool sig_none;
sig_none = timr->it_sigev_notify == SIGEV_NONE;
@@ -684,12 +683,7 @@ void common_timer_get(struct k_itimer *timr, struct itimerspec64 *cur_setting)
return;
}
- /*
- * The timespec64 based conversion is suboptimal, but it's not
- * worth to implement yet another callback.
- */
- kc->clock_get_timespec(timr->it_clock, &ts64);
- now = timespec64_to_ktime(ts64);
+ now = kc->clock_get_ktime(timr->it_clock);
/*
* When a requeue is pending or this is a SIGEV_NONE timer move the
--
2.22.0
^ permalink raw reply related
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox