All of lore.kernel.org
 help / color / mirror / Atom feed
From: Dmitry Safonov <dima@arista.com>
To: linux-kernel@vger.kernel.org
Cc: Dmitry Safonov <0x7f454c46@gmail.com>,
	Andrei Vagin <avagin@openvz.org>,
	Dmitry Safonov <dima@arista.com>, Adrian Reber <adrian@lisas.de>,
	Andy Lutomirski <luto@kernel.org>,
	Christian Brauner <christian.brauner@ubuntu.com>,
	Cyrill Gorcunov <gorcunov@openvz.org>,
	"Eric W. Biederman" <ebiederm@xmission.com>,
	"H. Peter Anvin" <hpa@zytor.com>, Ingo Molnar <mingo@redhat.com>,
	Jeff Dike <jdike@addtoit.com>, Oleg Nesterov <oleg@redhat.com>,
	Pavel Emelyanov <xemul@virtuozzo.com>,
	Shuah Khan <shuah@kernel.org>,
	Thomas Gleixner <tglx@linutronix.de>,
	containers@lists.linux-foundation.org, criu@openvz.org,
	linux-api@vger.kernel.org, x86@kernel.org
Subject: [RFC 09/20] x86/vdso/timens: Add offsets page in vvar
Date: Wed, 19 Sep 2018 21:50:26 +0100	[thread overview]
Message-ID: <20180919205037.9574-10-dima@arista.com> (raw)
In-Reply-To: <20180919205037.9574-1-dima@arista.com>

From: Andrei Vagin <avagin@openvz.org>

Modern applications read the time through the vDSO without entering the
kernel, so the time namespace offsets have to be made available to
userspace code.

Allocate a page for the timens offsets when constructing a time namespace.
As vDSO mappings are platform-specific, add a Kconfig dependency on the
architecture.

Signed-off-by: Andrei Vagin <avagin@openvz.org>
Co-developed-by: Dmitry Safonov <dima@arista.com>
Signed-off-by: Dmitry Safonov <dima@arista.com>
---
 arch/Kconfig                          |  5 +++++
 arch/x86/Kconfig                      |  1 +
 arch/x86/entry/vdso/vclock_gettime.c  | 26 ++++++++++++++++++++++++++
 arch/x86/entry/vdso/vdso-layout.lds.S |  9 ++++++++-
 arch/x86/entry/vdso/vdso2c.c          |  3 +++
 arch/x86/entry/vdso/vma.c             | 12 ++++++++++++
 arch/x86/include/asm/vdso.h           |  1 +
 init/Kconfig                          |  1 +
 8 files changed, 57 insertions(+), 1 deletion(-)

diff --git a/arch/Kconfig b/arch/Kconfig
index 6801123932a5..411df0227a1d 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -681,6 +681,11 @@ config HAVE_ARCH_HASH
 config ISA_BUS_API
 	def_bool ISA
 
+config ARCH_HAS_VDSO_TIME_NS
+	bool
+	help
+	 VDSO can add time-ns offsets without entering kernel.
+
 #
 # ABI hall of shame
 #
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 1a0be022f91d..4bcbdd1f1200 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -70,6 +70,7 @@ config X86
 	select ARCH_HAS_STRICT_MODULE_RWX
 	select ARCH_HAS_SYNC_CORE_BEFORE_USERMODE
 	select ARCH_HAS_UBSAN_SANITIZE_ALL
+	select ARCH_HAS_VDSO_TIME_NS
 	select ARCH_HAS_ZONE_DEVICE		if X86_64
 	select ARCH_HAVE_NMI_SAFE_CMPXCHG
 	select ARCH_MIGHT_HAVE_ACPI_PDC		if ACPI
diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c
index f19856d95c60..0594266740b9 100644
--- a/arch/x86/entry/vdso/vclock_gettime.c
+++ b/arch/x86/entry/vdso/vclock_gettime.c
@@ -21,6 +21,7 @@
 #include <linux/math64.h>
 #include <linux/time.h>
 #include <linux/kernel.h>
+#include <linux/timens_offsets.h>
 
 #define gtod (&VVAR(vsyscall_gtod_data))
 
@@ -38,6 +39,11 @@ extern u8 hvclock_page
 	__attribute__((visibility("hidden")));
 #endif
 
+#ifdef CONFIG_TIME_NS
+extern u8 timens_page
+	__attribute__((visibility("hidden")));
+#endif
+
 #ifndef BUILD_VDSO32
 
 notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
@@ -225,6 +231,23 @@ notrace static int __always_inline do_realtime(struct timespec *ts)
 	return mode;
 }
 
+notrace static __always_inline void monotonic_to_ns(struct timespec *ts)
+{
+#ifdef CONFIG_TIME_NS	/* without CONFIG_TIME_NS, *ts is left untouched */
+	struct timens_offsets *timens = (struct timens_offsets *) &timens_page;
+
+	ts->tv_sec += timens->monotonic_time_offset.tv_sec;
+	ts->tv_nsec += timens->monotonic_time_offset.tv_nsec;
+	if (ts->tv_nsec >= NSEC_PER_SEC) {	/* a full second carries too */
+		ts->tv_nsec -= NSEC_PER_SEC;
+		ts->tv_sec++;
+	} else if (ts->tv_nsec < 0) {		/* borrow from tv_sec */
+		ts->tv_nsec += NSEC_PER_SEC;
+		ts->tv_sec--;
+	}
+#endif /* CONFIG_TIME_NS */
+}
+
 notrace static int __always_inline do_monotonic(struct timespec *ts)
 {
 	unsigned long seq;
@@ -243,6 +266,8 @@ notrace static int __always_inline do_monotonic(struct timespec *ts)
 	ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
 	ts->tv_nsec = ns;
 
+	monotonic_to_ns(ts);
+
 	return mode;
 }
 
@@ -264,6 +289,7 @@ notrace static void do_monotonic_coarse(struct timespec *ts)
 		ts->tv_sec = gtod->monotonic_time_coarse_sec;
 		ts->tv_nsec = gtod->monotonic_time_coarse_nsec;
 	} while (unlikely(gtod_read_retry(gtod, seq)));
+	monotonic_to_ns(ts);
 }
 
 notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts)
diff --git a/arch/x86/entry/vdso/vdso-layout.lds.S b/arch/x86/entry/vdso/vdso-layout.lds.S
index acfd5ba7d943..e5c2e9deca03 100644
--- a/arch/x86/entry/vdso/vdso-layout.lds.S
+++ b/arch/x86/entry/vdso/vdso-layout.lds.S
@@ -17,6 +17,12 @@
 
 #define NUM_FAKE_SHDRS 13
 
+#ifdef CONFIG_TIME_NS
+# define TIMENS_SZ	PAGE_SIZE
+#else
+# define TIMENS_SZ	0
+#endif
+
 SECTIONS
 {
 	/*
@@ -26,7 +32,7 @@ SECTIONS
 	 * segment.
 	 */
 
-	vvar_start = . - 3 * PAGE_SIZE;
+	vvar_start = . - (3 * PAGE_SIZE + TIMENS_SZ);
 	vvar_page = vvar_start;
 
 	/* Place all vvars at the offsets in asm/vvar.h. */
@@ -38,6 +44,7 @@ SECTIONS
 
 	pvclock_page = vvar_start + PAGE_SIZE;
 	hvclock_page = vvar_start + 2 * PAGE_SIZE;
+	timens_page = vvar_start + 3 * PAGE_SIZE;
 
 	. = SIZEOF_HEADERS;
 
diff --git a/arch/x86/entry/vdso/vdso2c.c b/arch/x86/entry/vdso/vdso2c.c
index 4674f58581a1..6c67cde7fe99 100644
--- a/arch/x86/entry/vdso/vdso2c.c
+++ b/arch/x86/entry/vdso/vdso2c.c
@@ -76,6 +76,7 @@ enum {
 	sym_hpet_page,
 	sym_pvclock_page,
 	sym_hvclock_page,
+	sym_timens_page,
 	sym_VDSO_FAKE_SECTION_TABLE_START,
 	sym_VDSO_FAKE_SECTION_TABLE_END,
 };
@@ -85,6 +86,7 @@ const int special_pages[] = {
 	sym_hpet_page,
 	sym_pvclock_page,
 	sym_hvclock_page,
+	sym_timens_page,
 };
 
 struct vdso_sym {
@@ -98,6 +100,7 @@ struct vdso_sym required_syms[] = {
 	[sym_hpet_page] = {"hpet_page", true},
 	[sym_pvclock_page] = {"pvclock_page", true},
 	[sym_hvclock_page] = {"hvclock_page", true},
+	[sym_timens_page] = {"timens_page", true},
 	[sym_VDSO_FAKE_SECTION_TABLE_START] = {
 		"VDSO_FAKE_SECTION_TABLE_START", false
 	},
diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c
index 8cc0395687b0..0f92227a4a7e 100644
--- a/arch/x86/entry/vdso/vma.c
+++ b/arch/x86/entry/vdso/vma.c
@@ -14,6 +14,7 @@
 #include <linux/elf.h>
 #include <linux/cpu.h>
 #include <linux/ptrace.h>
+#include <linux/time_namespace.h>
 #include <asm/pvclock.h>
 #include <asm/vgtod.h>
 #include <asm/proto.h>
@@ -23,6 +24,7 @@
 #include <asm/desc.h>
 #include <asm/cpufeature.h>
 #include <asm/mshyperv.h>
+#include <asm/page.h>
 
 #if defined(CONFIG_X86_64)
 unsigned int __read_mostly vdso64_enabled = 1;
@@ -138,6 +140,16 @@ static int vvar_fault(const struct vm_special_mapping *sm,
 		if (tsc_pg && vclock_was_used(VCLOCK_HVCLOCK))
 			ret = vm_insert_pfn(vma, vmf->address,
 					    vmalloc_to_pfn(tsc_pg));
+	} else if (sym_offset == image->sym_timens_page) {
+		struct time_namespace *ns = current->nsproxy->time_ns;
+		unsigned long pfn;
+
+		if (!ns->offsets)
+			pfn = page_to_pfn(ZERO_PAGE(0));
+		else
+			pfn = page_to_pfn(virt_to_page(ns->offsets));
+
+		ret = vm_insert_pfn(vma, vmf->address, pfn);
 	}
 
 	if (ret == 0 || ret == -EBUSY)
diff --git a/arch/x86/include/asm/vdso.h b/arch/x86/include/asm/vdso.h
index 27566e57e87d..619322065b8e 100644
--- a/arch/x86/include/asm/vdso.h
+++ b/arch/x86/include/asm/vdso.h
@@ -22,6 +22,7 @@ struct vdso_image {
 	long sym_hpet_page;
 	long sym_pvclock_page;
 	long sym_hvclock_page;
+	long sym_timens_page;
 	long sym_VDSO32_NOTE_MASK;
 	long sym___kernel_sigreturn;
 	long sym___kernel_rt_sigreturn;
diff --git a/init/Kconfig b/init/Kconfig
index dc2b40f7d73f..c9b250475ddb 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -929,6 +929,7 @@ config UTS_NS
 
 config TIME_NS
 	bool "TIME namespace"
+	depends on ARCH_HAS_VDSO_TIME_NS
 	default y
 	help
 	  In this namespace boottime and monotonic clocks can be set.
-- 
2.13.6

  parent reply	other threads:[~2018-09-19 20:50 UTC|newest]

Thread overview: 164+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-09-19 20:50 [RFC 00/20] ns: Introduce Time Namespace Dmitry Safonov
2018-09-19 20:50 ` Dmitry Safonov
2018-09-19 20:50 ` Dmitry Safonov
2018-09-19 20:50 ` dima
2018-09-19 20:50 ` [RFC 01/20] " Dmitry Safonov
2018-09-28 18:20   ` Laurent Vivier
2018-09-19 20:50 ` [RFC 02/20] timens: Add timens_offsets Dmitry Safonov
2018-09-20 18:45   ` Cyrill Gorcunov
2018-09-20 22:14     ` Cyrill Gorcunov
2018-09-19 20:50 ` [RFC 03/20] timens: Introduce CLOCK_MONOTONIC offsets Dmitry Safonov
2018-09-19 20:50 ` [RFC 04/20] timens: Introduce CLOCK_BOOTTIME offset Dmitry Safonov
2018-09-30  3:18   ` [LKP] [timens] 3cc8de9dcb: RIP:posix_get_boottime kernel test robot
2018-09-30  3:18     ` kernel test robot
2018-09-30  3:18     ` kernel test robot
2018-09-19 20:50 ` [RFC 05/20] timerfd/timens: Take into account ns clock offsets Dmitry Safonov
2018-09-19 20:50 ` [RFC 06/20] kernel: Take into account timens clock offsets in clock_nanosleep Dmitry Safonov
2018-09-19 20:50 ` [RFC 07/20] timens: Shift /proc/uptime Dmitry Safonov
2018-09-19 20:50 ` [RFC 08/20] x86/vdso: Restrict splitting vvar vma Dmitry Safonov
2018-09-19 20:50 ` Dmitry Safonov [this message]
2018-09-19 20:50 ` [RFC 10/20] x86/vdso: Use set_normalized_timespec() to avoid 32 bit overflow Dmitry Safonov
2018-09-19 20:50 ` [RFC 11/20] x86/vdso: Purge timens page on setns()/unshare()/clone() Dmitry Safonov
2018-09-19 20:50 ` [RFC 12/20] x86/vdso: Look for vvar vma to purge timens page Dmitry Safonov
2018-09-19 20:50 ` [RFC 13/20] posix-timers/timens: Take into account clock offsets Dmitry Safonov
2018-09-30  3:11   ` [LKP] [posix] 25217c6e39: BUG:KASAN:null-ptr-deref_in_c kernel test robot
2018-09-30  3:11     ` kernel test robot
2018-09-30  3:11     ` kernel test robot
2018-09-19 20:50 ` [RFC 14/20] timens: Add align for timens_offsets Dmitry Safonov
2018-09-19 20:50 ` [RFC 15/20] timens: Optimize zero-offsets Dmitry Safonov
2018-09-19 20:50 ` [RFC 16/20] selftest: Add Time Namespace test for supported clocks Dmitry Safonov
2018-09-19 20:50   ` Dmitry Safonov
2018-09-19 20:50   ` dima
2018-09-24 21:36   ` Shuah Khan
2018-09-24 21:36     ` Shuah Khan
2018-09-24 21:36     ` shuah
2018-09-19 20:50 ` [RFC 17/20] selftest/timens: Add test for timerfd Dmitry Safonov
2018-09-19 20:50   ` Dmitry Safonov
2018-09-19 20:50   ` dima
2018-09-19 20:50 ` [RFC 18/20] selftest/timens: Add test for clock_nanosleep Dmitry Safonov
2018-09-19 20:50   ` Dmitry Safonov
2018-09-19 20:50   ` dima
2018-09-19 20:50 ` [RFC 19/20] timens/selftest: Add procfs selftest Dmitry Safonov
2018-09-19 20:50   ` Dmitry Safonov
2018-09-19 20:50   ` dima
2018-09-19 20:50 ` [RFC 20/20] timens/selftest: Add timer offsets test Dmitry Safonov
2018-09-19 20:50   ` Dmitry Safonov
2018-09-19 20:50   ` dima
2018-09-21 12:27 ` [RFC 00/20] ns: Introduce Time Namespace Eric W. Biederman
2018-09-21 12:27   ` Eric W. Biederman
2018-09-21 12:27   ` ebiederm
2018-09-24 20:51   ` Andrey Vagin
2018-09-24 20:51     ` Andrey Vagin
2018-09-24 20:51     ` Andrey Vagin
2018-09-24 20:51     ` avagin
2018-09-24 22:02     ` Eric W. Biederman
2018-09-24 22:02       ` Eric W. Biederman
2018-09-24 22:02       ` Eric W. Biederman
2018-09-24 22:02       ` ebiederm
2018-09-25  1:42       ` Andrey Vagin
2018-09-25  1:42         ` Andrey Vagin
2018-09-25  1:42         ` Andrey Vagin
2018-09-25  1:42         ` avagin
2018-09-26 17:36         ` Eric W. Biederman
2018-09-26 17:36           ` Eric W. Biederman
2018-09-26 17:36           ` Eric W. Biederman
2018-09-26 17:36           ` ebiederm
2018-09-26 17:59           ` Dmitry Safonov
2018-09-26 17:59             ` Dmitry Safonov
2018-09-26 17:59             ` Dmitry Safonov
2018-09-26 17:59             ` 0x7f454c46
2018-09-27 21:30           ` Thomas Gleixner
2018-09-27 21:30             ` Thomas Gleixner
2018-09-27 21:30             ` Thomas Gleixner
2018-09-27 21:30             ` tglx
2018-09-27 21:41             ` Thomas Gleixner
2018-09-27 21:41               ` Thomas Gleixner
2018-09-27 21:41               ` Thomas Gleixner
2018-09-27 21:41               ` tglx
2018-10-01 23:20               ` Andrey Vagin
2018-10-01 23:20                 ` Andrey Vagin
2018-10-01 23:20                 ` Andrey Vagin
2018-10-01 23:20                 ` avagin
2018-10-02  6:15                 ` Thomas Gleixner
2018-10-02  6:15                   ` Thomas Gleixner
2018-10-02  6:15                   ` Thomas Gleixner
2018-10-02  6:15                   ` tglx
2018-10-02 21:05                   ` Dmitry Safonov
2018-10-02 21:05                     ` Dmitry Safonov
2018-10-02 21:05                     ` 0x7f454c46
2018-10-02 21:26                     ` Thomas Gleixner
2018-10-02 21:26                       ` Thomas Gleixner
2018-10-02 21:26                       ` tglx
2018-09-28 17:03             ` Eric W. Biederman
2018-09-28 17:03               ` Eric W. Biederman
2018-09-28 17:03               ` Eric W. Biederman
2018-09-28 17:03               ` ebiederm
2018-09-28 19:32               ` Thomas Gleixner
2018-09-28 19:32                 ` Thomas Gleixner
2018-09-28 19:32                 ` Thomas Gleixner
2018-09-28 19:32                 ` tglx
2018-10-01  9:05                 ` Eric W. Biederman
2018-10-01  9:05                   ` Eric W. Biederman
2018-10-01  9:05                   ` Eric W. Biederman
2018-10-01  9:05                   ` ebiederm
2018-10-01  9:15                 ` Setting monotonic time? Eric W. Biederman
2018-10-01  9:15                   ` Eric W. Biederman
2018-10-01  9:15                   ` Eric W. Biederman
2018-10-01  9:15                   ` ebiederm
2018-10-01 18:52                   ` Thomas Gleixner
2018-10-01 18:52                     ` Thomas Gleixner
2018-10-01 18:52                     ` Thomas Gleixner
2018-10-01 18:52                     ` tglx
2018-10-02 20:00                     ` Arnd Bergmann
2018-10-02 20:00                       ` Arnd Bergmann
2018-10-02 20:00                       ` arnd
2018-10-02 20:06                       ` Thomas Gleixner
2018-10-02 20:06                         ` Thomas Gleixner
2018-10-02 20:06                         ` tglx
2018-10-03  4:50                         ` Eric W. Biederman
2018-10-03  4:50                           ` Eric W. Biederman
2018-10-03  4:50                           ` ebiederm
2018-10-03  5:25                           ` Thomas Gleixner
2018-10-03  5:25                             ` Thomas Gleixner
2018-10-03  5:25                             ` tglx
2018-10-03  6:14                             ` Eric W. Biederman
2018-10-03  6:14                               ` Eric W. Biederman
2018-10-03  6:14                               ` ebiederm
2018-10-03  7:02                               ` Arnd Bergmann
2018-10-03  7:02                                 ` Arnd Bergmann
2018-10-03  7:02                                 ` arnd
2018-10-03  6:14                             ` Thomas Gleixner
2018-10-03  6:14                               ` Thomas Gleixner
2018-10-03  6:14                               ` tglx
2018-10-01 20:51                   ` Andrey Vagin
2018-10-01 20:51                     ` Andrey Vagin
2018-10-01 20:51                     ` Andrey Vagin
2018-10-01 20:51                     ` avagin
2018-10-02  6:16                     ` Thomas Gleixner
2018-10-02  6:16                       ` Thomas Gleixner
2018-10-02  6:16                       ` Thomas Gleixner
2018-10-02  6:16                       ` tglx
2018-10-21  1:41               ` [RFC 00/20] ns: Introduce Time Namespace Andrei Vagin
2018-10-21  1:41                 ` Andrei Vagin
2018-10-21  1:41                 ` Andrei Vagin
2018-10-21  1:41                 ` avagin
2018-10-21  3:54                 ` Andrei Vagin
2018-10-21  3:54                   ` Andrei Vagin
2018-10-21  3:54                   ` Andrei Vagin
2018-10-21  3:54                   ` avagin
2018-10-29 20:33                 ` Thomas Gleixner
2018-10-29 20:33                   ` Thomas Gleixner
2018-10-29 20:33                   ` Thomas Gleixner
2018-10-29 20:33                   ` tglx
2018-10-29 21:21                   ` Eric W. Biederman
2018-10-29 21:21                     ` Eric W. Biederman
2018-10-29 21:21                     ` Eric W. Biederman
2018-10-29 21:21                     ` ebiederm
2018-10-29 21:36                     ` Thomas Gleixner
2018-10-29 21:36                       ` Thomas Gleixner
2018-10-29 21:36                       ` Thomas Gleixner
2018-10-29 21:36                       ` tglx
2018-10-31 16:26                   ` Andrei Vagin
2018-10-31 16:26                     ` Andrei Vagin
2018-10-31 16:26                     ` Andrei Vagin
2018-10-31 16:26                     ` avagin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20180919205037.9574-10-dima@arista.com \
    --to=dima@arista.com \
    --cc=0x7f454c46@gmail.com \
    --cc=adrian@lisas.de \
    --cc=avagin@openvz.org \
    --cc=christian.brauner@ubuntu.com \
    --cc=containers@lists.linux-foundation.org \
    --cc=criu@openvz.org \
    --cc=ebiederm@xmission.com \
    --cc=gorcunov@openvz.org \
    --cc=hpa@zytor.com \
    --cc=jdike@addtoit.com \
    --cc=linux-api@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=luto@kernel.org \
    --cc=mingo@redhat.com \
    --cc=oleg@redhat.com \
    --cc=shuah@kernel.org \
    --cc=tglx@linutronix.de \
    --cc=x86@kernel.org \
    --cc=xemul@virtuozzo.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.