* [PATCH v4 6/9] epoll: Add implementation for epoll_pwait1
From: Fam Zheng @ 2015-03-10 1:49 UTC (permalink / raw)
To: linux-kernel
Cc: Thomas Gleixner, Ingo Molnar, H. Peter Anvin, x86, Alexander Viro,
Andrew Morton, Kees Cook, Andy Lutomirski, David Herrmann,
Alexei Starovoitov, Miklos Szeredi, David Drysdale, Oleg Nesterov,
David S. Miller, Vivek Goyal, Mike Frysinger, Theodore Ts'o,
Heiko Carstens, Rasmus Villemoes, Rashika Kheria, Hugh Dickins,
Mathieu Desnoyers, Fam Zheng, Peter Zijlstra <peter>
In-Reply-To: <1425952155-27603-1-git-send-email-famz@redhat.com>
This is the new implementation for poll which has a flags parameter and
packs a number of parameters into a structure.
The main advantage of it over existing epoll_pwait is about timeout:
epoll_pwait expects a relative millisecond value, while epoll_pwait1
accepts 1) a timespec which is in nanosecond granularity; 2) a clockid
to allow using a clock other than CLOCK_MONOTONIC.
The 'flags' field in params is reserved for now and must be zero. The
next step would be allowing absolute timeout value.
Signed-off-by: Fam Zheng <famz@redhat.com>
---
fs/eventpoll.c | 39 ++++++++++++++++++++++++++++++++++++++-
include/linux/syscalls.h | 5 +++++
include/uapi/linux/eventpoll.h | 8 ++++++++
3 files changed, 51 insertions(+), 1 deletion(-)
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 54dc63f..06a59fc 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -2085,7 +2085,6 @@ SYSCALL_DEFINE4(epoll_ctl_batch, int, epfd, int, flags,
cmd_size = sizeof(struct epoll_ctl_cmd) * ncmds;
/* TODO: optimize for small arguments like select/poll with a stack
* allocated buffer */
-
kcmds = kmalloc(cmd_size, GFP_KERNEL);
if (!kcmds)
return -ENOMEM;
@@ -2119,6 +2118,44 @@ out:
return ret;
}
+SYSCALL_DEFINE5(epoll_pwait1, int, epfd, int, flags,
+ struct epoll_event __user *, events,
+ int, maxevents,
+ struct epoll_wait_params __user *, params)
+{
+ struct epoll_wait_params p;
+ ktime_t kt = { 0 };
+ sigset_t sigmask;
+ struct timespec timeout;
+
+ if (flags)
+ return -EINVAL;
+ if (!params)
+ return -EINVAL;
+ if (copy_from_user(&p, params, sizeof(p)))
+ return -EFAULT;
+ if (p.size != sizeof(p))
+ return -EINVAL;
+ if (p.sigmask) {
+ if (copy_from_user(&sigmask, p.sigmask, sizeof(sigmask)))
+ return -EFAULT;
+ if (p.sigsetsize != sizeof(p.sigmask))
+ return -EINVAL;
+ }
+ if (p.timeout) {
+ if (copy_from_user(&timeout, p.timeout, sizeof(timeout)))
+ return -EFAULT;
+ if (!timespec_valid(&timeout))
+ return -EINVAL;
+ kt = timespec_to_ktime(timeout);
+ } else {
+ kt = ns_to_ktime(-1);
+ }
+
+ return epoll_pwait_do(epfd, events, maxevents, p.clockid,
+ kt, p.sigmask ? &sigmask : NULL);
+}
+
#ifdef CONFIG_COMPAT
COMPAT_SYSCALL_DEFINE6(epoll_pwait, int, epfd,
struct epoll_event __user *, events,
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 7d784e3..a4823d9 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -13,6 +13,7 @@
struct epoll_event;
struct epoll_ctl_cmd;
+struct epoll_wait_params;
struct iattr;
struct inode;
struct iocb;
@@ -635,6 +636,10 @@ asmlinkage long sys_epoll_pwait(int epfd, struct epoll_event __user *events,
int maxevents, int timeout,
const sigset_t __user *sigmask,
size_t sigsetsize);
+asmlinkage long sys_epoll_pwait1(int epfd, int flags,
+ struct epoll_event __user *events,
+ int maxevents,
+ struct epoll_wait_params __user *params);
asmlinkage long sys_epoll_ctl_batch(int epfd, int flags,
int ncmds,
struct epoll_ctl_cmd __user *cmds);
diff --git a/include/uapi/linux/eventpoll.h b/include/uapi/linux/eventpoll.h
index 4e18b17..05ae035 100644
--- a/include/uapi/linux/eventpoll.h
+++ b/include/uapi/linux/eventpoll.h
@@ -72,6 +72,14 @@ struct epoll_ctl_cmd {
int result;
} EPOLL_PACKED;
+struct epoll_wait_params {
+ int size;
+ int clockid;
+ struct timespec *timeout;
+ sigset_t *sigmask;
+ size_t sigsetsize;
+} EPOLL_PACKED;
+
#ifdef CONFIG_PM_SLEEP
static inline void ep_take_care_of_epollwakeup(struct epoll_event *epev)
{
--
1.9.3
^ permalink raw reply related
* [PATCH v4 5/9] x86: Hook up epoll_ctl_batch syscall
From: Fam Zheng @ 2015-03-10 1:49 UTC (permalink / raw)
To: linux-kernel
Cc: Thomas Gleixner, Ingo Molnar, H. Peter Anvin, x86, Alexander Viro,
Andrew Morton, Kees Cook, Andy Lutomirski, David Herrmann,
Alexei Starovoitov, Miklos Szeredi, David Drysdale, Oleg Nesterov,
David S. Miller, Vivek Goyal, Mike Frysinger, Theodore Ts'o,
Heiko Carstens, Rasmus Villemoes, Rashika Kheria, Hugh Dickins,
Mathieu Desnoyers, Fam Zheng, Peter Zijlstra <peter>
In-Reply-To: <1425952155-27603-1-git-send-email-famz@redhat.com>
Signed-off-by: Fam Zheng <famz@redhat.com>
---
arch/x86/syscalls/syscall_32.tbl | 1 +
arch/x86/syscalls/syscall_64.tbl | 1 +
2 files changed, 2 insertions(+)
diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/syscalls/syscall_32.tbl
index b3560ec..fe809f6 100644
--- a/arch/x86/syscalls/syscall_32.tbl
+++ b/arch/x86/syscalls/syscall_32.tbl
@@ -365,3 +365,4 @@
356 i386 memfd_create sys_memfd_create
357 i386 bpf sys_bpf
358 i386 execveat sys_execveat stub32_execveat
+359 i386 epoll_ctl_batch sys_epoll_ctl_batch
diff --git a/arch/x86/syscalls/syscall_64.tbl b/arch/x86/syscalls/syscall_64.tbl
index 8d656fb..67b2ea4 100644
--- a/arch/x86/syscalls/syscall_64.tbl
+++ b/arch/x86/syscalls/syscall_64.tbl
@@ -329,6 +329,7 @@
320 common kexec_file_load sys_kexec_file_load
321 common bpf sys_bpf
322 64 execveat stub_execveat
+323 64 epoll_ctl_batch sys_epoll_ctl_batch
#
# x32-specific system call numbers start at 512 to avoid cache impact
--
1.9.3
^ permalink raw reply related
* [PATCH v4 4/9] epoll: Add implementation for epoll_ctl_batch
From: Fam Zheng @ 2015-03-10 1:49 UTC (permalink / raw)
To: linux-kernel
Cc: Thomas Gleixner, Ingo Molnar, H. Peter Anvin, x86, Alexander Viro,
Andrew Morton, Kees Cook, Andy Lutomirski, David Herrmann,
Alexei Starovoitov, Miklos Szeredi, David Drysdale, Oleg Nesterov,
David S. Miller, Vivek Goyal, Mike Frysinger, Theodore Ts'o,
Heiko Carstens, Rasmus Villemoes, Rashika Kheria, Hugh Dickins,
Mathieu Desnoyers, Fam Zheng, Peter Zijlstra <peter>
In-Reply-To: <1425952155-27603-1-git-send-email-famz@redhat.com>
This new syscall is a batched version of epoll_ctl. It will execute each
command as specified in cmds in given order, and stop at first failure
or upon completion of all commands.
Signed-off-by: Fam Zheng <famz@redhat.com>
---
fs/eventpoll.c | 50 ++++++++++++++++++++++++++++++++++++++++++
include/linux/syscalls.h | 4 ++++
include/uapi/linux/eventpoll.h | 11 ++++++++++
3 files changed, 65 insertions(+)
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 7909c88..54dc63f 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -99,6 +99,8 @@
#define EP_MAX_EVENTS (INT_MAX / sizeof(struct epoll_event))
+#define EP_MAX_BATCH (INT_MAX / sizeof(struct epoll_ctl_cmd))
+
#define EP_UNACTIVE_PTR ((void *) -1L)
#define EP_ITEM_COST (sizeof(struct epitem) + sizeof(struct eppoll_entry))
@@ -2069,6 +2071,54 @@ SYSCALL_DEFINE6(epoll_pwait, int, epfd, struct epoll_event __user *, events,
sigmask ? &ksigmask : NULL);
}
+SYSCALL_DEFINE4(epoll_ctl_batch, int, epfd, int, flags,
+ int, ncmds, struct epoll_ctl_cmd __user *, cmds)
+{
+ struct epoll_ctl_cmd *kcmds = NULL;
+ int i, ret = 0;
+ size_t cmd_size;
+
+ if (flags)
+ return -EINVAL;
+ if (!cmds || ncmds <= 0 || ncmds > EP_MAX_BATCH)
+ return -EINVAL;
+ cmd_size = sizeof(struct epoll_ctl_cmd) * ncmds;
+ /* TODO: optimize for small arguments like select/poll with a stack
+ * allocated buffer */
+
+ kcmds = kmalloc(cmd_size, GFP_KERNEL);
+ if (!kcmds)
+ return -ENOMEM;
+ if (copy_from_user(kcmds, cmds, cmd_size)) {
+ ret = -EFAULT;
+ goto out;
+ }
+ for (i = 0; i < ncmds; i++) {
+ struct epoll_event ev = (struct epoll_event) {
+ .events = kcmds[i].events,
+ .data = kcmds[i].data,
+ };
+ if (kcmds[i].flags) {
+ kcmds[i].result = -EINVAL;
+ goto copy;
+ }
+ kcmds[i].result = ep_ctl_do(epfd, kcmds[i].op,
+ kcmds[i].fd, ev);
+ if (kcmds[i].result)
+ goto copy;
+ ret++;
+ }
+copy:
+ /* We lose the number of succeeded commands in favor of returning
+ * -EFAULT, but in this case the application will want to fix the
+ * memory bug first. */
+ if (copy_to_user(cmds, kcmds, cmd_size))
+ ret = -EFAULT;
+out:
+ kfree(kcmds);
+ return ret;
+}
+
#ifdef CONFIG_COMPAT
COMPAT_SYSCALL_DEFINE6(epoll_pwait, int, epfd,
struct epoll_event __user *, events,
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 76d1e38..7d784e3 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -12,6 +12,7 @@
#define _LINUX_SYSCALLS_H
struct epoll_event;
+struct epoll_ctl_cmd;
struct iattr;
struct inode;
struct iocb;
@@ -634,6 +635,9 @@ asmlinkage long sys_epoll_pwait(int epfd, struct epoll_event __user *events,
int maxevents, int timeout,
const sigset_t __user *sigmask,
size_t sigsetsize);
+asmlinkage long sys_epoll_ctl_batch(int epfd, int flags,
+ int ncmds,
+ struct epoll_ctl_cmd __user *cmds);
asmlinkage long sys_gethostname(char __user *name, int len);
asmlinkage long sys_sethostname(char __user *name, int len);
asmlinkage long sys_setdomainname(char __user *name, int len);
diff --git a/include/uapi/linux/eventpoll.h b/include/uapi/linux/eventpoll.h
index bc81fb2..4e18b17 100644
--- a/include/uapi/linux/eventpoll.h
+++ b/include/uapi/linux/eventpoll.h
@@ -18,6 +18,8 @@
#include <linux/fcntl.h>
#include <linux/types.h>
+#include <linux/signal.h>
+
/* Flags for epoll_create1. */
#define EPOLL_CLOEXEC O_CLOEXEC
@@ -61,6 +63,15 @@ struct epoll_event {
__u64 data;
} EPOLL_PACKED;
+struct epoll_ctl_cmd {
+ int flags;
+ int op;
+ int fd;
+ __u32 events;
+ __u64 data;
+ int result;
+} EPOLL_PACKED;
+
#ifdef CONFIG_PM_SLEEP
static inline void ep_take_care_of_epollwakeup(struct epoll_event *epev)
{
--
1.9.3
^ permalink raw reply related
* [PATCH v4 3/9] epoll: Extract ep_ctl_do
From: Fam Zheng @ 2015-03-10 1:49 UTC (permalink / raw)
To: linux-kernel
Cc: Thomas Gleixner, Ingo Molnar, H. Peter Anvin, x86, Alexander Viro,
Andrew Morton, Kees Cook, Andy Lutomirski, David Herrmann,
Alexei Starovoitov, Miklos Szeredi, David Drysdale, Oleg Nesterov,
David S. Miller, Vivek Goyal, Mike Frysinger, Theodore Ts'o,
Heiko Carstens, Rasmus Villemoes, Rashika Kheria, Hugh Dickins,
Mathieu Desnoyers, Fam Zheng, Peter Zijlstra <peter>
In-Reply-To: <1425952155-27603-1-git-send-email-famz@redhat.com>
This is the common part from epoll_ctl implementation which will be
shared with the new syscall.
Signed-off-by: Fam Zheng <famz@redhat.com>
---
fs/eventpoll.c | 26 ++++++++++++++++++--------
1 file changed, 18 insertions(+), 8 deletions(-)
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 957d1d0..7909c88 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -1810,22 +1810,15 @@ SYSCALL_DEFINE1(epoll_create, int, size)
* the eventpoll file that enables the insertion/removal/change of
* file descriptors inside the interest set.
*/
-SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
- struct epoll_event __user *, event)
+int ep_ctl_do(int epfd, int op, int fd, struct epoll_event epds)
{
int error;
int full_check = 0;
struct fd f, tf;
struct eventpoll *ep;
struct epitem *epi;
- struct epoll_event epds;
struct eventpoll *tep = NULL;
- error = -EFAULT;
- if (ep_op_has_event(op) &&
- copy_from_user(&epds, event, sizeof(struct epoll_event)))
- goto error_return;
-
error = -EBADF;
f = fdget(epfd);
if (!f.file)
@@ -1947,6 +1940,23 @@ error_return:
return error;
}
+/*
+ * The following function implements the controller interface for
+ * the eventpoll file that enables the insertion/removal/change of
+ * file descriptors inside the interest set.
+ */
+SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
+ struct epoll_event __user *, event)
+{
+ struct epoll_event epds;
+
+ if (ep_op_has_event(op) &&
+ copy_from_user(&epds, event, sizeof(struct epoll_event)))
+ return -EFAULT;
+
+ return ep_ctl_do(epfd, op, fd, epds);
+}
+
static inline int epoll_wait_do(int epfd, struct epoll_event __user *events,
int maxevents, int clockid,
const ktime_t timeout)
--
1.9.3
^ permalink raw reply related
* [PATCH v4 2/9] epoll: Specify clockid explicitly
From: Fam Zheng @ 2015-03-10 1:49 UTC (permalink / raw)
To: linux-kernel
Cc: Thomas Gleixner, Ingo Molnar, H. Peter Anvin, x86, Alexander Viro,
Andrew Morton, Kees Cook, Andy Lutomirski, David Herrmann,
Alexei Starovoitov, Miklos Szeredi, David Drysdale, Oleg Nesterov,
David S. Miller, Vivek Goyal, Mike Frysinger, Theodore Ts'o,
Heiko Carstens, Rasmus Villemoes, Rashika Kheria, Hugh Dickins,
Mathieu Desnoyers, Fam Zheng, Peter Zijlstra <peter>
In-Reply-To: <1425952155-27603-1-git-send-email-famz@redhat.com>
Later we will add clockid in the interface, so let's start using explicit
clockid internally. Now we specify CLOCK_MONOTONIC, which is the same as before.
Signed-off-by: Fam Zheng <famz@redhat.com>
---
fs/eventpoll.c | 29 +++++++++++++++++------------
1 file changed, 17 insertions(+), 12 deletions(-)
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 7dfabeb..957d1d0 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -1570,7 +1570,7 @@ static int ep_send_events(struct eventpoll *ep,
* error code, in case of error.
*/
static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
- int maxevents, const ktime_t timeout)
+ int maxevents, int clockid, const ktime_t timeout)
{
int res = 0, eavail, timed_out = 0;
unsigned long flags;
@@ -1578,6 +1578,8 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
wait_queue_t wait;
ktime_t expires, *to = NULL;
+ if (clockid != CLOCK_MONOTONIC && clockid != CLOCK_REALTIME)
+ return -EINVAL;
if (!ktime_to_ns(timeout)) {
/*
* Avoid the unnecessary trip to the wait queue loop, if the
@@ -1624,7 +1626,8 @@ fetch_events:
}
spin_unlock_irqrestore(&ep->lock, flags);
- if (!schedule_hrtimeout_range(to, slack, HRTIMER_MODE_ABS))
+ if (!schedule_hrtimeout_range_clock(to, slack,
+ HRTIMER_MODE_ABS, clockid))
timed_out = 1;
spin_lock_irqsave(&ep->lock, flags);
@@ -1945,7 +1948,8 @@ error_return:
}
static inline int epoll_wait_do(int epfd, struct epoll_event __user *events,
- int maxevents, const ktime_t timeout)
+ int maxevents, int clockid,
+ const ktime_t timeout)
{
int error;
struct fd f;
@@ -1979,7 +1983,7 @@ static inline int epoll_wait_do(int epfd, struct epoll_event __user *events,
ep = f.file->private_data;
/* Time to fish for events ... */
- error = ep_poll(ep, events, maxevents, timeout);
+ error = ep_poll(ep, events, maxevents, clockid, timeout);
error_fput:
fdput(f);
@@ -1994,12 +1998,13 @@ SYSCALL_DEFINE4(epoll_wait, int, epfd, struct epoll_event __user *, events,
int, maxevents, int, timeout)
{
ktime_t kt = ms_to_ktime(timeout);
- return epoll_wait_do(epfd, events, maxevents, kt);
+ return epoll_wait_do(epfd, events, maxevents, CLOCK_MONOTONIC, kt);
}
static inline int epoll_pwait_do(int epfd, struct epoll_event __user *events,
- int maxevents, ktime_t timeout,
- sigset_t *sigmask, size_t sigsetsize)
+ int maxevents,
+ int clockid, ktime_t timeout,
+ sigset_t *sigmask)
{
int error;
sigset_t sigsaved;
@@ -2013,7 +2018,7 @@ static inline int epoll_pwait_do(int epfd, struct epoll_event __user *events,
set_current_blocked(sigmask);
}
- error = epoll_wait_do(epfd, events, maxevents, timeout);
+ error = epoll_wait_do(epfd, events, maxevents, clockid, timeout);
/*
* If we changed the signal mask, we need to restore the original one.
@@ -2050,8 +2055,8 @@ SYSCALL_DEFINE6(epoll_pwait, int, epfd, struct epoll_event __user *, events,
if (copy_from_user(&ksigmask, sigmask, sizeof(ksigmask)))
return -EFAULT;
}
- return epoll_pwait_do(epfd, events, maxevents, kt,
- sigmask ? &ksigmask : NULL, sigsetsize);
+ return epoll_pwait_do(epfd, events, maxevents, CLOCK_MONOTONIC, kt,
+ sigmask ? &ksigmask : NULL);
}
#ifdef CONFIG_COMPAT
@@ -2073,8 +2078,8 @@ COMPAT_SYSCALL_DEFINE6(epoll_pwait, int, epfd,
sigset_from_compat(&ksigmask, &csigmask);
}
- return epoll_pwait_do(epfd, events, maxevents, kt,
- sigmask ? &ksigmask : NULL, sigsetsize);
+ return epoll_pwait_do(epfd, events, maxevents, CLOCK_MONOTONIC, kt,
+ sigmask ? &ksigmask : NULL);
}
#endif
--
1.9.3
^ permalink raw reply related
* [PATCH v4 1/9] epoll: Extract epoll_wait_do and epoll_pwait_do
From: Fam Zheng @ 2015-03-10 1:49 UTC (permalink / raw)
To: linux-kernel
Cc: Thomas Gleixner, Ingo Molnar, H. Peter Anvin, x86, Alexander Viro,
Andrew Morton, Kees Cook, Andy Lutomirski, David Herrmann,
Alexei Starovoitov, Miklos Szeredi, David Drysdale, Oleg Nesterov,
David S. Miller, Vivek Goyal, Mike Frysinger, Theodore Ts'o,
Heiko Carstens, Rasmus Villemoes, Rashika Kheria, Hugh Dickins,
Mathieu Desnoyers, Fam Zheng, Peter Zijlstra <peter>
In-Reply-To: <1425952155-27603-1-git-send-email-famz@redhat.com>
In preparation of new epoll syscalls, this patch allows reusing the code from
epoll_pwait implementation. The new functions uses ktime_t for more accuracy.
Signed-off-by: Fam Zheng <famz@redhat.com>
---
fs/eventpoll.c | 154 ++++++++++++++++++++++++++-------------------------------
1 file changed, 71 insertions(+), 83 deletions(-)
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 1e009ca..7dfabeb 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -1554,17 +1554,6 @@ static int ep_send_events(struct eventpoll *ep,
return ep_scan_ready_list(ep, ep_send_events_proc, &esed, 0, false);
}
-static inline struct timespec ep_set_mstimeout(long ms)
-{
- struct timespec now, ts = {
- .tv_sec = ms / MSEC_PER_SEC,
- .tv_nsec = NSEC_PER_MSEC * (ms % MSEC_PER_SEC),
- };
-
- ktime_get_ts(&now);
- return timespec_add_safe(now, ts);
-}
-
/**
* ep_poll - Retrieves ready events, and delivers them to the caller supplied
* event buffer.
@@ -1573,17 +1562,15 @@ static inline struct timespec ep_set_mstimeout(long ms)
* @events: Pointer to the userspace buffer where the ready events should be
* stored.
* @maxevents: Size (in terms of number of events) of the caller event buffer.
- * @timeout: Maximum timeout for the ready events fetch operation, in
- * milliseconds. If the @timeout is zero, the function will not block,
- * while if the @timeout is less than zero, the function will block
- * until at least one event has been retrieved (or an error
- * occurred).
+ * @timeout: Maximum timeout for the ready events fetch operation. If 0, the
+ * function will not block. If negative, the function will block until
+ * at least one event has been retrieved (or an error occurred).
*
* Returns: Returns the number of ready events which have been fetched, or an
* error code, in case of error.
*/
static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
- int maxevents, long timeout)
+ int maxevents, const ktime_t timeout)
{
int res = 0, eavail, timed_out = 0;
unsigned long flags;
@@ -1591,13 +1578,7 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
wait_queue_t wait;
ktime_t expires, *to = NULL;
- if (timeout > 0) {
- struct timespec end_time = ep_set_mstimeout(timeout);
-
- slack = select_estimate_accuracy(&end_time);
- to = &expires;
- *to = timespec_to_ktime(end_time);
- } else if (timeout == 0) {
+ if (!ktime_to_ns(timeout)) {
/*
* Avoid the unnecessary trip to the wait queue loop, if the
* caller specified a non blocking operation.
@@ -1605,6 +1586,15 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
timed_out = 1;
spin_lock_irqsave(&ep->lock, flags);
goto check_events;
+ } else if (ktime_to_ns(timeout) > 0) {
+ struct timespec now, end_time;
+
+ ktime_get_ts(&now);
+ end_time = timespec_add_safe(now, ktime_to_timespec(timeout));
+
+ slack = select_estimate_accuracy(&end_time);
+ to = &expires;
+ *to = timespec_to_ktime(end_time);
}
fetch_events:
@@ -1954,12 +1944,8 @@ error_return:
return error;
}
-/*
- * Implement the event wait interface for the eventpoll file. It is the kernel
- * part of the user space epoll_wait(2).
- */
-SYSCALL_DEFINE4(epoll_wait, int, epfd, struct epoll_event __user *, events,
- int, maxevents, int, timeout)
+static inline int epoll_wait_do(int epfd, struct epoll_event __user *events,
+ int maxevents, const ktime_t timeout)
{
int error;
struct fd f;
@@ -2002,46 +1988,70 @@ error_fput:
/*
* Implement the event wait interface for the eventpoll file. It is the kernel
+ * part of the user space epoll_wait(2).
+ */
+SYSCALL_DEFINE4(epoll_wait, int, epfd, struct epoll_event __user *, events,
+ int, maxevents, int, timeout)
+{
+ ktime_t kt = ms_to_ktime(timeout);
+ return epoll_wait_do(epfd, events, maxevents, kt);
+}
+
+static inline int epoll_pwait_do(int epfd, struct epoll_event __user *events,
+ int maxevents, ktime_t timeout,
+ sigset_t *sigmask, size_t sigsetsize)
+{
+ int error;
+ sigset_t sigsaved;
+
+ /*
+ * If the caller wants a certain signal mask to be set during the wait,
+ * we apply it here.
+ */
+ if (sigmask) {
+ sigsaved = current->blocked;
+ set_current_blocked(sigmask);
+ }
+
+ error = epoll_wait_do(epfd, events, maxevents, timeout);
+
+ /*
+ * If we changed the signal mask, we need to restore the original one.
+ * In case we've got a signal while waiting, we do not restore the
+ * signal mask yet, and we allow do_signal() to deliver the signal on
+ * the way back to userspace, before the signal mask is restored.
+ */
+ if (sigmask) {
+ if (error == -EINTR) {
+ memcpy(¤t->saved_sigmask, &sigsaved,
+ sizeof(sigsaved));
+ set_restore_sigmask();
+ } else
+ set_current_blocked(&sigsaved);
+ }
+
+ return error;
+}
+
+/*
+ * Implement the event wait interface for the eventpoll file. It is the kernel
* part of the user space epoll_pwait(2).
*/
SYSCALL_DEFINE6(epoll_pwait, int, epfd, struct epoll_event __user *, events,
int, maxevents, int, timeout, const sigset_t __user *, sigmask,
size_t, sigsetsize)
{
- int error;
- sigset_t ksigmask, sigsaved;
+ ktime_t kt = ms_to_ktime(timeout);
+ sigset_t ksigmask;
- /*
- * If the caller wants a certain signal mask to be set during the wait,
- * we apply it here.
- */
if (sigmask) {
if (sigsetsize != sizeof(sigset_t))
return -EINVAL;
if (copy_from_user(&ksigmask, sigmask, sizeof(ksigmask)))
return -EFAULT;
- sigsaved = current->blocked;
- set_current_blocked(&ksigmask);
}
-
- error = sys_epoll_wait(epfd, events, maxevents, timeout);
-
- /*
- * If we changed the signal mask, we need to restore the original one.
- * In case we've got a signal while waiting, we do not restore the
- * signal mask yet, and we allow do_signal() to deliver the signal on
- * the way back to userspace, before the signal mask is restored.
- */
- if (sigmask) {
- if (error == -EINTR) {
- memcpy(¤t->saved_sigmask, &sigsaved,
- sizeof(sigsaved));
- set_restore_sigmask();
- } else
- set_current_blocked(&sigsaved);
- }
-
- return error;
+ return epoll_pwait_do(epfd, events, maxevents, kt,
+ sigmask ? &ksigmask : NULL, sigsetsize);
}
#ifdef CONFIG_COMPAT
@@ -2051,42 +2061,20 @@ COMPAT_SYSCALL_DEFINE6(epoll_pwait, int, epfd,
const compat_sigset_t __user *, sigmask,
compat_size_t, sigsetsize)
{
- long err;
compat_sigset_t csigmask;
- sigset_t ksigmask, sigsaved;
+ sigset_t ksigmask;
+ ktime_t kt = ms_to_ktime(timeout);
- /*
- * If the caller wants a certain signal mask to be set during the wait,
- * we apply it here.
- */
if (sigmask) {
if (sigsetsize != sizeof(compat_sigset_t))
return -EINVAL;
if (copy_from_user(&csigmask, sigmask, sizeof(csigmask)))
return -EFAULT;
sigset_from_compat(&ksigmask, &csigmask);
- sigsaved = current->blocked;
- set_current_blocked(&ksigmask);
}
- err = sys_epoll_wait(epfd, events, maxevents, timeout);
-
- /*
- * If we changed the signal mask, we need to restore the original one.
- * In case we've got a signal while waiting, we do not restore the
- * signal mask yet, and we allow do_signal() to deliver the signal on
- * the way back to userspace, before the signal mask is restored.
- */
- if (sigmask) {
- if (err == -EINTR) {
- memcpy(¤t->saved_sigmask, &sigsaved,
- sizeof(sigsaved));
- set_restore_sigmask();
- } else
- set_current_blocked(&sigsaved);
- }
-
- return err;
+ return epoll_pwait_do(epfd, events, maxevents, kt,
+ sigmask ? &ksigmask : NULL, sigsetsize);
}
#endif
--
1.9.3
^ permalink raw reply related
* [PATCH v4 0/9] epoll: Introduce new syscalls, epoll_ctl_batch and epoll_pwait1
From: Fam Zheng @ 2015-03-10 1:49 UTC (permalink / raw)
To: linux-kernel-u79uwXL29TY76Z2rM5mHXA
Cc: Thomas Gleixner, Ingo Molnar, H. Peter Anvin,
x86-DgEjT+Ai2ygdnm+yROfE0A, Alexander Viro, Andrew Morton,
Kees Cook, Andy Lutomirski, David Herrmann, Alexei Starovoitov,
Miklos Szeredi, David Drysdale, Oleg Nesterov, David S. Miller,
Vivek Goyal, Mike Frysinger, Theodore Ts'o, Heiko Carstens,
Rasmus Villemoes, Rashika Kheria, Hugh Dickins, Mathieu Desnoyers,
Fam Zheng, Peter Zijlstra <peter>
Changes from v3:
- Add "size" field in epoll_wait_params. [Jon, Ingo, Seymour]
- Input validation for ncmds in epoll_ctl_batch. [Dan]
- Return -EFAULT if copy_to_user failed in epoll_ctl_batch. [Omar, Michael]
- Change "timeout" in epoll_wait_params to pointer, to get the same
convention of 'no wait', 'wait indefinitely' and 'wait for specified time'
with epoll_pwait. [Seymour]
- Add compat implementation of epoll_pwait1.
Justification
=============
QEMU, among many select/poll based applications, considers epoll as an
alternative when its event loop needs to handle a big number of FDs. However,
there are currently two concerns with epoll which prevents the switching:
The major one is the timeout precision. For example in QEMU, the main loop
takes care of calling callbacks at a specific timeout - the QEMU timer API. The
timeout value in ppoll depends on the next firing timer. epoll_pwait's
millisecond timeout is so coarse that rounding up the timeout will hurt
performance badly.
The minor one is the number of system call to update fd set. While epoll can
handle a large number of fds quickly, it still requires one epoll_ctl per fd
update, compared to the one-shot call to select/poll with an fd array. This may
as well make epoll inferior to ppoll in the cases where a small, but frequently
changing set of fds are polled by the event loop.
This series introduces two new epoll sys calls to address them respectively.
The idea of epoll_ctl_batch is suggested by Andy Lutomirski in [1], who also
suggested clockid as a parameter in epoll_pwait1.
[1]: http://lists.openwall.net/linux-kernel/2015/01/08/542
Benchmark for epoll_pwait1
==========================
By running fio tests inside VM with both original and modified QEMU, we can
compare their difference in performance.
With a small VM setup [t1], the original QEMU (ppoll based) has an 4k read
latency overhead around 37 us. In this setup, the main loop polls 10~20 fds.
With a slightly larger VM instance [t2] - attached a virtio-serial device so
that there are 80~90 fds in the main loop - the original QEMU has a latency
overhead around 49 us. By adding more such devices [t3], we can see the latency
go even higher - 83 us with ~200 FDs.
Now modify QEMU to use epoll_pwait1 and test again, the latency numbers are
repectively 36us, 37us, 47us for t1, t2 and t3.
Previous Changelogs
===================
Changes from v2 (https://lkml.org/lkml/2015/2/4/105)
----------------------------------------------------
- Rename epoll_ctl_cmd.error_hint to "result". [Michael]
- Add background introduction in cover letter. [Michael]
- Expand the last struct of epoll_pwait1, add clockid and timespec.
- Update man page in cover letter accordingly:
* "error_hint" -> "result".
* The result field's caveat in "RETURN VALUE" secion of epoll_ctl_batch.
Please review!
Changes from v1 (https://lkml.org/lkml/2015/1/20/189)
-----------------------------------------------------
- As discussed in previous thread [1], split the call to epoll_ctl_batch and
epoll_pwait. [Michael]
- Fix memory leaks. [Omar]
- Add a short comment about the ignored copy_to_user failure. [Omar]
- Cover letter rewritten.
Documentation of the new system calls
=====================================
1) epoll_ctl_batch
------------------
NAME
epoll_ctl_batch - batch control interface for an epoll descriptor
SYNOPSIS
#include <sys/epoll.h>
int epoll_ctl_batch(int epfd, int flags,
int ncmds, struct epoll_ctl_cmd *cmds);
DESCRIPTION
This system call is an extension of epoll_ctl(). The primary difference
is that this system call allows you to batch multiple operations with
the one system call. This provides a more efficient interface for
updating events on this epoll file descriptor epfd.
The flags argument is reserved and must be 0.
The argument ncmds is the number of cmds entries being passed in.
This number must be greater than 0.
Each operation is specified as an element in the cmds array, defined as:
struct epoll_ctl_cmd {
/* Reserved flags for future extension, must be 0. */
int flags;
/* The same as epoll_ctl() op parameter. */
int op;
/* The same as epoll_ctl() fd parameter. */
int fd;
/* The same as the "events" field in struct epoll_event. */
uint32_t events;
/* The same as the "data" field in struct epoll_event. */
uint64_t data;
/* Output field, will be set to the return code after this
* command is executed by kernel */
int result;
};
This system call is not atomic when updating the epoll descriptor. All
entries in cmds are executed in the provided order. If any cmds entry
fails to be processed, no further entries are processed and the number
of successfully processed entries is returned.
Each single operation defined by a struct epoll_ctl_cmd has the same
semantics as an epoll_ctl(2) call. See the epoll_ctl() manual page for
more information about how to correctly setup the members of a struct
epoll_ctl_cmd.
Upon completion of the call the result member of each struct
epoll_ctl_cmd may be set to 0 (sucessfully completed) or an error code
depending on the result of the command. If the kernel fails to change
the result (for example the location of the cmds argument is fully or
partly read only) the result member of each struct epoll_ctl_cmd may be
unchanged.
RETURN VALUE
epoll_ctl_batch() returns a number greater than 0 to indicate the number
of cmnd entries processed. If all entries have been processed this will
equal the ncmds parameter passed in.
If one or more parameters are incorrect the value returned is -1 with
errno set appropriately - no cmds entries have been processed when this
happens.
If processing any entry in the cmds argument results in an error, the
number returned is the index of the failing entry - this number will be
less than ncmds. Since ncmds must be greater than 0, a return value of 0
indicates an error associated with the very first cmds entry. A return
value of 0 does not indicate a successful system call.
To correctly test the return value from epoll_ctl_batch() use code
similar to the following:
ret = epoll_ctl_batch(epfd, flags, ncmds, &cmds);
if (ret < ncmds) {
if (ret == -1) {
/* An argument was invalid */
} else {
/* ret contains the number of successful entries
* processed. If you (mis?)use it as a C index it
* will index directly to the failing entry to
* get the result use cmds[ret].result which may
* contain the errno value associated with the
* entry.
*/
}
} else {
/* Success */
}
ERRORS
EINVAL flags is non-zero; ncmds is less than or equal to zero, or
greater than (INT_MAX / sizeof(struct epoll_ctl_cmd); cmds is
NULL;
ENOMEM There was insufficient memory to handle the requested op control
operation.
EFAULT The memory area pointed to by cmds is not accessible.
In the event that the return value is not the same as the ncmds
parameter, the result member of the failing struct epoll_ctl_cmd will
contain a negative errno value related to the error, unless the memory
area is not writable (EFAULT returned). The errno values that can be set
are those documented on the epoll_ctl(2) manual page.
CONFORMING TO
epoll_ctl_batch() is Linux-specific.
SEE ALSO
epoll_create(2), epoll_ctl(2), epoll_wait(2), epoll_pwait(2), epoll(7)
2) epoll_pwait1
---------------
NAME
epoll_pwait1 - wait for an I/O event on an epoll file descriptor
SYNOPSIS
#include <sys/epoll.h>
int epoll_pwait1(int epfd, int flags,
struct epoll_event *events,
int maxevents,
struct epoll_wait_params *params);
DESCRIPTION
The epoll_pwait1() syscall has more elaborate parameters compared to
epoll_pwait(), in order to allow fine control of the wait.
The epfd, events and maxevents parameters are the same
as in epoll_wait() and epoll_pwait(). The flags and params are new.
The flags is reserved and must be zero.
The params is a pointer to a struct epoll_wait_params which is
defined as:
struct epoll_wait_params {
int clockid;
struct timespec *timeout;
sigset_t *sigmask;
size_t sigsetsize;
};
The clockid member must be either CLOCK_REALTIME or CLOCK_MONOTONIC.
This will choose the clock type to use for timeout. This differs to
epoll_pwait(2) which has an implicit clock type of CLOCK_MONOTONIC.
The timeout member specifies the minimum time that epoll_wait(2) will
block. The time spent waiting will be rounded up to the clock
granularity. Kernel scheduling delays mean that the blocking
interval may overrun by a small amount. Specifying NULL will cause
causes epoll_pwait1(2) to block indefinitely. Specifying a timeout
equal to zero (both tv_sec and tv_nsec are zero) causes epoll_pwait1(2)
to return immediately, even if no events are available.
Both sigmask and sigsetsize have the same semantics as epoll_pwait(2).
The sigmask field may be specified as NULL, in which case
epoll_pwait1(2) will behave like epoll_wait(2).
User visibility of sigsetsize
In epoll_pwait(2) and other syscalls, sigsetsize is not visible to
an application developer as glibc has a wrapper around epoll_pwait(2).
Now we pack several parameters in epoll_wait_params. In
order to hide sigsetsize from application code this system call also
needs to be wrapped either by expanding parameters and building the
structure in the wrapper function, or by only asking application to
provide this part of the structure:
struct epoll_wait_params_user {
int clockid;
struct timespec *timeout;
sigset_t *sigmask;
};
In the wrapper function it would be copied to a full structure with
sigsetsize filled in.
RETURN VALUE
When successful, epoll_wait1() returns the number of file descriptors
ready for the requested I/O, or zero if no file descriptor became ready
during the requested timeout nanoseconds. When an error occurs,
epoll_wait1() returns -1 and errno is set appropriately.
ERRORS
This system call can set errno to the same values as epoll_pwait(2),
as well as the following additional reasons:
EINVAL flags is not zero, or clockid is not one of CLOCK_REALTIME or
CLOCK_MONOTONIC, or the timespec data pointed to by timeout is
not valid.
EFAULT The memory area pointed to by params, params.sigmask or
params.timeout is not accessible.
CONFORMING TO
epoll_pwait1() is Linux-specific.
SEE ALSO
epoll_create(2), epoll_ctl(2), epoll_wait(2), epoll_pwait(2), epoll(7)
Fam Zheng (9):
epoll: Extract epoll_wait_do and epoll_pwait_do
epoll: Specify clockid explicitly
epoll: Extract ep_ctl_do
epoll: Add implementation for epoll_ctl_batch
x86: Hook up epoll_ctl_batch syscall
epoll: Add implementation for epoll_pwait1
x86: Hook up epoll_pwait1 syscall
epoll: Add compat version implementation of epoll_pwait1
x86: Hook up 32 bit compat epoll_pwait1 syscall
arch/x86/syscalls/syscall_32.tbl | 2 +
arch/x86/syscalls/syscall_64.tbl | 2 +
fs/eventpoll.c | 308 ++++++++++++++++++++++++++++-----------
include/linux/compat.h | 6 +
include/linux/syscalls.h | 9 ++
include/uapi/linux/eventpoll.h | 19 +++
6 files changed, 262 insertions(+), 84 deletions(-)
--
1.9.3
^ permalink raw reply
* Re: [PATCH v3 7/7] selftests/mount: Use implicit rules
From: Shuah Khan @ 2015-03-09 23:41 UTC (permalink / raw)
To: Michael Ellerman
Cc: linux-kernel-u79uwXL29TY76Z2rM5mHXA,
davej-rdkfGonbjUTCLXcRTR1eJlpr/1R2p/CL, mmarek-AlSwsSmVLrQ,
linux-api-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <1425465694-27095-7-git-send-email-mpe-Gsx/Oe8HsFggBc27wqDAHg@public.gmane.org>
On 03/04/2015 03:41 AM, Michael Ellerman wrote:
> There's no need to open-code the build rules, use the implicit ones.
>
> This has the nice side effect of enabling cross compilation.
>
> Signed-off-by: Michael Ellerman <mpe-Gsx/Oe8HsFggBc27wqDAHg@public.gmane.org>
> ---
> tools/testing/selftests/mount/Makefile | 8 ++++----
> 1 file changed, 4 insertions(+), 4 deletions(-)
>
> diff --git a/tools/testing/selftests/mount/Makefile b/tools/testing/selftests/mount/Makefile
> index a5b367f032ba..a6c62bedb0d9 100644
> --- a/tools/testing/selftests/mount/Makefile
> +++ b/tools/testing/selftests/mount/Makefile
> @@ -1,13 +1,13 @@
> # Makefile for mount selftests.
>
> -all: unprivileged-remount-test
> +TEST_PROGS := unprivileged-remount-test
> +
> +CFLAGS += -O2 -Wall
>
> -unprivileged-remount-test: unprivileged-remount-test.c
> - gcc -Wall -O2 unprivileged-remount-test.c -o unprivileged-remount-test
> +all: $(TEST_PROGS)
>
> include ../lib.mk
>
> -TEST_PROGS := unprivileged-remount-test
> override RUN_TESTS := if [ -f /proc/self/uid_map ] ; then ./unprivileged-remount-test ; fi
> override EMIT_TESTS := echo "$(RUN_TESTS)"
>
Nack. This change breaks make kselftest run. Test fails to
build.
thanks,
-- Shuah
--
Shuah Khan
Sr. Linux Kernel Developer
Open Source Innovation Group
Samsung Research America (Silicon Valley)
shuahkh-JPH+aEBZ4P+UEJcrhfAQsw@public.gmane.org | (970) 217-8978
^ permalink raw reply
* Re: [PATCH v3 6/7] selftests/mqueue: Use implicit rules
From: Shuah Khan @ 2015-03-09 23:37 UTC (permalink / raw)
To: Michael Ellerman
Cc: linux-kernel-u79uwXL29TY76Z2rM5mHXA,
davej-rdkfGonbjUTCLXcRTR1eJlpr/1R2p/CL, mmarek-AlSwsSmVLrQ,
linux-api-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <1425465694-27095-6-git-send-email-mpe-Gsx/Oe8HsFggBc27wqDAHg@public.gmane.org>
On 03/04/2015 03:41 AM, Michael Ellerman wrote:
> There's no need to open-code the build rules, use the implicit ones.
>
> This has the nice side effect of enabling cross compilation.
>
> Signed-off-by: Michael Ellerman <mpe-Gsx/Oe8HsFggBc27wqDAHg@public.gmane.org>
> ---
> tools/testing/selftests/mqueue/Makefile | 15 +++++++++------
> 1 file changed, 9 insertions(+), 6 deletions(-)
>
> diff --git a/tools/testing/selftests/mqueue/Makefile b/tools/testing/selftests/mqueue/Makefile
> index 6ca7261b55dc..ed85a1d7ee36 100644
> --- a/tools/testing/selftests/mqueue/Makefile
> +++ b/tools/testing/selftests/mqueue/Makefile
> @@ -1,6 +1,11 @@
> -all:
> - gcc -O2 mq_open_tests.c -o mq_open_tests -lrt
> - gcc -O2 -o mq_perf_tests mq_perf_tests.c -lrt -lpthread -lpopt
Nack. I can't take this one either. This breaks make kselftest
run. mqueue fails to build.
-- Shuah
--
Shuah Khan
Sr. Linux Kernel Developer
Open Source Innovation Group
Samsung Research America (Silicon Valley)
shuahkh-JPH+aEBZ4P+UEJcrhfAQsw@public.gmane.org | (970) 217-8978
^ permalink raw reply
* Re: [PATCH v3 5/7] selftests/timers: Use implicit rules
From: Shuah Khan @ 2015-03-09 23:17 UTC (permalink / raw)
To: Michael Ellerman
Cc: linux-kernel-u79uwXL29TY76Z2rM5mHXA,
davej-rdkfGonbjUTCLXcRTR1eJlpr/1R2p/CL, mmarek-AlSwsSmVLrQ,
linux-api-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <1425465694-27095-5-git-send-email-mpe-Gsx/Oe8HsFggBc27wqDAHg@public.gmane.org>
On 03/04/2015 03:41 AM, Michael Ellerman wrote:
> There's no need to open-code the build rules, use the implicit ones.
>
> This has the nice side effect of enabling cross compilation.
>
> Signed-off-by: Michael Ellerman <mpe-Gsx/Oe8HsFggBc27wqDAHg@public.gmane.org>
> ---
> tools/testing/selftests/timers/Makefile | 8 ++++----
> 1 file changed, 4 insertions(+), 4 deletions(-)
>
> diff --git a/tools/testing/selftests/timers/Makefile b/tools/testing/selftests/timers/Makefile
> index 149cee3b7b8a..87340405e4b3 100644
> --- a/tools/testing/selftests/timers/Makefile
> +++ b/tools/testing/selftests/timers/Makefile
> @@ -1,9 +1,9 @@
> -all:
> - gcc posix_timers.c -o posix_timers -lrt
> -
> TEST_PROGS := posix_timers
> +LDLIBS += -lrt
> +
> +all: $(TEST_PROGS)
I can't take this patch. This breaks make kselftest
run. With this change timers test fails to build
make[2]: Nothing to be done for 'all'.
make[2]: *** No rule to make target 'posix_timers', needed by 'all'. Stop.
thanks,
-- Shuah
--
Shuah Khan
Sr. Linux Kernel Developer
Open Source Innovation Group
Samsung Research America (Silicon Valley)
shuahkh-JPH+aEBZ4P+UEJcrhfAQsw@public.gmane.org | (970) 217-8978
^ permalink raw reply
* Re: [PATCH] kbuild: Don't pass -rR to selftest makefiles
From: Shuah Khan @ 2015-03-09 22:49 UTC (permalink / raw)
To: Michael Ellerman, mmarek-AlSwsSmVLrQ
Cc: linux-kernel-u79uwXL29TY76Z2rM5mHXA,
davej-rdkfGonbjUTCLXcRTR1eJlpr/1R2p/CL,
linux-api-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <54FDAE28.6050801-JPH+aEBZ4P+UEJcrhfAQsw@public.gmane.org>
On 03/09/2015 08:28 AM, Shuah Khan wrote:
> On 03/04/2015 03:19 AM, Michael Ellerman wrote:
>> The makefiles under tools/testing/selftests are not real kbuild
>> makefiles, they are regular stand alone makefiles. As such they *do*
>> want all the standard implicit rules and variables defined.
>>
>> So before calling those makefiles, filter -rR out of MAKEFLAGS.
>>
>> Without this not all the selftests are built correctly when called via
>> the top-level Makefile.
>>
>> Signed-off-by: Michael Ellerman <mpe-Gsx/Oe8HsFggBc27wqDAHg@public.gmane.org>
>
> Hi Michal,
>
> Could you please take this patch in your tree.
>
> Acked-by: Shuah Khan <shuahkh-JPH+aEBZ4P+UEJcrhfAQsw@public.gmane.org>
>
I am changing this to a Nack. This is not fully cooked.
I am seeing new failures on some tests.
gcc: error: elf_x86_64: No such file or directory
gcc: error: unrecognized command line option ‘-m’
thanks,
-- Shuah
--
Shuah Khan
Sr. Linux Kernel Developer
Open Source Innovation Group
Samsung Research America (Silicon Valley)
shuahkh-JPH+aEBZ4P+UEJcrhfAQsw@public.gmane.org | (970) 217-8978
^ permalink raw reply
* Re: [PATCH 2/9] selftests: Add install target
From: Shuah Khan @ 2015-03-09 22:29 UTC (permalink / raw)
To: Dave Jones, Michael Ellerman, linux-kernel-u79uwXL29TY76Z2rM5mHXA,
mmarek-AlSwsSmVLrQ, linux-api-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <54FDAC4A.9080703-JPH+aEBZ4P+UEJcrhfAQsw@public.gmane.org>
On 03/09/2015 08:20 AM, Shuah Khan wrote:
> On 03/05/2015 11:53 AM, Dave Jones wrote:
>> On Tue, Mar 03, 2015 at 03:51:35PM +1100, Michael Ellerman wrote:
>> > This adds make install support to selftests. The basic usage is:
>> >
>> > $ cd tools/testing/selftests
>> > $ make install
>> >
>> > That installs into tools/testing/selftests/install, which can then be
>> > copied where ever necessary.
>> >
>> > The install destination is also configurable using eg:
>> >
>> > $ INSTALL_PATH=/mnt/selftests make install
>>
>> ...
>>
>> > + @# Ask all targets to emit their test scripts
>> > + echo "#!/bin/bash\n\n" > $(ALL_SCRIPT)
>>
>> $ ./all.sh
>> -bash: ./all.sh: /bin/bash\n\n: bad interpreter: No such file or directory
>>
>> Removing the \n\n fixes it.
>>
>> > + echo "cd \$$ROOT\n" >> $(ALL_SCRIPT); \
>>
>> ditto
>>
>> Dave
>
> Michael,
>
> Could you please fix these problems and send the patch.
>
Michael,
Did you happen to run run_kselftest.sh from the install
directory to make sure all the dependent executables
are installed? You are missing a few required dependencies.
efivars test for example.
Please run kselftest_install.sh I sent out for review and
compare the following:
- contents of install directory created with your patch vs.
my kselftest_install.sh tool
- Compare your run_kselftest.sh run with the one that gets
generated with my kselftest_install.sh tool
General rule is all tests that get run when run_tests target
is run should run from the install directory using the
run_kselftest.sh generated during install.
thanks,
-- Shuah
--
Shuah Khan
Sr. Linux Kernel Developer
Open Source Innovation Group
Samsung Research America (Silicon Valley)
shuahkh-JPH+aEBZ4P+UEJcrhfAQsw@public.gmane.org | (970) 217-8978
^ permalink raw reply
* Re: [PATCH v2 04/18] clocksource: Add ARM System timer driver
From: Uwe Kleine-König @ 2015-03-09 22:17 UTC (permalink / raw)
To: Paul Bolle
Cc: Maxime Coquelin, Andreas Färber, Geert Uytterhoeven,
Rob Herring, Philipp Zabel, Jonathan Corbet, Pawel Moll,
Mark Rutland, Ian Campbell, Kumar Gala, Russell King,
Daniel Lezcano, Thomas Gleixner, Linus Walleij,
Greg Kroah-Hartman, Jiri Slaby, Arnd Bergmann, Andrew Morton,
David S. Miller, Mauro Carvalho Chehab, Joe Perches,
Antti Palosaari
In-Reply-To: <1425935552.4244.6.camel@x220>
Hello,
On Mon, Mar 09, 2015 at 10:12:32PM +0100, Paul Bolle wrote:
> On Wed, 2015-03-04 at 13:08 +0100, Maxime Coquelin wrote:
> > This is because I added also support for COMPILE_TEST coverage as per
> > Uwe advice,
> > and thought it was necessary to have an entry for this.
> > Maybe I'm just wrong?
>
> I missed that you added COMPILE_TEST.
>
> A quick scan of your idea doesn't show any obvious issues. (Note that I
> don't really know how people actually use COMPILE_TEST. I guess things
> like "make allyesconfig" are involved.)
Maybe this can clearify the purpose of COMPILE_TEST:
diff --git a/init/Kconfig b/init/Kconfig
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -67,6 +67,26 @@ config COMPILE_TEST
here. If you are a user/distributor, say N here to exclude useless
drivers to be distributed.
+ # If you are a driver author consider to adjust your driver's
+ # dependencies to make it buildable with minimal preconditions if
+ # COMPILE_TEST is enabled. This helps contributers and maintainers
+ # that might not have the necessary toolchain or kernel config handy and
+ # also increases compile test coverage. It's your advantage if others can
+ # build your driver more easily! So for a device that is only found on the
+ # foo cpu use:
+ #
+ # depends on CPU_FOO || COMPILE_TEST
+ #
+ # . You might have to use
+ #
+ # depends on CPU_FOO || (COMPILE_TEST && COOKIE)
+ #
+ # or
+ #
+ # depends on COOKIE && (CPU_FOO || COMPILE_TEST)
+ #
+ # if your driver uses features that are only available if COOKIE is on.
+
config LOCALVERSION
string "Local version - append to kernel release"
help
--
Pengutronix e.K. | Uwe Kleine-König |
Industrial Linux Solutions | http://www.pengutronix.de/ |
^ permalink raw reply
* Re: [PATCH v0 01/11] stm class: Introduce an abstraction for System Trace Module devices
From: Mathieu Poirier @ 2015-03-09 21:29 UTC (permalink / raw)
To: Alexander Shishkin
Cc: Greg Kroah-Hartman,
linux-kernel-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
Pratik Patel, peter.lachner-ral2JQCrhuEAvxtiuMwx3w,
norbert.schulz-ral2JQCrhuEAvxtiuMwx3w,
keven.boell-ral2JQCrhuEAvxtiuMwx3w,
yann.fouassier-ral2JQCrhuEAvxtiuMwx3w,
laurent.fert-ral2JQCrhuEAvxtiuMwx3w,
linux-api-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <1425728161-164217-2-git-send-email-alexander.shishkin-VuQAYsv1563Yd54FQh9/CA@public.gmane.org>
On 7 March 2015 at 04:35, Alexander Shishkin
<alexander.shishkin-VuQAYsv1563Yd54FQh9/CA@public.gmane.org> wrote:
> A System Trace Module (STM) is a device exporting data in System Trace
> Protocol (STP) format as defined by MIPI STP standards. Examples of such
> devices are Intel Trace Hub and Coresight STM.
>
> This abstraction provides a unified interface for software trace sources
> to send their data over an STM device to a debug host. In order to do
> that, such a trace source needs to be assigned a pair of master/channel
> identifiers that all the data from this source will be tagged with. The
> STP decoder on the debug host side will use these master/channel tags to
> distinguish different trace streams from one another inside one STP
> stream.
>
> This abstraction provides a configfs-based policy management mechanism
> for dynamic allocation of these master/channel pairs based on trace
> source-supplied string identifier. It has the flexibility of being
> defined at runtime and at the same time (provided that the policy
> definition is aligned with the decoding end) consistency.
>
> For userspace trace sources, this abstraction provides write()-based and
> mmap()-based (if the underlying stm device allows this) output mechanism.
>
> For kernel-side trace sources, we provide "stm_source" device class that
> can be connected to an stm device at run time.
>
> Cc: linux-api-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
> Cc: Pratik Patel <pratikp-sgV2jX0FEOL9JmXXK+q4OQ@public.gmane.org>
> Cc: Mathieu Poirier <mathieu.poirier-QSEj5FYQhm4dnm+yROfE0A@public.gmane.org>
> Signed-off-by: Alexander Shishkin <alexander.shishkin-VuQAYsv1563Yd54FQh9/CA@public.gmane.org>
> ---
> Documentation/ABI/testing/configfs-stp-policy | 44 ++
> Documentation/ABI/testing/sysfs-class-stm | 14 +
> Documentation/ABI/testing/sysfs-class-stm_source | 11 +
> Documentation/trace/stm.txt | 77 +++
> drivers/Kconfig | 2 +
> drivers/Makefile | 1 +
> drivers/stm/Kconfig | 8 +
> drivers/stm/Makefile | 3 +
> drivers/stm/core.c | 839 +++++++++++++++++++++++
> drivers/stm/policy.c | 470 +++++++++++++
> drivers/stm/stm.h | 77 +++
> include/linux/stm.h | 87 +++
> include/uapi/linux/stm.h | 47 ++
> 13 files changed, 1680 insertions(+)
> create mode 100644 Documentation/ABI/testing/configfs-stp-policy
> create mode 100644 Documentation/ABI/testing/sysfs-class-stm
> create mode 100644 Documentation/ABI/testing/sysfs-class-stm_source
> create mode 100644 Documentation/trace/stm.txt
> create mode 100644 drivers/stm/Kconfig
> create mode 100644 drivers/stm/Makefile
> create mode 100644 drivers/stm/core.c
> create mode 100644 drivers/stm/policy.c
> create mode 100644 drivers/stm/stm.h
> create mode 100644 include/linux/stm.h
> create mode 100644 include/uapi/linux/stm.h
>
[Snip...]
> +
> +static int stm_output_assign(struct stm_device *stm, unsigned int width,
> + struct stp_policy_node *policy_node,
> + struct stm_output *output)
> +{
> + unsigned int midx, cidx, mend, cend;
> + int ret = -EBUSY;
> +
> + if (width > stm->data->sw_nchannels)
> + return -EINVAL;
> +
> + if (policy_node) {
Where does this get set? All I found is code that is switching on it.
> + stp_policy_node_get_ranges(policy_node,
> + &midx, &mend, &cidx, &cend);
> + } else {
> + midx = stm->data->sw_start;
> + cidx = 0;
> + mend = stm->data->sw_end;
> + cend = stm->data->sw_nchannels - 1;
> + }
> +
> + spin_lock(&stm->mc_lock);
> + if (output->nr_chans)
> + goto unlock;
> +
> + ret = stm_find_master_chan(stm, width, &midx, mend, &cidx, cend);
> + if (ret)
> + goto unlock;
> +
> + output->master = midx;
> + output->channel = cidx;
> + output->nr_chans = width;
> + stm_output_claim(stm, output);
> + dev_dbg(stm->dev, "assigned %u:%u (+%u)\n", midx, cidx, width);
> +
> + ret = 0;
> +unlock:
> + spin_unlock(&stm->mc_lock);
> +
> + return ret;
> +}
> +
[Snip...]
> +
> +/**
> + * stm_source_register_device() - register an stm_source device
> + * @parent: parent device
> + * @data: device description structure
> + *
> + * This will create a device of stm_source class that can write
> + * data to an stm device once linked.
> + *
> + * Return: 0 on success, -errno otherwise.
> + */
> +int stm_source_register_device(struct device *parent,
> + struct stm_source_data *data)
> +{
> + struct stm_source_device *src;
> + struct device *dev;
> +
> + if (!stm_core_up)
> + return -EPROBE_DEFER;
> +
> + src = kzalloc(sizeof(*src), GFP_KERNEL);
> + if (!src)
> + return -ENOMEM;
> +
> + dev = device_create(&stm_source_class, parent, MKDEV(0, 0), NULL, "%s",
> + data->name);
> + if (IS_ERR(dev)) {
> + kfree(src);
> + return PTR_ERR(dev);
> + }
> +
> + spin_lock_init(&src->link_lock);
> + INIT_LIST_HEAD(&src->link_entry);
> + src->dev = dev;
> + src->data = data;
> + data->src = src;
> + dev_set_drvdata(dev, src);
> +
> + return 0;
> +}
> +EXPORT_SYMBOL_GPL(stm_source_register_device);
> +
[Snip...]
It's really not clear (at least to me) how stm_source_device works.
They make a bridge between the kernel and the STM devices but the
"link" between them is not obvious. More documentation perhaps?
[Snip...]
> +
> +/**
> + * struct stm_data - STM device description and callbacks
> + * @name: device name
> + * @stm: internal structure, only used by stm class code
> + * @sw_start: first STP master
> + * @sw_end: last STP master
> + * @sw_nchannels: number of STP channels per master
> + * @sw_mmiosz: size of one channel's IO space, for mmap, optional
> + * @write: write callback
> + * @mmio_addr: mmap callback, optional
> + *
> + * Fill out this structure before calling stm_register_device() to create
> + * an STM device and stm_unregister_device() to destroy it. It will also be
> + * passed back to write() and mmio_addr() callbacks.
> + */
> +struct stm_data {
> + const char *name;
> + struct stm_device *stm;
> + unsigned int sw_start;
> + unsigned int sw_end;
The above kernel doc is the only place where "sw_start" and "sw_end"
are described as first and last STP masters. Other than that it takes
a really long time to figure out what they really are. I think a
better naming convention can be chosen here.
> + unsigned int sw_nchannels;
> + unsigned int sw_mmiosz;
> + ssize_t (*write)(struct stm_data *, unsigned int,
> + unsigned int, const char *, size_t);
> + phys_addr_t (*mmio_addr)(struct stm_data *, unsigned int,
> + unsigned int, unsigned int);
> + void (*link)(struct stm_data *, unsigned int,
> + unsigned int);
> + void (*unlink)(struct stm_data *, unsigned int,
> + unsigned int);
It is really not clear to me what the "link" and "unlink" functions do
- documenting what they're for and explain when to use them and (not
use them) would be appreciated.
I think we should also add two things to this structure: 1) a private
field and 2) a (*stm_drv_ioctl) stub.
The private field would be filled by the registrant and left alone by
the generic-stm core. When parsing the commands in
"stm_char_ioctl()", data->stm_drv_ioctl(private, cmd, arg) could be
called to let architecture specific drivers deal with it. That way
applications can deal with a single configuration file descriptor.
> +};
> +
> +int stm_register_device(struct device *parent, struct stm_data *stm_data,
> + struct module *owner);
> +void stm_unregister_device(struct stm_data *stm_data);
> +
> +struct stm_source_device;
> +
> +/**
> + * struct stm_source_data - STM source device description and callbacks
> + * @name: device name, will be used for policy lookup
> + * @src: internal structure, only used by stm class code
> + * @nr_chans: number of channels to allocate
> + * @link: called when STM device gets linked to this source
> + * @unlink: called when STH device is about to be unlinked
> + *
> + * Fill in this structure before calling stm_source_register_device() to
> + * register a source device. Also pass it to unregister and write calls.
> + */
> +struct stm_source_data {
> + const char *name;
> + struct stm_source_device *src;
> + unsigned int percpu;
> + unsigned int nr_chans;
> + int (*link)(struct stm_source_data *data);
> + void (*unlink)(struct stm_source_data *data);
> +};
> +
I didn't get the linking/unlinking process - it is also not clear as
to why we need struct stm_data and struct stm_source_data. More
explanation on this would be good.
> +int stm_source_register_device(struct device *parent,
> + struct stm_source_data *data);
> +void stm_source_unregister_device(struct stm_source_data *data);
> +
> +int stm_source_write(struct stm_source_data *data, unsigned int chan,
> + const char *buf, size_t count);
> +
> +#endif /* _STM_H_ */
> diff --git a/include/uapi/linux/stm.h b/include/uapi/linux/stm.h
> new file mode 100644
> index 0000000000..042b58b53b
> --- /dev/null
> +++ b/include/uapi/linux/stm.h
> @@ -0,0 +1,47 @@
> +/*
> + * System Trace Module (STM) userspace interfaces
> + * Copyright (c) 2014, Intel Corporation.
> + *
> + * This program is free software; you can redistribute it and/or modify it
> + * under the terms and conditions of the GNU General Public License,
> + * version 2, as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope it will be useful, but WITHOUT
> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
> + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
> + * more details.
> + *
> + * STM class implements generic infrastructure for System Trace Module devices
> + * as defined in MIPI STPv2 specification.
> + */
> +
> +#ifndef _UAPI_LINUX_STM_H
> +#define _UAPI_LINUX_STM_H
> +
> +/**
> + * struct stp_policy_id - identification for the STP policy
> + * @size: size of the structure including real id[] length
> + * @master: assigned master
> + * @channel: first assigned channel
> + * @width: number of requested channels
> + * @id: identification string
> + *
> + * User must calculate the total size of the structure and put it into
> + * @size field, fill out the @id and desired @width. In return, kernel
> + * fills out @master, @channel and @width.
> + */
> +struct stp_policy_id {
> + __u32 size;
> + __u16 master;
> + __u16 channel;
> + __u16 width;
> + /* padding */
> + __u16 __reserved_0;
> + __u32 __reserved_1;
> + char id[0];
> +};
> +
> +#define STP_POLICY_ID_SET _IOWR('%', 0, struct stp_policy_id)
> +#define STP_POLICY_ID_GET _IOR('%', 1, struct stp_policy_id)
> +
> +#endif /* _UAPI_LINUX_STM_H */
> --
> 2.1.4
>
Aside from the above points I think this is all very good work. The
patchset should likely be broken up in two sets, one for the generic
STM architecture wrapper and another for the Intel TH part. That way
things can be worked on independently.
On my side I will get a prototype going that folds the current
coresight-stm driver in this new mindset - I'll let you know how
things go.
Thanks,
Mathieu
^ permalink raw reply
* Re: [PATCH v2 04/18] clocksource: Add ARM System timer driver
From: Paul Bolle @ 2015-03-09 21:12 UTC (permalink / raw)
To: Maxime Coquelin
Cc: Uwe Kleine-König, Andreas Färber, Geert Uytterhoeven,
Rob Herring, Philipp Zabel, Jonathan Corbet, Pawel Moll,
Mark Rutland, Ian Campbell, Kumar Gala, Russell King,
Daniel Lezcano, Thomas Gleixner, Linus Walleij,
Greg Kroah-Hartman, Jiri Slaby, Arnd Bergmann, Andrew Morton,
David S. Miller, Mauro Carvalho Chehab, Joe Perches
In-Reply-To: <CALszF6AEThPrRe7c7nJ6q7yb2fhLMFuHJUV10sf=w0TMcqVVaw@mail.gmail.com>
On Wed, 2015-03-04 at 13:08 +0100, Maxime Coquelin wrote:
> This is because I added also support for COMPILE_TEST coverage as per
> Uwe advice,
> and thought it was necessary to have an entry for this.
> Maybe I'm just wrong?
I missed that you added COMPILE_TEST.
A quick scan of your idea doesn't show any obvious issues. (Note that I
don't really know how people actually use COMPILE_TEST. I guess things
like "make allyesconfig" are involved.)
Paul Bolle
^ permalink raw reply
* Re: [PATCH v2 02/18] ARM: ARMv7M: Enlarge vector table to 256 entries
From: Maxime Coquelin @ 2015-03-09 17:12 UTC (permalink / raw)
To: Stefan Agner
Cc: Uwe Kleine-König, Andreas Färber, Geert Uytterhoeven,
Rob Herring, Philipp Zabel, Jonathan Corbet, Pawel Moll,
Mark Rutland, Ian Campbell, Kumar Gala, Russell King,
Daniel Lezcano, Thomas Gleixner, Linus Walleij,
Greg Kroah-Hartman, Jiri Slaby, Arnd Bergmann, Andrew Morton,
David S. Miller, Mauro Carvalho Chehab, Joe Perches
In-Reply-To: <1364122b3b72c50138d81ee4eec2a4c7@agner.ch>
2015-03-09 1:29 GMT+01:00 Stefan Agner <stefan@agner.ch>:
> On 2015-02-20 19:01, Maxime Coquelin wrote:
>> From Cortex-M reference manuals, the nvic supports up to 240 interrupts.
>> So the number of entries in vectors table is up to 256.
>>
>> This patch adds a new config flag to specify the number of external interrupts.
>> Some ifdeferies are added in order to respect the natural alignment without
>> wasting too much space on smaller systems.
>>
>> Signed-off-by: Maxime Coquelin <mcoquelin.stm32@gmail.com>
>> ---
>> arch/arm/kernel/entry-v7m.S | 13 +++++++++----
>> arch/arm/mm/Kconfig | 15 +++++++++++++++
>> 2 files changed, 24 insertions(+), 4 deletions(-)
>>
>> diff --git a/arch/arm/kernel/entry-v7m.S b/arch/arm/kernel/entry-v7m.S
>> index 8944f49..68cde36 100644
>> --- a/arch/arm/kernel/entry-v7m.S
>> +++ b/arch/arm/kernel/entry-v7m.S
>> @@ -117,9 +117,14 @@ ENTRY(__switch_to)
>> ENDPROC(__switch_to)
>>
>> .data
>> - .align 8
>> +#if CONFIG_CPUV7M_NUM_IRQ <= 112
>> + .align 9
>> +#else
>> + .align 10
>> +#endif
>> +
>> /*
>> - * Vector table (64 words => 256 bytes natural alignment)
>> + * Vector table (Natural alignment need to be ensured)
>> */
>> ENTRY(vector_table)
>> .long 0 @ 0 - Reset stack pointer
>> @@ -138,6 +143,6 @@ ENTRY(vector_table)
>> .long __invalid_entry @ 13 - Reserved
>> .long __pendsv_entry @ 14 - PendSV
>> .long __invalid_entry @ 15 - SysTick
>> - .rept 64 - 16
>> - .long __irq_entry @ 16..64 - External Interrupts
>> + .rept CONFIG_CPUV7M_NUM_IRQ
>> + .long __irq_entry @ External Interrupts
>> .endr
>> diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig
>> index c43c714..27eb835 100644
>> --- a/arch/arm/mm/Kconfig
>> +++ b/arch/arm/mm/Kconfig
>> @@ -604,6 +604,21 @@ config CPU_USE_DOMAINS
>> This option enables or disables the use of domain switching
>> via the set_fs() function.
>>
>> +config CPUV7M_NUM_IRQ
>> + int "Number of external interrupts connected to the NVIC"
>> + depends on CPU_V7M
>> + default 90 if ARCH_STM32
>> + default 38 if ARCH_EFM32
>> + default 240
>> + help
>> + This option indicates the number of interrupts connected to the NVIC.
>> + The value can be larger than the real number of interrupts supported
>> + by the system, but must not be lower.
>> + The default value is 240, corresponding to the maximum number of
>> + interrupts supported by the NVIC on Cortex-M family.
>> +
>> + If unsure, keep default value.
>> +
>> #
>> # CPU supports 36-bit I/O
>> #
>
> I sent a patch which extended that vector table some weeks ago:
> https://lkml.org/lkml/2014/12/29/296
I did something similar in my first version.
>
> But your solution is definitely more flexible, and given that we deal
> with small devices here, it's worth saving memory.
Yes, it is worth for these small devices.
>
> Acked-by: Stefan Agner <stefan@agner.ch>
>
Thanks for the review,
Maxime
^ permalink raw reply
* Re: [PATCH 14/14] MAINTAINERS: Add entry for STM32 MCUs
From: Maxime Coquelin @ 2015-03-09 17:01 UTC (permalink / raw)
To: Linus Walleij
Cc: Jonathan Corbet, Rob Herring, Pawel Moll, Mark Rutland,
Ian Campbell, Kumar Gala, Philipp Zabel, Russell King,
Daniel Lezcano, Thomas Gleixner, Greg Kroah-Hartman, Jiri Slaby,
Arnd Bergmann, Andrew Morton, David S. Miller,
Mauro Carvalho Chehab, Joe Perches, Antti Palosaari, Tejun Heo,
Will Deacon, Nikolay Borisov, Rusty Russell, Kees Cook, Michal
In-Reply-To: <CACRpkdaYVyH-y19yHYp2noq6VhDVJYQfqkevDtLr-7y8gVwdXw@mail.gmail.com>
2015-03-09 17:47 GMT+01:00 Linus Walleij <linus.walleij@linaro.org>:
> On Fri, Mar 6, 2015 at 10:55 AM, Maxime Coquelin
> <mcoquelin.stm32@gmail.com> wrote:
>> 2015-03-06 10:03 GMT+01:00 Linus Walleij <linus.walleij@linaro.org>:
>>> On Thu, Feb 12, 2015 at 6:46 PM, Maxime Coquelin
>>> <mcoquelin.stm32@gmail.com> wrote:
>>>
>>>> Add a MAINTAINER entry covering all STM32 machine and drivers files.
>>>>
>>>> Signed-off-by: Maxime Coquelin <mcoquelin.stm32@gmail.com>
>>>
>>> (...)
>>>> +F: drivers/clocksource/arm_system_timer.c
>>>
>>> Is that all? And that is not even a STM32 specific driver.
>>
>> For the ARM System Timer, I'm fine to add a new entry.
>> Or remove the line, and let the maintain-ship to clocksource maintainers.
>>
>> All the STM32 files are covered by this line:
>> +N: stm32
>
> Aha you're right, news2me, I'm old and stupid :/
This is new to me too. :)
Maxime
>
> Thanks,
> Linus Walleij
^ permalink raw reply
* Re: [PATCH v2 04/18] clocksource: Add ARM System timer driver
From: Maxime Coquelin @ 2015-03-09 17:00 UTC (permalink / raw)
To: Linus Walleij
Cc: Uwe Kleine-König, Andreas Färber, Geert Uytterhoeven,
Rob Herring, Philipp Zabel, Jonathan Corbet, Pawel Moll,
Mark Rutland, Ian Campbell, Kumar Gala, Russell King,
Daniel Lezcano, Thomas Gleixner, Greg Kroah-Hartman, Jiri Slaby,
Arnd Bergmann, Andrew Morton, David S. Miller,
Mauro Carvalho Chehab, Joe Perches, Antti Palosaari, Tejun Heo
In-Reply-To: <CACRpkdYnj0vTBVEEDp4isznRZ+KQuC2h+VViZr=Pt8VtEOWP-A@mail.gmail.com>
2015-03-09 16:50 GMT+01:00 Linus Walleij <linus.walleij@linaro.org>:
> On Fri, Feb 20, 2015 at 7:01 PM, Maxime Coquelin
> <mcoquelin.stm32@gmail.com> wrote:
>
>> This patch adds clocksource support for ARMv7-M's System timer,
>> also known as SysTick.
>>
>> Signed-off-by: Maxime Coquelin <mcoquelin.stm32@gmail.com>
> (...)
>> + /* If no clock found, try to get clock-frequency property */
>> + if (!rate) {
>> + ret = of_property_read_u32(np, "clock-frequency", &rate);
>> + if (ret)
>> + goto out_unmap;
>> + }
>
> If this driver is only used for this one system, and if on this one system
> the clk subsystem will provide the clock rate, then there is no need
> to include this hackaround property.
>
> Alternatively there is no point including reading the frequency from
> the clk subsystem for this one system.
>
> So which one is it?
In the first version, the "clock-frequency" property handling was not here.
Rob Herring advised me to add its support, as it could be used by
simple systems not selecting CCF.
So, I don't have the name of a system where it could be useful, but I
think Rob's request make sense.
Note that I tested using the clock-frequency property before sending.
Best regards,
Maxime
>
> Yours,
> Linus Walleij
^ permalink raw reply
* Re: [PATCH v2 16/18] ARM: dts: Introduce STM32F429 MCU
From: Maxime Coquelin @ 2015-03-09 16:51 UTC (permalink / raw)
To: Linus Walleij
Cc: Uwe Kleine-König, Andreas Färber, Geert Uytterhoeven,
Rob Herring, Philipp Zabel, Jonathan Corbet, Pawel Moll,
Mark Rutland, Ian Campbell, Kumar Gala, Russell King,
Daniel Lezcano, Thomas Gleixner, Greg Kroah-Hartman, Jiri Slaby,
Arnd Bergmann, Andrew Morton, David S. Miller,
Mauro Carvalho Chehab, Joe Perches, Antti Palosaari, Tejun Heo
In-Reply-To: <CACRpkdaW-OMgGfw8+ngia8AsmzRpW8z7922=dPpRGjgv79-gqw@mail.gmail.com>
2015-03-09 15:39 GMT+01:00 Linus Walleij <linus.walleij@linaro.org>:
> On Fri, Feb 20, 2015 at 7:01 PM, Maxime Coquelin
> <mcoquelin.stm32@gmail.com> wrote:
>
>> The STMicrolectornics's STM32F419 MCU has the following main features:
>> - Cortex-M4 core running up to @180MHz
>> - 2MB internal flash, 256KBytes internal RAM
>> - FMC controller to connect SDRAM, NOR and NAND memories
>> - SD/MMC/SDIO support
>> - Ethernet controller
>> - USB OTFG FS & HS controllers
>> - I2C, SPI, CAN busses support
>> - Several 16 & 32 bits general purpose timers
>> - Serial Audio interface
>> - LCD controller
>>
>> Signed-off-by: Maxime Coquelin <mcoquelin.stm32@gmail.com>
> (...)
>> + soc {
>> + usart1: usart@40011000 {
>> + status = "okay";
>> + };
>> +
>> + leds {
>> + compatible = "gpio-leds";
>> + red {
>> + #gpio-cells = <2>;
>> + label = "Front Panel LED";
>> + gpios = <&gpiog 14 0>;
>> + linux,default-trigger = "heartbeat";
>> + };
>> + green {
>> + #gpio-cells = <2>;
>> + gpios = <&gpiog 13 0>;
>> + default-state = "off";
>> + };
>> + };
>> + };
>
> The LEDs are mounted on the board not the SoC, right?
>
> So move them outside of the soc node, to the top level
> of the DTS file.
Yes, you are right.
Andreas already made the same remark, and the top-level move is
already here in the upcoming v3.
Thanks,
Maxime
>
> Yours,
> Linus Walleij
^ permalink raw reply
* Re: [PATCH 14/14] MAINTAINERS: Add entry for STM32 MCUs
From: Linus Walleij @ 2015-03-09 16:47 UTC (permalink / raw)
To: Maxime Coquelin
Cc: Jonathan Corbet, Rob Herring, Pawel Moll, Mark Rutland,
Ian Campbell, Kumar Gala, Philipp Zabel, Russell King,
Daniel Lezcano, Thomas Gleixner, Greg Kroah-Hartman, Jiri Slaby,
Arnd Bergmann, Andrew Morton, David S. Miller,
Mauro Carvalho Chehab, Joe Perches, Antti Palosaari, Tejun Heo,
Will Deacon, Nikolay Borisov, Rusty Russell, Kees Cook, Michal
In-Reply-To: <CALszF6Bz978KA1DAxcKtmA55X=vJWEcVDa9fLOF=uNatYS+b+Q@mail.gmail.com>
On Fri, Mar 6, 2015 at 10:55 AM, Maxime Coquelin
<mcoquelin.stm32@gmail.com> wrote:
> 2015-03-06 10:03 GMT+01:00 Linus Walleij <linus.walleij@linaro.org>:
>> On Thu, Feb 12, 2015 at 6:46 PM, Maxime Coquelin
>> <mcoquelin.stm32@gmail.com> wrote:
>>
>>> Add a MAINTAINER entry covering all STM32 machine and drivers files.
>>>
>>> Signed-off-by: Maxime Coquelin <mcoquelin.stm32@gmail.com>
>>
>> (...)
>>> +F: drivers/clocksource/arm_system_timer.c
>>
>> Is that all? And that is not even a STM32 specific driver.
>
> For the ARM System Timer, I'm fine to add a new entry.
> Or remove the line, and let the maintain-ship to clocksource maintainers.
>
> All the STM32 files are covered by this line:
> +N: stm32
Aha you're right, news2me, I'm old and stupid :/
Thanks,
Linus Walleij
^ permalink raw reply
* Re: [PATCHv3 man-pages 3/3] open.2: describe O_BENEATH flag
From: Michael Kerrisk (man-pages) @ 2015-03-09 15:54 UTC (permalink / raw)
To: David Drysdale
Cc: linux-kernel@vger.kernel.org, Alexander Viro, Kees Cook,
Eric W. Biederman, Greg Kroah-Hartman, Meredydd Luff, Will Drewry,
Jorge Lucangeli Obes, Ricky Zhou, Lee Campbell, Julien Tinnes,
Mike Depinet, James Morris, Andy Lutomirski, Paolo Bonzini,
Paul Moore, Christoph Hellwig, Linux API, LSM List, fstests
In-Reply-To: <CAHse=S_kMkiV8hB5Y1=kVGCi3ZusRm5G=EyXhY_+PRycSt7GSw@mail.gmail.com>
On 9 March 2015 at 16:16, David Drysdale <drysdale@google.com> wrote:
> On Mon, Mar 9, 2015 at 2:32 PM, Michael Kerrisk (man-pages)
> <mtk.manpages@gmail.com> wrote:
>> On 03/09/2015 03:00 PM, David Drysdale wrote:
>>> Signed-off-by: David Drysdale <drysdale@google.com>
>>
>> Hi David,
>>
>> The text looks good insofar as it goes. But, it would be helpful
>> to have sentence or to that explains why this flag exists.
>> Could you add that, please?
>>
>> Thanks,
>>
>> Michael
>
> How about something like:
>
> This feature allows applications to be sure that the opened file
> is within the specified directory, regardless of the original
> source of the pathname argument. Some security-conscious pro‐
> grams may further ensure this by imposing a system call filter
> (with seccomp(2)) that requires this flag for all open() opera‐
> tions, so that the program cannot open files outside of speci‐
> fied directories even if subverted.
>
> (Also, I realize that I somehow failed to notice that the flags
> are listed in alphabetical order, so I'll move the text up, as
> in the updated diff below).
That looks good to me. Thanks!
Cheers,
Michael
> ---
> man2/open.2 | 44 ++++++++++++++++++++++++++++++++++++++++++++
> 1 file changed, 44 insertions(+)
>
> diff --git a/man2/open.2 b/man2/open.2
> index 956531b24b26..ece1fa90775a 100644
> --- a/man2/open.2
> +++ b/man2/open.2
> @@ -201,6 +201,43 @@ See
> for further details.
> See also BUGS, below.
> .TP
> +.B O_BENEATH " (since Linux 4.??)"
> +Ensure that the
> +.I pathname
> +is beneath the current working directory (for
> +.BR open (2))
> +or the
> +.I dirfd
> +(for
> +.BR openat (2)).
> +If the
> +.I pathname
> +is absolute or contains a path component of "..", the
> +.BR open ()
> +fails with the error
> +.BR EPERM.
> +This occurs even if ".." path component would not actually
> +escape the original directory; for example, a
> +.I pathname
> +of "subdir/../filename" would be rejected.
> +Path components that are symbolic links to absolute paths, or that are
> +relative paths containing a ".." component, will also cause the
> +.BR open ()
> +operation to fail with the error
> +.BR EPERM.
> +
> +This feature allows applications to be sure that the opened file is
> +within the specified directory, regardless of the original source of the
> +.I pathname
> +argument.
> +Some security-conscious programs may further ensure
> +this by imposing a system call filter (with
> +.BR seccomp (2))
> +that requires this flag for all
> +.BR open ()
> +operations, so that the program cannot open files outside of
> +specified directories even if subverted.
> +.TP
> .BR O_CLOEXEC " (since Linux 2.6.23)"
> .\" NOTE! several other man pages refer to this text
> Enable the close-on-exec flag for the new file descriptor.
> @@ -984,6 +1021,13 @@ did not match the owner of the file and the
> caller was not privileged
> The operation was prevented by a file seal; see
> .BR fcntl (2).
> .TP
> +.B EPERM
> +The
> +.B O_BENEATH
> +flag was specified and the
> +.I pathname
> +was not beneath the relevant directory.
> +.TP
> .B EROFS
> .I pathname
> refers to a file on a read-only filesystem and write access was
> --
> 2.2.0.rc0.207.ga3a616c
--
Michael Kerrisk
Linux man-pages maintainer; http://www.kernel.org/doc/man-pages/
Linux/UNIX System Programming Training: http://man7.org/training/
^ permalink raw reply
* Re: [PATCH v2 04/18] clocksource: Add ARM System timer driver
From: Linus Walleij @ 2015-03-09 15:50 UTC (permalink / raw)
To: Maxime Coquelin
Cc: Uwe Kleine-König, Andreas Färber, Geert Uytterhoeven,
Rob Herring, Philipp Zabel, Jonathan Corbet, Pawel Moll,
Mark Rutland, Ian Campbell, Kumar Gala, Russell King,
Daniel Lezcano, Thomas Gleixner, Greg Kroah-Hartman, Jiri Slaby,
Arnd Bergmann, Andrew Morton, David S. Miller,
Mauro Carvalho Chehab, Joe Perches, Antti Palosaari, Tejun Heo
In-Reply-To: <1424455277-29983-5-git-send-email-mcoquelin.stm32@gmail.com>
On Fri, Feb 20, 2015 at 7:01 PM, Maxime Coquelin
<mcoquelin.stm32@gmail.com> wrote:
> This patch adds clocksource support for ARMv7-M's System timer,
> also known as SysTick.
>
> Signed-off-by: Maxime Coquelin <mcoquelin.stm32@gmail.com>
(...)
> + /* If no clock found, try to get clock-frequency property */
> + if (!rate) {
> + ret = of_property_read_u32(np, "clock-frequency", &rate);
> + if (ret)
> + goto out_unmap;
> + }
If this driver is only used for this one system, and if on this one system
the clk subsystem will provide the clock rate, then there is no need
to include this hackaround property.
Alternatively there is no point including reading the frequency from
the clk subsystem for this one system.
So which one is it?
Yours,
Linus Walleij
^ permalink raw reply
* Re: [PATCHv3 man-pages 3/3] open.2: describe O_BENEATH flag
From: David Drysdale @ 2015-03-09 15:16 UTC (permalink / raw)
To: Michael Kerrisk (man-pages)
Cc: linux-kernel@vger.kernel.org, Alexander Viro, Kees Cook,
Eric W. Biederman, Greg Kroah-Hartman, Meredydd Luff, Will Drewry,
Jorge Lucangeli Obes, Ricky Zhou, Lee Campbell, Julien Tinnes,
Mike Depinet, James Morris, Andy Lutomirski, Paolo Bonzini,
Paul Moore, Christoph Hellwig, Linux API, LSM List, fstests
In-Reply-To: <54FDAF16.2030407@gmail.com>
On Mon, Mar 9, 2015 at 2:32 PM, Michael Kerrisk (man-pages)
<mtk.manpages@gmail.com> wrote:
> On 03/09/2015 03:00 PM, David Drysdale wrote:
>> Signed-off-by: David Drysdale <drysdale@google.com>
>
> Hi David,
>
> The text looks good insofar as it goes. But, it would be helpful
> to have sentence or to that explains why this flag exists.
> Could you add that, please?
>
> Thanks,
>
> Michael
How about something like:
This feature allows applications to be sure that the opened file
is within the specified directory, regardless of the original
source of the pathname argument. Some security-conscious pro‐
grams may further ensure this by imposing a system call filter
(with seccomp(2)) that requires this flag for all open() opera‐
tions, so that the program cannot open files outside of speci‐
fied directories even if subverted.
(Also, I realize that I somehow failed to notice that the flags
are listed in alphabetical order, so I'll move the text up, as
in the updated diff below).
Thanks,
David
---
man2/open.2 | 44 ++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 44 insertions(+)
diff --git a/man2/open.2 b/man2/open.2
index 956531b24b26..ece1fa90775a 100644
--- a/man2/open.2
+++ b/man2/open.2
@@ -201,6 +201,43 @@ See
for further details.
See also BUGS, below.
.TP
+.B O_BENEATH " (since Linux 4.??)"
+Ensure that the
+.I pathname
+is beneath the current working directory (for
+.BR open (2))
+or the
+.I dirfd
+(for
+.BR openat (2)).
+If the
+.I pathname
+is absolute or contains a path component of "..", the
+.BR open ()
+fails with the error
+.BR EPERM.
+This occurs even if ".." path component would not actually
+escape the original directory; for example, a
+.I pathname
+of "subdir/../filename" would be rejected.
+Path components that are symbolic links to absolute paths, or that are
+relative paths containing a ".." component, will also cause the
+.BR open ()
+operation to fail with the error
+.BR EPERM.
+
+This feature allows applications to be sure that the opened file is
+within the specified directory, regardless of the original source of the
+.I pathname
+argument.
+Some security-conscious programs may further ensure
+this by imposing a system call filter (with
+.BR seccomp (2))
+that requires this flag for all
+.BR open ()
+operations, so that the program cannot open files outside of
+specified directories even if subverted.
+.TP
.BR O_CLOEXEC " (since Linux 2.6.23)"
.\" NOTE! several other man pages refer to this text
Enable the close-on-exec flag for the new file descriptor.
@@ -984,6 +1021,13 @@ did not match the owner of the file and the
caller was not privileged
The operation was prevented by a file seal; see
.BR fcntl (2).
.TP
+.B EPERM
+The
+.B O_BENEATH
+flag was specified and the
+.I pathname
+was not beneath the relevant directory.
+.TP
.B EROFS
.I pathname
refers to a file on a read-only filesystem and write access was
--
2.2.0.rc0.207.ga3a616c
--
To unsubscribe from this list: send the line "unsubscribe linux-security-module" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply related
* Re: [PATCH v2 16/18] ARM: dts: Introduce STM32F429 MCU
From: Linus Walleij @ 2015-03-09 14:39 UTC (permalink / raw)
To: Maxime Coquelin
Cc: Uwe Kleine-König, Andreas Färber, Geert Uytterhoeven,
Rob Herring, Philipp Zabel, Jonathan Corbet, Pawel Moll,
Mark Rutland, Ian Campbell, Kumar Gala, Russell King,
Daniel Lezcano, Thomas Gleixner, Greg Kroah-Hartman, Jiri Slaby,
Arnd Bergmann, Andrew Morton, David S. Miller,
Mauro Carvalho Chehab, Joe Perches, Antti Palosaari, Tejun Heo
In-Reply-To: <1424455277-29983-17-git-send-email-mcoquelin.stm32@gmail.com>
On Fri, Feb 20, 2015 at 7:01 PM, Maxime Coquelin
<mcoquelin.stm32@gmail.com> wrote:
> The STMicrolectornics's STM32F419 MCU has the following main features:
> - Cortex-M4 core running up to @180MHz
> - 2MB internal flash, 256KBytes internal RAM
> - FMC controller to connect SDRAM, NOR and NAND memories
> - SD/MMC/SDIO support
> - Ethernet controller
> - USB OTFG FS & HS controllers
> - I2C, SPI, CAN busses support
> - Several 16 & 32 bits general purpose timers
> - Serial Audio interface
> - LCD controller
>
> Signed-off-by: Maxime Coquelin <mcoquelin.stm32@gmail.com>
(...)
> + soc {
> + usart1: usart@40011000 {
> + status = "okay";
> + };
> +
> + leds {
> + compatible = "gpio-leds";
> + red {
> + #gpio-cells = <2>;
> + label = "Front Panel LED";
> + gpios = <&gpiog 14 0>;
> + linux,default-trigger = "heartbeat";
> + };
> + green {
> + #gpio-cells = <2>;
> + gpios = <&gpiog 13 0>;
> + default-state = "off";
> + };
> + };
> + };
The LEDs are mounted on the board not the SoC, right?
So move them outside of the soc node, to the top level
of the DTS file.
Yours,
Linus Walleij
^ permalink raw reply
* Re: [PATCH] capabilities: Ambient capability set V2
From: Serge E. Hallyn @ 2015-03-09 14:36 UTC (permalink / raw)
To: Christoph Lameter
Cc: Serge E. Hallyn, Andy Lutomirski, Serge Hallyn, Jonathan Corbet,
Aaron Jones, LSM List, linux-kernel@vger.kernel.org,
Andrew Morton, Andrew G. Morgan, Mimi Zohar, Austin S Hemmelgarn,
Markku Savela, Jarkko Sakkinen, Linux API, Michael Kerrisk
In-Reply-To: <alpine.DEB.2.11.1503090704230.25725@gentwo.org>
On Mon, Mar 09, 2015 at 07:05:24AM -0500, Christoph Lameter wrote:
> On Sat, 7 Mar 2015, Serge E. Hallyn wrote:
>
> > > The ancestor here is ambient_test and when it is run pI will not be set
> > > despite the cap setting.
> >
> > ambient_test is supposed to set it.
>
> I thought the setcap +i would do it.
>
> So the setcap and setting of the file inheritance bits has no effect on
> pI? When the process starts pI is off despite fI being set?
Correct, pI must be set through capset(). Again, x in fI is saying
that the certain trusted users may have x in pP when they run the
binary; x in pi means that the users may have x in pP when they run
certain files. Other users running the file won't have x in pP, and
the special user running other files won't have x in pP.
^ permalink raw reply
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox