* [PATCH 2/4][PoC][RFC] Add rlimit-events framework
2017-10-18 20:32 ` [PATCH 0/4][PoC][RFC] Add rlimit-resources change notification mechanism Krzysztof Opasiak
2017-10-18 20:32 ` [PATCH 1/4][PoC][RFC] sched: Allow to get() and put() signal struct Krzysztof Opasiak
@ 2017-10-18 20:32 ` Krzysztof Opasiak
2017-10-19 7:41 ` Greg KH
2017-10-18 20:32 ` [PATCH 3/4][PoC][RFC] Connect rlimit-events with process life cycle Krzysztof Opasiak
2017-10-18 20:32 ` [PATCH 4/4][PoC][RFC] Allow to trace fd usage with rlimit-events Krzysztof Opasiak
3 siblings, 1 reply; 13+ messages in thread
From: Krzysztof Opasiak @ 2017-10-18 20:32 UTC (permalink / raw)
To: gregkh, viro, arnd
Cc: linux-kernel, linux-fsdevel, linux-arch, k.lewandowsk, l.stelmach,
p.szewczyk, b.zolnierkie, andrzej.p, kopasiak90,
Krzysztof Opasiak
Add a framework which allows to notify userspace programs
about change of resource (the same as in rlimits) usage.
To monitor some process, monitor FD has to be obtained from
kernel using rlimit-events netlink interface.
Then monitor can issue ioctls() to subscribe for a particular
usage level of given resource.
When monitoring subject crosses given usage level monitoring
fd will be ready to read resource change event from it.
It's possible to monitor multiple processes and single
process can be monitored by multiple other processes.
Signed-off-by: Krzysztof Opasiak <k.opasiak@samsung.com>
---
include/asm-generic/resource.h | 37 +-
include/linux/init_task.h | 1 +
include/linux/rlimit_noti_kern.h | 54 +++
include/linux/sched/signal.h | 6 +
include/uapi/linux/netlink.h | 1 +
include/uapi/linux/rlimit_noti.h | 71 ++++
init/Kconfig | 6 +
kernel/Makefile | 1 +
kernel/rlimit_noti.c | 786 +++++++++++++++++++++++++++++++++++++++
9 files changed, 962 insertions(+), 1 deletion(-)
create mode 100644 include/linux/rlimit_noti_kern.h
create mode 100644 include/uapi/linux/rlimit_noti.h
create mode 100644 kernel/rlimit_noti.c
diff --git a/include/asm-generic/resource.h b/include/asm-generic/resource.h
index 5e752b959054..338f20ba7e56 100644
--- a/include/asm-generic/resource.h
+++ b/include/asm-generic/resource.h
@@ -2,7 +2,7 @@
#define _ASM_GENERIC_RESOURCE_H
#include <uapi/asm-generic/resource.h>
-
+#include <linux/spinlock.h>
/*
* boot-time rlimit defaults for the init task:
@@ -27,4 +27,39 @@
[RLIMIT_RTTIME] = { RLIM_INFINITY, RLIM_INFINITY }, \
}
+#ifdef CONFIG_RLIMIT_NOTIFICATION
+
+#define INIT_RLIMIT_WATCHER(watchers, limit) \
+ [limit] = LIST_HEAD_INIT(watchers[limit])
+
+#define INIT_RLIMIT_WATCHERS(watchers) \
+{ \
+ INIT_RLIMIT_WATCHER(watchers, RLIMIT_CPU), \
+ INIT_RLIMIT_WATCHER(watchers, RLIMIT_FSIZE), \
+ INIT_RLIMIT_WATCHER(watchers, RLIMIT_DATA), \
+ INIT_RLIMIT_WATCHER(watchers, RLIMIT_STACK), \
+ INIT_RLIMIT_WATCHER(watchers, RLIMIT_CORE), \
+ INIT_RLIMIT_WATCHER(watchers, RLIMIT_RSS), \
+ INIT_RLIMIT_WATCHER(watchers, RLIMIT_NPROC), \
+ INIT_RLIMIT_WATCHER(watchers, RLIMIT_NOFILE), \
+ INIT_RLIMIT_WATCHER(watchers, RLIMIT_MEMLOCK), \
+ INIT_RLIMIT_WATCHER(watchers, RLIMIT_AS), \
+ INIT_RLIMIT_WATCHER(watchers, RLIMIT_LOCKS), \
+ INIT_RLIMIT_WATCHER(watchers, RLIMIT_SIGPENDING), \
+ INIT_RLIMIT_WATCHER(watchers, RLIMIT_MSGQUEUE), \
+ INIT_RLIMIT_WATCHER(watchers, RLIMIT_NICE), \
+ INIT_RLIMIT_WATCHER(watchers, RLIMIT_RTPRIO), \
+ INIT_RLIMIT_WATCHER(watchers, RLIMIT_RTTIME), \
+}
+
+#define INIT_RLIMIT_EVENTS_CTX(sig) \
+.rlimit_events_ctx = { \
+ .lock = __SPIN_LOCK_UNLOCKED(sig.rlimit_events_ctx.lock), \
+ .watchers = INIT_RLIMIT_WATCHERS(sig.rlimit_events_ctx.watchers),\
+ .process_dead = 0, \
+},
+#else
+#define INIT_RLIMIT_EVENTS_CTX(sig)
+#endif /* CONFIG_RLIMIT_NOTIFICATION */
+
#endif
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index e049526bc188..65400b376b92 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -76,6 +76,7 @@ extern struct fs_struct init_fs;
INIT_POSIX_TIMERS(sig) \
INIT_CPU_TIMERS(sig) \
.rlim = INIT_RLIMITS, \
+ INIT_RLIMIT_EVENTS_CTX(sig) \
INIT_CPUTIMER(sig) \
INIT_PREV_CPUTIME(sig) \
.cred_guard_mutex = \
diff --git a/include/linux/rlimit_noti_kern.h b/include/linux/rlimit_noti_kern.h
new file mode 100644
index 000000000000..e49fddaa21c0
--- /dev/null
+++ b/include/linux/rlimit_noti_kern.h
@@ -0,0 +1,54 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef _LINUX_RLIMIT_NOTI_H_
+#define _LINUX_RLIMIT_NOTI_H_
+
+#include <uapi/linux/rlimit_noti.h>
+
+struct rlimit_noti_ctx {
+ /* for mdification protection */
+ spinlock_t lock;
+ /* protected by RCU */
+ struct list_head watchers[RLIM_NLIMITS];
+
+ unsigned process_dead:1;
+};
+
+#ifdef CONFIG_RLIMIT_NOTIFICATION
+
+int rlimit_noti_task_fork(struct task_struct *parent,
+ struct task_struct *child);
+
+void rlimit_noti_task_exit(struct task_struct *tsk);
+
+int rlimit_noti_watch_active(struct task_struct *tsk, unsigned int res);
+
+void rlimit_noti_res_changed(struct task_struct *tsk, unsigned int res,
+ uint64_t old, uint64_t new);
+
+#else
+
+static inline int rlimit_noti_watch_active(struct task_struct *tsk,
+ unsigned int res)
+{
+ return 0;
+}
+
+static inline void rlimit_noti_res_changed(struct task_struct *tsk,
+ unsigned int res,
+ uint64_t old, uint64_t new)
+{
+}
+
+#endif /* CONFIG_RLIMIT_NOTIFICATION */
+#endif /* _LINUX_RLIMIT_NOTI_H_ */
diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h
index 05cef037fbf2..36849df51c5b 100644
--- a/include/linux/sched/signal.h
+++ b/include/linux/sched/signal.h
@@ -7,7 +7,9 @@
#include <linux/sched/jobctl.h>
#include <linux/sched/task.h>
#include <linux/cred.h>
+#include <linux/list.h>
+#include <linux/rlimit_noti_kern.h>
/*
* Types defining task->signal and task->sighand and APIs using them:
*/
@@ -197,6 +199,10 @@ struct signal_struct {
*/
struct rlimit rlim[RLIM_NLIMITS];
+#ifdef CONFIG_RLIMIT_NOTIFICATION
+ struct rlimit_noti_ctx rlimit_events_ctx;
+#endif
+
#ifdef CONFIG_BSD_PROCESS_ACCT
struct pacct_struct pacct; /* per-process accounting information */
#endif
diff --git a/include/uapi/linux/netlink.h b/include/uapi/linux/netlink.h
index f86127a46cfc..24b55805d607 100644
--- a/include/uapi/linux/netlink.h
+++ b/include/uapi/linux/netlink.h
@@ -28,6 +28,7 @@
#define NETLINK_RDMA 20
#define NETLINK_CRYPTO 21 /* Crypto layer */
#define NETLINK_SMC 22 /* SMC monitoring */
+#define NETLINK_RLIMIT_EVENTS 23 /* rlimit notification */
#define NETLINK_INET_DIAG NETLINK_SOCK_DIAG
diff --git a/include/uapi/linux/rlimit_noti.h b/include/uapi/linux/rlimit_noti.h
new file mode 100644
index 000000000000..a15a2826bce9
--- /dev/null
+++ b/include/uapi/linux/rlimit_noti.h
@@ -0,0 +1,71 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef _UAPI_LINUX_RLIMIT_NOTI_H_
+#define _UAPI_LINUX_RLIMIT_NOTI_H_
+
+#ifdef __KERNEL__
+#include <linux/types.h>
+#include <linux/resource.h>
+#else
+#include <stdint.h>
+#endif
+
+#define RLIMIT_GET_NOTI_FD 1000
+
+/* ioctl's */
+#define RLIMIT_ADD_NOTI_LVL 1
+#define RLIMIT_RM_NOTI_LVL 2
+
+#define RLIMIT_SET_NOTI_ALL 3
+#define RLIMIT_CLEAR_NOTI_ALL 4
+
+/*
+ * For future (notify every 5, 10 units change):
+ * #define RLIMIT_SET_NOTI_STEP 5
+ */
+
+#define RLIMIT_GET_NOTI_LVLS 6
+#define RLIMIT_GET_NOTI_LVL_COUNT 7
+
+/* Flags for ioctl's */
+#define RLIMIT_FLAG_NO_INHERIT (1u << 0)
+
+/* Event types */
+enum {
+ RLIMIT_EVENT_TYPE_RES_CHANGED,
+ RLIMIT_EVENT_TYPE_MAX
+};
+
+/* TODO take care of padding (packed) */
+struct rlimit_noti_subject {
+ pid_t pid;
+ uint32_t resource;
+};
+
+struct rlimit_noti_level {
+ struct rlimit_noti_subject subj;
+ uint64_t value;
+ uint32_t flags;
+};
+
+struct rlimit_event {
+ uint32_t ev_type;
+ size_t size;
+};
+
+struct rlimit_event_res_changed {
+ struct rlimit_noti_subject subj;
+ uint64_t new_value;
+};
+
+#endif /* _UAPI_LINUX_RLIMIT_NOTI_H_ */
diff --git a/init/Kconfig b/init/Kconfig
index 1d3475fc9496..4bc44fa86640 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -332,6 +332,12 @@ config AUDIT_TREE
depends on AUDITSYSCALL
select FSNOTIFY
+config RLIMIT_NOTIFICATION
+ bool "Support fd notifications on given resource usage"
+ depends on NET
+ help
+ Enable this to monitor process resource changes usage via fd.
+
source "kernel/irq/Kconfig"
source "kernel/time/Kconfig"
diff --git a/kernel/Makefile b/kernel/Makefile
index 72aa080f91f0..d927d41c35f5 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -76,6 +76,7 @@ obj-$(CONFIG_AUDIT) += audit.o auditfilter.o
obj-$(CONFIG_AUDITSYSCALL) += auditsc.o
obj-$(CONFIG_AUDIT_WATCH) += audit_watch.o audit_fsnotify.o
obj-$(CONFIG_AUDIT_TREE) += audit_tree.o
+obj-$(CONFIG_RLIMIT_NOTIFICATION) += rlimit_noti.o
obj-$(CONFIG_GCOV_KERNEL) += gcov/
obj-$(CONFIG_KCOV) += kcov.o
obj-$(CONFIG_KPROBES) += kprobes.o
diff --git a/kernel/rlimit_noti.c b/kernel/rlimit_noti.c
new file mode 100644
index 000000000000..a4fe5b9dd02b
--- /dev/null
+++ b/kernel/rlimit_noti.c
@@ -0,0 +1,786 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * Netlink communication strongly based on audit.c.
+ */
+
+#include <linux/rlimit_noti.h>
+
+#include <net/sock.h>
+#include <net/netlink.h>
+#include <linux/skbuff.h>
+#include <net/netns/generic.h>
+
+#include <linux/kthread.h>
+#include <linux/module.h>
+#include <linux/wait.h>
+#include <linux/list.h>
+#include <linux/sched.h>
+#include <linux/pid.h>
+#include <linux/pid_namespace.h>
+#include <linux/anon_inodes.h>
+#include <linux/sched/signal.h>
+#include <linux/spinlock.h>
+
+#define sig_watchers(sig) sig->rlimit_events_ctx.watchers
+
+#define sig_for_each_res(wlist, sig) \
+ for (wlist = &sig_watchers(sig)[0]; \
+ wlist - &sig_watchers(sig)[0] < ARRAY_SIZE(sig_watchers(sig)); \
+ ++wlist)
+
+struct rlimit_event_list {
+ struct rlimit_event ev;
+ union {
+ struct rlimit_event_res_changed rchanged;
+ } event_data;
+ struct list_head node;
+};
+
+#define MAX_RLIMIT_EVENT_SIZE ({ \
+ struct rlimit_event_list *_rl = NULL; \
+ sizeof(_rl->event_data); \
+})
+
+struct rlimit_watch_fd_ctx {
+ struct kref kref;
+
+ spinlock_t noti_ctx_lock;
+ struct list_head watchers;
+ unsigned fd_invalid:1;
+
+ spinlock_t events_lock;
+ wait_queue_head_t events_queue;
+ struct list_head events;
+};
+
+struct rlimit_watcher {
+ struct rcu_head rcu;
+ struct rlimit_watch_fd_ctx *ctx;
+ struct signal_struct *signal;
+
+ struct list_head tsk_node;
+ struct list_head ctx_node;
+
+ uint64_t value;
+ unsigned noti_all_changes:1;
+};
+
+/******************************************************************************
+ * Public API
+ ******************************************************************************/
+
+static void release_ctx(struct kref *kref)
+{
+ struct rlimit_watch_fd_ctx *ctx =
+ container_of(kref, struct rlimit_watch_fd_ctx, kref);
+
+ kfree(ctx);
+}
+
+static struct rlimit_watcher *alloc_rlimit_watcher(
+ struct rlimit_watch_fd_ctx *ctx, struct signal_struct *signal,
+ uint64_t value, bool noti_all)
+{
+ struct rlimit_watcher *w;
+
+ w = kzalloc(sizeof(*w), GFP_ATOMIC);
+ if (!w)
+ return ERR_PTR(ENOMEM);
+
+ INIT_LIST_HEAD(&w->tsk_node);
+ INIT_LIST_HEAD(&w->ctx_node);
+
+ w->ctx = ctx;
+ kref_get(&ctx->kref);
+ w->signal = signal;
+ get_signal_struct(signal);
+ w->value = value;
+ w->noti_all_changes = noti_all;
+
+ return w;
+}
+
+static void free_rlimit_watcher(struct rlimit_watcher *w)
+{
+ if (!w)
+ return;
+
+ kref_put(&w->ctx->kref, release_ctx);
+ put_signal_struct(w->signal);
+ kfree(w);
+}
+
+static void free_rlimit_watcher_rcu(struct rcu_head *head)
+{
+ free_rlimit_watcher(container_of(head, struct rlimit_watcher, rcu));
+}
+
+static inline struct rlimit_watcher *rlimit_watcher_dup(
+ struct rlimit_watcher *org, struct task_struct *new_owner)
+{
+ return alloc_rlimit_watcher(org->ctx, new_owner->signal, org->value,
+ org->noti_all_changes);
+}
+
+/* This is not called for threads */
+int rlimit_noti_task_fork(struct task_struct *parent, struct task_struct *child)
+{
+ struct rlimit_watcher *w, *nw;
+ struct signal_struct *sig = child->signal;
+ unsigned long flags;
+ struct list_head *iter;
+ int ret;
+
+ /*
+ * init all list to avoid leaving uninitialized lists
+ * in case of error
+ */
+ sig_for_each_res(iter, sig)
+ INIT_LIST_HEAD(iter);
+
+ spin_lock_init(&sig->rlimit_events_ctx.lock);
+ sig->rlimit_events_ctx.process_dead = 0;
+
+ /* Lock the list to be safe against modification */
+ spin_lock_irqsave(&parent->signal->rlimit_events_ctx.lock, flags);
+
+ sig_for_each_res(iter, sig)
+ list_for_each_entry(w, iter, tsk_node) {
+ nw = rlimit_watcher_dup(w, child);
+ if (!nw) {
+ spin_unlock_irqrestore(
+ &parent->signal->rlimit_events_ctx.lock,
+ flags);
+ ret = -ENOMEM;
+ goto cleanup;
+ }
+
+ /*
+ * For now we put this only on task side list
+ * to avoid deadlock (ABBA)
+ *
+ * We assume that no one can access this new task
+ * for now so we don't use any locking here
+ */
+ list_add_tail_rcu(&nw->tsk_node, iter);
+ }
+
+ /*
+ * now we got all watchers on our brand new list so we can release
+ * parent lock and allow modification of its list
+ */
+ spin_unlock_irqrestore(&parent->signal->rlimit_events_ctx.lock, flags);
+
+ sig_for_each_res(iter, sig) {
+start_again:
+ rcu_read_lock();
+ list_for_each_entry_rcu(w, iter, tsk_node) {
+ spin_lock_irqsave(&w->ctx->noti_ctx_lock, flags);
+ if (list_empty(&w->ctx_node)) {
+ if (!w->ctx->fd_invalid) {
+ list_add_tail(&w->ctx_node,
+ &w->ctx->watchers);
+ } else {
+ spin_lock(&sig->rlimit_events_ctx.lock);
+ list_del_rcu(&w->tsk_node);
+ call_rcu(&w->rcu,
+ free_rlimit_watcher_rcu);
+ spin_unlock(
+ &sig->rlimit_events_ctx.lock);
+ rcu_read_unlock();
+ goto start_again;
+ }
+ }
+ spin_unlock_irqrestore(&w->ctx->noti_ctx_lock, flags);
+ }
+ rcu_read_unlock();
+ }
+
+ return 0;
+cleanup:
+ sig_for_each_res(iter, sig) {
+ while (!list_empty(iter)) {
+ w = list_first_entry(iter,
+ struct rlimit_watcher, ctx_node);
+ list_del_init(&w->tsk_node);
+ call_rcu(&w->rcu, free_rlimit_watcher_rcu);
+ }
+ }
+ return ret;
+}
+
+void rlimit_noti_task_exit(struct task_struct *tsk)
+{
+ struct rlimit_watcher *w;
+ struct rlimit_noti_ctx *n_ctx = &tsk->signal->rlimit_events_ctx;
+ unsigned long flags;
+ struct list_head *head;
+
+ if (tsk != tsk->group_leader)
+ return;
+
+ /*
+ * Let's mark that we are in the middle of cleaning up
+ * to prevent new watchers from being added to the list
+ */
+ spin_lock_irqsave(&n_ctx->lock, flags);
+ WARN_ON(n_ctx->process_dead);
+ n_ctx->process_dead = true;
+ spin_unlock_irqrestore(&n_ctx->lock, flags);
+
+ sig_for_each_res(head, tsk->signal) {
+ /*
+ * Let's go through the list and remove watchers form respective
+ * fd contextes.
+ */
+ rcu_read_lock();
+ list_for_each_entry_rcu(w, head, tsk_node) {
+ spin_lock_irqsave(&w->ctx->noti_ctx_lock, flags);
+ /*
+ * List empty means that between iteration and acquiring
+ * lock this watcher has been already removed and
+ * it's just hanging due to grace period
+ */
+ if (!list_empty(&w->ctx_node)
+ && !list_empty(&w->tsk_node))
+ list_del_init(&w->ctx_node);
+
+ spin_unlock_irqrestore(&w->ctx->noti_ctx_lock, flags);
+ }
+ rcu_read_unlock();
+
+ /* Now let's cleanup our list */
+ spin_lock_irqsave(&n_ctx->lock, flags);
+ while (!list_empty(head)) {
+ w = list_first_entry(head,
+ struct rlimit_watcher, tsk_node);
+ list_del_rcu(&w->tsk_node);
+ call_rcu(&w->rcu, free_rlimit_watcher_rcu);
+ }
+ spin_unlock_irqrestore(&n_ctx->lock, flags);
+ }
+}
+
+static int rlimit_generate_res_changed_event(struct rlimit_watch_fd_ctx *ctx,
+ struct task_struct *tsk,
+ unsigned int resource,
+ uint64_t new, int mflags)
+{
+ struct rlimit_event_list *ev_list;
+ unsigned long flags;
+
+ ev_list = kzalloc(sizeof(*ev_list), mflags);
+ if (!ev_list)
+ return -ENOMEM;
+
+ ev_list->ev.ev_type = RLIMIT_EVENT_TYPE_RES_CHANGED;
+ ev_list->ev.size = sizeof(struct rlimit_event)
+ + sizeof(struct rlimit_event_res_changed);
+
+ /* TODO add here support for PID namespace */
+ ev_list->event_data.rchanged.subj.pid = tsk->pid;
+ ev_list->event_data.rchanged.subj.resource = resource;
+
+ ev_list->event_data.rchanged.new_value = new;
+
+ INIT_LIST_HEAD(&ev_list->node);
+
+ spin_lock_irqsave(&ctx->events_lock, flags);
+ list_add_tail(&ev_list->node, &ctx->events);
+ wake_up_interruptible(&ctx->events_queue);
+ spin_unlock_irqrestore(&ctx->events_lock, flags);
+
+ return 0;
+}
+
+int rlimit_noti_watch_active(struct task_struct *tsk, unsigned int res)
+{
+ return !list_empty(&tsk->signal->rlimit_events_ctx.watchers[res]);
+}
+
+void rlimit_noti_res_changed(struct task_struct *tsk, unsigned int res,
+ uint64_t old, uint64_t new)
+{
+ struct rlimit_watcher *w;
+ struct signal_struct *signal = tsk->signal;
+
+ rcu_read_lock();
+ /* TODO this should be replaced with sth faster */
+ list_for_each_entry_rcu(w, &signal->rlimit_events_ctx.watchers[res],
+ tsk_node)
+ if (w->noti_all_changes ||
+ (w->value > old && w->value <= new) ||
+ (w->value > new && w->value <= old)) {
+ /* ignore error as there is nothing we can do */
+ rlimit_generate_res_changed_event(w->ctx, tsk,
+ res, new, GFP_ATOMIC);
+ }
+ rcu_read_unlock();
+}
+
+/******************************************************************************
+ * FD part
+ ******************************************************************************/
+
+static int add_new_watcher(struct rlimit_watch_fd_ctx *ctx,
+ struct task_struct *tsk,
+ int resource, uint64_t value, bool noti_all)
+{
+ struct rlimit_watcher *w;
+ struct signal_struct *signal;
+ unsigned long flags;
+ int ret = 0;
+
+ if (resource >= RLIM_NLIMITS)
+ return -EINVAL;
+
+ read_lock(&tasklist_lock);
+ if (!tsk->sighand) {
+ ret = -ESRCH;
+ goto unlock_read;
+ }
+
+ task_lock(tsk->group_leader);
+ signal = tsk->signal;
+
+ w = alloc_rlimit_watcher(ctx, signal, value, noti_all);
+ if (IS_ERR(w)) {
+ ret = PTR_ERR(w);
+ goto unlock_group_leader;
+ }
+
+ spin_lock_irqsave(&ctx->noti_ctx_lock, flags);
+ /*
+ * First add it to ctx list as we are holding it's lock
+ * and no one is going to modify or iterate it
+ */
+ list_add_tail(&w->ctx_node, &ctx->watchers);
+ /* Now let's lock process side lock and add this torcu protected list */
+ spin_lock(&signal->rlimit_events_ctx.lock);
+
+ /* If process is in the middle of cleanup let's rollback everything */
+ if (!signal->rlimit_events_ctx.process_dead) {
+ list_add_tail_rcu(&signal->rlimit_events_ctx.watchers[resource],
+ &w->tsk_node);
+ ret = 0;
+ } else {
+ list_del(&w->ctx_node);
+ free_rlimit_watcher(w);
+ ret = -ENOENT;
+ }
+
+ spin_unlock(&signal->rlimit_events_ctx.lock);
+ spin_unlock_irqrestore(&ctx->noti_ctx_lock, flags);
+unlock_group_leader:
+ task_unlock(tsk->group_leader);
+unlock_read:
+ read_unlock(&tasklist_lock);
+
+ return ret;
+}
+
+ssize_t rlimit_noti_read_event(struct file *file, char __user *buf,
+ size_t size, loff_t *ptr)
+{
+ struct rlimit_watch_fd_ctx *ctx = file->private_data;
+ struct rlimit_event_list *ev_list;
+ unsigned long flags;
+ size_t ret;
+
+ /* TODO allow to read only part of event */
+ if (size < MAX_RLIMIT_EVENT_SIZE)
+ return -EINVAL;
+
+ spin_lock_irqsave(&ctx->events_lock, flags);
+#define READ_COND (!list_empty(&ctx->events))
+ while (!READ_COND) {
+ spin_unlock_irqrestore(&ctx->events_lock, flags);
+
+ if (wait_event_interruptible(ctx->events_queue, READ_COND))
+ return -ERESTARTSYS;
+ spin_lock_irqsave(&ctx->events_lock, flags);
+ }
+#undef READ_COND
+
+ ev_list = list_first_entry(&ctx->events,
+ struct rlimit_event_list, node);
+ list_del(&ev_list->node);
+ spin_unlock_irqrestore(&ctx->events_lock, flags);
+
+ /* TODO handle fault */
+ ret = copy_to_user(buf, &ev_list->ev, ev_list->ev.size);
+ if (ret == 0)
+ ret = ev_list->ev.size;
+
+ kfree(ev_list);
+
+ return ret;
+}
+
+
+unsigned int rlimit_noti_poll(struct file *file, struct poll_table_struct *wait)
+{
+ struct rlimit_watch_fd_ctx *ctx = file->private_data;
+ unsigned int mask = POLLWRNORM;
+ unsigned long flags;
+
+ poll_wait(file, &ctx->events_queue, wait);
+
+ spin_lock_irqsave(&ctx->events_lock, flags);
+ if (!list_empty(&ctx->events))
+ mask |= POLLIN;
+
+ /* TODO add notification when last process exited */
+ spin_unlock_irqrestore(&ctx->events_lock, flags);
+
+ return mask;
+}
+
+
+static long rlimit_noti_ioctl(struct file *file,
+ unsigned int cmd, unsigned long arg)
+{
+ struct rlimit_watch_fd_ctx *ctx = file->private_data;
+ struct task_struct *tsk;
+ struct rlimit_noti_level nlvl;
+ bool noti_all = false;
+ int ret;
+
+ switch (cmd) {
+ case RLIMIT_SET_NOTI_ALL:
+ if (copy_from_user(&nlvl.subj,
+ (void __user *)arg, sizeof(nlvl.subj)))
+ return -EFAULT;
+
+ nlvl.value = 0;
+ noti_all = true;
+ goto set_watch;
+
+ case RLIMIT_ADD_NOTI_LVL:
+ if (copy_from_user(&nlvl, (void __user *)arg, sizeof(nlvl)))
+ return -EFAULT;
+set_watch:
+ rcu_read_lock();
+ tsk = find_task_by_vpid(nlvl.subj.pid);
+ if (!tsk) {
+ rcu_read_unlock();
+ printk(KERN_DEBUG "No PID in current NS\n");
+ return -EINVAL;
+ }
+
+ get_task_struct(tsk);
+ rcu_read_unlock();
+
+ /* TODO check for duplicates before adding */
+ ret = add_new_watcher(ctx, tsk, nlvl.subj.resource,
+ nlvl.value, false);
+ put_task_struct(tsk);
+ break;
+
+ case RLIMIT_CLEAR_NOTI_ALL:
+ case RLIMIT_RM_NOTI_LVL:
+
+ case RLIMIT_GET_NOTI_LVLS:
+ case RLIMIT_GET_NOTI_LVL_COUNT:
+ /* TODO: Implement me */
+ ret = -ENOTSUPP;
+ break;
+ default:
+ ret = -EINVAL;
+ }
+
+ return ret;
+}
+
+static int rlimit_noti_release(struct inode *inode, struct file *file)
+{
+ struct rlimit_watch_fd_ctx *ctx = file->private_data;
+ struct rlimit_watcher *w;
+ struct rlimit_event_list *ev_list;
+ unsigned long flags;
+
+ /* Clean up watchers */
+ spin_lock_irqsave(&ctx->noti_ctx_lock, flags);
+ ctx->fd_invalid = 1;
+ list_for_each_entry(w, &ctx->watchers, ctx_node) {
+ spin_lock(&w->signal->rlimit_events_ctx.lock);
+ list_del_rcu(&w->tsk_node);
+ spin_unlock(&w->signal->rlimit_events_ctx.lock);
+ }
+
+ while (!list_empty(&ctx->watchers)) {
+ w = list_first_entry(&ctx->watchers,
+ struct rlimit_watcher, ctx_node);
+ list_del_init(&w->ctx_node);
+ call_rcu(&w->rcu, free_rlimit_watcher_rcu);
+ }
+
+ spin_unlock_irqrestore(&ctx->noti_ctx_lock, flags);
+
+ /* to ensure that no more events will be generated */
+ synchronize_rcu();
+
+ spin_lock_irqsave(&ctx->events_lock, flags);
+ while (!list_empty(&ctx->events)) {
+ ev_list = list_first_entry(&ctx->events,
+ struct rlimit_event_list, node);
+ list_del(&ev_list->node);
+ kfree(ev_list);
+ }
+ spin_unlock_irqrestore(&ctx->events_lock, flags);
+
+ kref_put(&ctx->kref, release_ctx);
+
+ return 0;
+}
+
+static const struct file_operations rlimit_noti_fops = {
+ .read = rlimit_noti_read_event,
+ .release = rlimit_noti_release,
+ .poll = rlimit_noti_poll,
+ .unlocked_ioctl = rlimit_noti_ioctl,
+};
+
+static int rlimit_noti_create_fd(void)
+{
+ struct rlimit_watch_fd_ctx *ctx;
+ int ret;
+
+ ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+ if (!ctx)
+ return -ENOMEM;
+
+ kref_init(&ctx->kref);
+ spin_lock_init(&ctx->noti_ctx_lock);
+ INIT_LIST_HEAD(&ctx->watchers);
+ spin_lock_init(&ctx->events_lock);
+ INIT_LIST_HEAD(&ctx->events);
+ init_waitqueue_head(&ctx->events_queue);
+
+ ret = anon_inode_getfd("rlimit_noti", &rlimit_noti_fops, ctx, 0);
+ if (ret < 0)
+ goto put_ctx;
+
+ return ret;
+put_ctx:
+ kref_put(&ctx->kref, release_ctx);
+ return ret;
+}
+
+
+
+/******************************************************************************
+ * netlink part
+ ******************************************************************************/
+
+
+/* private rlimit_noti network namespace index */
+static unsigned int rlimit_noti_net_id;
+
+struct rlimit_noti_net {
+ struct sock *sk;
+};
+
+struct rlimit_noti_reply {
+ __u32 portid;
+ struct net *net;
+ struct sk_buff *skb;
+};
+
+static struct sock *rlimit_noti_get_socket(const struct net *net)
+{
+ struct rlimit_noti_net *rn_net;
+
+ if (!net)
+ return NULL;
+
+ rn_net = net_generic(net, rlimit_noti_net_id);
+ return rn_net->sk;
+}
+
+static struct sk_buff *rlimit_noti_make_reply(int seq, int type,
+ void *payload, int size)
+{
+ struct sk_buff *skb;
+ struct nlmsghdr *nl_header;
+ int flags = 0;
+
+ skb = nlmsg_new(size, GFP_KERNEL);
+ if (!skb)
+ return NULL;
+
+ nl_header = nlmsg_put(skb, 0, seq, type, size, flags);
+ if (!nl_header)
+ goto free_skb;
+
+ memcpy(nlmsg_data(nl_header), payload, size);
+
+ return skb;
+
+free_skb:
+ kfree_skb(skb);
+ return NULL;
+}
+
+static int rlimit_noti_send_reply_thread(void *arg)
+{
+ struct rlimit_noti_reply *reply = arg;
+ struct sock *sk = rlimit_noti_get_socket(reply->net);
+
+ /*
+ * Ignore failure. It'll only happen if the sender goes away,
+ * because our timeout is set to infinite.
+ */
+ netlink_unicast(sk, reply->skb, reply->portid, 0);
+ put_net(reply->net);
+ kfree(reply);
+ return 0;
+}
+
+static void rlimit_noti_send_reply(struct sk_buff *request_skb, int seq,
+ int type, void *payload, int size)
+{
+ struct net *net = sock_net(NETLINK_CB(request_skb).sk);
+ struct sk_buff *skb;
+ struct task_struct *tsk;
+ struct rlimit_noti_reply *reply;
+
+ reply = kmalloc(sizeof(*reply), GFP_KERNEL);
+ if (!reply)
+ return;
+
+ skb = rlimit_noti_make_reply(seq, type, payload, size);
+ if (!skb)
+ goto out;
+
+ reply->net = get_net(net);
+ reply->portid = NETLINK_CB(request_skb).portid;
+ reply->skb = skb;
+
+ tsk = kthread_run(rlimit_noti_send_reply_thread, reply,
+ "rlimit_noti_send_reply");
+ if (!IS_ERR(tsk))
+ return;
+ kfree_skb(skb);
+out:
+ kfree(reply);
+}
+
+static int rlimit_noti_netlink_ok(struct sk_buff *skb, u16 msg_type)
+{
+ /* TODO: put here some security and namespace checks */
+ return 0;
+}
+
+static int rlimit_noti_receive_msg(struct sk_buff *skb,
+ struct nlmsghdr *nl_header)
+{
+ u32 seq_nb = nl_header->nlmsg_seq;
+ u16 msg_type = nl_header->nlmsg_type;
+ int ret;
+
+ ret = rlimit_noti_netlink_ok(skb, msg_type);
+ if (ret)
+ return ret;
+
+ switch (msg_type) {
+ case RLIMIT_GET_NOTI_FD: {
+ int fd = 10;
+
+ fd = rlimit_noti_create_fd();
+ if (fd < 0) {
+ ret = fd;
+ goto out;
+ }
+ rlimit_noti_send_reply(skb, seq_nb, RLIMIT_GET_NOTI_FD,
+ &fd, sizeof(fd));
+ ret = 0;
+ break;
+ }
+ default:
+ ret = -EINVAL;
+ break;
+ }
+out:
+ return ret;
+}
+
+static void rlimit_noti_netlink_receive(struct sk_buff *skb)
+{
+ struct nlmsghdr *nl_header;
+ int len, ret;
+
+ nl_header = nlmsg_hdr(skb);
+ len = skb->len;
+
+ while (nlmsg_ok(nl_header, len)) {
+ ret = rlimit_noti_receive_msg(skb, nl_header);
+ /* if err or if this message says it wants a response */
+ if (ret || (nl_header->nlmsg_flags & NLM_F_ACK))
+ netlink_ack(skb, nl_header, ret, NULL);
+
+ nl_header = nlmsg_next(nl_header, &len);
+ }
+}
+
+static int rlimit_noti_netlink_bind(struct net *net, int group)
+{
+ /* For now we allow everyone but maybe this should be limited? */
+ return 0;
+}
+
+static int __net_init rlimit_noti_net_init(struct net *net)
+{
+ struct netlink_kernel_cfg cfg = {
+ .input = rlimit_noti_netlink_receive,
+ .bind = rlimit_noti_netlink_bind,
+ .flags = NL_CFG_F_NONROOT_RECV,
+ .groups = 1, /* Just one, the default */
+ };
+ struct rlimit_noti_net *rn_net = net_generic(net, rlimit_noti_net_id);
+
+ rn_net->sk = netlink_kernel_create(net, NETLINK_RLIMIT_EVENTS, &cfg);
+ if (rn_net->sk == NULL) {
+ printk(KERN_ERR
+ "cannot initialize netlink socket in namespace");
+ return -ENOMEM;
+ }
+ rn_net->sk->sk_sndtimeo = MAX_SCHEDULE_TIMEOUT;
+
+ return 0;
+
+}
+
+static void __net_exit rlimit_noti_net_exit(struct net *net)
+{
+ struct rlimit_noti_net *rn_net = net_generic(net, rlimit_noti_net_id);
+
+ netlink_kernel_release(rn_net->sk);
+}
+
+static struct pernet_operations rlimit_noti_net_ops __net_initdata = {
+ .init = rlimit_noti_net_init,
+ .exit = rlimit_noti_net_exit,
+ .id = &rlimit_noti_net_id,
+ .size = sizeof(struct rlimit_noti_net),
+};
+
+static int __init rlimit_noti_init(void)
+{
+ return register_pernet_subsys(&rlimit_noti_net_ops);
+}
+late_initcall(rlimit_noti_init);
+
+static void __exit rlimit_noti_exit(void)
+{
+ unregister_pernet_subsys(&rlimit_noti_net_ops);
+}
--
2.9.3
^ permalink raw reply related [flat|nested] 13+ messages in thread* [PATCH 4/4][PoC][RFC] Allow to trace fd usage with rlimit-events
2017-10-18 20:32 ` [PATCH 0/4][PoC][RFC] Add rlimit-resources change notification mechanism Krzysztof Opasiak
` (2 preceding siblings ...)
2017-10-18 20:32 ` [PATCH 3/4][PoC][RFC] Connect rlimit-events with process life cycle Krzysztof Opasiak
@ 2017-10-18 20:32 ` Krzysztof Opasiak
2017-10-18 23:05 ` Al Viro
3 siblings, 1 reply; 13+ messages in thread
From: Krzysztof Opasiak @ 2017-10-18 20:32 UTC (permalink / raw)
To: gregkh, viro, arnd
Cc: linux-kernel, linux-fsdevel, linux-arch, k.lewandowsk, l.stelmach,
p.szewczyk, b.zolnierkie, andrzej.p, kopasiak90,
Krzysztof Opasiak
Add rlimit-events calls to file descriptors management
code to allow tracing of FD usage.
This allows userspace process (monitor) to get notification when
other process (subject) uses given amount of file descriptors.
This can be used to for example asynchronously monitor number
of open FD's in system services instead of polling with
predefined interval.
Signed-off-by: Krzysztof Opasiak <k.opasiak@samsung.com>
---
drivers/android/binder.c | 4 +--
fs/exec.c | 2 +-
fs/file.c | 83 ++++++++++++++++++++++++++++++++++++++++--------
fs/open.c | 2 +-
include/linux/fdtable.h | 9 +++---
5 files changed, 77 insertions(+), 23 deletions(-)
diff --git a/drivers/android/binder.c b/drivers/android/binder.c
index d802384ccdd3..b4be938a8ddf 100644
--- a/drivers/android/binder.c
+++ b/drivers/android/binder.c
@@ -417,7 +417,7 @@ static int task_get_unused_fd_flags(struct binder_proc *proc, int flags)
rlim_cur = task_rlimit(proc->tsk, RLIMIT_NOFILE);
unlock_task_sighand(proc->tsk, &irqs);
- return __alloc_fd(files, 0, rlim_cur, flags);
+ return __alloc_fd(proc->tsk, 0, rlim_cur, flags);
}
/*
@@ -440,7 +440,7 @@ static long task_close_fd(struct binder_proc *proc, unsigned int fd)
if (proc->files == NULL)
return -ESRCH;
- retval = __close_fd(proc->files, fd);
+ retval = __close_fd(proc->tsk, fd);
/* can't restart close syscall because file table entry was cleared */
if (unlikely(retval == -ERESTARTSYS ||
retval == -ERESTARTNOINTR ||
diff --git a/fs/exec.c b/fs/exec.c
index 964e34e307df..01bd18d29ee4 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1297,7 +1297,7 @@ int flush_old_exec(struct linux_binprm * bprm)
* trying to access the should-be-closed file descriptors of a process
* undergoing exec(2).
*/
- do_close_on_exec(current->files);
+ do_close_on_exec(current);
return 0;
out:
diff --git a/fs/file.c b/fs/file.c
index 1c2972e3a405..43ed27238b09 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -22,6 +22,7 @@
#include <linux/spinlock.h>
#include <linux/rcupdate.h>
#include <linux/workqueue.h>
+#include <linux/rlimit_noti_kern.h>
unsigned int sysctl_nr_open __read_mostly = 1024*1024;
unsigned int sysctl_nr_open_min = BITS_PER_LONG;
@@ -268,7 +269,7 @@ static inline void __clear_open_fd(unsigned int fd, struct fdtable *fdt)
__clear_bit(fd / BITS_PER_LONG, fdt->full_fds_bits);
}
-static unsigned int count_open_files(struct fdtable *fdt)
+static unsigned int get_last_open_file(struct fdtable *fdt)
{
unsigned int size = fdt->max_fds;
unsigned int i;
@@ -314,7 +315,7 @@ struct files_struct *dup_fd(struct files_struct *oldf, int *errorp)
spin_lock(&oldf->file_lock);
old_fdt = files_fdtable(oldf);
- open_files = count_open_files(old_fdt);
+ open_files = get_last_open_file(old_fdt);
/*
* Check whether we need to allocate a larger fd array and fd set.
@@ -345,7 +346,7 @@ struct files_struct *dup_fd(struct files_struct *oldf, int *errorp)
*/
spin_lock(&oldf->file_lock);
old_fdt = files_fdtable(oldf);
- open_files = count_open_files(old_fdt);
+ open_files = get_last_open_file(old_fdt);
}
copy_fd_bitmaps(new_fdt, old_fdt, open_files);
@@ -477,6 +478,31 @@ struct files_struct init_files = {
.file_lock = __SPIN_LOCK_UNLOCKED(init_files.file_lock),
};
+static unsigned int count_open_fds(struct fdtable *fdt)
+{
+ unsigned int maxfd = fdt->max_fds;
+ unsigned int maxbit = maxfd / BITS_PER_LONG;
+ unsigned int count = 0;
+ int i;
+
+ i = find_next_zero_bit(fdt->full_fds_bits, maxbit, 0);
+ /* If there is no free fds */
+ if (i > maxbit)
+ return maxfd;
+#if BITS_PER_LONG == 32
+#define HWEIGHT_LONG hweight32
+#else
+#define HWEIGHT_LONG hweight64
+#endif
+
+ count += i * BITS_PER_LONG;
+ for (; i < maxbit; ++i)
+ count += HWEIGHT_LONG(fdt->open_fds[i]);
+
+#undef HWEIGHT_LONG
+ return count;
+}
+
static unsigned int find_next_fd(struct fdtable *fdt, unsigned int start)
{
unsigned int maxfd = fdt->max_fds;
@@ -494,9 +520,10 @@ static unsigned int find_next_fd(struct fdtable *fdt, unsigned int start)
/*
* allocate a file descriptor, mark it busy.
*/
-int __alloc_fd(struct files_struct *files,
- unsigned start, unsigned end, unsigned flags)
+int __alloc_fd(struct task_struct *owner, unsigned int start, unsigned int end,
+ unsigned int flags)
{
+ struct files_struct *files = owner->files;
unsigned int fd;
int error;
struct fdtable *fdt;
@@ -539,6 +566,13 @@ int __alloc_fd(struct files_struct *files,
else
__clear_close_on_exec(fd, fdt);
error = fd;
+
+ if (rlimit_noti_watch_active(owner, RLIMIT_NOFILE)) {
+ unsigned int count;
+
+ count = count_open_fds(fdt);
+ rlimit_noti_res_changed(owner, RLIMIT_NOFILE, count - 1, count);
+ }
#if 1
/* Sanity check */
if (rcu_access_pointer(fdt->fd[fd]) != NULL) {
@@ -554,28 +588,37 @@ int __alloc_fd(struct files_struct *files,
static int alloc_fd(unsigned start, unsigned flags)
{
- return __alloc_fd(current->files, start, rlimit(RLIMIT_NOFILE), flags);
+ return __alloc_fd(current,
+ start, rlimit(RLIMIT_NOFILE), flags);
}
int get_unused_fd_flags(unsigned flags)
{
- return __alloc_fd(current->files, 0, rlimit(RLIMIT_NOFILE), flags);
+ return alloc_fd(0, flags);
}
EXPORT_SYMBOL(get_unused_fd_flags);
-static void __put_unused_fd(struct files_struct *files, unsigned int fd)
+static void __put_unused_fd(struct task_struct *owner, unsigned int fd)
{
+ struct files_struct *files = owner->files;
struct fdtable *fdt = files_fdtable(files);
__clear_open_fd(fd, fdt);
if (fd < files->next_fd)
files->next_fd = fd;
+
+ if (rlimit_noti_watch_active(owner, RLIMIT_NOFILE)) {
+ unsigned int count;
+
+ count = count_open_fds(fdt);
+ rlimit_noti_res_changed(owner, RLIMIT_NOFILE, count + 1, count);
+ }
}
void put_unused_fd(unsigned int fd)
{
struct files_struct *files = current->files;
spin_lock(&files->file_lock);
- __put_unused_fd(files, fd);
+ __put_unused_fd(current, fd);
spin_unlock(&files->file_lock);
}
@@ -632,8 +675,9 @@ EXPORT_SYMBOL(fd_install);
/*
* The same warnings as for __alloc_fd()/__fd_install() apply here...
*/
-int __close_fd(struct files_struct *files, unsigned fd)
+int __close_fd(struct task_struct *owner, unsigned int fd)
{
+ struct files_struct *files = owner->files;
struct file *file;
struct fdtable *fdt;
@@ -646,7 +690,7 @@ int __close_fd(struct files_struct *files, unsigned fd)
goto out_unlock;
rcu_assign_pointer(fdt->fd[fd], NULL);
__clear_close_on_exec(fd, fdt);
- __put_unused_fd(files, fd);
+ __put_unused_fd(owner, fd);
spin_unlock(&files->file_lock);
return filp_close(file, files);
@@ -655,10 +699,11 @@ int __close_fd(struct files_struct *files, unsigned fd)
return -EBADF;
}
-void do_close_on_exec(struct files_struct *files)
+void do_close_on_exec(struct task_struct *tsk)
{
unsigned i;
struct fdtable *fdt;
+ struct files_struct *files = tsk->files;
/* exec unshares first */
spin_lock(&files->file_lock);
@@ -680,7 +725,7 @@ void do_close_on_exec(struct files_struct *files)
if (!file)
continue;
rcu_assign_pointer(fdt->fd[fd], NULL);
- __put_unused_fd(files, fd);
+ __put_unused_fd(tsk, fd);
spin_unlock(&files->file_lock);
filp_close(file, files);
cond_resched();
@@ -852,6 +897,16 @@ __releases(&files->file_lock)
__set_close_on_exec(fd, fdt);
else
__clear_close_on_exec(fd, fdt);
+
+ /* If fd was previously open then number of opened fd stays untouched */
+ if (!tofree && rlimit_noti_watch_active(current, RLIMIT_NOFILE)) {
+ unsigned int count;
+
+ count = count_open_fds(fdt);
+ rlimit_noti_res_changed(current, RLIMIT_NOFILE,
+ count - 1, count);
+ }
+
spin_unlock(&files->file_lock);
if (tofree)
@@ -870,7 +925,7 @@ int replace_fd(unsigned fd, struct file *file, unsigned flags)
struct files_struct *files = current->files;
if (!file)
- return __close_fd(files, fd);
+ return __close_fd(current, fd);
if (fd >= rlimit(RLIMIT_NOFILE))
return -EBADF;
diff --git a/fs/open.c b/fs/open.c
index cd0c5be8d012..0fef6288f88c 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -1148,7 +1148,7 @@ EXPORT_SYMBOL(filp_close);
*/
SYSCALL_DEFINE1(close, unsigned int, fd)
{
- int retval = __close_fd(current->files, fd);
+ int retval = __close_fd(current, fd);
/* can't restart close syscall because file table entry was cleared */
if (unlikely(retval == -ERESTARTSYS ||
diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h
index 6e84b2cae6ad..6379753c0037 100644
--- a/include/linux/fdtable.h
+++ b/include/linux/fdtable.h
@@ -106,17 +106,16 @@ void put_files_struct(struct files_struct *fs);
void reset_files_struct(struct files_struct *);
int unshare_files(struct files_struct **);
struct files_struct *dup_fd(struct files_struct *, int *) __latent_entropy;
-void do_close_on_exec(struct files_struct *);
+void do_close_on_exec(struct task_struct *tsk);
int iterate_fd(struct files_struct *, unsigned,
int (*)(const void *, struct file *, unsigned),
const void *);
-extern int __alloc_fd(struct files_struct *files,
- unsigned start, unsigned end, unsigned flags);
+extern int __alloc_fd(struct task_struct *owner,
+ unsigned int start, unsigned int end, unsigned int flags);
extern void __fd_install(struct files_struct *files,
unsigned int fd, struct file *file);
-extern int __close_fd(struct files_struct *files,
- unsigned int fd);
+extern int __close_fd(struct task_struct *owner, unsigned int fd);
extern struct kmem_cache *files_cachep;
--
2.9.3
^ permalink raw reply related [flat|nested] 13+ messages in thread