* [RFC] event about group change
@ 2012-04-02 4:19 Alexander Nikiforov
[not found] ` <4F7928BE.8000502-Sze3O3UU22JBDgjK7y7TUQ@public.gmane.org>
0 siblings, 1 reply; 6+ messages in thread
From: Alexander Nikiforov @ 2012-04-02 4:19 UTC (permalink / raw)
To: Tejun Heo
Cc: cgroups-u79uwXL29TY76Z2rM5mHXA, KAMEZAWA Hiroyuki, Glauber Costa,
a.nikiforov-Sze3O3UU22JBDgjK7y7TUQ
Hello, guys,
During our work we faced a problem: getting events in user space when
processes die (not all of them, only some). In the past we had a special
module which solved this problem.
But now I think that this task is a natural fit for the cgroups subsystem:
move the interesting PIDs into a special group and set an event handler. We
investigated how events are implemented in memcg and implemented a
proof-of-concept patch against mainline (maybe a little bit old, but it's
just an RFC) that applies the same approach to the "tasks" file.
So, I see several possible use cases for others with this patch.
1) Replace wait4() for processes where we don't need the return status and
just want to know about status changes in some group of processes.
2) As an extension of 1, implementing some balancing mechanism in user
space between several groups. For example, if some process in one group
consumes a lot of system resources, processes in other groups can be
protected from slowdown.
3) Extremely useful for libcgroup (in my opinion), because currently it is
unprotected against the state being changed manually.
I need your opinion about this.
Thank you.
-- Best regards, Alex Nikiforov, Mobile SW, Advanced Software Group,
Moscow R&D center, Samsung Electronics
^ permalink raw reply [flat|nested] 6+ messages in thread
* [RFC] patch
[not found] ` <4F7928BE.8000502-Sze3O3UU22JBDgjK7y7TUQ@public.gmane.org>
@ 2012-04-02 4:22 ` Alexander Nikiforov
[not found] ` <4F792976.4090503-Sze3O3UU22JBDgjK7y7TUQ@public.gmane.org>
0 siblings, 1 reply; 6+ messages in thread
From: Alexander Nikiforov @ 2012-04-02 4:22 UTC (permalink / raw)
To: Alexander Nikiforov
Cc: Tejun Heo, cgroups-u79uwXL29TY76Z2rM5mHXA, KAMEZAWA Hiroyuki,
Glauber Costa
[-- Attachment #1: Type: text/plain, Size: 131 bytes --]
--
Best regards,
Alex Nikiforov,
Mobile SW, Advanced Software Group,
Moscow R&D center, Samsung Electronics
[-- Attachment #2: cgroup-task-event.patch --]
[-- Type: text/x-patch, Size: 3645 bytes --]
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index e9b6021..86b0031 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -192,6 +192,11 @@ struct cgroup_pidlist {
struct rw_semaphore mutex;
};
+struct fe_eventfd_list {
+ struct list_head list;
+ struct eventfd_ctx *eventfd;
+};
+
struct cgroup {
unsigned long flags; /* "unsigned long" so bitops work */
@@ -243,6 +248,10 @@ struct cgroup {
/* List of events which userspace want to receive */
struct list_head event_list;
spinlock_t event_list_lock;
+
+ /* fork-exit event */
+ struct list_head fe_notify;
+ spinlock_t fe_list_lock;
};
/*
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index a5d3b53..6c5dda9 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1336,6 +1336,8 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp)
INIT_LIST_HEAD(&cgrp->css_sets);
INIT_LIST_HEAD(&cgrp->release_list);
INIT_LIST_HEAD(&cgrp->pidlists);
+ INIT_LIST_HEAD(&cgrp->fe_notify);
+ spin_lock_init(&cgrp->fe_list_lock);
mutex_init(&cgrp->pidlist_mutex);
INIT_LIST_HEAD(&cgrp->event_list);
spin_lock_init(&cgrp->event_list_lock);
@@ -3659,6 +3661,38 @@ static int cgroup_clone_children_write(struct cgroup *cgrp,
return 0;
}
+static int tasks_register_event(struct cgroup *cgrp,
+ struct cftype *cft, struct eventfd_ctx *eventfd, const char *args)
+{
+ struct fe_eventfd_list *ev;
+
+ ev = kmalloc(sizeof(*ev), GFP_KERNEL);
+ if(!ev)
+ return -ENOMEM;
+
+ spin_lock(&cgrp->fe_list_lock);
+ ev->eventfd = eventfd;
+ list_add(&ev->list, &cgrp->fe_notify);
+ spin_unlock(&cgrp->fe_list_lock);
+
+ return 0;
+}
+
+static void tasks_unregister_event(struct cgroup *cgrp,
+ struct cftype *cft, struct eventfd_ctx *eventfd)
+{
+ struct fe_eventfd_list *ev, *tmp;
+
+ spin_lock(&cgrp->fe_list_lock);
+ list_for_each_entry_safe(ev, tmp, &cgrp->fe_notify, list) {
+ if (ev->eventfd == eventfd) {
+ list_del(&ev->list);
+ kfree(ev);
+ }
+ }
+ spin_unlock(&cgrp->fe_list_lock);
+}
+
/*
* for the common functions, 'private' gives the type of file
*/
@@ -3670,6 +3704,8 @@ static struct cftype files[] = {
.open = cgroup_tasks_open,
.write_u64 = cgroup_tasks_write,
.release = cgroup_pidlist_release,
+ .register_event = tasks_register_event,
+ .unregister_event = tasks_unregister_event,
.mode = S_IRUGO | S_IWUSR,
},
{
@@ -4558,6 +4594,22 @@ void cgroup_fork(struct task_struct *child)
child->cgroups = current->cgroups;
get_css_set(child->cgroups);
INIT_LIST_HEAD(&child->cg_list);
+
+ struct cgroupfs_root *root;
+
+ /* send event to the userspace */
+ mutex_lock(&cgroup_mutex);
+ for_each_active_root(root) {
+ struct cgroup *cgrp;
+ struct fe_eventfd_list *ev;
+
+ cgrp = task_cgroup_from_root(child, root);
+
+ list_for_each_entry(ev, &cgrp->fe_notify, list) {
+ eventfd_signal(ev->eventfd, 1);
+ }
+ }
+ mutex_unlock(&cgroup_mutex);
}
/**
@@ -4653,6 +4705,7 @@ void cgroup_exit(struct task_struct *tsk, int run_callbacks)
{
struct css_set *cg;
int i;
+ struct cgroupfs_root *root;
/*
* Unlink from the css_set task list if necessary.
@@ -4666,6 +4719,20 @@ void cgroup_exit(struct task_struct *tsk, int run_callbacks)
write_unlock(&css_set_lock);
}
+ /* send event to the userspace */
+ mutex_lock(&cgroup_mutex);
+ for_each_active_root(root) {
+ struct cgroup *cgrp;
+ struct fe_eventfd_list *ev;
+
+ cgrp = task_cgroup_from_root(tsk, root);
+
+ list_for_each_entry(ev, &cgrp->fe_notify, list) {
+ eventfd_signal(ev->eventfd, 1);
+ }
+ }
+ mutex_unlock(&cgroup_mutex);
+
/* Reassign the task to the init_css_set. */
task_lock(tsk);
cg = tsk->cgroups;
^ permalink raw reply related [flat|nested] 6+ messages in thread
* Re: [RFC] patch
[not found] ` <20120402102931.GA5885-oKw7cIdHH8eLwutG50LtGA@public.gmane.org>
@ 2012-04-02 10:17 ` Alexander Nikiforov
2012-04-05 3:38 ` Alexander Nikiforov
1 sibling, 0 replies; 6+ messages in thread
From: Alexander Nikiforov @ 2012-04-02 10:17 UTC (permalink / raw)
To: Kirill A. Shutemov
Cc: Tejun Heo, cgroups-u79uwXL29TY76Z2rM5mHXA, KAMEZAWA Hiroyuki,
Glauber Costa, Frederic Weisbecker
Hi Kirill, thanks for your reply
On 04/02/2012 02:29 PM, Kirill A. Shutemov wrote:
> On Mon, Apr 02, 2012 at 08:22:14AM +0400, Alexander Nikiforov wrote:
>
> I think it should be part of task counter css, not core.
> CC list updated.
Maybe, I need time to think about your suggestion.
>> @@ -4558,6 +4594,22 @@ void cgroup_fork(struct task_struct *child)
>> child->cgroups = current->cgroups;
>> get_css_set(child->cgroups);
>> INIT_LIST_HEAD(&child->cg_list);
>> +
>> + struct cgroupfs_root *root;
>> +
>> + /* send event to the userspace */
>> + mutex_lock(&cgroup_mutex);
>> + for_each_active_root(root) {
>> + struct cgroup *cgrp;
>> + struct fe_eventfd_list *ev;
>> +
>> + cgrp = task_cgroup_from_root(child, root);
>> +
>> + list_for_each_entry(ev,&cgrp->fe_notify, list) {
>> + eventfd_signal(ev->eventfd, 1);
>> + }
>> + }
>> + mutex_unlock(&cgroup_mutex);
>> }
> How does it affect performance?
One loop over the active roots, one inside task_cgroup_from_root() through
cg_links, and one through the events. I don't think this is significant,
but maybe I'm wrong.
>
>>
>> /**
>> @@ -4653,6 +4705,7 @@ void cgroup_exit(struct task_struct *tsk, int run_callbacks)
>> {
>> struct css_set *cg;
>> int i;
>> + struct cgroupfs_root *root;
>>
>> /*
>> * Unlink from the css_set task list if necessary.
>> @@ -4666,6 +4719,20 @@ void cgroup_exit(struct task_struct *tsk, int run_callbacks)
>> write_unlock(&css_set_lock);
>> }
>>
>> + /* send event to the userspace */
>> + mutex_lock(&cgroup_mutex);
>> + for_each_active_root(root) {
>> + struct cgroup *cgrp;
>> + struct fe_eventfd_list *ev;
>> +
>> + cgrp = task_cgroup_from_root(tsk, root);
>> +
>> + list_for_each_entry(ev,&cgrp->fe_notify, list) {
>> + eventfd_signal(ev->eventfd, 1);
>> + }
>> + }
>> + mutex_unlock(&cgroup_mutex);
>> +
> I think it's racy. You need to notify userspace after reassigning the
> task, not before.
You are right.
>> /* Reassign the task to the init_css_set. */
>> task_lock(tsk);
>> cg = tsk->cgroups;
>
--
Best regards,
Alex Nikiforov,
Mobile SW, Advanced Software Group,
Moscow R&D center, Samsung Electronics
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [RFC] patch
[not found] ` <4F792976.4090503-Sze3O3UU22JBDgjK7y7TUQ@public.gmane.org>
@ 2012-04-02 10:29 ` Kirill A. Shutemov
[not found] ` <20120402102931.GA5885-oKw7cIdHH8eLwutG50LtGA@public.gmane.org>
0 siblings, 1 reply; 6+ messages in thread
From: Kirill A. Shutemov @ 2012-04-02 10:29 UTC (permalink / raw)
To: Alexander Nikiforov
Cc: Tejun Heo, cgroups-u79uwXL29TY76Z2rM5mHXA, KAMEZAWA Hiroyuki,
Glauber Costa, Frederic Weisbecker
On Mon, Apr 02, 2012 at 08:22:14AM +0400, Alexander Nikiforov wrote:
I think it should be part of task counter css, not core.
CC list updated.
> @@ -4558,6 +4594,22 @@ void cgroup_fork(struct task_struct *child)
> child->cgroups = current->cgroups;
> get_css_set(child->cgroups);
> INIT_LIST_HEAD(&child->cg_list);
> +
> + struct cgroupfs_root *root;
> +
> + /* send event to the userspace */
> + mutex_lock(&cgroup_mutex);
> + for_each_active_root(root) {
> + struct cgroup *cgrp;
> + struct fe_eventfd_list *ev;
> +
> + cgrp = task_cgroup_from_root(child, root);
> +
> + list_for_each_entry(ev, &cgrp->fe_notify, list) {
> + eventfd_signal(ev->eventfd, 1);
> + }
> + }
> + mutex_unlock(&cgroup_mutex);
> }
How does it affect performance?
>
> /**
> @@ -4653,6 +4705,7 @@ void cgroup_exit(struct task_struct *tsk, int run_callbacks)
> {
> struct css_set *cg;
> int i;
> + struct cgroupfs_root *root;
>
> /*
> * Unlink from the css_set task list if necessary.
> @@ -4666,6 +4719,20 @@ void cgroup_exit(struct task_struct *tsk, int run_callbacks)
> write_unlock(&css_set_lock);
> }
>
> + /* send event to the userspace */
> + mutex_lock(&cgroup_mutex);
> + for_each_active_root(root) {
> + struct cgroup *cgrp;
> + struct fe_eventfd_list *ev;
> +
> + cgrp = task_cgroup_from_root(tsk, root);
> +
> + list_for_each_entry(ev, &cgrp->fe_notify, list) {
> + eventfd_signal(ev->eventfd, 1);
> + }
> + }
> + mutex_unlock(&cgroup_mutex);
> +
I think it's racy. You need to notify userspace after reassigning the
task, not before.
> /* Reassign the task to the init_css_set. */
> task_lock(tsk);
> cg = tsk->cgroups;
--
Kirill A. Shutemov
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [RFC] patch
[not found] ` <20120402102931.GA5885-oKw7cIdHH8eLwutG50LtGA@public.gmane.org>
2012-04-02 10:17 ` Alexander Nikiforov
@ 2012-04-05 3:38 ` Alexander Nikiforov
[not found] ` <4F7D13AD.3030309-Sze3O3UU22JBDgjK7y7TUQ@public.gmane.org>
1 sibling, 1 reply; 6+ messages in thread
From: Alexander Nikiforov @ 2012-04-05 3:38 UTC (permalink / raw)
To: Kirill A. Shutemov
Cc: Tejun Heo, cgroups-u79uwXL29TY76Z2rM5mHXA, KAMEZAWA Hiroyuki,
Glauber Costa, Frederic Weisbecker
Hi guys, I investigated
http://lwn.net/Articles/453642/
It seems that this really fits my proposal, and I can move this
functionality from core to this css. But, unfortunately, Frederic's git tree
is not available on kernel.org and the current Linux kernel doesn't have these
patches. Could you be so kind as to provide a link to a kernel with these
patches?
On 04/02/2012 02:29 PM, Kirill A. Shutemov wrote:
> On Mon, Apr 02, 2012 at 08:22:14AM +0400, Alexander Nikiforov wrote:
>
> I think it should be part of task counter css, not core.
> CC list updated.
>
>> @@ -4558,6 +4594,22 @@ void cgroup_fork(struct task_struct *child)
>> child->cgroups = current->cgroups;
>> get_css_set(child->cgroups);
>> INIT_LIST_HEAD(&child->cg_list);
>> +
>> + struct cgroupfs_root *root;
>> +
>> + /* send event to the userspace */
>> + mutex_lock(&cgroup_mutex);
>> + for_each_active_root(root) {
>> + struct cgroup *cgrp;
>> + struct fe_eventfd_list *ev;
>> +
>> + cgrp = task_cgroup_from_root(child, root);
>> +
>> + list_for_each_entry(ev,&cgrp->fe_notify, list) {
>> + eventfd_signal(ev->eventfd, 1);
>> + }
>> + }
>> + mutex_unlock(&cgroup_mutex);
>> }
> How does it affect performance?
>
>>
>> /**
>> @@ -4653,6 +4705,7 @@ void cgroup_exit(struct task_struct *tsk, int run_callbacks)
>> {
>> struct css_set *cg;
>> int i;
>> + struct cgroupfs_root *root;
>>
>> /*
>> * Unlink from the css_set task list if necessary.
>> @@ -4666,6 +4719,20 @@ void cgroup_exit(struct task_struct *tsk, int run_callbacks)
>> write_unlock(&css_set_lock);
>> }
>>
>> + /* send event to the userspace */
>> + mutex_lock(&cgroup_mutex);
>> + for_each_active_root(root) {
>> + struct cgroup *cgrp;
>> + struct fe_eventfd_list *ev;
>> +
>> + cgrp = task_cgroup_from_root(tsk, root);
>> +
>> + list_for_each_entry(ev,&cgrp->fe_notify, list) {
>> + eventfd_signal(ev->eventfd, 1);
>> + }
>> + }
>> + mutex_unlock(&cgroup_mutex);
>> +
> I think it's racy. You need to notify userspace after reassigning the
> task, not before.
>
>> /* Reassign the task to the init_css_set. */
>> task_lock(tsk);
>> cg = tsk->cgroups;
>
--
Best regards,
Alex Nikiforov,
Mobile SW, Advanced Software Group,
Moscow R&D center, Samsung Electronics
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [RFC] patch
[not found] ` <4F7D13AD.3030309-Sze3O3UU22JBDgjK7y7TUQ@public.gmane.org>
@ 2012-04-06 15:43 ` Frederic Weisbecker
0 siblings, 0 replies; 6+ messages in thread
From: Frederic Weisbecker @ 2012-04-06 15:43 UTC (permalink / raw)
To: Alexander Nikiforov
Cc: Kirill A. Shutemov, Tejun Heo, cgroups-u79uwXL29TY76Z2rM5mHXA,
KAMEZAWA Hiroyuki, Glauber Costa
2012/4/5 Alexander Nikiforov <a.nikiforov-Sze3O3UU22JBDgjK7y7TUQ@public.gmane.org>:
> Hi guys, I investigate
>
> http://lwn.net/Articles/453642/
>
> seems that this really fit my proposal and I can make move this
> functionality from core to this css. But, unfortunately, Frederic git is not
> available on kernel.org and current Linux kernel doesn't have this patches.
> Could you be so kind to provide link to the kernel with this patches.
Hi,
Last version was https://lkml.org/lkml/2012/1/31/489
I'm going to repost a new version around next week because I expect a
few conflicts
with the latest selftests changes after the last merge window.
Will Cc you!
Thanks.
>
>
> On 04/02/2012 02:29 PM, Kirill A. Shutemov wrote:
>>
>> On Mon, Apr 02, 2012 at 08:22:14AM +0400, Alexander Nikiforov wrote:
>>
>> I think it should be part of task counter css, not core.
>> CC list updated.
>>
>>> @@ -4558,6 +4594,22 @@ void cgroup_fork(struct task_struct *child)
>>> child->cgroups = current->cgroups;
>>> get_css_set(child->cgroups);
>>> INIT_LIST_HEAD(&child->cg_list);
>>> +
>>> + struct cgroupfs_root *root;
>>> +
>>> + /* send event to the userspace */
>>> + mutex_lock(&cgroup_mutex);
>>> + for_each_active_root(root) {
>>> + struct cgroup *cgrp;
>>> + struct fe_eventfd_list *ev;
>>> +
>>> + cgrp = task_cgroup_from_root(child, root);
>>> +
>>> + list_for_each_entry(ev,&cgrp->fe_notify, list) {
>>>
>>> + eventfd_signal(ev->eventfd, 1);
>>> + }
>>> + }
>>> + mutex_unlock(&cgroup_mutex);
>>> }
>>
>> How does it affect performance?
>>
>>>
>>> /**
>>> @@ -4653,6 +4705,7 @@ void cgroup_exit(struct task_struct *tsk, int
>>> run_callbacks)
>>> {
>>> struct css_set *cg;
>>> int i;
>>> + struct cgroupfs_root *root;
>>>
>>> /*
>>> * Unlink from the css_set task list if necessary.
>>> @@ -4666,6 +4719,20 @@ void cgroup_exit(struct task_struct *tsk, int
>>> run_callbacks)
>>> write_unlock(&css_set_lock);
>>> }
>>>
>>> + /* send event to the userspace */
>>> + mutex_lock(&cgroup_mutex);
>>> + for_each_active_root(root) {
>>> + struct cgroup *cgrp;
>>> + struct fe_eventfd_list *ev;
>>> +
>>> + cgrp = task_cgroup_from_root(tsk, root);
>>> +
>>> + list_for_each_entry(ev,&cgrp->fe_notify, list) {
>>>
>>> + eventfd_signal(ev->eventfd, 1);
>>> + }
>>> + }
>>> + mutex_unlock(&cgroup_mutex);
>>> +
>>
>> I think it's racy. You need to notify userspace after reassigning the
>> task, not before.
>>
>>> /* Reassign the task to the init_css_set. */
>>> task_lock(tsk);
>>> cg = tsk->cgroups;
>>
>>
>
>
> --
> Best regards,
> Alex Nikiforov,
> Mobile SW, Advanced Software Group,
> Moscow R&D center, Samsung Electronics
>
^ permalink raw reply [flat|nested] 6+ messages in thread
end of thread, other threads:[~2012-04-06 15:43 UTC | newest]
Thread overview: 6+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2012-04-02 4:19 [RFC] event about group change Alexander Nikiforov
[not found] ` <4F7928BE.8000502-Sze3O3UU22JBDgjK7y7TUQ@public.gmane.org>
2012-04-02 4:22 ` [RFC] patch Alexander Nikiforov
[not found] ` <4F792976.4090503-Sze3O3UU22JBDgjK7y7TUQ@public.gmane.org>
2012-04-02 10:29 ` Kirill A. Shutemov
[not found] ` <20120402102931.GA5885-oKw7cIdHH8eLwutG50LtGA@public.gmane.org>
2012-04-02 10:17 ` Alexander Nikiforov
2012-04-05 3:38 ` Alexander Nikiforov
[not found] ` <4F7D13AD.3030309-Sze3O3UU22JBDgjK7y7TUQ@public.gmane.org>
2012-04-06 15:43 ` Frederic Weisbecker
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.