All of lore.kernel.org
 help / color / mirror / Atom feed
From: Marco <marco.stornelli@gmail.com>
To: containers@lists.linux-foundation.org
Cc: linux-kernel@vger.kernel.org,
	Linux Embedded <linux-embedded@vger.kernel.org>
Subject: [PATCH] Cgroup: add cgroup members's exit data statistics
Date: Tue, 02 Jun 2009 16:36:29 +0200	[thread overview]
Message-ID: <4A2538ED.3030700@gmail.com> (raw)

From: Marco Stornelli <marco.stornelli@gmail.com>

This patch makes it possible for an application to receive exit statistics only
for processes belonging to a cgroup. The mechanism is the same as the one used for per-cpu exit data statistics.
With this patch, instead of waiting on a specific cpumask, an application can wait for
exit data on a specific container. This also provides a simple death
notifier mechanism: we can select the processes to watch and wait for their death.
A death notification mechanism is especially useful for embedded systems.

Signed-off-by: Marco Stornelli <marco.stornelli@gmail.com>
---

diff -uprN linux-2.6.29-orig/Documentation/accounting/getdelays.c linux-2.6.29/Documentation/accounting/getdelays.c
--- linux-2.6.29-orig/Documentation/accounting/getdelays.c	2009-03-24 00:12:14.000000000 +0100
+++ linux-2.6.29/Documentation/accounting/getdelays.c	2009-06-02 15:47:01.000000000 +0200
@@ -77,9 +77,11 @@ static void usage(void)
 			"[-m cpumask] [-t tgid] [-p pid]\n");
 	fprintf(stderr, "  -d: print delayacct stats\n");
 	fprintf(stderr, "  -i: print IO accounting (works only with -p)\n");
+	fprintf(stderr, "  -q: print context switch accounting\n");
 	fprintf(stderr, "  -l: listen forever\n");
 	fprintf(stderr, "  -v: debug on\n");
-	fprintf(stderr, "  -C: container path\n");
+	fprintf(stderr, "  -C: container path (container statistics)\n");
+	fprintf(stderr, "  -N: container path (death notify)\n");
 }
 
 /*
@@ -263,13 +265,14 @@ int main(int argc, char *argv[])
 	char *logfile = NULL;
 	int loop = 0;
 	int containerset = 0;
+	int containernotify = 0;
 	char containerpath[1024];
 	int cfd = 0;
 
 	struct msgtemplate msg;
 
 	while (1) {
-		c = getopt(argc, argv, "qdiw:r:m:t:p:vlC:");
+		c = getopt(argc, argv, "qdiw:r:m:t:p:vlC:N:");
 		if (c < 0)
 			break;
 
@@ -290,6 +293,10 @@ int main(int argc, char *argv[])
 			containerset = 1;
 			strncpy(containerpath, optarg, strlen(optarg) + 1);
 			break;
+		case 'N':
+			containernotify = 1;
+			strncpy(containerpath, optarg, strlen(optarg) + 1);
+			break;
 		case 'w':
 			logfile = strdup(optarg);
 			printf("write to file %s\n", logfile);
@@ -364,8 +371,13 @@ int main(int argc, char *argv[])
 		}
 	}
 
-	if (tid && containerset) {
-		fprintf(stderr, "Select either -t or -C, not both\n");
+	if (tid && (containerset || containernotify)) {
+		fprintf(stderr, "Select either -t or -C or -N\n");
+		goto err;
+	}
+
+	if (containerset && containernotify) {
+		fprintf(stderr, "Select either -C or -N, not both\n");
 		goto err;
 	}
 
@@ -392,7 +404,23 @@ int main(int argc, char *argv[])
 			goto err;
 		}
 	}
-	if (!maskset && !tid && !containerset) {
+
+	if (containernotify) {
+		cfd = open(containerpath, O_RDONLY);
+		if (cfd < 0) {
+			perror("error opening container file");
+			goto err;
+		}
+		rc = send_cmd(nl_sd, id, mypid, CGROUPSTATS_CMD_GET,
+			      CGROUPSTATS_CMD_ATTR_REGISTER_FD,
+				&cfd, sizeof(__u32));
+		if (rc < 0) {
+			perror("error sending cgroupstats command");
+			goto err;
+		}
+	}
+
+	if (!maskset && !tid && !containerset && !containernotify) {
 		usage();
 		goto err;
 	}
@@ -400,6 +428,7 @@ int main(int argc, char *argv[])
 	do {
 		int i;
 
+		PRINTF("Recv...\n");
 		rep_len = recv(nl_sd, &msg, sizeof(msg), 0);
 		PRINTF("received %d bytes\n", rep_len);
 
@@ -495,6 +524,14 @@ done:
 		if (rc < 0)
 			err(rc, "error sending deregister cpumask\n");
 	}
+	if (containernotify) {
+		rc = send_cmd(nl_sd, id, mypid, CGROUPSTATS_CMD_GET,
+			      CGROUPSTATS_CMD_ATTR_DEREGISTER_FD,
+			      &cfd, sizeof(__u32));
+		printf("Sent deregister container, retval %d\n", rc);
+		if (rc < 0)
+			err(rc, "error sending deregister container\n");
+	}
 err:
 	close(nl_sd);
 	if (fd)
--- linux-2.6.29-orig/kernel/taskstats.c	2009-03-24 00:12:14.000000000 +0100
+++ linux-2.6.29/kernel/taskstats.c	2009-06-02 15:54:37.000000000 +0200
@@ -56,6 +56,8 @@ __read_mostly = {
 static struct nla_policy
 cgroupstats_cmd_get_policy[CGROUPSTATS_CMD_ATTR_MAX+1] __read_mostly = {
 	[CGROUPSTATS_CMD_ATTR_FD] = { .type = NLA_U32 },
+	[CGROUPSTATS_CMD_ATTR_REGISTER_FD] = { .type = NLA_U32 },
+	[CGROUPSTATS_CMD_ATTR_DEREGISTER_FD] = { .type = NLA_U32 },
 };
 
 struct listener {
@@ -70,6 +72,16 @@ struct listener_list {
 };
 static DEFINE_PER_CPU(struct listener_list, listener_array);
 
+struct cgroup_listener {
+	struct list_head list;
+	pid_t pid;
+	char valid;
+	struct dentry *d_cgroup;
+	int ready_to_send;
+};
+
+static struct listener_list cgroup_listener_array;
+
 enum actions {
 	REGISTER,
 	DEREGISTER,
@@ -124,6 +136,63 @@ static int send_reply(struct sk_buff *sk
 }
 
 /*
+ * Send taskstats data in @skb to listeners registered for cgroup members'
+ * exit data
+ */
+static void send_cgroup_listeners(struct sk_buff *skb,
+				struct listener_list *listeners)
+{
+	struct genlmsghdr *genlhdr = nlmsg_data(nlmsg_hdr(skb));
+	struct cgroup_listener *s, *tmp;
+	struct sk_buff *skb_next, *skb_cur = skb;
+	void *reply = genlmsg_data(genlhdr);
+	int rc, delcount = 0;
+
+	rc = genlmsg_end(skb, reply);
+	if (rc < 0) {
+		nlmsg_free(skb);
+		return;
+	}
+
+	rc = 0;
+	down_read(&listeners->sem);
+	list_for_each_entry(s, &listeners->list, list) {
+		if (!s->ready_to_send)
+			continue;
+		skb_next = NULL;
+		if (!list_is_last(&s->list, &listeners->list)) {
+			skb_next = skb_clone(skb_cur, GFP_KERNEL);
+			if (!skb_next)
+				break;
+		}
+		rc = genlmsg_unicast(skb_cur, s->pid);
+		if (rc == -ECONNREFUSED) {
+			s->valid = 0;
+			delcount++;
+		}
+		s->ready_to_send = 0;
+		skb_cur = skb_next;
+	}
+	up_read(&listeners->sem);
+
+	if (skb_cur)
+		nlmsg_free(skb_cur);
+
+	if (!delcount)
+		return;
+
+	/* Delete invalidated entries */
+	down_write(&listeners->sem);
+	list_for_each_entry_safe(s, tmp, &listeners->list, list) {
+		if (!s->valid) {
+			list_del(&s->list);
+			kfree(s);
+		}
+	}
+	up_write(&listeners->sem);
+}
+
+/*
  * Send taskstats data in @skb to listeners registered for @cpu's exit data
  */
 static void send_cpu_listeners(struct sk_buff *skb,
@@ -290,6 +359,43 @@ ret:
 	return;
 }
 
+
+static int add_cgroup_del_listener(pid_t pid, struct dentry *d_cgroup,
+								 int isadd)
+{
+	struct listener_list *listeners = &cgroup_listener_array;
+	struct cgroup_listener *s, *tmp;
+
+	if (isadd == REGISTER) {
+		s = kmalloc(sizeof(struct cgroup_listener), GFP_KERNEL);
+		if (!s)
+			goto cleanup;
+		s->pid = pid;
+		INIT_LIST_HEAD(&s->list);
+		s->valid = 1;
+		s->d_cgroup = d_cgroup;
+		s->ready_to_send = 0;
+
+		down_write(&listeners->sem);
+		list_add(&s->list, &listeners->list);
+		up_write(&listeners->sem);
+		return 0;
+	}
+
+	/* Deregister or cleanup */
+cleanup:
+	down_write(&listeners->sem);
+	list_for_each_entry_safe(s, tmp, &listeners->list, list) {
+		if (s->pid == pid) {
+			list_del(&s->list);
+			kfree(s);
+			break;
+		}
+	}
+	up_write(&listeners->sem);
+	return 0;
+}
+
 static int add_del_listener(pid_t pid, const struct cpumask *mask, int isadd)
 {
 	struct listener_list *listeners;
@@ -391,6 +497,32 @@ static int cgroupstats_user_cmd(struct s
 	struct file *file;
 	int fput_needed;
 
+	na = info->attrs[CGROUPSTATS_CMD_ATTR_REGISTER_FD];
+	if (na) {
+		fd = nla_get_u32(info->attrs[CGROUPSTATS_CMD_ATTR_REGISTER_FD]);
+		file = fget_light(fd, &fput_needed);
+		if (!file)
+			return 0;
+
+		rc = add_cgroup_del_listener(info->snd_pid, file->f_dentry,
+								REGISTER);
+		fput_light(file, fput_needed);
+		return rc;
+	}
+
+	na = info->attrs[CGROUPSTATS_CMD_ATTR_DEREGISTER_FD];
+	if (na) {
+		fd = nla_get_u32(info->attrs[CGROUPSTATS_CMD_ATTR_DEREGISTER_FD]);
+		file = fget_light(fd, &fput_needed);
+		if (!file)
+			return 0;
+
+		rc = add_cgroup_del_listener(info->snd_pid, file->f_dentry,
+								DEREGISTER);
+		fput_light(file, fput_needed);
+		return rc;
+	}
+
 	na = info->attrs[CGROUPSTATS_CMD_ATTR_FD];
 	if (!na)
 		return -EINVAL;
@@ -517,15 +649,32 @@ ret:
 	return sig->stats;
 }
 
+int check_ready_to_send(pid_t pid, struct listener_list *cgroup_list)
+{
+	struct listener_list *listeners = cgroup_list;
+	struct cgroup_listener *s, *tmp;
+	int ready = 0;
+
+	list_for_each_entry_safe(s, tmp, &listeners->list, list) {
+		if (cgroup_verify_pid(pid, s->d_cgroup) > 0) {
+			s->ready_to_send = 1;
+			ready = 1;
+		}
+	}
+
+	return ready;
+}
+
 /* Send pid data out on exit */
 void taskstats_exit(struct task_struct *tsk, int group_dead)
 {
 	int rc;
 	struct listener_list *listeners;
+	struct listener_list *cgroup_listeners = &cgroup_listener_array;
 	struct taskstats *stats;
 	struct sk_buff *rep_skb;
 	size_t size;
-	int is_thread_group;
+	int is_thread_group, target = 0;
 
 	if (!family_registered)
 		return;
@@ -545,7 +694,16 @@ void taskstats_exit(struct task_struct *
 	}
 
 	listeners = &__raw_get_cpu_var(listener_array);
-	if (list_empty(&listeners->list))
+	if (!list_empty(&listeners->list))
+		target |= CPU_TARGET;
+
+	down_write(&cgroup_listeners->sem);
+	if (!list_empty(&cgroup_listeners->list))
+		if (check_ready_to_send(tsk->pid, cgroup_listeners))
+			target |= CGROUP_TARGET;
+	up_write(&cgroup_listeners->sem);
+
+	if (!target)
 		return;
 
 	rc = prepare_reply(NULL, TASKSTATS_CMD_NEW, &rep_skb, size);
@@ -573,7 +731,10 @@ void taskstats_exit(struct task_struct *
 	memcpy(stats, tsk->signal->stats, sizeof(*stats));
 
 send:
-	send_cpu_listeners(rep_skb, listeners);
+	if (target & CPU_TARGET)
+		send_cpu_listeners(rep_skb, listeners);
+	if (target & CGROUP_TARGET)
+		send_cgroup_listeners(rep_skb, cgroup_listeners);
 	return;
 err:
 	nlmsg_free(rep_skb);
@@ -595,12 +756,15 @@ static struct genl_ops cgroupstats_ops =
 void __init taskstats_init_early(void)
 {
 	unsigned int i;
+	struct listener_list *listeners = &cgroup_listener_array;
 
 	taskstats_cache = KMEM_CACHE(taskstats, SLAB_PANIC);
 	for_each_possible_cpu(i) {
 		INIT_LIST_HEAD(&(per_cpu(listener_array, i).list));
 		init_rwsem(&(per_cpu(listener_array, i).sem));
 	}
+	INIT_LIST_HEAD(&listeners->list);
+	init_rwsem(&listeners->sem);
 }
 
 static int __init taskstats_init(void)
--- linux-2.6.29-orig/kernel/cgroup.c	2009-03-24 00:12:14.000000000 +0100
+++ linux-2.6.29/kernel/cgroup.c	2009-06-02 15:50:57.000000000 +0200
@@ -2040,6 +2040,44 @@ static int pid_array_load(pid_t *pidarra
 }
 
 /**
+ * cgroup_verify_pid - verify whether a pid is in a cgroup
+ * @pid: pid of the task to look for
+ * @dentry: dentry of the cgroup directory whose tasks are searched
+ *
+ * Return value is < 0 on error, 0 if the pid is not found, > 0 if it is found
+ */
+int cgroup_verify_pid(pid_t pid, struct dentry *dentry)
+{
+	int ret = -EINVAL;
+	struct cgroup *cgrp;
+	struct cgroup_iter it;
+	struct task_struct *tsk;
+
+	/*
+	 * Validate dentry by checking the superblock operations,
+	 * and make sure it's a directory.
+	 */
+	if (dentry->d_sb->s_op != &cgroup_ops ||
+	    !S_ISDIR(dentry->d_inode->i_mode))
+		 goto err;
+
+	ret = 0;
+	cgrp = dentry->d_fsdata;
+
+	cgroup_iter_start(cgrp, &it);
+	while ((tsk = cgroup_iter_next(cgrp, &it))) {
+		if (tsk->pid == pid) {
+			cgroup_iter_end(cgrp, &it);
+			return 1;
+		}
+	}
+	cgroup_iter_end(cgrp, &it);
+
+err:
+	return ret;
+}
+
+/**
  * cgroupstats_build - build and fill cgroupstats
  * @stats: cgroupstats to fill information into
  * @dentry: A dentry entry belonging to the cgroup for which stats have
--- linux-2.6.29-orig/include/linux/cgroup.h	2009-03-24 00:12:14.000000000 +0100
+++ linux-2.6.29/include/linux/cgroup.h	2009-06-02 15:55:11.000000000 +0200
@@ -32,6 +32,7 @@ extern void cgroup_fork(struct task_stru
 extern void cgroup_fork_callbacks(struct task_struct *p);
 extern void cgroup_post_fork(struct task_struct *p);
 extern void cgroup_exit(struct task_struct *p, int run_callbacks);
+extern int cgroup_verify_pid(pid_t pid, struct dentry *dentry);
 extern int cgroupstats_build(struct cgroupstats *stats,
 				struct dentry *dentry);
 
@@ -450,6 +451,10 @@ static inline void cgroup_exit(struct ta
 
 static inline void cgroup_lock(void) {}
 static inline void cgroup_unlock(void) {}
+static inline int cgroup_verify_pid(pid_t pid, struct dentry *dentry)
+{
+	return -EINVAL;
+}
 static inline int cgroupstats_build(struct cgroupstats *stats,
 					struct dentry *dentry)
 {
--- linux-2.6.29-orig/include/linux/cgroupstats.h	2009-03-24 00:12:14.000000000 +0100
+++ linux-2.6.29/include/linux/cgroupstats.h	2009-06-01 11:37:46.000000000 +0200
@@ -63,6 +63,8 @@ enum {
 enum {
 	CGROUPSTATS_CMD_ATTR_UNSPEC = 0,
 	CGROUPSTATS_CMD_ATTR_FD,
+	CGROUPSTATS_CMD_ATTR_REGISTER_FD,
+	CGROUPSTATS_CMD_ATTR_DEREGISTER_FD,
 	__CGROUPSTATS_CMD_ATTR_MAX,
 };
 
--- linux-2.6.29-orig/include/linux/taskstats.h	2009-03-24 00:12:14.000000000 +0100
+++ linux-2.6.29/include/linux/taskstats.h	2009-06-02 15:35:24.000000000 +0200
@@ -37,6 +37,9 @@
 #define TS_COMM_LEN		32	/* should be >= TASK_COMM_LEN
 					 * in linux/sched.h */
 
+#define CPU_TARGET			0x1
+#define CGROUP_TARGET			0x2
+
 struct taskstats {
 
 	/* The version number of this struct. This field is always set to


             reply	other threads:[~2009-06-02 14:36 UTC|newest]

Thread overview: 2+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2009-06-02 14:36 Marco [this message]
  -- strict thread matches above, loose matches on Subject: below --
2009-06-02 14:36 [PATCH] Cgroup: add cgroup members's exit data statistics Marco

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4A2538ED.3030700@gmail.com \
    --to=marco.stornelli@gmail.com \
    --cc=containers@lists.linux-foundation.org \
    --cc=linux-embedded@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.