All of lore.kernel.org
 help / color / mirror / Atom feed
From: Lai Jiangshan <laijs@cn.fujitsu.com>
To: Andrew Morton <akpm@linux-foundation.org>
Cc: Paul Menage <menage@google.com>,
	Linux Kernel Mailing List <linux-kernel@vger.kernel.org>
Subject: [PATCH -mm 2/2] cgroup: use multibuf for tasks file
Date: Fri, 12 Sep 2008 19:55:21 +0800	[thread overview]
Message-ID: <48CA58A9.4010508@cn.fujitsu.com> (raw)


when we open a really large cgroup for read, we may failed
for kmalloc() is not reliable for allocate a big buffer.

the patch use multibuf for tasks file, every buf is a page
apart from we need only a small buffer.

we use obj_sort() to sort this pids, so we don't need to map this
pages to an continuous memory region.

Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
---
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index bb298de..3d3c3bb 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -141,8 +141,8 @@ struct cgroup {
 
 	/* pids_mutex protects the fields below */
 	struct rw_semaphore pids_mutex;
-	/* Array of process ids in the cgroup */
-	pid_t *tasks_pids;
+	/* Multi-array of process ids in the cgroup */
+	const pid_t *const *tasks_pids;
 	/* How many files are using the current tasks_pids array */
 	int pids_use_count;
 	/* Length of the current tasks_pids array */
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 996865a..f61b152 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -2004,6 +2004,8 @@ int cgroup_scan_tasks(struct cgroup_scanner *scan)
  *
  */
 
+const static int pid_per_page = PAGE_SIZE / sizeof(pid_t);
+
 /*
  * Load into 'pidarray' up to 'npids' of the tasks using cgroup
  * 'cgrp'.  Return actual number of pids loaded.  No need to
@@ -2011,16 +2013,22 @@ int cgroup_scan_tasks(struct cgroup_scanner *scan)
  * read section, so the css_set can't go away, and is
  * immutable after creation.
  */
-static int pid_array_load(pid_t *pidarray, int npids, struct cgroup *cgrp)
+static int pid_array_load(pid_t **pidarray, int npids, struct cgroup *cgrp)
 {
-	int n = 0;
+	int n = 0, i = 0, j = 0;
 	struct cgroup_iter it;
 	struct task_struct *tsk;
 	cgroup_iter_start(cgrp, &it);
 	while ((tsk = cgroup_iter_next(cgrp, &it))) {
 		if (unlikely(n == npids))
 			break;
-		pidarray[n++] = task_pid_vnr(tsk);
+		pidarray[i][j] = task_pid_vnr(tsk);
+		n++;
+		j++;
+		if (j == pid_per_page) {
+			i++;
+			j = 0;
+		}
 	}
 	cgroup_iter_end(cgrp, &it);
 	return n;
@@ -2079,11 +2087,27 @@ err:
 	return ret;
 }
 
-static int cmppid(const void *a, const void *b)
+static inline pid_t getpidofmbuf(const pid_t *const *multibuf, int index)
+{
+	return multibuf[index / pid_per_page][index % pid_per_page];
+}
+
+static int cmppid(const void *c, size_t left, size_t right)
 {
-	return *(pid_t *)a - *(pid_t *)b;
+	return getpidofmbuf(c, left) - getpidofmbuf(c, right);
 }
 
+static inline pid_t *getpidptr(pid_t *const *multibuf, int index)
+{
+	return &multibuf[index / pid_per_page][index % pid_per_page];
+}
+
+static void swappid(void *c, size_t left, size_t right)
+{
+	pid_t rpid = getpidofmbuf(c, right);
+	*getpidptr(c, right) = getpidofmbuf(c, left);
+	*getpidptr(c, left) = rpid;
+}
 
 /*
  * seq_file methods for the "tasks" file. The seq_file position is the
@@ -2100,19 +2124,19 @@ static void *cgroup_tasks_start(struct seq_file *s, loff_t *pos)
 	 * next pid to display, if any
 	 */
 	struct cgroup *cgrp = s->private;
-	int index = 0, pid = *pos;
-	int *iter;
+	int index = 0;
+	pid_t pid = *pos;
 
 	down_read(&cgrp->pids_mutex);
 	if (pid) {
 		int end = cgrp->pids_length;
-		int i;
 		while (index < end) {
 			int mid = (index + end) / 2;
-			if (cgrp->tasks_pids[mid] == pid) {
+			pid_t mpid = getpidofmbuf(cgrp->tasks_pids, mid);
+			if (mpid == pid) {
 				index = mid;
 				break;
-			} else if (cgrp->tasks_pids[mid] <= pid)
+			} else if (mpid <= pid)
 				index = mid + 1;
 			else
 				end = mid;
@@ -2122,9 +2146,8 @@ static void *cgroup_tasks_start(struct seq_file *s, loff_t *pos)
 	if (index >= cgrp->pids_length)
 		return NULL;
 	/* Update the abstract position to be the actual pid that we found */
-	iter = cgrp->tasks_pids + index;
-	*pos = *iter;
-	return iter;
+	*pos = getpidofmbuf(cgrp->tasks_pids, index);
+	return (void *)(index ^ -0x10000); /* we cannot return 0 */
 }
 
 static void cgroup_tasks_stop(struct seq_file *s, void *v)
@@ -2136,25 +2159,26 @@ static void cgroup_tasks_stop(struct seq_file *s, void *v)
 static void *cgroup_tasks_next(struct seq_file *s, void *v, loff_t *pos)
 {
 	struct cgroup *cgrp = s->private;
-	int *p = v;
-	int *end = cgrp->tasks_pids + cgrp->pids_length;
+	int index = (int)v ^ -0x10000;
 
 	/*
 	 * Advance to the next pid in the array. If this goes off the
 	 * end, we're done
 	 */
-	p++;
-	if (p >= end) {
+	index++;
+	if (index >= cgrp->pids_length) {
 		return NULL;
 	} else {
-		*pos = *p;
-		return p;
+		*pos = getpidofmbuf(cgrp->tasks_pids, index);
+		return (void *)(index ^ -0x10000); /* we cannot return 0 */
 	}
 }
 
 static int cgroup_tasks_show(struct seq_file *s, void *v)
 {
-	return seq_printf(s, "%d\n", *(int *)v);
+	struct cgroup *cgrp = s->private;
+	int index = (int)v ^ -0x10000;
+	return seq_printf(s, "%d\n", getpidofmbuf(cgrp->tasks_pids, index));
 }
 
 static struct seq_operations cgroup_tasks_seq_operations = {
@@ -2164,12 +2188,60 @@ static struct seq_operations cgroup_tasks_seq_operations = {
 	.show = cgroup_tasks_show,
 };
 
+static void *alloc_mutibufs(size_t npids)
+{
+	int i, j, npages = (npids + pid_per_page - 1) / pid_per_page;
+	unsigned long *pages;
+
+	if (npids <= pid_per_page - sizeof(pid_t *) / sizeof(pid_t)) {
+		void *pids = kmalloc(sizeof(pid_t *) + sizeof(pid_t) * npids,
+				GFP_KERNEL);
+		if (!pids)
+			return NULL;
+		/* make single buf fake multi-buf */
+		*(void **)pids = pids + sizeof(pid_t *);
+		return pids;
+	}
+
+	pages = kmalloc(sizeof(*pages) * npages, GFP_KERNEL);
+	if (!pages)
+		return NULL;
+
+	for (i = 0; i < npages; i++) {
+		pages[i] = __get_free_page(GFP_KERNEL);
+		if (unlikely(!pages[i]))
+			goto depopulate;
+	}
+	return pages;
+
+depopulate:
+	for (j = 0; j < i; j++)
+		free_page(pages[j]);
+	kfree(pages);
+	return NULL;
+}
+
+static void free_multibufs(void *ptr, size_t npids)
+{
+	if (!ptr)
+		return;
+
+	if (npids > pid_per_page - sizeof(pid_t *) / sizeof(pid_t)) {
+		int i, npages = (npids + pid_per_page - 1) / pid_per_page;
+		unsigned long *pages = ptr;
+		for (i = 0; i < npages; i++)
+			free_page(pages[i]);
+	}
+
+	kfree(ptr);
+}
+
 static void release_cgroup_pid_array(struct cgroup *cgrp)
 {
 	down_write(&cgrp->pids_mutex);
 	BUG_ON(!cgrp->pids_use_count);
 	if (!--cgrp->pids_use_count) {
-		kfree(cgrp->tasks_pids);
+		free_multibufs((void *)cgrp->tasks_pids, cgrp->pids_length);
 		cgrp->tasks_pids = NULL;
 		cgrp->pids_length = 0;
 	}
@@ -2202,7 +2274,7 @@ static struct file_operations cgroup_tasks_operations = {
 static int cgroup_tasks_open(struct inode *unused, struct file *file)
 {
 	struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent);
-	pid_t *pidarray;
+	pid_t **pidarray;
 	int npids;
 	int retval;
 
@@ -2217,19 +2289,19 @@ static int cgroup_tasks_open(struct inode *unused, struct file *file)
 	 * show up until sometime later on.
 	 */
 	npids = cgroup_task_count(cgrp);
-	pidarray = kmalloc(npids * sizeof(pid_t), GFP_KERNEL);
+	pidarray = alloc_mutibufs(npids);
 	if (!pidarray)
 		return -ENOMEM;
 	npids = pid_array_load(pidarray, npids, cgrp);
-	sort(pidarray, npids, sizeof(pid_t), cmppid, NULL);
+	obj_sort(pidarray, 0, npids, cmppid, swappid);
 
 	/*
 	 * Store the array in the cgroup, freeing the old
 	 * array if necessary
 	 */
 	down_write(&cgrp->pids_mutex);
-	kfree(cgrp->tasks_pids);
-	cgrp->tasks_pids = pidarray;
+	free_multibufs((void *)cgrp->tasks_pids, cgrp->pids_length);
+	cgrp->tasks_pids = (const pid_t *const *)pidarray;
 	cgrp->pids_length = npids;
 	cgrp->pids_use_count++;
 	up_write(&cgrp->pids_mutex);



             reply	other threads:[~2008-09-12 11:58 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2008-09-12 11:55 Lai Jiangshan [this message]
2008-09-15 20:28 ` [PATCH -mm 2/2] cgroup: use multibuf for tasks file Paul Menage
2008-09-16  1:37   ` Lai Jiangshan
2008-09-16  2:16     ` Li Zefan
2008-09-16  3:30       ` Lai Jiangshan
2008-09-18 19:52         ` KOSAKI Motohiro

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=48CA58A9.4010508@cn.fujitsu.com \
    --to=laijs@cn.fujitsu.com \
    --cc=akpm@linux-foundation.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=menage@google.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.