* [PATCH 1/8] CGroup Files: Clean up whitespace in struct cftype
[not found] <20080620234358.182933000@menage.corp.google.com>
@ 2008-06-20 23:43 ` menage-hpIqsD4AKlfQT0dZR+AlfA
2008-06-20 23:44 ` [PATCH 2/8] CGroup Files: Add write_string cgroup control file method menage-hpIqsD4AKlfQT0dZR+AlfA
` (9 subsequent siblings)
10 siblings, 0 replies; 17+ messages in thread
From: menage-hpIqsD4AKlfQT0dZR+AlfA @ 2008-06-20 23:43 UTC (permalink / raw)
To: pj-sJ/iWh9BUns, xemul-GEFAQzZX7r8dnm+yROfE0A,
balbir-xthvdsQ13ZrQT0dZR+AlfA, serue-r/Jw6+rmf7HQT0dZR+AlfA,
akpm-de/tnXTf+JLsfHDXvbKv3WD2FQJk+8+b
Cc: containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
linux-kernel-u79uwXL29TY76Z2rM5mHXA
[-- Attachment #1: cgroup_remove_spaces.patch --]
[-- Type: text/plain, Size: 3314 bytes --]
This patch removes some extraneous spaces from method declarations in
struct cftype, to fit in with conventional kernel style.
Signed-off-by: Paul Menage <menage-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>
---
include/linux/cgroup.h | 32 ++++++++++++++++----------------
1 file changed, 16 insertions(+), 16 deletions(-)
Index: cws-2.6.26-rc5-mm3/include/linux/cgroup.h
===================================================================
--- cws-2.6.26-rc5-mm3.orig/include/linux/cgroup.h
+++ cws-2.6.26-rc5-mm3/include/linux/cgroup.h
@@ -205,48 +205,48 @@ struct cftype {
* subsystem, followed by a period */
char name[MAX_CFTYPE_NAME];
int private;
- int (*open) (struct inode *inode, struct file *file);
- ssize_t (*read) (struct cgroup *cgrp, struct cftype *cft,
- struct file *file,
- char __user *buf, size_t nbytes, loff_t *ppos);
+ int (*open)(struct inode *inode, struct file *file);
+ ssize_t (*read)(struct cgroup *cgrp, struct cftype *cft,
+ struct file *file,
+ char __user *buf, size_t nbytes, loff_t *ppos);
/*
* read_u64() is a shortcut for the common case of returning a
* single integer. Use it in place of read()
*/
- u64 (*read_u64) (struct cgroup *cgrp, struct cftype *cft);
+ u64 (*read_u64)(struct cgroup *cgrp, struct cftype *cft);
/*
* read_s64() is a signed version of read_u64()
*/
- s64 (*read_s64) (struct cgroup *cgrp, struct cftype *cft);
+ s64 (*read_s64)(struct cgroup *cgrp, struct cftype *cft);
/*
* read_map() is used for defining a map of key/value
* pairs. It should call cb->fill(cb, key, value) for each
* entry. The key/value pairs (and their ordering) should not
* change between reboots.
*/
- int (*read_map) (struct cgroup *cont, struct cftype *cft,
- struct cgroup_map_cb *cb);
+ int (*read_map)(struct cgroup *cont, struct cftype *cft,
+ struct cgroup_map_cb *cb);
/*
* read_seq_string() is used for outputting a simple sequence
* using seqfile.
*/
- int (*read_seq_string) (struct cgroup *cont, struct cftype *cft,
- struct seq_file *m);
+ int (*read_seq_string)(struct cgroup *cont, struct cftype *cft,
+ struct seq_file *m);
- ssize_t (*write) (struct cgroup *cgrp, struct cftype *cft,
- struct file *file,
- const char __user *buf, size_t nbytes, loff_t *ppos);
+ ssize_t (*write)(struct cgroup *cgrp, struct cftype *cft,
+ struct file *file,
+ const char __user *buf, size_t nbytes, loff_t *ppos);
/*
* write_u64() is a shortcut for the common case of accepting
* a single integer (as parsed by simple_strtoull) from
* userspace. Use in place of write(); return 0 or error.
*/
- int (*write_u64) (struct cgroup *cgrp, struct cftype *cft, u64 val);
+ int (*write_u64)(struct cgroup *cgrp, struct cftype *cft, u64 val);
/*
* write_s64() is a signed version of write_u64()
*/
- int (*write_s64) (struct cgroup *cgrp, struct cftype *cft, s64 val);
+ int (*write_s64)(struct cgroup *cgrp, struct cftype *cft, s64 val);
/*
* trigger() callback can be used to get some kick from the
@@ -256,7 +256,7 @@ struct cftype {
*/
int (*trigger)(struct cgroup *cgrp, unsigned int event);
- int (*release) (struct inode *inode, struct file *file);
+ int (*release)(struct inode *inode, struct file *file);
};
struct cgroup_scanner {
--
^ permalink raw reply [flat|nested] 17+ messages in thread* [PATCH 2/8] CGroup Files: Add write_string cgroup control file method
[not found] <20080620234358.182933000@menage.corp.google.com>
2008-06-20 23:43 ` [PATCH 1/8] CGroup Files: Clean up whitespace in struct cftype menage-hpIqsD4AKlfQT0dZR+AlfA
@ 2008-06-20 23:44 ` menage-hpIqsD4AKlfQT0dZR+AlfA
2008-06-20 23:44 ` [PATCH 3/8] CGroup Files: Move the release_agent file to use typed handlers menage-hpIqsD4AKlfQT0dZR+AlfA
` (8 subsequent siblings)
10 siblings, 0 replies; 17+ messages in thread
From: menage-hpIqsD4AKlfQT0dZR+AlfA @ 2008-06-20 23:44 UTC (permalink / raw)
To: pj-sJ/iWh9BUns, xemul-GEFAQzZX7r8dnm+yROfE0A,
balbir-xthvdsQ13ZrQT0dZR+AlfA, serue-r/Jw6+rmf7HQT0dZR+AlfA,
akpm-de/tnXTf+JLsfHDXvbKv3WD2FQJk+8+b
Cc: containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
linux-kernel-u79uwXL29TY76Z2rM5mHXA
[-- Attachment #1: cgroup-write-string.patch --]
[-- Type: text/plain, Size: 3546 bytes --]
This patch adds a write_string() method for cgroups control files. The
semantics are that a buffer is copied from userspace to kernelspace
and the handler function invoked on that buffer. The buffer is
guaranteed to be nul-terminated, and no longer than max_write_len
(defaulting to 64 bytes if unspecified). Later patches will convert
existing raw file write handlers in control group subsystems to use
this method.
Signed-off-by: Paul Menage <menage-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>
---
include/linux/cgroup.h | 14 ++++++++++++++
kernel/cgroup.c | 35 +++++++++++++++++++++++++++++++++++
2 files changed, 49 insertions(+)
Index: cws-2.6.26-rc5-mm3/include/linux/cgroup.h
===================================================================
--- cws-2.6.26-rc5-mm3.orig/include/linux/cgroup.h
+++ cws-2.6.26-rc5-mm3/include/linux/cgroup.h
@@ -205,6 +205,13 @@ struct cftype {
* subsystem, followed by a period */
char name[MAX_CFTYPE_NAME];
int private;
+
+ /*
+ * If non-zero, defines the maximum length of string that can
+ * be passed to write_string; defaults to 64
+ */
+ size_t max_write_len;
+
int (*open)(struct inode *inode, struct file *file);
ssize_t (*read)(struct cgroup *cgrp, struct cftype *cft,
struct file *file,
@@ -249,6 +256,13 @@ struct cftype {
int (*write_s64)(struct cgroup *cgrp, struct cftype *cft, s64 val);
/*
+ * write_string() is passed a nul-terminated kernelspace
+ * buffer of maximum length determined by max_write_len.
+ * Returns 0 or -ve error code.
+ */
+ int (*write_string)(struct cgroup *cgrp, struct cftype *cft,
+ const char *buffer);
+ /*
* trigger() callback can be used to get some kick from the
* userspace, when the actual string written is not important
* at all. The private field can be used to determine the
Index: cws-2.6.26-rc5-mm3/kernel/cgroup.c
===================================================================
--- cws-2.6.26-rc5-mm3.orig/kernel/cgroup.c
+++ cws-2.6.26-rc5-mm3/kernel/cgroup.c
@@ -1363,6 +1363,39 @@ static ssize_t cgroup_write_X64(struct c
return retval;
}
+static ssize_t cgroup_write_string(struct cgroup *cgrp, struct cftype *cft,
+ struct file *file,
+ const char __user *userbuf,
+ size_t nbytes, loff_t *unused_ppos)
+{
+ char local_buffer[64];
+ int retval = 0;
+ size_t max_bytes = cft->max_write_len;
+ char *buffer = local_buffer;
+
+ if (!max_bytes)
+ max_bytes = sizeof(local_buffer) - 1;
+ if (nbytes >= max_bytes)
+ return -E2BIG;
+ /* Allocate a dynamic buffer if we need one */
+ if (nbytes >= sizeof(local_buffer)) {
+ buffer = kmalloc(nbytes + 1, GFP_KERNEL);
+ if (buffer == NULL)
+ return -ENOMEM;
+ }
+ if (nbytes && copy_from_user(buffer, userbuf, nbytes))
+ return -EFAULT;
+
+ buffer[nbytes] = 0; /* nul-terminate */
+ strstrip(buffer);
+ retval = cft->write_string(cgrp, cft, buffer);
+ if (!retval)
+ retval = nbytes;
+ if (buffer != local_buffer)
+ kfree(buffer);
+ return retval;
+}
+
static ssize_t cgroup_common_file_write(struct cgroup *cgrp,
struct cftype *cft,
struct file *file,
@@ -1440,6 +1473,8 @@ static ssize_t cgroup_file_write(struct
return cft->write(cgrp, cft, file, buf, nbytes, ppos);
if (cft->write_u64 || cft->write_s64)
return cgroup_write_X64(cgrp, cft, file, buf, nbytes, ppos);
+ if (cft->write_string)
+ return cgroup_write_string(cgrp, cft, file, buf, nbytes, ppos);
if (cft->trigger) {
int ret = cft->trigger(cgrp, (unsigned int)cft->private);
return ret ? ret : nbytes;
--
^ permalink raw reply [flat|nested] 17+ messages in thread* [PATCH 3/8] CGroup Files: Move the release_agent file to use typed handlers
[not found] <20080620234358.182933000@menage.corp.google.com>
2008-06-20 23:43 ` [PATCH 1/8] CGroup Files: Clean up whitespace in struct cftype menage-hpIqsD4AKlfQT0dZR+AlfA
2008-06-20 23:44 ` [PATCH 2/8] CGroup Files: Add write_string cgroup control file method menage-hpIqsD4AKlfQT0dZR+AlfA
@ 2008-06-20 23:44 ` menage-hpIqsD4AKlfQT0dZR+AlfA
2008-06-20 23:44 ` [PATCH 4/8] CGroup Files: Move notify_on_release file to separate write handler menage-hpIqsD4AKlfQT0dZR+AlfA
` (7 subsequent siblings)
10 siblings, 0 replies; 17+ messages in thread
From: menage-hpIqsD4AKlfQT0dZR+AlfA @ 2008-06-20 23:44 UTC (permalink / raw)
To: pj-sJ/iWh9BUns, xemul-GEFAQzZX7r8dnm+yROfE0A,
balbir-xthvdsQ13ZrQT0dZR+AlfA, serue-r/Jw6+rmf7HQT0dZR+AlfA,
akpm-de/tnXTf+JLsfHDXvbKv3WD2FQJk+8+b
Cc: containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
linux-kernel-u79uwXL29TY76Z2rM5mHXA
[-- Attachment #1: cgroup_release_agent_file.patch --]
[-- Type: text/plain, Size: 6516 bytes --]
Adds cgroup_release_agent_write() and cgroup_release_agent_show()
methods to handle writing/reading the path to a cgroup hierarchy's
release agent. As a result, cgroup_common_file_read() is now unnecessary.
As part of the change, a previously-tolerated race in
cgroup_release_agent() is avoided by copying the current
release_agent_path prior to calling call_usermode_helper().
Signed-off-by: Paul Menage <menage-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>
---
include/linux/cgroup.h | 2
kernel/cgroup.c | 125 ++++++++++++++++++++++---------------------------
2 files changed, 59 insertions(+), 68 deletions(-)
Index: cws-2.6.26-rc5-mm3/kernel/cgroup.c
===================================================================
--- cws-2.6.26-rc5-mm3.orig/kernel/cgroup.c
+++ cws-2.6.26-rc5-mm3/kernel/cgroup.c
@@ -89,11 +89,7 @@ struct cgroupfs_root {
/* Hierarchy-specific flags */
unsigned long flags;
- /* The path to use for release notifications. No locking
- * between setting and use - so if userspace updates this
- * while child cgroups exist, you could miss a
- * notification. We ensure that it's always a valid
- * NUL-terminated string */
+ /* The path to use for release notifications. */
char release_agent_path[PATH_MAX];
};
@@ -1329,6 +1325,45 @@ enum cgroup_filetype {
FILE_RELEASE_AGENT,
};
+/**
+ * cgroup_lock_live_group - take cgroup_mutex and check that cgrp is alive.
+ * @cgrp: the cgroup to be checked for liveness
+ *
+ * Returns true (with lock held) on success, or false (with no lock
+ * held) on failure.
+ */
+int cgroup_lock_live_group(struct cgroup *cgrp)
+{
+ mutex_lock(&cgroup_mutex);
+ if (cgroup_is_removed(cgrp)) {
+ mutex_unlock(&cgroup_mutex);
+ return false;
+ }
+ return true;
+}
+
+static int cgroup_release_agent_write(struct cgroup *cgrp, struct cftype *cft,
+ const char *buffer)
+{
+ BUILD_BUG_ON(sizeof(cgrp->root->release_agent_path) < PATH_MAX);
+ if (!cgroup_lock_live_group(cgrp))
+ return -ENODEV;
+ strcpy(cgrp->root->release_agent_path, buffer);
+ mutex_unlock(&cgroup_mutex);
+ return 0;
+}
+
+static int cgroup_release_agent_show(struct cgroup *cgrp, struct cftype *cft,
+ struct seq_file *seq)
+{
+ if (!cgroup_lock_live_group(cgrp))
+ return -ENODEV;
+ seq_puts(seq, cgrp->root->release_agent_path);
+ seq_putc(seq, '\n');
+ mutex_unlock(&cgroup_mutex);
+ return 0;
+}
+
static ssize_t cgroup_write_X64(struct cgroup *cgrp, struct cftype *cft,
struct file *file,
const char __user *userbuf,
@@ -1443,10 +1478,6 @@ static ssize_t cgroup_common_file_write(
else
clear_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
break;
- case FILE_RELEASE_AGENT:
- BUILD_BUG_ON(sizeof(cgrp->root->release_agent_path) < PATH_MAX);
- strcpy(cgrp->root->release_agent_path, buffer);
- break;
default:
retval = -EINVAL;
goto out2;
@@ -1506,49 +1537,6 @@ static ssize_t cgroup_read_s64(struct cg
return simple_read_from_buffer(buf, nbytes, ppos, tmp, len);
}
-static ssize_t cgroup_common_file_read(struct cgroup *cgrp,
- struct cftype *cft,
- struct file *file,
- char __user *buf,
- size_t nbytes, loff_t *ppos)
-{
- enum cgroup_filetype type = cft->private;
- char *page;
- ssize_t retval = 0;
- char *s;
-
- if (!(page = (char *)__get_free_page(GFP_KERNEL)))
- return -ENOMEM;
-
- s = page;
-
- switch (type) {
- case FILE_RELEASE_AGENT:
- {
- struct cgroupfs_root *root;
- size_t n;
- mutex_lock(&cgroup_mutex);
- root = cgrp->root;
- n = strnlen(root->release_agent_path,
- sizeof(root->release_agent_path));
- n = min(n, (size_t) PAGE_SIZE);
- strncpy(s, root->release_agent_path, n);
- mutex_unlock(&cgroup_mutex);
- s += n;
- break;
- }
- default:
- retval = -EINVAL;
- goto out;
- }
- *s++ = '\n';
-
- retval = simple_read_from_buffer(buf, nbytes, ppos, page, s - page);
-out:
- free_page((unsigned long)page);
- return retval;
-}
-
static ssize_t cgroup_file_read(struct file *file, char __user *buf,
size_t nbytes, loff_t *ppos)
{
@@ -1606,6 +1594,7 @@ static int cgroup_seqfile_release(struct
static struct file_operations cgroup_seqfile_operations = {
.read = seq_read,
+ .write = cgroup_file_write,
.llseek = seq_lseek,
.release = cgroup_seqfile_release,
};
@@ -2283,8 +2272,9 @@ static struct cftype files[] = {
static struct cftype cft_release_agent = {
.name = "release_agent",
- .read = cgroup_common_file_read,
- .write = cgroup_common_file_write,
+ .read_seq_string = cgroup_release_agent_show,
+ .write_string = cgroup_release_agent_write,
+ .max_write_len = PATH_MAX,
.private = FILE_RELEASE_AGENT,
};
@@ -3113,27 +3103,24 @@ static void cgroup_release_agent(struct
while (!list_empty(&release_list)) {
char *argv[3], *envp[3];
int i;
- char *pathbuf;
+ char *pathbuf = NULL, *agentbuf = NULL;
struct cgroup *cgrp = list_entry(release_list.next,
struct cgroup,
release_list);
list_del_init(&cgrp->release_list);
spin_unlock(&release_list_lock);
pathbuf = kmalloc(PAGE_SIZE, GFP_KERNEL);
- if (!pathbuf) {
- spin_lock(&release_list_lock);
- continue;
- }
-
- if (cgroup_path(cgrp, pathbuf, PAGE_SIZE) < 0) {
- kfree(pathbuf);
- spin_lock(&release_list_lock);
- continue;
- }
+ if (!pathbuf)
+ goto continue_free;
+ if (cgroup_path(cgrp, pathbuf, PAGE_SIZE) < 0)
+ goto continue_free;
+ agentbuf = kstrdup(cgrp->root->release_agent_path, GFP_KERNEL);
+ if (!agentbuf)
+ goto continue_free;
i = 0;
- argv[i++] = cgrp->root->release_agent_path;
- argv[i++] = (char *)pathbuf;
+ argv[i++] = agentbuf;
+ argv[i++] = pathbuf;
argv[i] = NULL;
i = 0;
@@ -3147,8 +3134,10 @@ static void cgroup_release_agent(struct
* be a slow process */
mutex_unlock(&cgroup_mutex);
call_usermodehelper(argv[0], argv, envp, UMH_WAIT_EXEC);
- kfree(pathbuf);
mutex_lock(&cgroup_mutex);
+ continue_free:
+ kfree(pathbuf);
+ kfree(agentbuf);
spin_lock(&release_list_lock);
}
spin_unlock(&release_list_lock);
Index: cws-2.6.26-rc5-mm3/include/linux/cgroup.h
===================================================================
--- cws-2.6.26-rc5-mm3.orig/include/linux/cgroup.h
+++ cws-2.6.26-rc5-mm3/include/linux/cgroup.h
@@ -295,6 +295,8 @@ int cgroup_add_files(struct cgroup *cgrp
int cgroup_is_removed(const struct cgroup *cgrp);
+int cgroup_lock_live_group(struct cgroup *cgrp);
+
int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen);
int cgroup_task_count(const struct cgroup *cgrp);
--
^ permalink raw reply [flat|nested] 17+ messages in thread* [PATCH 4/8] CGroup Files: Move notify_on_release file to separate write handler
[not found] <20080620234358.182933000@menage.corp.google.com>
` (2 preceding siblings ...)
2008-06-20 23:44 ` [PATCH 3/8] CGroup Files: Move the release_agent file to use typed handlers menage-hpIqsD4AKlfQT0dZR+AlfA
@ 2008-06-20 23:44 ` menage-hpIqsD4AKlfQT0dZR+AlfA
2008-06-20 23:44 ` [PATCH 5/8] CGroup Files: Turn attach_task_by_pid directly into a cgroup " menage-hpIqsD4AKlfQT0dZR+AlfA
` (6 subsequent siblings)
10 siblings, 0 replies; 17+ messages in thread
From: menage-hpIqsD4AKlfQT0dZR+AlfA @ 2008-06-20 23:44 UTC (permalink / raw)
To: pj-sJ/iWh9BUns, xemul-GEFAQzZX7r8dnm+yROfE0A,
balbir-xthvdsQ13ZrQT0dZR+AlfA, serue-r/Jw6+rmf7HQT0dZR+AlfA,
akpm-de/tnXTf+JLsfHDXvbKv3WD2FQJk+8+b
Cc: containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
linux-kernel-u79uwXL29TY76Z2rM5mHXA
[-- Attachment #1: cgroup_notify_on_release_file.patch --]
[-- Type: text/plain, Size: 1740 bytes --]
This patch moves the write handler for the cgroups notify_on_release
file into a separate handler. This handler requires no cgroups locking
since it relies on atomic bitops for synchronization.
Signed-off-by: Paul Menage <menage-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>
---
kernel/cgroup.c | 21 +++++++++++++--------
1 file changed, 13 insertions(+), 8 deletions(-)
Index: cws-2.6.26-rc5-mm3/kernel/cgroup.c
===================================================================
--- cws-2.6.26-rc5-mm3.orig/kernel/cgroup.c
+++ cws-2.6.26-rc5-mm3/kernel/cgroup.c
@@ -1471,13 +1471,6 @@ static ssize_t cgroup_common_file_write(
case FILE_TASKLIST:
retval = attach_task_by_pid(cgrp, buffer);
break;
- case FILE_NOTIFY_ON_RELEASE:
- clear_bit(CGRP_RELEASABLE, &cgrp->flags);
- if (simple_strtoul(buffer, NULL, 10) != 0)
- set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
- else
- clear_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
- break;
default:
retval = -EINVAL;
goto out2;
@@ -2248,6 +2241,18 @@ static u64 cgroup_read_notify_on_release
return notify_on_release(cgrp);
}
+static int cgroup_write_notify_on_release(struct cgroup *cgrp,
+ struct cftype *cft,
+ u64 val)
+{
+ clear_bit(CGRP_RELEASABLE, &cgrp->flags);
+ if (val)
+ set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
+ else
+ clear_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
+ return 0;
+}
+
/*
* for the common functions, 'private' gives the type of file
*/
@@ -2264,7 +2269,7 @@ static struct cftype files[] = {
{
.name = "notify_on_release",
.read_u64 = cgroup_read_notify_on_release,
- .write = cgroup_common_file_write,
+ .write_u64 = cgroup_write_notify_on_release,
.private = FILE_NOTIFY_ON_RELEASE,
},
};
--
^ permalink raw reply [flat|nested] 17+ messages in thread* [PATCH 5/8] CGroup Files: Turn attach_task_by_pid directly into a cgroup write handler
[not found] <20080620234358.182933000@menage.corp.google.com>
` (3 preceding siblings ...)
2008-06-20 23:44 ` [PATCH 4/8] CGroup Files: Move notify_on_release file to separate write handler menage-hpIqsD4AKlfQT0dZR+AlfA
@ 2008-06-20 23:44 ` menage-hpIqsD4AKlfQT0dZR+AlfA
2008-06-20 23:44 ` [PATCH 6/8] CGroup Files: Remove cpuset_common_file_write() menage-hpIqsD4AKlfQT0dZR+AlfA
` (5 subsequent siblings)
10 siblings, 0 replies; 17+ messages in thread
From: menage-hpIqsD4AKlfQT0dZR+AlfA @ 2008-06-20 23:44 UTC (permalink / raw)
To: pj-sJ/iWh9BUns, xemul-GEFAQzZX7r8dnm+yROfE0A,
balbir-xthvdsQ13ZrQT0dZR+AlfA, serue-r/Jw6+rmf7HQT0dZR+AlfA,
akpm-de/tnXTf+JLsfHDXvbKv3WD2FQJk+8+b
Cc: containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
linux-kernel-u79uwXL29TY76Z2rM5mHXA
[-- Attachment #1: cgroup_tasks_file.patch --]
[-- Type: text/plain, Size: 3824 bytes --]
This patch changes attach_task_by_pid() to take a u64 rather than a
string; as a result it can be called directly as a control groups
write_u64 handler, and cgroup_common_file_write() can be removed.
Signed-off-by: Paul Menage <menage-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>
---
kernel/cgroup.c | 80 +++++++++-----------------------------------------------
1 file changed, 14 insertions(+), 66 deletions(-)
Index: cws-2.6.26-rc5-mm3/kernel/cgroup.c
===================================================================
--- cws-2.6.26-rc5-mm3.orig/kernel/cgroup.c
+++ cws-2.6.26-rc5-mm3/kernel/cgroup.c
@@ -504,10 +504,6 @@ static struct css_set *find_css_set(
* knows that the cgroup won't be removed, as cgroup_rmdir()
* needs that mutex.
*
- * The cgroup_common_file_write handler for operations that modify
- * the cgroup hierarchy holds cgroup_mutex across the entire operation,
- * single threading all such cgroup modifications across the system.
- *
* The fork and exit callbacks cgroup_fork() and cgroup_exit(), don't
* (usually) take cgroup_mutex. These are the two most performance
* critical pieces of code here. The exception occurs on cgroup_exit(),
@@ -1279,18 +1275,14 @@ int cgroup_attach_task(struct cgroup *cg
}
/*
- * Attach task with pid 'pid' to cgroup 'cgrp'. Call with
- * cgroup_mutex, may take task_lock of task
+ * Attach task with pid 'pid' to cgroup 'cgrp'. Call with cgroup_mutex
+ * held. May take task_lock of task
*/
-static int attach_task_by_pid(struct cgroup *cgrp, char *pidbuf)
+static int attach_task_by_pid(struct cgroup *cgrp, u64 pid)
{
- pid_t pid;
struct task_struct *tsk;
int ret;
- if (sscanf(pidbuf, "%d", &pid) != 1)
- return -EIO;
-
if (pid) {
rcu_read_lock();
tsk = find_task_by_vpid(pid);
@@ -1316,6 +1308,16 @@ static int attach_task_by_pid(struct cgr
return ret;
}
+static int cgroup_tasks_write(struct cgroup *cgrp, struct cftype *cft, u64 pid)
+{
+ int ret;
+ if (!cgroup_lock_live_group(cgrp))
+ return -ENODEV;
+ ret = attach_task_by_pid(cgrp, pid);
+ cgroup_unlock();
+ return ret;
+}
+
/* The various types of files and directories in a cgroup file system */
enum cgroup_filetype {
FILE_ROOT,
@@ -1431,60 +1433,6 @@ static ssize_t cgroup_write_string(struc
return retval;
}
-static ssize_t cgroup_common_file_write(struct cgroup *cgrp,
- struct cftype *cft,
- struct file *file,
- const char __user *userbuf,
- size_t nbytes, loff_t *unused_ppos)
-{
- enum cgroup_filetype type = cft->private;
- char *buffer;
- int retval = 0;
-
- if (nbytes >= PATH_MAX)
- return -E2BIG;
-
- /* +1 for nul-terminator */
- buffer = kmalloc(nbytes + 1, GFP_KERNEL);
- if (buffer == NULL)
- return -ENOMEM;
-
- if (copy_from_user(buffer, userbuf, nbytes)) {
- retval = -EFAULT;
- goto out1;
- }
- buffer[nbytes] = 0; /* nul-terminate */
- strstrip(buffer); /* strip -just- trailing whitespace */
-
- mutex_lock(&cgroup_mutex);
-
- /*
- * This was already checked for in cgroup_file_write(), but
- * check again now we're holding cgroup_mutex.
- */
- if (cgroup_is_removed(cgrp)) {
- retval = -ENODEV;
- goto out2;
- }
-
- switch (type) {
- case FILE_TASKLIST:
- retval = attach_task_by_pid(cgrp, buffer);
- break;
- default:
- retval = -EINVAL;
- goto out2;
- }
-
- if (retval == 0)
- retval = nbytes;
-out2:
- mutex_unlock(&cgroup_mutex);
-out1:
- kfree(buffer);
- return retval;
-}
-
static ssize_t cgroup_file_write(struct file *file, const char __user *buf,
size_t nbytes, loff_t *ppos)
{
@@ -2262,7 +2210,7 @@ static struct cftype files[] = {
.name = "tasks",
.open = cgroup_tasks_open,
.read = cgroup_tasks_read,
- .write = cgroup_common_file_write,
+ .write_u64 = cgroup_tasks_write,
.release = cgroup_tasks_release,
.private = FILE_TASKLIST,
},
--
^ permalink raw reply [flat|nested] 17+ messages in thread* [PATCH 6/8] CGroup Files: Remove cpuset_common_file_write()
[not found] <20080620234358.182933000@menage.corp.google.com>
` (4 preceding siblings ...)
2008-06-20 23:44 ` [PATCH 5/8] CGroup Files: Turn attach_task_by_pid directly into a cgroup " menage-hpIqsD4AKlfQT0dZR+AlfA
@ 2008-06-20 23:44 ` menage-hpIqsD4AKlfQT0dZR+AlfA
2008-06-20 23:44 ` [PATCH 7/8] CGroup Files: Convert devcgroup_access_write() into a cgroup write_string() handler menage-hpIqsD4AKlfQT0dZR+AlfA
` (4 subsequent siblings)
10 siblings, 0 replies; 17+ messages in thread
From: menage-hpIqsD4AKlfQT0dZR+AlfA @ 2008-06-20 23:44 UTC (permalink / raw)
To: pj-sJ/iWh9BUns, xemul-GEFAQzZX7r8dnm+yROfE0A,
balbir-xthvdsQ13ZrQT0dZR+AlfA, serue-r/Jw6+rmf7HQT0dZR+AlfA,
akpm-de/tnXTf+JLsfHDXvbKv3WD2FQJk+8+b
Cc: containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
linux-kernel-u79uwXL29TY76Z2rM5mHXA
[-- Attachment #1: cpuset_files.patch --]
[-- Type: text/plain, Size: 5825 bytes --]
This patch tweaks the signatures of the update_cpumask() and
update_nodemask() functions so that they can be called directly as
handlers for the new cgroups write_string() method.
This allows cpuset_common_file_write() to be removed.
Signed-off-by: Paul Menage <menage-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>
---
This patch fails checkpatch.pl which objects to the use of
NR_CPUS. It's not clear that there's a way to express this more
cleanly, other than leaving it empty and initializing it early in the
boot process.
kernel/cpuset.c | 109 +++++++++++++++++---------------------------------------
1 file changed, 35 insertions(+), 74 deletions(-)
Index: cws-2.6.26-rc5-mm3/kernel/cpuset.c
===================================================================
--- cws-2.6.26-rc5-mm3.orig/kernel/cpuset.c
+++ cws-2.6.26-rc5-mm3/kernel/cpuset.c
@@ -227,10 +227,6 @@ static struct cpuset top_cpuset = {
* The task_struct fields mems_allowed and mems_generation may only
* be accessed in the context of that task, so require no locks.
*
- * The cpuset_common_file_write handler for operations that modify
- * the cpuset hierarchy holds cgroup_mutex across the entire operation,
- * single threading all such cpuset modifications across the system.
- *
* The cpuset_common_file_read() handlers only hold callback_mutex across
* small pieces of code, such as when reading out possibly multi-word
* cpumasks and nodemasks.
@@ -801,7 +797,7 @@ static int update_tasks_cpumask(struct c
* @cs: the cpuset to consider
* @buf: buffer of cpu numbers written to this cpuset
*/
-static int update_cpumask(struct cpuset *cs, char *buf)
+static int update_cpumask(struct cpuset *cs, const char *buf)
{
struct cpuset trialcs;
int retval;
@@ -819,7 +815,6 @@ static int update_cpumask(struct cpuset
* that parsing. The validate_change() call ensures that cpusets
* with tasks have cpus.
*/
- buf = strstrip(buf);
if (!*buf) {
cpus_clear(trialcs.cpus_allowed);
} else {
@@ -1016,7 +1011,7 @@ done:
* lock each such tasks mm->mmap_sem, scan its vma's and rebind
* their mempolicies to the cpusets new mems_allowed.
*/
-static int update_nodemask(struct cpuset *cs, char *buf)
+static int update_nodemask(struct cpuset *cs, const char *buf)
{
struct cpuset trialcs;
nodemask_t oldmem;
@@ -1037,7 +1032,6 @@ static int update_nodemask(struct cpuset
* that parsing. The validate_change() call ensures that cpusets
* with tasks have memory.
*/
- buf = strstrip(buf);
if (!*buf) {
nodes_clear(trialcs.mems_allowed);
} else {
@@ -1280,72 +1274,14 @@ typedef enum {
FILE_SPREAD_SLAB,
} cpuset_filetype_t;
-static ssize_t cpuset_common_file_write(struct cgroup *cont,
- struct cftype *cft,
- struct file *file,
- const char __user *userbuf,
- size_t nbytes, loff_t *unused_ppos)
-{
- struct cpuset *cs = cgroup_cs(cont);
- cpuset_filetype_t type = cft->private;
- char *buffer;
- int retval = 0;
-
- /* Crude upper limit on largest legitimate cpulist user might write. */
- if (nbytes > 100U + 6 * max(NR_CPUS, MAX_NUMNODES))
- return -E2BIG;
-
- /* +1 for nul-terminator */
- buffer = kmalloc(nbytes + 1, GFP_KERNEL);
- if (!buffer)
- return -ENOMEM;
-
- if (copy_from_user(buffer, userbuf, nbytes)) {
- retval = -EFAULT;
- goto out1;
- }
- buffer[nbytes] = 0; /* nul-terminate */
-
- cgroup_lock();
-
- if (cgroup_is_removed(cont)) {
- retval = -ENODEV;
- goto out2;
- }
-
- switch (type) {
- case FILE_CPULIST:
- retval = update_cpumask(cs, buffer);
- break;
- case FILE_MEMLIST:
- retval = update_nodemask(cs, buffer);
- break;
- default:
- retval = -EINVAL;
- goto out2;
- }
-
- if (retval == 0)
- retval = nbytes;
-out2:
- cgroup_unlock();
-out1:
- kfree(buffer);
- return retval;
-}
-
static int cpuset_write_u64(struct cgroup *cgrp, struct cftype *cft, u64 val)
{
int retval = 0;
struct cpuset *cs = cgroup_cs(cgrp);
cpuset_filetype_t type = cft->private;
- cgroup_lock();
-
- if (cgroup_is_removed(cgrp)) {
- cgroup_unlock();
+ if (!cgroup_lock_live_group(cgrp))
return -ENODEV;
- }
switch (type) {
case FILE_CPU_EXCLUSIVE:
@@ -1391,12 +1327,9 @@ static int cpuset_write_s64(struct cgrou
struct cpuset *cs = cgroup_cs(cgrp);
cpuset_filetype_t type = cft->private;
- cgroup_lock();
-
- if (cgroup_is_removed(cgrp)) {
- cgroup_unlock();
+ if (!cgroup_lock_live_group(cgrp))
return -ENODEV;
- }
+
switch (type) {
case FILE_SCHED_RELAX_DOMAIN_LEVEL:
retval = update_relax_domain_level(cs, val);
@@ -1410,6 +1343,32 @@ static int cpuset_write_s64(struct cgrou
}
/*
+ * Common handling for a write to a "cpus" or "mems" file.
+ */
+static int cpuset_write_resmask(struct cgroup *cgrp, struct cftype *cft,
+ const char *buf)
+{
+ int retval = 0;
+
+ if (!cgroup_lock_live_group(cgrp))
+ return -ENODEV;
+
+ switch (cft->private) {
+ case FILE_CPULIST:
+ retval = update_cpumask(cgroup_cs(cgrp), buf);
+ break;
+ case FILE_MEMLIST:
+ retval = update_nodemask(cgroup_cs(cgrp), buf);
+ break;
+ default:
+ retval = -EINVAL;
+ break;
+ }
+ cgroup_unlock();
+ return retval;
+}
+
+/*
* These ascii lists should be read in a single call, by using a user
* buffer large enough to hold the entire map. If read in smaller
* chunks, there is no guarantee of atomicity. Since the display format
@@ -1528,14 +1487,16 @@ static struct cftype files[] = {
{
.name = "cpus",
.read = cpuset_common_file_read,
- .write = cpuset_common_file_write,
+ .write_string = cpuset_write_resmask,
+ .max_write_len = (100U + 6 * NR_CPUS),
.private = FILE_CPULIST,
},
{
.name = "mems",
.read = cpuset_common_file_read,
- .write = cpuset_common_file_write,
+ .write_string = cpuset_write_resmask,
+ .max_write_len = (100U + 6 * MAX_NUMNODES),
.private = FILE_MEMLIST,
},
--
^ permalink raw reply [flat|nested] 17+ messages in thread* [PATCH 7/8] CGroup Files: Convert devcgroup_access_write() into a cgroup write_string() handler
[not found] <20080620234358.182933000@menage.corp.google.com>
` (5 preceding siblings ...)
2008-06-20 23:44 ` [PATCH 6/8] CGroup Files: Remove cpuset_common_file_write() menage-hpIqsD4AKlfQT0dZR+AlfA
@ 2008-06-20 23:44 ` menage-hpIqsD4AKlfQT0dZR+AlfA
2008-06-20 23:44 ` [PATCH 8/8] CGroup Files: Convert res_counter_write() to be a cgroups " menage-hpIqsD4AKlfQT0dZR+AlfA
` (3 subsequent siblings)
10 siblings, 0 replies; 17+ messages in thread
From: menage-hpIqsD4AKlfQT0dZR+AlfA @ 2008-06-20 23:44 UTC (permalink / raw)
To: pj-sJ/iWh9BUns, xemul-GEFAQzZX7r8dnm+yROfE0A,
balbir-xthvdsQ13ZrQT0dZR+AlfA, serue-r/Jw6+rmf7HQT0dZR+AlfA,
akpm-de/tnXTf+JLsfHDXvbKv3WD2FQJk+8+b
Cc: containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
linux-kernel-u79uwXL29TY76Z2rM5mHXA
[-- Attachment #1: devcgroup_files.patch --]
[-- Type: text/plain, Size: 5162 bytes --]
This patch converts devcgroup_access_write() from a raw file handler
into a handler for the cgroup write_string() method. This allows some
boilerplate copying/locking/checking to be removed and simplifies the
cleanup path, since these functions are performed by the cgroups
framework before calling the handler.
Signed-off-by: Paul Menage <menage-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>
---
security/device_cgroup.c | 103 +++++++++++++++++------------------------------
1 file changed, 39 insertions(+), 64 deletions(-)
Index: cws-2.6.26-rc5-mm3/security/device_cgroup.c
===================================================================
--- cws-2.6.26-rc5-mm3.orig/security/device_cgroup.c
+++ cws-2.6.26-rc5-mm3/security/device_cgroup.c
@@ -59,6 +59,11 @@ static inline struct dev_cgroup *cgroup_
return css_to_devcgroup(cgroup_subsys_state(cgroup, devices_subsys_id));
}
+static inline struct dev_cgroup *task_devcgroup(struct task_struct *task)
+{
+ return css_to_devcgroup(task_subsys_state(task, devices_subsys_id));
+}
+
struct cgroup_subsys devices_subsys;
static int devcgroup_can_attach(struct cgroup_subsys *ss,
@@ -312,10 +317,10 @@ static int may_access_whitelist(struct d
* when adding a new allow rule to a device whitelist, the rule
* must be allowed in the parent device
*/
-static int parent_has_perm(struct cgroup *childcg,
+static int parent_has_perm(struct dev_cgroup *childcg,
struct dev_whitelist_item *wh)
{
- struct cgroup *pcg = childcg->parent;
+ struct cgroup *pcg = childcg->css.cgroup->parent;
struct dev_cgroup *parent;
int ret;
@@ -341,39 +346,18 @@ static int parent_has_perm(struct cgroup
* new access is only allowed if you're in the top-level cgroup, or your
* parent cgroup has the access you're asking for.
*/
-static ssize_t devcgroup_access_write(struct cgroup *cgroup, struct cftype *cft,
- struct file *file, const char __user *userbuf,
- size_t nbytes, loff_t *ppos)
-{
- struct cgroup *cur_cgroup;
- struct dev_cgroup *devcgroup, *cur_devcgroup;
- int filetype = cft->private;
- char *buffer, *b;
+static int devcgroup_update_access(struct dev_cgroup *devcgroup,
+ int filetype, const char *buffer)
+{
+ struct dev_cgroup *cur_devcgroup;
+ const char *b;
int retval = 0, count;
struct dev_whitelist_item wh;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
- devcgroup = cgroup_to_devcgroup(cgroup);
- cur_cgroup = task_cgroup(current, devices_subsys.subsys_id);
- cur_devcgroup = cgroup_to_devcgroup(cur_cgroup);
-
- buffer = kmalloc(nbytes+1, GFP_KERNEL);
- if (!buffer)
- return -ENOMEM;
-
- if (copy_from_user(buffer, userbuf, nbytes)) {
- retval = -EFAULT;
- goto out1;
- }
- buffer[nbytes] = 0; /* nul-terminate */
-
- cgroup_lock();
- if (cgroup_is_removed(cgroup)) {
- retval = -ENODEV;
- goto out2;
- }
+ cur_devcgroup = task_devcgroup(current);
memset(&wh, 0, sizeof(wh));
b = buffer;
@@ -390,14 +374,11 @@ static ssize_t devcgroup_access_write(st
wh.type = DEV_CHAR;
break;
default:
- retval = -EINVAL;
- goto out2;
+ return -EINVAL;
}
b++;
- if (!isspace(*b)) {
- retval = -EINVAL;
- goto out2;
- }
+ if (!isspace(*b))
+ return -EINVAL;
b++;
if (*b == '*') {
wh.major = ~0;
@@ -409,13 +390,10 @@ static ssize_t devcgroup_access_write(st
b++;
}
} else {
- retval = -EINVAL;
- goto out2;
- }
- if (*b != ':') {
- retval = -EINVAL;
- goto out2;
+ return -EINVAL;
}
+ if (*b != ':')
+ return -EINVAL;
b++;
/* read minor */
@@ -429,13 +407,10 @@ static ssize_t devcgroup_access_write(st
b++;
}
} else {
- retval = -EINVAL;
- goto out2;
- }
- if (!isspace(*b)) {
- retval = -EINVAL;
- goto out2;
+ return -EINVAL;
}
+ if (!isspace(*b))
+ return -EINVAL;
for (b++, count = 0; count < 3; count++, b++) {
switch (*b) {
case 'r':
@@ -452,8 +427,7 @@ static ssize_t devcgroup_access_write(st
count = 3;
break;
default:
- retval = -EINVAL;
- goto out2;
+ return -EINVAL;
}
}
@@ -461,38 +435,39 @@ handle:
retval = 0;
switch (filetype) {
case DEVCG_ALLOW:
- if (!parent_has_perm(cgroup, &wh))
- retval = -EPERM;
- else
- retval = dev_whitelist_add(devcgroup, &wh);
- break;
+ if (!parent_has_perm(devcgroup, &wh))
+ return -EPERM;
+ return dev_whitelist_add(devcgroup, &wh);
case DEVCG_DENY:
dev_whitelist_rm(devcgroup, &wh);
break;
default:
- retval = -EINVAL;
- goto out2;
+ return -EINVAL;
}
+ return 0;
+}
- if (retval == 0)
- retval = nbytes;
-
-out2:
+static int devcgroup_access_write(struct cgroup *cgrp, struct cftype *cft,
+ const char *buffer)
+{
+ int retval;
+ if (!cgroup_lock_live_group(cgrp))
+ return -ENODEV;
+ retval = devcgroup_update_access(cgroup_to_devcgroup(cgrp),
+ cft->private, buffer);
cgroup_unlock();
-out1:
- kfree(buffer);
return retval;
}
static struct cftype dev_cgroup_files[] = {
{
.name = "allow",
- .write = devcgroup_access_write,
+ .write_string = devcgroup_access_write,
.private = DEVCG_ALLOW,
},
{
.name = "deny",
- .write = devcgroup_access_write,
+ .write_string = devcgroup_access_write,
.private = DEVCG_DENY,
},
{
--
^ permalink raw reply [flat|nested] 17+ messages in thread* [PATCH 8/8] CGroup Files: Convert res_counter_write() to be a cgroups write_string() handler
[not found] <20080620234358.182933000@menage.corp.google.com>
` (6 preceding siblings ...)
2008-06-20 23:44 ` [PATCH 7/8] CGroup Files: Convert devcgroup_access_write() into a cgroup write_string() handler menage-hpIqsD4AKlfQT0dZR+AlfA
@ 2008-06-20 23:44 ` menage-hpIqsD4AKlfQT0dZR+AlfA
[not found] ` <20080621000730.052131000@menage.corp.google.com>
` (2 subsequent siblings)
10 siblings, 0 replies; 17+ messages in thread
From: menage-hpIqsD4AKlfQT0dZR+AlfA @ 2008-06-20 23:44 UTC (permalink / raw)
To: pj-sJ/iWh9BUns, xemul-GEFAQzZX7r8dnm+yROfE0A,
balbir-xthvdsQ13ZrQT0dZR+AlfA, serue-r/Jw6+rmf7HQT0dZR+AlfA,
akpm-de/tnXTf+JLsfHDXvbKv3WD2FQJk+8+b
Cc: containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
linux-kernel-u79uwXL29TY76Z2rM5mHXA
[-- Attachment #1: res_counter_write.patch --]
[-- Type: text/plain, Size: 6419 bytes --]
Currently read_counter_write() is a raw file handler even though it's
ultimately taking a number, since in some cases it wants to
pre-process the string when converting it to a number.
This patch converts res_counter_write() from a raw file handler to a
write_string() handler; this allows some of the boilerplate
copying/locking/checking to be removed, and simplies the cleanup path,
since these functions are now performed by the cgroups framework.
Signed-off-by: Paul Menage <menage-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>
---
include/linux/res_counter.h | 11 +++++++---
kernel/res_counter.c | 47 ++++++++++++++++++--------------------------
mm/memcontrol.c | 24 ++++------------------
mm/memrlimitcgroup.c | 23 ++++-----------------
4 files changed, 38 insertions(+), 67 deletions(-)
Index: cws-2.6.26-rc5-mm3/include/linux/res_counter.h
===================================================================
--- cws-2.6.26-rc5-mm3.orig/include/linux/res_counter.h
+++ cws-2.6.26-rc5-mm3/include/linux/res_counter.h
@@ -63,9 +63,14 @@ u64 res_counter_read_u64(struct res_coun
ssize_t res_counter_read(struct res_counter *counter, int member,
const char __user *buf, size_t nbytes, loff_t *pos,
int (*read_strategy)(unsigned long long val, char *s));
-ssize_t res_counter_write(struct res_counter *counter, int member,
- const char __user *buf, size_t nbytes, loff_t *pos,
- int (*write_strategy)(char *buf, unsigned long long *val));
+
+typedef int (*write_strategy_fn)(const char *buf, unsigned long long *val);
+
+int res_counter_memparse_write_strategy(const char *buf,
+ unsigned long long *res);
+
+int res_counter_write(struct res_counter *counter, int member,
+ const char *buffer, write_strategy_fn write_strategy);
/*
* the field descriptors. one for each member of res_counter
Index: cws-2.6.26-rc5-mm3/kernel/res_counter.c
===================================================================
--- cws-2.6.26-rc5-mm3.orig/kernel/res_counter.c
+++ cws-2.6.26-rc5-mm3/kernel/res_counter.c
@@ -102,44 +102,37 @@ u64 res_counter_read_u64(struct res_coun
return *res_counter_member(counter, member);
}
-ssize_t res_counter_write(struct res_counter *counter, int member,
- const char __user *userbuf, size_t nbytes, loff_t *pos,
- int (*write_strategy)(char *st_buf, unsigned long long *val))
+int res_counter_memparse_write_strategy(const char *buf,
+ unsigned long long *res)
{
- int ret;
- char *buf, *end;
- unsigned long flags;
- unsigned long long tmp, *val;
+ char *end;
+ /* FIXME - make memparse() take const char* args */
+ *res = memparse((char *)buf, &end);
+ if (*end != '\0')
+ return -EINVAL;
- buf = kmalloc(nbytes + 1, GFP_KERNEL);
- ret = -ENOMEM;
- if (buf == NULL)
- goto out;
-
- buf[nbytes] = '\0';
- ret = -EFAULT;
- if (copy_from_user(buf, userbuf, nbytes))
- goto out_free;
+ *res = PAGE_ALIGN(*res);
+ return 0;
+}
- ret = -EINVAL;
+int res_counter_write(struct res_counter *counter, int member,
+ const char *buf, write_strategy_fn write_strategy)
+{
+ char *end;
+ unsigned long flags;
+ unsigned long long tmp, *val;
- strstrip(buf);
if (write_strategy) {
- if (write_strategy(buf, &tmp)) {
- goto out_free;
- }
+ if (write_strategy(buf, &tmp))
+ return -EINVAL;
} else {
tmp = simple_strtoull(buf, &end, 10);
if (*end != '\0')
- goto out_free;
+ return -EINVAL;
}
spin_lock_irqsave(&counter->lock, flags);
val = res_counter_member(counter, member);
*val = tmp;
spin_unlock_irqrestore(&counter->lock, flags);
- ret = nbytes;
-out_free:
- kfree(buf);
-out:
- return ret;
+ return 0;
}
Index: cws-2.6.26-rc5-mm3/mm/memcontrol.c
===================================================================
--- cws-2.6.26-rc5-mm3.orig/mm/memcontrol.c
+++ cws-2.6.26-rc5-mm3/mm/memcontrol.c
@@ -853,32 +853,18 @@ out:
return ret;
}
-static int mem_cgroup_write_strategy(char *buf, unsigned long long *tmp)
-{
- *tmp = memparse(buf, &buf);
- if (*buf != '\0')
- return -EINVAL;
-
- /*
- * Round up the value to the closest page size
- */
- *tmp = ((*tmp + PAGE_SIZE - 1) >> PAGE_SHIFT) << PAGE_SHIFT;
- return 0;
-}
-
static u64 mem_cgroup_read(struct cgroup *cont, struct cftype *cft)
{
return res_counter_read_u64(&mem_cgroup_from_cont(cont)->res,
cft->private);
}
-static ssize_t mem_cgroup_write(struct cgroup *cont, struct cftype *cft,
- struct file *file, const char __user *userbuf,
- size_t nbytes, loff_t *ppos)
+static int mem_cgroup_write(struct cgroup *cont, struct cftype *cft,
+ const char *buffer)
{
return res_counter_write(&mem_cgroup_from_cont(cont)->res,
- cft->private, userbuf, nbytes, ppos,
- mem_cgroup_write_strategy);
+ cft->private, buffer,
+ res_counter_memparse_write_strategy);
}
static int mem_cgroup_reset(struct cgroup *cont, unsigned int event)
@@ -968,7 +954,7 @@ static struct cftype mem_cgroup_files[]
{
.name = "limit_in_bytes",
.private = RES_LIMIT,
- .write = mem_cgroup_write,
+ .write_string = mem_cgroup_write,
.read_u64 = mem_cgroup_read,
},
{
Index: cws-2.6.26-rc5-mm3/mm/memrlimitcgroup.c
===================================================================
--- cws-2.6.26-rc5-mm3.orig/mm/memrlimitcgroup.c
+++ cws-2.6.26-rc5-mm3/mm/memrlimitcgroup.c
@@ -118,25 +118,12 @@ static u64 memrlimit_cgroup_read(struct
cft->private);
}
-static int memrlimit_cgroup_write_strategy(char *buf, unsigned long long *tmp)
-{
- *tmp = memparse(buf, &buf);
- if (*buf != '\0')
- return -EINVAL;
-
- *tmp = PAGE_ALIGN(*tmp);
- return 0;
-}
-
-static ssize_t memrlimit_cgroup_write(struct cgroup *cgrp, struct cftype *cft,
- struct file *file,
- const char __user *userbuf,
- size_t nbytes,
- loff_t *ppos)
+static int memrlimit_cgroup_write(struct cgroup *cgrp, struct cftype *cft,
+ const char *buffer)
{
return res_counter_write(&memrlimit_cgroup_from_cgrp(cgrp)->as_res,
- cft->private, userbuf, nbytes, ppos,
- memrlimit_cgroup_write_strategy);
+ cft->private, buffer,
+ res_counter_memparse_write_strategy);
}
static struct cftype memrlimit_cgroup_files[] = {
@@ -148,7 +135,7 @@ static struct cftype memrlimit_cgroup_fi
{
.name = "limit_in_bytes",
.private = RES_LIMIT,
- .write = memrlimit_cgroup_write,
+ .write_string = memrlimit_cgroup_write,
.read_u64 = memrlimit_cgroup_read,
},
{
--
^ permalink raw reply [flat|nested] 17+ messages in thread[parent not found: <20080621000730.052131000@menage.corp.google.com>]
[parent not found: <20080621000730.052131000-B63HFAS8fGlSzHKm+aFRNNkmqwFzkYv6@public.gmane.org>]
* Re: [PATCH 2/8] CGroup Files: Add write_string cgroup control file method
[not found] ` <20080621000730.052131000-B63HFAS8fGlSzHKm+aFRNNkmqwFzkYv6@public.gmane.org>
@ 2008-06-22 14:32 ` Balbir Singh
2008-06-24 15:34 ` Serge E. Hallyn
2008-06-24 23:19 ` Andrew Morton
2 siblings, 0 replies; 17+ messages in thread
From: Balbir Singh @ 2008-06-22 14:32 UTC (permalink / raw)
To: menage-hpIqsD4AKlfQT0dZR+AlfA
Cc: containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
linux-kernel-u79uwXL29TY76Z2rM5mHXA,
akpm-de/tnXTf+JLsfHDXvbKv3WD2FQJk+8+b,
xemul-GEFAQzZX7r8dnm+yROfE0A
* menage-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org <menage-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org> [2008-06-20 16:44:00]:
> This patch adds a write_string() method for cgroups control files. The
> semantics are that a buffer is copied from userspace to kernelspace
> and the handler function invoked on that buffer. The buffer is
> guaranteed to be nul-terminated, and no longer than max_write_len
> (defaulting to 64 bytes if unspecified). Later patches will convert
> existing raw file write handlers in control group subsystems to use
> this method.
>
> Signed-off-by: Paul Menage <menage-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>
>
> ---
> include/linux/cgroup.h | 14 ++++++++++++++
> kernel/cgroup.c | 35 +++++++++++++++++++++++++++++++++++
> 2 files changed, 49 insertions(+)
>
> Index: cws-2.6.26-rc5-mm3/include/linux/cgroup.h
> ===================================================================
> --- cws-2.6.26-rc5-mm3.orig/include/linux/cgroup.h
> +++ cws-2.6.26-rc5-mm3/include/linux/cgroup.h
> @@ -205,6 +205,13 @@ struct cftype {
> * subsystem, followed by a period */
> char name[MAX_CFTYPE_NAME];
> int private;
> +
> + /*
> + * If non-zero, defines the maximum length of string that can
> + * be passed to write_string; defaults to 64
> + */
> + size_t max_write_len;
> +
> int (*open)(struct inode *inode, struct file *file);
> ssize_t (*read)(struct cgroup *cgrp, struct cftype *cft,
> struct file *file,
> @@ -249,6 +256,13 @@ struct cftype {
> int (*write_s64)(struct cgroup *cgrp, struct cftype *cft, s64 val);
>
> /*
> + * write_string() is passed a nul-terminated kernelspace
> + * buffer of maximum length determined by max_write_len.
> + * Returns 0 or -ve error code.
> + */
> + int (*write_string)(struct cgroup *cgrp, struct cftype *cft,
> + const char *buffer);
> + /*
> * trigger() callback can be used to get some kick from the
> * userspace, when the actual string written is not important
> * at all. The private field can be used to determine the
> Index: cws-2.6.26-rc5-mm3/kernel/cgroup.c
> ===================================================================
> --- cws-2.6.26-rc5-mm3.orig/kernel/cgroup.c
> +++ cws-2.6.26-rc5-mm3/kernel/cgroup.c
> @@ -1363,6 +1363,39 @@ static ssize_t cgroup_write_X64(struct c
> return retval;
> }
>
> +static ssize_t cgroup_write_string(struct cgroup *cgrp, struct cftype *cft,
> + struct file *file,
> + const char __user *userbuf,
> + size_t nbytes, loff_t *unused_ppos)
> +{
> + char local_buffer[64];
64? a define would be more meaningful
> + int retval = 0;
> + size_t max_bytes = cft->max_write_len;
> + char *buffer = local_buffer;
> +
> + if (!max_bytes)
> + max_bytes = sizeof(local_buffer) - 1;
> + if (nbytes >= max_bytes)
> + return -E2BIG;
> + /* Allocate a dynamic buffer if we need one */
> + if (nbytes >= sizeof(local_buffer)) {
> + buffer = kmalloc(nbytes + 1, GFP_KERNEL);
> + if (buffer == NULL)
> + return -ENOMEM;
> + }
> + if (nbytes && copy_from_user(buffer, userbuf, nbytes))
> + return -EFAULT;
> +
> + buffer[nbytes] = 0; /* nul-terminate */
> + strstrip(buffer);
> + retval = cft->write_string(cgrp, cft, buffer);
> + if (!retval)
> + retval = nbytes;
> + if (buffer != local_buffer)
> + kfree(buffer);
> + return retval;
> +}
> +
> static ssize_t cgroup_common_file_write(struct cgroup *cgrp,
> struct cftype *cft,
> struct file *file,
> @@ -1440,6 +1473,8 @@ static ssize_t cgroup_file_write(struct
> return cft->write(cgrp, cft, file, buf, nbytes, ppos);
> if (cft->write_u64 || cft->write_s64)
> return cgroup_write_X64(cgrp, cft, file, buf, nbytes, ppos);
> + if (cft->write_string)
> + return cgroup_write_string(cgrp, cft, file, buf, nbytes, ppos);
> if (cft->trigger) {
> int ret = cft->trigger(cgrp, (unsigned int)cft->private);
> return ret ? ret : nbytes;
>
> --
Looks good
Acked-by: Balbir Singh <balbir-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
--
Warm Regards,
Balbir Singh
Linux Technology Center
IBM, ISTL
^ permalink raw reply [flat|nested] 17+ messages in thread* Re: [PATCH 2/8] CGroup Files: Add write_string cgroup control file method
[not found] ` <20080621000730.052131000-B63HFAS8fGlSzHKm+aFRNNkmqwFzkYv6@public.gmane.org>
2008-06-22 14:32 ` [PATCH 2/8] CGroup Files: Add write_string cgroup control file method Balbir Singh
@ 2008-06-24 15:34 ` Serge E. Hallyn
2008-06-24 23:19 ` Andrew Morton
2 siblings, 0 replies; 17+ messages in thread
From: Serge E. Hallyn @ 2008-06-24 15:34 UTC (permalink / raw)
To: menage-hpIqsD4AKlfQT0dZR+AlfA
Cc: containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
linux-kernel-u79uwXL29TY76Z2rM5mHXA,
akpm-de/tnXTf+JLsfHDXvbKv3WD2FQJk+8+b,
xemul-GEFAQzZX7r8dnm+yROfE0A
Quoting menage-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org (menage-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org):
> This patch adds a write_string() method for cgroups control files. The
> semantics are that a buffer is copied from userspace to kernelspace
> and the handler function invoked on that buffer. The buffer is
> guaranteed to be nul-terminated, and no longer than max_write_len
> (defaulting to 64 bytes if unspecified). Later patches will convert
> existing raw file write handlers in control group subsystems to use
> this method.
>
> Signed-off-by: Paul Menage <menage-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>
Looks sane to me.
Acked-by: Serge Hallyn <serue-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
thanks,
-serge
>
> ---
> include/linux/cgroup.h | 14 ++++++++++++++
> kernel/cgroup.c | 35 +++++++++++++++++++++++++++++++++++
> 2 files changed, 49 insertions(+)
>
> Index: cws-2.6.26-rc5-mm3/include/linux/cgroup.h
> ===================================================================
> --- cws-2.6.26-rc5-mm3.orig/include/linux/cgroup.h
> +++ cws-2.6.26-rc5-mm3/include/linux/cgroup.h
> @@ -205,6 +205,13 @@ struct cftype {
> * subsystem, followed by a period */
> char name[MAX_CFTYPE_NAME];
> int private;
> +
> + /*
> + * If non-zero, defines the maximum length of string that can
> + * be passed to write_string; defaults to 64
> + */
> + size_t max_write_len;
> +
> int (*open)(struct inode *inode, struct file *file);
> ssize_t (*read)(struct cgroup *cgrp, struct cftype *cft,
> struct file *file,
> @@ -249,6 +256,13 @@ struct cftype {
> int (*write_s64)(struct cgroup *cgrp, struct cftype *cft, s64 val);
>
> /*
> + * write_string() is passed a nul-terminated kernelspace
> + * buffer of maximum length determined by max_write_len.
> + * Returns 0 or -ve error code.
> + */
> + int (*write_string)(struct cgroup *cgrp, struct cftype *cft,
> + const char *buffer);
> + /*
> * trigger() callback can be used to get some kick from the
> * userspace, when the actual string written is not important
> * at all. The private field can be used to determine the
> Index: cws-2.6.26-rc5-mm3/kernel/cgroup.c
> ===================================================================
> --- cws-2.6.26-rc5-mm3.orig/kernel/cgroup.c
> +++ cws-2.6.26-rc5-mm3/kernel/cgroup.c
> @@ -1363,6 +1363,39 @@ static ssize_t cgroup_write_X64(struct c
> return retval;
> }
>
> +static ssize_t cgroup_write_string(struct cgroup *cgrp, struct cftype *cft,
> + struct file *file,
> + const char __user *userbuf,
> + size_t nbytes, loff_t *unused_ppos)
> +{
> + char local_buffer[64];
> + int retval = 0;
> + size_t max_bytes = cft->max_write_len;
> + char *buffer = local_buffer;
> +
> + if (!max_bytes)
> + max_bytes = sizeof(local_buffer) - 1;
> + if (nbytes >= max_bytes)
> + return -E2BIG;
> + /* Allocate a dynamic buffer if we need one */
> + if (nbytes >= sizeof(local_buffer)) {
> + buffer = kmalloc(nbytes + 1, GFP_KERNEL);
> + if (buffer == NULL)
> + return -ENOMEM;
> + }
> + if (nbytes && copy_from_user(buffer, userbuf, nbytes))
> + return -EFAULT;
> +
> + buffer[nbytes] = 0; /* nul-terminate */
> + strstrip(buffer);
> + retval = cft->write_string(cgrp, cft, buffer);
> + if (!retval)
> + retval = nbytes;
> + if (buffer != local_buffer)
> + kfree(buffer);
> + return retval;
> +}
> +
> static ssize_t cgroup_common_file_write(struct cgroup *cgrp,
> struct cftype *cft,
> struct file *file,
> @@ -1440,6 +1473,8 @@ static ssize_t cgroup_file_write(struct
> return cft->write(cgrp, cft, file, buf, nbytes, ppos);
> if (cft->write_u64 || cft->write_s64)
> return cgroup_write_X64(cgrp, cft, file, buf, nbytes, ppos);
> + if (cft->write_string)
> + return cgroup_write_string(cgrp, cft, file, buf, nbytes, ppos);
> if (cft->trigger) {
> int ret = cft->trigger(cgrp, (unsigned int)cft->private);
> return ret ? ret : nbytes;
>
> --
^ permalink raw reply [flat|nested] 17+ messages in thread* Re: [PATCH 2/8] CGroup Files: Add write_string cgroup control file method
[not found] ` <20080621000730.052131000-B63HFAS8fGlSzHKm+aFRNNkmqwFzkYv6@public.gmane.org>
2008-06-22 14:32 ` [PATCH 2/8] CGroup Files: Add write_string cgroup control file method Balbir Singh
2008-06-24 15:34 ` Serge E. Hallyn
@ 2008-06-24 23:19 ` Andrew Morton
2 siblings, 0 replies; 17+ messages in thread
From: Andrew Morton @ 2008-06-24 23:19 UTC (permalink / raw)
To: menage-hpIqsD4AKlfQT0dZR+AlfA
Cc: containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
linux-kernel-u79uwXL29TY76Z2rM5mHXA, xemul-GEFAQzZX7r8dnm+yROfE0A
On Fri, 20 Jun 2008 16:44:00 -0700
menage-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org wrote:
> This patch adds a write_string() method for cgroups control files. The
> semantics are that a buffer is copied from userspace to kernelspace
> and the handler function invoked on that buffer. The buffer is
> guaranteed to be nul-terminated, and no longer than max_write_len
> (defaulting to 64 bytes if unspecified). Later patches will convert
> existing raw file write handlers in control group subsystems to use
> this method.
>
> Signed-off-by: Paul Menage <menage-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>
>
> ---
> include/linux/cgroup.h | 14 ++++++++++++++
> kernel/cgroup.c | 35 +++++++++++++++++++++++++++++++++++
> 2 files changed, 49 insertions(+)
>
> Index: cws-2.6.26-rc5-mm3/include/linux/cgroup.h
> ===================================================================
> --- cws-2.6.26-rc5-mm3.orig/include/linux/cgroup.h
> +++ cws-2.6.26-rc5-mm3/include/linux/cgroup.h
> @@ -205,6 +205,13 @@ struct cftype {
> * subsystem, followed by a period */
> char name[MAX_CFTYPE_NAME];
> int private;
> +
> + /*
> + * If non-zero, defines the maximum length of string that can
> + * be passed to write_string; defaults to 64
> + */
> + size_t max_write_len;
> +
> int (*open)(struct inode *inode, struct file *file);
> ssize_t (*read)(struct cgroup *cgrp, struct cftype *cft,
> struct file *file,
> @@ -249,6 +256,13 @@ struct cftype {
> int (*write_s64)(struct cgroup *cgrp, struct cftype *cft, s64 val);
>
> /*
> + * write_string() is passed a nul-terminated kernelspace
> + * buffer of maximum length determined by max_write_len.
> + * Returns 0 or -ve error code.
> + */
> + int (*write_string)(struct cgroup *cgrp, struct cftype *cft,
> + const char *buffer);
Everything seems to use size_t (or ssize_t?) except for the ->write_string
return value. Can any of this be improved?
> + /*
> * trigger() callback can be used to get some kick from the
> * userspace, when the actual string written is not important
> * at all. The private field can be used to determine the
> Index: cws-2.6.26-rc5-mm3/kernel/cgroup.c
> ===================================================================
> --- cws-2.6.26-rc5-mm3.orig/kernel/cgroup.c
> +++ cws-2.6.26-rc5-mm3/kernel/cgroup.c
> @@ -1363,6 +1363,39 @@ static ssize_t cgroup_write_X64(struct c
> return retval;
> }
>
> +static ssize_t cgroup_write_string(struct cgroup *cgrp, struct cftype *cft,
> + struct file *file,
> + const char __user *userbuf,
> + size_t nbytes, loff_t *unused_ppos)
> +{
> + char local_buffer[64];
> + int retval = 0;
> + size_t max_bytes = cft->max_write_len;
> + char *buffer = local_buffer;
> +
> + if (!max_bytes)
> + max_bytes = sizeof(local_buffer) - 1;
> + if (nbytes >= max_bytes)
> + return -E2BIG;
> + /* Allocate a dynamic buffer if we need one */
> + if (nbytes >= sizeof(local_buffer)) {
> + buffer = kmalloc(nbytes + 1, GFP_KERNEL);
> + if (buffer == NULL)
> + return -ENOMEM;
> + }
> + if (nbytes && copy_from_user(buffer, userbuf, nbytes))
> + return -EFAULT;
> +
> + buffer[nbytes] = 0; /* nul-terminate */
> + strstrip(buffer);
> + retval = cft->write_string(cgrp, cft, buffer);
> + if (!retval)
> + retval = nbytes;
> + if (buffer != local_buffer)
> + kfree(buffer);
> + return retval;
> +}
> +
> static ssize_t cgroup_common_file_write(struct cgroup *cgrp,
> struct cftype *cft,
> struct file *file,
> @@ -1440,6 +1473,8 @@ static ssize_t cgroup_file_write(struct
> return cft->write(cgrp, cft, file, buf, nbytes, ppos);
> if (cft->write_u64 || cft->write_s64)
> return cgroup_write_X64(cgrp, cft, file, buf, nbytes, ppos);
> + if (cft->write_string)
> + return cgroup_write_string(cgrp, cft, file, buf, nbytes, ppos);
> if (cft->trigger) {
> int ret = cft->trigger(cgrp, (unsigned int)cft->private);
> return ret ? ret : nbytes;
>
> --
^ permalink raw reply [flat|nested] 17+ messages in thread
[parent not found: <20080622143236.GA19380@balbir.in.ibm.com>]
[parent not found: <20080624161923.449ecea4.akpm@linux-foundation.org>]
[parent not found: <20080621000731.082343000@menage.corp.google.com>]
[parent not found: <20080621000730.255258000@menage.corp.google.com>]