From: Andrea Righi <righiandr@users.sourceforge.net>
To: Balbir Singh <balbir@linux.vnet.ibm.com>,
Naveen Gupta <ngupta@google.com>, Paul Menage <menage@google.com>
Cc: LKML <linux-kernel@vger.kernel.org>, David Miller <davem@davemloft.net>
Subject: [RFC] [PATCH] cgroup: limit network bandwidth
Date: Wed, 23 Jan 2008 10:09:28 +0100 (MET) [thread overview]
Message-ID: <47970448.7010609@users.sourceforge.net> (raw)
Allow limiting the network bandwidth of specific process containers (cgroups) by
imposing additional delays on the sockets' sendmsg()/recvmsg() calls made by
processes that exceed the limits defined in the control group filesystem.
Example:
# mkdir /dev/cgroup
# mount -t cgroup -onet net /dev/cgroup
# cd /dev/cgroup
# mkdir foo
--> the cgroup foo has been created
# /bin/echo $$ > foo/tasks
# /bin/echo 1024 > foo/net.tcp
# /bin/echo 2048 > foo/net.tot
# sh
--> the subshell 'sh' is running in cgroup "foo", which has a maximum network
bandwidth of 1MB/s for TCP traffic and 2MB/s for total network
activity (the limits are written in KB/s).
The netlimit approach can be easily extended to support additional network
protocols or different socket families or types (PF_UNIX, PF_BLUETOOTH,
SOCK_SEQPACKET, etc.).
Signed-off-by: Andrea Righi <a.righi@cineca.it>
---
diff -urpN linux-2.6.24-rc8/include/linux/cgroup_netlimit.h linux-2.6.24-rc8-cgroup-netlimit/include/linux/cgroup_netlimit.h
--- linux-2.6.24-rc8/include/linux/cgroup_netlimit.h 1970-01-01 01:00:00.000000000 +0100
+++ linux-2.6.24-rc8-cgroup-netlimit/include/linux/cgroup_netlimit.h 2008-01-22 21:36:15.000000000 +0100
@@ -0,0 +1,29 @@
+#ifndef CGROUP_NETLIMIT_H
+#define CGROUP_NETLIMIT_H
+
+enum { /* limit types; each value indexes the per-cgroup arrays and the control-file table */
+ CGROUP_NETLIMIT_TOT, /* charged for every PF_INET/PF_INET6 transfer, on top of the per-type limit */
+ CGROUP_NETLIMIT_TCP, /* SOCK_STREAM sockets */
+ CGROUP_NETLIMIT_UDP, /* SOCK_DGRAM sockets */
+ CGROUP_NETLIMIT_RAW, /* SOCK_RAW sockets */
+ /* Number of limit types; must stay last because it sizes the per-cgroup arrays */
+ CGROUP_NETLIMIT_END,
+};
+
+#define CGROUP_NETLIMIT_FILE(_x, _y) /* struct cftype initializer for one limit control file */ \
+ { \
+ .name = _x, /* control file name, e.g. "tcp" */ \
+ .read = netlimit_read, /* handlers are static in kernel/cgroup_netlimit.c, so the macro is only usable there */ \
+ .write_uint = netlimit_write_uint, \
+ .private = _y, /* CGROUP_NETLIMIT_* value the handlers use as array index */ \
+ }
+
+#ifdef CONFIG_CGROUP_NETLIMIT /* implemented in kernel/cgroup_netlimit.c */
+extern void cgroup_nl_acct(int limit_id, size_t bytes); /* charge bytes to the calling task's cgroup for limit_id (a CGROUP_NETLIMIT_* value) */
+extern void cgroup_nl_throttle(int limit_id, int interruptible); /* may sleep when the cgroup exceeds limit_id; interruptible picks the sleep type */
+#else
+static inline void cgroup_nl_acct(int limit_id, size_t bytes) { } /* no-op stubs when the limiter is configured out */
+static inline void cgroup_nl_throttle(int limit_id, int interruptible) { }
+#endif /* CONFIG_CGROUP_NETLIMIT */
+
+#endif
diff -urpN linux-2.6.24-rc8/include/linux/cgroup_subsys.h linux-2.6.24-rc8-cgroup-netlimit/include/linux/cgroup_subsys.h
--- linux-2.6.24-rc8/include/linux/cgroup_subsys.h 2008-01-16 05:22:48.000000000 +0100
+++ linux-2.6.24-rc8-cgroup-netlimit/include/linux/cgroup_subsys.h 2008-01-22 14:42:17.000000000 +0100
@@ -37,3 +37,9 @@ SUBSYS(cpuacct)
/* */
+#ifdef CONFIG_CGROUP_NETLIMIT
+SUBSYS(netlimit)
+#endif
+
+/* */
+
diff -urpN linux-2.6.24-rc8/init/Kconfig linux-2.6.24-rc8-cgroup-netlimit/init/Kconfig
--- linux-2.6.24-rc8/init/Kconfig 2008-01-16 05:22:48.000000000 +0100
+++ linux-2.6.24-rc8-cgroup-netlimit/init/Kconfig 2008-01-23 00:44:04.000000000 +0100
@@ -313,6 +313,15 @@ config CGROUP_NS
for instance virtual servers and checkpoint/restart
jobs.
+config CGROUP_NETLIMIT
+	bool "Enable cgroup network bandwidth limiting (EXPERIMENTAL)"
+	depends on EXPERIMENTAL && CGROUPS
+	help
+	  This allows defining network bandwidth limiting/shaping rules
+	  for specific cgroup(s). Limits are set per cgroup, in KB/s,
+	  through the net.tot, net.tcp, net.udp and net.raw control files.
+
+	  Say N if unsure.
+
config CPUSETS
bool "Cpuset support"
depends on SMP && CGROUPS
diff -urpN linux-2.6.24-rc8/kernel/cgroup_netlimit.c linux-2.6.24-rc8-cgroup-netlimit/kernel/cgroup_netlimit.c
--- linux-2.6.24-rc8/kernel/cgroup_netlimit.c 1970-01-01 01:00:00.000000000 +0100
+++ linux-2.6.24-rc8-cgroup-netlimit/kernel/cgroup_netlimit.c 2008-01-22 21:35:53.000000000 +0100
@@ -0,0 +1,229 @@
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) 2008 Andrea Righi <a.righi@cineca.it>
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/cgroup.h>
+#include <linux/slab.h>
+#include <linux/gfp.h>
+#include <linux/err.h>
+#include <linux/sched.h>
+#include <linux/fs.h>
+#include <linux/jiffies.h>
+#include <linux/spinlock.h>
+#include <linux/cgroup_netlimit.h>
+
+struct netlimit { /* per-cgroup limiter state; every array is indexed by a CGROUP_NETLIMIT_* value */
+ struct cgroup_subsys_state css; /* embedded cgroup state; container_of() on it recovers this struct */
+ spinlock_t lock[CGROUP_NETLIMIT_END]; /* taken by the control-file handlers around the matching entries below */
+ unsigned long bandwidth[CGROUP_NETLIMIT_END]; /* limit as written to the control file, reported as KB/sec; 0 disables the limit */
+ unsigned long req[CGROUP_NETLIMIT_END]; /* bytes charged by cgroup_nl_acct() since the last reset */
+ unsigned long last_request[CGROUP_NETLIMIT_END]; /* jiffies at the last reset (creation, limit write or throttle reset) */
+};
+
+/* Return the netlimit state that embeds @cont's subsystem state. */
+static inline struct netlimit *cgroup_to_netlimit(struct cgroup *cont)
+{
+	struct cgroup_subsys_state *state;
+
+	state = cgroup_subsys_state(cont, netlimit_subsys_id);
+	return container_of(state, struct netlimit, css);
+}
+
+/* Return the netlimit state of the cgroup that @task currently belongs to. */
+static inline struct netlimit *task_to_netlimit(struct task_struct *task)
+{
+	struct cgroup_subsys_state *state;
+
+	state = task_subsys_state(task, netlimit_subsys_id);
+	return container_of(state, struct netlimit, css);
+}
+
+/*
+ * Allocate and initialise the limiter state for a new cgroup.
+ *
+ * Creation is refused with -EPERM unless the caller
+ *   1. is capable(CAP_SYS_ADMIN), and
+ *   2. targets a cgroup that is a descendant of its own cgroup.
+ *
+ * Returns the embedded cgroup_subsys_state on success, or
+ * ERR_PTR(-EPERM) / ERR_PTR(-ENOMEM) on failure.
+ *
+ * Note: called from kernel/cgroup.c with cgroup_lock() held.
+ */
+static struct cgroup_subsys_state *netlimit_create(
+		struct cgroup_subsys *ss, struct cgroup *cont)
+{
+	struct netlimit *limits;
+	int type;
+
+	if (!capable(CAP_SYS_ADMIN) || !cgroup_is_descendant(cont))
+		return ERR_PTR(-EPERM);
+
+	limits = kzalloc(sizeof(*limits), GFP_KERNEL);
+	if (unlikely(limits == NULL))
+		return ERR_PTR(-ENOMEM);
+
+	/* kzalloc() left bandwidth[] and req[] at zero, i.e. "no limit". */
+	for (type = 0; type < CGROUP_NETLIMIT_END; type++) {
+		spin_lock_init(&limits->lock[type]);
+		limits->last_request[type] = jiffies;
+	}
+
+	return &limits->css;
+}
+
+/*
+ * Free the limiter state allocated by netlimit_create().
+ *
+ * Note: called from kernel/cgroup.c with cgroup_lock() held.
+ */
+static void netlimit_destroy(struct cgroup_subsys *ss, struct cgroup *cont)
+{
+	struct netlimit *limits = cgroup_to_netlimit(cont);
+
+	kfree(limits);
+}
+
+/*
+ * Read handler for the per-limit control files.
+ *
+ * Formats the configured bandwidth together with some debugging state
+ * (bytes requested, time of the last reset and jiffies elapsed since then)
+ * for the limit selected by cft->private, and copies it to user space.
+ *
+ * Returns the number of bytes copied, -ENOMEM if no scratch page could be
+ * allocated, -ENODEV if the cgroup has been removed, or the error returned
+ * by simple_read_from_buffer().
+ */
+static ssize_t netlimit_read(struct cgroup *cont, struct cftype *cft,
+			       struct file *file, char __user *buf,
+			       size_t nbytes, loff_t *ppos)
+{
+	const int id = cft->private;
+	unsigned long delta, bandwidth, req, last_request;
+	struct netlimit *nl;
+	ssize_t count, ret;
+	char *page;
+
+	page = (char *)__get_free_page(GFP_TEMPORARY);
+	if (!page)
+		return -ENOMEM;
+
+	cgroup_lock();
+	if (cgroup_is_removed(cont)) {
+		cgroup_unlock();
+		ret = -ENODEV;
+		goto out;
+	}
+
+	nl = cgroup_to_netlimit(cont);
+
+	/* Take a consistent snapshot; format it after dropping the locks. */
+	spin_lock_irq(&nl->lock[id]);
+	last_request = nl->last_request[id];
+	delta = (long)jiffies - (long)last_request;
+	bandwidth = nl->bandwidth[id];
+	req = nl->req[id];
+	spin_unlock_irq(&nl->lock[id]);
+
+	cgroup_unlock();
+
+	/*
+	 * print additional debugging stuff; scnprintf() keeps the output
+	 * bounded to the page even if the format grows later on.
+	 */
+	count = scnprintf(page, PAGE_SIZE,
+			  " type: %s\n"
+			  "max-bandwidth: %lu KB/sec\n"
+			  " requested: %lu KB\n"
+			  " last request: %lu jiffies\n"
+			  " delta: %lu jiffies\n",
+			  cft->name,
+			  bandwidth, req >> 10, last_request, delta);
+
+	ret = simple_read_from_buffer(buf, nbytes, ppos, page, count);
+
+out:
+	free_page((unsigned long)page);
+	return ret;
+}
+
+/*
+ * Write handler for the per-limit control files.
+ *
+ * Stores @val as the new bandwidth limit (reported back as KB/sec; 0 turns
+ * the limit off) for the limit selected by cft->private, and restarts the
+ * accounting: the byte counter is cleared and the reset time set to now.
+ *
+ * Returns 0 on success, -EINVAL if @val does not fit in the unsigned long
+ * used to store the limit, or -ENODEV if the cgroup has been removed.
+ */
+static int netlimit_write_uint(struct cgroup *cont, struct cftype *cft,
+				 u64 val)
+{
+	const int id = cft->private;
+	struct netlimit *nl;
+	int ret = 0;
+
+	/*
+	 * bandwidth[] is an unsigned long: on 32-bit a larger value would be
+	 * silently truncated, possibly to 0, which means "unlimited".
+	 */
+	if (val != (unsigned long)val)
+		return -EINVAL;
+
+	cgroup_lock();
+	if (cgroup_is_removed(cont)) {
+		ret = -ENODEV;
+		goto out;
+	}
+
+	nl = cgroup_to_netlimit(cont);
+
+	spin_lock_irq(&nl->lock[id]);
+	nl->bandwidth[id] = (unsigned long)val;
+	nl->req[id] = 0;
+	nl->last_request[id] = jiffies;
+	spin_unlock_irq(&nl->lock[id]);
+
+out:
+	cgroup_unlock();
+	return ret;
+}
+
+/*
+ * Control files, one per limit type.  The table is indexed by the
+ * CGROUP_NETLIMIT_* value because cgroup_nl_throttle() looks up
+ * files[limit_id].name.
+ */
+static struct cftype files[] = {
+	[CGROUP_NETLIMIT_TOT] =
+		CGROUP_NETLIMIT_FILE("tot", CGROUP_NETLIMIT_TOT),
+	[CGROUP_NETLIMIT_TCP] =
+		CGROUP_NETLIMIT_FILE("tcp", CGROUP_NETLIMIT_TCP),
+	[CGROUP_NETLIMIT_UDP] =
+		CGROUP_NETLIMIT_FILE("udp", CGROUP_NETLIMIT_UDP),
+	[CGROUP_NETLIMIT_RAW] =
+		CGROUP_NETLIMIT_FILE("raw", CGROUP_NETLIMIT_RAW),
+};
+
+/* Add the limit control files to the directory of cgroup @cont. */
+static int netlimit_populate(struct cgroup_subsys *ss, struct cgroup *cont)
+{
+	const int nr_files = ARRAY_SIZE(files);
+
+	return cgroup_add_files(cont, ss, files, nr_files);
+}
+
+struct cgroup_subsys netlimit_subsys = { /* cgroup subsystem descriptor for the bandwidth limiter */
+ .name = "net", /* used as mount option (-onet) and control-file prefix (net.tcp, ...) in the changelog example */
+ .create = netlimit_create,
+ .destroy = netlimit_destroy,
+ .populate = netlimit_populate, /* adds the tot/tcp/udp/raw control files */
+ .subsys_id = netlimit_subsys_id, /* presumably generated from the SUBSYS(netlimit) entry -- confirm */
+};
+
+/*
+ * cgroup_nl_acct - charge transferred bytes to the current task's cgroup
+ * @limit_id: limit type, one of the CGROUP_NETLIMIT_* values
+ * @bytes: number of bytes to add to the cgroup's request counter
+ *
+ * Does nothing when @limit_id is out of range or when no bandwidth limit
+ * is configured for @limit_id in the caller's cgroup.
+ *
+ * req[] is shared by every task in the cgroup and is reset by the control
+ * file write handler under lock[], so the update is done under that lock
+ * as well; an unlocked "+=" can lose charges or undo a reset.
+ *
+ * NOTE(review): the cgroup state is looked up without rcu_read_lock();
+ * confirm this is safe against the task being moved to another cgroup.
+ */
+void cgroup_nl_acct(int limit_id, size_t bytes)
+{
+	struct netlimit *nl;
+	unsigned long flags;
+
+	/* Exported symbol: do not trust the index blindly. */
+	if (limit_id < 0 || limit_id >= CGROUP_NETLIMIT_END)
+		return;
+
+	nl = task_to_netlimit(current);
+	/* Unlocked peek keeps the common "no limit" path lock-free. */
+	if (!nl || !nl->bandwidth[limit_id])
+		return;
+
+	spin_lock_irqsave(&nl->lock[limit_id], flags);
+	/* Re-check: the limit may have been cleared in the meantime. */
+	if (nl->bandwidth[limit_id])
+		nl->req[limit_id] += bytes;
+	spin_unlock_irqrestore(&nl->lock[limit_id], flags);
+}
+EXPORT_SYMBOL(cgroup_nl_acct);
+
+/*
+ * cgroup_nl_throttle - delay the current task if its cgroup is over limit
+ * @limit_id: limit type, one of the CGROUP_NETLIMIT_* values
+ * @interruptible: non-zero to sleep interruptibly, zero for an
+ *                 uninterruptible sleep
+ *
+ * Compares the time the bytes charged so far should have taken at the
+ * configured bandwidth with the time elapsed since the last reset.  If the
+ * cgroup is ahead of its budget the caller sleeps for the difference;
+ * otherwise the byte counter and the reset time are restarted.
+ *
+ * Does nothing when @limit_id is out of range or no limit is configured.
+ * May sleep, so it must be called from process context.
+ */
+void cgroup_nl_throttle(int limit_id, int interruptible)
+{
+	struct netlimit *nl;
+	unsigned long bandwidth, delta, t, flags;
+	long sleep = 0;
+
+	/* Exported symbol: do not trust the index blindly. */
+	if (limit_id < 0 || limit_id >= CGROUP_NETLIMIT_END)
+		return;
+
+	nl = task_to_netlimit(current);
+	/* Unlocked peek keeps the common "no limit" path lock-free. */
+	if (!nl || !nl->bandwidth[limit_id])
+		return;
+
+	spin_lock_irqsave(&nl->lock[limit_id], flags);
+
+	/*
+	 * Read the limit once, under the lock: a concurrent write of 0 to
+	 * the control file must not turn the division below into a
+	 * divide-by-zero.
+	 */
+	bandwidth = nl->bandwidth[limit_id];
+	if (!bandwidth)
+		goto unlock;
+
+	delta = (long)jiffies - (long)nl->last_request[limit_id];
+	if (!delta)
+		goto unlock;
+
+	/* bytes / (KB/sec) is roughly the budgeted time in milliseconds. */
+	t = msecs_to_jiffies(nl->req[limit_id] / bandwidth);
+	if (!t)
+		goto unlock;
+
+	sleep = t - delta;
+	if (sleep <= 0) {
+		/* Within budget: start a new accounting window. */
+		nl->req[limit_id] = 0;
+		nl->last_request[limit_id] = jiffies;
+	}
+
+unlock:
+	spin_unlock_irqrestore(&nl->lock[limit_id], flags);
+
+	if (sleep <= 0)
+		return;
+
+	/* Sleep only after the spinlock has been dropped. */
+	pr_debug("cgroup-netlimit(%s):"
+		 " task %p (%s) must sleep %ld jiffies\n",
+		 files[limit_id].name,
+		 current, current->comm, sleep);
+	if (interruptible)
+		schedule_timeout_interruptible(sleep);
+	else
+		schedule_timeout_uninterruptible(sleep);
+}
+EXPORT_SYMBOL(cgroup_nl_throttle);
diff -urpN linux-2.6.24-rc8/kernel/Makefile linux-2.6.24-rc8-cgroup-netlimit/kernel/Makefile
--- linux-2.6.24-rc8/kernel/Makefile 2008-01-16 05:22:48.000000000 +0100
+++ linux-2.6.24-rc8-cgroup-netlimit/kernel/Makefile 2008-01-22 11:39:23.000000000 +0100
@@ -41,6 +41,7 @@ obj-$(CONFIG_CGROUPS) += cgroup.o
obj-$(CONFIG_CGROUP_DEBUG) += cgroup_debug.o
obj-$(CONFIG_CPUSETS) += cpuset.o
obj-$(CONFIG_CGROUP_NS) += ns_cgroup.o
+obj-$(CONFIG_CGROUP_NETLIMIT) += cgroup_netlimit.o
obj-$(CONFIG_IKCONFIG) += configs.o
obj-$(CONFIG_STOP_MACHINE) += stop_machine.o
obj-$(CONFIG_AUDIT) += audit.o auditfilter.o
diff -urpN linux-2.6.24-rc8/net/socket.c linux-2.6.24-rc8-cgroup-netlimit/net/socket.c
--- linux-2.6.24-rc8/net/socket.c 2008-01-16 05:22:48.000000000 +0100
+++ linux-2.6.24-rc8-cgroup-netlimit/net/socket.c 2008-01-22 21:33:46.000000000 +0100
@@ -85,6 +85,7 @@
#include <linux/audit.h>
#include <linux/wireless.h>
#include <linux/nsproxy.h>
+#include <linux/cgroup_netlimit.h>
#include <asm/uaccess.h>
#include <asm/unistd.h>
@@ -551,10 +552,33 @@ static inline int __sock_sendmsg(struct
si->size = size;
err = security_socket_sendmsg(sock, msg, size);
- if (err)
- return err;
+ if (!err)
+ err = sock->ops->sendmsg(iocb, sock, msg, size);
- return sock->ops->sendmsg(iocb, sock, msg, size);
+ if (err >= 0 && sock->sk) {
+ switch (sock->sk->sk_family) {
+ case PF_INET:
+ case PF_INET6:
+ switch (sock->sk->sk_type) {
+ case SOCK_STREAM:
+ cgroup_nl_acct(CGROUP_NETLIMIT_TCP, size);
+ cgroup_nl_throttle(CGROUP_NETLIMIT_TCP, 1);
+ break;
+ case SOCK_DGRAM:
+ cgroup_nl_acct(CGROUP_NETLIMIT_UDP, size);
+ cgroup_nl_throttle(CGROUP_NETLIMIT_UDP, 1);
+ break;
+ case SOCK_RAW:
+ cgroup_nl_acct(CGROUP_NETLIMIT_RAW, size);
+ cgroup_nl_throttle(CGROUP_NETLIMIT_RAW, 1);
+ break;
+ }
+ cgroup_nl_acct(CGROUP_NETLIMIT_TOT, size);
+ cgroup_nl_throttle(CGROUP_NETLIMIT_TOT, 1);
+ }
+ }
+
+ return err;
}
int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
@@ -633,10 +657,33 @@ static inline int __sock_recvmsg(struct
si->flags = flags;
err = security_socket_recvmsg(sock, msg, size, flags);
- if (err)
- return err;
+ if (!err)
+ err = sock->ops->recvmsg(iocb, sock, msg, size, flags);
- return sock->ops->recvmsg(iocb, sock, msg, size, flags);
+ if (err >= 0 && sock->sk) {
+ switch (sock->sk->sk_family) {
+ case PF_INET:
+ case PF_INET6:
+ switch (sock->sk->sk_type) {
+ case SOCK_STREAM:
+ cgroup_nl_acct(CGROUP_NETLIMIT_TCP, size);
+ cgroup_nl_throttle(CGROUP_NETLIMIT_TCP, 1);
+ break;
+ case SOCK_DGRAM:
+ cgroup_nl_acct(CGROUP_NETLIMIT_UDP, size);
+ cgroup_nl_throttle(CGROUP_NETLIMIT_UDP, 1);
+ break;
+ case SOCK_RAW:
+ cgroup_nl_acct(CGROUP_NETLIMIT_RAW, size);
+ cgroup_nl_throttle(CGROUP_NETLIMIT_RAW, 1);
+ break;
+ }
+ cgroup_nl_acct(CGROUP_NETLIMIT_TOT, size);
+ cgroup_nl_throttle(CGROUP_NETLIMIT_TOT, 1);
+ }
+ }
+
+ return err;
}
int sock_recvmsg(struct socket *sock, struct msghdr *msg,
next reply other threads:[~2008-01-23 9:10 UTC|newest]
Thread overview: 6+ messages / expand[flat|nested] mbox.gz Atom feed top
2008-01-23 9:09 Andrea Righi [this message]
2008-01-23 9:24 ` [RFC] [PATCH] cgroup: limit network bandwidth Balbir Singh
2008-01-23 16:48 ` Andrea Righi
2008-01-23 16:59 ` Paul Menage
2008-01-23 17:48 ` Andrea Righi
2008-01-23 9:54 ` Paul Menage
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=47970448.7010609@users.sourceforge.net \
--to=righiandr@users.sourceforge.net \
--cc=balbir@linux.vnet.ibm.com \
--cc=davem@davemloft.net \
--cc=linux-kernel@vger.kernel.org \
--cc=menage@google.com \
--cc=ngupta@google.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.