cgroups.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Glauber Costa <glommer-bzQdu9zFT3WakBO8gow8eQ@public.gmane.org>
To: David Miller <davem-fT/PcQaiUtIeIZ0/mPfg9Q@public.gmane.org>
Cc: linux-kernel-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	netdev-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	cgroups-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A@public.gmane.org,
	eric.dumazet-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org,
	sfr-3FnU+UHB4dNDw9hX6IcOSA@public.gmane.org
Subject: Re: [PATCH 2/2] Explicitly call tcp creation and init from memcontrol.c
Date: Fri, 16 Dec 2011 01:11:36 +0400	[thread overview]
Message-ID: <4EEA6288.1080405@parallels.com> (raw)
In-Reply-To: <20111215.120028.532844419499092747.davem-fT/PcQaiUtIeIZ0/mPfg9Q@public.gmane.org>

[-- Attachment #1: Type: text/plain, Size: 1344 bytes --]

On 12/15/2011 09:00 PM, David Miller wrote:
> From: Glauber Costa<glommer-bzQdu9zFT3WakBO8gow8eQ@public.gmane.org>
> Date: Thu, 15 Dec 2011 13:34:32 +0400
>
>> Walking the proto_list holds a read_lock, which prevents us from doing
>> allocations. Splitting the tcp create function into create + init is
>> good, but it is not enough since create_files will do allocations as well
>> (dentry ones, mostly).
>>
>> Since this does not involve any protocol state, I propose we call the tcp
>> functions explicitly from memcontrol.c
>>
>> With this, we lose by now the ability of doing cgroup memcontrol for
>> protocols that are loaded as modules. But at least the ones I have in mind
>> won't really need it (tcp_ipv6 being the only one, but it uses the same data
>> structures as tcp_ipv4). So I believe this to be the simpler solution to this
>> problem.
>>
>> Signed-off-by: Glauber Costa<glommer-bzQdu9zFT3WakBO8gow8eQ@public.gmane.org>
>
> This is an unnecessary limitation, please fix this properly otherwise
> DCCP, SCTP, etc. won't be supportable with this stuff.

How about the following patch then ? I am keeping protocols that has the 
cgroup stuff on in a separate list, that can be protected by a mutex. 
Therefore we can allocate without going into troubles.

Let me know if you still have objections, I'll be happy to address them.

[-- Attachment #2: 0001-fix-sleeping-while-atomic-problem-in-sock-mem_cgroup.patch --]
[-- Type: text/plain, Size: 6503 bytes --]

From c8fe669a9cbc7c7cbd87ce48d7eb91ed6c96cbde Mon Sep 17 00:00:00 2001
From: Glauber Costa <glommer-bzQdu9zFT3WakBO8gow8eQ@public.gmane.org>
Date: Thu, 15 Dec 2011 23:47:06 +0300
Subject: [PATCH] fix sleeping while atomic problem in sock mem_cgroup.

Since we can't scan the proto_list to initialize sock cgroups, as it
holds a rwlock, and we also want to keep the code generic enough to
avoid calling the initialization functions of protocols directly,
I propose we keep the interested parties in a separate list. This list
is protected by a mutex so we can sleep and do the necessary allocations.

Signed-off-by: Glauber Costa <glommer-bzQdu9zFT3WakBO8gow8eQ@public.gmane.org>
CC: Hiroyouki Kamezawa <kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A@public.gmane.org>
CC: David S. Miller <davem-fT/PcQaiUtIeIZ0/mPfg9Q@public.gmane.org>
CC: Eric Dumazet <eric.dumazet-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
CC: Stephen Rothwell <sfr-3FnU+UHB4dNDw9hX6IcOSA@public.gmane.org>
---
 include/linux/memcontrol.h   |    4 +++
 include/net/sock.h           |    5 +---
 include/net/tcp_memcontrol.h |    2 +
 mm/memcontrol.c              |   52 ++++++++++++++++++++++++++++++++++++++++++
 net/core/sock.c              |   48 +++++++++-----------------------------
 5 files changed, 70 insertions(+), 41 deletions(-)

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 9b296ea..1edd0e3 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -390,6 +390,10 @@ enum {
 	OVER_LIMIT,
 };
 
+struct proto;
+void register_sock_cgroup(struct proto *prot);
+void unregister_sock_cgroup(struct proto *prot);
+
 #ifdef CONFIG_INET
 struct sock;
 #ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
diff --git a/include/net/sock.h b/include/net/sock.h
index 6fe0dae..f8237a3 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -64,10 +64,6 @@
 #include <net/dst.h>
 #include <net/checksum.h>
 
-struct cgroup;
-struct cgroup_subsys;
-int mem_cgroup_sockets_init(struct cgroup *cgrp, struct cgroup_subsys *ss);
-void mem_cgroup_sockets_destroy(struct cgroup *cgrp, struct cgroup_subsys *ss);
 /*
  * This structure really needs to be cleaned up.
  * Most of it is for TCP, and not used by any of
@@ -858,6 +854,7 @@ struct proto {
 	void			(*destroy_cgroup)(struct cgroup *cgrp,
 						  struct cgroup_subsys *ss);
 	struct cg_proto		*(*proto_cgroup)(struct mem_cgroup *memcg);
+	struct list_head	cgroup_node;
 #endif
 };
 
diff --git a/include/net/tcp_memcontrol.h b/include/net/tcp_memcontrol.h
index 3512082..5aa7c4b 100644
--- a/include/net/tcp_memcontrol.h
+++ b/include/net/tcp_memcontrol.h
@@ -11,6 +11,8 @@ struct tcp_memcontrol {
 	int tcp_memory_pressure;
 };
 
+struct cgroup;
+struct cgroup_subsys;
 struct cg_proto *tcp_proto_cgroup(struct mem_cgroup *memcg);
 int tcp_init_cgroup(struct cgroup *cgrp, struct cgroup_subsys *ss);
 void tcp_destroy_cgroup(struct cgroup *cgrp, struct cgroup_subsys *ss);
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 7266202..6ee250d 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -4742,6 +4742,58 @@ static struct cftype kmem_cgroup_files[] = {
 	},
 };
 
+static DEFINE_MUTEX(cgroup_proto_list_lock);
+static LIST_HEAD(cgroup_proto_list);
+
+static int mem_cgroup_sockets_init(struct cgroup *cgrp, struct cgroup_subsys *ss)
+{
+	struct proto *proto;
+	int ret = 0;
+
+	mutex_lock(&cgroup_proto_list_lock);
+	list_for_each_entry(proto, &cgroup_proto_list, cgroup_node) {
+		if (proto->init_cgroup) {
+			ret = proto->init_cgroup(cgrp, ss);
+			if (ret)
+				goto out;
+		}
+	}
+
+	mutex_unlock(&cgroup_proto_list_lock);
+	return ret;
+out:
+	list_for_each_entry_continue_reverse(proto, &cgroup_proto_list, cgroup_node)
+		if (proto->destroy_cgroup)
+			proto->destroy_cgroup(cgrp, ss);
+	mutex_unlock(&cgroup_proto_list_lock);
+	return ret;
+}
+
+static void mem_cgroup_sockets_destroy(struct cgroup *cgrp, struct cgroup_subsys *ss)
+{
+	struct proto *proto;
+
+	mutex_lock(&cgroup_proto_list_lock);
+	list_for_each_entry_reverse(proto, &cgroup_proto_list, cgroup_node)
+		if (proto->destroy_cgroup)
+			proto->destroy_cgroup(cgrp, ss);
+	mutex_unlock(&cgroup_proto_list_lock);
+}
+
+void register_sock_cgroup(struct proto *prot)
+{
+	mutex_lock(&cgroup_proto_list_lock);
+	list_add(&prot->cgroup_node, &cgroup_proto_list);
+	mutex_unlock(&cgroup_proto_list_lock);
+}
+
+void unregister_sock_cgroup(struct proto *prot)
+{
+	mutex_lock(&cgroup_proto_list_lock);
+	list_del(&prot->cgroup_node);
+	mutex_unlock(&cgroup_proto_list_lock);
+}
+
 static int register_kmem_files(struct cgroup *cont, struct cgroup_subsys *ss)
 {
 	int ret = 0;
diff --git a/net/core/sock.c b/net/core/sock.c
index 5a6a906..3728b50 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -139,43 +139,6 @@
 static DEFINE_RWLOCK(proto_list_lock);
 static LIST_HEAD(proto_list);
 
-#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
-int mem_cgroup_sockets_init(struct cgroup *cgrp, struct cgroup_subsys *ss)
-{
-	struct proto *proto;
-	int ret = 0;
-
-	read_lock(&proto_list_lock);
-	list_for_each_entry(proto, &proto_list, node) {
-		if (proto->init_cgroup) {
-			ret = proto->init_cgroup(cgrp, ss);
-			if (ret)
-				goto out;
-		}
-	}
-
-	read_unlock(&proto_list_lock);
-	return ret;
-out:
-	list_for_each_entry_continue_reverse(proto, &proto_list, node)
-		if (proto->destroy_cgroup)
-			proto->destroy_cgroup(cgrp, ss);
-	read_unlock(&proto_list_lock);
-	return ret;
-}
-
-void mem_cgroup_sockets_destroy(struct cgroup *cgrp, struct cgroup_subsys *ss)
-{
-	struct proto *proto;
-
-	read_lock(&proto_list_lock);
-	list_for_each_entry_reverse(proto, &proto_list, node)
-		if (proto->destroy_cgroup)
-			proto->destroy_cgroup(cgrp, ss);
-	read_unlock(&proto_list_lock);
-}
-#endif
-
 /*
  * Each address family might have different locking rules, so we have
  * one slock key per address family:
@@ -2483,6 +2446,12 @@ int proto_register(struct proto *prot, int alloc_slab)
 	list_add(&prot->node, &proto_list);
 	assign_proto_idx(prot);
 	write_unlock(&proto_list_lock);
+
+#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
+	if (prot->proto_cgroup)
+		register_sock_cgroup(prot);
+#endif
+
 	return 0;
 
 out_free_timewait_sock_slab_name:
@@ -2510,6 +2479,11 @@ void proto_unregister(struct proto *prot)
 	list_del(&prot->node);
 	write_unlock(&proto_list_lock);
 
+#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
+	if (prot->proto_cgroup != NULL)
+		unregister_sock_cgroup(prot);
+#endif
+
 	if (prot->slab != NULL) {
 		kmem_cache_destroy(prot->slab);
 		prot->slab = NULL;
-- 
1.7.6.4


  parent reply	other threads:[~2011-12-15 21:11 UTC|newest]

Thread overview: 12+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2011-12-15  9:34 [PATCH 0/2] Proposed fixes for tcp memory pressure Glauber Costa
     [not found] ` <1323941672-14324-1-git-send-email-glommer-bzQdu9zFT3WakBO8gow8eQ@public.gmane.org>
2011-12-15  9:34   ` [PATCH 1/2] Move limit definitions outside CONFIG_INET Glauber Costa
     [not found]     ` <1323941672-14324-2-git-send-email-glommer-bzQdu9zFT3WakBO8gow8eQ@public.gmane.org>
2011-12-15 16:59       ` David Miller
2011-12-15  9:34   ` [PATCH 2/2] Explicitly call tcp creation and init from memcontrol.c Glauber Costa
     [not found]     ` <1323941672-14324-3-git-send-email-glommer-bzQdu9zFT3WakBO8gow8eQ@public.gmane.org>
2011-12-15 16:13       ` KAMEZAWA Hiroyuki
     [not found]         ` <20111216011316.8d58bc8f.kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A@public.gmane.org>
2011-12-15 16:18           ` Glauber Costa
2011-12-15 16:57         ` David Miller
2011-12-15 22:20         ` KAMEZAWA Hiroyuki
2011-12-16  2:06           ` Glauber Costa
2011-12-15 17:00     ` David Miller
     [not found]       ` <20111215.120028.532844419499092747.davem-fT/PcQaiUtIeIZ0/mPfg9Q@public.gmane.org>
2011-12-15 21:11         ` Glauber Costa [this message]
     [not found]           ` <4EEA6288.1080405-bzQdu9zFT3WakBO8gow8eQ@public.gmane.org>
2011-12-15 22:44             ` David Miller

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4EEA6288.1080405@parallels.com \
    --to=glommer-bzqdu9zft3wakbo8gow8eq@public.gmane.org \
    --cc=cgroups-u79uwXL29TY76Z2rM5mHXA@public.gmane.org \
    --cc=davem-fT/PcQaiUtIeIZ0/mPfg9Q@public.gmane.org \
    --cc=eric.dumazet-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org \
    --cc=kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A@public.gmane.org \
    --cc=linux-kernel-u79uwXL29TY76Z2rM5mHXA@public.gmane.org \
    --cc=netdev-u79uwXL29TY76Z2rM5mHXA@public.gmane.org \
    --cc=sfr-3FnU+UHB4dNDw9hX6IcOSA@public.gmane.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).