public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH RFC] audit: move the tree pruning to a dedicated thread
@ 2015-01-06 14:51 Imre Palik
  2015-01-08 21:53 ` Paul Moore
  0 siblings, 1 reply; 9+ messages in thread
From: Imre Palik @ 2015-01-06 14:51 UTC (permalink / raw)
  To: linux-audit
  Cc: Paul Moore, Eric Paris, linux-kernel, Palik, Imre, Matt Wilson

From: "Palik, Imre" <imrep@amazon.de>

When file auditing is enabled, during a low memory situation, a memory
allocation with __GFP_FS can lead to pruning the inode cache, which can,
in turn, lead to audit_tree_freeing_mark() being called.  This can call
audit_schedule_prune(), that tries to fork a pruning thread, and
waits until the thread is created.  But forking needs memory, and the
memory allocations there are done with __GFP_FS.

So we are waiting merrily for some __GFP_FS memory allocations to complete,
while holding some filesystem locks.  This can take a while ...

This patch creates a single thread for pruning the tree from
audit_add_tree_rule(), and thus avoids the deadlock that the on-demand thread
creation can cause.

Reported-by: Matt Wilson <msw@amazon.com>
Cc: Matt Wilson <msw@amazon.com>
Signed-off-by: Imre Palik <imrep@amazon.de>
---
 kernel/audit_tree.c |   91 +++++++++++++++++++++++++++++++++++----------------
 1 file changed, 63 insertions(+), 28 deletions(-)

diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index 0caf1f8..0ada577 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -37,6 +37,7 @@ struct audit_chunk {
 
 static LIST_HEAD(tree_list);
 static LIST_HEAD(prune_list);
+static struct task_struct *prune_thread;
 
 /*
  * One struct chunk is attached to each inode of interest.
@@ -641,6 +642,55 @@ static int tag_mount(struct vfsmount *mnt, void *arg)
 	return tag_chunk(mnt->mnt_root->d_inode, arg);
 }
 
+/*
+ * That gets run when evict_chunk() ends up needing to kill audit_tree.
+ * Runs from a separate thread.
+ */
+static int prune_tree_thread(void *unused)
+{
+	for (;;) {
+		set_current_state(TASK_INTERRUPTIBLE);
+		if (list_empty(&prune_list))
+			schedule();
+		__set_current_state(TASK_RUNNING);
+
+		mutex_lock(&audit_cmd_mutex);
+		mutex_lock(&audit_filter_mutex);
+
+		while (!list_empty(&prune_list)) {
+			struct audit_tree *victim;
+
+			victim = list_entry(prune_list.next,
+					struct audit_tree, list);
+			list_del_init(&victim->list);
+
+			mutex_unlock(&audit_filter_mutex);
+
+			prune_one(victim);
+
+			mutex_lock(&audit_filter_mutex);
+		}
+
+		mutex_unlock(&audit_filter_mutex);
+		mutex_unlock(&audit_cmd_mutex);
+	}
+	return 0;
+}
+
+static int launch_prune_thread(void)
+{
+	prune_thread = kthread_create(prune_tree_thread, NULL,
+				"audit_prune_tree");
+	if (IS_ERR(prune_thread)) {
+		audit_panic("cannot start thread audit_prune_tree");
+		prune_thread = NULL;
+		return -ENOSYS;
+	} else {
+		wake_up_process(prune_thread);
+		return 0;
+	}
+}
+
 /* called with audit_filter_mutex */
 int audit_add_tree_rule(struct audit_krule *rule)
 {
@@ -663,6 +713,12 @@ int audit_add_tree_rule(struct audit_krule *rule)
 	/* do not set rule->tree yet */
 	mutex_unlock(&audit_filter_mutex);
 
+	if (unlikely(!prune_thread)) {
+		err = launch_prune_thread();
+		if (err)
+			goto Err;
+	}
+
 	err = kern_path(tree->pathname, 0, &path);
 	if (err)
 		goto Err;
@@ -713,6 +769,9 @@ int audit_tag_tree(char *old, char *new)
 	struct vfsmount *tagged;
 	int err;
 
+	if (!prune_thread)
+		return -ENOSYS;
+
 	err = kern_path(new, 0, &path2);
 	if (err)
 		return err;
@@ -800,36 +859,11 @@ int audit_tag_tree(char *old, char *new)
 	return failed;
 }
 
-/*
- * That gets run when evict_chunk() ends up needing to kill audit_tree.
- * Runs from a separate thread.
- */
-static int prune_tree_thread(void *unused)
-{
-	mutex_lock(&audit_cmd_mutex);
-	mutex_lock(&audit_filter_mutex);
-
-	while (!list_empty(&prune_list)) {
-		struct audit_tree *victim;
-
-		victim = list_entry(prune_list.next, struct audit_tree, list);
-		list_del_init(&victim->list);
-
-		mutex_unlock(&audit_filter_mutex);
-
-		prune_one(victim);
-
-		mutex_lock(&audit_filter_mutex);
-	}
-
-	mutex_unlock(&audit_filter_mutex);
-	mutex_unlock(&audit_cmd_mutex);
-	return 0;
-}
 
 static void audit_schedule_prune(void)
 {
-	kthread_run(prune_tree_thread, NULL, "audit_prune_tree");
+	BUG_ON(!prune_thread);
+	wake_up_process(prune_thread);
 }
 
 /*
@@ -896,9 +930,10 @@ static void evict_chunk(struct audit_chunk *chunk)
 	for (n = 0; n < chunk->count; n++)
 		list_del_init(&chunk->owners[n].list);
 	spin_unlock(&hash_lock);
+	mutex_unlock(&audit_filter_mutex);
 	if (need_prune)
 		audit_schedule_prune();
-	mutex_unlock(&audit_filter_mutex);
+
 }
 
 static int audit_tree_handle_event(struct fsnotify_group *group,
-- 
1.7.9.5


^ permalink raw reply related	[flat|nested] 9+ messages in thread
* [PATCH RFC] audit: move the tree pruning to a dedicated thread
@ 2014-12-04 11:39 Imre Palik
  2014-12-09 16:33 ` Paul Moore
  0 siblings, 1 reply; 9+ messages in thread
From: Imre Palik @ 2014-12-04 11:39 UTC (permalink / raw)
  To: linux-audit; +Cc: Eric Paris, linux-kernel, Palik, Imre, Matt Wilson, Al Viro

From: "Palik, Imre" <imrep@amazon.de>

When file auditing is enabled, during a low memory situation, a memory
allocation with __GFP_FS can lead to pruning the inode cache, which can,
in turn, lead to audit_tree_freeing_mark() being called.  This can call
audit_schedule_prune(), that tries to fork a pruning thread, and
waits until the thread is created.  But forking needs memory, and the
memory allocations there are done with __GFP_FS.

So we are waiting merrily for some __GFP_FS memory allocations to complete,
while holding some filesystem locks.  This can take a while ...

This patch creates a single thread for pruning the tree from
audit_tree_init(), and thus avoids the deadlock that the on-demand thread
creation can cause.

An alternative approach would be to move the thread creation outside of the
lock.  This would assume that other layers of the filesystem code don't
hold any locks, and it would need some rewrite of the code to limit the
number of threads possibly spawned.

Reported-by: Matt Wilson <msw@amazon.com>
Cc: Matt Wilson <msw@amazon.com>
Cc: Al Viro <viro@ZenIV.linux.org.uk>
Signed-off-by: Imre Palik <imrep@amazon.de>
---
 kernel/audit_tree.c |   53 ++++++++++++++++++++++++++++++++++-----------------
 1 file changed, 35 insertions(+), 18 deletions(-)

diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index 0caf1f8..cf6db88 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -37,6 +37,7 @@ struct audit_chunk {
 
 static LIST_HEAD(tree_list);
 static LIST_HEAD(prune_list);
+static struct task_struct *prune_thread;
 
 /*
  * One struct chunk is attached to each inode of interest.
@@ -806,30 +807,39 @@ int audit_tag_tree(char *old, char *new)
  */
 static int prune_tree_thread(void *unused)
 {
-	mutex_lock(&audit_cmd_mutex);
-	mutex_lock(&audit_filter_mutex);
+	for (;;) {
+		set_current_state(TASK_INTERRUPTIBLE);
+		if (list_empty(&prune_list))
+			schedule();
+		__set_current_state(TASK_RUNNING);
 
-	while (!list_empty(&prune_list)) {
-		struct audit_tree *victim;
+		mutex_lock(&audit_cmd_mutex);
+		mutex_lock(&audit_filter_mutex);
 
-		victim = list_entry(prune_list.next, struct audit_tree, list);
-		list_del_init(&victim->list);
+		while (!list_empty(&prune_list)) {
+			struct audit_tree *victim;
 
-		mutex_unlock(&audit_filter_mutex);
+			victim = list_entry(prune_list.next,
+					struct audit_tree, list);
+			list_del_init(&victim->list);
 
-		prune_one(victim);
+			mutex_unlock(&audit_filter_mutex);
 
-		mutex_lock(&audit_filter_mutex);
-	}
+			prune_one(victim);
 
-	mutex_unlock(&audit_filter_mutex);
-	mutex_unlock(&audit_cmd_mutex);
+			mutex_lock(&audit_filter_mutex);
+		}
+
+		mutex_unlock(&audit_filter_mutex);
+		mutex_unlock(&audit_cmd_mutex);
+	}
 	return 0;
 }
 
 static void audit_schedule_prune(void)
 {
-	kthread_run(prune_tree_thread, NULL, "audit_prune_tree");
+	BUG_ON(!prune_thread);
+	wake_up_process(prune_thread);
 }
 
 /*
@@ -896,9 +906,10 @@ static void evict_chunk(struct audit_chunk *chunk)
 	for (n = 0; n < chunk->count; n++)
 		list_del_init(&chunk->owners[n].list);
 	spin_unlock(&hash_lock);
+	mutex_unlock(&audit_filter_mutex);
 	if (need_prune)
 		audit_schedule_prune();
-	mutex_unlock(&audit_filter_mutex);
+
 }
 
 static int audit_tree_handle_event(struct fsnotify_group *group,
@@ -938,10 +949,16 @@ static int __init audit_tree_init(void)
 {
 	int i;
 
-	audit_tree_group = fsnotify_alloc_group(&audit_tree_ops);
-	if (IS_ERR(audit_tree_group))
-		audit_panic("cannot initialize fsnotify group for rectree watches");
-
+	prune_thread = kthread_create(prune_tree_thread, NULL,
+				"audit_prune_tree");
+	if (IS_ERR(prune_thread)) {
+		audit_panic("cannot start thread audit_prune_tree");
+	} else {
+		wake_up_process(prune_thread);
+		audit_tree_group = fsnotify_alloc_group(&audit_tree_ops);
+		if (IS_ERR(audit_tree_group))
+			audit_panic("cannot initialize fsnotify group for rectree watches");
+	}
 	for (i = 0; i < HASH_SIZE; i++)
 		INIT_LIST_HEAD(&chunk_hash_heads[i]);
 
-- 
1.7.9.5


^ permalink raw reply related	[flat|nested] 9+ messages in thread
* [PATCH RFC] audit: move the tree pruning to a dedicated thread
@ 2014-11-28 14:26 Imre Palik
  0 siblings, 0 replies; 9+ messages in thread
From: Imre Palik @ 2014-11-28 14:26 UTC (permalink / raw)
  To: Eric Paris; +Cc: linux-kernel, Palik, Imre, Matt Wilson, Al Viro

From: "Palik, Imre" <imrep@amazon.de>

When file auditing is enabled, during a low memory situation, a memory
allocation with __GFP_FS can lead to pruning the inode cache, which can,
in turn, lead to audit_tree_freeing_mark() being called.  This can call
audit_schedule_prune(), that tries to fork a pruning thread, and
waits until the thread is created.  But forking needs memory, and the
memory allocations there are done with __GFP_FS.

So we are waiting merrily for some __GFP_FS memory allocations to complete,
while holding some filesystem locks.  This can take a while ...

This patch creates a single thread for pruning the tree from
audit_tree_init(), and thus avoids the deadlock that the on-demand thread
creation can cause.

An alternative approach would be to move the thread creation outside of the
lock.  This would assume that other layers of the filesystem code don't
hold any locks, and it would need some rewrite of the code to limit the
number of threads possibly spawned.

Reported-by: Matt Wilson <msw@amazon.com>
Cc: Matt Wilson <msw@amazon.com>
Cc: Al Viro <viro@ZenIV.linux.org.uk>
Signed-off-by: Imre Palik <imrep@amazon.de>
---
 kernel/audit_tree.c |   53 ++++++++++++++++++++++++++++++++++-----------------
 1 file changed, 35 insertions(+), 18 deletions(-)

diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index 0caf1f8..cf6db88 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -37,6 +37,7 @@ struct audit_chunk {
 
 static LIST_HEAD(tree_list);
 static LIST_HEAD(prune_list);
+static struct task_struct *prune_thread;
 
 /*
  * One struct chunk is attached to each inode of interest.
@@ -806,30 +807,39 @@ int audit_tag_tree(char *old, char *new)
  */
 static int prune_tree_thread(void *unused)
 {
-	mutex_lock(&audit_cmd_mutex);
-	mutex_lock(&audit_filter_mutex);
+	for (;;) {
+		set_current_state(TASK_INTERRUPTIBLE);
+		if (list_empty(&prune_list))
+			schedule();
+		__set_current_state(TASK_RUNNING);
 
-	while (!list_empty(&prune_list)) {
-		struct audit_tree *victim;
+		mutex_lock(&audit_cmd_mutex);
+		mutex_lock(&audit_filter_mutex);
 
-		victim = list_entry(prune_list.next, struct audit_tree, list);
-		list_del_init(&victim->list);
+		while (!list_empty(&prune_list)) {
+			struct audit_tree *victim;
 
-		mutex_unlock(&audit_filter_mutex);
+			victim = list_entry(prune_list.next,
+					struct audit_tree, list);
+			list_del_init(&victim->list);
 
-		prune_one(victim);
+			mutex_unlock(&audit_filter_mutex);
 
-		mutex_lock(&audit_filter_mutex);
-	}
+			prune_one(victim);
 
-	mutex_unlock(&audit_filter_mutex);
-	mutex_unlock(&audit_cmd_mutex);
+			mutex_lock(&audit_filter_mutex);
+		}
+
+		mutex_unlock(&audit_filter_mutex);
+		mutex_unlock(&audit_cmd_mutex);
+	}
 	return 0;
 }
 
 static void audit_schedule_prune(void)
 {
-	kthread_run(prune_tree_thread, NULL, "audit_prune_tree");
+	BUG_ON(!prune_thread);
+	wake_up_process(prune_thread);
 }
 
 /*
@@ -896,9 +906,10 @@ static void evict_chunk(struct audit_chunk *chunk)
 	for (n = 0; n < chunk->count; n++)
 		list_del_init(&chunk->owners[n].list);
 	spin_unlock(&hash_lock);
+	mutex_unlock(&audit_filter_mutex);
 	if (need_prune)
 		audit_schedule_prune();
-	mutex_unlock(&audit_filter_mutex);
+
 }
 
 static int audit_tree_handle_event(struct fsnotify_group *group,
@@ -938,10 +949,16 @@ static int __init audit_tree_init(void)
 {
 	int i;
 
-	audit_tree_group = fsnotify_alloc_group(&audit_tree_ops);
-	if (IS_ERR(audit_tree_group))
-		audit_panic("cannot initialize fsnotify group for rectree watches");
-
+	prune_thread = kthread_create(prune_tree_thread, NULL,
+				"audit_prune_tree");
+	if (IS_ERR(prune_thread)) {
+		audit_panic("cannot start thread audit_prune_tree");
+	} else {
+		wake_up_process(prune_thread);
+		audit_tree_group = fsnotify_alloc_group(&audit_tree_ops);
+		if (IS_ERR(audit_tree_group))
+			audit_panic("cannot initialize fsnotify group for rectree watches");
+	}
 	for (i = 0; i < HASH_SIZE; i++)
 		INIT_LIST_HEAD(&chunk_hash_heads[i]);
 
-- 
1.7.9.5


^ permalink raw reply related	[flat|nested] 9+ messages in thread
* [PATCH RFC] audit: move the tree pruning to a dedicated thread
@ 2014-11-20 14:34 Imre Palik
  0 siblings, 0 replies; 9+ messages in thread
From: Imre Palik @ 2014-11-20 14:34 UTC (permalink / raw)
  To: Eric Paris; +Cc: linux-kernel, Palik, Imre, Matt Wilson

From: "Palik, Imre" <imrep@amazon.de>

When file auditing is enabled, during a low memory situation, a memory
allocation with __GFP_FS can lead to pruning the inode cache, which can,
in turn, lead to audit_tree_freeing_mark() being called.  This can call
audit_schedule_prune(), that tries to fork a pruning thread, and
waits until the thread is created.  But forking needs memory, and the
memory allocations there are done with __GFP_FS.

So we are waiting merrily for some __GFP_FS memory allocations to complete,
while holding some filesystem locks.  This can take a while ...

This patch creates a single thread for pruning the tree from
audit_tree_init(), and thus avoids the deadlock that the on-demand thread
creation can cause.

An alternative approach would be to move the thread creation outside of the
lock.  This would assume that other layers of the filesystem code don't
hold any locks, and it would need some rewrite of the code to limit the
number of threads possibly spawned.

Reported-by: Matt Wilson <msw@amazon.com>
Cc: Matt Wilson <msw@amazon.com>
Signed-off-by: Imre Palik <imrep@amazon.de>
---
 kernel/audit_tree.c |   53 ++++++++++++++++++++++++++++++++++-----------------
 1 file changed, 35 insertions(+), 18 deletions(-)

diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index 0caf1f8..cf6db88 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -37,6 +37,7 @@ struct audit_chunk {
 
 static LIST_HEAD(tree_list);
 static LIST_HEAD(prune_list);
+static struct task_struct *prune_thread;
 
 /*
  * One struct chunk is attached to each inode of interest.
@@ -806,30 +807,39 @@ int audit_tag_tree(char *old, char *new)
  */
 static int prune_tree_thread(void *unused)
 {
-	mutex_lock(&audit_cmd_mutex);
-	mutex_lock(&audit_filter_mutex);
+	for (;;) {
+		set_current_state(TASK_INTERRUPTIBLE);
+		if (list_empty(&prune_list))
+			schedule();
+		__set_current_state(TASK_RUNNING);
 
-	while (!list_empty(&prune_list)) {
-		struct audit_tree *victim;
+		mutex_lock(&audit_cmd_mutex);
+		mutex_lock(&audit_filter_mutex);
 
-		victim = list_entry(prune_list.next, struct audit_tree, list);
-		list_del_init(&victim->list);
+		while (!list_empty(&prune_list)) {
+			struct audit_tree *victim;
 
-		mutex_unlock(&audit_filter_mutex);
+			victim = list_entry(prune_list.next,
+					struct audit_tree, list);
+			list_del_init(&victim->list);
 
-		prune_one(victim);
+			mutex_unlock(&audit_filter_mutex);
 
-		mutex_lock(&audit_filter_mutex);
-	}
+			prune_one(victim);
 
-	mutex_unlock(&audit_filter_mutex);
-	mutex_unlock(&audit_cmd_mutex);
+			mutex_lock(&audit_filter_mutex);
+		}
+
+		mutex_unlock(&audit_filter_mutex);
+		mutex_unlock(&audit_cmd_mutex);
+	}
 	return 0;
 }
 
 static void audit_schedule_prune(void)
 {
-	kthread_run(prune_tree_thread, NULL, "audit_prune_tree");
+	BUG_ON(!prune_thread);
+	wake_up_process(prune_thread);
 }
 
 /*
@@ -896,9 +906,10 @@ static void evict_chunk(struct audit_chunk *chunk)
 	for (n = 0; n < chunk->count; n++)
 		list_del_init(&chunk->owners[n].list);
 	spin_unlock(&hash_lock);
+	mutex_unlock(&audit_filter_mutex);
 	if (need_prune)
 		audit_schedule_prune();
-	mutex_unlock(&audit_filter_mutex);
+
 }
 
 static int audit_tree_handle_event(struct fsnotify_group *group,
@@ -938,10 +949,16 @@ static int __init audit_tree_init(void)
 {
 	int i;
 
-	audit_tree_group = fsnotify_alloc_group(&audit_tree_ops);
-	if (IS_ERR(audit_tree_group))
-		audit_panic("cannot initialize fsnotify group for rectree watches");
-
+	prune_thread = kthread_create(prune_tree_thread, NULL,
+				"audit_prune_tree");
+	if (IS_ERR(prune_thread)) {
+		audit_panic("cannot start thread audit_prune_tree");
+	} else {
+		wake_up_process(prune_thread);
+		audit_tree_group = fsnotify_alloc_group(&audit_tree_ops);
+		if (IS_ERR(audit_tree_group))
+			audit_panic("cannot initialize fsnotify group for rectree watches");
+	}
 	for (i = 0; i < HASH_SIZE; i++)
 		INIT_LIST_HEAD(&chunk_hash_heads[i]);
 
-- 
1.7.9.5


^ permalink raw reply related	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2015-01-15  9:33 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2015-01-06 14:51 [PATCH RFC] audit: move the tree pruning to a dedicated thread Imre Palik
2015-01-08 21:53 ` Paul Moore
2015-01-12  8:11   ` Imre Palik
2015-01-13  1:47     ` Paul Moore
2015-01-15  9:33       ` Imre Palik
  -- strict thread matches above, loose matches on Subject: below --
2014-12-04 11:39 Imre Palik
2014-12-09 16:33 ` Paul Moore
2014-11-28 14:26 Imre Palik
2014-11-20 14:34 Imre Palik

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox