linux-fsdevel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Jan Kara <jack@suse.cz>
To: linux-audit@redhat.com
Cc: Paul Moore <paul@paul-moore.com>, <linux-fsdevel@vger.kernel.org>,
	Al Viro <viro@ZenIV.linux.org.uk>,
	Richard Guy Briggs <rgb@redhat.com>, Jan Kara <jack@suse.cz>
Subject: [PATCH 6/6] audit: Point to fsnotify mark instead of embedding it
Date: Thu, 28 Jun 2018 18:40:14 +0200	[thread overview]
Message-ID: <20180628164014.4925-7-jack@suse.cz> (raw)
In-Reply-To: <20180628164014.4925-1-jack@suse.cz>

Audit tree code currently embeds fsnotify mark in audit_chunk. As chunk
attached to an inode is replace when new tag is added / removed, we also
need to remove old fsnotify mark and add a new one on such occasion.
This is cumbersome and makes locking rules somewhat difficult to follow.

Fix these problems by allocating fsnotify mark independently and keeping
it all the time while there is some chunk attached to an inode. Also add
documentation about the locking rules so that things are easier to
follow.

Signed-off-by: Jan Kara <jack@suse.cz>
---
 kernel/audit_tree.c | 209 ++++++++++++++++++++++++++++++++--------------------
 1 file changed, 129 insertions(+), 80 deletions(-)

diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index 46cc6d046c75..045878d1c060 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -25,9 +25,8 @@ struct audit_tree {
 struct audit_chunk {
 	struct list_head hash;
 	unsigned long key;
-	struct fsnotify_mark mark;
+	struct fsnotify_mark *mark;
 	struct list_head trees;		/* with root here */
-	int dead;
 	int count;
 	atomic_long_t refs;
 	struct rcu_head head;
@@ -38,13 +37,25 @@ struct audit_chunk {
 	} owners[];
 };
 
+struct audit_tree_mark {
+	struct fsnotify_mark fsn_mark;
+	struct audit_chunk *chunk;
+};
+
 static LIST_HEAD(tree_list);
 static LIST_HEAD(prune_list);
 static struct task_struct *prune_thread;
 
 /*
- * One struct chunk is attached to each inode of interest.
- * We replace struct chunk on tagging/untagging.
+ * One struct chunk is attached to each inode of interest through
+ * audit_tree_mark (fsnotify mark). We replace struct chunk on tagging /
+ * untagging, the mark is stable as long as there is chunk attached. The
+ * association between mark and chunk is protected by hash_lock and
+ * audit_tree_group->mark_mutex. Thus as long as we hold
+ * audit_tree_group->mark_mutex and check that the mark is alive by
+ * FSNOTIFY_MARK_FLAG_ATTACHED flag check, we are sure the mark points to
+ * the current chunk.
+ *
  * Rules have pointer to struct audit_tree.
  * Rules have struct list_head rlist forming a list of rules over
  * the same tree.
@@ -63,8 +74,12 @@ static struct task_struct *prune_thread;
  * tree is refcounted; one reference for "some rules on rules_list refer to
  * it", one for each chunk with pointer to it.
  *
- * chunk is refcounted by embedded fsnotify_mark + .refs (non-zero refcount
- * of watch contributes 1 to .refs).
+ * chunk is refcounted by embedded .refs. Mark associated with the chunk holds
+ * one chunk reference. This reference is dropped either when a mark is going
+ * to be freed (corresponding inode goes away) or when chunk attached to the
+ * mark gets replaced. This reference must be dropped using
+ * audit_mark_put_chunk() to make sure the reference is dropped only after RCU
+ * grace period as it protects RCU readers of the hash table.
  *
  * node.index allows to get from node.list to containing chunk.
  * MSB of that sucker is stolen to mark taggings that we might have to
@@ -73,6 +88,7 @@ static struct task_struct *prune_thread;
  */
 
 static struct fsnotify_group *audit_tree_group;
+static struct kmem_cache *audit_tree_mark_cachep __read_mostly;
 
 static struct audit_tree *alloc_tree(const char *s)
 {
@@ -126,16 +142,14 @@ void audit_put_chunk(struct audit_chunk *chunk)
 		free_chunk(chunk);
 }
 
-static void __put_chunk(struct rcu_head *rcu)
+static inline struct audit_tree_mark *AUDIT_M(struct fsnotify_mark *entry)
 {
-	struct audit_chunk *chunk = container_of(rcu, struct audit_chunk, head);
-	audit_put_chunk(chunk);
+	return container_of(entry, struct audit_tree_mark, fsn_mark);
 }
 
 static void audit_tree_destroy_watch(struct fsnotify_mark *entry)
 {
-	struct audit_chunk *chunk = container_of(entry, struct audit_chunk, mark);
-	call_rcu(&chunk->head, __put_chunk);
+	kmem_cache_free(audit_tree_mark_cachep, entry);
 }
 
 static struct audit_chunk *alloc_chunk(int count)
@@ -157,8 +171,6 @@ static struct audit_chunk *alloc_chunk(int count)
 		INIT_LIST_HEAD(&chunk->owners[i].list);
 		chunk->owners[i].index = i;
 	}
-	fsnotify_init_mark(&chunk->mark, audit_tree_group);
-	chunk->mark.mask = FS_IN_IGNORED;
 	return chunk;
 }
 
@@ -184,8 +196,6 @@ static void insert_hash(struct audit_chunk *chunk)
 {
 	struct list_head *list;
 
-	if (!(chunk->mark.flags & FSNOTIFY_MARK_FLAG_ATTACHED))
-		return;
 	list = chunk_hash(chunk->key);
 	list_add_rcu(&chunk->hash, list);
 }
@@ -228,15 +238,49 @@ static struct audit_chunk *find_chunk(struct node *p)
 	return container_of(p, struct audit_chunk, owners[0]);
 }
 
+static void __put_chunk(struct rcu_head *rcu)
+{
+	struct audit_chunk *chunk = container_of(rcu, struct audit_chunk, head);
+	audit_put_chunk(chunk);
+}
+
+/*
+ * Drop reference to the chunk that was held by the mark. This is the reference
+ * that gets dropped after we've removed the chunk from the hash table and we
+ * use it to make sure chunk cannot be freed before RCU grace period expires.
+ */
+static void audit_mark_put_chunk(struct audit_chunk *chunk)
+{
+	call_rcu(&chunk->head, __put_chunk);
+}
+
+static void replace_mark_chunk(struct fsnotify_mark *entry,
+			       struct audit_chunk *chunk)
+{
+	struct audit_chunk *old;
+
+	assert_spin_locked(&hash_lock);
+	old = AUDIT_M(entry)->chunk;
+	AUDIT_M(entry)->chunk = chunk;
+	if (chunk)
+		chunk->mark = entry;
+	if (old)
+		old->mark = NULL;
+}
+
 static void untag_chunk(struct node *p)
 {
 	struct audit_chunk *chunk = find_chunk(p);
-	struct fsnotify_mark *entry = &chunk->mark;
+	struct fsnotify_mark *entry = chunk->mark;
 	struct audit_chunk *new = NULL;
 	struct audit_tree *owner;
 	int size = chunk->count - 1;
 	int i, j;
 
+	/* Racing with audit_tree_freeing_mark()? */
+	if (!entry)
+		return;
+
 	fsnotify_get_mark(entry);
 
 	spin_unlock(&hash_lock);
@@ -246,29 +290,31 @@ static void untag_chunk(struct node *p)
 
 	mutex_lock(&entry->group->mark_mutex);
 	/*
-	 * mark_mutex protects mark from getting detached and thus also from
-	 * mark->connector->obj getting NULL.
+	 * mark_mutex protects mark stabilizes chunk attached to the mark so we
+	 * can check whether it didn't change while we've dropped hash_lock.
 	 */
-	if (chunk->dead || !(entry->flags & FSNOTIFY_MARK_FLAG_ATTACHED)) {
+	if (!(entry->flags & FSNOTIFY_MARK_FLAG_ATTACHED) ||
+	    AUDIT_M(entry)->chunk != chunk) {
 		mutex_unlock(&entry->group->mark_mutex);
 		if (new)
-			fsnotify_put_mark(&new->mark);
+			kfree(new);
 		goto out;
 	}
 
 	owner = p->owner;
 
 	if (!size) {
-		chunk->dead = 1;
 		spin_lock(&hash_lock);
 		list_del_init(&chunk->trees);
 		if (owner->root == chunk)
 			owner->root = NULL;
 		list_del_init(&p->list);
 		list_del_rcu(&chunk->hash);
+		replace_mark_chunk(entry, NULL);
 		spin_unlock(&hash_lock);
 		fsnotify_detach_mark(entry);
 		mutex_unlock(&entry->group->mark_mutex);
+		audit_mark_put_chunk(chunk);
 		fsnotify_free_mark(entry);
 		goto out;
 	}
@@ -276,15 +322,9 @@ static void untag_chunk(struct node *p)
 	if (!new)
 		goto Fallback;
 
-	if (fsnotify_add_mark_locked(&new->mark, entry->connector->obj,
-				     FSNOTIFY_OBJ_TYPE_INODE, 1)) {
-		fsnotify_put_mark(&new->mark);
-		goto Fallback;
-	}
-
-	chunk->dead = 1;
 	spin_lock(&hash_lock);
 	new->key = chunk->key;
+	replace_mark_chunk(entry, new);
 	list_replace_init(&chunk->trees, &new->trees);
 	if (owner->root == chunk) {
 		list_del_init(&owner->same_root);
@@ -317,10 +357,8 @@ static void untag_chunk(struct node *p)
 	smp_wmb();
 	list_replace_rcu(&chunk->hash, &new->hash);
 	spin_unlock(&hash_lock);
-	fsnotify_detach_mark(entry);
 	mutex_unlock(&entry->group->mark_mutex);
-	fsnotify_free_mark(entry);
-	fsnotify_put_mark(&new->mark);	/* drop initial reference */
+	audit_mark_put_chunk(chunk);
 	goto out;
 
 Fallback:
@@ -340,6 +378,18 @@ static void untag_chunk(struct node *p)
 	spin_lock(&hash_lock);
 }
 
+static struct fsnotify_mark *alloc_fsnotify_mark(void)
+{
+	struct audit_tree_mark *mark;
+
+	mark = kmem_cache_zalloc(audit_tree_mark_cachep, GFP_KERNEL);
+	if (!mark)
+		return NULL;
+	fsnotify_init_mark(&mark->fsn_mark, audit_tree_group);
+	mark->fsn_mark.mask = FS_IN_IGNORED;
+	return &mark->fsn_mark;
+}
+
 /* Call with group->mark_mutex held, releases it */
 static int create_chunk(struct inode *inode, struct audit_tree *tree)
 {
@@ -351,23 +401,31 @@ static int create_chunk(struct inode *inode, struct audit_tree *tree)
 		return -ENOMEM;
 	}
 
-	entry = &chunk->mark;
+	entry = alloc_fsnotify_mark();
+	if (!entry) {
+		mutex_unlock(&audit_tree_group->mark_mutex);
+		kfree(chunk);
+		return -ENOMEM;
+	}
+
 	if (fsnotify_add_inode_mark_locked(entry, inode, 0)) {
 		mutex_unlock(&audit_tree_group->mark_mutex);
 		fsnotify_put_mark(entry);
+		kfree(chunk);
 		return -ENOSPC;
 	}
 
 	spin_lock(&hash_lock);
 	if (tree->goner) {
 		spin_unlock(&hash_lock);
-		chunk->dead = 1;
 		fsnotify_detach_mark(entry);
 		mutex_unlock(&audit_tree_group->mark_mutex);
 		fsnotify_free_mark(entry);
 		fsnotify_put_mark(entry);
+		kfree(chunk);
 		return 0;
 	}
+	replace_mark_chunk(entry, chunk);
 	chunk->owners[0].index = (1U << 31);
 	chunk->owners[0].owner = tree;
 	get_tree(tree);
@@ -386,34 +444,42 @@ static int create_chunk(struct inode *inode, struct audit_tree *tree)
 	insert_hash(chunk);
 	spin_unlock(&hash_lock);
 	mutex_unlock(&audit_tree_group->mark_mutex);
-	fsnotify_put_mark(entry);	/* drop initial reference */
+	/*
+	 * Drop our initial reference. When mark we point to is getting freed,
+	 * we get notification through ->freeing_mark callback and cleanup
+	 * chunk pointing to this mark.
+	 */
+	fsnotify_put_mark(entry);
 	return 0;
 }
 
 /* the first tagged inode becomes root of tree */
 static int tag_chunk(struct inode *inode, struct audit_tree *tree)
 {
-	struct fsnotify_mark *old_entry, *chunk_entry;
+	struct fsnotify_mark *entry;
 	struct audit_tree *owner;
 	struct audit_chunk *chunk, *old;
 	struct node *p;
 	int n;
 
 	mutex_lock(&audit_tree_group->mark_mutex);
-	old_entry = fsnotify_find_mark(&inode->i_fsnotify_marks,
-				       audit_tree_group);
-	if (!old_entry)
+	entry = fsnotify_find_mark(&inode->i_fsnotify_marks, audit_tree_group);
+	if (!entry)
 		return create_chunk(inode, tree);
 
-	old = container_of(old_entry, struct audit_chunk, mark);
-
+	/*
+	 * Found mark is guaranteed to be attached and mark_mutex protects mark
+	 * from getting detached and thus it makes sure there is chunk attached
+	 * to the mark.
+	 */
 	/* are we already there? */
 	spin_lock(&hash_lock);
+	old = AUDIT_M(entry)->chunk;
 	for (n = 0; n < old->count; n++) {
 		if (old->owners[n].owner == tree) {
 			spin_unlock(&hash_lock);
 			mutex_unlock(&audit_tree_group->mark_mutex);
-			fsnotify_put_mark(old_entry);
+			fsnotify_put_mark(entry);
 			return 0;
 		}
 	}
@@ -422,44 +488,20 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
 	chunk = alloc_chunk(old->count + 1);
 	if (!chunk) {
 		mutex_unlock(&audit_tree_group->mark_mutex);
-		fsnotify_put_mark(old_entry);
+		fsnotify_put_mark(entry);
 		return -ENOMEM;
 	}
 
-	chunk_entry = &chunk->mark;
-
-	/*
-	 * mark_mutex protects mark from getting detached and thus also from
-	 * mark->connector->obj getting NULL.
-	 */
-	if (!(old_entry->flags & FSNOTIFY_MARK_FLAG_ATTACHED)) {
-		/* old_entry is being shot, lets just lie */
-		mutex_unlock(&audit_tree_group->mark_mutex);
-		fsnotify_put_mark(old_entry);
-		fsnotify_put_mark(&chunk->mark);
-		return -ENOENT;
-	}
-
-	if (fsnotify_add_mark_locked(chunk_entry, old_entry->connector->obj,
-				     FSNOTIFY_OBJ_TYPE_INODE, 1)) {
-		mutex_unlock(&audit_tree_group->mark_mutex);
-		fsnotify_put_mark(chunk_entry);
-		fsnotify_put_mark(old_entry);
-		return -ENOSPC;
-	}
-
 	spin_lock(&hash_lock);
 	if (tree->goner) {
 		spin_unlock(&hash_lock);
-		chunk->dead = 1;
-		fsnotify_detach_mark(chunk_entry);
 		mutex_unlock(&audit_tree_group->mark_mutex);
-		fsnotify_free_mark(chunk_entry);
-		fsnotify_put_mark(chunk_entry);
-		fsnotify_put_mark(old_entry);
+		fsnotify_put_mark(entry);
+		kfree(chunk);
 		return 0;
 	}
 	chunk->key = old->key;
+	replace_mark_chunk(entry, chunk);
 	list_replace_init(&old->trees, &chunk->trees);
 	for (n = 0, p = chunk->owners; n < old->count; n++, p++) {
 		struct audit_tree *s = old->owners[n].owner;
@@ -476,7 +518,6 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
 	list_add(&p->list, &tree->chunks);
 	list_for_each_entry(owner, &chunk->trees, same_root)
 		owner->root = chunk;
-	old->dead = 1;
 	if (!tree->root) {
 		tree->root = chunk;
 		list_add(&tree->same_root, &chunk->trees);
@@ -489,11 +530,10 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
 	smp_wmb();
 	list_replace_rcu(&old->hash, &chunk->hash);
 	spin_unlock(&hash_lock);
-	fsnotify_detach_mark(old_entry);
 	mutex_unlock(&audit_tree_group->mark_mutex);
-	fsnotify_free_mark(old_entry);
-	fsnotify_put_mark(chunk_entry);	/* drop initial reference */
-	fsnotify_put_mark(old_entry); /* pair to fsnotify_find mark_entry */
+	fsnotify_put_mark(entry); /* pair to fsnotify_find mark_entry */
+	audit_mark_put_chunk(old);
+
 	return 0;
 }
 
@@ -960,10 +1000,6 @@ static void evict_chunk(struct audit_chunk *chunk)
 	int need_prune = 0;
 	int n;
 
-	if (chunk->dead)
-		return;
-
-	chunk->dead = 1;
 	mutex_lock(&audit_filter_mutex);
 	spin_lock(&hash_lock);
 	while (!list_empty(&chunk->trees)) {
@@ -1002,9 +1038,20 @@ static int audit_tree_handle_event(struct fsnotify_group *group,
 
 static void audit_tree_freeing_mark(struct fsnotify_mark *entry, struct fsnotify_group *group)
 {
-	struct audit_chunk *chunk = container_of(entry, struct audit_chunk, mark);
+	struct audit_chunk *chunk;
 
-	evict_chunk(chunk);
+	mutex_lock(&entry->group->mark_mutex);
+	spin_lock(&hash_lock);
+	chunk = AUDIT_M(entry)->chunk;
+	replace_mark_chunk(entry, NULL);
+	spin_unlock(&hash_lock);
+	if (chunk) {
+		mutex_unlock(&entry->group->mark_mutex);
+		evict_chunk(chunk);
+		audit_mark_put_chunk(chunk);
+	} else {
+		mutex_unlock(&entry->group->mark_mutex);
+	}
 
 	/*
 	 * We are guaranteed to have at least one reference to the mark from
@@ -1023,6 +1070,8 @@ static int __init audit_tree_init(void)
 {
 	int i;
 
+	audit_tree_mark_cachep = KMEM_CACHE(audit_tree_mark, SLAB_PANIC);
+
 	audit_tree_group = fsnotify_alloc_group(&audit_tree_ops);
 	if (IS_ERR(audit_tree_group))
 		audit_panic("cannot initialize fsnotify group for rectree watches");
-- 
2.16.4

  parent reply	other threads:[~2018-06-28 19:04 UTC|newest]

Thread overview: 26+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-06-28 16:40 [PATCH 0/6] audit: Fix various races when tagging and untagging mounts Jan Kara
2018-06-28 16:40 ` [PATCH 1/6] audit_tree: Replace mark->lock locking Jan Kara
2018-06-29 11:31   ` Amir Goldstein
2018-07-03 14:07     ` Jan Kara
2018-06-28 16:40 ` [PATCH 2/6] audit: Fix possible spurious -ENOSPC error Jan Kara
2018-06-29 11:42   ` Amir Goldstein
2018-07-02  6:05   ` Dan Carpenter
2018-07-03 14:18     ` Jan Kara
2018-06-28 16:40 ` [PATCH 3/6] audit: Fix possible tagging failures Jan Kara
2018-06-29 12:05   ` Amir Goldstein
2018-07-03 14:21     ` Jan Kara
2018-07-03 17:42       ` Amir Goldstein
2018-07-04  8:28         ` Jan Kara
2018-06-28 16:40 ` [PATCH 4/6] audit: Embed key into chunk Jan Kara
2018-06-29 12:53   ` Amir Goldstein
2018-07-03 14:25     ` Jan Kara
2018-06-28 16:40 ` [PATCH 5/6] audit: Make hash table insertion safe against concurrent lookups Jan Kara
2018-06-29 13:02   ` Amir Goldstein
2018-07-03 15:31     ` Jan Kara
2018-06-28 16:40 ` Jan Kara [this message]
2018-06-29 13:20   ` [PATCH 6/6] audit: Point to fsnotify mark instead of embedding it Amir Goldstein
2018-07-04 12:34     ` Jan Kara
2018-06-29 11:44 ` [PATCH 0/6] audit: Fix various races when tagging and untagging mounts Amir Goldstein
2018-06-29 18:01   ` Paul Moore
2018-07-03 14:14     ` Jan Kara
2018-07-03 17:03       ` Paul Moore

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20180628164014.4925-7-jack@suse.cz \
    --to=jack@suse.cz \
    --cc=linux-audit@redhat.com \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=paul@paul-moore.com \
    --cc=rgb@redhat.com \
    --cc=viro@ZenIV.linux.org.uk \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).