linux-fsdevel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 1/3] IMA: move read/write counters into struct inode
@ 2010-10-19  1:16 Eric Paris
  2010-10-19  1:16 ` [PATCH 2/3] IMA: only allocate iint when needed Eric Paris
                   ` (4 more replies)
  0 siblings, 5 replies; 30+ messages in thread
From: Eric Paris @ 2010-10-19  1:16 UTC (permalink / raw)
  To: linux-kernel, linux-security-module, linux-fsdevel
  Cc: hch, zohar, warthog9, david, jmorris, kyle, hpa, akpm, torvalds,
	mingo, eparis, viro

IMA currently alocated an inode integrity structure for every inode in
core.  This stucture is about 120 bytes long.  Most files however
(especially on a system which doesn't make use of IMA) will never need any
of this space.  The problem is that if IMA is enabled we need to know
information about the number of readers and the number of writers for every
inode on the box.  At the moment we collect that information in the per
inode iint structure and waste the rest of the space.  This patch moves those
counters into the struct inode so we can eventually stop allocating an IMA
integrity structure except when absolutely needed.

This patch does the minimum needed to move the location of the data.  Further
cleanups, especially the location of counter updates, may still be possible.

Signed-off-by: Eric Paris <eparis@redhat.com>
---

 fs/inode.c                        |    2 ++
 include/linux/fs.h                |    5 ++++
 include/linux/ima.h               |    5 ++++
 security/integrity/ima/ima.h      |    3 --
 security/integrity/ima/ima_iint.c |   32 +++++++++++---------------
 security/integrity/ima/ima_main.c |   46 +++++++++++++++++++++----------------
 6 files changed, 52 insertions(+), 41 deletions(-)

diff --git a/fs/inode.c b/fs/inode.c
index 8646433..5d7ce1e 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -24,6 +24,7 @@
 #include <linux/mount.h>
 #include <linux/async.h>
 #include <linux/posix_acl.h>
+#include <linux/ima.h>
 
 /*
  * This is needed for the following functions:
@@ -224,6 +225,7 @@ static struct inode *alloc_inode(struct super_block *sb)
 void __destroy_inode(struct inode *inode)
 {
 	BUG_ON(inode_has_buffers(inode));
+	ima_check_counters(inode);
 	security_inode_free(inode);
 	fsnotify_inode_delete(inode);
 #ifdef CONFIG_FS_POSIX_ACL
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 5fb4dfd..7ec91b5 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -776,6 +776,11 @@ struct inode {
 #ifdef CONFIG_SECURITY
 	void			*i_security;
 #endif
+#ifdef CONFIG_IMA
+	unsigned long		readcount; /* all protected by i_mutex */
+	unsigned long		writecount;
+	unsigned long		opencount;
+#endif
 #ifdef CONFIG_FS_POSIX_ACL
 	struct posix_acl	*i_acl;
 	struct posix_acl	*i_default_acl;
diff --git a/include/linux/ima.h b/include/linux/ima.h
index 975837e..ad90984 100644
--- a/include/linux/ima.h
+++ b/include/linux/ima.h
@@ -21,6 +21,7 @@ extern int ima_file_check(struct file *file, int mask);
 extern void ima_file_free(struct file *file);
 extern int ima_file_mmap(struct file *file, unsigned long prot);
 extern void ima_counts_get(struct file *file);
+extern void ima_check_counters(struct inode *inode);
 
 #else
 static inline int ima_bprm_check(struct linux_binprm *bprm)
@@ -58,5 +59,9 @@ static inline void ima_counts_get(struct file *file)
 	return;
 }
 
+static inline void ima_check_counters(struct inode *inode);
+{
+	return;
+}
 #endif /* CONFIG_IMA_H */
 #endif /* _LINUX_IMA_H */
diff --git a/security/integrity/ima/ima.h b/security/integrity/ima/ima.h
index 3fbcd1d..e500fe3 100644
--- a/security/integrity/ima/ima.h
+++ b/security/integrity/ima/ima.h
@@ -104,9 +104,6 @@ struct ima_iint_cache {
 	unsigned long flags;
 	u8 digest[IMA_DIGEST_SIZE];
 	struct mutex mutex;	/* protects: version, flags, digest */
-	long readcount;		/* measured files readcount */
-	long writecount;	/* measured files writecount */
-	long opencount;		/* opens reference count */
 	struct kref refcount;	/* ima_iint_cache reference count */
 	struct rcu_head rcu;
 };
diff --git a/security/integrity/ima/ima_iint.c b/security/integrity/ima/ima_iint.c
index afba4ae..8f98a48 100644
--- a/security/integrity/ima/ima_iint.c
+++ b/security/integrity/ima/ima_iint.c
@@ -73,6 +73,20 @@ out:
 	return rc;
 }
 
+void ima_check_counters(struct inode *inode)
+{
+	if (inode->readcount)
+		printk(KERN_INFO "%s: readcount: %ld\n", __func__, inode->readcount);
+	if (inode->writecount)
+		printk(KERN_INFO "%s: writecount: %ld\n", __func__, inode->writecount);
+	if (inode->opencount)
+		printk(KERN_INFO "%s: opencount: %ld\n", __func__, inode->opencount);
+
+	inode->readcount = 0;
+	inode->writecount = 0;
+	inode->opencount = 0;
+}
+
 /* iint_free - called when the iint refcount goes to zero */
 void iint_free(struct kref *kref)
 {
@@ -80,21 +94,6 @@ void iint_free(struct kref *kref)
 						   refcount);
 	iint->version = 0;
 	iint->flags = 0UL;
-	if (iint->readcount != 0) {
-		printk(KERN_INFO "%s: readcount: %ld\n", __func__,
-		       iint->readcount);
-		iint->readcount = 0;
-	}
-	if (iint->writecount != 0) {
-		printk(KERN_INFO "%s: writecount: %ld\n", __func__,
-		       iint->writecount);
-		iint->writecount = 0;
-	}
-	if (iint->opencount != 0) {
-		printk(KERN_INFO "%s: opencount: %ld\n", __func__,
-		       iint->opencount);
-		iint->opencount = 0;
-	}
 	kref_init(&iint->refcount);
 	kmem_cache_free(iint_cache, iint);
 }
@@ -131,9 +130,6 @@ static void init_once(void *foo)
 	iint->version = 0;
 	iint->flags = 0UL;
 	mutex_init(&iint->mutex);
-	iint->readcount = 0;
-	iint->writecount = 0;
-	iint->opencount = 0;
 	kref_init(&iint->refcount);
 }
 
diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c
index e662b89..814fc84 100644
--- a/security/integrity/ima/ima_main.c
+++ b/security/integrity/ima/ima_main.c
@@ -97,18 +97,19 @@ out:
  */
 enum iint_pcr_error { TOMTOU, OPEN_WRITERS };
 static void ima_read_write_check(enum iint_pcr_error error,
-				 struct ima_iint_cache *iint,
 				 struct inode *inode,
 				 const unsigned char *filename)
 {
+	BUG_ON(!mutex_is_locked(&inode->i_mutex));
+
 	switch (error) {
 	case TOMTOU:
-		if (iint->readcount > 0)
+		if (inode->readcount > 0)
 			ima_add_violation(inode, filename, "invalid_pcr",
 					  "ToMToU");
 		break;
 	case OPEN_WRITERS:
-		if (iint->writecount > 0)
+		if (inode->writecount > 0)
 			ima_add_violation(inode, filename, "invalid_pcr",
 					  "open_writers");
 		break;
@@ -118,15 +119,15 @@ static void ima_read_write_check(enum iint_pcr_error error,
 /*
  * Update the counts given an fmode_t
  */
-static void ima_inc_counts(struct ima_iint_cache *iint, fmode_t mode)
+static void ima_inc_counts(struct inode *inode, fmode_t mode)
 {
-	BUG_ON(!mutex_is_locked(&iint->mutex));
+	BUG_ON(!mutex_is_locked(&inode->i_mutex));
 
-	iint->opencount++;
+	inode->opencount++;
 	if ((mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
-		iint->readcount++;
+		inode->readcount++;
 	if (mode & FMODE_WRITE)
-		iint->writecount++;
+		inode->writecount++;
 }
 
 /*
@@ -154,6 +155,7 @@ void ima_counts_get(struct file *file)
 	if (!iint)
 		return;
 	mutex_lock(&iint->mutex);
+	mutex_lock(&inode->i_mutex);
 	if (!ima_initialized)
 		goto out;
 	rc = ima_must_measure(iint, inode, MAY_READ, FILE_CHECK);
@@ -161,12 +163,13 @@ void ima_counts_get(struct file *file)
 		goto out;
 
 	if (mode & FMODE_WRITE) {
-		ima_read_write_check(TOMTOU, iint, inode, dentry->d_name.name);
+		ima_read_write_check(TOMTOU, inode, dentry->d_name.name);
 		goto out;
 	}
-	ima_read_write_check(OPEN_WRITERS, iint, inode, dentry->d_name.name);
+	ima_read_write_check(OPEN_WRITERS, inode, dentry->d_name.name);
 out:
-	ima_inc_counts(iint, file->f_mode);
+	ima_inc_counts(inode, file->f_mode);
+	mutex_unlock(&inode->i_mutex);
 	mutex_unlock(&iint->mutex);
 
 	kref_put(&iint->refcount, iint_free);
@@ -180,25 +183,26 @@ static void ima_dec_counts(struct ima_iint_cache *iint, struct inode *inode,
 {
 	mode_t mode = file->f_mode;
 	BUG_ON(!mutex_is_locked(&iint->mutex));
+	BUG_ON(!mutex_is_locked(&inode->i_mutex));
 
-	iint->opencount--;
+	inode->opencount--;
 	if ((mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
-		iint->readcount--;
+		inode->readcount--;
 	if (mode & FMODE_WRITE) {
-		iint->writecount--;
-		if (iint->writecount == 0) {
+		inode->writecount--;
+		if (inode->writecount == 0) {
 			if (iint->version != inode->i_version)
 				iint->flags &= ~IMA_MEASURED;
 		}
 	}
 
-	if (((iint->opencount < 0) ||
-	     (iint->readcount < 0) ||
-	     (iint->writecount < 0)) &&
+	if (((inode->opencount < 0) ||
+	     (inode->readcount < 0) ||
+	     (inode->writecount < 0)) &&
 	    !ima_limit_imbalance(file)) {
 		printk(KERN_INFO "%s: open/free imbalance (r:%ld w:%ld o:%ld)\n",
-		       __func__, iint->readcount, iint->writecount,
-		       iint->opencount);
+		       __func__, inode->readcount, inode->writecount,
+		       inode->opencount);
 		dump_stack();
 	}
 }
@@ -222,8 +226,10 @@ void ima_file_free(struct file *file)
 		return;
 
 	mutex_lock(&iint->mutex);
+	mutex_lock(&inode->i_mutex);
 	ima_dec_counts(iint, inode, file);
 	mutex_unlock(&iint->mutex);
+	mutex_unlock(&inode->i_mutex);
 	kref_put(&iint->refcount, iint_free);
 }
 


^ permalink raw reply related	[flat|nested] 30+ messages in thread

* [PATCH 2/3] IMA: only allocate iint when needed
  2010-10-19  1:16 [PATCH 1/3] IMA: move read/write counters into struct inode Eric Paris
@ 2010-10-19  1:16 ` Eric Paris
  2010-10-19  1:17 ` [PATCH 3/3] IMA: use rbtree instead of radix tree for inode information cache Eric Paris
                   ` (3 subsequent siblings)
  4 siblings, 0 replies; 30+ messages in thread
From: Eric Paris @ 2010-10-19  1:16 UTC (permalink / raw)
  To: linux-kernel, linux-security-module, linux-fsdevel
  Cc: hch, zohar, warthog9, david, jmorris, kyle, hpa, akpm, torvalds,
	mingo, eparis, viro

IMA always allocates an integrity structure to hold information about every
inode, but only needed this structure to tract the number of readers and
writers currently accessing a given inode.  Since that information was moved
into struct inode instead of the integrity struct this patch stops allocating
the integrity stucture until it is needed.  Thurs greatly reducing memory
usage.

Signed-off-by: Eric Paris <eparis@redhat.com>
---

 include/linux/ima.h               |    6 --
 security/integrity/ima/ima.h      |    1 
 security/integrity/ima/ima_api.c  |    2 -
 security/integrity/ima/ima_iint.c |    4 --
 security/integrity/ima/ima_main.c |   91 ++++++++++++++++++++++++++-----------
 security/security.c               |    4 --
 6 files changed, 67 insertions(+), 41 deletions(-)

diff --git a/include/linux/ima.h b/include/linux/ima.h
index ad90984..4359f28 100644
--- a/include/linux/ima.h
+++ b/include/linux/ima.h
@@ -15,7 +15,6 @@ struct linux_binprm;
 
 #ifdef CONFIG_IMA
 extern int ima_bprm_check(struct linux_binprm *bprm);
-extern int ima_inode_alloc(struct inode *inode);
 extern void ima_inode_free(struct inode *inode);
 extern int ima_file_check(struct file *file, int mask);
 extern void ima_file_free(struct file *file);
@@ -29,11 +28,6 @@ static inline int ima_bprm_check(struct linux_binprm *bprm)
 	return 0;
 }
 
-static inline int ima_inode_alloc(struct inode *inode)
-{
-	return 0;
-}
-
 static inline void ima_inode_free(struct inode *inode)
 {
 	return;
diff --git a/security/integrity/ima/ima.h b/security/integrity/ima/ima.h
index e500fe3..0767717 100644
--- a/security/integrity/ima/ima.h
+++ b/security/integrity/ima/ima.h
@@ -70,6 +70,7 @@ int ima_init(void);
 void ima_cleanup(void);
 int ima_fs_init(void);
 void ima_fs_cleanup(void);
+int ima_inode_alloc(struct inode *inode);
 int ima_add_template_entry(struct ima_template_entry *entry, int violation,
 			   const char *op, struct inode *inode);
 int ima_calc_hash(struct file *file, char *digest);
diff --git a/security/integrity/ima/ima_api.c b/security/integrity/ima/ima_api.c
index 52015d0..44f779f 100644
--- a/security/integrity/ima/ima_api.c
+++ b/security/integrity/ima/ima_api.c
@@ -116,7 +116,7 @@ int ima_must_measure(struct ima_iint_cache *iint, struct inode *inode,
 {
 	int must_measure;
 
-	if (iint->flags & IMA_MEASURED)
+	if (iint && (iint->flags & IMA_MEASURED))
 		return 1;
 
 	must_measure = ima_match_policy(inode, function, mask);
diff --git a/security/integrity/ima/ima_iint.c b/security/integrity/ima/ima_iint.c
index 8f98a48..1aff8b9 100644
--- a/security/integrity/ima/ima_iint.c
+++ b/security/integrity/ima/ima_iint.c
@@ -126,9 +126,7 @@ static void init_once(void *foo)
 {
 	struct ima_iint_cache *iint = foo;
 
-	memset(iint, 0, sizeof *iint);
-	iint->version = 0;
-	iint->flags = 0UL;
+	memset(iint, 0, sizeof(*iint));
 	mutex_init(&iint->mutex);
 	kref_init(&iint->refcount);
 }
diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c
index 814fc84..a09d975 100644
--- a/security/integrity/ima/ima_main.c
+++ b/security/integrity/ima/ima_main.c
@@ -146,19 +146,17 @@ void ima_counts_get(struct file *file)
 	struct dentry *dentry = file->f_path.dentry;
 	struct inode *inode = dentry->d_inode;
 	fmode_t mode = file->f_mode;
-	struct ima_iint_cache *iint;
 	int rc;
 
 	if (!iint_initialized || !S_ISREG(inode->i_mode))
 		return;
-	iint = ima_iint_find_get(inode);
-	if (!iint)
-		return;
-	mutex_lock(&iint->mutex);
+
 	mutex_lock(&inode->i_mutex);
+
 	if (!ima_initialized)
 		goto out;
-	rc = ima_must_measure(iint, inode, MAY_READ, FILE_CHECK);
+
+	rc = ima_must_measure(NULL, inode, MAY_READ, FILE_CHECK);
 	if (rc < 0)
 		goto out;
 
@@ -170,31 +168,22 @@ void ima_counts_get(struct file *file)
 out:
 	ima_inc_counts(inode, file->f_mode);
 	mutex_unlock(&inode->i_mutex);
-	mutex_unlock(&iint->mutex);
-
-	kref_put(&iint->refcount, iint_free);
 }
 
 /*
  * Decrement ima counts
  */
-static void ima_dec_counts(struct ima_iint_cache *iint, struct inode *inode,
-			   struct file *file)
+static void ima_dec_counts(struct inode *inode, struct file *file)
 {
 	mode_t mode = file->f_mode;
-	BUG_ON(!mutex_is_locked(&iint->mutex));
+
 	BUG_ON(!mutex_is_locked(&inode->i_mutex));
 
 	inode->opencount--;
 	if ((mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
 		inode->readcount--;
-	if (mode & FMODE_WRITE) {
+	if (mode & FMODE_WRITE)
 		inode->writecount--;
-		if (inode->writecount == 0) {
-			if (iint->version != inode->i_version)
-				iint->flags &= ~IMA_MEASURED;
-		}
-	}
 
 	if (((inode->opencount < 0) ||
 	     (inode->readcount < 0) ||
@@ -207,6 +196,45 @@ static void ima_dec_counts(struct ima_iint_cache *iint, struct inode *inode,
 	}
 }
 
+static void ima_check_last_writer(struct ima_iint_cache *iint,
+				  struct inode *inode,
+				  struct file *file)
+{
+	mode_t mode = file->f_mode;
+
+	BUG_ON(!mutex_is_locked(&iint->mutex));
+	BUG_ON(!mutex_is_locked(&inode->i_mutex));
+
+	if (mode & FMODE_WRITE &&
+	    inode->writecount == 0 &&
+	    iint->version != inode->i_version)
+		iint->flags &= ~IMA_MEASURED;
+}
+
+static void ima_file_free_iint(struct ima_iint_cache *iint, struct inode *inode,
+			       struct file *file)
+{
+	mutex_lock(&iint->mutex);
+	mutex_lock(&inode->i_mutex);
+
+	ima_dec_counts(inode, file);
+	ima_check_last_writer(iint, inode, file);
+
+	mutex_unlock(&inode->i_mutex);
+	mutex_unlock(&iint->mutex);
+
+	kref_put(&iint->refcount, iint_free);
+}
+
+static void ima_file_free_noiint(struct inode *inode, struct file *file)
+{
+	mutex_lock(&inode->i_mutex);
+
+	ima_dec_counts(inode, file);
+
+	mutex_unlock(&inode->i_mutex);
+}
+
 /**
  * ima_file_free - called on __fput()
  * @file: pointer to file structure being freed
@@ -222,15 +250,12 @@ void ima_file_free(struct file *file)
 	if (!iint_initialized || !S_ISREG(inode->i_mode))
 		return;
 	iint = ima_iint_find_get(inode);
-	if (!iint)
-		return;
 
-	mutex_lock(&iint->mutex);
-	mutex_lock(&inode->i_mutex);
-	ima_dec_counts(iint, inode, file);
-	mutex_unlock(&iint->mutex);
-	mutex_unlock(&inode->i_mutex);
-	kref_put(&iint->refcount, iint_free);
+	if (iint)
+		ima_file_free_iint(iint, inode, file);
+	else
+		ima_file_free_noiint(inode, file);
+
 }
 
 static int process_measurement(struct file *file, const unsigned char *filename,
@@ -242,11 +267,21 @@ static int process_measurement(struct file *file, const unsigned char *filename,
 
 	if (!ima_initialized || !S_ISREG(inode->i_mode))
 		return 0;
+
+	rc = ima_must_measure(NULL, inode, mask, function);
+	if (rc != 0)
+		return rc;
+retry:
 	iint = ima_iint_find_get(inode);
-	if (!iint)
-		return -ENOMEM;
+	if (!iint) {
+		rc = ima_inode_alloc(inode);
+		if (!rc || rc == -EEXIST)
+			goto retry;
+		return rc;
+	}
 
 	mutex_lock(&iint->mutex);
+
 	rc = ima_must_measure(iint, inode, mask, function);
 	if (rc != 0)
 		goto out;
diff --git a/security/security.c b/security/security.c
index ad5ca29..04c098d 100644
--- a/security/security.c
+++ b/security/security.c
@@ -331,9 +331,7 @@ int security_inode_alloc(struct inode *inode)
 	ret =  security_ops->inode_alloc_security(inode);
 	if (ret)
 		return ret;
-	ret = ima_inode_alloc(inode);
-	if (ret)
-		security_inode_free(inode);
+
 	return ret;
 }
 


^ permalink raw reply related	[flat|nested] 30+ messages in thread

* [PATCH 3/3] IMA: use rbtree instead of radix tree for inode information cache
  2010-10-19  1:16 [PATCH 1/3] IMA: move read/write counters into struct inode Eric Paris
  2010-10-19  1:16 ` [PATCH 2/3] IMA: only allocate iint when needed Eric Paris
@ 2010-10-19  1:17 ` Eric Paris
  2010-10-19  1:30 ` [PATCH 1/3] IMA: move read/write counters into struct inode Christoph Hellwig
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 30+ messages in thread
From: Eric Paris @ 2010-10-19  1:17 UTC (permalink / raw)
  To: linux-kernel, linux-security-module, linux-fsdevel
  Cc: hch, zohar, warthog9, david, jmorris, kyle, hpa, akpm, torvalds,
	mingo, eparis, viro

The IMA code needs to store the number of tasks which have an open fd
granting permission to write a file even when IMA is not in use.  It needs
this information in order to be enabled at a later point in time without
losing it's integrity garantees.  At the moment that means we store a
little bit of data about every inode in a cache.  We use a radix tree key'd
on the inode's memory address.  Dave Chinner pointed out that a radix tree
is a terrible data structure for such a sparse key space.  This patch
switches to using an rbtree which should be more efficient.

Bug report from Dave:

 I just noticed that slabtop
was reportingi an awfully high usage of radix tree nodes:

 OBJS ACTIVE  USE OBJ SIZE  SLABS OBJ/SLAB CACHE SIZE NAME
4200331 2778082  66%    0.55K 144839       29   2317424K radix_tree_node
2321500 2060290  88%    1.00K  72581       32   2322592K xfs_inode
2235648 2069791  92%    0.12K  69864       32    279456K iint_cache

That is, 2.7M radix tree nodes are allocated, and the cache itself
is consuming 2.3GB of RAM. I know that the XFS inodei caches are
indexed by radix tree node, but for 2 million cached inodes that
would mean a density of 1 inode per radix tree node, which for a
system with 16M inodes in the filsystems is an impossibly low
density. The worst I've seen in a production system like kernel.org
is about 20-25% density, which would mean about 150−200k radix tree
nodes for that many inodes. So it's not the inode cache.

So I looked up what the iint_cache was. It appears to used for storing
per-inode IMA information, and uses a radix tree for indexing.
It uses the *address* of the struct inode as the indexing key. That
means the key space is extremely sparse - for XFS the struct inode
addresses are approximately 1000 bytes apart, which means the
closest the radix tree index keys get is ~1000. Which means
that there is a single entry per radix tree leaf node, so the radix
tree is using roughly 550 bytes for every 120byte structure being
cached. For the above example, it's probably wasting close to 1GB of
RAM....

Reported-by: Dave Chinner <david@fromorbit.com>
Signed-off-by: Eric Paris <eparis@redhat.com>
---

 security/integrity/ima/ima.h      |    4 +-
 security/integrity/ima/ima_iint.c |   86 +++++++++++++++++++++++++++----------
 2 files changed, 66 insertions(+), 24 deletions(-)

diff --git a/security/integrity/ima/ima.h b/security/integrity/ima/ima.h
index 0767717..386026a 100644
--- a/security/integrity/ima/ima.h
+++ b/security/integrity/ima/ima.h
@@ -101,6 +101,8 @@ static inline unsigned long ima_hash_key(u8 *digest)
 
 /* integrity data associated with an inode */
 struct ima_iint_cache {
+	struct rb_node rb_node; /* rooted in ima_iint_tree */
+	struct inode *inode;	/* back pointer to inode in question */
 	u64 version;		/* track inode changes */
 	unsigned long flags;
 	u8 digest[IMA_DIGEST_SIZE];
@@ -120,7 +122,7 @@ int ima_store_template(struct ima_template_entry *entry, int violation,
 void ima_template_show(struct seq_file *m, void *e,
 		       enum ima_show_type show);
 
-/* radix tree calls to lookup, insert, delete
+/* rbtree tree calls to lookup, insert, delete
  * integrity data associated with an inode.
  */
 struct ima_iint_cache *ima_iint_insert(struct inode *inode);
diff --git a/security/integrity/ima/ima_iint.c b/security/integrity/ima/ima_iint.c
index 1aff8b9..f9f3b46 100644
--- a/security/integrity/ima/ima_iint.c
+++ b/security/integrity/ima/ima_iint.c
@@ -12,21 +12,48 @@
  * File: ima_iint.c
  * 	- implements the IMA hooks: ima_inode_alloc, ima_inode_free
  *	- cache integrity information associated with an inode
- *	  using a radix tree.
+ *	  using a rbtree tree.
  */
 #include <linux/slab.h>
 #include <linux/module.h>
 #include <linux/spinlock.h>
-#include <linux/radix-tree.h>
+#include <linux/rbtree.h>
 #include "ima.h"
 
-RADIX_TREE(ima_iint_store, GFP_ATOMIC);
+static struct rb_root ima_iint_tree = RB_ROOT;
 DEFINE_SPINLOCK(ima_iint_lock);
 static struct kmem_cache *iint_cache __read_mostly;
 
 int iint_initialized = 0;
 
-/* ima_iint_find_get - return the iint associated with an inode
+/*
+ * __ima_iint_find - return the iint associated with an inode
+ *
+ * The caller must hold either the rcu_read_lock or the ima_iint_lock
+ */
+static struct ima_iint_cache *__ima_iint_find(struct inode *inode)
+{
+	struct ima_iint_cache *iint;
+	struct rb_node *n = ima_iint_tree.rb_node;
+
+	while (n) {
+		iint = rb_entry(n, struct ima_iint_cache, rb_node);
+
+		if (inode < iint->inode)
+			n = n->rb_left;
+		else if (inode > iint->inode)
+			n = n->rb_right;
+		else
+			break;
+	}
+	if (!n)
+		return NULL;
+
+	return iint;
+}
+
+/*
+ * ima_iint_find_get - return the iint associated with an inode
  *
  * ima_iint_find_get gets a reference to the iint. Caller must
  * remember to put the iint reference.
@@ -36,12 +63,11 @@ struct ima_iint_cache *ima_iint_find_get(struct inode *inode)
 	struct ima_iint_cache *iint;
 
 	rcu_read_lock();
-	iint = radix_tree_lookup(&ima_iint_store, (unsigned long)inode);
-	if (!iint)
-		goto out;
-	kref_get(&iint->refcount);
-out:
+	iint = __ima_iint_find(inode);
+	if (iint)
+		kref_get(&iint->refcount);
 	rcu_read_unlock();
+
 	return iint;
 }
 
@@ -51,26 +77,38 @@ out:
  */
 int ima_inode_alloc(struct inode *inode)
 {
-	struct ima_iint_cache *iint = NULL;
-	int rc = 0;
+	struct rb_node **p;
+	struct rb_node *new_node, *parent = NULL;
+	struct ima_iint_cache *new_iint, *test_iint;
 
-	iint = kmem_cache_alloc(iint_cache, GFP_NOFS);
-	if (!iint)
+	new_iint = kmem_cache_alloc(iint_cache, GFP_NOFS);
+	if (!new_iint)
 		return -ENOMEM;
 
-	rc = radix_tree_preload(GFP_NOFS);
-	if (rc < 0)
-		goto out;
+	new_iint->inode = inode;
+	new_node = &new_iint->rb_node;
 
 	spin_lock(&ima_iint_lock);
-	rc = radix_tree_insert(&ima_iint_store, (unsigned long)inode, iint);
+
+	p = &ima_iint_tree.rb_node;
+	while (*p) {
+		parent = *p;
+		test_iint = rb_entry(parent, struct ima_iint_cache, rb_node);
+
+		if (inode < test_iint->inode)
+			p = &(*p)->rb_left;
+		else if (inode > test_iint->inode)
+			p = &(*p)->rb_right;
+		else
+			BUG();
+	}
+
+	rb_link_node(new_node, parent, p);
+	rb_insert_color(new_node, &ima_iint_tree);
+
 	spin_unlock(&ima_iint_lock);
-	radix_tree_preload_end();
-out:
-	if (rc < 0)
-		kmem_cache_free(iint_cache, iint);
 
-	return rc;
+	return 0;
 }
 
 void ima_check_counters(struct inode *inode)
@@ -116,7 +154,9 @@ void ima_inode_free(struct inode *inode)
 	struct ima_iint_cache *iint;
 
 	spin_lock(&ima_iint_lock);
-	iint = radix_tree_delete(&ima_iint_store, (unsigned long)inode);
+	iint = __ima_iint_find(inode);
+	if (iint)
+		rb_erase(&iint->rb_node, &ima_iint_tree);
 	spin_unlock(&ima_iint_lock);
 	if (iint)
 		call_rcu(&iint->rcu, iint_rcu_free);

--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related	[flat|nested] 30+ messages in thread

* Re: [PATCH 1/3] IMA: move read/write counters into struct inode
  2010-10-19  1:16 [PATCH 1/3] IMA: move read/write counters into struct inode Eric Paris
  2010-10-19  1:16 ` [PATCH 2/3] IMA: only allocate iint when needed Eric Paris
  2010-10-19  1:17 ` [PATCH 3/3] IMA: use rbtree instead of radix tree for inode information cache Eric Paris
@ 2010-10-19  1:30 ` Christoph Hellwig
  2010-10-19  2:14   ` Eric Paris
  2010-10-19  2:46 ` Eric Paris
  2010-10-19 15:52 ` Linus Torvalds
  4 siblings, 1 reply; 30+ messages in thread
From: Christoph Hellwig @ 2010-10-19  1:30 UTC (permalink / raw)
  To: Eric Paris
  Cc: linux-kernel, linux-security-module, linux-fsdevel, hch, zohar,
	warthog9, david, jmorris, kyle, hpa, akpm, torvalds, mingo, viro


I do not like this at all.   It bloats the inode with three unsigned
long values for a feature no sane person would ever use.  And given
that distros are sweet-talked by IBM to enable it the world will pay
a huge penality for those 0.5% of the userbase that use IMA.

Please reorder your series to have patch to disable IMA unless
explicitly enabled on the kernel command line first, and then second
use the rbtree from your last patch.


^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 1/3] IMA: move read/write counters into struct inode
  2010-10-19  1:30 ` [PATCH 1/3] IMA: move read/write counters into struct inode Christoph Hellwig
@ 2010-10-19  2:14   ` Eric Paris
  2010-10-19  7:39     ` Dave Chinner
  2010-10-19  8:39     ` Ingo Molnar
  0 siblings, 2 replies; 30+ messages in thread
From: Eric Paris @ 2010-10-19  2:14 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: linux-kernel, linux-security-module, linux-fsdevel, zohar,
	warthog9, david, jmorris, kyle, hpa, akpm, torvalds, mingo, viro

On Mon, 2010-10-18 at 21:30 -0400, Christoph Hellwig wrote:
> I do not like this at all.   It bloats the inode with three unsigned
> long values for a feature no sane person would ever use.  And given
> that distros are sweet-talked by IBM to enable it the world will pay
> a huge penality for those 0.5% of the userbase that use IMA.
> 
> Please reorder your series to have patch to disable IMA unless
> explicitly enabled on the kernel command line first, and then second
> use the rbtree from your last patch.

More cryptic command line options and complexity is not the solution.  I
have no plans to send (a fixed/"working" version of) Kyle's patch which
would cause a userspace regression since working machines would
magically stop working.

The right solution is to provide features without unreasonably impacting
those who do not want those features.  At the moment my patch series
reduces memory usage by a factor of at least 40 and eliminates the
locking contention and serialization of bringing inodes into and out of
core.  It does so without introducing ANY additional overhead or
complexity.

If there is a general consensus that 24 bytes per inode is too large we
can move forward from here and drop the 'opencount' and save 8 bytes
(while eliminating the debugging and verification this code has helped
to provide in the past)

If there is a general consensus that 16 bytes per inode is too large we
can travel down the much more complex route of attempting to collect
this information dynamically by freezing userspace, scanning every open
file, and calculating the information.  We would then have a 2 stage
integrity structure, the first with just counters and the second would
contain integrity information if needed.

I'm willing to add those next 2 steps to my todo list if there is a
consensus that the situation at hand is untenable, but those cannot be
my highest priority.  We are talking about a feature which has been
enabled without massive complaint (aside from having bugs fixed in the
kernel, in IMA, and in out of tree modules) in Fedora kernels for over a
year.  Obviously I think Dave's report is a big deal.  Things were
really really wrong, and I'm willing to fix it when things are wrong,
but eventually I hit the point of diminishing returns.

-Eric


^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 1/3] IMA: move read/write counters into struct inode
  2010-10-19  1:16 [PATCH 1/3] IMA: move read/write counters into struct inode Eric Paris
                   ` (2 preceding siblings ...)
  2010-10-19  1:30 ` [PATCH 1/3] IMA: move read/write counters into struct inode Christoph Hellwig
@ 2010-10-19  2:46 ` Eric Paris
  2010-10-19 15:52 ` Linus Torvalds
  4 siblings, 0 replies; 30+ messages in thread
From: Eric Paris @ 2010-10-19  2:46 UTC (permalink / raw)
  To: linux-kernel
  Cc: linux-security-module, linux-fsdevel, hch, zohar, warthog9, david,
	jmorris, kyle, hpa, akpm, torvalds, mingo, viro

On Mon, 2010-10-18 at 21:16 -0400, Eric Paris wrote:
> IMA currently alocated an inode integrity structure for every inode in
> core.  This stucture is about 120 bytes long.  Most files however
> (especially on a system which doesn't make use of IMA) will never need any
> of this space.  The problem is that if IMA is enabled we need to know
> information about the number of readers and the number of writers for every
> inode on the box.  At the moment we collect that information in the per
> inode iint structure and waste the rest of the space.  This patch moves those
> counters into the struct inode so we can eventually stop allocating an IMA
> integrity structure except when absolutely needed.
> 
> This patch does the minimum needed to move the location of the data.  Further
> cleanups, especially the location of counter updates, may still be possible.
> 
> Signed-off-by: Eric Paris <eparis@redhat.com>

Noone should apply this, it doesn't build on !CONFIG_IMA.  Notice my
extra ; on the end of the line   :(

> +static inline void ima_check_counters(struct inode *inode);
> +{
> +	return;
> +}
>  #endif /* CONFIG_IMA_H */
>  #endif /* _LINUX_IMA_H */



^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 1/3] IMA: move read/write counters into struct inode
  2010-10-19  2:14   ` Eric Paris
@ 2010-10-19  7:39     ` Dave Chinner
  2010-10-19 16:24       ` Eric Paris
  2010-10-19  8:39     ` Ingo Molnar
  1 sibling, 1 reply; 30+ messages in thread
From: Dave Chinner @ 2010-10-19  7:39 UTC (permalink / raw)
  To: Eric Paris
  Cc: Christoph Hellwig, linux-kernel, linux-security-module,
	linux-fsdevel, zohar, warthog9, jmorris, kyle, hpa, akpm,
	torvalds, mingo, viro

On Mon, Oct 18, 2010 at 10:14:03PM -0400, Eric Paris wrote:
> On Mon, 2010-10-18 at 21:30 -0400, Christoph Hellwig wrote:
> > I do not like this at all.   It bloats the inode with three unsigned
> > long values for a feature no sane person would ever use.  And given
> > that distros are sweet-talked by IBM to enable it the world will pay
> > a huge penality for those 0.5% of the userbase that use IMA.
> > 
> > Please reorder your series to have patch to disable IMA unless
> > explicitly enabled on the kernel command line first, and then second
> > use the rbtree from your last patch.
> 
> More cryptic command line options and complexity is not the solution.  I
> have no plans to send (a fixed/"working" version of) Kyle's patch which
> would cause a userspace regression since working machines would
> magically stop working.
> 
> The right solution is to provide features without unreasonably impacting
> those who do not want those features.  At the moment my patch series
> reduces memory usage by a factor of at least 40 and eliminates the
> locking contention and serialization of bringing inodes into and out of
> core.  It does so without introducing ANY additional overhead or
> complexity.
> 
> If there is a general consensus that 24 bytes per inode is too large we
> can move forward from here and drop the 'opencount' and save 8 bytes
> (while eliminating the debugging and verification this code has helped
> to provide in the past)

Eric, just to put that in context - changing the size of an inode
needs to be conidered carefully because we cache so many of them. We
often jump through hoops just to reduce it by 4 or 8 bytes. You are
proposing to increase it by 24 bytes (roughly 5%) and as such that
_should_ be considered a big deal, especially for something that is
currently rarely used.

Personally I that adding a pointer into the struct inode is as much
as I'd want to compromise to. Those that want to use IMA or have the
possibility of turning it on dynamicaly can accept the additional
overhead of another memory allocation during inode allocation as the
cost of using this functionality.  That's the way the security
subsystem works, so I don't see any problems with doing this for IMA
and it turns the overhead problem into one that only affects those
that have it both configured and enabled.  That seems like a
reasonable compromise to me....

Cheers,

Dave.
-- 
Dave Chinner
david@fromorbit.com

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 1/3] IMA: move read/write counters into struct inode
  2010-10-19  2:14   ` Eric Paris
  2010-10-19  7:39     ` Dave Chinner
@ 2010-10-19  8:39     ` Ingo Molnar
  1 sibling, 0 replies; 30+ messages in thread
From: Ingo Molnar @ 2010-10-19  8:39 UTC (permalink / raw)
  To: Eric Paris
  Cc: Christoph Hellwig, linux-kernel, linux-security-module,
	linux-fsdevel, zohar, warthog9, david, jmorris, kyle, hpa, akpm,
	torvalds, viro


* Eric Paris <eparis@redhat.com> wrote:

> More cryptic command line options and complexity is not the solution.  I have no 
> plans to send (a fixed/"working" version of) Kyle's patch which would cause a 
> userspace regression since working machines would magically stop working.

That is a valid argument.

> The right solution is to provide features without unreasonably impacting those who 
> do not want those features.  At the moment my patch series reduces memory usage by 
> a factor of at least 40 and eliminates the locking contention and serialization of 
> bringing inodes into and out of core.  It does so without introducing ANY 
> additional overhead or complexity.

And that argument is missing the point IMO. Christoph is perfectly correct to 
observe:

  It bloats the inode with three unsigned long values for a feature no sane person
  would ever use.

that's 24 bytes per inode - when in the past we've made _huge_ efforts to minimally 
shrink it by a few bytes.

> If there is a general consensus that 24 bytes per inode is too large we can move 
> forward from here and drop the 'opencount' and save 8 bytes (while eliminating the 
> debugging and verification this code has helped to provide in the past)
> 
> If there is a general consensus that 16 bytes per inode is too large we can travel 
> down the much more complex route of attempting to collect this information 
> dynamically by freezing userspace, scanning every open file, and calculating the 
> information.  We would then have a 2 stage integrity structure, the first with 
> just counters and the second would contain integrity information if needed.

Hm, why would all that be needed?

The security subsystem already has a private-data extension in struct inode:

 struct inode {
 ...
 #ifdef CONFIG_SECURITY
         void                    *i_security;
 #endif
 ...
 }

That could be used for IMA, right? IMA state can either embedd in struct 
inode_security_struct or the security subsystem could define an intermediate data 
structure. (it's up to the security subsystem how it wants to make use of this data 
pointer.)

An unlimited amount of optional information can be attached that way.

Thanks,

	Ingo

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 1/3] IMA: move read/write counters into struct inode
  2010-10-19  1:16 [PATCH 1/3] IMA: move read/write counters into struct inode Eric Paris
                   ` (3 preceding siblings ...)
  2010-10-19  2:46 ` Eric Paris
@ 2010-10-19 15:52 ` Linus Torvalds
  2010-10-19 16:36   ` Eric Paris
  4 siblings, 1 reply; 30+ messages in thread
From: Linus Torvalds @ 2010-10-19 15:52 UTC (permalink / raw)
  To: Eric Paris
  Cc: linux-kernel, linux-security-module, linux-fsdevel, hch, zohar,
	warthog9, david, jmorris, kyle, hpa, akpm, mingo, viro

On Mon, Oct 18, 2010 at 6:16 PM, Eric Paris <eparis@redhat.com> wrote:
>
> IMA currently alocated an inode integrity structure for every inode in
> core.  This stucture is about 120 bytes long.  Most files however
> (especially on a system which doesn't make use of IMA) will never need any
> of this space.  The problem is that if IMA is enabled we need to know
> information about the number of readers and the number of writers for every
> inode on the box.  At the moment we collect that information in the per
> inode iint structure and waste the rest of the space.  This patch moves those
> counters into the struct inode so we can eventually stop allocating an IMA
> integrity structure except when absolutely needed.

Hmm. I don't think this is really acceptable as-is.

First off (and most trivially) - the fields are misnamed. Just calling
them "{open,read,write}count" was fine when it was part of an ima
structure, but for all the historical reasons, inode fields are called
'i_xyzzy'.

Secondly, we already maintain a write count (called "i_writecount").
Why is the IMA writecount different, and should it be?

Thirdly, why is it an "unsigned long"? Are the IMA numbers cumulative
or something? How could you ever overflow a 32-bit counter if not?

Finally, why does IMA even care about the read-counts vs open-counts?
Why not just open-counts, and consider any non-write to be an open?

In short, I think this patch would be _much_ more acceptable if it
added just a _single_ 32-bit "i_opencount". And even then I'd ask
"what's the difference between i_opencount and our already existing
i_count?

                                 Linus

IOW, at a glance, I think it might be much more acceptable if we only added
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 1/3] IMA: move read/write counters into struct inode
  2010-10-19  7:39     ` Dave Chinner
@ 2010-10-19 16:24       ` Eric Paris
  2010-10-19 16:29         ` Christoph Hellwig
  0 siblings, 1 reply; 30+ messages in thread
From: Eric Paris @ 2010-10-19 16:24 UTC (permalink / raw)
  To: Dave Chinner
  Cc: Christoph Hellwig, linux-kernel, linux-security-module,
	linux-fsdevel, zohar, warthog9, jmorris, kyle, hpa, akpm,
	torvalds, mingo, viro

On Tue, 2010-10-19 at 18:39 +1100, Dave Chinner wrote:
> On Mon, Oct 18, 2010 at 10:14:03PM -0400, Eric Paris wrote:

> Eric, just to put that in context - changing the size of an inode
> needs to be conidered carefully because we cache so many of them. We
> often jump through hoops just to reduce it by 4 or 8 bytes. You are
> proposing to increase it by 24 bytes (roughly 5%) and as such that
> _should_ be considered a big deal, especially for something that is
> currently rarely used.

In my mind it's framed a little differently, my patch series is reducing
it from ~900 bytes to 24 bytes.  Even though that memory might not have
been inside struct inode if there is always a 1-1 mapping it might as
well be....   I'm going from seriously broken to a hell of a lot better.
I believe that when I resend this series I'll drop 8 more of those bytes
(open count as I think we can do without that these days).

> Personally I that adding a pointer into the struct inode is as much
> as I'd want to compromise to. Those that want to use IMA or have the
> possibility of turning it on dynamicaly can accept the additional
> overhead of another memory allocation during inode allocation as the
> cost of using this functionality.  That's the way the security
> subsystem works, so I don't see any problems with doing this for IMA
> and it turns the overhead problem into one that only affects those
> that have it both configured and enabled.  That seems like a
> reasonable compromise to me....

The problem is that this would actually waste another 8 bytes (the size
of the pointer in struct inode) since IMA is still going to need to
allocate a structure for every inode to hold the 16-24 bytes of
counters.  That 16-24 might not be in struct inode, but like I said, if
there is a 1-1 mapping between the two there is no difference.

I said that if there was a consensus that this overhead was still too
large (and it seems that may be the case) I would put looking at using a
userspace freezer to attempt to collect the information dynamically on
my todo list.  I'll gladly do that but we have a space/time tradeoff I'd
rather have a consensus on before I start.

If I go the pointer in struct inode route, I don't need to serialize
entry and removal from core of every inode if IMA is enabled (while I
add and remove it from the IMA lookup tree.)  If I don't add any fields
to struct inode I'll need to serialize while I add them to the IMA
lookup tree, but at the savings of a void * in struct inode.

My guess is that most people will say forcing users to serialize and
saving 8bytes per inode is the better choice, but I know there is
scalability work going on and I want to make sure everyone agrees that
is the right choice before we spend a lot of time on anything like
this...

-Eric


^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 1/3] IMA: move read/write counters into struct inode
  2010-10-19 16:24       ` Eric Paris
@ 2010-10-19 16:29         ` Christoph Hellwig
  0 siblings, 0 replies; 30+ messages in thread
From: Christoph Hellwig @ 2010-10-19 16:29 UTC (permalink / raw)
  To: Eric Paris
  Cc: Dave Chinner, Christoph Hellwig, linux-kernel,
	linux-security-module, linux-fsdevel, zohar, warthog9, jmorris,
	kyle, hpa, akpm, torvalds, mingo, viro

Eric,

I think you and just about everyone here are on a different page, and
before we have the basic disagreement settled I'm not sure we can
make much progress.

Can you please explain why a feature like IMA that no sane user would
ever want should cause _any_ overhead for users that just have it
compiled in because their distro or defconfig did without actually
using.

What exactly is the problem to require the few people that want it to
use a kernel command line option and/or an _DEFAULT_ON config option?


^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 1/3] IMA: move read/write counters into struct inode
  2010-10-19 15:52 ` Linus Torvalds
@ 2010-10-19 16:36   ` Eric Paris
  2010-10-19 16:55     ` Al Viro
  0 siblings, 1 reply; 30+ messages in thread
From: Eric Paris @ 2010-10-19 16:36 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, linux-security-module, linux-fsdevel, hch, zohar,
	warthog9, david, jmorris, kyle, hpa, akpm, mingo, viro

On Tue, 2010-10-19 at 08:52 -0700, Linus Torvalds wrote:
> On Mon, Oct 18, 2010 at 6:16 PM, Eric Paris <eparis@redhat.com> wrote:
> >
> > IMA currently alocated an inode integrity structure for every inode in
> > core.  This stucture is about 120 bytes long.  Most files however
> > (especially on a system which doesn't make use of IMA) will never need any
> > of this space.  The problem is that if IMA is enabled we need to know
> > information about the number of readers and the number of writers for every
> > inode on the box.  At the moment we collect that information in the per
> > inode iint structure and waste the rest of the space.  This patch moves those
> > counters into the struct inode so we can eventually stop allocating an IMA
> > integrity structure except when absolutely needed.
> 
> Hmm. I don't think this is really acceptable as-is.
> 
> First off (and most trivially) - the fields are misnamed. Just calling
> them "{open,read,write}count" was fine when it was part of an ima
> structure, but for all the historical reasons, inode fields are called
> 'i_xyzzy'.

Will fix.

> Secondly, we already maintain a write count (called "i_writecount").
> Why is the IMA writecount different, and should it be?

I ask Al about reusing this field long ago and he indicated it had a
very different meaning.  I can't remember what he indicated it meant off
the top of my head but I'll take a look at it again.  Lines like this
leave me leary:

drivers/md/md.c::deny_bitmap_write_access()
	atomic_set(&inode->i_writecount, -1);

> Thirdly, why is it an "unsigned long"? Are the IMA numbers cumulative
> or something? How could you ever overflow a 32-bit counter if not?

Not cumulative.  32bits would probably be fine.

> Finally, why does IMA even care about the read-counts vs open-counts?
> Why not just open-counts, and consider any non-write to be an open?

What IMA needs to function is the current readers and current writers.
The open count was originally very useful when a number of places inside
the kernel were allocating struct files themselves rather than letting
the VFS do the lifting and we could end up with more struct files to a
given inode than IMA realized.  Back then IMA started trying to do
one-off hooks to each filesystem doing this to fix the counters and
measure appropriately but we eventually decided it was best to move all
struct file creation into the vfs so it couldn't get out of whack.  I
believe at this point we could drop the opencount....

> In short, I think this patch would be _much_ more acceptable if it
> added just a _single_ 32-bit "i_opencount". And even then I'd ask
> "what's the difference between i_opencount and our already existing
> i_count?

i_count, I believe, is much different.  i_count is counting the number
of dentries in core referencing the inode, even if none of them are
being used in any struct file or if one dentry is being referenced in
1000 struct files.  The IMA counters are from a higher level, they
counts the number of struct files referencing this inode.

I'll resend, shrinking from unsigned long to unsigned int and dropping
opencount from struct inode.  Should get us from using ~900 bytes per
inode to using about 8 bytes per inode.

And like I said, if that still seems like too much overhead to most
people (and it seems that's the case) I'll look at how to get down to 0,
but it isn't going to be a fast obvious change...

-Eric



^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 1/3] IMA: move read/write counters into struct inode
  2010-10-19 16:36   ` Eric Paris
@ 2010-10-19 16:55     ` Al Viro
  2010-10-19 17:03       ` Linus Torvalds
  0 siblings, 1 reply; 30+ messages in thread
From: Al Viro @ 2010-10-19 16:55 UTC (permalink / raw)
  To: Eric Paris
  Cc: Linus Torvalds, linux-kernel, linux-security-module,
	linux-fsdevel, hch, zohar, warthog9, david, jmorris, kyle, hpa,
	akpm, mingo

On Tue, Oct 19, 2010 at 12:36:55PM -0400, Eric Paris wrote:
> I ask Al about reusing this field long ago and he indicated it had a
> very different meaning.  I can't remember what he indicated it meant off
> the top of my head but I'll take a look at it again.

> i_count, I believe, is much different.  i_count is counting the number
> of dentries in core referencing the inode, even if none of them are
> being used in any struct file or if one dentry is being referenced in
> 1000 struct files.  The IMA counters are from a higher level, they
> counts the number of struct files referencing this inode.


a) i_writecount is about VM_DENYWRITE, basically.  Reusing it for ima could
get unpleasant; when it's positive, we are fine, but it can get negative as
well.  IMA will have interesting time dealing with that.

b) i_count is simply a refcount for struct inode.  Not exactly the number
of dentries, but that's the main contributor.  Basically, that's "how many
pointers outside of inode hash chains point that that struct inode at the
moment".

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 1/3] IMA: move read/write counters into struct inode
  2010-10-19 16:55     ` Al Viro
@ 2010-10-19 17:03       ` Linus Torvalds
  2010-10-19 17:28         ` Al Viro
  2010-10-19 22:49         ` Eric Paris
  0 siblings, 2 replies; 30+ messages in thread
From: Linus Torvalds @ 2010-10-19 17:03 UTC (permalink / raw)
  To: Al Viro
  Cc: Eric Paris, linux-kernel, linux-security-module, linux-fsdevel,
	hch, zohar, warthog9, david, jmorris, kyle, hpa, akpm, mingo

On Tue, Oct 19, 2010 at 9:55 AM, Al Viro <viro@zeniv.linux.org.uk> wrote:
>
> a) i_writecount is about VM_DENYWRITE, basically.  Reusing it for ima could
> get unpleasant; when it's positive, we are fine, but it can get negative as
> well.  IMA will have interesting time dealing with that.
>
> b) i_count is simply a refcount for struct inode.  Not exactly the number
> of dentries, but that's the main contributor.  Basically, that's "how many
> pointers outside of inode hash chains point that that struct inode at the
> moment".

My question was deeper. More along the lines of "why would IMA care?"

How/why could IMA ever care about the pointless and trivial
differences between its current private open/read/write counts and the
counts that we already maintain?

Yes, yes, I realize that they have technical differences in what they
count. That's not the question. The question is "Why would IMA care?"

                       Linus
--
To unsubscribe from this list: send the line "unsubscribe linux-security-module" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 1/3] IMA: move read/write counters into struct inode
  2010-10-19 17:03       ` Linus Torvalds
@ 2010-10-19 17:28         ` Al Viro
  2010-10-19 18:16           ` Mimi Zohar
  2010-10-19 19:11           ` Matthew Wilcox
  2010-10-19 22:49         ` Eric Paris
  1 sibling, 2 replies; 30+ messages in thread
From: Al Viro @ 2010-10-19 17:28 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Eric Paris, linux-kernel, linux-security-module, linux-fsdevel,
	hch, zohar, warthog9, david, jmorris, kyle, hpa, akpm, mingo

On Tue, Oct 19, 2010 at 10:03:48AM -0700, Linus Torvalds wrote:
> On Tue, Oct 19, 2010 at 9:55 AM, Al Viro <viro@zeniv.linux.org.uk> wrote:
> >
> > a) i_writecount is about VM_DENYWRITE, basically. ?Reusing it for ima could
> > get unpleasant; when it's positive, we are fine, but it can get negative as
> > well. ?IMA will have interesting time dealing with that.
> >
> > b) i_count is simply a refcount for struct inode. ?Not exactly the number
> > of dentries, but that's the main contributor. ?Basically, that's "how many
> > pointers outside of inode hash chains point that that struct inode at the
> > moment".
> 
> My question was deeper. More along the lines of "why would IMA care?"
> 
> How/why could IMA ever care about the pointless and trivial
> differences between its current private open/read/write counts and the
> counts that we already maintain?
> 
> Yes, yes, I realize that they have technical differences in what they
> count. That's not the question. The question is "Why would IMA care?"

I'd rather not say what I think about IMA sanity (and usefulness); as for what
it tries to do...  They want to whine if you open a file that is already opened
for write and they want to whine if you open a file for write when it's already
opened for read.  Unless they decide to leave the file alone, that is.

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 1/3] IMA: move read/write counters into struct inode
  2010-10-19 17:28         ` Al Viro
@ 2010-10-19 18:16           ` Mimi Zohar
  2010-10-20 13:10             ` John Stoffel
  2010-10-19 19:11           ` Matthew Wilcox
  1 sibling, 1 reply; 30+ messages in thread
From: Mimi Zohar @ 2010-10-19 18:16 UTC (permalink / raw)
  To: Al Viro
  Cc: Linus Torvalds, Eric Paris, linux-kernel, linux-security-module,
	linux-fsdevel, hch, Mimi Zohar, warthog9, david, jmorris, kyle,
	hpa, akpm, mingo

On Tue, 2010-10-19 at 18:28 +0100, Al Viro wrote:
> On Tue, Oct 19, 2010 at 10:03:48AM -0700, Linus Torvalds wrote:
> > On Tue, Oct 19, 2010 at 9:55 AM, Al Viro <viro@zeniv.linux.org.uk> wrote:
> > >
> > > a) i_writecount is about VM_DENYWRITE, basically. ?Reusing it for ima could
> > > get unpleasant; when it's positive, we are fine, but it can get negative as
> > > well. ?IMA will have interesting time dealing with that.
> > >
> > > b) i_count is simply a refcount for struct inode. ?Not exactly the number
> > > of dentries, but that's the main contributor. ?Basically, that's "how many
> > > pointers outside of inode hash chains point that that struct inode at the
> > > moment".
> > 
> > My question was deeper. More along the lines of "why would IMA care?"
> > 
> > How/why could IMA ever care about the pointless and trivial
> > differences between its current private open/read/write counts and the
> > counts that we already maintain?
> > 
> > Yes, yes, I realize that they have technical differences in what they
> > count. That's not the question. The question is "Why would IMA care?"

The filesystem prevents files being executed from being opened for
write.  The same guarantees that the file won't change, obviously,
doesn't exist for files being opened for read. Thus measuring a file
opened for read that has already been open for write, has no meaning.
Unfortunately, since the inode counters don't provide this information,
IMA maintains a separate set of counters.

> I'd rather not say what I think about IMA sanity (and usefulness); as for what
> it tries to do...  They want to whine if you open a file that is already opened
> for write and they want to whine if you open a file for write when it's already
> opened for read.  Unless they decide to leave the file alone, that is.

You left out one minor detail, invalidate the PCR as well.

Mimi


^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 1/3] IMA: move read/write counters into struct inode
  2010-10-19 17:28         ` Al Viro
  2010-10-19 18:16           ` Mimi Zohar
@ 2010-10-19 19:11           ` Matthew Wilcox
  2010-10-20  3:15             ` Al Viro
  1 sibling, 1 reply; 30+ messages in thread
From: Matthew Wilcox @ 2010-10-19 19:11 UTC (permalink / raw)
  To: Al Viro
  Cc: Linus Torvalds, Eric Paris, linux-kernel, linux-security-module,
	linux-fsdevel, hch, zohar, warthog9, david, jmorris, kyle, hpa,
	akpm, mingo

On Tue, Oct 19, 2010 at 06:28:05PM +0100, Al Viro wrote:
> I'd rather not say what I think about IMA sanity (and usefulness); as for what
> it tries to do...  They want to whine if you open a file that is already opened
> for write and they want to whine if you open a file for write when it's already
> opened for read.  Unless they decide to leave the file alone, that is.

Hm.  Sounds like the same question that the file leases code needs
answered.  The important difference is that the leases code can just
refuse to set a lease on inodes with multiple dentries.

While my mind's on it ... Al, is this code even close to correct?

                if ((arg == F_RDLCK) && (atomic_read(&inode->i_writecount) > 0))
                        goto out;
                if ((arg == F_WRLCK)
                    && ((atomic_read(&dentry->d_count) > 1)
                        || (atomic_read(&inode->i_count) > 1)))
                        goto out;

-- 
Matthew Wilcox				Intel Open Source Technology Centre
"Bill, look, we understand that you're interested in selling us this
operating system, but compare it to ours.  We can't possibly take such
a retrograde step."

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 1/3] IMA: move read/write counters into struct inode
  2010-10-19 17:03       ` Linus Torvalds
  2010-10-19 17:28         ` Al Viro
@ 2010-10-19 22:49         ` Eric Paris
  2010-10-20 14:38           ` Ingo Molnar
  1 sibling, 1 reply; 30+ messages in thread
From: Eric Paris @ 2010-10-19 22:49 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Al Viro, linux-kernel, linux-security-module, linux-fsdevel, hch,
	zohar, warthog9, david, jmorris, kyle, hpa, akpm, mingo

Executive summary of the day's work:
Yesterday morning: 944 bytes per inode in core
Yesterday night: 24 bytes per inode in core
Tonight: 4 bytes per inode in core.

That's a x236 time reduction in memory usage.  No I didn't even start
looking at a freezer.  Which could bring that 4 down to 0, but would add
a scalability penalty on all inodes when IMA was enabled.

The memory associated with inodes that IMA actually cares about has gone
from 312 to 320 bytes.  

I'm going to follow up with my patch series again but they aren't really
ready to be applied.  The IBM people who wrote IMA are reviewing them.
I have some questions if my RCU+RBTREE usage is valid/correct.  I'd
really like Al to take a close look at the last patch in the series to
make sure my use of i_writecount actually does what I want it to do...

-Eric


^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 1/3] IMA: move read/write counters into struct inode
  2010-10-19 19:11           ` Matthew Wilcox
@ 2010-10-20  3:15             ` Al Viro
  2010-10-20 17:38               ` J. Bruce Fields
  0 siblings, 1 reply; 30+ messages in thread
From: Al Viro @ 2010-10-20  3:15 UTC (permalink / raw)
  To: Matthew Wilcox
  Cc: Linus Torvalds, Eric Paris, linux-kernel, linux-security-module,
	linux-fsdevel, hch, zohar, warthog9, david, jmorris, kyle, hpa,
	akpm, mingo

On Tue, Oct 19, 2010 at 01:11:33PM -0600, Matthew Wilcox wrote:
> Hm.  Sounds like the same question that the file leases code needs
> answered.  The important difference is that the leases code can just
> refuse to set a lease on inodes with multiple dentries.
> 
> While my mind's on it ... Al, is this code even close to correct?
> 
>                 if ((arg == F_RDLCK) && (atomic_read(&inode->i_writecount) > 0))
>                         goto out;
>                 if ((arg == F_WRLCK)
>                     && ((atomic_read(&dentry->d_count) > 1)
>                         || (atomic_read(&inode->i_count) > 1)))
>                         goto out;

No.  This is complete junk; note that e.g. ls -lR will disrupt it, since
lstat(2) will bump dentry refcount.  The first part is more or less OK;
the second makes no sense.

What is it trying to do?  Note that the first part also doesn't make a lot
of sense, since you could be acquiring a write reference *right* *now*,
just as that check passes.  And you could finish getting it before you get
to do anything else in generic_setlease().

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 1/3] IMA: move read/write counters into struct inode
  2010-10-19 18:16           ` Mimi Zohar
@ 2010-10-20 13:10             ` John Stoffel
  2010-10-20 13:36               ` Al Viro
  0 siblings, 1 reply; 30+ messages in thread
From: John Stoffel @ 2010-10-20 13:10 UTC (permalink / raw)
  To: Mimi Zohar
  Cc: Al Viro, Linus Torvalds, Eric Paris, linux-kernel,
	linux-security-module, linux-fsdevel, hch, Mimi Zohar, warthog9,
	david, jmorris, kyle, hpa, akpm, mingo

>>>>> "Mimi" == Mimi Zohar <zohar@linux.vnet.ibm.com> writes:

Mimi> On Tue, 2010-10-19 at 18:28 +0100, Al Viro wrote:
>> On Tue, Oct 19, 2010 at 10:03:48AM -0700, Linus Torvalds wrote:
>> > On Tue, Oct 19, 2010 at 9:55 AM, Al Viro <viro@zeniv.linux.org.uk> wrote:
>> > >
>> > > a) i_writecount is about VM_DENYWRITE, basically. ?Reusing it for ima could
>> > > get unpleasant; when it's positive, we are fine, but it can get negative as
>> > > well. ?IMA will have interesting time dealing with that.
>> > >
>> > > b) i_count is simply a refcount for struct inode. ?Not exactly the number
>> > > of dentries, but that's the main contributor. ?Basically, that's "how many
>> > > pointers outside of inode hash chains point that that struct inode at the
>> > > moment".
>> > 
>> > My question was deeper. More along the lines of "why would IMA care?"
>> > 
>> > How/why could IMA ever care about the pointless and trivial
>> > differences between its current private open/read/write counts and the
>> > counts that we already maintain?
>> > 
>> > Yes, yes, I realize that they have technical differences in what they
>> > count. That's not the question. The question is "Why would IMA care?"

Mimi> The filesystem prevents files being executed from being opened
Mimi> for write.  The same guarantees that the file won't change,
Mimi> obviously, doesn't exist for files being opened for read. Thus
Mimi> measuring a file opened for read that has already been open for
Mimi> write, has no meaning.  Unfortunately, since the inode counters
Mimi> don't provide this information, IMA maintains a separate set of
Mimi> counters.

Does this mean I can't replace /bin/sh on a running system using IMA
at all, even if just one process has it opened and is running?  So how
the hell am I supposed to do live upgrades on a system?  

Currently, /bin/sh gets replaced with the newer, better (for some
value of better :-) version, while currently running users aren't
impacted at all.  New users pick up the new binary.

Gah!  The only way to upgrade such a system would look be via a
reboot.  Not very nice at all... or can the root user disable IMA,
upgrade a binary, then re-start IMA on a system?  

So how does this improve security if root is compromised?

John

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 1/3] IMA: move read/write counters into struct inode
  2010-10-20 13:10             ` John Stoffel
@ 2010-10-20 13:36               ` Al Viro
  2010-10-20 14:09                 ` John Stoffel
  0 siblings, 1 reply; 30+ messages in thread
From: Al Viro @ 2010-10-20 13:36 UTC (permalink / raw)
  To: John Stoffel
  Cc: Mimi Zohar, Linus Torvalds, Eric Paris, linux-kernel,
	linux-security-module, linux-fsdevel, hch, Mimi Zohar, warthog9,
	david, jmorris, kyle, hpa, akpm, mingo

On Wed, Oct 20, 2010 at 09:10:27AM -0400, John Stoffel wrote:

> Does this mean I can't replace /bin/sh on a running system using IMA
> at all, even if just one process has it opened and is running?  So how
> the hell am I supposed to do live upgrades on a system?  

rename(2).  Prohibition against write(2) to binary being executed is
*old*.  Try to do the following:
cp /bin/sh /tmp
/tmp/sh &
cat /bin/ls >/tmp/sh
and you'll get
sh: /tmp/sh: Text file busy

That has nothing to do with IMA...

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 1/3] IMA: move read/write counters into struct inode
  2010-10-20 13:36               ` Al Viro
@ 2010-10-20 14:09                 ` John Stoffel
  0 siblings, 0 replies; 30+ messages in thread
From: John Stoffel @ 2010-10-20 14:09 UTC (permalink / raw)
  To: Al Viro
  Cc: John Stoffel, Mimi Zohar, Linus Torvalds, Eric Paris,
	linux-kernel, linux-security-module, linux-fsdevel, hch,
	Mimi Zohar, warthog9, david, jmorris, kyle, hpa, akpm, mingo

>>>>> "Al" == Al Viro <viro@ZenIV.linux.org.uk> writes:

Al> On Wed, Oct 20, 2010 at 09:10:27AM -0400, John Stoffel wrote:
>> Does this mean I can't replace /bin/sh on a running system using IMA
>> at all, even if just one process has it opened and is running?  So how
>> the hell am I supposed to do live upgrades on a system?  

Al> rename(2).  Prohibition against write(2) to binary being executed is
Al> *old*.  Try to do the following:
Al> cp /bin/sh /tmp
Al> /tmp/sh &
Al> cat /bin/ls >/tmp/sh
Al> and you'll get
Al> sh: /tmp/sh: Text file busy

Al> That has nothing to do with IMA...

Duh, makes sense.  Thanks for educating me.  I still IMA is a waste of
system resources...

John

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 1/3] IMA: move read/write counters into struct inode
  2010-10-19 22:49         ` Eric Paris
@ 2010-10-20 14:38           ` Ingo Molnar
  2010-10-20 14:46             ` Eric Paris
  0 siblings, 1 reply; 30+ messages in thread
From: Ingo Molnar @ 2010-10-20 14:38 UTC (permalink / raw)
  To: Eric Paris
  Cc: Linus Torvalds, Al Viro, linux-kernel, linux-security-module,
	linux-fsdevel, hch, zohar, warthog9, david, jmorris, kyle, hpa,
	akpm


* Eric Paris <eparis@redhat.com> wrote:

> Executive summary of the day's work:
> Yesterday morning: 944 bytes per inode in core
> Yesterday night: 24 bytes per inode in core
> Tonight: 4 bytes per inode in core.
> 
> That's a x236 time reduction in memory usage.  No I didn't even start looking at a 
> freezer.  Which could bring that 4 down to 0, but would add a scalability penalty 
> on all inodes when IMA was enabled.

Why not use inode->i_security intelligently? That already exists so that way it's 0 
bytes.

Thanks,

	Ingo

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 1/3] IMA: move read/write counters into struct inode
  2010-10-20 14:38           ` Ingo Molnar
@ 2010-10-20 14:46             ` Eric Paris
  2010-10-20 15:15               ` Ingo Molnar
  0 siblings, 1 reply; 30+ messages in thread
From: Eric Paris @ 2010-10-20 14:46 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Linus Torvalds, Al Viro, linux-kernel, linux-security-module,
	linux-fsdevel, hch, zohar, warthog9, david, jmorris, kyle, hpa,
	akpm

On Wed, 2010-10-20 at 16:38 +0200, Ingo Molnar wrote:
> * Eric Paris <eparis@redhat.com> wrote:
> 
> > Executive summary of the day's work:
> > Yesterday morning: 944 bytes per inode in core
> > Yesterday night: 24 bytes per inode in core
> > Tonight: 4 bytes per inode in core.
> > 
> > That's a x236 time reduction in memory usage.  No I didn't even start looking at a 
> > freezer.  Which could bring that 4 down to 0, but would add a scalability penalty 
> > on all inodes when IMA was enabled.
> 
> Why not use inode->i_security intelligently? That already exists so that way it's 0 
> bytes.
> 
> Thanks,

It still wouldn't be 0 bytes since there would be a 1-1 mapping from
inode to i_security structs.  It would just be hiding the memory
somewhere else so it's harder to spot.  And that's exactly how we got
into this situation, instead of just doing thing in the inode we hid it
away in a radix tree node and ima_iint_cache.  In any case it would
require a secondary structure

struct generic_lsm_inode_structure
{
	IMA Fields
	void *lsm_inode_structure;
}

Which means yet another pointer to the real per LSM inode struct.  So it
would actually likely cost memory....

The real reason I don't pursue this route is because of the litany of
different ways this pointer is used in different LSMs (or not used at
all.)  And we all know that LSM authors aren't known for seeing the
world the same way as each other.  As a maintainer of one of those LSMs
even I'm scared to try pushing that forward....

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 1/3] IMA: move read/write counters into struct inode
  2010-10-20 14:46             ` Eric Paris
@ 2010-10-20 15:15               ` Ingo Molnar
  2010-10-20 15:25                 ` Eric Paris
  2010-10-21 16:15                 ` Casey Schaufler
  0 siblings, 2 replies; 30+ messages in thread
From: Ingo Molnar @ 2010-10-20 15:15 UTC (permalink / raw)
  To: Eric Paris
  Cc: Linus Torvalds, Al Viro, linux-kernel, linux-security-module,
	linux-fsdevel, hch, zohar, warthog9, david, jmorris, kyle, hpa,
	akpm


* Eric Paris <eparis@redhat.com> wrote:

> On Wed, 2010-10-20 at 16:38 +0200, Ingo Molnar wrote:
> > * Eric Paris <eparis@redhat.com> wrote:
> > 
> > > Executive summary of the day's work:
> > > Yesterday morning: 944 bytes per inode in core
> > > Yesterday night: 24 bytes per inode in core
> > > Tonight: 4 bytes per inode in core.
> > > 
> > > That's a x236 time reduction in memory usage.  No I didn't even start looking 
> > > at a freezer.  Which could bring that 4 down to 0, but would add a scalability 
> > > penalty on all inodes when IMA was enabled.
> > 
> > Why not use inode->i_security intelligently? That already exists so that way 
> > it's 0 bytes.
> > 
> > Thanks,
> 
> It still wouldn't be 0 bytes since there would be a 1-1 mapping from inode to 
> i_security structs. [...]

Only for IMA-affected files, right?

My point is to keep it 0 overhead for the _non IMA common case_.

> The real reason I don't pursue this route is because of the litany of different 
> ways this pointer is used in different LSMs (or not used at all.)  And we all know 
> that LSM authors aren't known for seeing the world the same way as each other.  As 
> a maintainer of one of those LSMs even I'm scared to try pushing that forward....

Ugh. That's a perfect reason to do it exactly like i suggested.

Thanks,

	Ingo

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 1/3] IMA: move read/write counters into struct inode
  2010-10-20 15:15               ` Ingo Molnar
@ 2010-10-20 15:25                 ` Eric Paris
  2010-10-21 16:15                 ` Casey Schaufler
  1 sibling, 0 replies; 30+ messages in thread
From: Eric Paris @ 2010-10-20 15:25 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Linus Torvalds, Al Viro, linux-kernel, linux-security-module,
	linux-fsdevel, hch, zohar, warthog9, david, jmorris, kyle, hpa,
	akpm

On Wed, 2010-10-20 at 17:15 +0200, Ingo Molnar wrote:
> * Eric Paris <eparis@redhat.com> wrote:
> 
> > On Wed, 2010-10-20 at 16:38 +0200, Ingo Molnar wrote:
> > > * Eric Paris <eparis@redhat.com> wrote:
> > > 
> > > > Executive summary of the day's work:
> > > > Yesterday morning: 944 bytes per inode in core
> > > > Yesterday night: 24 bytes per inode in core
> > > > Tonight: 4 bytes per inode in core.
> > > > 
> > > > That's a x236 time reduction in memory usage.  No I didn't even start looking 
> > > > at a freezer.  Which could bring that 4 down to 0, but would add a scalability 
> > > > penalty on all inodes when IMA was enabled.
> > > 
> > > Why not use inode->i_security intelligently? That already exists so that way 
> > > it's 0 bytes.
> > > 
> > > Thanks,
> > 
> > It still wouldn't be 0 bytes since there would be a 1-1 mapping from inode to 
> > i_security structs. [...]
> 
> Only for IMA-affected files, right?

No, we need to keep the open read counter even for non-IMA-affected
files in case we later determine that it is IMA-affected.  That's the 4
bytes I have today, which I said could be eliminated with a freezer that
calculated it when IMA was enabled, but isn't something I'm looking at
right now....

-Eric

-Eric


^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 1/3] IMA: move read/write counters into struct inode
  2010-10-20  3:15             ` Al Viro
@ 2010-10-20 17:38               ` J. Bruce Fields
  0 siblings, 0 replies; 30+ messages in thread
From: J. Bruce Fields @ 2010-10-20 17:38 UTC (permalink / raw)
  To: Al Viro
  Cc: Matthew Wilcox, Linus Torvalds, Eric Paris, linux-kernel,
	linux-security-module, linux-fsdevel, hch, zohar, warthog9, david,
	jmorris, kyle, hpa, akpm, mingo

On Wed, Oct 20, 2010 at 04:15:04AM +0100, Al Viro wrote:
> On Tue, Oct 19, 2010 at 01:11:33PM -0600, Matthew Wilcox wrote:
> > Hm.  Sounds like the same question that the file leases code needs
> > answered.  The important difference is that the leases code can just
> > refuse to set a lease on inodes with multiple dentries.
> > 
> > While my mind's on it ... Al, is this code even close to correct?
> > 
> >                 if ((arg == F_RDLCK) && (atomic_read(&inode->i_writecount) > 0))
> >                         goto out;
> >                 if ((arg == F_WRLCK)
> >                     && ((atomic_read(&dentry->d_count) > 1)
> >                         || (atomic_read(&inode->i_count) > 1)))
> >                         goto out;
> 
> No.  This is complete junk; note that e.g. ls -lR will disrupt it, since
> lstat(2) will bump dentry refcount.  The first part is more or less OK;
> the second makes no sense.

It may be nobody cares about false positives--if leases are something
provided as a caching optimization, then it's OK to deny them more often
than strictly necessary.

(In fact, if you're using the write lease to allow somebody else to
cache writes, then you want the stat to break the lease so it can get
can trigger a cache flush and get an updated mtime.)

Not that I'll claim those checks are correct.

> What is it trying to do?  Note that the first part also doesn't make a lot
> of sense, since you could be acquiring a write reference *right* *now*,
> just as that check passes.  And you could finish getting it before you get
> to do anything else in generic_setlease().

Yeah, that's a race.

The lease code is broken.  On my list of problems:
    
        - Leases are broken only by conflicting opens; but both nfsv4
          delegations and (I'm told) Windows op locks actually require
          that read leases be broken on any operation that changes file
          metadata--including unlink, link, rename, chmod, and chown.
    
	- The internal kernel api used for lease-breaking is inherently
	  racy as long as it consists of a single break_lease() call.
	  (We probably need deny_leases() and undeny_leases(), or
	  something.)

I'd be happy just to have read leases that worked correctly; write
leases (which, at least for the NFSv4 server's purposes, also need to be
broken on *access* to metadata) are worse.

I have patches that attempted to replace the current mechanism for
F_RDLCK leases with an i_leasecount modeled on i_writecount.  (So
positive counts the number of leases held, negative counts operations
temporarily disabling leases.)  They were never completely correct.

I also wasn't sure what sort of requirements I should meet to avoid
noticeable scalability problems (how much additional space in the inode
could I get away with?  What about additional locks, or just writes to
inode fields, on read opens?).

--b.

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 1/3] IMA: move read/write counters into struct inode
  2010-10-20 15:15               ` Ingo Molnar
  2010-10-20 15:25                 ` Eric Paris
@ 2010-10-21 16:15                 ` Casey Schaufler
  2010-10-22  8:48                   ` Ingo Molnar
  1 sibling, 1 reply; 30+ messages in thread
From: Casey Schaufler @ 2010-10-21 16:15 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Eric Paris, Linus Torvalds, Al Viro, linux-kernel,
	linux-security-module, linux-fsdevel, hch, zohar, warthog9, david,
	jmorris, kyle, hpa, akpm

 On 10/20/2010 8:15 AM, Ingo Molnar wrote:
> * Eric Paris <eparis@redhat.com> wrote:
>
>> On Wed, 2010-10-20 at 16:38 +0200, Ingo Molnar wrote:
>>> * Eric Paris <eparis@redhat.com> wrote:
>>>
>>>> Executive summary of the day's work:
>>>> Yesterday morning: 944 bytes per inode in core
>>>> Yesterday night: 24 bytes per inode in core
>>>> Tonight: 4 bytes per inode in core.
>>>>
>>>> That's a x236 time reduction in memory usage.  No I didn't even start looking 
>>>> at a freezer.  Which could bring that 4 down to 0, but would add a scalability 
>>>> penalty on all inodes when IMA was enabled.
>>> Why not use inode->i_security intelligently? That already exists so that way 
>>> it's 0 bytes.
>>>
>>> Thanks,
>> It still wouldn't be 0 bytes since there would be a 1-1 mapping from inode to 
>> i_security structs. [...]
> Only for IMA-affected files, right?
>
> My point is to keep it 0 overhead for the _non IMA common case_.
>
>> The real reason I don't pursue this route is because of the litany of different 
>> ways this pointer is used in different LSMs (or not used at all.)  And we all know 
>> that LSM authors aren't known for seeing the world the same way as each other.  As 
>> a maintainer of one of those LSMs even I'm scared to try pushing that forward....
> Ugh. That's a perfect reason to do it exactly like i suggested.

If you would like to make a proposal on LSM stacking other than
the traditional "rip the LSM out" I am sure that everyone in the
IMA, SELinux, TOMOYO, AppArmor and Smack communities would be
happy to have a look. Short of having a viable mechanism for
multiple LSMs to coexist IMA needs its separate space. Yes, people
do use both IMA and LSMs on the same machine at the same time.


> Thanks,
>
> 	Ingo
> --
> To unsubscribe from this list: send the line "unsubscribe linux-security-module" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>
>


^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 1/3] IMA: move read/write counters into struct inode
  2010-10-21 16:15                 ` Casey Schaufler
@ 2010-10-22  8:48                   ` Ingo Molnar
  2010-10-22 17:50                     ` Casey Schaufler
  0 siblings, 1 reply; 30+ messages in thread
From: Ingo Molnar @ 2010-10-22  8:48 UTC (permalink / raw)
  To: Casey Schaufler
  Cc: Eric Paris, Linus Torvalds, Al Viro, linux-kernel,
	linux-security-module, linux-fsdevel, hch, zohar, warthog9, david,
	jmorris, kyle, hpa, akpm


* Casey Schaufler <casey@schaufler-ca.com> wrote:

>  On 10/20/2010 8:15 AM, Ingo Molnar wrote:
> > * Eric Paris <eparis@redhat.com> wrote:
> >
> >> On Wed, 2010-10-20 at 16:38 +0200, Ingo Molnar wrote:
> >>> * Eric Paris <eparis@redhat.com> wrote:
> >>>
> >>>> Executive summary of the day's work:
> >>>> Yesterday morning: 944 bytes per inode in core
> >>>> Yesterday night: 24 bytes per inode in core
> >>>> Tonight: 4 bytes per inode in core.
> >>>>
> >>>> That's a x236 time reduction in memory usage.  No I didn't even start looking 
> >>>> at a freezer.  Which could bring that 4 down to 0, but would add a scalability 
> >>>> penalty on all inodes when IMA was enabled.
> >>> Why not use inode->i_security intelligently? That already exists so that way 
> >>> it's 0 bytes.
> >>>
> >>> Thanks,
> >> It still wouldn't be 0 bytes since there would be a 1-1 mapping from inode to 
> >> i_security structs. [...]
> > Only for IMA-affected files, right?
> >
> > My point is to keep it 0 overhead for the _non IMA common case_.
> >
> >> The real reason I don't pursue this route is because of the litany of different 
> >> ways this pointer is used in different LSMs (or not used at all.)  And we all know 
> >> that LSM authors aren't known for seeing the world the same way as each other.  As 
> >> a maintainer of one of those LSMs even I'm scared to try pushing that forward....
> > Ugh. That's a perfect reason to do it exactly like i suggested.
> 
> If you would like to make a proposal on LSM stacking other than the traditional 
> "rip the LSM out" I am sure that everyone in the IMA, SELinux, TOMOYO, AppArmor 
> and Smack communities would be happy to have a look. Short of having a viable 
> mechanism for multiple LSMs to coexist IMA needs its separate space. Yes, people 
> do use both IMA and LSMs on the same machine at the same time.

Yes, that's the essence of what i suggested: if various security concepts can be 
present at once then inode->security should not be a stupid pointer to a single, 
exclusive data structure (because that hardwires a "only a single security subsystem 
active" assumption), but should be a pointer to a linked list of security structures 
- as many as there are security subsystems interested in that inode.
 
Thanks,

	Ingo

^ permalink raw reply	[flat|nested] 30+ messages in thread

* Re: [PATCH 1/3] IMA: move read/write counters into struct inode
  2010-10-22  8:48                   ` Ingo Molnar
@ 2010-10-22 17:50                     ` Casey Schaufler
  0 siblings, 0 replies; 30+ messages in thread
From: Casey Schaufler @ 2010-10-22 17:50 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Eric Paris, Linus Torvalds, Al Viro, linux-kernel,
	linux-security-module, linux-fsdevel, hch, zohar, warthog9, david,
	jmorris, kyle, hpa, akpm

 On 10/22/2010 1:48 AM, Ingo Molnar wrote:
> * Casey Schaufler <casey@schaufler-ca.com> wrote:
>
>>  On 10/20/2010 8:15 AM, Ingo Molnar wrote:
>>> * Eric Paris <eparis@redhat.com> wrote:
>>>
>>>> On Wed, 2010-10-20 at 16:38 +0200, Ingo Molnar wrote:
>>>>> * Eric Paris <eparis@redhat.com> wrote:
>>>>>
>>>>>> Executive summary of the day's work:
>>>>>> Yesterday morning: 944 bytes per inode in core
>>>>>> Yesterday night: 24 bytes per inode in core
>>>>>> Tonight: 4 bytes per inode in core.
>>>>>>
>>>>>> That's a x236 time reduction in memory usage.  No I didn't even start looking 
>>>>>> at a freezer.  Which could bring that 4 down to 0, but would add a scalability 
>>>>>> penalty on all inodes when IMA was enabled.
>>>>> Why not use inode->i_security intelligently? That already exists so that way 
>>>>> it's 0 bytes.
>>>>>
>>>>> Thanks,
>>>> It still wouldn't be 0 bytes since there would be a 1-1 mapping from inode to 
>>>> i_security structs. [...]
>>> Only for IMA-affected files, right?
>>>
>>> My point is to keep it 0 overhead for the _non IMA common case_.
>>>
>>>> The real reason I don't pursue this route is because of the litany of different 
>>>> ways this pointer is used in different LSMs (or not used at all.)  And we all know 
>>>> that LSM authors aren't known for seeing the world the same way as each other.  As 
>>>> a maintainer of one of those LSMs even I'm scared to try pushing that forward....
>>> Ugh. That's a perfect reason to do it exactly like i suggested.
>> If you would like to make a proposal on LSM stacking other than the traditional 
>> "rip the LSM out" I am sure that everyone in the IMA, SELinux, TOMOYO, AppArmor 
>> and Smack communities would be happy to have a look. Short of having a viable 
>> mechanism for multiple LSMs to coexist IMA needs its separate space. Yes, people 
>> do use both IMA and LSMs on the same machine at the same time.
> Yes, that's the essence of what i suggested: if various security concepts can be 
> present at once then inode->security should not be a stupid pointer to a single, 
> exclusive data structure (because that hardwires a "only a single security subsystem 
> active" assumption), but should be a pointer to a linked list of security structures 
> - as many as there are security subsystems interested in that inode.

Glad to see support for an LSM module stacker (modulator, combiner, ...)
growing. All we really need to do is get someone a case of the beverage
of their choice and turn them loose on the problem. I think that the few
anti-stacking holdouts (I was one, but converted a couple years ago) can
be swayed by a reasonable implementation. It won't be easy, there are
plenty of problems that need to be solved, but anyone who wants easy
should stick to developing web portals and stay out of the kernel.

>  
> Thanks,
>
> 	Ingo


^ permalink raw reply	[flat|nested] 30+ messages in thread

end of thread, other threads:[~2010-10-22 17:50 UTC | newest]

Thread overview: 30+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2010-10-19  1:16 [PATCH 1/3] IMA: move read/write counters into struct inode Eric Paris
2010-10-19  1:16 ` [PATCH 2/3] IMA: only allocate iint when needed Eric Paris
2010-10-19  1:17 ` [PATCH 3/3] IMA: use rbtree instead of radix tree for inode information cache Eric Paris
2010-10-19  1:30 ` [PATCH 1/3] IMA: move read/write counters into struct inode Christoph Hellwig
2010-10-19  2:14   ` Eric Paris
2010-10-19  7:39     ` Dave Chinner
2010-10-19 16:24       ` Eric Paris
2010-10-19 16:29         ` Christoph Hellwig
2010-10-19  8:39     ` Ingo Molnar
2010-10-19  2:46 ` Eric Paris
2010-10-19 15:52 ` Linus Torvalds
2010-10-19 16:36   ` Eric Paris
2010-10-19 16:55     ` Al Viro
2010-10-19 17:03       ` Linus Torvalds
2010-10-19 17:28         ` Al Viro
2010-10-19 18:16           ` Mimi Zohar
2010-10-20 13:10             ` John Stoffel
2010-10-20 13:36               ` Al Viro
2010-10-20 14:09                 ` John Stoffel
2010-10-19 19:11           ` Matthew Wilcox
2010-10-20  3:15             ` Al Viro
2010-10-20 17:38               ` J. Bruce Fields
2010-10-19 22:49         ` Eric Paris
2010-10-20 14:38           ` Ingo Molnar
2010-10-20 14:46             ` Eric Paris
2010-10-20 15:15               ` Ingo Molnar
2010-10-20 15:25                 ` Eric Paris
2010-10-21 16:15                 ` Casey Schaufler
2010-10-22  8:48                   ` Ingo Molnar
2010-10-22 17:50                     ` Casey Schaufler

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).