Linux userland API discussions
 help / color / mirror / Atom feed
* Re: [PATCH v5 10/16] fs-verity: implement FS_IOC_ENABLE_VERITY ioctl
From: Jaegeuk Kim @ 2019-06-22 22:43 UTC (permalink / raw)
  To: Eric Biggers
  Cc: Theodore Y . Ts'o, Darrick J . Wong, linux-api, Dave Chinner,
	linux-f2fs-devel, linux-fscrypt, linux-fsdevel, linux-integrity,
	linux-ext4, Linus Torvalds, Christoph Hellwig, Victor Hsieh
In-Reply-To: <20190620205043.64350-11-ebiggers@kernel.org>

On 06/20, Eric Biggers wrote:
> From: Eric Biggers <ebiggers@google.com>
> 
> Add a function for filesystems to call to implement the
> FS_IOC_ENABLE_VERITY ioctl.  This ioctl enables fs-verity on a file.
> 
> See the "FS_IOC_ENABLE_VERITY" section of
> Documentation/filesystems/fsverity.rst for the documentation.
> 

Reviewed-by: Jaegeuk Kim <jaegeuk@kernel.org>

> Signed-off-by: Eric Biggers <ebiggers@google.com>
> ---
>  fs/verity/Makefile       |   3 +-
>  fs/verity/enable.c       | 341 +++++++++++++++++++++++++++++++++++++++
>  include/linux/fsverity.h |  64 ++++++++
>  3 files changed, 407 insertions(+), 1 deletion(-)
>  create mode 100644 fs/verity/enable.c
> 
> diff --git a/fs/verity/Makefile b/fs/verity/Makefile
> index 7fa628cd5eba24..04b37475fd280a 100644
> --- a/fs/verity/Makefile
> +++ b/fs/verity/Makefile
> @@ -1,6 +1,7 @@
>  # SPDX-License-Identifier: GPL-2.0
>  
> -obj-$(CONFIG_FS_VERITY) += hash_algs.o \
> +obj-$(CONFIG_FS_VERITY) += enable.o \
> +			   hash_algs.o \
>  			   init.o \
>  			   open.o \
>  			   verify.o
> diff --git a/fs/verity/enable.c b/fs/verity/enable.c
> new file mode 100644
> index 00000000000000..144721bbe4aab9
> --- /dev/null
> +++ b/fs/verity/enable.c
> @@ -0,0 +1,341 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * fs/verity/enable.c: ioctl to enable verity on a file
> + *
> + * Copyright 2019 Google LLC
> + */
> +
> +#include "fsverity_private.h"
> +
> +#include <crypto/hash.h>
> +#include <linux/mount.h>
> +#include <linux/pagemap.h>
> +#include <linux/sched/signal.h>
> +#include <linux/uaccess.h>
> +
> +static int build_merkle_tree_level(struct inode *inode, unsigned int level,
> +				   u64 num_blocks_to_hash,
> +				   const struct merkle_tree_params *params,
> +				   u8 *pending_hashes,
> +				   struct ahash_request *req)
> +{
> +	const struct fsverity_operations *vops = inode->i_sb->s_vop;
> +	unsigned int pending_size = 0;
> +	u64 dst_block_num;
> +	u64 i;
> +	int err;
> +
> +	if (WARN_ON(params->block_size != PAGE_SIZE)) /* checked earlier too */
> +		return -EINVAL;
> +
> +	if (level < params->num_levels) {
> +		dst_block_num = params->level_start[level];
> +	} else {
> +		if (WARN_ON(num_blocks_to_hash != 1))
> +			return -EINVAL;
> +		dst_block_num = 0; /* unused */
> +	}
> +
> +	for (i = 0; i < num_blocks_to_hash; i++) {
> +		struct page *src_page;
> +
> +		if ((pgoff_t)i % 10000 == 0 || i + 1 == num_blocks_to_hash)
> +			pr_debug("Hashing block %llu of %llu for level %u\n",
> +				 i + 1, num_blocks_to_hash, level);
> +
> +		if (level == 0)
> +			/* Leaf: hashing a data block */
> +			src_page = read_mapping_page(inode->i_mapping, i, NULL);
> +		else
> +			/* Non-leaf: hashing hash block from level below */
> +			src_page = vops->read_merkle_tree_page(inode,
> +					params->level_start[level - 1] + i);
> +		if (IS_ERR(src_page)) {
> +			err = PTR_ERR(src_page);
> +			fsverity_err(inode,
> +				     "Error %d reading Merkle tree page %llu",
> +				     err, params->level_start[level - 1] + i);
> +			return err;
> +		}
> +
> +		err = fsverity_hash_page(params, inode, req, src_page,
> +					 &pending_hashes[pending_size]);
> +		put_page(src_page);
> +		if (err)
> +			return err;
> +		pending_size += params->digest_size;
> +
> +		if (level == params->num_levels) /* Root hash? */
> +			return 0;
> +
> +		if (pending_size + params->digest_size > params->block_size ||
> +		    i + 1 == num_blocks_to_hash) {
> +			/* Flush the pending hash block */
> +			memset(&pending_hashes[pending_size], 0,
> +			       params->block_size - pending_size);
> +			err = vops->write_merkle_tree_block(inode,
> +					pending_hashes,
> +					dst_block_num,
> +					params->log_blocksize);
> +			if (err) {
> +				fsverity_err(inode,
> +					     "Error %d writing Merkle tree block %llu",
> +					     err, dst_block_num);
> +				return err;
> +			}
> +			dst_block_num++;
> +			pending_size = 0;
> +		}
> +
> +		if (fatal_signal_pending(current))
> +			return -EINTR;
> +		cond_resched();
> +	}
> +	return 0;
> +}
> +
> +/*
> + * Build the Merkle tree for the given inode using the given parameters, and
> + * return the root hash in @root_hash.
> + *
> + * The tree is written to a filesystem-specific location as determined by the
> + * ->write_merkle_tree_block() method.  However, the blocks that comprise the
> + * tree are the same for all filesystems.
> + */
> +static int build_merkle_tree(struct inode *inode,
> +			     const struct merkle_tree_params *params,
> +			     u8 *root_hash)
> +{
> +	u8 *pending_hashes;
> +	struct ahash_request *req;
> +	u64 blocks;
> +	unsigned int level;
> +	int err = -ENOMEM;
> +
> +	if (inode->i_size == 0) {
> +		/* Empty file is a special case; root hash is all 0's */
> +		memset(root_hash, 0, params->digest_size);
> +		return 0;
> +	}
> +
> +	pending_hashes = kmalloc(params->block_size, GFP_KERNEL);
> +	req = ahash_request_alloc(params->hash_alg->tfm, GFP_KERNEL);
> +	if (!pending_hashes || !req)
> +		goto out;
> +
> +	/*
> +	 * Build each level of the Merkle tree, starting at the leaf level
> +	 * (level 0) and ascending to the root node (level 'num_levels - 1').
> +	 * Then at the end (level 'num_levels'), calculate the root hash.
> +	 */
> +	blocks = (inode->i_size + params->block_size - 1) >>
> +		 params->log_blocksize;
> +	for (level = 0; level <= params->num_levels; level++) {
> +		err = build_merkle_tree_level(inode, level, blocks, params,
> +					      pending_hashes, req);
> +		if (err)
> +			goto out;
> +		blocks = (blocks + params->hashes_per_block - 1) >>
> +			 params->log_arity;
> +	}
> +	memcpy(root_hash, pending_hashes, params->digest_size);
> +	err = 0;
> +out:
> +	kfree(pending_hashes);
> +	ahash_request_free(req);
> +	return err;
> +}
> +
> +static int enable_verity(struct file *filp,
> +			 const struct fsverity_enable_arg *arg)
> +{
> +	struct inode *inode = file_inode(filp);
> +	const struct fsverity_operations *vops = inode->i_sb->s_vop;
> +	struct merkle_tree_params params = { };
> +	struct fsverity_descriptor *desc;
> +	size_t desc_size = sizeof(*desc);
> +	struct fsverity_info *vi;
> +	int err;
> +
> +	/* Start initializing the fsverity_descriptor */
> +	desc = kzalloc(desc_size, GFP_KERNEL);
> +	if (!desc)
> +		return -ENOMEM;
> +	desc->version = 1;
> +	desc->hash_algorithm = arg->hash_algorithm;
> +	desc->log_blocksize = ilog2(arg->block_size);
> +
> +	/* Get the salt if the user provided one */
> +	if (arg->salt_size &&
> +	    copy_from_user(desc->salt,
> +			   (const u8 __user *)(uintptr_t)arg->salt_ptr,
> +			   arg->salt_size)) {
> +		err = -EFAULT;
> +		goto out;
> +	}
> +	desc->salt_size = arg->salt_size;
> +
> +	desc->data_size = cpu_to_le64(inode->i_size);
> +
> +	pr_debug("Building Merkle tree...\n");
> +
> +	/* Prepare the Merkle tree parameters */
> +	err = fsverity_init_merkle_tree_params(&params, inode,
> +					       arg->hash_algorithm,
> +					       desc->log_blocksize,
> +					       desc->salt, desc->salt_size);
> +	if (err)
> +		goto out;
> +
> +	/* Tell the filesystem that verity is being enabled on the file */
> +	err = vops->begin_enable_verity(filp);
> +	if (err)
> +		goto out;
> +
> +	/* Build the Merkle tree */
> +	BUILD_BUG_ON(sizeof(desc->root_hash) < FS_VERITY_MAX_DIGEST_SIZE);
> +	err = build_merkle_tree(inode, &params, desc->root_hash);
> +	if (err) {
> +		fsverity_err(inode, "Error %d building Merkle tree", err);
> +		goto rollback;
> +	}
> +	pr_debug("Done building Merkle tree.  Root hash is %s:%*phN\n",
> +		 params.hash_alg->name, params.digest_size, desc->root_hash);
> +
> +	/*
> +	 * Create the fsverity_info.  Don't bother trying to save work by
> +	 * reusing the merkle_tree_params from above.  Instead, just create the
> +	 * fsverity_info from the fsverity_descriptor as if it were just loaded
> +	 * from disk.  This is simpler, and it serves as an extra check that the
> +	 * metadata we're writing is valid before actually enabling verity.
> +	 */
> +	vi = fsverity_create_info(inode, desc, desc_size);
> +	if (IS_ERR(vi)) {
> +		err = PTR_ERR(vi);
> +		goto rollback;
> +	}
> +
> +	/* Tell the filesystem to finish enabling verity on the file */
> +	err = vops->end_enable_verity(filp, desc, desc_size, params.tree_size);
> +	if (err) {
> +		fsverity_err(inode, "%ps() failed with err %d",
> +			     vops->end_enable_verity, err);
> +		fsverity_free_info(vi);
> +	} else if (WARN_ON(!IS_VERITY(inode))) {
> +		err = -EINVAL;
> +		fsverity_free_info(vi);
> +	} else {
> +		/* Successfully enabled verity */
> +
> +		/*
> +		 * Readers can start using ->i_verity_info immediately, so it
> +		 * can't be rolled back once set.  So don't set it until just
> +		 * after the filesystem has successfully enabled verity.
> +		 */
> +		fsverity_set_info(inode, vi);
> +	}
> +out:
> +	kfree(params.hashstate);
> +	kfree(desc);
> +	return err;
> +
> +rollback:
> +	(void)vops->end_enable_verity(filp, NULL, 0, params.tree_size);
> +	goto out;
> +}
> +
> +/**
> + * fsverity_ioctl_enable() - enable verity on a file
> + *
> + * Enable fs-verity on a file.  See the "FS_IOC_ENABLE_VERITY" section of
> + * Documentation/filesystems/fsverity.rst for the documentation.
> + *
> + * Return: 0 on success, -errno on failure
> + */
> +int fsverity_ioctl_enable(struct file *filp, const void __user *uarg)
> +{
> +	struct inode *inode = file_inode(filp);
> +	struct fsverity_enable_arg arg;
> +	int err;
> +
> +	if (copy_from_user(&arg, uarg, sizeof(arg)))
> +		return -EFAULT;
> +
> +	if (arg.version != 1)
> +		return -EINVAL;
> +
> +	if (arg.__reserved1 ||
> +	    memchr_inv(arg.__reserved2, 0, sizeof(arg.__reserved2)))
> +		return -EINVAL;
> +
> +	if (arg.block_size != PAGE_SIZE)
> +		return -EINVAL;
> +
> +	if (arg.salt_size > FIELD_SIZEOF(struct fsverity_descriptor, salt))
> +		return -EMSGSIZE;
> +
> +	if (arg.sig_size)
> +		return -EINVAL;
> +
> +	/*
> +	 * Require a regular file with write access.  But the actual fd must
> +	 * still be readonly so that we can lock out all writers.  This is
> +	 * needed to guarantee that no writable fds exist to the file once it
> +	 * has verity enabled, and to stabilize the data being hashed.
> +	 */
> +
> +	err = inode_permission(inode, MAY_WRITE);
> +	if (err)
> +		return err;
> +
> +	if (IS_APPEND(inode))
> +		return -EPERM;
> +
> +	if (S_ISDIR(inode->i_mode))
> +		return -EISDIR;
> +
> +	if (!S_ISREG(inode->i_mode))
> +		return -EINVAL;
> +
> +	err = mnt_want_write_file(filp);
> +	if (err) /* -EROFS */
> +		return err;
> +
> +	err = deny_write_access(filp);
> +	if (err) /* -ETXTBSY */
> +		goto out_drop_write;
> +
> +	inode_lock(inode);
> +
> +	if (IS_VERITY(inode)) {
> +		err = -EEXIST;
> +		goto out_unlock;
> +	}
> +
> +	err = enable_verity(filp, &arg);
> +	if (err)
> +		goto out_unlock;
> +
> +	/*
> +	 * Some pages of the file may have been evicted from pagecache after
> +	 * being used in the Merkle tree construction, then read into pagecache
> +	 * again by another process reading from the file concurrently.  Since
> +	 * these pages didn't undergo verification against the file measurement
> +	 * which fs-verity now claims to be enforcing, we have to wipe the
> +	 * pagecache to ensure that all future reads are verified.
> +	 */
> +	filemap_write_and_wait(inode->i_mapping);
> +	truncate_inode_pages(inode->i_mapping, 0);
> +
> +	/*
> +	 * allow_write_access() is needed to pair with deny_write_access().
> +	 * Regardless, the filesystem won't allow writing to verity files.
> +	 */
> +out_unlock:
> +	inode_unlock(inode);
> +	allow_write_access(filp);
> +out_drop_write:
> +	mnt_drop_write_file(filp);
> +	return err;
> +}
> +EXPORT_SYMBOL_GPL(fsverity_ioctl_enable);
> diff --git a/include/linux/fsverity.h b/include/linux/fsverity.h
> index ecd47e748c7f64..7ef2ef82653409 100644
> --- a/include/linux/fsverity.h
> +++ b/include/linux/fsverity.h
> @@ -17,6 +17,42 @@
>  /* Verity operations for filesystems */
>  struct fsverity_operations {
>  
> +	/**
> +	 * Begin enabling verity on the given file.
> +	 *
> +	 * @filp: a readonly file descriptor for the file
> +	 *
> +	 * The filesystem must do any needed filesystem-specific preparations
> +	 * for enabling verity, e.g. evicting inline data.
> +	 *
> +	 * i_rwsem is held for write.
> +	 *
> +	 * Return: 0 on success, -errno on failure
> +	 */
> +	int (*begin_enable_verity)(struct file *filp);
> +
> +	/**
> +	 * End enabling verity on the given file.
> +	 *
> +	 * @filp: a readonly file descriptor for the file
> +	 * @desc: the verity descriptor to write, or NULL on failure
> +	 * @desc_size: size of verity descriptor, or 0 on failure
> +	 * @merkle_tree_size: total bytes the Merkle tree took up
> +	 *
> +	 * If desc == NULL, then enabling verity failed and the filesystem must
> +	 * only do any necessary cleanups.  Else, it must also store the given
> +	 * verity descriptor to a fs-specific location associated with the inode
> +	 * and do any fs-specific actions needed to mark the inode as a verity
> +	 * inode, e.g. setting a bit in the on-disk inode.  The filesystem is
> +	 * also responsible for setting the S_VERITY flag in the VFS inode.
> +	 *
> +	 * i_rwsem is held for write.
> +	 *
> +	 * Return: 0 on success, -errno on failure
> +	 */
> +	int (*end_enable_verity)(struct file *filp, const void *desc,
> +				 size_t desc_size, u64 merkle_tree_size);
> +
>  	/**
>  	 * Get the verity descriptor of the given inode.
>  	 *
> @@ -50,6 +86,22 @@ struct fsverity_operations {
>  	 */
>  	struct page *(*read_merkle_tree_page)(struct inode *inode,
>  					      pgoff_t index);
> +
> +	/**
> +	 * Write a Merkle tree block to the given inode.
> +	 *
> +	 * @inode: the inode for which the Merkle tree is being built
> +	 * @buf: block to write
> +	 * @index: 0-based index of the block within the Merkle tree
> +	 * @log_blocksize: log base 2 of the Merkle tree block size
> +	 *
> +	 * This is only called between ->begin_enable_verity() and
> +	 * ->end_enable_verity().  i_rwsem is held for write.
> +	 *
> +	 * Return: 0 on success, -errno on failure
> +	 */
> +	int (*write_merkle_tree_block)(struct inode *inode, const void *buf,
> +				       u64 index, int log_blocksize);
>  };
>  
>  #ifdef CONFIG_FS_VERITY
> @@ -60,6 +112,10 @@ static inline struct fsverity_info *fsverity_get_info(const struct inode *inode)
>  	return READ_ONCE(inode->i_verity_info);
>  }
>  
> +/* enable.c */
> +
> +extern int fsverity_ioctl_enable(struct file *filp, const void __user *arg);
> +
>  /* open.c */
>  
>  extern int fsverity_file_open(struct inode *inode, struct file *filp);
> @@ -79,6 +135,14 @@ static inline struct fsverity_info *fsverity_get_info(const struct inode *inode)
>  	return NULL;
>  }
>  
> +/* enable.c */
> +
> +static inline int fsverity_ioctl_enable(struct file *filp,
> +					const void __user *arg)
> +{
> +	return -EOPNOTSUPP;
> +}
> +
>  /* open.c */
>  
>  static inline int fsverity_file_open(struct inode *inode, struct file *filp)
> -- 
> 2.22.0.410.gd8fdbe21b5-goog

^ permalink raw reply

* Re: [PATCH v5 09/16] fs-verity: add data verification hooks for ->readpages()
From: Jaegeuk Kim @ 2019-06-22 22:32 UTC (permalink / raw)
  To: Eric Biggers
  Cc: Theodore Y . Ts'o, Darrick J . Wong, linux-api, Dave Chinner,
	linux-f2fs-devel, linux-fscrypt, linux-fsdevel, linux-integrity,
	linux-ext4, Linus Torvalds, Christoph Hellwig, Victor Hsieh
In-Reply-To: <20190620205043.64350-10-ebiggers@kernel.org>

On 06/20, Eric Biggers wrote:
> From: Eric Biggers <ebiggers@google.com>
> 
> Add functions that verify data pages that have been read from a
> fs-verity file, against that file's Merkle tree.  These will be called
> from filesystems' ->readpage() and ->readpages() methods.
> 
> Since data verification can block, a workqueue is provided for these
> methods to enqueue verification work from their bio completion callback.
> 
> See the "Verifying data" section of
> Documentation/filesystems/fsverity.rst for more information.
> 
> Reviewed-by: Theodore Ts'o <tytso@mit.edu>

Reviewed-by: Jaegeuk Kim <jaegeuk@kernel.org>

> Signed-off-by: Eric Biggers <ebiggers@google.com>
> ---
>  fs/verity/Makefile           |   3 +-
>  fs/verity/fsverity_private.h |   5 +
>  fs/verity/init.c             |   8 +
>  fs/verity/open.c             |   6 +
>  fs/verity/verify.c           | 275 +++++++++++++++++++++++++++++++++++
>  include/linux/fsverity.h     |  56 +++++++
>  6 files changed, 352 insertions(+), 1 deletion(-)
>  create mode 100644 fs/verity/verify.c
> 
> diff --git a/fs/verity/Makefile b/fs/verity/Makefile
> index e6a8951c493a5e..7fa628cd5eba24 100644
> --- a/fs/verity/Makefile
> +++ b/fs/verity/Makefile
> @@ -2,4 +2,5 @@
>  
>  obj-$(CONFIG_FS_VERITY) += hash_algs.o \
>  			   init.o \
> -			   open.o
> +			   open.o \
> +			   verify.o
> diff --git a/fs/verity/fsverity_private.h b/fs/verity/fsverity_private.h
> index c79746ff335e14..eaa2b3b93bbf6b 100644
> --- a/fs/verity/fsverity_private.h
> +++ b/fs/verity/fsverity_private.h
> @@ -134,5 +134,10 @@ void fsverity_set_info(struct inode *inode, struct fsverity_info *vi);
>  void fsverity_free_info(struct fsverity_info *vi);
>  
>  int __init fsverity_init_info_cache(void);
> +void __init fsverity_exit_info_cache(void);
> +
> +/* verify.c */
> +
> +int __init fsverity_init_workqueue(void);
>  
>  #endif /* _FSVERITY_PRIVATE_H */
> diff --git a/fs/verity/init.c b/fs/verity/init.c
> index fff1fd6343357d..b593805aafcc89 100644
> --- a/fs/verity/init.c
> +++ b/fs/verity/init.c
> @@ -41,7 +41,15 @@ static int __init fsverity_init(void)
>  	if (err)
>  		return err;
>  
> +	err = fsverity_init_workqueue();
> +	if (err)
> +		goto err_exit_info_cache;
> +
>  	pr_debug("Initialized fs-verity\n");
>  	return 0;
> +
> +err_exit_info_cache:
> +	fsverity_exit_info_cache();
> +	return err;
>  }
>  late_initcall(fsverity_init)
> diff --git a/fs/verity/open.c b/fs/verity/open.c
> index 21ae0ef254a695..7a2cd000dc4f06 100644
> --- a/fs/verity/open.c
> +++ b/fs/verity/open.c
> @@ -338,3 +338,9 @@ int __init fsverity_init_info_cache(void)
>  		return -ENOMEM;
>  	return 0;
>  }
> +
> +void __init fsverity_exit_info_cache(void)
> +{
> +	kmem_cache_destroy(fsverity_info_cachep);
> +	fsverity_info_cachep = NULL;
> +}
> diff --git a/fs/verity/verify.c b/fs/verity/verify.c
> new file mode 100644
> index 00000000000000..2a0f9e2ebc9f16
> --- /dev/null
> +++ b/fs/verity/verify.c
> @@ -0,0 +1,275 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * fs/verity/verify.c: data verification functions, i.e. hooks for ->readpages()
> + *
> + * Copyright 2019 Google LLC
> + */
> +
> +#include "fsverity_private.h"
> +
> +#include <crypto/hash.h>
> +#include <linux/bio.h>
> +#include <linux/ratelimit.h>
> +
> +static struct workqueue_struct *fsverity_read_workqueue;
> +
> +/**
> + * hash_at_level() - compute the location of the block's hash at the given level
> + *
> + * @params:	(in) the Merkle tree parameters
> + * @dindex:	(in) the index of the data block being verified
> + * @level:	(in) the level of hash we want (0 is leaf level)
> + * @hindex:	(out) the index of the hash block containing the wanted hash
> + * @hoffset:	(out) the byte offset to the wanted hash within the hash block
> + */
> +static void hash_at_level(const struct merkle_tree_params *params,
> +			  pgoff_t dindex, unsigned int level, pgoff_t *hindex,
> +			  unsigned int *hoffset)
> +{
> +	pgoff_t position;
> +
> +	/* Offset of the hash within the level's region, in hashes */
> +	position = dindex >> (level * params->log_arity);
> +
> +	/* Index of the hash block in the tree overall */
> +	*hindex = params->level_start[level] + (position >> params->log_arity);
> +
> +	/* Offset of the wanted hash (in bytes) within the hash block */
> +	*hoffset = (position & ((1 << params->log_arity) - 1)) <<
> +		   (params->log_blocksize - params->log_arity);
> +}
> +
> +/* Extract a hash from a hash page */
> +static void extract_hash(struct page *hpage, unsigned int hoffset,
> +			 unsigned int hsize, u8 *out)
> +{
> +	void *virt = kmap_atomic(hpage);
> +
> +	memcpy(out, virt + hoffset, hsize);
> +	kunmap_atomic(virt);
> +}
> +
> +static inline int cmp_hashes(const struct fsverity_info *vi,
> +			     const u8 *want_hash, const u8 *real_hash,
> +			     pgoff_t index, int level)
> +{
> +	const unsigned int hsize = vi->tree_params.digest_size;
> +
> +	if (memcmp(want_hash, real_hash, hsize) == 0)
> +		return 0;
> +
> +	fsverity_err(vi->inode,
> +		     "FILE CORRUPTED! index=%lu, level=%d, want_hash=%s:%*phN, real_hash=%s:%*phN",
> +		     index, level,
> +		     vi->tree_params.hash_alg->name, hsize, want_hash,
> +		     vi->tree_params.hash_alg->name, hsize, real_hash);
> +	return -EBADMSG;
> +}
> +
> +/*
> + * Verify a single data page against the file's Merkle tree.
> + *
> + * In principle, we need to verify the entire path to the root node.  However,
> + * for efficiency the filesystem may cache the hash pages.  Therefore we need
> + * only ascend the tree until an already-verified page is seen, as indicated by
> + * the PageChecked bit being set; then verify the path to that page.
> + *
> + * This code currently only supports the case where the verity block size is
> + * equal to PAGE_SIZE.  Doing otherwise would be possible but tricky, since we
> + * wouldn't be able to use the PageChecked bit.
> + *
> + * Note that multiple processes may race to verify a hash page and mark it
> + * Checked, but it doesn't matter; the result will be the same either way.
> + *
> + * Return: true if the page is valid, else false.
> + */
> +static bool verify_page(struct inode *inode, const struct fsverity_info *vi,
> +			struct ahash_request *req, struct page *data_page)
> +{
> +	const struct merkle_tree_params *params = &vi->tree_params;
> +	const unsigned int hsize = params->digest_size;
> +	const pgoff_t index = data_page->index;
> +	int level;
> +	u8 _want_hash[FS_VERITY_MAX_DIGEST_SIZE];
> +	const u8 *want_hash;
> +	u8 real_hash[FS_VERITY_MAX_DIGEST_SIZE];
> +	struct page *hpages[FS_VERITY_MAX_LEVELS];
> +	unsigned int hoffsets[FS_VERITY_MAX_LEVELS];
> +	int err;
> +
> +	if (WARN_ON_ONCE(!PageLocked(data_page) || PageUptodate(data_page)))
> +		return false;
> +
> +	pr_debug_ratelimited("Verifying data page %lu...\n", index);
> +
> +	/*
> +	 * Starting at the leaf level, ascend the tree saving hash pages along
> +	 * the way until we find a verified hash page, indicated by PageChecked;
> +	 * or until we reach the root.
> +	 */
> +	for (level = 0; level < params->num_levels; level++) {
> +		pgoff_t hindex;
> +		unsigned int hoffset;
> +		struct page *hpage;
> +
> +		hash_at_level(params, index, level, &hindex, &hoffset);
> +
> +		pr_debug_ratelimited("Level %d: hindex=%lu, hoffset=%u\n",
> +				     level, hindex, hoffset);
> +
> +		hpage = inode->i_sb->s_vop->read_merkle_tree_page(inode,
> +								  hindex);
> +		if (IS_ERR(hpage)) {
> +			err = PTR_ERR(hpage);
> +			fsverity_err(inode,
> +				     "Error %d reading Merkle tree page %lu",
> +				     err, hindex);
> +			goto out;
> +		}
> +
> +		if (PageChecked(hpage)) {
> +			extract_hash(hpage, hoffset, hsize, _want_hash);
> +			want_hash = _want_hash;
> +			put_page(hpage);
> +			pr_debug_ratelimited("Hash page already checked, want %s:%*phN\n",
> +					     params->hash_alg->name,
> +					     hsize, want_hash);
> +			goto descend;
> +		}
> +		pr_debug_ratelimited("Hash page not yet checked\n");
> +		hpages[level] = hpage;
> +		hoffsets[level] = hoffset;
> +	}
> +
> +	want_hash = vi->root_hash;
> +	pr_debug("Want root hash: %s:%*phN\n",
> +		 params->hash_alg->name, hsize, want_hash);
> +descend:
> +	/* Descend the tree verifying hash pages */
> +	for (; level > 0; level--) {
> +		struct page *hpage = hpages[level - 1];
> +		unsigned int hoffset = hoffsets[level - 1];
> +
> +		err = fsverity_hash_page(params, inode, req, hpage, real_hash);
> +		if (err)
> +			goto out;
> +		err = cmp_hashes(vi, want_hash, real_hash, index, level - 1);
> +		if (err)
> +			goto out;
> +		SetPageChecked(hpage);
> +		extract_hash(hpage, hoffset, hsize, _want_hash);
> +		want_hash = _want_hash;
> +		put_page(hpage);
> +		pr_debug("Verified hash page at level %d, now want %s:%*phN\n",
> +			 level - 1, params->hash_alg->name, hsize, want_hash);
> +	}
> +
> +	/* Finally, verify the data page */
> +	err = fsverity_hash_page(params, inode, req, data_page, real_hash);
> +	if (err)
> +		goto out;
> +	err = cmp_hashes(vi, want_hash, real_hash, index, -1);
> +out:
> +	for (; level > 0; level--)
> +		put_page(hpages[level - 1]);
> +
> +	return err == 0;
> +}
> +
> +/**
> + * fsverity_verify_page - verify a data page
> + *
> + * Verify a page that has just been read from a verity file.  The page must be a
> + * pagecache page that is still locked and not yet uptodate.
> + *
> + * Return: true if the page is valid, else false.
> + */
> +bool fsverity_verify_page(struct page *page)
> +{
> +	struct inode *inode = page->mapping->host;
> +	const struct fsverity_info *vi = inode->i_verity_info;
> +	struct ahash_request *req;
> +	bool valid;
> +
> +	req = ahash_request_alloc(vi->tree_params.hash_alg->tfm, GFP_NOFS);
> +	if (unlikely(!req))
> +		return false;
> +
> +	valid = verify_page(inode, vi, req, page);
> +
> +	ahash_request_free(req);
> +
> +	return valid;
> +}
> +EXPORT_SYMBOL_GPL(fsverity_verify_page);
> +
> +#ifdef CONFIG_BLOCK
> +/**
> + * fsverity_verify_bio - verify a 'read' bio that has just completed
> + *
> + * Verify a set of pages that have just been read from a verity file.  The pages
> + * must be pagecache pages that are still locked and not yet uptodate.  Pages
> + * that fail verification are set to the Error state.  Verification is skipped
> + * for pages already in the Error state, e.g. due to fscrypt decryption failure.
> + *
> + * This is a helper function for use by the ->readpages() method of filesystems
> + * that issue bios to read data directly into the page cache.  Filesystems that
> + * populate the page cache without issuing bios (e.g. non block-based
> + * filesystems) must instead call fsverity_verify_page() directly on each page.
> + * All filesystems must also call fsverity_verify_page() on holes.
> + */
> +void fsverity_verify_bio(struct bio *bio)
> +{
> +	struct inode *inode = bio_first_page_all(bio)->mapping->host;
> +	const struct fsverity_info *vi = inode->i_verity_info;
> +	struct ahash_request *req;
> +	struct bio_vec *bv;
> +	struct bvec_iter_all iter_all;
> +
> +	req = ahash_request_alloc(vi->tree_params.hash_alg->tfm, GFP_NOFS);
> +	if (unlikely(!req)) {
> +		bio_for_each_segment_all(bv, bio, iter_all)
> +			SetPageError(bv->bv_page);
> +		return;
> +	}
> +
> +	bio_for_each_segment_all(bv, bio, iter_all) {
> +		struct page *page = bv->bv_page;
> +
> +		if (!PageError(page) && !verify_page(inode, vi, req, page))
> +			SetPageError(page);
> +	}
> +
> +	ahash_request_free(req);
> +}
> +EXPORT_SYMBOL_GPL(fsverity_verify_bio);
> +#endif /* CONFIG_BLOCK */
> +
> +/**
> + * fsverity_enqueue_verify_work - enqueue work on the fs-verity workqueue
> + *
> + * Enqueue verification work for asynchronous processing.
> + */
> +void fsverity_enqueue_verify_work(struct work_struct *work)
> +{
> +	queue_work(fsverity_read_workqueue, work);
> +}
> +EXPORT_SYMBOL_GPL(fsverity_enqueue_verify_work);
> +
> +int __init fsverity_init_workqueue(void)
> +{
> +	/*
> +	 * Use an unbound workqueue to allow bios to be verified in parallel
> +	 * even when they happen to complete on the same CPU.  This sacrifices
> +	 * locality, but it's worthwhile since hashing is CPU-intensive.
> +	 *
> +	 * Also use a high-priority workqueue to prioritize verification work,
> +	 * which blocks reads from completing, over regular application tasks.
> +	 */
> +	fsverity_read_workqueue = alloc_workqueue("fsverity_read_queue",
> +						  WQ_UNBOUND | WQ_HIGHPRI,
> +						  num_online_cpus());
> +	if (!fsverity_read_workqueue)
> +		return -ENOMEM;
> +	return 0;
> +}
> diff --git a/include/linux/fsverity.h b/include/linux/fsverity.h
> index cbcc358d073652..ecd47e748c7f64 100644
> --- a/include/linux/fsverity.h
> +++ b/include/linux/fsverity.h
> @@ -33,6 +33,23 @@ struct fsverity_operations {
>  	 */
>  	int (*get_verity_descriptor)(struct inode *inode, void *buf,
>  				     size_t bufsize);
> +
> +	/**
> +	 * Read a Merkle tree page of the given inode.
> +	 *
> +	 * @inode: the inode
> +	 * @index: 0-based index of the page within the Merkle tree
> +	 *
> +	 * This can be called at any time on an open verity file, as well as
> +	 * between ->begin_enable_verity() and ->end_enable_verity().  It may be
> +	 * called by multiple processes concurrently, even with the same page.
> +	 *
> +	 * Note that this must retrieve a *page*, not necessarily a *block*.
> +	 *
> +	 * Return: the page on success, ERR_PTR() on failure
> +	 */
> +	struct page *(*read_merkle_tree_page)(struct inode *inode,
> +					      pgoff_t index);
>  };
>  
>  #ifdef CONFIG_FS_VERITY
> @@ -49,6 +66,12 @@ extern int fsverity_file_open(struct inode *inode, struct file *filp);
>  extern int fsverity_prepare_setattr(struct dentry *dentry, struct iattr *attr);
>  extern void fsverity_cleanup_inode(struct inode *inode);
>  
> +/* verify.c */
> +
> +extern bool fsverity_verify_page(struct page *page);
> +extern void fsverity_verify_bio(struct bio *bio);
> +extern void fsverity_enqueue_verify_work(struct work_struct *work);
> +
>  #else /* !CONFIG_FS_VERITY */
>  
>  static inline struct fsverity_info *fsverity_get_info(const struct inode *inode)
> @@ -73,6 +96,39 @@ static inline void fsverity_cleanup_inode(struct inode *inode)
>  {
>  }
>  
> +/* verify.c */
> +
> +static inline bool fsverity_verify_page(struct page *page)
> +{
> +	WARN_ON(1);
> +	return false;
> +}
> +
> +static inline void fsverity_verify_bio(struct bio *bio)
> +{
> +	WARN_ON(1);
> +}
> +
> +static inline void fsverity_enqueue_verify_work(struct work_struct *work)
> +{
> +	WARN_ON(1);
> +}
> +
>  #endif	/* !CONFIG_FS_VERITY */
>  
> +/**
> + * fsverity_active() - do reads from the inode need to go through fs-verity?
> + *
> + * This checks whether ->i_verity_info has been set.
> + *
> + * Filesystems call this from ->readpages() to check whether the pages need to
> + * be verified or not.  Don't use IS_VERITY() for this purpose; it's subject to
> + * a race condition where the file is being read concurrently with
> + * FS_IOC_ENABLE_VERITY completing.  (S_VERITY is set before ->i_verity_info.)
> + */
> +static inline bool fsverity_active(const struct inode *inode)
> +{
> +	return fsverity_get_info(inode) != NULL;
> +}
> +
>  #endif	/* _LINUX_FSVERITY_H */
> -- 
> 2.22.0.410.gd8fdbe21b5-goog

^ permalink raw reply

* Re: [PATCH v5 08/16] fs-verity: add the hook for file ->setattr()
From: Jaegeuk Kim @ 2019-06-22 22:28 UTC (permalink / raw)
  To: Eric Biggers
  Cc: Theodore Y . Ts'o, Darrick J . Wong, linux-api, Dave Chinner,
	linux-f2fs-devel, linux-fscrypt, linux-fsdevel, linux-integrity,
	linux-ext4, Linus Torvalds, Christoph Hellwig, Victor Hsieh
In-Reply-To: <20190620205043.64350-9-ebiggers@kernel.org>

On 06/20, Eric Biggers wrote:
> From: Eric Biggers <ebiggers@google.com>
> 
> Add a function fsverity_prepare_setattr() which filesystems that support
> fs-verity must call to deny truncates of verity files.
> 
> Reviewed-by: Theodore Ts'o <tytso@mit.edu>

Reviewed-by: Jaegeuk Kim <jaegeuk@kernel.org>

> Signed-off-by: Eric Biggers <ebiggers@google.com>
> ---
>  fs/verity/open.c         | 21 +++++++++++++++++++++
>  include/linux/fsverity.h |  7 +++++++
>  2 files changed, 28 insertions(+)
> 
> diff --git a/fs/verity/open.c b/fs/verity/open.c
> index 3a3bb27e23f5e3..21ae0ef254a695 100644
> --- a/fs/verity/open.c
> +++ b/fs/verity/open.c
> @@ -296,6 +296,27 @@ int fsverity_file_open(struct inode *inode, struct file *filp)
>  }
>  EXPORT_SYMBOL_GPL(fsverity_file_open);
>  
> +/**
> + * fsverity_prepare_setattr - prepare to change a verity inode's attributes
> + * @dentry: dentry through which the inode is being changed
> + * @attr: attributes to change
> + *
> + * Verity files are immutable, so deny truncates.  This isn't covered by the
> + * open-time check because sys_truncate() takes a path, not a file descriptor.
> + *
> + * Return: 0 on success, -errno on failure
> + */
> +int fsverity_prepare_setattr(struct dentry *dentry, struct iattr *attr)
> +{
> +	if (IS_VERITY(d_inode(dentry)) && (attr->ia_valid & ATTR_SIZE)) {
> +		pr_debug("Denying truncate of verity file (ino %lu)\n",
> +			 d_inode(dentry)->i_ino);
> +		return -EPERM;
> +	}
> +	return 0;
> +}
> +EXPORT_SYMBOL_GPL(fsverity_prepare_setattr);
> +
>  /**
>   * fsverity_cleanup_inode - free the inode's verity info, if present
>   *
> diff --git a/include/linux/fsverity.h b/include/linux/fsverity.h
> index 1372c236c8770c..cbcc358d073652 100644
> --- a/include/linux/fsverity.h
> +++ b/include/linux/fsverity.h
> @@ -46,6 +46,7 @@ static inline struct fsverity_info *fsverity_get_info(const struct inode *inode)
>  /* open.c */
>  
>  extern int fsverity_file_open(struct inode *inode, struct file *filp);
> +extern int fsverity_prepare_setattr(struct dentry *dentry, struct iattr *attr);
>  extern void fsverity_cleanup_inode(struct inode *inode);
>  
>  #else /* !CONFIG_FS_VERITY */
> @@ -62,6 +63,12 @@ static inline int fsverity_file_open(struct inode *inode, struct file *filp)
>  	return IS_VERITY(inode) ? -EOPNOTSUPP : 0;
>  }
>  
> +static inline int fsverity_prepare_setattr(struct dentry *dentry,
> +					   struct iattr *attr)
> +{
> +	return IS_VERITY(d_inode(dentry)) ? -EOPNOTSUPP : 0;
> +}
> +
>  static inline void fsverity_cleanup_inode(struct inode *inode)
>  {
>  }
> -- 
> 2.22.0.410.gd8fdbe21b5-goog

^ permalink raw reply

* Re: [PATCH v5 07/16] fs-verity: add the hook for file ->open()
From: Jaegeuk Kim @ 2019-06-22 22:28 UTC (permalink / raw)
  To: Eric Biggers
  Cc: Theodore Y . Ts'o, Darrick J . Wong, linux-api, Dave Chinner,
	linux-f2fs-devel, linux-fscrypt, linux-fsdevel, linux-integrity,
	linux-ext4, Linus Torvalds, Christoph Hellwig, Victor Hsieh
In-Reply-To: <20190620205043.64350-8-ebiggers@kernel.org>

On 06/20, Eric Biggers wrote:
> From: Eric Biggers <ebiggers@google.com>
> 
> Add the fsverity_file_open() function, which prepares an fs-verity file
> to be read from.  If not already done, it loads the fs-verity descriptor
> from the filesystem and sets up an fsverity_info structure for the inode
> which describes the Merkle tree and contains the file measurement.  It
> also denies all attempts to open verity files for writing.
> 
> This commit also begins the include/linux/fsverity.h header, which
> declares the interface between fs/verity/ and filesystems.
> 
> Reviewed-by: Theodore Ts'o <tytso@mit.edu>

Reviewed-by: Jaegeuk Kim <jaegeuk@kernel.org>

> Signed-off-by: Eric Biggers <ebiggers@google.com>
> ---
>  fs/verity/Makefile           |   3 +-
>  fs/verity/fsverity_private.h |  54 +++++-
>  fs/verity/init.c             |   6 +
>  fs/verity/open.c             | 319 +++++++++++++++++++++++++++++++++++
>  include/linux/fsverity.h     |  71 ++++++++
>  5 files changed, 450 insertions(+), 3 deletions(-)
>  create mode 100644 fs/verity/open.c
>  create mode 100644 include/linux/fsverity.h
> 
> diff --git a/fs/verity/Makefile b/fs/verity/Makefile
> index 398f3f85fa184b..e6a8951c493a5e 100644
> --- a/fs/verity/Makefile
> +++ b/fs/verity/Makefile
> @@ -1,4 +1,5 @@
>  # SPDX-License-Identifier: GPL-2.0
>  
>  obj-$(CONFIG_FS_VERITY) += hash_algs.o \
> -			   init.o
> +			   init.o \
> +			   open.o
> diff --git a/fs/verity/fsverity_private.h b/fs/verity/fsverity_private.h
> index 9697aaebb5dc1f..c79746ff335e14 100644
> --- a/fs/verity/fsverity_private.h
> +++ b/fs/verity/fsverity_private.h
> @@ -15,8 +15,7 @@
>  #define pr_fmt(fmt) "fs-verity: " fmt
>  
>  #include <crypto/sha.h>
> -#include <linux/fs.h>
> -#include <uapi/linux/fsverity.h>
> +#include <linux/fsverity.h>
>  
>  struct ahash_request;
>  
> @@ -59,6 +58,40 @@ struct merkle_tree_params {
>  	u64 level_start[FS_VERITY_MAX_LEVELS];
>  };
>  
> +/**
> + * struct fsverity_info - cached verity metadata for an inode
> + *
> + * When a verity file is first opened, an instance of this struct is allocated
> + * and stored in ->i_verity_info; it remains until the inode is evicted.  It
> + * caches information about the Merkle tree that's needed to efficiently verify
> + * data read from the file.  It also caches the file measurement.  The Merkle
> + * tree pages themselves are not cached here, but the filesystem may cache them.
> + */
> +struct fsverity_info {
> +	struct merkle_tree_params tree_params;
> +	u8 root_hash[FS_VERITY_MAX_DIGEST_SIZE];
> +	u8 measurement[FS_VERITY_MAX_DIGEST_SIZE];
> +	const struct inode *inode;
> +};
> +
> +/*
> + * Merkle tree properties.  The file measurement is the hash of this structure.
> + */
> +struct fsverity_descriptor {
> +	__u8 version;		/* must be 1 */
> +	__u8 hash_algorithm;	/* Merkle tree hash algorithm */
> +	__u8 log_blocksize;	/* log2 of size of data and tree blocks */
> +	__u8 salt_size;		/* size of salt in bytes; 0 if none */
> +	__le32 sig_size;	/* reserved, must be 0 */
> +	__le64 data_size;	/* size of file the Merkle tree is built over */
> +	__u8 root_hash[64];	/* Merkle tree root hash */
> +	__u8 salt[32];		/* salt prepended to each hashed block */
> +	__u8 __reserved[144];	/* must be 0's */
> +};
> +
> +/* Arbitrary limit to bound the kmalloc() size.  Can be changed. */
> +#define FS_VERITY_MAX_DESCRIPTOR_SIZE	16384
> +
>  /* hash_algs.c */
>  
>  extern struct fsverity_hash_alg fsverity_hash_algs[];
> @@ -85,4 +118,21 @@ fsverity_msg(const struct inode *inode, const char *level,
>  #define fsverity_err(inode, fmt, ...)		\
>  	fsverity_msg((inode), KERN_ERR, fmt, ##__VA_ARGS__)
>  
> +/* open.c */
> +
> +int fsverity_init_merkle_tree_params(struct merkle_tree_params *params,
> +				     const struct inode *inode,
> +				     unsigned int hash_algorithm,
> +				     unsigned int log_blocksize,
> +				     const u8 *salt, size_t salt_size);
> +
> +struct fsverity_info *fsverity_create_info(const struct inode *inode,
> +					   const void *desc, size_t desc_size);
> +
> +void fsverity_set_info(struct inode *inode, struct fsverity_info *vi);
> +
> +void fsverity_free_info(struct fsverity_info *vi);
> +
> +int __init fsverity_init_info_cache(void);
> +
>  #endif /* _FSVERITY_PRIVATE_H */
> diff --git a/fs/verity/init.c b/fs/verity/init.c
> index 40076bbe452a48..fff1fd6343357d 100644
> --- a/fs/verity/init.c
> +++ b/fs/verity/init.c
> @@ -33,8 +33,14 @@ void fsverity_msg(const struct inode *inode, const char *level,
>  
>  static int __init fsverity_init(void)
>  {
> +	int err;
> +
>  	fsverity_check_hash_algs();
>  
> +	err = fsverity_init_info_cache();
> +	if (err)
> +		return err;
> +
>  	pr_debug("Initialized fs-verity\n");
>  	return 0;
>  }
> diff --git a/fs/verity/open.c b/fs/verity/open.c
> new file mode 100644
> index 00000000000000..3a3bb27e23f5e3
> --- /dev/null
> +++ b/fs/verity/open.c
> @@ -0,0 +1,319 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * fs/verity/open.c: opening fs-verity files
> + *
> + * Copyright 2019 Google LLC
> + */
> +
> +#include "fsverity_private.h"
> +
> +#include <linux/slab.h>
> +
> +static struct kmem_cache *fsverity_info_cachep;
> +
> +/**
> + * fsverity_init_merkle_tree_params() - initialize Merkle tree parameters
> + * @params: the parameters struct to initialize
> + * @inode: the inode for which the Merkle tree is being built
> + * @hash_algorithm: number of hash algorithm to use
> + * @log_blocksize: log base 2 of block size to use
> + * @salt: pointer to salt (optional)
> + * @salt_size: size of salt, possibly 0
> + *
> + * Validate the hash algorithm and block size, then compute the tree topology
> + * (num levels, num blocks in each level, etc.) and initialize @params.
> + *
> + * Return: 0 on success, -errno on failure
> + */
> +int fsverity_init_merkle_tree_params(struct merkle_tree_params *params,
> +				     const struct inode *inode,
> +				     unsigned int hash_algorithm,
> +				     unsigned int log_blocksize,
> +				     const u8 *salt, size_t salt_size)
> +{
> +	const struct fsverity_hash_alg *hash_alg;
> +	int err;
> +	u64 blocks;
> +	u64 offset;
> +	int level;
> +
> +	memset(params, 0, sizeof(*params));
> +
> +	hash_alg = fsverity_get_hash_alg(inode, hash_algorithm);
> +	if (IS_ERR(hash_alg))
> +		return PTR_ERR(hash_alg);
> +	params->hash_alg = hash_alg;
> +	params->digest_size = hash_alg->digest_size;
> +
> +	params->hashstate = fsverity_prepare_hash_state(hash_alg, salt,
> +							salt_size);
> +	if (IS_ERR(params->hashstate)) {
> +		err = PTR_ERR(params->hashstate);
> +		params->hashstate = NULL;
> +		fsverity_err(inode, "Error %d preparing hash state", err);
> +		goto out_err;
> +	}
> +
> +	if (log_blocksize != PAGE_SHIFT) {
> +		fsverity_warn(inode, "Unsupported log_blocksize: %u",
> +			      log_blocksize);
> +		err = -EINVAL;
> +		goto out_err;
> +	}
> +	params->log_blocksize = log_blocksize;
> +	params->block_size = 1 << log_blocksize;
> +
> +	if (WARN_ON(!is_power_of_2(params->digest_size))) {
> +		err = -EINVAL;
> +		goto out_err;
> +	}
> +	if (params->block_size < 2 * params->digest_size) {
> +		fsverity_warn(inode,
> +			      "Merkle tree block size (%u) too small for hash algorithm \"%s\"",
> +			      params->block_size, hash_alg->name);
> +		err = -EINVAL;
> +		goto out_err;
> +	}
> +	params->log_arity = params->log_blocksize - ilog2(params->digest_size);
> +	params->hashes_per_block = 1 << params->log_arity;
> +
> +	pr_debug("Merkle tree uses %s with %u-byte blocks (%u hashes/block), salt=%*phN\n",
> +		 hash_alg->name, params->block_size, params->hashes_per_block,
> +		 (int)salt_size, salt);
> +
> +	/*
> +	 * Compute the number of levels in the Merkle tree and create a map from
> +	 * level to the starting block of that level.  Level 'num_levels - 1' is
> +	 * the root and is stored first.  Level 0 is the level directly "above"
> +	 * the data blocks and is stored last.
> +	 */
> +
> +	/* Compute number of levels and the number of blocks in each level */
> +	blocks = (inode->i_size + params->block_size - 1) >> log_blocksize;
> +	pr_debug("Data is %lld bytes (%llu blocks)\n", inode->i_size, blocks);
> +	while (blocks > 1) {
> +		if (params->num_levels >= FS_VERITY_MAX_LEVELS) {
> +			fsverity_err(inode, "Too many levels in Merkle tree");
> +			err = -EINVAL;
> +			goto out_err;
> +		}
> +		blocks = (blocks + params->hashes_per_block - 1) >>
> +			 params->log_arity;
> +		/* temporarily using level_start[] to store blocks in level */
> +		params->level_start[params->num_levels++] = blocks;
> +	}
> +
> +	/* Compute the starting block of each level */
> +	offset = 0;
> +	for (level = (int)params->num_levels - 1; level >= 0; level--) {
> +		blocks = params->level_start[level];
> +		params->level_start[level] = offset;
> +		pr_debug("Level %d is %llu blocks starting at index %llu\n",
> +			 level, blocks, offset);
> +		offset += blocks;
> +	}
> +
> +	params->tree_size = offset << log_blocksize;
> +	return 0;
> +
> +out_err:
> +	kfree(params->hashstate);
> +	memset(params, 0, sizeof(*params));
> +	return err;
> +}
> +
> +/* Compute the file measurement by hashing the fsverity_descriptor. */
> +static int compute_file_measurement(const struct fsverity_hash_alg *hash_alg,
> +				    const struct fsverity_descriptor *desc,
> +				    u8 *measurement)
> +{
> +	return fsverity_hash_buffer(hash_alg, desc, sizeof(*desc), measurement);
> +}
> +
> +/*
> + * Validate the given fsverity_descriptor and create a new fsverity_info from
> + * it.
> + */
> +struct fsverity_info *fsverity_create_info(const struct inode *inode,
> +					   const void *_desc, size_t desc_size)
> +{
> +	const struct fsverity_descriptor *desc = _desc;
> +	struct fsverity_info *vi;
> +	int err;
> +
> +	if (desc_size < sizeof(*desc)) {
> +		fsverity_err(inode, "Unrecognized descriptor size (%zu)",
> +			     desc_size);
> +		return ERR_PTR(-EINVAL);
> +	}
> +
> +	if (desc->version != 1) {
> +		fsverity_err(inode, "Unrecognized descriptor version: %u",
> +			     desc->version);
> +		return ERR_PTR(-EINVAL);
> +	}
> +
> +	if (desc->sig_size ||
> +	    memchr_inv(desc->__reserved, 0, sizeof(desc->__reserved))) {
> +		fsverity_err(inode, "Reserved bits set in descriptor");
> +		return ERR_PTR(-EINVAL);
> +	}
> +
> +	if (desc->salt_size > sizeof(desc->salt)) {
> +		fsverity_err(inode, "Invalid salt_size: %u", desc->salt_size);
> +		return ERR_PTR(-EINVAL);
> +	}
> +
> +	if (le64_to_cpu(desc->data_size) != inode->i_size) {
> +		fsverity_err(inode,
> +			     "Wrong data_size: %llu (desc) != %lld (inode)",
> +			     le64_to_cpu(desc->data_size), inode->i_size);
> +		return ERR_PTR(-EINVAL);
> +	}
> +
> +	vi = kmem_cache_zalloc(fsverity_info_cachep, GFP_KERNEL);
> +	if (!vi)
> +		return ERR_PTR(-ENOMEM);
> +	vi->inode = inode;
> +
> +	err = fsverity_init_merkle_tree_params(&vi->tree_params, inode,
> +					       desc->hash_algorithm,
> +					       desc->log_blocksize,
> +					       desc->salt, desc->salt_size);
> +	if (err) {
> +		fsverity_err(inode,
> +			     "Error %d initializing Merkle tree parameters",
> +			     err);
> +		goto out;
> +	}
> +
> +	memcpy(vi->root_hash, desc->root_hash, vi->tree_params.digest_size);
> +
> +	err = compute_file_measurement(vi->tree_params.hash_alg, desc,
> +				       vi->measurement);
> +	if (err) {
> +		fsverity_err(vi->inode, "Error %d computing file measurement",
> +			     err);
> +		goto out;
> +	}
> +	pr_debug("Computed file measurement: %s:%*phN\n",
> +		 vi->tree_params.hash_alg->name,
> +		 vi->tree_params.digest_size, vi->measurement);
> +out:
> +	if (err) {
> +		fsverity_free_info(vi);
> +		vi = ERR_PTR(err);
> +	}
> +	return vi;
> +}
> +
> +void fsverity_set_info(struct inode *inode, struct fsverity_info *vi)
> +{
> +	/*
> +	 * Multiple processes may race to set ->i_verity_info, so use cmpxchg.
> +	 * This pairs with the READ_ONCE() in fsverity_get_info().
> +	 */
> +	if (cmpxchg_release(&inode->i_verity_info, NULL, vi) != NULL)
> +		fsverity_free_info(vi);
> +}
> +
> +void fsverity_free_info(struct fsverity_info *vi)
> +{
> +	if (!vi)
> +		return;
> +	kfree(vi->tree_params.hashstate);
> +	kmem_cache_free(fsverity_info_cachep, vi);
> +}
> +
> +/* Ensure the inode has an ->i_verity_info */
> +static int ensure_verity_info(struct inode *inode)
> +{
> +	struct fsverity_info *vi = fsverity_get_info(inode);
> +	struct fsverity_descriptor *desc;
> +	int res;
> +
> +	if (vi)
> +		return 0;
> +
> +	res = inode->i_sb->s_vop->get_verity_descriptor(inode, NULL, 0);
> +	if (res < 0) {
> +		fsverity_err(inode,
> +			     "Error %d getting verity descriptor size", res);
> +		return res;
> +	}
> +	if (res > FS_VERITY_MAX_DESCRIPTOR_SIZE) {
> +		fsverity_err(inode, "Verity descriptor is too large (%d bytes)",
> +			     res);
> +		return -EMSGSIZE;
> +	}
> +	desc = kmalloc(res, GFP_KERNEL);
> +	if (!desc)
> +		return -ENOMEM;
> +	res = inode->i_sb->s_vop->get_verity_descriptor(inode, desc, res);
> +	if (res < 0) {
> +		fsverity_err(inode, "Error %d reading verity descriptor", res);
> +		goto out_free_desc;
> +	}
> +
> +	vi = fsverity_create_info(inode, desc, res);
> +	if (IS_ERR(vi)) {
> +		res = PTR_ERR(vi);
> +		goto out_free_desc;
> +	}
> +
> +	fsverity_set_info(inode, vi);
> +	res = 0;
> +out_free_desc:
> +	kfree(desc);
> +	return res;
> +}
> +
> +/**
> + * fsverity_file_open - prepare to open a verity file
> + * @inode: the inode being opened
> + * @filp: the struct file being set up
> + *
> + * When opening a verity file, deny the open if it is for writing.  Otherwise,
> + * set up the inode's ->i_verity_info if not already done.
> + *
> + * When combined with fscrypt, this must be called after fscrypt_file_open().
> + * Otherwise, we won't have the key set up to decrypt the verity metadata.
> + *
> + * Return: 0 on success, -errno on failure
> + */
> +int fsverity_file_open(struct inode *inode, struct file *filp)
> +{
> +	if (!IS_VERITY(inode))
> +		return 0;
> +
> +	if (filp->f_mode & FMODE_WRITE) {
> +		pr_debug("Denying opening verity file (ino %lu) for write\n",
> +			 inode->i_ino);
> +		return -EPERM;
> +	}
> +
> +	return ensure_verity_info(inode);
> +}
> +EXPORT_SYMBOL_GPL(fsverity_file_open);
> +
> +/**
> + * fsverity_cleanup_inode - free the inode's verity info, if present
> + *
> + * Filesystems must call this on inode eviction to free ->i_verity_info.
> + */
> +void fsverity_cleanup_inode(struct inode *inode)
> +{
> +	fsverity_free_info(inode->i_verity_info);
> +	inode->i_verity_info = NULL;
> +}
> +EXPORT_SYMBOL_GPL(fsverity_cleanup_inode);
> +
> +int __init fsverity_init_info_cache(void)
> +{
> +	fsverity_info_cachep = KMEM_CACHE_USERCOPY(fsverity_info,
> +						   SLAB_RECLAIM_ACCOUNT,
> +						   measurement);
> +	if (!fsverity_info_cachep)
> +		return -ENOMEM;
> +	return 0;
> +}
> diff --git a/include/linux/fsverity.h b/include/linux/fsverity.h
> new file mode 100644
> index 00000000000000..1372c236c8770c
> --- /dev/null
> +++ b/include/linux/fsverity.h
> @@ -0,0 +1,71 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +/*
> + * fs-verity: read-only file-based authenticity protection
> + *
> + * This header declares the interface between the fs/verity/ support layer and
> + * filesystems that support fs-verity.
> + *
> + * Copyright 2019 Google LLC
> + */
> +
> +#ifndef _LINUX_FSVERITY_H
> +#define _LINUX_FSVERITY_H
> +
> +#include <linux/fs.h>
> +#include <uapi/linux/fsverity.h>
> +
> +/* Verity operations for filesystems */
> +struct fsverity_operations {
> +
> +	/**
> +	 * Get the verity descriptor of the given inode.
> +	 *
> +	 * @inode: an inode with the S_VERITY flag set
> +	 * @buf: buffer in which to place the verity descriptor
> +	 * @bufsize: size of @buf, or 0 to retrieve the size only
> +	 *
> +	 * If bufsize == 0, then the size of the verity descriptor is returned.
> +	 * Otherwise the verity descriptor is written to 'buf' and its actual
> +	 * size is returned; -ERANGE is returned if it's too large.  This may be
> +	 * called by multiple processes concurrently on the same inode.
> +	 *
> +	 * Return: the size on success, -errno on failure
> +	 */
> +	int (*get_verity_descriptor)(struct inode *inode, void *buf,
> +				     size_t bufsize);
> +};
> +
> +#ifdef CONFIG_FS_VERITY
> +
> +static inline struct fsverity_info *fsverity_get_info(const struct inode *inode)
> +{
> +	/* pairs with the cmpxchg_release() in fsverity_set_info() */
> +	return READ_ONCE(inode->i_verity_info);
> +}
> +
> +/* open.c */
> +
> +extern int fsverity_file_open(struct inode *inode, struct file *filp);
> +extern void fsverity_cleanup_inode(struct inode *inode);
> +
> +#else /* !CONFIG_FS_VERITY */
> +
> +static inline struct fsverity_info *fsverity_get_info(const struct inode *inode)
> +{
> +	return NULL;
> +}
> +
> +/* open.c */
> +
> +static inline int fsverity_file_open(struct inode *inode, struct file *filp)
> +{
> +	return IS_VERITY(inode) ? -EOPNOTSUPP : 0;
> +}
> +
> +static inline void fsverity_cleanup_inode(struct inode *inode)
> +{
> +}
> +
> +#endif	/* !CONFIG_FS_VERITY */
> +
> +#endif	/* _LINUX_FSVERITY_H */
> -- 
> 2.22.0.410.gd8fdbe21b5-goog

^ permalink raw reply

* Re: [PATCH v5 06/16] fs-verity: add inode and superblock fields
From: Jaegeuk Kim @ 2019-06-22 22:18 UTC (permalink / raw)
  To: Eric Biggers
  Cc: Theodore Y . Ts'o, Darrick J . Wong, linux-api, Dave Chinner,
	linux-f2fs-devel, linux-fscrypt, linux-fsdevel, linux-integrity,
	linux-ext4, Linus Torvalds, Christoph Hellwig, Victor Hsieh
In-Reply-To: <20190620205043.64350-7-ebiggers@kernel.org>

On 06/20, Eric Biggers wrote:
> From: Eric Biggers <ebiggers@google.com>
> 
> Analogous to fs/crypto/, add fields to the VFS inode and superblock for
> use by the fs/verity/ support layer:
> 
> - ->s_vop: points to the fsverity_operations if the filesystem supports
>   fs-verity, otherwise is NULL.
> 
> - ->i_verity_info: points to cached fs-verity information for the inode
>   after someone opens it, otherwise is NULL.
> 
> - S_VERITY: bit in ->i_flags that identifies verity inodes, even when
>   they haven't been opened yet and thus still have NULL ->i_verity_info.
> 
> Reviewed-by: Theodore Ts'o <tytso@mit.edu>

Reviewed-by: Jaegeuk Kim <jaegeuk@kernel.org>

> Signed-off-by: Eric Biggers <ebiggers@google.com>
> ---
>  include/linux/fs.h | 11 +++++++++++
>  1 file changed, 11 insertions(+)
> 
> diff --git a/include/linux/fs.h b/include/linux/fs.h
> index f7fdfe93e25d3e..a80a192cdcf285 100644
> --- a/include/linux/fs.h
> +++ b/include/linux/fs.h
> @@ -64,6 +64,8 @@ struct workqueue_struct;
>  struct iov_iter;
>  struct fscrypt_info;
>  struct fscrypt_operations;
> +struct fsverity_info;
> +struct fsverity_operations;
>  struct fs_context;
>  struct fs_parameter_description;
>  
> @@ -723,6 +725,10 @@ struct inode {
>  	struct fscrypt_info	*i_crypt_info;
>  #endif
>  
> +#ifdef CONFIG_FS_VERITY
> +	struct fsverity_info	*i_verity_info;
> +#endif
> +
>  	void			*i_private; /* fs or device private pointer */
>  } __randomize_layout;
>  
> @@ -1429,6 +1435,9 @@ struct super_block {
>  	const struct xattr_handler **s_xattr;
>  #ifdef CONFIG_FS_ENCRYPTION
>  	const struct fscrypt_operations	*s_cop;
> +#endif
> +#ifdef CONFIG_FS_VERITY
> +	const struct fsverity_operations *s_vop;
>  #endif
>  	struct hlist_bl_head	s_roots;	/* alternate root dentries for NFS */
>  	struct list_head	s_mounts;	/* list of mounts; _not_ for fs use */
> @@ -1964,6 +1973,7 @@ struct super_operations {
>  #endif
>  #define S_ENCRYPTED	16384	/* Encrypted file (using fs/crypto/) */
>  #define S_CASEFOLD	32768	/* Casefolded file */
> +#define S_VERITY	65536	/* Verity file (using fs/verity/) */
>  
>  /*
>   * Note that nosuid etc flags are inode-specific: setting some file-system
> @@ -2005,6 +2015,7 @@ static inline bool sb_rdonly(const struct super_block *sb) { return sb->s_flags
>  #define IS_DAX(inode)		((inode)->i_flags & S_DAX)
>  #define IS_ENCRYPTED(inode)	((inode)->i_flags & S_ENCRYPTED)
>  #define IS_CASEFOLDED(inode)	((inode)->i_flags & S_CASEFOLD)
> +#define IS_VERITY(inode)	((inode)->i_flags & S_VERITY)
>  
>  #define IS_WHITEOUT(inode)	(S_ISCHR(inode->i_mode) && \
>  				 (inode)->i_rdev == WHITEOUT_DEV)
> -- 
> 2.22.0.410.gd8fdbe21b5-goog

^ permalink raw reply

* Re: [PATCH v5 05/16] fs-verity: add Kconfig and the helper functions for hashing
From: Jaegeuk Kim @ 2019-06-22 22:17 UTC (permalink / raw)
  To: Eric Biggers
  Cc: Theodore Y . Ts'o, Darrick J . Wong, linux-api, Dave Chinner,
	linux-f2fs-devel, linux-fscrypt, linux-fsdevel, linux-integrity,
	linux-ext4, Linus Torvalds, Christoph Hellwig, Victor Hsieh
In-Reply-To: <20190620205043.64350-6-ebiggers@kernel.org>

On 06/20, Eric Biggers wrote:
> From: Eric Biggers <ebiggers@google.com>
> 
> Add the beginnings of the fs/verity/ support layer, including the
> Kconfig option and various helper functions for hashing.  To start, only
> SHA-256 is supported, but other hash algorithms can easily be added.
> 
> Reviewed-by: Theodore Ts'o <tytso@mit.edu>

Reviewed-by: Jaegeuk Kim <jaegeuk@kernel.org>

> Signed-off-by: Eric Biggers <ebiggers@google.com>
> ---
>  fs/Kconfig                   |   2 +
>  fs/Makefile                  |   1 +
>  fs/verity/Kconfig            |  38 +++++
>  fs/verity/Makefile           |   4 +
>  fs/verity/fsverity_private.h |  88 +++++++++++
>  fs/verity/hash_algs.c        | 274 +++++++++++++++++++++++++++++++++++
>  fs/verity/init.c             |  41 ++++++
>  7 files changed, 448 insertions(+)
>  create mode 100644 fs/verity/Kconfig
>  create mode 100644 fs/verity/Makefile
>  create mode 100644 fs/verity/fsverity_private.h
>  create mode 100644 fs/verity/hash_algs.c
>  create mode 100644 fs/verity/init.c
> 
> diff --git a/fs/Kconfig b/fs/Kconfig
> index f1046cf6ad85e0..4b66dafbdc7b1c 100644
> --- a/fs/Kconfig
> +++ b/fs/Kconfig
> @@ -113,6 +113,8 @@ config MANDATORY_FILE_LOCKING
>  
>  source "fs/crypto/Kconfig"
>  
> +source "fs/verity/Kconfig"
> +
>  source "fs/notify/Kconfig"
>  
>  source "fs/quota/Kconfig"
> diff --git a/fs/Makefile b/fs/Makefile
> index c9aea23aba560c..fe7f2c07f482e1 100644
> --- a/fs/Makefile
> +++ b/fs/Makefile
> @@ -34,6 +34,7 @@ obj-$(CONFIG_AIO)               += aio.o
>  obj-$(CONFIG_IO_URING)		+= io_uring.o
>  obj-$(CONFIG_FS_DAX)		+= dax.o
>  obj-$(CONFIG_FS_ENCRYPTION)	+= crypto/
> +obj-$(CONFIG_FS_VERITY)		+= verity/
>  obj-$(CONFIG_FILE_LOCKING)      += locks.o
>  obj-$(CONFIG_COMPAT)		+= compat.o compat_ioctl.o
>  obj-$(CONFIG_BINFMT_AOUT)	+= binfmt_aout.o
> diff --git a/fs/verity/Kconfig b/fs/verity/Kconfig
> new file mode 100644
> index 00000000000000..c2bca0b01ecfa9
> --- /dev/null
> +++ b/fs/verity/Kconfig
> @@ -0,0 +1,38 @@
> +# SPDX-License-Identifier: GPL-2.0
> +
> +config FS_VERITY
> +	bool "FS Verity (read-only file-based authenticity protection)"
> +	select CRYPTO
> +	# SHA-256 is selected as it's intended to be the default hash algorithm.
> +	# To avoid bloat, other wanted algorithms must be selected explicitly.
> +	select CRYPTO_SHA256
> +	help
> +	  This option enables fs-verity.  fs-verity is the dm-verity
> +	  mechanism implemented at the file level.  On supported
> +	  filesystems (currently EXT4 and F2FS), userspace can use an
> +	  ioctl to enable verity for a file, which causes the filesystem
> +	  to build a Merkle tree for the file.  The filesystem will then
> +	  transparently verify any data read from the file against the
> +	  Merkle tree.  The file is also made read-only.
> +
> +	  This serves as an integrity check, but the availability of the
> +	  Merkle tree root hash also allows efficiently supporting
> +	  various use cases where normally the whole file would need to
> +	  be hashed at once, such as: (a) auditing (logging the file's
> +	  hash), or (b) authenticity verification (comparing the hash
> +	  against a known good value, e.g. from a digital signature).
> +
> +	  fs-verity is especially useful on large files where not all
> +	  the contents may actually be needed.  Also, fs-verity verifies
> +	  data each time it is paged back in, which provides better
> +	  protection against malicious disks vs. an ahead-of-time hash.
> +
> +	  If unsure, say N.
> +
> +config FS_VERITY_DEBUG
> +	bool "FS Verity debugging"
> +	depends on FS_VERITY
> +	help
> +	  Enable debugging messages related to fs-verity by default.
> +
> +	  Say N unless you are an fs-verity developer.
> diff --git a/fs/verity/Makefile b/fs/verity/Makefile
> new file mode 100644
> index 00000000000000..398f3f85fa184b
> --- /dev/null
> +++ b/fs/verity/Makefile
> @@ -0,0 +1,4 @@
> +# SPDX-License-Identifier: GPL-2.0
> +
> +obj-$(CONFIG_FS_VERITY) += hash_algs.o \
> +			   init.o
> diff --git a/fs/verity/fsverity_private.h b/fs/verity/fsverity_private.h
> new file mode 100644
> index 00000000000000..9697aaebb5dc1f
> --- /dev/null
> +++ b/fs/verity/fsverity_private.h
> @@ -0,0 +1,88 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +/*
> + * fs-verity: read-only file-based authenticity protection
> + *
> + * Copyright 2019 Google LLC
> + */
> +
> +#ifndef _FSVERITY_PRIVATE_H
> +#define _FSVERITY_PRIVATE_H
> +
> +#ifdef CONFIG_FS_VERITY_DEBUG
> +#define DEBUG
> +#endif
> +
> +#define pr_fmt(fmt) "fs-verity: " fmt
> +
> +#include <crypto/sha.h>
> +#include <linux/fs.h>
> +#include <uapi/linux/fsverity.h>
> +
> +struct ahash_request;
> +
> +/*
> + * Implementation limit: maximum depth of the Merkle tree.  For now 8 is plenty;
> + * it's enough for over U64_MAX bytes of data using SHA-256 and 4K blocks.
> + */
> +#define FS_VERITY_MAX_LEVELS		8
> +
> +/*
> + * Largest digest size among all hash algorithms supported by fs-verity.
> + * Currently assumed to be <= size of fsverity_descriptor::root_hash.
> + */
> +#define FS_VERITY_MAX_DIGEST_SIZE	SHA256_DIGEST_SIZE
> +
> +/* A hash algorithm supported by fs-verity */
> +struct fsverity_hash_alg {
> +	struct crypto_ahash *tfm; /* hash tfm, allocated on demand */
> +	const char *name;	  /* crypto API name, e.g. sha256 */
> +	unsigned int digest_size; /* digest size in bytes, e.g. 32 for SHA-256 */
> +	unsigned int block_size;  /* block size in bytes, e.g. 64 for SHA-256 */
> +};
> +
> +/* Merkle tree parameters: hash algorithm, initial hash state, and topology */
> +struct merkle_tree_params {
> +	const struct fsverity_hash_alg *hash_alg; /* the hash algorithm */
> +	const u8 *hashstate;		/* initial hash state or NULL */
> +	unsigned int digest_size;	/* same as hash_alg->digest_size */
> +	unsigned int block_size;	/* size of data and tree blocks */
> +	unsigned int hashes_per_block;	/* number of hashes per tree block */
> +	unsigned int log_blocksize;	/* log2(block_size) */
> +	unsigned int log_arity;		/* log2(hashes_per_block) */
> +	unsigned int num_levels;	/* number of levels in Merkle tree */
> +	u64 tree_size;			/* Merkle tree size in bytes */
> +
> +	/*
> +	 * Starting block index for each tree level, ordered from leaf level (0)
> +	 * to root level ('num_levels - 1')
> +	 */
> +	u64 level_start[FS_VERITY_MAX_LEVELS];
> +};
> +
> +/* hash_algs.c */
> +
> +extern struct fsverity_hash_alg fsverity_hash_algs[];
> +
> +const struct fsverity_hash_alg *fsverity_get_hash_alg(const struct inode *inode,
> +						      unsigned int num);
> +const u8 *fsverity_prepare_hash_state(const struct fsverity_hash_alg *alg,
> +				      const u8 *salt, size_t salt_size);
> +int fsverity_hash_page(const struct merkle_tree_params *params,
> +		       const struct inode *inode,
> +		       struct ahash_request *req, struct page *page, u8 *out);
> +int fsverity_hash_buffer(const struct fsverity_hash_alg *alg,
> +			 const void *data, size_t size, u8 *out);
> +void __init fsverity_check_hash_algs(void);
> +
> +/* init.c */
> +
> +extern void __printf(3, 4) __cold
> +fsverity_msg(const struct inode *inode, const char *level,
> +	     const char *fmt, ...);
> +
> +#define fsverity_warn(inode, fmt, ...)		\
> +	fsverity_msg((inode), KERN_WARNING, fmt, ##__VA_ARGS__)
> +#define fsverity_err(inode, fmt, ...)		\
> +	fsverity_msg((inode), KERN_ERR, fmt, ##__VA_ARGS__)
> +
> +#endif /* _FSVERITY_PRIVATE_H */
> diff --git a/fs/verity/hash_algs.c b/fs/verity/hash_algs.c
> new file mode 100644
> index 00000000000000..46df17094fc252
> --- /dev/null
> +++ b/fs/verity/hash_algs.c
> @@ -0,0 +1,274 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * fs/verity/hash_algs.c: fs-verity hash algorithms
> + *
> + * Copyright 2019 Google LLC
> + */
> +
> +#include "fsverity_private.h"
> +
> +#include <crypto/hash.h>
> +#include <linux/scatterlist.h>
> +
> +/* The hash algorithms supported by fs-verity */
> +struct fsverity_hash_alg fsverity_hash_algs[] = {
> +	[FS_VERITY_HASH_ALG_SHA256] = {
> +		.name = "sha256",
> +		.digest_size = SHA256_DIGEST_SIZE,
> +		.block_size = SHA256_BLOCK_SIZE,
> +	},
> +};
> +
> +/**
> + * fsverity_get_hash_alg() - validate and prepare a hash algorithm
> + * @inode: optional inode for logging purposes
> + * @num: the hash algorithm number
> + *
> + * Get the struct fsverity_hash_alg for the given hash algorithm number, and
> + * ensure it has a hash transform ready to go.  The hash transforms are
> + * allocated on-demand so that we don't waste resources unnecessarily, and
> + * because the crypto modules may be initialized later than fs/verity/.
> + *
> + * Return: pointer to the hash alg on success, else an ERR_PTR()
> + */
> +const struct fsverity_hash_alg *fsverity_get_hash_alg(const struct inode *inode,
> +						      unsigned int num)
> +{
> +	struct fsverity_hash_alg *alg;
> +	struct crypto_ahash *tfm;
> +	int err;
> +
> +	if (num >= ARRAY_SIZE(fsverity_hash_algs) ||
> +	    !fsverity_hash_algs[num].name) {
> +		fsverity_warn(inode, "Unknown hash algorithm number: %u", num);
> +		return ERR_PTR(-EINVAL);
> +	}
> +	alg = &fsverity_hash_algs[num];
> +
> +	/* pairs with cmpxchg() below */
> +	tfm = READ_ONCE(alg->tfm);
> +	if (likely(tfm != NULL))
> +		return alg;
> +	/*
> +	 * Using the shash API would make things a bit simpler, but the ahash
> +	 * API is preferable as it allows the use of crypto accelerators.
> +	 */
> +	tfm = crypto_alloc_ahash(alg->name, 0, 0);
> +	if (IS_ERR(tfm)) {
> +		if (PTR_ERR(tfm) == -ENOENT)
> +			fsverity_warn(inode,
> +				      "Missing crypto API support for hash algorithm \"%s\"",
> +				      alg->name);
> +		else
> +			fsverity_err(inode,
> +				     "Error allocating hash algorithm \"%s\": %ld",
> +				     alg->name, PTR_ERR(tfm));
> +		return ERR_CAST(tfm);
> +	}
> +
> +	err = -EINVAL;
> +	if (WARN_ON(alg->digest_size != crypto_ahash_digestsize(tfm)))
> +		goto err_free_tfm;
> +	if (WARN_ON(alg->block_size != crypto_ahash_blocksize(tfm)))
> +		goto err_free_tfm;
> +
> +	pr_info("%s using implementation \"%s\"\n",
> +		alg->name, crypto_ahash_driver_name(tfm));
> +
> +	/* pairs with READ_ONCE() above */
> +	if (cmpxchg(&alg->tfm, NULL, tfm) != NULL)
> +		crypto_free_ahash(tfm);
> +
> +	return alg;
> +
> +err_free_tfm:
> +	crypto_free_ahash(tfm);
> +	return ERR_PTR(err);
> +}
> +
> +/**
> + * fsverity_prepare_hash_state() - precompute the initial hash state
> + * @alg: hash algorithm
> + * @salt: a salt which is to be prepended to all data to be hashed
> + * @salt_size: salt size in bytes, possibly 0
> + *
> + * Return: NULL if the salt is empty, otherwise the kmalloc()'ed precomputed
> + *	   initial hash state on success or an ERR_PTR() on failure.
> + */
> +const u8 *fsverity_prepare_hash_state(const struct fsverity_hash_alg *alg,
> +				      const u8 *salt, size_t salt_size)
> +{
> +	u8 *hashstate = NULL;
> +	struct ahash_request *req = NULL;
> +	u8 *padded_salt = NULL;
> +	size_t padded_salt_size;
> +	struct scatterlist sg;
> +	DECLARE_CRYPTO_WAIT(wait);
> +	int err;
> +
> +	if (salt_size == 0)
> +		return NULL;
> +
> +	hashstate = kmalloc(crypto_ahash_statesize(alg->tfm), GFP_KERNEL);
> +	if (!hashstate)
> +		return ERR_PTR(-ENOMEM);
> +
> +	req = ahash_request_alloc(alg->tfm, GFP_KERNEL);
> +	if (!req) {
> +		err = -ENOMEM;
> +		goto err_free;
> +	}
> +
> +	/*
> +	 * Zero-pad the salt to the next multiple of the input size of the hash
> +	 * algorithm's compression function, e.g. 64 bytes for SHA-256 or 128
> +	 * bytes for SHA-512.  This ensures that the hash algorithm won't have
> +	 * any bytes buffered internally after processing the salt, thus making
> +	 * salted hashing just as fast as unsalted hashing.
> +	 */
> +	padded_salt_size = round_up(salt_size, alg->block_size);
> +	padded_salt = kzalloc(padded_salt_size, GFP_KERNEL);
> +	if (!padded_salt) {
> +		err = -ENOMEM;
> +		goto err_free;
> +	}
> +	memcpy(padded_salt, salt, salt_size);
> +
> +	sg_init_one(&sg, padded_salt, padded_salt_size);
> +	ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP |
> +					CRYPTO_TFM_REQ_MAY_BACKLOG,
> +				   crypto_req_done, &wait);
> +	ahash_request_set_crypt(req, &sg, NULL, padded_salt_size);
> +
> +	err = crypto_wait_req(crypto_ahash_init(req), &wait);
> +	if (err)
> +		goto err_free;
> +
> +	err = crypto_wait_req(crypto_ahash_update(req), &wait);
> +	if (err)
> +		goto err_free;
> +
> +	err = crypto_ahash_export(req, hashstate);
> +	if (err)
> +		goto err_free;
> +out:
> +	kfree(padded_salt);
> +	ahash_request_free(req);
> +	return hashstate;
> +
> +err_free:
> +	kfree(hashstate);
> +	hashstate = ERR_PTR(err);
> +	goto out;
> +}
> +
> +/**
> + * fsverity_hash_page() - hash a single data or hash page
> + * @params: the Merkle tree's parameters
> + * @inode: inode for which the hashing is being done
> + * @req: preallocated hash request
> + * @page: the page to hash
> + * @out: output digest, size 'params->digest_size' bytes
> + *
> + * Hash a single data or hash block, assuming block_size == PAGE_SIZE.
> + * The hash is salted if a salt is specified in the Merkle tree parameters.
> + *
> + * Return: 0 on success, -errno on failure
> + */
> +int fsverity_hash_page(const struct merkle_tree_params *params,
> +		       const struct inode *inode,
> +		       struct ahash_request *req, struct page *page, u8 *out)
> +{
> +	struct scatterlist sg;
> +	DECLARE_CRYPTO_WAIT(wait);
> +	int err;
> +
> +	if (WARN_ON(params->block_size != PAGE_SIZE))
> +		return -EINVAL;
> +
> +	sg_init_table(&sg, 1);
> +	sg_set_page(&sg, page, PAGE_SIZE, 0);
> +	ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP |
> +					CRYPTO_TFM_REQ_MAY_BACKLOG,
> +				   crypto_req_done, &wait);
> +	ahash_request_set_crypt(req, &sg, out, PAGE_SIZE);
> +
> +	if (params->hashstate) {
> +		err = crypto_ahash_import(req, params->hashstate);
> +		if (err) {
> +			fsverity_err(inode,
> +				     "Error %d importing hash state", err);
> +			return err;
> +		}
> +		err = crypto_ahash_finup(req);
> +	} else {
> +		err = crypto_ahash_digest(req);
> +	}
> +
> +	err = crypto_wait_req(err, &wait);
> +	if (err)
> +		fsverity_err(inode, "Error %d computing page hash", err);
> +	return err;
> +}
> +
> +/**
> + * fsverity_hash_buffer() - hash some data
> + * @alg: the hash algorithm to use
> + * @data: the data to hash
> + * @size: size of data to hash
> + * @out: output digest, size 'alg->digest_size' bytes
> + *
> + * Hash some data which is located in physically contiguous memory (i.e. memory
> + * allocated by kmalloc(), not by vmalloc()).  No salt is used.
> + *
> + * Return: 0 on success, -errno on failure
> + */
> +int fsverity_hash_buffer(const struct fsverity_hash_alg *alg,
> +			 const void *data, size_t size, u8 *out)
> +{
> +	struct ahash_request *req;
> +	struct scatterlist sg;
> +	DECLARE_CRYPTO_WAIT(wait);
> +	int err;
> +
> +	req = ahash_request_alloc(alg->tfm, GFP_KERNEL);
> +	if (!req)
> +		return -ENOMEM;
> +
> +	sg_init_one(&sg, data, size);
> +	ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP |
> +					CRYPTO_TFM_REQ_MAY_BACKLOG,
> +				   crypto_req_done, &wait);
> +	ahash_request_set_crypt(req, &sg, out, size);
> +
> +	err = crypto_wait_req(crypto_ahash_digest(req), &wait);
> +
> +	ahash_request_free(req);
> +	return err;
> +}
> +
> +void __init fsverity_check_hash_algs(void)
> +{
> +	size_t i;
> +
> +	/*
> +	 * Sanity check the hash algorithms (could be a build-time check, but
> +	 * they're in an array)
> +	 */
> +	for (i = 0; i < ARRAY_SIZE(fsverity_hash_algs); i++) {
> +		const struct fsverity_hash_alg *alg = &fsverity_hash_algs[i];
> +
> +		if (!alg->name)
> +			continue;
> +
> +		BUG_ON(alg->digest_size > FS_VERITY_MAX_DIGEST_SIZE);
> +
> +		/*
> +		 * For efficiency, the implementation currently assumes the
> +		 * digest and block sizes are powers of 2.  This limitation can
> +		 * be lifted if the code is updated to handle other values.
> +		 */
> +		BUG_ON(!is_power_of_2(alg->digest_size));
> +		BUG_ON(!is_power_of_2(alg->block_size));
> +	}
> +}
> diff --git a/fs/verity/init.c b/fs/verity/init.c
> new file mode 100644
> index 00000000000000..40076bbe452a48
> --- /dev/null
> +++ b/fs/verity/init.c
> @@ -0,0 +1,41 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * fs/verity/init.c: fs-verity module initialization and logging
> + *
> + * Copyright 2019 Google LLC
> + */
> +
> +#include "fsverity_private.h"
> +
> +#include <linux/ratelimit.h>
> +
> +void fsverity_msg(const struct inode *inode, const char *level,
> +		  const char *fmt, ...)
> +{
> +	static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
> +				      DEFAULT_RATELIMIT_BURST);
> +	struct va_format vaf;
> +	va_list args;
> +
> +	if (!__ratelimit(&rs))
> +		return;
> +
> +	va_start(args, fmt);
> +	vaf.fmt = fmt;
> +	vaf.va = &args;
> +	if (inode)
> +		printk("%sfs-verity (%s, inode %lu): %pV\n",
> +		       level, inode->i_sb->s_id, inode->i_ino, &vaf);
> +	else
> +		printk("%sfs-verity: %pV\n", level, &vaf);
> +	va_end(args);
> +}
> +
> +static int __init fsverity_init(void)
> +{
> +	fsverity_check_hash_algs();
> +
> +	pr_debug("Initialized fs-verity\n");
> +	return 0;
> +}
> +late_initcall(fsverity_init)
> -- 
> 2.22.0.410.gd8fdbe21b5-goog

^ permalink raw reply

* Re: [PATCH v5 04/16] fs: uapi: define verity bit for FS_IOC_GETFLAGS
From: Jaegeuk Kim @ 2019-06-22 22:11 UTC (permalink / raw)
  To: Eric Biggers
  Cc: Theodore Y . Ts'o, Darrick J . Wong, linux-api, Dave Chinner,
	linux-f2fs-devel, linux-fscrypt, linux-fsdevel, linux-integrity,
	linux-ext4, Linus Torvalds, Christoph Hellwig, Victor Hsieh
In-Reply-To: <20190620205043.64350-5-ebiggers@kernel.org>

On 06/20, Eric Biggers wrote:
> From: Eric Biggers <ebiggers@google.com>
> 
> Add FS_VERITY_FL to the flags for FS_IOC_GETFLAGS, so that applications
> can easily determine whether a file is a verity file at the same time as
> they're checking other file flags.  This flag will be gettable only;
> FS_IOC_SETFLAGS won't allow setting it, since an ioctl must be used
> instead to provide more parameters.
> 
> This flag matches the on-disk bit that was already allocated for ext4.
> 
> Reviewed-by: Theodore Ts'o <tytso@mit.edu>

Reviewed-by: Jaegeuk Kim <jaegeuk@kernel.org>

> Signed-off-by: Eric Biggers <ebiggers@google.com>
> ---
>  include/uapi/linux/fs.h | 1 +
>  1 file changed, 1 insertion(+)
> 
> diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h
> index 59c71fa8c553a3..df261b7e0587ed 100644
> --- a/include/uapi/linux/fs.h
> +++ b/include/uapi/linux/fs.h
> @@ -306,6 +306,7 @@ struct fscrypt_key {
>  #define FS_TOPDIR_FL			0x00020000 /* Top of directory hierarchies*/
>  #define FS_HUGE_FILE_FL			0x00040000 /* Reserved for ext4 */
>  #define FS_EXTENT_FL			0x00080000 /* Extents */
> +#define FS_VERITY_FL			0x00100000 /* Verity protected inode */
>  #define FS_EA_INODE_FL			0x00200000 /* Inode used for large EA */
>  #define FS_EOFBLOCKS_FL			0x00400000 /* Reserved for ext4 */
>  #define FS_NOCOW_FL			0x00800000 /* Do not cow file */
> -- 
> 2.22.0.410.gd8fdbe21b5-goog

^ permalink raw reply

* Re: [PATCH v5 03/16] fs-verity: add UAPI header
From: Jaegeuk Kim @ 2019-06-22 22:11 UTC (permalink / raw)
  To: Eric Biggers
  Cc: Theodore Y . Ts'o, Darrick J . Wong, linux-api, Dave Chinner,
	linux-f2fs-devel, linux-fscrypt, linux-fsdevel, linux-integrity,
	linux-ext4, Linus Torvalds, Christoph Hellwig, Victor Hsieh
In-Reply-To: <20190620205043.64350-4-ebiggers@kernel.org>

On 06/20, Eric Biggers wrote:
> From: Eric Biggers <ebiggers@google.com>
> 
> Add the UAPI header for fs-verity, including two ioctls:
> 
> - FS_IOC_ENABLE_VERITY
> - FS_IOC_MEASURE_VERITY
> 
> These ioctls are documented in the "User API" section of
> Documentation/filesystems/fsverity.rst.
> 
> Examples of using these ioctls can be found in fsverity-utils
> (https://git.kernel.org/pub/scm/linux/kernel/git/ebiggers/fsverity-utils.git).
> 
> I've also written xfstests that test these ioctls
> (https://git.kernel.org/pub/scm/linux/kernel/git/ebiggers/xfstests-dev.git/log/?h=fsverity).
> 
> Reviewed-by: Theodore Ts'o <tytso@mit.edu>

Reviewed-by: Jaegeuk Kim <jaegeuk@kernel.org>

> Signed-off-by: Eric Biggers <ebiggers@google.com>
> ---
>  Documentation/ioctl/ioctl-number.txt |  1 +
>  include/uapi/linux/fsverity.h        | 39 ++++++++++++++++++++++++++++
>  2 files changed, 40 insertions(+)
>  create mode 100644 include/uapi/linux/fsverity.h
> 
> diff --git a/Documentation/ioctl/ioctl-number.txt b/Documentation/ioctl/ioctl-number.txt
> index c9558146ac5896..21767c81e86d58 100644
> --- a/Documentation/ioctl/ioctl-number.txt
> +++ b/Documentation/ioctl/ioctl-number.txt
> @@ -225,6 +225,7 @@ Code  Seq#(hex)	Include File		Comments
>  'f'	00-0F	fs/ext4/ext4.h		conflict!
>  'f'	00-0F	linux/fs.h		conflict!
>  'f'	00-0F	fs/ocfs2/ocfs2_fs.h	conflict!
> +'f'	81-8F	linux/fsverity.h
>  'g'	00-0F	linux/usb/gadgetfs.h
>  'g'	20-2F	linux/usb/g_printer.h
>  'h'	00-7F				conflict! Charon filesystem
> diff --git a/include/uapi/linux/fsverity.h b/include/uapi/linux/fsverity.h
> new file mode 100644
> index 00000000000000..57d1d7fc0c345a
> --- /dev/null
> +++ b/include/uapi/linux/fsverity.h
> @@ -0,0 +1,39 @@
> +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
> +/*
> + * fs-verity user API
> + *
> + * These ioctls can be used on filesystems that support fs-verity.  See the
> + * "User API" section of Documentation/filesystems/fsverity.rst.
> + *
> + * Copyright 2019 Google LLC
> + */
> +#ifndef _UAPI_LINUX_FSVERITY_H
> +#define _UAPI_LINUX_FSVERITY_H
> +
> +#include <linux/ioctl.h>
> +#include <linux/types.h>
> +
> +#define FS_VERITY_HASH_ALG_SHA256	1
> +
> +struct fsverity_enable_arg {
> +	__u32 version;
> +	__u32 hash_algorithm;
> +	__u32 block_size;
> +	__u32 salt_size;
> +	__u64 salt_ptr;
> +	__u32 sig_size;
> +	__u32 __reserved1;
> +	__u64 sig_ptr;
> +	__u64 __reserved2[11];
> +};
> +
> +struct fsverity_digest {
> +	__u16 digest_algorithm;
> +	__u16 digest_size; /* input/output */
> +	__u8 digest[];
> +};
> +
> +#define FS_IOC_ENABLE_VERITY	_IOW('f', 133, struct fsverity_enable_arg)
> +#define FS_IOC_MEASURE_VERITY	_IOWR('f', 134, struct fsverity_digest)
> +
> +#endif /* _UAPI_LINUX_FSVERITY_H */
> -- 
> 2.22.0.410.gd8fdbe21b5-goog

^ permalink raw reply

* Re: [PATCH v5 02/16] fs-verity: add MAINTAINERS file entry
From: Jaegeuk Kim @ 2019-06-22 22:11 UTC (permalink / raw)
  To: Eric Biggers
  Cc: Theodore Y . Ts'o, Darrick J . Wong, linux-api, Dave Chinner,
	linux-f2fs-devel, linux-fscrypt, linux-fsdevel, linux-integrity,
	linux-ext4, Linus Torvalds, Christoph Hellwig, Victor Hsieh
In-Reply-To: <20190620205043.64350-3-ebiggers@kernel.org>

On 06/20, Eric Biggers wrote:
> From: Eric Biggers <ebiggers@google.com>
> 
> fs-verity will be jointly maintained by Eric Biggers and Theodore Ts'o.
> 
> Reviewed-by: Theodore Ts'o <tytso@mit.edu>

Reviewed-by: Jaegeuk Kim <jaegeuk@kernel.org>

> Signed-off-by: Eric Biggers <ebiggers@google.com>
> ---
>  MAINTAINERS | 12 ++++++++++++
>  1 file changed, 12 insertions(+)
> 
> diff --git a/MAINTAINERS b/MAINTAINERS
> index a6954776a37e70..655065116f9228 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -6505,6 +6505,18 @@ S:	Maintained
>  F:	fs/notify/
>  F:	include/linux/fsnotify*.h
>  
> +FSVERITY: READ-ONLY FILE-BASED AUTHENTICITY PROTECTION
> +M:	Eric Biggers <ebiggers@kernel.org>
> +M:	Theodore Y. Ts'o <tytso@mit.edu>
> +L:	linux-fscrypt@vger.kernel.org
> +Q:	https://patchwork.kernel.org/project/linux-fscrypt/list/
> +T:	git git://git.kernel.org/pub/scm/fs/fscrypt/fscrypt.git fsverity
> +S:	Supported
> +F:	fs/verity/
> +F:	include/linux/fsverity.h
> +F:	include/uapi/linux/fsverity.h
> +F:	Documentation/filesystems/fsverity.rst
> +
>  FUJITSU LAPTOP EXTRAS
>  M:	Jonathan Woithe <jwoithe@just42.net>
>  L:	platform-driver-x86@vger.kernel.org
> -- 
> 2.22.0.410.gd8fdbe21b5-goog

^ permalink raw reply

* Re: [PATCH v5 01/16] fs-verity: add a documentation file
From: Jaegeuk Kim @ 2019-06-22 22:10 UTC (permalink / raw)
  To: Eric Biggers
  Cc: Theodore Y . Ts'o, Darrick J . Wong, linux-api, Dave Chinner,
	linux-f2fs-devel, linux-fscrypt, linux-fsdevel, linux-integrity,
	linux-ext4, Linus Torvalds, Christoph Hellwig, Victor Hsieh
In-Reply-To: <20190620205043.64350-2-ebiggers@kernel.org>

On 06/20, Eric Biggers wrote:
> From: Eric Biggers <ebiggers@google.com>
> 
> Add a documentation file for fs-verity, covering:
> 
> - Introduction
> - Use cases
> - User API
>     - FS_IOC_ENABLE_VERITY
>     - FS_IOC_MEASURE_VERITY
>     - FS_IOC_GETFLAGS
> - Accessing verity files
> - File measurement computation
>     - Merkle tree
>     - fs-verity descriptor
> - Built-in signature verification
> - Filesystem support
>     - ext4
>     - f2fs
> - Implementation details
>     - Verifying data
>         - Pagecache
>         - Block device based filesystems
> - Userspace utility
> - Tests
> - FAQ
> 
> Reviewed-by: Theodore Ts'o <tytso@mit.edu>

Reviewed-by: Jaegeuk Kim <jaegeuk@kernel.org>

> Signed-off-by: Eric Biggers <ebiggers@google.com>
> ---
>  Documentation/filesystems/fsverity.rst | 710 +++++++++++++++++++++++++
>  Documentation/filesystems/index.rst    |   1 +
>  2 files changed, 711 insertions(+)
>  create mode 100644 Documentation/filesystems/fsverity.rst
> 
> diff --git a/Documentation/filesystems/fsverity.rst b/Documentation/filesystems/fsverity.rst
> new file mode 100644
> index 00000000000000..49524d7ea190e5
> --- /dev/null
> +++ b/Documentation/filesystems/fsverity.rst
> @@ -0,0 +1,710 @@
> +=======================================================
> +fs-verity: read-only file-based authenticity protection
> +=======================================================
> +
> +Introduction
> +============
> +
> +fs-verity (``fs/verity/``) is a support layer that filesystems can
> +hook into to support transparent integrity and authenticity protection
> +of read-only files.  Currently, it is supported by the ext4 and f2fs
> +filesystems.  Like fscrypt, not too much filesystem-specific code is
> +needed to support fs-verity.
> +
> +fs-verity is similar to `dm-verity
> +<https://www.kernel.org/doc/Documentation/device-mapper/verity.txt>`_
> +but works on files rather than block devices.  On regular files on
> +filesystems supporting fs-verity, userspace can execute an ioctl that
> +causes the filesystem to build a Merkle tree for the file and persist
> +it to a filesystem-specific location associated with the file.
> +
> +After this, the file is made readonly, and all reads from the file are
> +automatically verified against the file's Merkle tree.  Reads of any
> +corrupted data, including mmap reads, will fail.
> +
> +Userspace can use another ioctl to retrieve the root hash (actually
> +the "file measurement", which is a hash that includes the root hash)
> +that fs-verity is enforcing for the file.  This ioctl executes in
> +constant time, regardless of the file size.
> +
> +fs-verity is essentially a way to hash a file in constant time,
> +subject to the caveat that reads which would violate the hash will
> +fail at runtime.
> +
> +Use cases
> +=========
> +
> +By itself, the base fs-verity feature only provides integrity
> +protection, i.e. detection of accidental (non-malicious) corruption.
> +
> +However, because fs-verity makes retrieving the file hash extremely
> +efficient, it's primarily meant to be used as a tool to support
> +authentication (detection of malicious modifications) or auditing
> +(logging file hashes before use).
> +
> +Trusted userspace code (e.g. operating system code running on a
> +read-only partition that is itself authenticated by dm-verity) can
> +authenticate the contents of an fs-verity file by using the
> +`FS_IOC_MEASURE_VERITY`_ ioctl to retrieve its hash, then verifying a
> +digital signature of it.
> +
> +A standard file hash could be used instead of fs-verity.  However,
> +this is inefficient if the file is large and only a small portion may
> +be accessed.  This is often the case for Android application package
> +(APK) files, for example.  These typically contain many translations,
> +classes, and other resources that are infrequently or even never
> +accessed on a particular device.  It would be slow and wasteful to
> +read and hash the entire file before starting the application.
> +
> +Unlike an ahead-of-time hash, fs-verity also re-verifies data each
> +time it's paged in.  This ensures that malicious disk firmware can't
> +undetectably change the contents of the file at runtime.
> +
> +fs-verity does not replace or obsolete dm-verity.  dm-verity should
> +still be used on read-only filesystems.  fs-verity is for files that
> +must live on a read-write filesystem because they are independently
> +updated and potentially user-installed, so dm-verity cannot be used.
> +
> +The base fs-verity feature is a hashing mechanism only; actually
> +authenticating the files is up to userspace.  However, to meet some
> +users' needs, fs-verity optionally supports a simple signature
> +verification mechanism where users can configure the kernel to require
> +that all fs-verity files be signed by a key loaded into a keyring; see
> +`Built-in signature verification`_.  Support for fs-verity file hashes
> +in IMA (Integrity Measurement Architecture) policies is also planned.
> +
> +User API
> +========
> +
> +FS_IOC_ENABLE_VERITY
> +--------------------
> +
> +The FS_IOC_ENABLE_VERITY ioctl enables fs-verity on a file.  It takes
> +in a pointer to a :c:type:`struct fsverity_enable_arg`, defined as
> +follows::
> +
> +    struct fsverity_enable_arg {
> +            __u32 version;
> +            __u32 hash_algorithm;
> +            __u32 block_size;
> +            __u32 salt_size;
> +            __u64 salt_ptr;
> +            __u32 sig_size;
> +            __u32 __reserved1;
> +            __u64 sig_ptr;
> +            __u64 __reserved2[11];
> +    };
> +
> +This structure contains the parameters of the Merkle tree to build for
> +the file, and optionally contains a signature.  It must be initialized
> +as follows:
> +
> +- ``version`` must be 1.
> +- ``hash_algorithm`` must be the identifier for the hash algorithm to
> +  use for the Merkle tree, such as FS_VERITY_HASH_ALG_SHA256.  See
> +  ``include/uapi/linux/fsverity.h`` for the list of possible values.
> +- ``block_size`` must be the Merkle tree block size.  Currently, this
> +  must be equal to the system page size, which is usually 4096 bytes.
> +  Other sizes may be supported in the future.  This value is not
> +  necessarily the same as the filesystem block size.
> +- ``salt_size`` is the size of the salt in bytes, or 0 if no salt is
> +  provided.  The salt is a value that is prepended to every hashed
> +  block; it can be used to personalize the hashing for a particular
> +  file or device.  Currently the maximum salt size is 32 bytes.
> +- ``salt_ptr`` is the pointer to the salt, or NULL if no salt is
> +  provided.
> +- ``sig_size`` is the size of the signature in bytes, or 0 if no
> +  signature is provided.  Currently the signature is (somewhat
> +  arbitrarily) limited to 16128 bytes.  See `Built-in signature
> +  verification`_ for more information.
> +- ``sig_ptr`` is the pointer to the signature, or NULL if no
> +  signature is provided.
> +- All reserved fields must be zeroed.
> +
> +FS_IOC_ENABLE_VERITY causes the filesystem to build a Merkle tree for
> +the file and persist it to a filesystem-specific location associated
> +with the file, then mark the file as a verity file.  This ioctl may
> +take a long time to execute on large files, and it is interruptible by
> +fatal signals.
> +
> +FS_IOC_ENABLE_VERITY checks for write access to the inode.  However,
> +it must be executed on an O_RDONLY file descriptor and no processes
> +can have the file open for writing.  Attempts to open the file for
> +writing while this ioctl is executing will fail with ETXTBSY.  (This
> +is necessary to guarantee that no writable file descriptors will exist
> +after verity is enabled, and to guarantee that the file's contents are
> +stable while the Merkle tree is being built over it.)
> +
> +On success, FS_IOC_ENABLE_VERITY returns 0, and the file becomes a
> +verity file.  On failure (including the case of interruption by a
> +fatal signal), no changes are made to the file.
> +
> +FS_IOC_ENABLE_VERITY can fail with the following errors:
> +
> +- ``EACCES``: the process does not have write access to the file
> +- ``EEXIST``: the file already has verity enabled
> +- ``EFAULT``: the caller provided inaccessible memory
> +- ``EINTR``: the operation was interrupted by a fatal signal
> +- ``EINVAL``: unsupported version, hash algorithm, or block size; or
> +  reserved bits are set; or the file descriptor refers to neither a
> +  regular file nor a directory.
> +- ``EISDIR``: the file descriptor refers to a directory
> +- ``EMSGSIZE``: the salt or signature is too long
> +- ``ENOENT``: fs-verity recognizes the hash algorithm, but it's not
> +  available in the kernel's crypto API as currently configured (e.g.
> +  for SHA-512, missing CONFIG_CRYPTO_SHA512).
> +- ``ENOTTY``: this type of filesystem does not implement fs-verity
> +- ``EOPNOTSUPP``: the kernel was not configured with fs-verity
> +  support; or the filesystem superblock has not had the 'verity'
> +  feature enabled on it; or the filesystem does not support fs-verity
> +  on this file.  (See `Filesystem support`_.)
> +- ``EPERM``: the file is append-only
> +- ``EROFS``: the filesystem is read-only
> +- ``ETXTBSY``: someone has the file open for writing.  This can be the
> +  caller's file descriptor, another open file descriptor, or the file
> +  reference held by a writable memory map.
> +
> +FS_IOC_MEASURE_VERITY
> +---------------------
> +
> +The FS_IOC_MEASURE_VERITY ioctl retrieves the measurement of a verity
> +file.  The file measurement is a digest that cryptographically
> +identifies the file contents that are being enforced on reads.
> +
> +This ioctl takes in a pointer to a variable-length structure::
> +
> +    struct fsverity_digest {
> +            __u16 digest_algorithm;
> +            __u16 digest_size; /* input/output */
> +            __u8 digest[];
> +    };
> +
> +``digest_size`` is an input/output field.  On input, it must be
> +initialized to the number of bytes allocated for the variable-length
> +``digest`` field.
> +
> +On success, 0 is returned and the kernel fills in the structure as
> +follows:
> +
> +- ``digest_algorithm`` will be the hash algorithm used for the file
> +  measurement.  It will match ``fsverity_enable_arg::hash_algorithm``.
> +- ``digest_size`` will be the size of the digest in bytes, e.g. 32
> +  for SHA-256.  (This can be redundant with ``digest_algorithm``.)
> +- ``digest`` will be the actual bytes of the digest.
> +
> +FS_IOC_MEASURE_VERITY is guaranteed to execute in constant time,
> +regardless of the size of the file.
> +
> +FS_IOC_MEASURE_VERITY can fail with the following errors:
> +
> +- ``EFAULT``: the caller provided inaccessible memory
> +- ``ENODATA``: the file is not a verity file
> +- ``ENOTTY``: this type of filesystem does not implement fs-verity
> +- ``EOPNOTSUPP``: the kernel was not configured with fs-verity
> +  support, or the filesystem superblock has not had the 'verity'
> +  feature enabled on it.  (See `Filesystem support`_.)
> +- ``EOVERFLOW``: the digest is longer than the specified
> +  ``digest_size`` bytes.  Try providing a larger buffer.
> +
> +FS_IOC_GETFLAGS
> +---------------
> +
> +The existing ioctl FS_IOC_GETFLAGS (which isn't specific to fs-verity)
> +can also be used to check whether a file has fs-verity enabled or not.
> +To do so, check for FS_VERITY_FL (0x00100000) in the returned flags.
> +
> +The verity flag is not settable via FS_IOC_SETFLAGS.  You must use
> +FS_IOC_ENABLE_VERITY instead, since parameters must be provided.
> +
> +Accessing verity files
> +======================
> +
> +Applications can transparently access a verity file just like a
> +non-verity one, with the following exceptions:
> +
> +- Verity files are readonly.  They cannot be opened for writing or
> +  truncate()d, even if the file mode bits allow it.  Attempts to do
> +  one of these things will fail with EPERM.  However, changes to
> +  metadata such as owner, mode, timestamps, and xattrs are still
> +  allowed, since these are not measured by fs-verity.  Verity files
> +  can also still be renamed, deleted, and linked to.
> +
> +- Direct I/O is not supported on verity files.  Attempts to use direct
> +  I/O on such files will fall back to buffered I/O.
> +
> +- DAX (Direct Access) is not supported on verity files, because this
> +  would circumvent the data verification.
> +
> +- Reads of data that doesn't match the verity Merkle tree will fail
> +  with EIO (for read()) or SIGBUS (for mmap() reads).
> +
> +- If the sysctl "fs.verity.require_signatures" is set to 1 and the
> +  file's verity measurement is not signed by a key in the fs-verity
> +  keyring, then opening the file will fail.  See `Built-in signature
> +  verification`_.
> +
> +Direct access to the Merkle tree is not supported.  Therefore, if a
> +verity file is copied, or is backed up and restored, then it will lose
> +its "verity"-ness.  fs-verity is primarily meant for files like
> +executables that are managed by a package manager.
> +
> +File measurement computation
> +============================
> +
> +This section describes how fs-verity hashes the file contents using a
> +Merkle tree to produce the "file measurement" which cryptographically
> +identifies the file contents.  This algorithm is the same for all
> +filesystems that support fs-verity.
> +
> +Userspace only needs to be aware of this algorithm if it needs to
> +compute the file measurement itself, e.g. in order to sign the file.
> +
> +Merkle tree
> +-----------
> +
> +The file contents are divided into blocks, where the block size is
> +configurable but is usually 4096 bytes.  The end of the last block is
> +zero-padded if needed.  Each block is then hashed, producing the first
> +level of hashes.  Then, the hashes in this first level are grouped
> +into 'blocksize'-byte blocks (zero-padding the ends as needed) and
> +these blocks are hashed, producing the second level of hashes.  This
> +proceeds up the tree until only a single block remains.  The hash of
> +this block is the "Merkle tree root hash".
> +
> +If the file is nonempty and fits in one block, then the "Merkle tree
> +root hash" is simply the hash of the single data block.  If the file
> +is empty, then the "Merkle tree root hash" is all zeroes.
> +
> +The "blocks" here are not necessarily the same as "filesystem blocks".
> +
> +If a salt was specified, then it's zero-padded to the next multiple
> +of the input size of the hash algorithm's compression function, e.g.
> +64 bytes for SHA-256 or 128 bytes for SHA-512.  The padded salt is
> +prepended to every data or Merkle tree block that is hashed.
> +
> +The purpose of the block padding is to cause every hash to be taken
> +over the same amount of data, which simplifies the implementation and
> +keeps open more possibilities for hardware acceleration.  The purpose
> +of the salt padding is to make the salting "free" when the salted hash
> +state is precomputed, then imported for each hash.
> +
> +Example: in the recommended configuration of SHA-256 and 4K blocks,
> +128 hash values fit in each block.  Thus, each level of the Merkle
> +tree is approximately 128 times smaller than the previous, and for
> +large files the Merkle tree's size converges to approximately 1/127 of
> +the original file size.  However, for small files, the padding is
> +significant, making the space overhead proportionally more.
> +
> +fs-verity descriptor
> +--------------------
> +
> +By itself, the Merkle tree root hash is ambiguous.  For example, it
> +can't distinguish a large file from a small second file whose data
> +is exactly the top-level hash block of the first file.  Ambiguities
> +also arise from the convention of padding to the next block boundary.
> +
> +To solve this problem, the verity file measurement is actually
> +computed as a hash of the following structure, which contains the
> +Merkle tree root hash as well as other fields such as the file size::
> +
> +    struct fsverity_descriptor {
> +            __u8 version;           /* must be 1 */
> +            __u8 hash_algorithm;    /* Merkle tree hash algorithm */
> +            __u8 log_blocksize;     /* log2 of size of data and tree blocks */
> +            __u8 salt_size;         /* size of salt in bytes; 0 if none */
> +            __le32 sig_size;        /* must be 0 */
> +            __le64 data_size;       /* size of file the Merkle tree is built over */
> +            __u8 root_hash[64];     /* Merkle tree root hash */
> +            __u8 salt[32];          /* salt prepended to each hashed block */
> +            __u8 __reserved[144];   /* must be 0's */
> +    };
> +
> +Note that the ``sig_size`` field must be set to 0 for the purpose of
> +computing the file measurement, even if a signature was provided (or
> +will be provided) to `FS_IOC_ENABLE_VERITY`_.
> +
> +Built-in signature verification
> +===============================
> +
> +With CONFIG_FS_VERITY_BUILTIN_SIGNATURES=y, fs-verity supports putting
> +a portion of an authentication policy (see `Use cases`_) in the
> +kernel.  Specifically, it adds support for:
> +
> +1. At fs-verity module initialization time, a keyring ".fs-verity" is
> +   created.  The root user can add trusted X.509 certificates to this
> +   keyring using the add_key() system call, then (when done)
> +   optionally use keyctl_restrict_keyring() to prevent additional
> +   certificates from being added.
> +
> +2. `FS_IOC_ENABLE_VERITY`_ accepts a pointer to a PKCS#7 formatted
> +   signature in DER format of the file measurement.  On success, this
> +   signature is persisted alongside the Merkle tree.  Then, any time
> +   the file is opened, the kernel will verify this signature against
> +   the certificates in the ".fs-verity" keyring, and verify that it
> +   matches the actual file measurement.
> +
> +3. A new sysctl "fs.verity.require_signatures" is made available.
> +   When set to 1, the kernel requires that all verity files have a
> +   correctly signed file measurement as described in (2).
> +
> +File measurements must be signed in the following format, which is
> +similar to the structure used by `FS_IOC_MEASURE_VERITY`_::
> +
> +    struct fsverity_signed_digest {
> +            char magic[8];                  /* must be "FSVerity" */
> +            __le16 digest_algorithm;
> +            __le16 digest_size;
> +            __u8 digest[];
> +    };
> +
> +fs-verity's built-in signature verification support is meant as a
> +relatively simple mechanism that can be used to provide some level of
> +authenticity protection for verity files, as an alternative to doing
> +the signature verification in userspace or using IMA-appraisal.
> +However, with this mechanism, userspace programs still need to check
> +that the verity bit is set, and there is no protection against verity
> +files being swapped around.
> +
> +Filesystem support
> +==================
> +
> +fs-verity is currently supported by the ext4 and f2fs filesystems.
> +The CONFIG_FS_VERITY kconfig option must be enabled to use fs-verity
> +on either filesystem.
> +
> +``include/linux/fsverity.h`` declares the interface between the
> +``fs/verity/`` support layer and filesystems.  Briefly, filesystems
> +must provide an ``fsverity_operations`` structure that provides
> +methods to read and write the verity metadata to a filesystem-specific
> +location, including the Merkle tree blocks and
> +``fsverity_descriptor``.  Filesystems must also call functions in
> +``fs/verity/`` at certain times, such as when a file is opened or when
> +pages have been read into the pagecache.  (See `Verifying data`_.)
> +
> +ext4
> +----
> +
> +ext4 supports fs-verity since Linux TODO and e2fsprogs v1.45.2.
> +
> +To create verity files on an ext4 filesystem, the filesystem must have
> +been formatted with ``-O verity`` or had ``tune2fs -O verity`` run on
> +it.  "verity" is an RO_COMPAT filesystem feature, so once set, old
> +kernels will only be able to mount the filesystem readonly, and old
> +versions of e2fsck will be unable to check the filesystem.  Moreover,
> +currently ext4 only supports mounting a filesystem with the "verity"
> +feature when its block size is equal to PAGE_SIZE (often 4096 bytes).
> +
> +ext4 sets the EXT4_VERITY_FL on-disk inode flag on verity files.  It
> +can only be set by `FS_IOC_ENABLE_VERITY`_, and it cannot be cleared.
> +
> +ext4 also supports encryption, which can be used simultaneously with
> +fs-verity.  In this case, the plaintext data is verified rather than
> +the ciphertext.  This is necessary in order to make the file
> +measurement meaningful, since every file is encrypted differently.
> +
> +ext4 stores the verity metadata (Merkle tree and fsverity_descriptor)
> +past the end of the file, starting at the first 64K boundary beyond
> +i_size.  This approach works because (a) verity files are readonly,
> +and (b) pages fully beyond i_size aren't visible to userspace but can
> +be read/written internally by ext4 with only some relatively small
> +changes to ext4.  This approach avoids having to depend on the
> +EA_INODE feature and on rearchitecturing ext4's xattr support to
> +support paging multi-gigabyte xattrs into memory, and to support
> +encrypting xattrs.  Note that the verity metadata *must* be encrypted
> +when the file is, since it contains hashes of the plaintext data.
> +
> +Currently, ext4 verity only supports the case where the Merkle tree
> +block size, filesystem block size, and page size are all the same.  It
> +also only supports extent-based files.
> +
> +f2fs
> +----
> +
> +f2fs supports fs-verity since Linux TODO and f2fs-tools v1.11.0.
> +
> +To create verity files on an f2fs filesystem, the filesystem must have
> +been formatted with ``-O verity``.
> +
> +f2fs sets the FADVISE_VERITY_BIT on-disk inode flag on verity files.
> +It can only be set by `FS_IOC_ENABLE_VERITY`_, and it cannot be
> +cleared.
> +
> +Like ext4, f2fs stores the verity metadata (Merkle tree and
> +fsverity_descriptor) past the end of the file, starting at the first
> +64K boundary beyond i_size.  See explanation for ext4 above.
> +Moreover, f2fs supports at most 4096 bytes of xattr entries per inode
> +which wouldn't be enough for even a single Merkle tree block.
> +
> +Currently, f2fs verity only supports a Merkle tree block size of 4096.
> +
> +Implementation details
> +======================
> +
> +Verifying data
> +--------------
> +
> +fs-verity ensures that all reads of a verity file's data are verified,
> +regardless of which syscall is used to do the read (e.g. mmap(),
> +read(), pread()) and regardless of whether it's the first read or a
> +later read (unless the later read can return cached data that was
> +already verified).  Below, we describe how filesystems implement this.
> +
> +Pagecache
> +~~~~~~~~~
> +
> +For filesystems using Linux's pagecache, the ``->readpage()`` and
> +``->readpages()`` methods must be modified to verify pages before they
> +are marked Uptodate.  Merely hooking ``->read_iter()`` would be
> +insufficient, since ``->read_iter()`` is not used for memory maps.
> +
> +Therefore, fs/verity/ provides a function fsverity_verify_page() which
> +verifies a page that has been read into the pagecache of a verity
> +inode, but is still locked and not Uptodate, so it's not yet readable
> +by userspace.  As needed to do the verification,
> +fsverity_verify_page() will call back into the filesystem to read
> +Merkle tree pages via fsverity_operations::read_merkle_tree_page().
> +
> +fsverity_verify_page() returns false if verification failed; in this
> +case, the filesystem must not set the page Uptodate.  Following this,
> +as per the usual Linux pagecache behavior, attempts by userspace to
> +read() from the part of the file containing the page will fail with
> +EIO, and accesses to the page within a memory map will raise SIGBUS.
> +
> +fsverity_verify_page() currently only supports the case where the
> +Merkle tree block size is equal to PAGE_SIZE (often 4096 bytes).
> +
> +In principle, fsverity_verify_page() verifies the entire path in the
> +Merkle tree from the data page to the root hash.  However, for
> +efficiency the filesystem may cache the hash pages.  Therefore,
> +fsverity_verify_page() only ascends the tree reading hash pages until
> +an already-verified hash page is seen, as indicated by the PageChecked
> +bit being set.  It then verifies the path to that page.
> +
> +This optimization, which is also used by dm-verity, results in
> +excellent sequential read performance.  This is because usually (e.g.
> +127 in 128 times for 4K blocks and SHA-256) the hash page from the
> +bottom level of the tree will already be cached and checked from
> +reading a previous data page.  However, random reads perform worse.
> +
> +Block device based filesystems
> +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
> +
> +Block device based filesystems (e.g. ext4 and f2fs) in Linux also use
> +the pagecache, so the above subsection applies too.  However, they
> +also usually read many pages from a file at once, grouped into a
> +structure called a "bio".  To make it easier for these types of
> +filesystems to support fs-verity, fs/verity/ also provides a function
> +fsverity_verify_bio() which verifies all pages in a bio.
> +
> +ext4 and f2fs also support encryption.  If a verity file is also
> +encrypted, the pages must be decrypted before being verified.  To
> +support this, these filesystems allocate a "post-read context" for
> +each bio and store it in ``->bi_private``::
> +
> +    struct bio_post_read_ctx {
> +           struct bio *bio;
> +           struct work_struct work;
> +           unsigned int cur_step;
> +           unsigned int enabled_steps;
> +    };
> +
> +``enabled_steps`` is a bitmask that specifies whether decryption,
> +verity, or both is enabled.  After the bio completes, for each needed
> +postprocessing step the filesystem enqueues the bio_post_read_ctx on a
> +workqueue, and then the workqueue work does the decryption or
> +verification.  Finally, pages where no decryption or verity error
> +occurred are marked Uptodate, and the pages are unlocked.
> +
> +Files on ext4 and f2fs may contain holes.  Normally, ``->readpages()``
> +simply zeroes holes and sets the corresponding pages Uptodate; no bios
> +are issued.  To prevent this case from bypassing fs-verity, these
> +filesystems use fsverity_verify_page() to verify hole pages.
> +
> +ext4 and f2fs disable direct I/O on verity files, since otherwise
> +direct I/O would bypass fs-verity.  (They also do the same for
> +encrypted files.)
> +
> +Userspace utility
> +=================
> +
> +This document focuses on the kernel, but a userspace utility for
> +fs-verity can be found at:
> +
> +	https://git.kernel.org/pub/scm/linux/kernel/git/ebiggers/fsverity-utils.git
> +
> +See the README.md file in the fsverity-utils source tree for details,
> +including examples of setting up fs-verity protected files.
> +
> +Tests
> +=====
> +
> +To test fs-verity, use xfstests.  For example, using `kvm-xfstests
> +<https://github.com/tytso/xfstests-bld/blob/master/Documentation/kvm-quickstart.md>`_::
> +
> +    kvm-xfstests -c ext4,f2fs -g verity
> +
> +FAQ
> +===
> +
> +This section answers frequently asked questions about fs-verity that
> +weren't already directly answered in other parts of this document.
> +
> +:Q: Why isn't fs-verity part of IMA?
> +:A: fs-verity and IMA (Integrity Measurement Architecture) have
> +    different focuses.  fs-verity is a filesystem-level mechanism for
> +    hashing individual files using a Merkle tree.  In contrast, IMA
> +    specifies a system-wide policy that specifies which files are
> +    hashed and what to do with those hashes, such as log them,
> +    authenticate them, or add them to a measurement list.
> +
> +    IMA is planned to support the fs-verity hashing mechanism as an
> +    alternative to doing full file hashes, for people who want the
> +    performance and security benefits of the Merkle tree based hash.
> +    But it doesn't make sense to force all uses of fs-verity to be
> +    through IMA.  As a standalone filesystem feature, fs-verity
> +    already meets many users' needs, and it's testable like other
> +    filesystem features e.g. with xfstests.
> +
> +:Q: Isn't fs-verity useless because the attacker can just modify the
> +    hashes in the Merkle tree, which is stored on-disk?
> +:A: To verify the authenticity of an fs-verity file you must verify
> +    the authenticity of the "file measurement", which is basically the
> +    root hash of the Merkle tree.  See `Use cases`_.
> +
> +:Q: Isn't fs-verity useless because the attacker can just replace a
> +    verity file with a non-verity one?
> +:A: See `Use cases`_.  In the initial use case, it's really trusted
> +    userspace code that authenticates the files; fs-verity is just a
> +    tool to do this job efficiently and securely.  The trusted
> +    userspace code will consider non-verity files to be inauthentic.
> +
> +:Q: Why does the Merkle tree need to be stored on-disk?  Couldn't you
> +    store just the root hash?
> +:A: If the Merkle tree wasn't stored on-disk, then you'd have to
> +    compute the entire tree when the file is first accessed, even if
> +    just one byte is being read.  This is a fundamental consequence of
> +    how Merkle tree hashing works.  To verify a leaf node, you need to
> +    verify the whole path to the root hash, including the root node
> +    (the thing which the root hash is a hash of).  But if the root
> +    node isn't stored on-disk, you have to compute it by hashing its
> +    children, and so on until you've actually hashed the entire file.
> +
> +    That defeats most of the point of doing a Merkle tree-based hash,
> +    since if you have to hash the whole file ahead of time anyway,
> +    then you could simply do sha256(file) instead.  That would be much
> +    simpler, and a bit faster too.
> +
> +    It's true that an in-memory Merkle tree could still provide the
> +    advantage of verification on every read rather than just on the
> +    first read.  However, it would be inefficient because every time a
> +    hash page gets evicted (you can't pin the entire Merkle tree into
> +    memory, since it may be very large), in order to restore it you
> +    again need to hash everything below it in the tree.  This again
> +    defeats most of the point of doing a Merkle tree-based hash, since
> +    a single block read could trigger re-hashing gigabytes of data.
> +
> +:Q: But couldn't you store just the leaf nodes and compute the rest?
> +:A: See previous answer; this really just moves up one level, since
> +    one could alternatively interpret the data blocks as being the
> +    leaf nodes of the Merkle tree.  It's true that the tree can be
> +    computed much faster if the leaf level is stored rather than just
> +    the data, but that's only because each level is less than 1% the
> +    size of the level below (assuming the recommended settings of
> +    SHA-256 and 4K blocks).  For the exact same reason, by storing
> +    "just the leaf nodes" you'd already be storing over 99% of the
> +    tree, so you might as well simply store the whole tree.
> +
> +:Q: Can the Merkle tree be built ahead of time, e.g. distributed as
> +    part of a package that is installed to many computers?
> +:A: This isn't currently supported.  It was part of the original
> +    design, but was removed to simplify the kernel UAPI and because it
> +    wasn't a critical use case.  Files are usually installed once and
> +    used many times, and cryptographic hashing is somewhat fast on
> +    most modern processors.
> +
> +:Q: Why doesn't fs-verity support writes?
> +:A: Write support would be very difficult and would require a
> +    completely different design, so it's well outside the scope of
> +    fs-verity.  Write support would require:
> +
> +    - A way to maintain consistency between the data and hashes,
> +      including all levels of hashes, since corruption after a crash
> +      (especially of potentially the entire file!) is unacceptable.
> +      The main options for solving this are data journalling,
> +      copy-on-write, and log-structured volume.  But it's very hard to
> +      retrofit existing filesystems with new consistency mechanisms.
> +      Data journalling is available on ext4, but is very slow.
> +
> +    - Rebuilding the Merkle tree after every write, which would be
> +      extremely inefficient.  Alternatively, a different authenticated
> +      dictionary structure such as an "authenticated skiplist" could
> +      be used.  However, this would be far more complex.
> +
> +    Compare it to dm-verity vs. dm-integrity.  dm-verity is very
> +    simple: the kernel just verifies read-only data against a
> +    read-only Merkle tree.  In contrast, dm-integrity supports writes
> +    but is slow, is much more complex, and doesn't actually support
> +    full-device authentication since it authenticates each sector
> +    independently, i.e. there is no "root hash".  It doesn't really
> +    make sense for the same device-mapper target to support these two
> +    very different cases; the same applies to fs-verity.
> +
> +:Q: Since verity files are immutable, why isn't the immutable bit set?
> +:A: The existing "immutable" bit (FS_IMMUTABLE_FL) already has a
> +    specific set of semantics which not only make the file contents
> +    read-only, but also prevent the file from being deleted, renamed,
> +    linked to, or having its owner or mode changed.  These extra
> +    properties are unwanted for fs-verity, so reusing the immutable
> +    bit isn't appropriate.
> +
> +:Q: Why does the API use ioctls instead of setxattr() and getxattr()?
> +:A: Abusing the xattr interface for basically arbitrary syscalls is
> +    heavily frowned upon by most of the Linux filesystem developers.
> +    An xattr should really just be an xattr on-disk, not an API to
> +    e.g. magically trigger construction of a Merkle tree.
> +
> +:Q: Does fs-verity support remote filesystems?
> +:A: Only ext4 and f2fs support is implemented currently, but in
> +    principle any filesystem that can store per-file verity metadata
> +    can support fs-verity, regardless of whether it's local or remote.
> +    Some filesystems may have fewer options of where to store the
> +    verity metadata; one possibility is to store it past the end of
> +    the file and "hide" it from userspace by manipulating i_size.  The
> +    data verification functions provided by ``fs/verity/`` also assume
> +    that the filesystem uses the Linux pagecache, but both local and
> +    remote filesystems normally do so.
> +
> +:Q: Why is anything filesystem-specific at all?  Shouldn't fs-verity
> +    be implemented entirely at the VFS level?
> +:A: There are many reasons why this is not possible or would be very
> +    difficult, including the following:
> +
> +    - To prevent bypassing verification, pages must not be marked
> +      Uptodate until they've been verified.  Currently, each
> +      filesystem is responsible for marking pages Uptodate via
> +      ``->readpages()``.  Therefore, currently it's not possible for
> +      the VFS to do the verification on its own.  Changing this would
> +      require significant changes to the VFS and all filesystems.
> +
> +    - It would require defining a filesystem-independent way to store
> +      the verity metadata.  Extended attributes don't work for this
> +      because (a) the Merkle tree may be gigabytes, but many
> +      filesystems assume that all xattrs fit into a single 4K
> +      filesystem block, and (b) ext4 and f2fs encryption doesn't
> +      encrypt xattrs, yet the Merkle tree *must* be encrypted when the
> +      file contents are, because it stores hashes of the plaintext
> +      file contents.
> +
> +      So the verity metadata would have to be stored in an actual
> +      file.  Using a separate file would be very ugly, since the
> +      metadata is fundamentally part of the file to be protected, and
> +      it could cause problems where users could delete the real file
> +      but not the metadata file or vice versa.  On the other hand,
> +      having it be in the same file would break applications unless
> +      filesystems' notion of i_size were divorced from the VFS's,
> +      which would be complex and require changes to all filesystems.
> +
> +    - It's desirable that FS_IOC_ENABLE_VERITY uses the filesystem's
> +      transaction mechanism so that either the file ends up with
> +      verity enabled, or no changes were made.  Allowing intermediate
> +      states to occur after a crash may cause problems.
> diff --git a/Documentation/filesystems/index.rst b/Documentation/filesystems/index.rst
> index 1131c34d77f6f1..416c7f0e123af7 100644
> --- a/Documentation/filesystems/index.rst
> +++ b/Documentation/filesystems/index.rst
> @@ -31,6 +31,7 @@ filesystem implementations.
>  
>     journalling
>     fscrypt
> +   fsverity
>  
>  Filesystem-specific documentation
>  =================================
> -- 
> 2.22.0.410.gd8fdbe21b5-goog

^ permalink raw reply

* Re: [PATCH V34 10/29] hibernate: Disable when the kernel is locked down
From: Pavel Machek @ 2019-06-22 17:52 UTC (permalink / raw)
  To: Matthew Garrett
  Cc: jmorris, linux-security-module, linux-kernel, linux-api,
	Josh Boyer, David Howells, Matthew Garrett, rjw, linux-pm, jikos
In-Reply-To: <20190622000358.19895-11-matthewgarrett@google.com>

[-- Attachment #1: Type: text/plain, Size: 586 bytes --]

On Fri 2019-06-21 17:03:39, Matthew Garrett wrote:
> From: Josh Boyer <jwboyer@fedoraproject.org>
> 
> There is currently no way to verify the resume image when returning
> from hibernate.  This might compromise the signed modules trust model,
> so until we can work with signed hibernate images we disable it when the
> kernel is locked down.

I keep getting these...

IIRC suse has patches to verify the images.
								Pavel 



-- 
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html

[-- Attachment #2: Digital signature --]
[-- Type: application/pgp-signature, Size: 181 bytes --]

^ permalink raw reply

* [PATCH V34 29/29] lockdown: Print current->comm in restriction messages
From: Matthew Garrett @ 2019-06-22  0:03 UTC (permalink / raw)
  To: jmorris
  Cc: linux-security-module, linux-kernel, linux-api, Matthew Garrett,
	David Howells, Matthew Garrett
In-Reply-To: <20190622000358.19895-1-matthewgarrett@google.com>

Print the content of current->comm in messages generated by lockdown to
indicate a restriction that was hit.  This makes it a bit easier to find
out what caused the message.

The message is now patterned something like:

        Lockdown: <comm>: <what> is restricted; see man kernel_lockdown.7

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Matthew Garrett <mjg59@google.com>
---
 security/lockdown/lockdown.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/security/lockdown/lockdown.c b/security/lockdown/lockdown.c
index 98f9ee0026d5..9ca6f442fbc7 100644
--- a/security/lockdown/lockdown.c
+++ b/security/lockdown/lockdown.c
@@ -83,8 +83,8 @@ static int lockdown_is_locked_down(enum lockdown_reason what)
 {	
 	if ((kernel_locked_down >= what)) {
 		if (lockdown_reasons[what])
-			pr_notice("Lockdown: %s is restricted; see man kernel_lockdown.7\n",
-				  lockdown_reasons[what]);
+			pr_notice("Lockdown: %s: %s is restricted; see man kernel_lockdown.7\n",
+				  current->comm, lockdown_reasons[what]);
 		return -EPERM;
 	}
 
-- 
2.22.0.410.gd8fdbe21b5-goog

^ permalink raw reply related

* [PATCH V34 28/29] efi: Restrict efivar_ssdt_load when the kernel is locked down
From: Matthew Garrett @ 2019-06-22  0:03 UTC (permalink / raw)
  To: jmorris
  Cc: linux-security-module, linux-kernel, linux-api, Matthew Garrett,
	Matthew Garrett, Ard Biesheuvel, linux-efi
In-Reply-To: <20190622000358.19895-1-matthewgarrett@google.com>

efivar_ssdt_load allows the kernel to import arbitrary ACPI code from an
EFI variable, which gives arbitrary code execution in ring 0. Prevent
that when the kernel is locked down.

Signed-off-by: Matthew Garrett <mjg59@google.com>
Cc: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: linux-efi@vger.kernel.org
---
 drivers/firmware/efi/efi.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c
index 55b77c576c42..9f92a013ab27 100644
--- a/drivers/firmware/efi/efi.c
+++ b/drivers/firmware/efi/efi.c
@@ -31,6 +31,7 @@
 #include <linux/acpi.h>
 #include <linux/ucs2_string.h>
 #include <linux/memblock.h>
+#include <linux/security.h>
 
 #include <asm/early_ioremap.h>
 
@@ -242,6 +243,11 @@ static void generic_ops_unregister(void)
 static char efivar_ssdt[EFIVAR_SSDT_NAME_MAX] __initdata;
 static int __init efivar_ssdt_setup(char *str)
 {
+	int ret = security_locked_down(LOCKDOWN_ACPI_TABLES);
+
+	if (ret)
+		return ret;
+
 	if (strlen(str) < sizeof(efivar_ssdt))
 		memcpy(efivar_ssdt, str, strlen(str));
 	else
-- 
2.22.0.410.gd8fdbe21b5-goog

^ permalink raw reply related

* [PATCH V34 27/29] tracefs: Restrict tracefs when the kernel is locked down
From: Matthew Garrett @ 2019-06-22  0:03 UTC (permalink / raw)
  To: jmorris
  Cc: linux-security-module, linux-kernel, linux-api, Matthew Garrett,
	Matthew Garrett, Steven Rostedt
In-Reply-To: <20190622000358.19895-1-matthewgarrett@google.com>

Tracefs may release more information about the kernel than desirable, so
restrict it when the kernel is locked down in confidentiality mode by
preventing open().

Signed-off-by: Matthew Garrett <mjg59@google.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
---
 fs/tracefs/inode.c           | 43 +++++++++++++++++++++++++++++++++++-
 include/linux/security.h     |  1 +
 security/lockdown/lockdown.c |  1 +
 3 files changed, 44 insertions(+), 1 deletion(-)

diff --git a/fs/tracefs/inode.c b/fs/tracefs/inode.c
index 7098c49f3693..487d41f234f8 100644
--- a/fs/tracefs/inode.c
+++ b/fs/tracefs/inode.c
@@ -24,6 +24,7 @@
 #include <linux/parser.h>
 #include <linux/magic.h>
 #include <linux/slab.h>
+#include <linux/security.h>
 
 #define TRACEFS_DEFAULT_MODE	0700
 
@@ -31,6 +32,23 @@ static struct vfsmount *tracefs_mount;
 static int tracefs_mount_count;
 static bool tracefs_registered;
 
+static int default_open_file(struct inode *inode, struct file *filp)
+{
+	struct dentry *dentry = filp->f_path.dentry;
+	struct file_operations *real_fops;
+	int ret;
+
+	if (!dentry)
+		return -EINVAL;
+
+	ret = security_locked_down(LOCKDOWN_TRACEFS);
+	if (ret)
+		return ret;
+
+	real_fops = dentry->d_fsdata;
+	return real_fops->open(inode, filp);
+}
+
 static ssize_t default_read_file(struct file *file, char __user *buf,
 				 size_t count, loff_t *ppos)
 {
@@ -50,6 +68,13 @@ static const struct file_operations tracefs_file_operations = {
 	.llseek =	noop_llseek,
 };
 
+static const struct file_operations tracefs_proxy_file_operations = {
+	.read =		default_read_file,
+	.write =	default_write_file,
+	.open =		default_open_file,
+	.llseek =	noop_llseek,
+};
+
 static struct tracefs_dir_ops {
 	int (*mkdir)(const char *name);
 	int (*rmdir)(const char *name);
@@ -225,6 +250,12 @@ static int tracefs_apply_options(struct super_block *sb)
 	return 0;
 }
 
+static void tracefs_destroy_inode(struct inode *inode)
+{
+	if (S_ISREG(inode->i_mode))
+		kfree(inode->i_fop);
+}
+
 static int tracefs_remount(struct super_block *sb, int *flags, char *data)
 {
 	int err;
@@ -260,6 +291,7 @@ static int tracefs_show_options(struct seq_file *m, struct dentry *root)
 
 static const struct super_operations tracefs_super_operations = {
 	.statfs		= simple_statfs,
+	.destroy_inode  = tracefs_destroy_inode,
 	.remount_fs	= tracefs_remount,
 	.show_options	= tracefs_show_options,
 };
@@ -393,6 +425,7 @@ struct dentry *tracefs_create_file(const char *name, umode_t mode,
 {
 	struct dentry *dentry;
 	struct inode *inode;
+	struct file_operations *proxy_fops;
 
 	if (!(mode & S_IFMT))
 		mode |= S_IFREG;
@@ -406,8 +439,16 @@ struct dentry *tracefs_create_file(const char *name, umode_t mode,
 	if (unlikely(!inode))
 		return failed_creating(dentry);
 
+	proxy_fops = kzalloc(sizeof(struct file_operations), GFP_KERNEL);
+	if (!proxy_fops)
+		return failed_creating(dentry);
+
+	dentry->d_fsdata = fops ? (void *)fops :
+		(void *)&tracefs_file_operations;
+	memcpy(proxy_fops, dentry->d_fsdata, sizeof(struct file_operations));
+	proxy_fops->open = default_open_file;
 	inode->i_mode = mode;
-	inode->i_fop = fops ? fops : &tracefs_file_operations;
+	inode->i_fop = proxy_fops;
 	inode->i_private = data;
 	d_instantiate(dentry, inode);
 	fsnotify_create(dentry->d_parent->d_inode, dentry);
diff --git a/include/linux/security.h b/include/linux/security.h
index 097e4b0ce73f..438dc0892b96 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -100,6 +100,7 @@ enum lockdown_reason {
 	LOCKDOWN_KPROBES,
 	LOCKDOWN_BPF_READ,
 	LOCKDOWN_PERF,
+	LOCKDOWN_TRACEFS,
 	LOCKDOWN_CONFIDENTIALITY_MAX,
 };
 
diff --git a/security/lockdown/lockdown.c b/security/lockdown/lockdown.c
index bbcb82985765..98f9ee0026d5 100644
--- a/security/lockdown/lockdown.c
+++ b/security/lockdown/lockdown.c
@@ -36,6 +36,7 @@ static char *lockdown_reasons[LOCKDOWN_CONFIDENTIALITY_MAX+1] = {
 	[LOCKDOWN_KPROBES] = "use of kprobes",
 	[LOCKDOWN_BPF_READ] = "use of bpf to read kernel RAM",
 	[LOCKDOWN_PERF] = "unsafe use of perf",
+	[LOCKDOWN_TRACEFS] = "use of tracefs",
 	[LOCKDOWN_CONFIDENTIALITY_MAX] = "confidentiality",
 };
 
-- 
2.22.0.410.gd8fdbe21b5-goog

^ permalink raw reply related

* [PATCH V34 26/29] debugfs: Restrict debugfs when the kernel is locked down
From: Matthew Garrett @ 2019-06-22  0:03 UTC (permalink / raw)
  To: jmorris
  Cc: linux-security-module, linux-kernel, linux-api, David Howells,
	Andy Shevchenko, acpi4asus-user, platform-driver-x86,
	Matthew Garrett, Thomas Gleixner, Matthew Garrett
In-Reply-To: <20190622000358.19895-1-matthewgarrett@google.com>

From: David Howells <dhowells@redhat.com>

Disallow opening of debugfs files that might be used to muck around when
the kernel is locked down as various drivers give raw access to hardware
through debugfs.  Given the effort of auditing all 2000 or so files and
manually fixing each one as necessary, I've chosen to apply a heuristic
instead.  The following changes are made:

 (1) chmod and chown are disallowed on debugfs objects (though the root dir
     can be modified by mount and remount, but I'm not worried about that).

 (2) When the kernel is locked down, only files with the following criteria
     are permitted to be opened:

	- The file must have mode 00444
	- The file must not have ioctl methods
	- The file must not have mmap

 (3) When the kernel is locked down, files may only be opened for reading.

Normal device interaction should be done through configfs, sysfs or a
miscdev, not debugfs.

Note that this makes it unnecessary to specifically lock down show_dsts(),
show_devs() and show_call() in the asus-wmi driver.

I would actually prefer to lock down all files by default and have the
files unlocked by the creator.  This is tricky to manage correctly,
though, as there are 19 creation functions and ~1600 call sites (some of
them in loops scanning tables).

Signed-off-by: David Howells <dhowells@redhat.com>
cc: Andy Shevchenko <andy.shevchenko@gmail.com>
cc: acpi4asus-user@lists.sourceforge.net
cc: platform-driver-x86@vger.kernel.org
cc: Matthew Garrett <mjg59@srcf.ucam.org>
cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Matthew Garrett <matthewgarrett@google.com>
---
 fs/debugfs/file.c            | 30 ++++++++++++++++++++++++++++++
 fs/debugfs/inode.c           | 32 ++++++++++++++++++++++++++++++--
 include/linux/security.h     |  1 +
 security/lockdown/lockdown.c |  1 +
 4 files changed, 62 insertions(+), 2 deletions(-)

diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c
index 4fce1da7db23..f60518f0e3aa 100644
--- a/fs/debugfs/file.c
+++ b/fs/debugfs/file.c
@@ -19,6 +19,7 @@
 #include <linux/atomic.h>
 #include <linux/device.h>
 #include <linux/poll.h>
+#include <linux/security.h>
 
 #include "internal.h"
 
@@ -136,6 +137,25 @@ void debugfs_file_put(struct dentry *dentry)
 }
 EXPORT_SYMBOL_GPL(debugfs_file_put);
 
+/*
+ * Only permit access to world-readable files when the kernel is locked down.
+ * We also need to exclude any file that has ways to write or alter it as root
+ * can bypass the permissions check.
+ */
+static bool debugfs_is_locked_down(struct inode *inode,
+				   struct file *filp,
+				   const struct file_operations *real_fops)
+{
+	if ((inode->i_mode & 07777) == 0444 &&
+	    !(filp->f_mode & FMODE_WRITE) &&
+	    !real_fops->unlocked_ioctl &&
+	    !real_fops->compat_ioctl &&
+	    !real_fops->mmap)
+		return false;
+
+	return security_locked_down(LOCKDOWN_DEBUGFS);
+}
+
 static int open_proxy_open(struct inode *inode, struct file *filp)
 {
 	struct dentry *dentry = F_DENTRY(filp);
@@ -147,6 +167,11 @@ static int open_proxy_open(struct inode *inode, struct file *filp)
 		return r == -EIO ? -ENOENT : r;
 
 	real_fops = debugfs_real_fops(filp);
+
+	r = debugfs_is_locked_down(inode, filp, real_fops);
+	if (r)
+		goto out;
+
 	real_fops = fops_get(real_fops);
 	if (!real_fops) {
 		/* Huh? Module did not clean up after itself at exit? */
@@ -272,6 +297,11 @@ static int full_proxy_open(struct inode *inode, struct file *filp)
 		return r == -EIO ? -ENOENT : r;
 
 	real_fops = debugfs_real_fops(filp);
+
+	r = debugfs_is_locked_down(inode, filp, real_fops);
+	if (r)
+		goto out;
+
 	real_fops = fops_get(real_fops);
 	if (!real_fops) {
 		/* Huh? Module did not cleanup after itself at exit? */
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index 95b5e78c22b1..a53a4748ebc1 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -23,6 +23,7 @@
 #include <linux/parser.h>
 #include <linux/magic.h>
 #include <linux/slab.h>
+#include <linux/security.h>
 
 #include "internal.h"
 
@@ -32,6 +33,32 @@ static struct vfsmount *debugfs_mount;
 static int debugfs_mount_count;
 static bool debugfs_registered;
 
+/*
+ * Don't allow access attributes to be changed whilst the kernel is locked down
+ * so that we can use the file mode as part of a heuristic to determine whether
+ * to lock down individual files.
+ */
+static int debugfs_setattr(struct dentry *dentry, struct iattr *ia)
+{
+	int ret = security_locked_down(LOCKDOWN_DEBUGFS);
+
+	if (ret && (ia->ia_valid & (ATTR_MODE | ATTR_UID | ATTR_GID)))
+		return ret;
+	return simple_setattr(dentry, ia);
+}
+
+static const struct inode_operations debugfs_file_inode_operations = {
+	.setattr	= debugfs_setattr,
+};
+static const struct inode_operations debugfs_dir_inode_operations = {
+	.lookup		= simple_lookup,
+	.setattr	= debugfs_setattr,
+};
+static const struct inode_operations debugfs_symlink_inode_operations = {
+	.get_link	= simple_get_link,
+	.setattr	= debugfs_setattr,
+};
+
 static struct inode *debugfs_get_inode(struct super_block *sb)
 {
 	struct inode *inode = new_inode(sb);
@@ -356,6 +383,7 @@ static struct dentry *__debugfs_create_file(const char *name, umode_t mode,
 	inode->i_mode = mode;
 	inode->i_private = data;
 
+	inode->i_op = &debugfs_file_inode_operations;
 	inode->i_fop = proxy_fops;
 	dentry->d_fsdata = (void *)((unsigned long)real_fops |
 				DEBUGFS_FSDATA_IS_REAL_FOPS_BIT);
@@ -516,7 +544,7 @@ struct dentry *debugfs_create_dir(const char *name, struct dentry *parent)
 		return failed_creating(dentry);
 
 	inode->i_mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO;
-	inode->i_op = &simple_dir_inode_operations;
+	inode->i_op = &debugfs_dir_inode_operations;
 	inode->i_fop = &simple_dir_operations;
 
 	/* directory inodes start off with i_nlink == 2 (for "." entry) */
@@ -611,7 +639,7 @@ struct dentry *debugfs_create_symlink(const char *name, struct dentry *parent,
 		return failed_creating(dentry);
 	}
 	inode->i_mode = S_IFLNK | S_IRWXUGO;
-	inode->i_op = &simple_symlink_inode_operations;
+	inode->i_op = &debugfs_symlink_inode_operations;
 	inode->i_link = link;
 	d_instantiate(dentry, inode);
 	return end_creating(dentry);
diff --git a/include/linux/security.h b/include/linux/security.h
index 53ea85889a48..097e4b0ce73f 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -94,6 +94,7 @@ enum lockdown_reason {
 	LOCKDOWN_TIOCSSERIAL,
 	LOCKDOWN_MODULE_PARAMETERS,
 	LOCKDOWN_MMIOTRACE,
+	LOCKDOWN_DEBUGFS,
 	LOCKDOWN_INTEGRITY_MAX,
 	LOCKDOWN_KCORE,
 	LOCKDOWN_KPROBES,
diff --git a/security/lockdown/lockdown.c b/security/lockdown/lockdown.c
index a7e75c614416..bbcb82985765 100644
--- a/security/lockdown/lockdown.c
+++ b/security/lockdown/lockdown.c
@@ -30,6 +30,7 @@ static char *lockdown_reasons[LOCKDOWN_CONFIDENTIALITY_MAX+1] = {
 	[LOCKDOWN_TIOCSSERIAL] = "reconfiguration of serial port IO",
 	[LOCKDOWN_MODULE_PARAMETERS] = "unsafe module parameters",
 	[LOCKDOWN_MMIOTRACE] = "unsafe mmio",
+	[LOCKDOWN_DEBUGFS] = "debugfs access",
 	[LOCKDOWN_INTEGRITY_MAX] = "integrity",
 	[LOCKDOWN_KCORE] = "/proc/kcore access",
 	[LOCKDOWN_KPROBES] = "use of kprobes",
-- 
2.22.0.410.gd8fdbe21b5-goog

^ permalink raw reply related

* [PATCH V34 25/29] kexec: Allow kexec_file() with appropriate IMA policy when locked down
From: Matthew Garrett @ 2019-06-22  0:03 UTC (permalink / raw)
  To: jmorris
  Cc: linux-security-module, linux-kernel, linux-api, Matthew Garrett,
	Matthew Garrett, Mimi Zohar, Dmitry Kasatkin, linux-integrity
In-Reply-To: <20190622000358.19895-1-matthewgarrett@google.com>

Systems in lockdown mode should block the kexec of untrusted kernels.
For x86 and ARM we can ensure that a kernel is trustworthy by validating
a PE signature, but this isn't possible on other architectures. On those
platforms we can use IMA digital signatures instead. Add a function to
determine whether IMA has or will verify signatures for a given event type,
and if so permit kexec_file() even if the kernel is otherwise locked down.
This is restricted to cases where CONFIG_INTEGRITY_TRUSTED_KEYRING is set
in order to prevent an attacker from loading additional keys at runtime.

Signed-off-by: Matthew Garrett <mjg59@google.com>
Acked-by: Mimi Zohar <zohar@linux.ibm.com>
Cc: Dmitry Kasatkin <dmitry.kasatkin@gmail.com>
Cc: linux-integrity@vger.kernel.org
---
 include/linux/ima.h                 |  9 ++++++
 kernel/kexec_file.c                 | 11 +++++--
 security/integrity/ima/ima.h        |  2 ++
 security/integrity/ima/ima_main.c   |  2 +-
 security/integrity/ima/ima_policy.c | 50 +++++++++++++++++++++++++++++
 5 files changed, 71 insertions(+), 3 deletions(-)

diff --git a/include/linux/ima.h b/include/linux/ima.h
index dc12fbcf484c..c30954acc660 100644
--- a/include/linux/ima.h
+++ b/include/linux/ima.h
@@ -132,4 +132,13 @@ static inline int ima_inode_removexattr(struct dentry *dentry,
 	return 0;
 }
 #endif /* CONFIG_IMA_APPRAISE */
+
+#if defined(CONFIG_IMA_APPRAISE) && defined(CONFIG_INTEGRITY_TRUSTED_KEYRING)
+extern bool ima_appraise_signature(enum kernel_read_file_id func);
+#else
+static inline bool ima_appraise_signature(enum kernel_read_file_id func)
+{
+	return false;
+}
+#endif /* CONFIG_IMA_APPRAISE && CONFIG_INTEGRITY_TRUSTED_KEYRING */
 #endif /* _LINUX_IMA_H */
diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c
index 27adb4312b03..539d0ca855bc 100644
--- a/kernel/kexec_file.c
+++ b/kernel/kexec_file.c
@@ -237,8 +237,15 @@ kimage_file_prepare_segments(struct kimage *image, int kernel_fd, int initrd_fd,
 			goto out;
 		}
 
-		ret = security_locked_down(LOCKDOWN_KEXEC);
-		if (ret)
+		ret = 0;
+
+		/* If IMA is guaranteed to appraise a signature on the kexec
+		 * image, permit it even if the kernel is otherwise locked
+		 * down.
+		 */
+		if (!ima_appraise_signature(READING_KEXEC_IMAGE) &&
+		    security_locked_down(LOCKDOWN_KEXEC)) {
+			ret = -EPERM;
 			goto out;
 
 		break;
diff --git a/security/integrity/ima/ima.h b/security/integrity/ima/ima.h
index d213e835c498..3bc62062cfe8 100644
--- a/security/integrity/ima/ima.h
+++ b/security/integrity/ima/ima.h
@@ -115,6 +115,8 @@ struct ima_kexec_hdr {
 	u64 count;
 };
 
+extern const int read_idmap[];
+
 #ifdef CONFIG_HAVE_IMA_KEXEC
 void ima_load_kexec_buffer(void);
 #else
diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c
index 357edd140c09..927fe889201a 100644
--- a/security/integrity/ima/ima_main.c
+++ b/security/integrity/ima/ima_main.c
@@ -473,7 +473,7 @@ int ima_read_file(struct file *file, enum kernel_read_file_id read_id)
 	return 0;
 }
 
-static const int read_idmap[READING_MAX_ID] = {
+const int read_idmap[READING_MAX_ID] = {
 	[READING_FIRMWARE] = FIRMWARE_CHECK,
 	[READING_FIRMWARE_PREALLOC_BUFFER] = FIRMWARE_CHECK,
 	[READING_MODULE] = MODULE_CHECK,
diff --git a/security/integrity/ima/ima_policy.c b/security/integrity/ima/ima_policy.c
index e0cc323f948f..8784449918e2 100644
--- a/security/integrity/ima/ima_policy.c
+++ b/security/integrity/ima/ima_policy.c
@@ -1339,3 +1339,53 @@ int ima_policy_show(struct seq_file *m, void *v)
 	return 0;
 }
 #endif	/* CONFIG_IMA_READ_POLICY */
+
+#if defined(CONFIG_IMA_APPRAISE) && defined(CONFIG_INTEGRITY_TRUSTED_KEYRING)
+/*
+ * ima_appraise_signature: whether IMA will appraise a given function using
+ * an IMA digital signature. This is restricted to cases where the kernel
+ * has a set of built-in trusted keys in order to avoid an attacker simply
+ * loading additional keys.
+ */
+bool ima_appraise_signature(enum kernel_read_file_id id)
+{
+	struct ima_rule_entry *entry;
+	bool found = false;
+	enum ima_hooks func;
+
+	if (id >= READING_MAX_ID)
+		return false;
+
+	func = read_idmap[id] ?: FILE_CHECK;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(entry, ima_rules, list) {
+		if (entry->action != APPRAISE)
+			continue;
+
+		/*
+		 * A generic entry will match, but otherwise require that it
+		 * match the func we're looking for
+		 */
+		if (entry->func && entry->func != func)
+			continue;
+
+		/*
+		 * We require this to be a digital signature, not a raw IMA
+		 * hash.
+		 */
+		if (entry->flags & IMA_DIGSIG_REQUIRED)
+			found = true;
+
+		/*
+		 * We've found a rule that matches, so break now even if it
+		 * didn't require a digital signature - a later rule that does
+		 * won't override it, so would be a false positive.
+		 */
+		break;
+	}
+
+	rcu_read_unlock();
+	return found;
+}
+#endif /* CONFIG_IMA_APPRAISE && CONFIG_INTEGRITY_TRUSTED_KEYRING */
-- 
2.22.0.410.gd8fdbe21b5-goog

^ permalink raw reply related

* [PATCH V34 24/29] Lock down perf when in confidentiality mode
From: Matthew Garrett @ 2019-06-22  0:03 UTC (permalink / raw)
  To: jmorris
  Cc: linux-security-module, linux-kernel, linux-api, David Howells,
	Matthew Garrett, Peter Zijlstra, Ingo Molnar,
	Arnaldo Carvalho de Melo
In-Reply-To: <20190622000358.19895-1-matthewgarrett@google.com>

From: David Howells <dhowells@redhat.com>

Disallow the use of certain perf facilities that might allow userspace to
access kernel data.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Matthew Garrett <mjg59@google.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
---
 include/linux/security.h     | 1 +
 kernel/events/core.c         | 7 +++++++
 security/lockdown/lockdown.c | 1 +
 3 files changed, 9 insertions(+)

diff --git a/include/linux/security.h b/include/linux/security.h
index de0d37b1fe79..53ea85889a48 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -98,6 +98,7 @@ enum lockdown_reason {
 	LOCKDOWN_KCORE,
 	LOCKDOWN_KPROBES,
 	LOCKDOWN_BPF_READ,
+	LOCKDOWN_PERF,
 	LOCKDOWN_CONFIDENTIALITY_MAX,
 };
 
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 72d06e302e99..77f36551756e 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -10731,6 +10731,13 @@ SYSCALL_DEFINE5(perf_event_open,
 			return -EINVAL;
 	}
 
+	err = security_locked_down(LOCKDOWN_PERF);
+	if (err && (attr.sample_type & PERF_SAMPLE_REGS_INTR))
+		/* REGS_INTR can leak data, lockdown must prevent this */
+		return err;
+	else
+		err = 0;
+
 	/* Only privileged users can get physical addresses */
 	if ((attr.sample_type & PERF_SAMPLE_PHYS_ADDR) &&
 	    perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN))
diff --git a/security/lockdown/lockdown.c b/security/lockdown/lockdown.c
index 2eea2cc13117..a7e75c614416 100644
--- a/security/lockdown/lockdown.c
+++ b/security/lockdown/lockdown.c
@@ -34,6 +34,7 @@ static char *lockdown_reasons[LOCKDOWN_CONFIDENTIALITY_MAX+1] = {
 	[LOCKDOWN_KCORE] = "/proc/kcore access",
 	[LOCKDOWN_KPROBES] = "use of kprobes",
 	[LOCKDOWN_BPF_READ] = "use of bpf to read kernel RAM",
+	[LOCKDOWN_PERF] = "unsafe use of perf",
 	[LOCKDOWN_CONFIDENTIALITY_MAX] = "confidentiality",
 };
 
-- 
2.22.0.410.gd8fdbe21b5-goog

^ permalink raw reply related

* [PATCH V34 23/29] bpf: Restrict bpf when kernel lockdown is in confidentiality mode
From: Matthew Garrett @ 2019-06-22  0:03 UTC (permalink / raw)
  To: jmorris
  Cc: linux-security-module, linux-kernel, linux-api, David Howells,
	Alexei Starovoitov, Matthew Garrett, netdev, Chun-Yi Lee,
	Daniel Borkmann
In-Reply-To: <20190622000358.19895-1-matthewgarrett@google.com>

From: David Howells <dhowells@redhat.com>

There are some bpf functions that can be used to read kernel memory:
bpf_probe_read, bpf_probe_write_user and bpf_trace_printk.  These allow
private keys in kernel memory (e.g. the hibernation image signing key) to
be read by an eBPF program and kernel memory to be altered without
restriction. Disable them if the kernel has been locked down in
confidentiality mode.

Suggested-by: Alexei Starovoitov <alexei.starovoitov@gmail.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Matthew Garrett <mjg59@google.com>
cc: netdev@vger.kernel.org
cc: Chun-Yi Lee <jlee@suse.com>
cc: Alexei Starovoitov <alexei.starovoitov@gmail.com>
Cc: Daniel Borkmann <daniel@iogearbox.net>
---
 include/linux/security.h     |  1 +
 kernel/trace/bpf_trace.c     | 20 +++++++++++++++++++-
 security/lockdown/lockdown.c |  1 +
 3 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/include/linux/security.h b/include/linux/security.h
index e6e3e2403474..de0d37b1fe79 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -97,6 +97,7 @@ enum lockdown_reason {
 	LOCKDOWN_INTEGRITY_MAX,
 	LOCKDOWN_KCORE,
 	LOCKDOWN_KPROBES,
+	LOCKDOWN_BPF_READ,
 	LOCKDOWN_CONFIDENTIALITY_MAX,
 };
 
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index d64c00afceb5..638f9b00a8df 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -137,6 +137,10 @@ BPF_CALL_3(bpf_probe_read, void *, dst, u32, size, const void *, unsafe_ptr)
 {
 	int ret;
 
+	ret = security_locked_down(LOCKDOWN_BPF_READ);
+	if (ret)
+		return ret;
+
 	ret = probe_kernel_read(dst, unsafe_ptr, size);
 	if (unlikely(ret < 0))
 		memset(dst, 0, size);
@@ -156,6 +160,12 @@ static const struct bpf_func_proto bpf_probe_read_proto = {
 BPF_CALL_3(bpf_probe_write_user, void *, unsafe_ptr, const void *, src,
 	   u32, size)
 {
+	int ret;
+
+	ret = security_locked_down(LOCKDOWN_BPF_READ);
+	if (ret)
+		return ret;
+
 	/*
 	 * Ensure we're in user context which is safe for the helper to
 	 * run. This helper has no business in a kthread.
@@ -205,7 +215,11 @@ BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, u64, arg1,
 	int fmt_cnt = 0;
 	u64 unsafe_addr;
 	char buf[64];
-	int i;
+	int i, ret;
+
+	ret = security_locked_down(LOCKDOWN_BPF_READ);
+	if (ret)
+		return ret;
 
 	/*
 	 * bpf_check()->check_func_arg()->check_stack_boundary()
@@ -534,6 +548,10 @@ BPF_CALL_3(bpf_probe_read_str, void *, dst, u32, size,
 {
 	int ret;
 
+	ret = security_locked_down(LOCKDOWN_BPF_READ);
+	if (ret)
+		return ret;
+
 	/*
 	 * The strncpy_from_unsafe() call will likely not fill the entire
 	 * buffer, but that's okay in this circumstance as we're probing
diff --git a/security/lockdown/lockdown.c b/security/lockdown/lockdown.c
index 5a08c17f224d..2eea2cc13117 100644
--- a/security/lockdown/lockdown.c
+++ b/security/lockdown/lockdown.c
@@ -33,6 +33,7 @@ static char *lockdown_reasons[LOCKDOWN_CONFIDENTIALITY_MAX+1] = {
 	[LOCKDOWN_INTEGRITY_MAX] = "integrity",
 	[LOCKDOWN_KCORE] = "/proc/kcore access",
 	[LOCKDOWN_KPROBES] = "use of kprobes",
+	[LOCKDOWN_BPF_READ] = "use of bpf to read kernel RAM",
 	[LOCKDOWN_CONFIDENTIALITY_MAX] = "confidentiality",
 };
 
-- 
2.22.0.410.gd8fdbe21b5-goog

^ permalink raw reply related

* [PATCH V34 22/29] Lock down tracing and perf kprobes when in confidentiality mode
From: Matthew Garrett @ 2019-06-22  0:03 UTC (permalink / raw)
  To: jmorris
  Cc: linux-security-module, linux-kernel, linux-api, David Howells,
	Alexei Starovoitov, Matthew Garrett, Naveen N . Rao,
	Anil S Keshavamurthy, davem, Masami Hiramatsu
In-Reply-To: <20190622000358.19895-1-matthewgarrett@google.com>

From: David Howells <dhowells@redhat.com>

Disallow the creation of perf and ftrace kprobes when the kernel is
locked down in confidentiality mode by preventing their registration.
This prevents kprobes from being used to access kernel memory to steal
crypto data, but continues to allow the use of kprobes from signed
modules.

Reported-by: Alexei Starovoitov <alexei.starovoitov@gmail.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Matthew Garrett <mjg59@google.com>
Cc: Naveen N. Rao <naveen.n.rao@linux.ibm.com>
Cc: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
Cc: davem@davemloft.net
Cc: Masami Hiramatsu <mhiramat@kernel.org>
---
 include/linux/security.h     | 1 +
 kernel/trace/trace_kprobe.c  | 5 +++++
 security/lockdown/lockdown.c | 1 +
 3 files changed, 7 insertions(+)

diff --git a/include/linux/security.h b/include/linux/security.h
index 3875f6df2ecc..e6e3e2403474 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -96,6 +96,7 @@ enum lockdown_reason {
 	LOCKDOWN_MMIOTRACE,
 	LOCKDOWN_INTEGRITY_MAX,
 	LOCKDOWN_KCORE,
+	LOCKDOWN_KPROBES,
 	LOCKDOWN_CONFIDENTIALITY_MAX,
 };
 
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 5d5129b05df7..5a76a0f79d48 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -11,6 +11,7 @@
 #include <linux/uaccess.h>
 #include <linux/rculist.h>
 #include <linux/error-injection.h>
+#include <linux/security.h>
 
 #include "trace_dynevent.h"
 #include "trace_kprobe_selftest.h"
@@ -415,6 +416,10 @@ static int __register_trace_kprobe(struct trace_kprobe *tk)
 {
 	int i, ret;
 
+	ret = security_locked_down(LOCKDOWN_KPROBES);
+	if (ret)
+		return ret;
+
 	if (trace_probe_is_registered(&tk->tp))
 		return -EINVAL;
 
diff --git a/security/lockdown/lockdown.c b/security/lockdown/lockdown.c
index 4c9b324dfc55..5a08c17f224d 100644
--- a/security/lockdown/lockdown.c
+++ b/security/lockdown/lockdown.c
@@ -32,6 +32,7 @@ static char *lockdown_reasons[LOCKDOWN_CONFIDENTIALITY_MAX+1] = {
 	[LOCKDOWN_MMIOTRACE] = "unsafe mmio",
 	[LOCKDOWN_INTEGRITY_MAX] = "integrity",
 	[LOCKDOWN_KCORE] = "/proc/kcore access",
+	[LOCKDOWN_KPROBES] = "use of kprobes",
 	[LOCKDOWN_CONFIDENTIALITY_MAX] = "confidentiality",
 };
 
-- 
2.22.0.410.gd8fdbe21b5-goog

^ permalink raw reply related

* [PATCH V34 21/29] Lock down /proc/kcore
From: Matthew Garrett @ 2019-06-22  0:03 UTC (permalink / raw)
  To: jmorris
  Cc: linux-security-module, linux-kernel, linux-api, David Howells,
	Matthew Garrett
In-Reply-To: <20190622000358.19895-1-matthewgarrett@google.com>

From: David Howells <dhowells@redhat.com>

Disallow access to /proc/kcore when the kernel is locked down to prevent
access to cryptographic data. This is limited to lockdown
confidentiality mode and is still permitted in integrity mode.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Matthew Garrett <mjg59@google.com>
---
 fs/proc/kcore.c              | 5 +++++
 include/linux/security.h     | 1 +
 security/lockdown/lockdown.c | 1 +
 3 files changed, 7 insertions(+)

diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index d29d869abec1..4e95edb1e282 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -31,6 +31,7 @@
 #include <linux/ioport.h>
 #include <linux/memory.h>
 #include <linux/sched/task.h>
+#include <linux/security.h>
 #include <asm/sections.h>
 #include "internal.h"
 
@@ -545,6 +546,10 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos)
 
 static int open_kcore(struct inode *inode, struct file *filp)
 {
+	int ret = security_locked_down(LOCKDOWN_KCORE);
+
+	if (ret)
+		return ret;
 	if (!capable(CAP_SYS_RAWIO))
 		return -EPERM;
 
diff --git a/include/linux/security.h b/include/linux/security.h
index c649cb91e762..3875f6df2ecc 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -95,6 +95,7 @@ enum lockdown_reason {
 	LOCKDOWN_MODULE_PARAMETERS,
 	LOCKDOWN_MMIOTRACE,
 	LOCKDOWN_INTEGRITY_MAX,
+	LOCKDOWN_KCORE,
 	LOCKDOWN_CONFIDENTIALITY_MAX,
 };
 
diff --git a/security/lockdown/lockdown.c b/security/lockdown/lockdown.c
index cd86ed9f4d4b..4c9b324dfc55 100644
--- a/security/lockdown/lockdown.c
+++ b/security/lockdown/lockdown.c
@@ -31,6 +31,7 @@ static char *lockdown_reasons[LOCKDOWN_CONFIDENTIALITY_MAX+1] = {
 	[LOCKDOWN_MODULE_PARAMETERS] = "unsafe module parameters",
 	[LOCKDOWN_MMIOTRACE] = "unsafe mmio",
 	[LOCKDOWN_INTEGRITY_MAX] = "integrity",
+	[LOCKDOWN_KCORE] = "/proc/kcore access",
 	[LOCKDOWN_CONFIDENTIALITY_MAX] = "confidentiality",
 };
 
-- 
2.22.0.410.gd8fdbe21b5-goog

^ permalink raw reply related

* [PATCH V34 20/29] x86/mmiotrace: Lock down the testmmiotrace module
From: Matthew Garrett @ 2019-06-22  0:03 UTC (permalink / raw)
  To: jmorris
  Cc: linux-security-module, linux-kernel, linux-api, David Howells,
	Thomas Gleixner, Matthew Garrett, Steven Rostedt, Ingo Molnar,
	H. Peter Anvin, x86
In-Reply-To: <20190622000358.19895-1-matthewgarrett@google.com>

From: David Howells <dhowells@redhat.com>

The testmmiotrace module shouldn't be permitted when the kernel is locked
down as it can be used to arbitrarily read and write MMIO space. This is
a runtime check rather than buildtime in order to allow configurations
where the same kernel may be run in both locked down or permissive modes
depending on local policy.

Suggested-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Matthew Garrett <mjg59@google.com>
cc: Thomas Gleixner <tglx@linutronix.de>
cc: Steven Rostedt <rostedt@goodmis.org>
cc: Ingo Molnar <mingo@kernel.org>
cc: "H. Peter Anvin" <hpa@zytor.com>
cc: x86@kernel.org
---
 arch/x86/mm/testmmiotrace.c  | 5 +++++
 include/linux/security.h     | 1 +
 security/lockdown/lockdown.c | 1 +
 3 files changed, 7 insertions(+)

diff --git a/arch/x86/mm/testmmiotrace.c b/arch/x86/mm/testmmiotrace.c
index f6ae6830b341..6b9486baa2e9 100644
--- a/arch/x86/mm/testmmiotrace.c
+++ b/arch/x86/mm/testmmiotrace.c
@@ -7,6 +7,7 @@
 #include <linux/module.h>
 #include <linux/io.h>
 #include <linux/mmiotrace.h>
+#include <linux/security.h>
 
 static unsigned long mmio_address;
 module_param_hw(mmio_address, ulong, iomem, 0);
@@ -114,6 +115,10 @@ static void do_test_bulk_ioremapping(void)
 static int __init init(void)
 {
 	unsigned long size = (read_far) ? (8 << 20) : (16 << 10);
+	int ret = security_locked_down(LOCKDOWN_MMIOTRACE);
+
+	if (ret)
+		return ret;
 
 	if (mmio_address == 0) {
 		pr_err("you have to use the module argument mmio_address.\n");
diff --git a/include/linux/security.h b/include/linux/security.h
index 88064d7f6827..c649cb91e762 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -93,6 +93,7 @@ enum lockdown_reason {
 	LOCKDOWN_PCMCIA_CIS,
 	LOCKDOWN_TIOCSSERIAL,
 	LOCKDOWN_MODULE_PARAMETERS,
+	LOCKDOWN_MMIOTRACE,
 	LOCKDOWN_INTEGRITY_MAX,
 	LOCKDOWN_CONFIDENTIALITY_MAX,
 };
diff --git a/security/lockdown/lockdown.c b/security/lockdown/lockdown.c
index d03c4c296af7..cd86ed9f4d4b 100644
--- a/security/lockdown/lockdown.c
+++ b/security/lockdown/lockdown.c
@@ -29,6 +29,7 @@ static char *lockdown_reasons[LOCKDOWN_CONFIDENTIALITY_MAX+1] = {
 	[LOCKDOWN_PCMCIA_CIS] = "direct PCMCIA CIS storage",
 	[LOCKDOWN_TIOCSSERIAL] = "reconfiguration of serial port IO",
 	[LOCKDOWN_MODULE_PARAMETERS] = "unsafe module parameters",
+	[LOCKDOWN_MMIOTRACE] = "unsafe mmio",
 	[LOCKDOWN_INTEGRITY_MAX] = "integrity",
 	[LOCKDOWN_CONFIDENTIALITY_MAX] = "confidentiality",
 };
-- 
2.22.0.410.gd8fdbe21b5-goog

^ permalink raw reply related

* [PATCH V34 19/29] Lock down module params that specify hardware parameters (eg. ioport)
From: Matthew Garrett @ 2019-06-22  0:03 UTC (permalink / raw)
  To: jmorris
  Cc: linux-security-module, linux-kernel, linux-api, David Howells,
	Alan Cox, Matthew Garrett
In-Reply-To: <20190622000358.19895-1-matthewgarrett@google.com>

From: David Howells <dhowells@redhat.com>

Provided an annotation for module parameters that specify hardware
parameters (such as io ports, iomem addresses, irqs, dma channels, fixed
dma buffers and other types).

Suggested-by: Alan Cox <gnomes@lxorguk.ukuu.org.uk>
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Matthew Garrett <mjg59@google.com>
---
 include/linux/security.h     |  1 +
 kernel/params.c              | 27 ++++++++++++++++++++++-----
 security/lockdown/lockdown.c |  1 +
 3 files changed, 24 insertions(+), 5 deletions(-)

diff --git a/include/linux/security.h b/include/linux/security.h
index 61e3f4a62d16..88064d7f6827 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -92,6 +92,7 @@ enum lockdown_reason {
 	LOCKDOWN_ACPI_TABLES,
 	LOCKDOWN_PCMCIA_CIS,
 	LOCKDOWN_TIOCSSERIAL,
+	LOCKDOWN_MODULE_PARAMETERS,
 	LOCKDOWN_INTEGRITY_MAX,
 	LOCKDOWN_CONFIDENTIALITY_MAX,
 };
diff --git a/kernel/params.c b/kernel/params.c
index ce89f757e6da..f94fe79e331d 100644
--- a/kernel/params.c
+++ b/kernel/params.c
@@ -24,6 +24,7 @@
 #include <linux/err.h>
 #include <linux/slab.h>
 #include <linux/ctype.h>
+#include <linux/security.h>
 
 #ifdef CONFIG_SYSFS
 /* Protects all built-in parameters, modules use their own param_lock */
@@ -108,13 +109,19 @@ bool parameq(const char *a, const char *b)
 	return parameqn(a, b, strlen(a)+1);
 }
 
-static void param_check_unsafe(const struct kernel_param *kp)
+static bool param_check_unsafe(const struct kernel_param *kp,
+			       const char *doing)
 {
 	if (kp->flags & KERNEL_PARAM_FL_UNSAFE) {
 		pr_notice("Setting dangerous option %s - tainting kernel\n",
 			  kp->name);
 		add_taint(TAINT_USER, LOCKDEP_STILL_OK);
 	}
+
+	if (kp->flags & KERNEL_PARAM_FL_HWPARAM &&
+	    security_locked_down(LOCKDOWN_MODULE_PARAMETERS))
+		return false;
+	return true;
 }
 
 static int parse_one(char *param,
@@ -144,8 +151,10 @@ static int parse_one(char *param,
 			pr_debug("handling %s with %p\n", param,
 				params[i].ops->set);
 			kernel_param_lock(params[i].mod);
-			param_check_unsafe(&params[i]);
-			err = params[i].ops->set(val, &params[i]);
+			if (param_check_unsafe(&params[i], doing))
+				err = params[i].ops->set(val, &params[i]);
+			else
+				err = -EPERM;
 			kernel_param_unlock(params[i].mod);
 			return err;
 		}
@@ -553,6 +562,12 @@ static ssize_t param_attr_show(struct module_attribute *mattr,
 	return count;
 }
 
+#ifdef CONFIG_MODULES
+#define mod_name(mod) (mod)->name
+#else
+#define mod_name(mod) "unknown"
+#endif
+
 /* sysfs always hands a nul-terminated string in buf.  We rely on that. */
 static ssize_t param_attr_store(struct module_attribute *mattr,
 				struct module_kobject *mk,
@@ -565,8 +580,10 @@ static ssize_t param_attr_store(struct module_attribute *mattr,
 		return -EPERM;
 
 	kernel_param_lock(mk->mod);
-	param_check_unsafe(attribute->param);
-	err = attribute->param->ops->set(buf, attribute->param);
+	if (param_check_unsafe(attribute->param, mod_name(mk->mod)))
+		err = attribute->param->ops->set(buf, attribute->param);
+	else
+		err = -EPERM;
 	kernel_param_unlock(mk->mod);
 	if (!err)
 		return len;
diff --git a/security/lockdown/lockdown.c b/security/lockdown/lockdown.c
index c89046dc2155..d03c4c296af7 100644
--- a/security/lockdown/lockdown.c
+++ b/security/lockdown/lockdown.c
@@ -28,6 +28,7 @@ static char *lockdown_reasons[LOCKDOWN_CONFIDENTIALITY_MAX+1] = {
 	[LOCKDOWN_ACPI_TABLES] = "modified ACPI tables",
 	[LOCKDOWN_PCMCIA_CIS] = "direct PCMCIA CIS storage",
 	[LOCKDOWN_TIOCSSERIAL] = "reconfiguration of serial port IO",
+	[LOCKDOWN_MODULE_PARAMETERS] = "unsafe module parameters",
 	[LOCKDOWN_INTEGRITY_MAX] = "integrity",
 	[LOCKDOWN_CONFIDENTIALITY_MAX] = "confidentiality",
 };
-- 
2.22.0.410.gd8fdbe21b5-goog

^ permalink raw reply related

* [PATCH V34 18/29] Lock down TIOCSSERIAL
From: Matthew Garrett @ 2019-06-22  0:03 UTC (permalink / raw)
  To: jmorris
  Cc: linux-security-module, linux-kernel, linux-api, David Howells,
	Greg Kroah-Hartman, Matthew Garrett, Jiri Slaby, linux-serial
In-Reply-To: <20190622000358.19895-1-matthewgarrett@google.com>

From: David Howells <dhowells@redhat.com>

Lock down TIOCSSERIAL as that can be used to change the ioport and irq
settings on a serial port.  This only appears to be an issue for the serial
drivers that use the core serial code.  All other drivers seem to either
ignore attempts to change port/irq or give an error.

Reported-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Matthew Garrett <mjg59@google.com>
cc: Jiri Slaby <jslaby@suse.com>
Cc: linux-serial@vger.kernel.org
---
 drivers/tty/serial/serial_core.c | 5 +++++
 include/linux/security.h         | 1 +
 security/lockdown/lockdown.c     | 1 +
 3 files changed, 7 insertions(+)

diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c
index 351843f847c0..a84f231a5df4 100644
--- a/drivers/tty/serial/serial_core.c
+++ b/drivers/tty/serial/serial_core.c
@@ -22,6 +22,7 @@
 #include <linux/serial_core.h>
 #include <linux/delay.h>
 #include <linux/mutex.h>
+#include <linux/security.h>
 
 #include <linux/irq.h>
 #include <linux/uaccess.h>
@@ -852,6 +853,10 @@ static int uart_set_info(struct tty_struct *tty, struct tty_port *port,
 	new_flags = (__force upf_t)new_info->flags;
 	old_custom_divisor = uport->custom_divisor;
 
+	retval = security_locked_down(LOCKDOWN_TIOCSSERIAL);
+	if (retval && (change_port || change_irq))
+		goto exit;
+
 	if (!capable(CAP_SYS_ADMIN)) {
 		retval = -EPERM;
 		if (change_irq || change_port ||
diff --git a/include/linux/security.h b/include/linux/security.h
index 03c125b277ca..61e3f4a62d16 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -91,6 +91,7 @@ enum lockdown_reason {
 	LOCKDOWN_MSR,
 	LOCKDOWN_ACPI_TABLES,
 	LOCKDOWN_PCMCIA_CIS,
+	LOCKDOWN_TIOCSSERIAL,
 	LOCKDOWN_INTEGRITY_MAX,
 	LOCKDOWN_CONFIDENTIALITY_MAX,
 };
diff --git a/security/lockdown/lockdown.c b/security/lockdown/lockdown.c
index 7be3e8fb5847..c89046dc2155 100644
--- a/security/lockdown/lockdown.c
+++ b/security/lockdown/lockdown.c
@@ -27,6 +27,7 @@ static char *lockdown_reasons[LOCKDOWN_CONFIDENTIALITY_MAX+1] = {
 	[LOCKDOWN_MSR] = "raw MSR access",
 	[LOCKDOWN_ACPI_TABLES] = "modified ACPI tables",
 	[LOCKDOWN_PCMCIA_CIS] = "direct PCMCIA CIS storage",
+	[LOCKDOWN_TIOCSSERIAL] = "reconfiguration of serial port IO",
 	[LOCKDOWN_INTEGRITY_MAX] = "integrity",
 	[LOCKDOWN_CONFIDENTIALITY_MAX] = "confidentiality",
 };
-- 
2.22.0.410.gd8fdbe21b5-goog

^ permalink raw reply related

* [PATCH V34 17/29] Prohibit PCMCIA CIS storage when the kernel is locked down
From: Matthew Garrett @ 2019-06-22  0:03 UTC (permalink / raw)
  To: jmorris
  Cc: linux-security-module, linux-kernel, linux-api, David Howells,
	Dominik Brodowski, Matthew Garrett
In-Reply-To: <20190622000358.19895-1-matthewgarrett@google.com>

From: David Howells <dhowells@redhat.com>

Prohibit replacement of the PCMCIA Card Information Structure when the
kernel is locked down.

Suggested-by: Dominik Brodowski <linux@dominikbrodowski.net>
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Matthew Garrett <mjg59@google.com>
---
 drivers/pcmcia/cistpl.c      | 5 +++++
 include/linux/security.h     | 1 +
 security/lockdown/lockdown.c | 1 +
 3 files changed, 7 insertions(+)

diff --git a/drivers/pcmcia/cistpl.c b/drivers/pcmcia/cistpl.c
index ac0672b8dfca..379c53610102 100644
--- a/drivers/pcmcia/cistpl.c
+++ b/drivers/pcmcia/cistpl.c
@@ -24,6 +24,7 @@
 #include <linux/pci.h>
 #include <linux/ioport.h>
 #include <linux/io.h>
+#include <linux/security.h>
 #include <asm/byteorder.h>
 #include <asm/unaligned.h>
 
@@ -1578,6 +1579,10 @@ static ssize_t pccard_store_cis(struct file *filp, struct kobject *kobj,
 	struct pcmcia_socket *s;
 	int error;
 
+	error = security_locked_down(LOCKDOWN_PCMCIA_CIS);
+	if (error)
+		return error;
+
 	s = to_socket(container_of(kobj, struct device, kobj));
 
 	if (off)
diff --git a/include/linux/security.h b/include/linux/security.h
index cc2b5ee4cadd..03c125b277ca 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -90,6 +90,7 @@ enum lockdown_reason {
 	LOCKDOWN_IOPORT,
 	LOCKDOWN_MSR,
 	LOCKDOWN_ACPI_TABLES,
+	LOCKDOWN_PCMCIA_CIS,
 	LOCKDOWN_INTEGRITY_MAX,
 	LOCKDOWN_CONFIDENTIALITY_MAX,
 };
diff --git a/security/lockdown/lockdown.c b/security/lockdown/lockdown.c
index 1725224f0024..7be3e8fb5847 100644
--- a/security/lockdown/lockdown.c
+++ b/security/lockdown/lockdown.c
@@ -26,6 +26,7 @@ static char *lockdown_reasons[LOCKDOWN_CONFIDENTIALITY_MAX+1] = {
 	[LOCKDOWN_IOPORT] = "raw io port access",
 	[LOCKDOWN_MSR] = "raw MSR access",
 	[LOCKDOWN_ACPI_TABLES] = "modified ACPI tables",
+	[LOCKDOWN_PCMCIA_CIS] = "direct PCMCIA CIS storage",
 	[LOCKDOWN_INTEGRITY_MAX] = "integrity",
 	[LOCKDOWN_CONFIDENTIALITY_MAX] = "confidentiality",
 };
-- 
2.22.0.410.gd8fdbe21b5-goog

^ permalink raw reply related

* [PATCH V34 16/29] acpi: Disable ACPI table override if the kernel is locked down
From: Matthew Garrett @ 2019-06-22  0:03 UTC (permalink / raw)
  To: jmorris
  Cc: linux-security-module, linux-kernel, linux-api, Linn Crosetto,
	David Howells, Matthew Garrett, linux-acpi
In-Reply-To: <20190622000358.19895-1-matthewgarrett@google.com>

From: Linn Crosetto <linn@hpe.com>

From the kernel documentation (initrd_table_override.txt):

  If the ACPI_INITRD_TABLE_OVERRIDE compile option is true, it is possible
  to override nearly any ACPI table provided by the BIOS with an
  instrumented, modified one.

When lockdown is enabled, the kernel should disallow any unauthenticated
changes to kernel space.  ACPI tables contain code invoked by the kernel,
so do not allow ACPI tables to be overridden if the kernel is locked down.

Signed-off-by: Linn Crosetto <linn@hpe.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Matthew Garrett <mjg59@google.com>
cc: linux-acpi@vger.kernel.org
---
 drivers/acpi/tables.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/acpi/tables.c b/drivers/acpi/tables.c
index 8fccbe49612a..41d9ccd0e075 100644
--- a/drivers/acpi/tables.c
+++ b/drivers/acpi/tables.c
@@ -34,6 +34,7 @@
 #include <linux/memblock.h>
 #include <linux/earlycpio.h>
 #include <linux/initrd.h>
+#include <linux/security.h>
 #include "internal.h"
 
 #ifdef CONFIG_ACPI_CUSTOM_DSDT
@@ -539,6 +540,11 @@ void __init acpi_table_upgrade(void)
 	if (table_nr == 0)
 		return;
 
+	if (security_locked_down(LOCKDOWN_ACPI_TABLES)) {
+		pr_notice("kernel is locked down, ignoring table override\n");
+		return;
+	}
+
 	acpi_tables_addr =
 		memblock_find_in_range(0, ACPI_TABLE_UPGRADE_MAX_PHYS,
 				       all_tables_size, PAGE_SIZE);
-- 
2.22.0.410.gd8fdbe21b5-goog

^ permalink raw reply related


This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.