All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2 28/28] devpts: handle fsid mappings
From: Christian Brauner @ 2020-02-14 18:35 UTC (permalink / raw)
  To: Stéphane Graber, Eric W. Biederman, Aleksa Sarai, Jann Horn
  Cc: smbarber, Seth Forshee, Alexander Viro, Alexey Dobriyan,
	Serge Hallyn, James Morris, Kees Cook, Jonathan Corbet,
	Phil Estes, linux-kernel, linux-fsdevel, containers,
	linux-security-module, linux-api, Christian Brauner
In-Reply-To: <20200214183554.1133805-1-christian.brauner@ubuntu.com>

When a uid or gid mount option is specified with devpts have it lookup the
corresponding kfsids in the fsid mappings. If no fsid mappings are setup the
behavior is unchanged, i.e. fsids are looked up in the id mappings.

Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
---
/* v2 */
unchanged
---
 fs/devpts/inode.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index 42e5a766d33c..139958892572 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -24,6 +24,7 @@
 #include <linux/parser.h>
 #include <linux/fsnotify.h>
 #include <linux/seq_file.h>
+#include <linux/fsuidgid.h>
 
 #define DEVPTS_DEFAULT_MODE 0600
 /*
@@ -277,7 +278,7 @@ static int parse_mount_options(char *data, int op, struct pts_mount_opts *opts)
 		case Opt_uid:
 			if (match_int(&args[0], &option))
 				return -EINVAL;
-			uid = make_kuid(current_user_ns(), option);
+			uid = make_kfsuid(current_user_ns(), option);
 			if (!uid_valid(uid))
 				return -EINVAL;
 			opts->uid = uid;
@@ -286,7 +287,7 @@ static int parse_mount_options(char *data, int op, struct pts_mount_opts *opts)
 		case Opt_gid:
 			if (match_int(&args[0], &option))
 				return -EINVAL;
-			gid = make_kgid(current_user_ns(), option);
+			gid = make_kfsgid(current_user_ns(), option);
 			if (!gid_valid(gid))
 				return -EINVAL;
 			opts->gid = gid;
@@ -410,7 +411,7 @@ static int devpts_show_options(struct seq_file *seq, struct dentry *root)
 			   from_kuid_munged(&init_user_ns, opts->uid));
 	if (opts->setgid)
 		seq_printf(seq, ",gid=%u",
-			   from_kgid_munged(&init_user_ns, opts->gid));
+			   from_kfsgid_munged(&init_user_ns, opts->gid));
 	seq_printf(seq, ",mode=%03o", opts->mode);
 	seq_printf(seq, ",ptmxmode=%03o", opts->ptmxmode);
 	if (opts->max < NR_UNIX98_PTY_MAX)
-- 
2.25.0


^ permalink raw reply related

* Re: [alsa-devel] alsa-ucm-conf via portage on arm
From: Jaroslav Kysela @ 2020-02-14 18:38 UTC (permalink / raw)
  To: Curtis Malainey, ALSA development, Takashi Iwai
  Cc: Dylan Reid, Jimmy Cheng-Yi Chiang
In-Reply-To: <CAOReqxhbvG03QvqkbjnGB8w5cM4WOK4qeZ+bh0apNna7i3ZimA@mail.gmail.com>

Dne 14. 02. 20 v 19:23 Curtis Malainey napsal(a):
> Hello Takashi and Jaroslav,
> 
> There appears to be a upstream package issue for alsa-ucm-conf in that
> portage seems to have it restricted to x86 and amd64 platforms. Given
> the nature of the ucms I figure this is an error and was hoping you
> know who to speak to in order to resolve this. If this isn't an error
> what can I do to help get the repo to state in which it is supported
> on arm/arm64?
> 
> See https://gitweb.gentoo.org/repo/gentoo.git/commit/media-libs/alsa-ucm-conf/alsa-ucm-conf-1.2.1.2.ebuild?id=dd3e0570e2465639431709bce0410b787d312bbe
> 
> Curtis

It's Gentoo specific and you see signed-off-by in this commit.

Did you contact this person (Lars Wendler) from Gentoo?

					Jaroslav

-- 
Jaroslav Kysela <perex@perex.cz>
Linux Sound Maintainer; ALSA Project; Red Hat, Inc.
_______________________________________________
Alsa-devel mailing list
Alsa-devel@alsa-project.org
https://mailman.alsa-project.org/mailman/listinfo/alsa-devel

^ permalink raw reply

* [PATCH AUTOSEL 4.19 043/252] powerpc/pseries/vio: Fix iommu_table use-after-free refcount warning
From: Sasha Levin @ 2020-02-14 16:08 UTC (permalink / raw)
  To: linux-kernel, stable
  Cc: Sasha Levin, Tyrel Datwyler, Alexey Kardashevskiy, Tyrel Datwyler,
	linuxppc-dev
In-Reply-To: <20200214161147.15842-1-sashal@kernel.org>

From: Tyrel Datwyler <tyreld@linux.vnet.ibm.com>

[ Upstream commit aff8c8242bc638ba57247ae1ec5f272ac3ed3b92 ]

Commit e5afdf9dd515 ("powerpc/vfio_spapr_tce: Add reference counting to
iommu_table") missed an iommu_table allocation in the pseries vio code.
The iommu_table is allocated with kzalloc and as a result the associated
kref gets a value of zero. This has the side effect that during a DLPAR
remove of the associated virtual IOA the iommu_tce_table_put() triggers
a use-after-free underflow warning.

Call Trace:
[c0000002879e39f0] [c00000000071ecb4] refcount_warn_saturate+0x184/0x190
(unreliable)
[c0000002879e3a50] [c0000000000500ac] iommu_tce_table_put+0x9c/0xb0
[c0000002879e3a70] [c0000000000f54e4] vio_dev_release+0x34/0x70
[c0000002879e3aa0] [c00000000087cfa4] device_release+0x54/0xf0
[c0000002879e3b10] [c000000000d64c84] kobject_cleanup+0xa4/0x240
[c0000002879e3b90] [c00000000087d358] put_device+0x28/0x40
[c0000002879e3bb0] [c0000000007a328c] dlpar_remove_slot+0x15c/0x250
[c0000002879e3c50] [c0000000007a348c] remove_slot_store+0xac/0xf0
[c0000002879e3cd0] [c000000000d64220] kobj_attr_store+0x30/0x60
[c0000002879e3cf0] [c0000000004ff13c] sysfs_kf_write+0x6c/0xa0
[c0000002879e3d10] [c0000000004fde4c] kernfs_fop_write+0x18c/0x260
[c0000002879e3d60] [c000000000410f3c] __vfs_write+0x3c/0x70
[c0000002879e3d80] [c000000000415408] vfs_write+0xc8/0x250
[c0000002879e3dd0] [c0000000004157dc] ksys_write+0x7c/0x120
[c0000002879e3e20] [c00000000000b278] system_call+0x5c/0x68

Further, since the refcount was always zero the iommu_tce_table_put()
fails to call the iommu_table release function resulting in a leak.

Fix this issue be initilizing the iommu_table kref immediately after
allocation.

Fixes: e5afdf9dd515 ("powerpc/vfio_spapr_tce: Add reference counting to iommu_table")
Signed-off-by: Tyrel Datwyler <tyreld@linux.ibm.com>
Reviewed-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/1579558202-26052-1-git-send-email-tyreld@linux.ibm.com
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 arch/powerpc/platforms/pseries/vio.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/powerpc/platforms/pseries/vio.c b/arch/powerpc/platforms/pseries/vio.c
index 49e04ec19238a..0e7778be4c490 100644
--- a/arch/powerpc/platforms/pseries/vio.c
+++ b/arch/powerpc/platforms/pseries/vio.c
@@ -1195,6 +1195,8 @@ static struct iommu_table *vio_build_iommu_table(struct vio_dev *dev)
 	if (tbl == NULL)
 		return NULL;
 
+	kref_init(&tbl->it_kref);
+
 	of_parse_dma_window(dev->dev.of_node, dma_window,
 			    &tbl->it_index, &offset, &size);
 
-- 
2.20.1


^ permalink raw reply related

* [PATCH v2 23/28] commoncap: cap_bprm_set_creds(): handle fsid mappings
From: Christian Brauner @ 2020-02-14 18:35 UTC (permalink / raw)
  To: Stéphane Graber, Eric W. Biederman, Aleksa Sarai, Jann Horn
  Cc: smbarber, Seth Forshee, Alexander Viro, Alexey Dobriyan,
	Serge Hallyn, James Morris, Kees Cook, Jonathan Corbet,
	Phil Estes, linux-kernel, linux-fsdevel, containers,
	linux-security-module, linux-api, Christian Brauner
In-Reply-To: <20200214183554.1133805-1-christian.brauner@ubuntu.com>

During exec the kfsids are currently reset to the effective kids. To retain the
same semantics with the introduction of fsid mappings, we lookup the userspace
effective id in the id mappings and translate the effective id into the
corresponding kfsid in the fsidmapping. This means, the behavior is unchanged
when no fsid mappings are setup and the semantics stay the same even when fsid
mappings are setup.

Cc: Jann Horn <jannh@google.com>
Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
---
/* v2 */
- Christian Brauner <christian.brauner@ubuntu.com>:
  - Reset kfsids used for userns visible filesystems such as proc too.
---
 security/commoncap.c | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/security/commoncap.c b/security/commoncap.c
index f4ee0ae106b2..9641695d8383 100644
--- a/security/commoncap.c
+++ b/security/commoncap.c
@@ -810,7 +810,10 @@ int cap_bprm_set_creds(struct linux_binprm *bprm)
 	struct cred *new = bprm->cred;
 	bool effective = false, has_fcap = false, is_setid;
 	int ret;
-	kuid_t root_uid;
+	kuid_t root_uid, kfsuid;
+	kgid_t kfsgid;
+	uid_t fsuid;
+	gid_t fsgid;
 
 	if (WARN_ON(!cap_ambient_invariant_ok(old)))
 		return -EPERM;
@@ -847,8 +850,15 @@ int cap_bprm_set_creds(struct linux_binprm *bprm)
 						   old->cap_permitted);
 	}
 
-	new->suid = new->fsuid = new->euid;
-	new->sgid = new->fsgid = new->egid;
+	fsuid = from_kuid_munged(new->user_ns, new->euid);
+	kfsuid = make_kfsuid(new->user_ns, fsuid);
+	new->suid = new->kfsuid = new->euid;
+	new->fsuid = kfsuid;
+
+	fsgid = from_kgid_munged(new->user_ns, new->egid);
+	kfsgid = make_kfsgid(new->user_ns, fsgid);
+	new->sgid = new->kfsgid = new->egid;
+	new->fsgid = kfsgid;
 
 	/* File caps or setid cancels ambient. */
 	if (has_fcap || is_setid)
-- 
2.25.0


^ permalink raw reply related

* Re: [PATCH 08/35] KVM: s390: protvirt: Add initial lifecycle handling
From: David Hildenbrand @ 2020-02-14 18:39 UTC (permalink / raw)
  To: Christian Borntraeger, Janosch Frank
  Cc: KVM, Cornelia Huck, Thomas Huth, Ulrich Weigand, Claudio Imbrenda,
	Andrea Arcangeli, linux-s390, Michael Mueller, Vasily Gorbik,
	Janosch Frank
In-Reply-To: <20200207113958.7320-9-borntraeger@de.ibm.com>

On 07.02.20 12:39, Christian Borntraeger wrote:
> From: Janosch Frank <frankja@linux.ibm.com>
> 
> This contains 3 main changes:
> 1. changes in SIE control block handling for secure guests
> 2. helper functions for create/destroy/unpack secure guests
> 3. KVM_S390_PV_COMMAND ioctl to allow userspace dealing with secure
> machines
> 
> Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
> [borntraeger@de.ibm.com: patch merging, splitting, fixing]
> Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
> ---
>  arch/s390/include/asm/kvm_host.h |  24 ++-
>  arch/s390/include/asm/uv.h       |  69 +++++++++
>  arch/s390/kvm/Makefile           |   2 +-
>  arch/s390/kvm/kvm-s390.c         | 191 +++++++++++++++++++++++-
>  arch/s390/kvm/kvm-s390.h         |  27 ++++
>  arch/s390/kvm/pv.c               | 244 +++++++++++++++++++++++++++++++
>  include/uapi/linux/kvm.h         |  33 +++++
>  7 files changed, 586 insertions(+), 4 deletions(-)
>  create mode 100644 arch/s390/kvm/pv.c
> 
> diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
> index 884503e05424..3ed31c5f80e1 100644
> --- a/arch/s390/include/asm/kvm_host.h
> +++ b/arch/s390/include/asm/kvm_host.h
> @@ -160,7 +160,13 @@ struct kvm_s390_sie_block {
>  	__u8	reserved08[4];		/* 0x0008 */
>  #define PROG_IN_SIE (1<<0)
>  	__u32	prog0c;			/* 0x000c */
> -	__u8	reserved10[16];		/* 0x0010 */
> +	union {
> +		__u8	reserved10[16];		/* 0x0010 */
> +		struct {
> +			__u64	pv_handle_cpu;
> +			__u64	pv_handle_config;
> +		};
> +	};
>  #define PROG_BLOCK_SIE	(1<<0)
>  #define PROG_REQUEST	(1<<1)
>  	atomic_t prog20;		/* 0x0020 */
> @@ -233,7 +239,7 @@ struct kvm_s390_sie_block {
>  #define ECB3_RI  0x01
>  	__u8    ecb3;			/* 0x0063 */
>  	__u32	scaol;			/* 0x0064 */
> -	__u8	reserved68;		/* 0x0068 */
> +	__u8	sdf;			/* 0x0068 */
>  	__u8    epdx;			/* 0x0069 */
>  	__u8    reserved6a[2];		/* 0x006a */
>  	__u32	todpr;			/* 0x006c */
> @@ -645,6 +651,11 @@ struct kvm_guestdbg_info_arch {
>  	unsigned long last_bp;
>  };
>  
> +struct kvm_s390_pv_vcpu {
> +	u64 handle;
> +	unsigned long stor_base;
> +};
> +
>  struct kvm_vcpu_arch {
>  	struct kvm_s390_sie_block *sie_block;
>  	/* if vsie is active, currently executed shadow sie control block */
> @@ -673,6 +684,7 @@ struct kvm_vcpu_arch {
>  	__u64 cputm_start;
>  	bool gs_enabled;
>  	bool skey_enabled;
> +	struct kvm_s390_pv_vcpu pv;
>  };
>  
>  struct kvm_vm_stat {
> @@ -846,6 +858,13 @@ struct kvm_s390_gisa_interrupt {
>  	DECLARE_BITMAP(kicked_mask, KVM_MAX_VCPUS);
>  };
>  
> +struct kvm_s390_pv {
> +	u64 handle;
> +	u64 guest_len;
> +	unsigned long stor_base;
> +	void *stor_var;
> +};
> +
>  struct kvm_arch{
>  	void *sca;
>  	int use_esca;
> @@ -881,6 +900,7 @@ struct kvm_arch{
>  	DECLARE_BITMAP(cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
>  	DECLARE_BITMAP(idle_mask, KVM_MAX_VCPUS);
>  	struct kvm_s390_gisa_interrupt gisa_int;
> +	struct kvm_s390_pv pv;
>  };
>  
>  #define KVM_HVA_ERR_BAD		(-1UL)
> diff --git a/arch/s390/include/asm/uv.h b/arch/s390/include/asm/uv.h
> index e1cef772fde1..7c21d55d2e49 100644
> --- a/arch/s390/include/asm/uv.h
> +++ b/arch/s390/include/asm/uv.h
> @@ -23,11 +23,19 @@
>  #define UVC_RC_INV_STATE	0x0003
>  #define UVC_RC_INV_LEN		0x0005
>  #define UVC_RC_NO_RESUME	0x0007
> +#define UVC_RC_NEED_DESTROY	0x8000
>  
>  #define UVC_CMD_QUI			0x0001
>  #define UVC_CMD_INIT_UV			0x000f
> +#define UVC_CMD_CREATE_SEC_CONF		0x0100
> +#define UVC_CMD_DESTROY_SEC_CONF	0x0101
> +#define UVC_CMD_CREATE_SEC_CPU		0x0120
> +#define UVC_CMD_DESTROY_SEC_CPU		0x0121
>  #define UVC_CMD_CONV_TO_SEC_STOR	0x0200
>  #define UVC_CMD_CONV_FROM_SEC_STOR	0x0201
> +#define UVC_CMD_SET_SEC_CONF_PARAMS	0x0300
> +#define UVC_CMD_UNPACK_IMG		0x0301
> +#define UVC_CMD_VERIFY_IMG		0x0302
>  #define UVC_CMD_PIN_PAGE_SHARED		0x0341
>  #define UVC_CMD_UNPIN_PAGE_SHARED	0x0342
>  #define UVC_CMD_SET_SHARED_ACCESS	0x1000
> @@ -37,10 +45,17 @@
>  enum uv_cmds_inst {
>  	BIT_UVC_CMD_QUI = 0,
>  	BIT_UVC_CMD_INIT_UV = 1,
> +	BIT_UVC_CMD_CREATE_SEC_CONF = 2,
> +	BIT_UVC_CMD_DESTROY_SEC_CONF = 3,
> +	BIT_UVC_CMD_CREATE_SEC_CPU = 4,
> +	BIT_UVC_CMD_DESTROY_SEC_CPU = 5,
>  	BIT_UVC_CMD_CONV_TO_SEC_STOR = 6,
>  	BIT_UVC_CMD_CONV_FROM_SEC_STOR = 7,
>  	BIT_UVC_CMD_SET_SHARED_ACCESS = 8,
>  	BIT_UVC_CMD_REMOVE_SHARED_ACCESS = 9,
> +	BIT_UVC_CMD_SET_SEC_PARMS = 11,
> +	BIT_UVC_CMD_UNPACK_IMG = 13,
> +	BIT_UVC_CMD_VERIFY_IMG = 14,
>  	BIT_UVC_CMD_PIN_PAGE_SHARED = 21,
>  	BIT_UVC_CMD_UNPIN_PAGE_SHARED = 22,
>  };
> @@ -52,6 +67,7 @@ struct uv_cb_header {
>  	u16 rrc;	/* Return Reason Code */
>  } __packed __aligned(8);
>  
> +/* Query Ultravisor Information */
>  struct uv_cb_qui {
>  	struct uv_cb_header header;
>  	u64 reserved08;
> @@ -71,6 +87,7 @@ struct uv_cb_qui {
>  	u64 reserveda0;
>  } __packed __aligned(8);
>  
> +/* Initialize Ultravisor */
>  struct uv_cb_init {
>  	struct uv_cb_header header;
>  	u64 reserved08[2];
> @@ -79,6 +96,35 @@ struct uv_cb_init {
>  	u64 reserved28[4];
>  } __packed __aligned(8);
>  
> +/* Create Guest Configuration */
> +struct uv_cb_cgc {
> +	struct uv_cb_header header;
> +	u64 reserved08[2];
> +	u64 guest_handle;
> +	u64 conf_base_stor_origin;
> +	u64 conf_virt_stor_origin;
> +	u64 reserved30;
> +	u64 guest_stor_origin;
> +	u64 guest_stor_len;
> +	u64 guest_sca;
> +	u64 guest_asce;
> +	u64 reserved58[5];
> +} __packed __aligned(8);
> +
> +/* Create Secure CPU */
> +struct uv_cb_csc {
> +	struct uv_cb_header header;
> +	u64 reserved08[2];
> +	u64 cpu_handle;
> +	u64 guest_handle;
> +	u64 stor_origin;
> +	u8  reserved30[6];
> +	u16 num;
> +	u64 state_origin;
> +	u64 reserved40[4];
> +} __packed __aligned(8);
> +
> +/* Convert to Secure */
>  struct uv_cb_cts {
>  	struct uv_cb_header header;
>  	u64 reserved08[2];
> @@ -86,12 +132,34 @@ struct uv_cb_cts {
>  	u64 gaddr;
>  } __packed __aligned(8);
>  
> +/* Convert from Secure / Pin Page Shared */
>  struct uv_cb_cfs {
>  	struct uv_cb_header header;
>  	u64 reserved08[2];
>  	u64 paddr;
>  } __packed __aligned(8);
>  
> +/* Set Secure Config Parameter */
> +struct uv_cb_ssc {
> +	struct uv_cb_header header;
> +	u64 reserved08[2];
> +	u64 guest_handle;
> +	u64 sec_header_origin;
> +	u32 sec_header_len;
> +	u32 reserved2c;
> +	u64 reserved30[4];
> +} __packed __aligned(8);
> +
> +/* Unpack */
> +struct uv_cb_unp {
> +	struct uv_cb_header header;
> +	u64 reserved08[2];
> +	u64 guest_handle;
> +	u64 gaddr;
> +	u64 tweak[2];
> +	u64 reserved38[3];
> +} __packed __aligned(8);
> +
>  /*
>   * A common UV call struct for calls that take no payload
>   * Examples:
> @@ -105,6 +173,7 @@ struct uv_cb_nodata {
>  	u64 reserved20[4];
>  } __packed __aligned(8);
>  
> +/* Set Shared Access */
>  struct uv_cb_share {
>  	struct uv_cb_header header;
>  	u64 reserved08[3];
> diff --git a/arch/s390/kvm/Makefile b/arch/s390/kvm/Makefile
> index 05ee90a5ea08..12decca22e7c 100644
> --- a/arch/s390/kvm/Makefile
> +++ b/arch/s390/kvm/Makefile
> @@ -9,6 +9,6 @@ common-objs = $(KVM)/kvm_main.o $(KVM)/eventfd.o  $(KVM)/async_pf.o $(KVM)/irqch
>  ccflags-y := -Ivirt/kvm -Iarch/s390/kvm
>  
>  kvm-objs := $(common-objs) kvm-s390.o intercept.o interrupt.o priv.o sigp.o
> -kvm-objs += diag.o gaccess.o guestdbg.o vsie.o
> +kvm-objs += diag.o gaccess.o guestdbg.o vsie.o pv.o
>  
>  obj-$(CONFIG_KVM) += kvm.o
> diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
> index 1a48214ac507..e1bccbb41fdd 100644
> --- a/arch/s390/kvm/kvm-s390.c
> +++ b/arch/s390/kvm/kvm-s390.c
> @@ -44,6 +44,7 @@
>  #include <asm/cpacf.h>
>  #include <asm/timex.h>
>  #include <asm/ap.h>
> +#include <asm/uv.h>
>  #include "kvm-s390.h"
>  #include "gaccess.h"
>  
> @@ -236,6 +237,7 @@ int kvm_arch_check_processor_compat(void)
>  
>  static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
>  			      unsigned long end);
> +static int sca_switch_to_extended(struct kvm *kvm);
>  
>  static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
>  {
> @@ -568,6 +570,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
>  	case KVM_CAP_S390_BPB:
>  		r = test_facility(82);
>  		break;
> +	case KVM_CAP_S390_PROTECTED:
> +		r = is_prot_virt_host();
> +		break;
>  	default:
>  		r = 0;
>  	}
> @@ -2162,6 +2167,115 @@ static int kvm_s390_set_cmma_bits(struct kvm *kvm,
>  	return r;
>  }
>  
> +static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
> +{
> +	int r = 0;
> +	void __user *argp = (void __user *)cmd->data;
> +
> +	switch (cmd->cmd) {
> +	case KVM_PV_VM_CREATE: {
> +		r = -EINVAL;
> +		if (kvm_s390_pv_is_protected(kvm))
> +			break;
> +
> +		r = kvm_s390_pv_alloc_vm(kvm);
> +		if (r)
> +			break;
> +
> +		mutex_lock(&kvm->lock);
> +		kvm_s390_vcpu_block_all(kvm);
> +		/* FMT 4 SIE needs esca */
> +		r = sca_switch_to_extended(kvm);
> +		if (r) {
> +			kvm_s390_pv_dealloc_vm(kvm);
> +			kvm_s390_vcpu_unblock_all(kvm);
> +			mutex_unlock(&kvm->lock);
> +			break;
> +		}
> +		r = kvm_s390_pv_create_vm(kvm);
> +		kvm_s390_vcpu_unblock_all(kvm);
> +		mutex_unlock(&kvm->lock);
> +		break;
> +	}
> +	case KVM_PV_VM_DESTROY: {
> +		r = -EINVAL;
> +		if (!kvm_s390_pv_is_protected(kvm))
> +			break;
> +
> +		/* All VCPUs have to be destroyed before this call. */
> +		mutex_lock(&kvm->lock);
> +		kvm_s390_vcpu_block_all(kvm);
> +		r = kvm_s390_pv_destroy_vm(kvm);
> +		if (!r)
> +			kvm_s390_pv_dealloc_vm(kvm);
> +		kvm_s390_vcpu_unblock_all(kvm);
> +		mutex_unlock(&kvm->lock);
> +		break;
> +	}
> +	case KVM_PV_VM_SET_SEC_PARMS: {
> +		struct kvm_s390_pv_sec_parm parms = {};
> +		void *hdr;
> +
> +		r = -EINVAL;
> +		if (!kvm_s390_pv_is_protected(kvm))
> +			break;
> +
> +		r = -EFAULT;
> +		if (copy_from_user(&parms, argp, sizeof(parms)))
> +			break;
> +
> +		/* Currently restricted to 8KB */
> +		r = -EINVAL;
> +		if (parms.length > PAGE_SIZE * 2)
> +			break;
> +
> +		r = -ENOMEM;
> +		hdr = vmalloc(parms.length);
> +		if (!hdr)
> +			break;
> +
> +		r = -EFAULT;
> +		if (!copy_from_user(hdr, (void __user *)parms.origin,
> +				   parms.length))
> +			r = kvm_s390_pv_set_sec_parms(kvm, hdr, parms.length);
> +
> +		vfree(hdr);
> +		break;
> +	}
> +	case KVM_PV_VM_UNPACK: {
> +		struct kvm_s390_pv_unp unp = {};
> +
> +		r = -EINVAL;
> +		if (!kvm_s390_pv_is_protected(kvm))
> +			break;
> +
> +		r = -EFAULT;
> +		if (copy_from_user(&unp, argp, sizeof(unp)))
> +			break;
> +
> +		r = kvm_s390_pv_unpack(kvm, unp.addr, unp.size, unp.tweak);
> +		break;
> +	}
> +	case KVM_PV_VM_VERIFY: {
> +		u32 ret;
> +
> +		r = -EINVAL;
> +		if (!kvm_s390_pv_is_protected(kvm))
> +			break;
> +
> +		r = uv_cmd_nodata(kvm_s390_pv_handle(kvm),
> +				  UVC_CMD_VERIFY_IMG,
> +				  &ret);
> +		VM_EVENT(kvm, 3, "PROTVIRT VERIFY: rc %x rrc %x",
> +			 ret >> 16, ret & 0x0000ffff);
> +		break;
> +	}
> +	default:
> +		return -ENOTTY;
> +	}
> +	return r;
> +}
> +
>  long kvm_arch_vm_ioctl(struct file *filp,
>  		       unsigned int ioctl, unsigned long arg)
>  {
> @@ -2259,6 +2373,20 @@ long kvm_arch_vm_ioctl(struct file *filp,
>  		mutex_unlock(&kvm->slots_lock);
>  		break;
>  	}
> +	case KVM_S390_PV_COMMAND: {
> +		struct kvm_pv_cmd args;
> +
> +		r = -EINVAL;
> +		if (!is_prot_virt_host())
> +			break;
> +
> +		r = -EFAULT;
> +		if (copy_from_user(&args, argp, sizeof(args)))
> +			break;
> +
> +		r = kvm_s390_handle_pv(kvm, &args);
> +		break;
> +	}
>  	default:
>  		r = -ENOTTY;
>  	}
> @@ -2534,6 +2662,8 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
>  
>  	if (vcpu->kvm->arch.use_cmma)
>  		kvm_s390_vcpu_unsetup_cmma(vcpu);
> +	if (kvm_s390_pv_handle_cpu(vcpu))
> +		kvm_s390_pv_destroy_cpu(vcpu);
>  	free_page((unsigned long)(vcpu->arch.sie_block));
>  
>  	kvm_vcpu_uninit(vcpu);
> @@ -2560,8 +2690,12 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
>  {
>  	kvm_free_vcpus(kvm);
>  	sca_dispose(kvm);
> -	debug_unregister(kvm->arch.dbf);
>  	kvm_s390_gisa_destroy(kvm);
> +	if (kvm_s390_pv_is_protected(kvm)) {
> +		kvm_s390_pv_destroy_vm(kvm);
> +		kvm_s390_pv_dealloc_vm(kvm);
> +	}
> +	debug_unregister(kvm->arch.dbf);
>  	free_page((unsigned long)kvm->arch.sie_page2);
>  	if (!kvm_is_ucontrol(kvm))
>  		gmap_remove(kvm->arch.gmap);
> @@ -2657,6 +2791,9 @@ static int sca_switch_to_extended(struct kvm *kvm)
>  	unsigned int vcpu_idx;
>  	u32 scaol, scaoh;
>  
> +	if (kvm->arch.use_esca)
> +		return 0;
> +
>  	new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
>  	if (!new_sca)
>  		return -ENOMEM;
> @@ -3049,6 +3186,15 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
>  	rc = kvm_vcpu_init(vcpu, kvm, id);
>  	if (rc)
>  		goto out_free_sie_block;
> +
> +	if (kvm_s390_pv_is_protected(kvm)) {
> +		rc = kvm_s390_pv_create_cpu(vcpu);
> +		if (rc) {
> +			kvm_vcpu_uninit(vcpu);
> +			goto out_free_sie_block;
> +		}
> +	}
> +
>  	VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
>  		 vcpu->arch.sie_block);
>  	trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
> @@ -4357,6 +4503,35 @@ long kvm_arch_vcpu_async_ioctl(struct file *filp,
>  	return -ENOIOCTLCMD;
>  }
>  
> +static int kvm_s390_handle_pv_vcpu(struct kvm_vcpu *vcpu,
> +				   struct kvm_pv_cmd *cmd)
> +{
> +	int r = 0;
> +
> +	if (!kvm_s390_pv_is_protected(vcpu->kvm))
> +		return -EINVAL;
> +
> +	switch (cmd->cmd) {
> +	case KVM_PV_VCPU_CREATE: {
> +		if (kvm_s390_pv_handle_cpu(vcpu))
> +			return -EINVAL;
> +
> +		r = kvm_s390_pv_create_cpu(vcpu);
> +		break;
> +	}
> +	case KVM_PV_VCPU_DESTROY: {
> +		if (!kvm_s390_pv_handle_cpu(vcpu))
> +			return -EINVAL;
> +
> +		r = kvm_s390_pv_destroy_cpu(vcpu);
> +		break;
> +	}

I feel like my review comments for this patch were lost, so not
repeating them.


-- 
Thanks,

David / dhildenb

^ permalink raw reply

* [RFC v2 5/6] tpm: Add the SysBus TPM TIS device
From: Eric Auger @ 2020-02-14 18:37 UTC (permalink / raw)
  To: eric.auger.pro, eric.auger, stefanb, qemu-devel, qemu-arm,
	peter.maydell
  Cc: marcandre.lureau, lersek, ardb, philmd
In-Reply-To: <20200214183704.14389-1-eric.auger@redhat.com>

Introduce the tpm-tis-device which is a sysbus device
and is bound to be used on ARM.

Signed-off-by: Eric Auger <eric.auger@redhat.com>
---
 hw/tpm/Kconfig          |   5 ++
 hw/tpm/Makefile.objs    |   1 +
 hw/tpm/tpm_tis_sysbus.c | 159 ++++++++++++++++++++++++++++++++++++++++
 include/sysemu/tpm.h    |   1 +
 4 files changed, 166 insertions(+)
 create mode 100644 hw/tpm/tpm_tis_sysbus.c

diff --git a/hw/tpm/Kconfig b/hw/tpm/Kconfig
index 686f8206bb..4794e7fe28 100644
--- a/hw/tpm/Kconfig
+++ b/hw/tpm/Kconfig
@@ -7,6 +7,11 @@ config TPM_TIS_ISA
     depends on TPM && ISA_BUS
     select TPM_TIS
 
+config TPM_TIS_SYSBUS
+    bool
+    depends on TPM
+    select TPM_TIS
+
 config TPM_TIS
     bool
     depends on TPM
diff --git a/hw/tpm/Makefile.objs b/hw/tpm/Makefile.objs
index 3ef2036cca..f1ec4beb95 100644
--- a/hw/tpm/Makefile.objs
+++ b/hw/tpm/Makefile.objs
@@ -1,6 +1,7 @@
 common-obj-$(CONFIG_TPM) += tpm_util.o
 obj-$(call lor,$(CONFIG_TPM_TIS),$(CONFIG_TPM_CRB)) += tpm_ppi.o
 common-obj-$(CONFIG_TPM_TIS_ISA) += tpm_tis_isa.o
+common-obj-$(CONFIG_TPM_TIS_SYSBUS) += tpm_tis_sysbus.o
 common-obj-$(CONFIG_TPM_TIS) += tpm_tis_common.o
 common-obj-$(CONFIG_TPM_CRB) += tpm_crb.o
 common-obj-$(CONFIG_TPM_PASSTHROUGH) += tpm_passthrough.o
diff --git a/hw/tpm/tpm_tis_sysbus.c b/hw/tpm/tpm_tis_sysbus.c
new file mode 100644
index 0000000000..18c02aed67
--- /dev/null
+++ b/hw/tpm/tpm_tis_sysbus.c
@@ -0,0 +1,159 @@
+/*
+ * tpm_tis_sysbus.c - QEMU's TPM TIS SYSBUS Device
+ *
+ * Copyright (C) 2006,2010-2013 IBM Corporation
+ *
+ * Authors:
+ *  Stefan Berger <stefanb@us.ibm.com>
+ *  David Safford <safford@us.ibm.com>
+ *
+ * Xen 4 support: Andrease Niederl <andreas.niederl@iaik.tugraz.at>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ * Implementation of the TIS interface according to specs found at
+ * http://www.trustedcomputinggroup.org. This implementation currently
+ * supports version 1.3, 21 March 2013
+ * In the developers menu choose the PC Client section then find the TIS
+ * specification.
+ *
+ * TPM TIS for TPM 2 implementation following TCG PC Client Platform
+ * TPM Profile (PTP) Specification, Familiy 2.0, Revision 00.43
+ */
+
+#include "qemu/osdep.h"
+#include "hw/qdev-properties.h"
+#include "migration/vmstate.h"
+#include "tpm_util.h"
+#include "hw/sysbus.h"
+#include "tpm_tis.h"
+
+typedef struct TPMStateSysBus {
+    /*< private >*/
+    SysBusDevice parent_obj;
+
+    /*< public >*/
+    TPMState state; /* not a QOM object */
+} TPMStateSysBus;
+
+#define TPM_TIS_SYSBUS(obj) OBJECT_CHECK(TPMStateSysBus, (obj), TYPE_TPM_TIS_SYSBUS)
+
+static int tpm_tis_pre_save_sysbus(void *opaque)
+{
+    TPMStateSysBus *sbdev = opaque;
+
+    return tpm_tis_pre_save(&sbdev->state);
+}
+
+static const VMStateDescription vmstate_tpm_tis_sysbus = {
+    .name = "tpm-tis",
+    .version_id = 0,
+    .pre_save  = tpm_tis_pre_save_sysbus,
+    .fields = (VMStateField[]) {
+        VMSTATE_BUFFER(state.buffer, TPMStateSysBus),
+        VMSTATE_UINT16(state.rw_offset, TPMStateSysBus),
+        VMSTATE_UINT8(state.active_locty, TPMStateSysBus),
+        VMSTATE_UINT8(state.aborting_locty, TPMStateSysBus),
+        VMSTATE_UINT8(state.next_locty, TPMStateSysBus),
+
+        VMSTATE_STRUCT_ARRAY(state.loc, TPMStateSysBus, TPM_TIS_NUM_LOCALITIES,
+                             0, vmstate_locty, TPMLocality),
+
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+static void tpm_tis_sysbus_request_completed(TPMIf *ti, int ret)
+{
+    TPMStateSysBus *sbdev = TPM_TIS_SYSBUS(ti);
+    TPMState *s = &sbdev->state;
+
+    tpm_tis_request_completed(s, ret);
+}
+
+static enum TPMVersion tpm_tis_sysbus_get_tpm_version(TPMIf *ti)
+{
+    TPMStateSysBus *sbdev = TPM_TIS_SYSBUS(ti);
+    TPMState *s = &sbdev->state;
+
+    return tpm_tis_get_tpm_version(s);
+}
+
+static void tpm_tis_sysbus_reset(DeviceState *dev)
+{
+    TPMStateSysBus *sbdev = TPM_TIS_SYSBUS(dev);
+    TPMState *s = &sbdev->state;
+
+    return tpm_tis_reset(s);
+}
+
+static Property tpm_tis_sysbus_properties[] = {
+    DEFINE_PROP_UINT32("irq", TPMStateSysBus, state.irq_num, TPM_TIS_IRQ),
+    DEFINE_PROP_TPMBE("tpmdev", TPMStateSysBus, state.be_driver),
+    DEFINE_PROP_BOOL("ppi", TPMStateSysBus, state.ppi_enabled, true),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
+static void tpm_tis_sysbus_initfn(Object *obj)
+{
+    TPMStateSysBus *sbdev = TPM_TIS_SYSBUS(obj);
+    TPMState *s = &sbdev->state;
+
+    memory_region_init_io(&s->mmio, obj, &tpm_tis_memory_ops,
+                          s, "tpm-tis-mmio",
+                          TPM_TIS_NUM_LOCALITIES << TPM_TIS_LOCALITY_SHIFT);
+
+    sysbus_init_mmio(SYS_BUS_DEVICE(obj), &s->mmio);
+    sysbus_init_irq(SYS_BUS_DEVICE(obj), &s->irq);
+}
+
+static void tpm_tis_sysbus_realizefn(DeviceState *dev, Error **errp)
+{
+    TPMStateSysBus *sbdev = TPM_TIS_SYSBUS(dev);
+    TPMState *s = &sbdev->state;
+
+    if (!tpm_find()) {
+        error_setg(errp, "at most one TPM device is permitted");
+        return;
+    }
+
+    if (!s->be_driver) {
+        error_setg(errp, "'tpmdev' property is required");
+        return;
+    }
+}
+
+static void tpm_tis_sysbus_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    TPMIfClass *tc = TPM_IF_CLASS(klass);
+
+    device_class_set_props(dc, tpm_tis_sysbus_properties);
+    dc->vmsd  = &vmstate_tpm_tis_sysbus;
+    tc->model = TPM_MODEL_TPM_TIS;
+    dc->realize = tpm_tis_sysbus_realizefn;
+    dc->user_creatable = true;
+    dc->reset = tpm_tis_sysbus_reset;
+    tc->request_completed = tpm_tis_sysbus_request_completed;
+    tc->get_version = tpm_tis_sysbus_get_tpm_version;
+}
+
+static const TypeInfo tpm_tis_sysbus_info = {
+    .name = TYPE_TPM_TIS_SYSBUS,
+    .parent = TYPE_SYS_BUS_DEVICE,
+    .instance_size = sizeof(TPMStateSysBus),
+    .instance_init = tpm_tis_sysbus_initfn,
+    .class_init  = tpm_tis_sysbus_class_init,
+    .interfaces = (InterfaceInfo[]) {
+        { TYPE_TPM_IF },
+        { }
+    }
+};
+
+static void tpm_tis_sysbus_register(void)
+{
+    type_register_static(&tpm_tis_sysbus_info);
+}
+
+type_init(tpm_tis_sysbus_register)
diff --git a/include/sysemu/tpm.h b/include/sysemu/tpm.h
index 1691b92c28..f37851b1aa 100644
--- a/include/sysemu/tpm.h
+++ b/include/sysemu/tpm.h
@@ -44,6 +44,7 @@ typedef struct TPMIfClass {
 } TPMIfClass;
 
 #define TYPE_TPM_TIS_ISA            "tpm-tis"
+#define TYPE_TPM_TIS_SYSBUS         "tpm-tis-device"
 #define TYPE_TPM_CRB                "tpm-crb"
 #define TYPE_TPM_SPAPR              "tpm-spapr"
 
-- 
2.20.1


^ permalink raw reply related

* [PATCH v2 00/28] user_namespace: introduce fsid mappings
From: Christian Brauner @ 2020-02-14 18:35 UTC (permalink / raw)
  To: Stéphane Graber, Eric W. Biederman, Aleksa Sarai, Jann Horn
  Cc: smbarber, Seth Forshee, Alexander Viro, Alexey Dobriyan,
	Serge Hallyn, James Morris, Kees Cook, Jonathan Corbet,
	Phil Estes, linux-kernel, linux-fsdevel, containers,
	linux-security-module, linux-api, Christian Brauner

Hey everyone,

This is v2 with various fixes after discussions with Jann.

From pings and off-list questions and discussions at Google Container
Security Summit there seems to be quite a lot of interest in this
patchset with use-cases ranging from layer sharing for app containers
and k8s, as well as data sharing between containers with different id
mappings. I haven't Cced all people because I don't have all the email
adresses at hand but I've at least added Phil now. :)

This is the implementation of shiftfs which was cooked up during lunch at
Linux Plumbers 2019 the day after the container's microconference. The
idea is a design-stew from Stéphane, Aleksa, Eric, and myself. Back then
we all were quite busy with other work and couldn't really sit down and
implement it. But I took a few days last week to do this work, including
demos and performance testing.
This implementation does not require us to touch the vfs substantially
at all. Instead, we implement shiftfs via fsid mappings.
With this patch, it took me 20 mins to port both LXD and LXC to support
shiftfs via fsid mappings.

For anyone wanting to play with this the branch can be pulled from:
https://github.com/brauner/linux/tree/fsid_mappings
https://gitlab.com/brauner/linux/-/tree/fsid_mappings
https://git.kernel.org/pub/scm/linux/kernel/git/brauner/linux.git/log/?h=fsid_mappings

The main use case for shiftfs for us is in allowing shared writable
storage to multiple containers using non-overlapping id mappings.
In such a scenario you want the fsids to be valid and identical in both
containers for the shared mount. A demo for this exists in [3].
If you don't want to read on, go straight to the other demos below in
[1] and [2].

People not as familiar with user namespaces might not be aware that fsid
mappings already exist. Right now, fsid mappings are always identical to
id mappings. Specifically, the kernel will lookup fsuids in the uid
mappings and fsgids in the gid mappings of the relevant user namespace.

With this patch series we simply introduce the ability to create fsid
mappings that are different from the id mappings of a user namespace.
The whole feature set is placed under a config option that defaults to
false.

In the usual case of running an unprivileged container we will have
setup an id mapping, e.g. 0 100000 100000. The on-disk mapping will
correspond to this id mapping, i.e. all files which we want to appear as
0:0 inside the user namespace will be chowned to 100000:100000 on the
host. This works, because whenever the kernel needs to do a filesystem
access it will lookup the corresponding uid and gid in the idmapping
tables of the container.
Now think about the case where we want to have an id mapping of 0 100000
100000 but an on-disk mapping of 0 300000 100000 which is needed to e.g.
share a single on-disk mapping with multiple containers that all have
different id mappings.
This will be problematic. Whenever a filesystem access is requested, the
kernel will now try to lookup a mapping for 300000 in the id mapping
tables of the user namespace but since there is none the files will
appear to be owned by the overflow id, i.e. usually 65534:65534 or
nobody:nogroup.

With fsid mappings we can solve this by writing an id mapping of 0
100000 100000 and an fsid mapping of 0 300000 100000. On filesystem
access the kernel will now lookup the mapping for 300000 in the fsid
mapping tables of the user namespace. And since such a mapping exists,
the corresponding files will have correct ownership.

A note on proc (and sys), the proc filesystem is special in sofar as it
only has a single superblock that is (currently but might be about to
change) visible in all user namespaces (same goes for sys). This means
it has special semantics in many ways, including how file ownership and
access works. The fsid mapping implementation does not alter how proc
(and sys) ownership works. proc and sys will both continue to lookup
filesystem access in id mapping tables.

When Writing fsid mappings the same rules apply as when writing id
mappings so I won't reiterate them here. The limit of fs id mappings is
the same as for id mappings, i.e. 340 lines.

# Performance
Back when I extended the range of possible id mappings to 340 I did
performance testing by booting into single user mode, creating 1,000,000
files to fstat()ing them and calculated the mean fstat() time per file.
(Back when Linux was still fast. I won't mention that the stat
 numbers have (thanks microcode!) doubled since then...)
I did the same test for this patchset: one vanilla kernel, one kernel
with my fsid mapping patches but CONFIG_USER_NS_FSID set to n and one
with fsid mappings patches enabled. I then ran the same test on all
three kernels and compared the numbers. The implementation does not
introduce overhead. That's all I can say. Here are the numbers:

             | vanilla v5.5 | fsid mappings       | fsid mappings      | fsid mappings      |
	     |              | disabled in Kconfig | enabled in Kconfig | enabled in Kconfig |
	     |   	    |                     | and unset for all  | and set for all    |
	     |   	    |    		  | test cases         | test cases         |
-------------|--------------|---------------------|--------------------|--------------------|
 0  mappings |       367 ns |              365 ns |             365 ns |             N/A    |
 1  mappings |       362 ns |              367 ns |             363 ns |             363 ns |
 2  mappings |       361 ns |              369 ns |             363 ns |             364 ns |
 3  mappings |       361 ns |              368 ns |             366 ns |             365 ns |
 5  mappings |       365 ns |              368 ns |             363 ns |             365 ns |
 10 mappings |       391 ns |              388 ns |             387 ns |             389 ns |
 50 mappings |       395 ns |              398 ns |             401 ns |             397 ns |
100 mappings |       400 ns |              405 ns |             399 ns |             399 ns |
200 mappings |       404 ns |              407 ns |             430 ns |             404 ns |
300 mappings |       492 ns |              494 ns |             432 ns |             413 ns |
340 mappings |       495 ns |              497 ns |             500 ns |             484 ns |

# Demos
[1]: Create a container with different id and fsid mappings.
     https://asciinema.org/a/300233 
[2]: Create a container with id mappings but without fsid mappings.
     https://asciinema.org/a/300234
[3]: Share storage between multiple containers with non-overlapping id
     mappings.
     https://asciinema.org/a/300235

Thanks!
Christian

Christian Brauner (28):
  user_namespace: introduce fsid mappings infrastructure
  proc: add /proc/<pid>/fsuid_map
  proc: add /proc/<pid>/fsgid_map
  fsuidgid: add fsid mapping helpers
  proc: task_state(): use from_kfs{g,u}id_munged
  cred: add kfs{g,u}id
  sys: __sys_setfsuid(): handle fsid mappings
  sys: __sys_setfsgid(): handle fsid mappings
  sys:__sys_setuid(): handle fsid mappings
  sys:__sys_setgid(): handle fsid mappings
  sys:__sys_setreuid(): handle fsid mappings
  sys:__sys_setregid(): handle fsid mappings
  sys:__sys_setresuid(): handle fsid mappings
  sys:__sys_setresgid(): handle fsid mappings
  fs: add is_userns_visible() helper
  namei: may_{o_}create(): handle fsid mappings
  inode: inode_owner_or_capable(): handle fsid mappings
  capability: privileged_wrt_inode_uidgid(): handle fsid mappings
  stat: handle fsid mappings
  open: handle fsid mappings
  posix_acl: handle fsid mappings
  attr: notify_change(): handle fsid mappings
  commoncap: cap_bprm_set_creds(): handle fsid mappings
  commoncap: cap_task_fix_setuid(): handle fsid mappings
  commoncap: handle fsid mappings with vfs caps
  exec: bprm_fill_uid(): handle fsid mappings
  ptrace: adapt ptrace_may_access() to always uses unmapped fsids
  devpts: handle fsid mappings

 fs/attr.c                      |  23 ++-
 fs/devpts/inode.c              |   7 +-
 fs/exec.c                      |  25 ++-
 fs/inode.c                     |   7 +-
 fs/namei.c                     |  36 +++-
 fs/open.c                      |  16 +-
 fs/posix_acl.c                 |  21 +--
 fs/proc/array.c                |   5 +-
 fs/proc/base.c                 |  34 ++++
 fs/stat.c                      |  48 ++++--
 include/linux/cred.h           |   4 +
 include/linux/fs.h             |   5 +
 include/linux/fsuidgid.h       | 122 +++++++++++++
 include/linux/stat.h           |   1 +
 include/linux/user_namespace.h |  10 ++
 init/Kconfig                   |  11 ++
 kernel/capability.c            |  10 +-
 kernel/ptrace.c                |   4 +-
 kernel/sys.c                   | 106 +++++++++---
 kernel/user.c                  |  22 +++
 kernel/user_namespace.c        | 303 ++++++++++++++++++++++++++++++++-
 security/commoncap.c           |  35 ++--
 22 files changed, 757 insertions(+), 98 deletions(-)
 create mode 100644 include/linux/fsuidgid.h


base-commit: bb6d3fb354c5ee8d6bde2d576eb7220ea09862b9
-- 
2.25.0


^ permalink raw reply

* [PATCH v2 01/28] user_namespace: introduce fsid mappings infrastructure
From: Christian Brauner @ 2020-02-14 18:35 UTC (permalink / raw)
  To: Stéphane Graber, Eric W. Biederman, Aleksa Sarai, Jann Horn
  Cc: smbarber, Seth Forshee, Alexander Viro, Alexey Dobriyan,
	Serge Hallyn, James Morris, Kees Cook, Jonathan Corbet,
	Phil Estes, linux-kernel, linux-fsdevel, containers,
	linux-security-module, linux-api, Christian Brauner
In-Reply-To: <20200214183554.1133805-1-christian.brauner@ubuntu.com>

This introduces the infrastructure to setup fsid mappings which will be used in
later patches.
All new code depends on CONFIG_USER_NS_FSID=y. It currently defaults to "N".
If CONFIG_USER_NS_FSID is not set, no new code is added.

In this patch fsuid_m_show() and fsgid_m_show() are introduced. They are
identical to uid_m_show() and gid_m_show() until we introduce from_kfsuid() and
from_kfsgid() in a follow-up patch.

Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
---
/* v2 */
- Randy Dunlap <rdunlap@infradead.org>:
  - Fix typo in USER_NS_FSID kconfig documentation.
---
 include/linux/user_namespace.h |  10 +++
 init/Kconfig                   |  11 +++
 kernel/user.c                  |  22 ++++++
 kernel/user_namespace.c        | 122 +++++++++++++++++++++++++++++++++
 4 files changed, 165 insertions(+)

diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index 6ef1c7109fc4..e44742b0cf8a 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -56,6 +56,10 @@ enum ucount_type {
 struct user_namespace {
 	struct uid_gid_map	uid_map;
 	struct uid_gid_map	gid_map;
+#ifdef CONFIG_USER_NS_FSID
+	struct uid_gid_map	fsuid_map;
+	struct uid_gid_map	fsgid_map;
+#endif
 	struct uid_gid_map	projid_map;
 	atomic_t		count;
 	struct user_namespace	*parent;
@@ -127,6 +131,12 @@ struct seq_operations;
 extern const struct seq_operations proc_uid_seq_operations;
 extern const struct seq_operations proc_gid_seq_operations;
 extern const struct seq_operations proc_projid_seq_operations;
+#ifdef CONFIG_USER_NS_FSID
+extern const struct seq_operations proc_fsuid_seq_operations;
+extern const struct seq_operations proc_fsgid_seq_operations;
+extern ssize_t proc_fsuid_map_write(struct file *, const char __user *, size_t, loff_t *);
+extern ssize_t proc_fsgid_map_write(struct file *, const char __user *, size_t, loff_t *);
+#endif
 extern ssize_t proc_uid_map_write(struct file *, const char __user *, size_t, loff_t *);
 extern ssize_t proc_gid_map_write(struct file *, const char __user *, size_t, loff_t *);
 extern ssize_t proc_projid_map_write(struct file *, const char __user *, size_t, loff_t *);
diff --git a/init/Kconfig b/init/Kconfig
index cfee56c151f1..d4d0beeba48f 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1111,6 +1111,17 @@ config USER_NS
 
 	  If unsure, say N.
 
+config USER_NS_FSID
+	bool "User namespace fsid mappings"
+	depends on USER_NS
+	default n
+	help
+	  This allows containers to alter their filesystem id mappings.
+	  With this containers with different id mappings can still share
+	  the same filesystem.
+
+	  If unsure, say N.
+
 config PID_NS
 	bool "PID Namespaces"
 	default y
diff --git a/kernel/user.c b/kernel/user.c
index 5235d7f49982..2ccaea9b810b 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -55,6 +55,28 @@ struct user_namespace init_user_ns = {
 			},
 		},
 	},
+#ifdef CONFIG_USER_NS_FSID
+	.fsuid_map = {
+		.nr_extents = 1,
+		{
+			.extent[0] = {
+				.first = 0,
+				.lower_first = 0,
+				.count = 4294967295U,
+			},
+		},
+	},
+	.fsgid_map = {
+		.nr_extents = 1,
+		{
+			.extent[0] = {
+				.first = 0,
+				.lower_first = 0,
+				.count = 4294967295U,
+			},
+		},
+	},
+#endif
 	.count = ATOMIC_INIT(3),
 	.owner = GLOBAL_ROOT_UID,
 	.group = GLOBAL_ROOT_GID,
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index 8eadadc478f9..cbdf456f95f0 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -191,6 +191,16 @@ static void free_user_ns(struct work_struct *work)
 			kfree(ns->projid_map.forward);
 			kfree(ns->projid_map.reverse);
 		}
+#ifdef CONFIG_USER_NS_FSID
+		if (ns->fsgid_map.nr_extents > UID_GID_MAP_MAX_BASE_EXTENTS) {
+			kfree(ns->fsgid_map.forward);
+			kfree(ns->fsgid_map.reverse);
+		}
+		if (ns->fsuid_map.nr_extents > UID_GID_MAP_MAX_BASE_EXTENTS) {
+			kfree(ns->fsuid_map.forward);
+			kfree(ns->fsuid_map.reverse);
+		}
+#endif
 		retire_userns_sysctls(ns);
 		key_free_user_ns(ns);
 		ns_free_inum(&ns->ns);
@@ -637,6 +647,50 @@ static int projid_m_show(struct seq_file *seq, void *v)
 	return 0;
 }
 
+#ifdef CONFIG_USER_NS_FSID
+static int fsuid_m_show(struct seq_file *seq, void *v)
+{
+	struct user_namespace *ns = seq->private;
+	struct uid_gid_extent *extent = v;
+	struct user_namespace *lower_ns;
+	uid_t lower;
+
+	lower_ns = seq_user_ns(seq);
+	if ((lower_ns == ns) && lower_ns->parent)
+		lower_ns = lower_ns->parent;
+
+	lower = from_kuid(lower_ns, KUIDT_INIT(extent->lower_first));
+
+	seq_printf(seq, "%10u %10u %10u\n",
+		extent->first,
+		lower,
+		extent->count);
+
+	return 0;
+}
+
+static int fsgid_m_show(struct seq_file *seq, void *v)
+{
+	struct user_namespace *ns = seq->private;
+	struct uid_gid_extent *extent = v;
+	struct user_namespace *lower_ns;
+	gid_t lower;
+
+	lower_ns = seq_user_ns(seq);
+	if ((lower_ns == ns) && lower_ns->parent)
+		lower_ns = lower_ns->parent;
+
+	lower = from_kgid(lower_ns, KGIDT_INIT(extent->lower_first));
+
+	seq_printf(seq, "%10u %10u %10u\n",
+		extent->first,
+		lower,
+		extent->count);
+
+	return 0;
+}
+#endif
+
 static void *m_start(struct seq_file *seq, loff_t *ppos,
 		     struct uid_gid_map *map)
 {
@@ -674,6 +728,22 @@ static void *projid_m_start(struct seq_file *seq, loff_t *ppos)
 	return m_start(seq, ppos, &ns->projid_map);
 }
 
+#ifdef CONFIG_USER_NS_FSID
+static void *fsuid_m_start(struct seq_file *seq, loff_t *ppos)
+{
+	struct user_namespace *ns = seq->private;
+
+	return m_start(seq, ppos, &ns->fsuid_map);
+}
+
+static void *fsgid_m_start(struct seq_file *seq, loff_t *ppos)
+{
+	struct user_namespace *ns = seq->private;
+
+	return m_start(seq, ppos, &ns->fsgid_map);
+}
+#endif
+
 static void *m_next(struct seq_file *seq, void *v, loff_t *pos)
 {
 	(*pos)++;
@@ -706,6 +776,22 @@ const struct seq_operations proc_projid_seq_operations = {
 	.show = projid_m_show,
 };
 
+#ifdef CONFIG_USER_NS_FSID
+const struct seq_operations proc_fsuid_seq_operations = {
+	.start = fsuid_m_start,
+	.stop = m_stop,
+	.next = m_next,
+	.show = fsuid_m_show,
+};
+
+const struct seq_operations proc_fsgid_seq_operations = {
+	.start = fsgid_m_start,
+	.stop = m_stop,
+	.next = m_next,
+	.show = fsgid_m_show,
+};
+#endif
+
 static bool mappings_overlap(struct uid_gid_map *new_map,
 			     struct uid_gid_extent *extent)
 {
@@ -1081,6 +1167,42 @@ ssize_t proc_projid_map_write(struct file *file, const char __user *buf,
 			 &ns->projid_map, &ns->parent->projid_map);
 }
 
+#ifdef CONFIG_USER_NS_FSID
+ssize_t proc_fsuid_map_write(struct file *file, const char __user *buf,
+			     size_t size, loff_t *ppos)
+{
+	struct seq_file *seq = file->private_data;
+	struct user_namespace *ns = seq->private;
+	struct user_namespace *seq_ns = seq_user_ns(seq);
+
+	if (!ns->parent)
+		return -EPERM;
+
+	if ((seq_ns != ns) && (seq_ns != ns->parent))
+		return -EPERM;
+
+	return map_write(file, buf, size, ppos, CAP_SETUID, &ns->fsuid_map,
+			 &ns->parent->fsuid_map);
+}
+
+ssize_t proc_fsgid_map_write(struct file *file, const char __user *buf,
+			     size_t size, loff_t *ppos)
+{
+	struct seq_file *seq = file->private_data;
+	struct user_namespace *ns = seq->private;
+	struct user_namespace *seq_ns = seq_user_ns(seq);
+
+	if (!ns->parent)
+		return -EPERM;
+
+	if ((seq_ns != ns) && (seq_ns != ns->parent))
+		return -EPERM;
+
+	return map_write(file, buf, size, ppos, CAP_SETGID, &ns->fsgid_map,
+			 &ns->parent->fsgid_map);
+}
+#endif
+
 static bool new_idmap_permitted(const struct file *file,
 				struct user_namespace *ns, int cap_setid,
 				struct uid_gid_map *new_map)
-- 
2.25.0


^ permalink raw reply related

* [PATCH v2 07/28] sys: __sys_setfsuid(): handle fsid mappings
From: Christian Brauner @ 2020-02-14 18:35 UTC (permalink / raw)
  To: Stéphane Graber, Eric W. Biederman, Aleksa Sarai, Jann Horn
  Cc: smbarber, Seth Forshee, Alexander Viro, Alexey Dobriyan,
	Serge Hallyn, James Morris, Kees Cook, Jonathan Corbet,
	Phil Estes, linux-kernel, linux-fsdevel, containers,
	linux-security-module, linux-api, Christian Brauner
In-Reply-To: <20200214183554.1133805-1-christian.brauner@ubuntu.com>

Switch setfsuid() to lookup fsids in the fsid mappings. If no fsid mappings are
setup the behavior is unchanged, i.e. fsids are looked up in the id mappings.

A caller can only setfs{g,u}id() to a given id if the id maps to a valid kid in
both the id and fsid maps of the caller's user namespace. This is always the
case when no id mappings and fsid mappings have been written. It is also always
the case when an id mapping has been written which includes the target id and
but no fsid mappings have been written. All non-fsid mapping aware workloads
will thus work just as before.
Requiring a valid mapping for the target id in both the id and fsid mappings of
the container simplifies permission checking for userns visible filesystems
such as proc.

Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
---
/* v2 */
- Christian Brauner <christian.brauner@ubuntu.com>:
  - Set unmapped fsid as well.
---
 kernel/sys.c | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/kernel/sys.c b/kernel/sys.c
index f9bc5c303e3f..13f790dbda71 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -59,6 +59,7 @@
 #include <linux/sched/cputime.h>
 #include <linux/rcupdate.h>
 #include <linux/uidgid.h>
+#include <linux/fsuidgid.h>
 #include <linux/cred.h>
 
 #include <linux/nospec.h>
@@ -799,15 +800,19 @@ long __sys_setfsuid(uid_t uid)
 	const struct cred *old;
 	struct cred *new;
 	uid_t old_fsuid;
-	kuid_t kuid;
+	kuid_t kuid, kfsuid;
 
 	old = current_cred();
-	old_fsuid = from_kuid_munged(old->user_ns, old->fsuid);
+	old_fsuid = from_kfsuid_munged(old->user_ns, old->fsuid);
 
-	kuid = make_kuid(old->user_ns, uid);
+	kuid = make_kfsuid(old->user_ns, uid);
 	if (!uid_valid(kuid))
 		return old_fsuid;
 
+	kfsuid = make_kuid(old->user_ns, uid);
+	if (!uid_valid(kfsuid))
+		return old_fsuid;
+
 	new = prepare_creds();
 	if (!new)
 		return old_fsuid;
@@ -817,6 +822,7 @@ long __sys_setfsuid(uid_t uid)
 	    ns_capable_setid(old->user_ns, CAP_SETUID)) {
 		if (!uid_eq(kuid, old->fsuid)) {
 			new->fsuid = kuid;
+			new->kfsuid = kfsuid;
 			if (security_task_fix_setuid(new, old, LSM_SETID_FS) == 0)
 				goto change_okay;
 		}
-- 
2.25.0


^ permalink raw reply related

* [PATCH v2 06/28] cred: add kfs{g,u}id
From: Christian Brauner @ 2020-02-14 18:35 UTC (permalink / raw)
  To: Stéphane Graber, Eric W. Biederman, Aleksa Sarai, Jann Horn
  Cc: smbarber, Seth Forshee, Alexander Viro, Alexey Dobriyan,
	Serge Hallyn, James Morris, Kees Cook, Jonathan Corbet,
	Phil Estes, linux-kernel, linux-fsdevel, containers,
	linux-security-module, linux-api, Christian Brauner
In-Reply-To: <20200214183554.1133805-1-christian.brauner@ubuntu.com>

After the introduction of fsid mappings we need to carefully handle
single-superblock filesystems that are visible in user namespaces. This
specifically concerns proc and sysfs. For those filesystems we want to continue
looking up fsid in the id mappings of the relevant user namespace. We can
either do this by dynamically translating between these fsids or we simply keep
them around with the other creds. The latter option is not just simpler but
also more performant since we don't need to do the translation from fsid
mappings into id mappings on the fly.

Link: https://lore.kernel.org/r/20200212145149.zohmc6d3x52bw6j6@wittgenstein
Cc: Jann Horn <jannh@google.com>
Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
---
/* v2 */
patch added
---
 include/linux/cred.h | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/include/linux/cred.h b/include/linux/cred.h
index 18639c069263..604914d3fd51 100644
--- a/include/linux/cred.h
+++ b/include/linux/cred.h
@@ -125,6 +125,8 @@ struct cred {
 	kgid_t		egid;		/* effective GID of the task */
 	kuid_t		fsuid;		/* UID for VFS ops */
 	kgid_t		fsgid;		/* GID for VFS ops */
+	kuid_t		kfsuid;		/* UID for VFS ops for userns visible filesystems */
+	kgid_t		kfsgid;		/* GID for VFS ops for userns visible filesystems */
 	unsigned	securebits;	/* SUID-less security management */
 	kernel_cap_t	cap_inheritable; /* caps our children can inherit */
 	kernel_cap_t	cap_permitted;	/* caps we're permitted */
@@ -384,6 +386,8 @@ static inline void put_cred(const struct cred *_cred)
 #define current_sgid()		(current_cred_xxx(sgid))
 #define current_fsuid() 	(current_cred_xxx(fsuid))
 #define current_fsgid() 	(current_cred_xxx(fsgid))
+#define current_kfsuid() 	(current_cred_xxx(kfsuid))
+#define current_kfsgid() 	(current_cred_xxx(kfsgid))
 #define current_cap()		(current_cred_xxx(cap_effective))
 #define current_user()		(current_cred_xxx(user))
 
-- 
2.25.0


^ permalink raw reply related

* [PATCH v2 12/28] sys:__sys_setregid(): handle fsid mappings
From: Christian Brauner @ 2020-02-14 18:35 UTC (permalink / raw)
  To: Stéphane Graber, Eric W. Biederman, Aleksa Sarai, Jann Horn
  Cc: smbarber, Seth Forshee, Alexander Viro, Alexey Dobriyan,
	Serge Hallyn, James Morris, Kees Cook, Jonathan Corbet,
	Phil Estes, linux-kernel, linux-fsdevel, containers,
	linux-security-module, linux-api, Christian Brauner
In-Reply-To: <20200214183554.1133805-1-christian.brauner@ubuntu.com>

Switch setregid() to lookup fsids in the fsid mappings. If no fsid mappings are
setup the behavior is unchanged, i.e. fsids are looked up in the id mappings.

During setregid() the kfsgid is set to the kegid corresponding the egid that is
requested by userspace. If the requested egid is -1 the kfsgid is reset to the
current kegid. For the latter case this means we need to lookup the
corresponding userspace egid corresponding to the current kegid in the id
mappings and translate this egid into the corresponding kfsgid in the fsid
mappings.

The kfsid to cleanly handle userns visible filesystem is set as before.

We require that a user must have a valid fsid mapping for the target id. This
is consistent with how the setid calls work today without fsid mappings.

Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
---
/* v2 */
- Christian Brauner <christian.brauner@ubuntu.com>:
  - set kfsid which is used when dealing with proc permission checking
---
 kernel/sys.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/kernel/sys.c b/kernel/sys.c
index 4697e010bbd7..22eea030d9e7 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -354,7 +354,7 @@ long __sys_setregid(gid_t rgid, gid_t egid)
 	const struct cred *old;
 	struct cred *new;
 	int retval;
-	kgid_t krgid, kegid;
+	kgid_t krgid, kegid, kfsgid;
 
 	krgid = make_kgid(ns, rgid);
 	kegid = make_kgid(ns, egid);
@@ -386,12 +386,20 @@ long __sys_setregid(gid_t rgid, gid_t egid)
 			new->egid = kegid;
 		else
 			goto error;
+		kfsgid = make_kfsgid(ns, egid);
+	} else {
+		kfsgid = kgid_to_kfsgid(new->user_ns, new->egid);
+	}
+	if (!gid_valid(kfsgid)) {
+		retval = -EINVAL;
+		goto error;
 	}
 
 	if (rgid != (gid_t) -1 ||
 	    (egid != (gid_t) -1 && !gid_eq(kegid, old->gid)))
 		new->sgid = new->egid;
-	new->fsgid = new->egid;
+	new->kfsgid = new->egid;
+	new->fsgid = kfsgid;
 
 	return commit_creds(new);
 
-- 
2.25.0


^ permalink raw reply related

* [PATCH v2 08/28] sys: __sys_setfsgid(): handle fsid mappings
From: Christian Brauner @ 2020-02-14 18:35 UTC (permalink / raw)
  To: Stéphane Graber, Eric W. Biederman, Aleksa Sarai, Jann Horn
  Cc: smbarber, Seth Forshee, Alexander Viro, Alexey Dobriyan,
	Serge Hallyn, James Morris, Kees Cook, Jonathan Corbet,
	Phil Estes, linux-kernel, linux-fsdevel, containers,
	linux-security-module, linux-api, Christian Brauner
In-Reply-To: <20200214183554.1133805-1-christian.brauner@ubuntu.com>

Switch setfsgid() to lookup fsids in the fsid mappings. If no fsid mappings are
setup the behavior is unchanged, i.e. fsids are looked up in the id mappings.

A caller can only setfs{g,u}id() to a given id if the id maps to a valid kid in
both the id and fsid maps of the caller's user namespace. This is always the
case when no id mappings and fsid mappings have been written. It is also always
the case when an id mapping has been written which includes the target id and
but no fsid mappings have been written. All non-fsid mapping aware workloads
will thus work just as before.
Requiring a valid mapping for the target id in both the id and fsid mappings of
the container simplifies permission checking for userns visible filesystems
such as proc.

Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
---
/* v2 */
- Christian Brauner <christian.brauner@ubuntu.com>:
  - Set unmapped fsid as well.
---
 kernel/sys.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/kernel/sys.c b/kernel/sys.c
index 13f790dbda71..864fa78f25a7 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -849,15 +849,19 @@ long __sys_setfsgid(gid_t gid)
 	const struct cred *old;
 	struct cred *new;
 	gid_t old_fsgid;
-	kgid_t kgid;
+	kgid_t kgid, kfsgid;
 
 	old = current_cred();
-	old_fsgid = from_kgid_munged(old->user_ns, old->fsgid);
+	old_fsgid = from_kfsgid_munged(old->user_ns, old->fsgid);
 
-	kgid = make_kgid(old->user_ns, gid);
+	kgid = make_kfsgid(old->user_ns, gid);
 	if (!gid_valid(kgid))
 		return old_fsgid;
 
+	kfsgid = make_kgid(old->user_ns, gid);
+	if (!gid_valid(kfsgid))
+		return old_fsgid;
+
 	new = prepare_creds();
 	if (!new)
 		return old_fsgid;
@@ -867,6 +871,7 @@ long __sys_setfsgid(gid_t gid)
 	    ns_capable(old->user_ns, CAP_SETGID)) {
 		if (!gid_eq(kgid, old->fsgid)) {
 			new->fsgid = kgid;
+			new->kfsgid = kfsgid;
 			goto change_okay;
 		}
 	}
-- 
2.25.0


^ permalink raw reply related

* [PATCH v2 09/28] sys:__sys_setuid(): handle fsid mappings
From: Christian Brauner @ 2020-02-14 18:35 UTC (permalink / raw)
  To: Stéphane Graber, Eric W. Biederman, Aleksa Sarai, Jann Horn
  Cc: smbarber, Seth Forshee, Alexander Viro, Alexey Dobriyan,
	Serge Hallyn, James Morris, Kees Cook, Jonathan Corbet,
	Phil Estes, linux-kernel, linux-fsdevel, containers,
	linux-security-module, linux-api, Christian Brauner
In-Reply-To: <20200214183554.1133805-1-christian.brauner@ubuntu.com>

Switch setuid() to lookup fsids in the fsid mappings. If no fsid mappings are
setup the behavior is unchanged, i.e. fsids are looked up in the id mappings.
The kfsid to cleanly handle userns visible filesystem is set as before.

We require that a user must have a valid fsid mapping for the target id. This
is consistent with how the setid calls work today without fsid mappings.

Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
---
/* v2 */
- Christian Brauner <christian.brauner@ubuntu.com>:
  - set kfsid which is used when dealing with proc permission checking
---
 kernel/sys.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/kernel/sys.c b/kernel/sys.c
index 864fa78f25a7..a8eefd748327 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -574,11 +574,16 @@ long __sys_setuid(uid_t uid)
 	struct cred *new;
 	int retval;
 	kuid_t kuid;
+	kuid_t kfsuid;
 
 	kuid = make_kuid(ns, uid);
 	if (!uid_valid(kuid))
 		return -EINVAL;
 
+	kfsuid = make_kfsuid(ns, uid);
+	if (!uid_valid(kfsuid))
+		return -EINVAL;
+
 	new = prepare_creds();
 	if (!new)
 		return -ENOMEM;
@@ -596,7 +601,8 @@ long __sys_setuid(uid_t uid)
 		goto error;
 	}
 
-	new->fsuid = new->euid = kuid;
+	new->kfsuid = new->euid = kuid;
+	new->fsuid = kfsuid;
 
 	retval = security_task_fix_setuid(new, old, LSM_SETID_ID);
 	if (retval < 0)
-- 
2.25.0


^ permalink raw reply related

* [PATCH v2] ARM: qcom_defconfig: Enable QRTR
From: Luca Weiss @ 2020-02-14 18:31 UTC (permalink / raw)
  To: linux-arm-kernel
  Cc: ~postmarketos/upstreaming, linux-arm-msm, Luca Weiss,
	Russell King, Bjorn Andersson, Brian Masney, Linus Walleij,
	Georgi Djakov, Krzysztof Kozlowski, Jordan Crouse,
	Matti Lehtimäki, linux-kernel

This option is useful on msm8974, so enable it.

Signed-off-by: Luca Weiss <luca@z3ntu.xyz>
---
Changes from v1:
- set options as =m instead of =y

 arch/arm/configs/qcom_defconfig | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/arm/configs/qcom_defconfig b/arch/arm/configs/qcom_defconfig
index ad3417a63cdf..4702feef2cc9 100644
--- a/arch/arm/configs/qcom_defconfig
+++ b/arch/arm/configs/qcom_defconfig
@@ -44,6 +44,8 @@ CONFIG_IP_ROUTE_VERBOSE=y
 CONFIG_IP_PNP=y
 CONFIG_IP_PNP_DHCP=y
 # CONFIG_IPV6 is not set
+CONFIG_QRTR=m
+CONFIG_QRTR_SMD=m
 CONFIG_CFG80211=m
 CONFIG_MAC80211=m
 CONFIG_RFKILL=y
-- 
2.25.0


^ permalink raw reply related

* [PATCH v2 10/28] sys:__sys_setgid(): handle fsid mappings
From: Christian Brauner @ 2020-02-14 18:35 UTC (permalink / raw)
  To: Stéphane Graber, Eric W. Biederman, Aleksa Sarai, Jann Horn
  Cc: smbarber, Seth Forshee, Alexander Viro, Alexey Dobriyan,
	Serge Hallyn, James Morris, Kees Cook, Jonathan Corbet,
	Phil Estes, linux-kernel, linux-fsdevel, containers,
	linux-security-module, linux-api, Christian Brauner
In-Reply-To: <20200214183554.1133805-1-christian.brauner@ubuntu.com>

Switch setgid() to lookup fsids in the fsid mappings. If no fsid mappings are
setup the behavior is unchanged, i.e. fsids are looked up in the id mappings.
The kfsid to cleanly handle userns visible filesystem is set as before.

We require that a user must have a valid fsid mapping for the target id. This
is consistent with how the setid calls work today without fsid mappings.

Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
---
/* v2 */
- Christian Brauner <christian.brauner@ubuntu.com>:
  - set kfsid which is used when dealing with proc permission checking
---
 kernel/sys.c | 19 +++++++++++++------
 1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/kernel/sys.c b/kernel/sys.c
index a8eefd748327..aa379fb5e93b 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -416,24 +416,31 @@ long __sys_setgid(gid_t gid)
 	const struct cred *old;
 	struct cred *new;
 	int retval;
-	kgid_t kgid;
+	kgid_t kgid, kfsgid;
 
 	kgid = make_kgid(ns, gid);
 	if (!gid_valid(kgid))
 		return -EINVAL;
 
+	kfsgid = make_kfsgid(ns, gid);
+	if (!gid_valid(kfsgid))
+		return -EINVAL;
+
 	new = prepare_creds();
 	if (!new)
 		return -ENOMEM;
 	old = current_cred();
 
 	retval = -EPERM;
-	if (ns_capable(old->user_ns, CAP_SETGID))
-		new->gid = new->egid = new->sgid = new->fsgid = kgid;
-	else if (gid_eq(kgid, old->gid) || gid_eq(kgid, old->sgid))
-		new->egid = new->fsgid = kgid;
-	else
+	if (ns_capable(old->user_ns, CAP_SETGID)) {
+		new->gid = new->egid = new->sgid = new->kfsgid = kgid;
+		new->fsgid = kfsgid;
+	} else if (gid_eq(kgid, old->gid) || gid_eq(kgid, old->sgid)) {
+		new->egid = new->kfsgid = kgid;
+		new->fsgid = kfsgid;
+	} else {
 		goto error;
+	}
 
 	return commit_creds(new);
 
-- 
2.25.0


^ permalink raw reply related

* Re: [PATCH v4 5/6] MIPS: CI20: Modify DTS to support high resolution timer for SMP.
From: Paul Cercueil @ 2020-02-14 18:37 UTC (permalink / raw)
  To: 周琰杰 (Zhou Yanjie)
  Cc: linux-mips, linux-clk, linux-kernel, devicetree, mturquette,
	sboyd, robh+dt, mark.rutland, ralf, paulburton, jiaxun.yang,
	chenhc, allison, tglx, daniel.lezcano, geert+renesas, krzk,
	keescook, ebiederm, miquel.raynal, paul, hns, sernia.zhou,
	zhenwenjin, mips-creator-ci20-dev, 1326991897
In-Reply-To: <1581703360-112557-7-git-send-email-zhouyanjie@wanyeetech.com>

Hi Zhou,

I think you can move this patch before the clocksource one - it will 
work with the old clocksource code and in generally it's a good idea to 
ensure (if possible) that you can git-bisect without ending up with a 
broken kernel.

-Paul


Le sam., févr. 15, 2020 at 02:02, 周琰杰 (Zhou Yanjie) 
<zhouyanjie@wanyeetech.com> a écrit :
> Modify DTS, change tcu channel from 2 to 3, channel #0 and #1 for
> per core local timer, #2 for clocksource.
> 
> Tested-by: H. Nikolaus Schaller <hns@goldelico.com>
> Tested-by: Paul Boddie <paul@boddie.org.uk>
> Signed-off-by: 周琰杰 (Zhou Yanjie) <zhouyanjie@wanyeetech.com>
> ---
> 
> Notes:
>     v1->v2:
>     No change.
> 
>     v2->v3:
>     No change.
> 
>     v3->v4:
>     Rebase on top of kernel 5.6-rc1.
> 
>  arch/mips/boot/dts/ingenic/ci20.dts | 11 +++++++++--
>  1 file changed, 9 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/mips/boot/dts/ingenic/ci20.dts 
> b/arch/mips/boot/dts/ingenic/ci20.dts
> index 37b9316..98c4c42 100644
> --- a/arch/mips/boot/dts/ingenic/ci20.dts
> +++ b/arch/mips/boot/dts/ingenic/ci20.dts
> @@ -456,6 +456,13 @@
> 
>  &tcu {
>  	/* 3 MHz for the system timer and clocksource */
> -	assigned-clocks = <&tcu TCU_CLK_TIMER0>, <&tcu TCU_CLK_TIMER1>;
> -	assigned-clock-rates = <3000000>, <3000000>;
> +	assigned-clocks = <&tcu TCU_CLK_TIMER0>, <&tcu TCU_CLK_TIMER1>,
> +					  <&tcu TCU_CLK_TIMER2>;
> +	assigned-clock-rates = <3000000>, <3000000>, <750000>;
> +
> +	/*
> +	 * Use channel #0 and #1 for the per core system timer,
> +	 * and use channel #2 for the clocksource.
> +	 */
> +	ingenic,pwm-channels-mask = <0xF8>;
>  };
> --
> 2.7.4
> 



^ permalink raw reply

* [PATCH v2 13/28] sys:__sys_setresuid(): handle fsid mappings
From: Christian Brauner @ 2020-02-14 18:35 UTC (permalink / raw)
  To: Stéphane Graber, Eric W. Biederman, Aleksa Sarai, Jann Horn
  Cc: smbarber, Seth Forshee, Alexander Viro, Alexey Dobriyan,
	Serge Hallyn, James Morris, Kees Cook, Jonathan Corbet,
	Phil Estes, linux-kernel, linux-fsdevel, containers,
	linux-security-module, linux-api, Christian Brauner
In-Reply-To: <20200214183554.1133805-1-christian.brauner@ubuntu.com>

Switch setresuid() to lookup fsids in the fsid mappings. If no fsid mappings
are setup the behavior is unchanged, i.e. fsids are looked up in the id
mappings.

During setresuid() the kfsuid is set to the keuid corresponding the euid that is
requested by userspace. If the requested euid is -1 the kfsuid is reset to the
current keuid. For the latter case this means we need to lookup the
corresponding userspace euid corresponding to the current keuid in the id
mappings and translate this euid into the corresponding kfsuid in the fsid
mappings.

The kfsid to cleanly handle userns visible filesystem is set as before.

We require that a user must have a valid fsid mapping for the target id. This
is consistent with how the setid calls work today without fsid mappings.

Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
---
/* v2 */
- Christian Brauner <christian.brauner@ubuntu.com>:
  - set kfsid which is used when dealing with proc permission checking
---
 kernel/sys.c | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/kernel/sys.c b/kernel/sys.c
index 22eea030d9e7..54e072145146 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -654,7 +654,7 @@ long __sys_setresuid(uid_t ruid, uid_t euid, uid_t suid)
 	const struct cred *old;
 	struct cred *new;
 	int retval;
-	kuid_t kruid, keuid, ksuid;
+	kuid_t kruid, keuid, ksuid, kfsuid;
 
 	kruid = make_kuid(ns, ruid);
 	keuid = make_kuid(ns, euid);
@@ -696,11 +696,21 @@ long __sys_setresuid(uid_t ruid, uid_t euid, uid_t suid)
 				goto error;
 		}
 	}
-	if (euid != (uid_t) -1)
+	if (euid != (uid_t) -1) {
 		new->euid = keuid;
+		kfsuid = make_kfsuid(ns, euid);
+	} else {
+		kfsuid = kuid_to_kfsuid(new->user_ns, new->euid);
+	}
+	if (!uid_valid(kfsuid)) {
+		return -EINVAL;
+		goto error;
+	}
+
 	if (suid != (uid_t) -1)
 		new->suid = ksuid;
-	new->fsuid = new->euid;
+	new->kfsuid = new->euid;
+	new->fsuid = kfsuid;
 
 	retval = security_task_fix_setuid(new, old, LSM_SETID_RES);
 	if (retval < 0)
-- 
2.25.0


^ permalink raw reply related

* [PATCH v2 14/28] sys:__sys_setresgid(): handle fsid mappings
From: Christian Brauner @ 2020-02-14 18:35 UTC (permalink / raw)
  To: Stéphane Graber, Eric W. Biederman, Aleksa Sarai, Jann Horn
  Cc: smbarber, Seth Forshee, Alexander Viro, Alexey Dobriyan,
	Serge Hallyn, James Morris, Kees Cook, Jonathan Corbet,
	Phil Estes, linux-kernel, linux-fsdevel, containers,
	linux-security-module, linux-api, Christian Brauner
In-Reply-To: <20200214183554.1133805-1-christian.brauner@ubuntu.com>

Switch setresgid() to lookup fsids in the fsid mappings. If no fsid mappings are
setup the behavior is unchanged, i.e. fsids are looked up in the id mappings.

During setresgid() the kfsgid is set to the kegid corresponding the egid that is
requested by userspace. If the requested egid is -1 the kfsgid is reset to the
current kegid. For the latter case this means we need to lookup the
corresponding userspace egid corresponding to the current kegid in the id
mappings and translate this egid into the corresponding kfsgid in the fsid
mappings.

The kfsid to cleanly handle userns visible filesystem is set as before.

We require that a user must have a valid fsid mapping for the target id. This
is consistent with how the setid calls work today without fsid mappings.

Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
---
/* v2 */
- Christian Brauner <christian.brauner@ubuntu.com>:
  - set kfsid which is used when dealing with proc permission checking
---
 kernel/sys.c | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/kernel/sys.c b/kernel/sys.c
index 54e072145146..78592deee2d8 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -756,7 +756,7 @@ long __sys_setresgid(gid_t rgid, gid_t egid, gid_t sgid)
 	const struct cred *old;
 	struct cred *new;
 	int retval;
-	kgid_t krgid, kegid, ksgid;
+	kgid_t krgid, kegid, ksgid, kfsgid;
 
 	krgid = make_kgid(ns, rgid);
 	kegid = make_kgid(ns, egid);
@@ -789,11 +789,21 @@ long __sys_setresgid(gid_t rgid, gid_t egid, gid_t sgid)
 
 	if (rgid != (gid_t) -1)
 		new->gid = krgid;
-	if (egid != (gid_t) -1)
+	if (egid != (gid_t) -1) {
 		new->egid = kegid;
+		kfsgid = make_kfsgid(ns, egid);
+	} else {
+		kfsgid = kgid_to_kfsgid(new->user_ns, new->egid);
+	}
+	if (!gid_valid(kfsgid)) {
+		retval = -EINVAL;
+		goto error;
+	}
+
 	if (sgid != (gid_t) -1)
 		new->sgid = ksgid;
-	new->fsgid = new->egid;
+	new->kfsgid = new->egid;
+	new->fsgid = kfsgid;
 
 	return commit_creds(new);
 
-- 
2.25.0


^ permalink raw reply related

* [RFC v2 4/6] tpm: Separate TPM_TIS and TPM_TIS_ISA configs
From: Eric Auger @ 2020-02-14 18:37 UTC (permalink / raw)
  To: eric.auger.pro, eric.auger, stefanb, qemu-devel, qemu-arm,
	peter.maydell
  Cc: marcandre.lureau, lersek, ardb, philmd
In-Reply-To: <20200214183704.14389-1-eric.auger@redhat.com>

Let's separate the compilation of tpm_tis_common.c from
the compilation of tpm_tis_isa.c

The common part will be also compiled along with the
tpm_tis_sysbus device.

Signed-off-by: Eric Auger <eric.auger@redhat.com>
---
 default-configs/i386-softmmu.mak | 2 +-
 hw/i386/Kconfig                  | 2 +-
 hw/tpm/Kconfig                   | 7 ++++++-
 hw/tpm/Makefile.objs             | 3 ++-
 tests/qtest/Makefile.include     | 4 ++--
 5 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/default-configs/i386-softmmu.mak b/default-configs/i386-softmmu.mak
index 4cc64dafa2..84d1a2487c 100644
--- a/default-configs/i386-softmmu.mak
+++ b/default-configs/i386-softmmu.mak
@@ -20,7 +20,7 @@
 #CONFIG_SGA=n
 #CONFIG_TEST_DEVICES=n
 #CONFIG_TPM_CRB=n
-#CONFIG_TPM_TIS=n
+#CONFIG_TPM_TIS_ISA=n
 #CONFIG_VTD=n
 
 # Boards:
diff --git a/hw/i386/Kconfig b/hw/i386/Kconfig
index cdc851598c..c93f32f657 100644
--- a/hw/i386/Kconfig
+++ b/hw/i386/Kconfig
@@ -20,7 +20,7 @@ config PC
     imply SGA
     imply TEST_DEVICES
     imply TPM_CRB
-    imply TPM_TIS
+    imply TPM_TIS_ISA
     imply VGA_PCI
     imply VIRTIO_VGA
     select FDC
diff --git a/hw/tpm/Kconfig b/hw/tpm/Kconfig
index 9e67d990e8..686f8206bb 100644
--- a/hw/tpm/Kconfig
+++ b/hw/tpm/Kconfig
@@ -2,9 +2,14 @@ config TPMDEV
     bool
     depends on TPM
 
-config TPM_TIS
+config TPM_TIS_ISA
     bool
     depends on TPM && ISA_BUS
+    select TPM_TIS
+
+config TPM_TIS
+    bool
+    depends on TPM
     select TPMDEV
 
 config TPM_CRB
diff --git a/hw/tpm/Makefile.objs b/hw/tpm/Makefile.objs
index fcc4c2f27c..3ef2036cca 100644
--- a/hw/tpm/Makefile.objs
+++ b/hw/tpm/Makefile.objs
@@ -1,6 +1,7 @@
 common-obj-$(CONFIG_TPM) += tpm_util.o
 obj-$(call lor,$(CONFIG_TPM_TIS),$(CONFIG_TPM_CRB)) += tpm_ppi.o
-common-obj-$(CONFIG_TPM_TIS) += tpm_tis_isa.o tpm_tis_common.o
+common-obj-$(CONFIG_TPM_TIS_ISA) += tpm_tis_isa.o
+common-obj-$(CONFIG_TPM_TIS) += tpm_tis_common.o
 common-obj-$(CONFIG_TPM_CRB) += tpm_crb.o
 common-obj-$(CONFIG_TPM_PASSTHROUGH) += tpm_passthrough.o
 common-obj-$(CONFIG_TPM_EMULATOR) += tpm_emulator.o
diff --git a/tests/qtest/Makefile.include b/tests/qtest/Makefile.include
index eb0f23b108..33dd3c89cc 100644
--- a/tests/qtest/Makefile.include
+++ b/tests/qtest/Makefile.include
@@ -54,8 +54,8 @@ check-qtest-i386-y += q35-test
 check-qtest-i386-y += vmgenid-test
 check-qtest-i386-$(CONFIG_TPM_CRB) += tpm-crb-swtpm-test
 check-qtest-i386-$(CONFIG_TPM_CRB) += tpm-crb-test
-check-qtest-i386-$(CONFIG_TPM_TIS) += tpm-tis-swtpm-test
-check-qtest-i386-$(CONFIG_TPM_TIS) += tpm-tis-test
+check-qtest-i386-$(CONFIG_TPM_TIS_ISA) += tpm-tis-swtpm-test
+check-qtest-i386-$(CONFIG_TPM_TIS_ISA) += tpm-tis-test
 check-qtest-i386-$(CONFIG_SLIRP) += test-netfilter
 check-qtest-i386-$(CONFIG_POSIX) += test-filter-mirror
 check-qtest-i386-$(CONFIG_RTL8139_PCI) += test-filter-redirector
-- 
2.20.1


^ permalink raw reply related

* [PATCH v2 18/28] capability: privileged_wrt_inode_uidgid(): handle fsid mappings
From: Christian Brauner @ 2020-02-14 18:35 UTC (permalink / raw)
  To: Stéphane Graber, Eric W. Biederman, Aleksa Sarai, Jann Horn
  Cc: smbarber, Seth Forshee, Alexander Viro, Alexey Dobriyan,
	Serge Hallyn, James Morris, Kees Cook, Jonathan Corbet,
	Phil Estes, linux-kernel, linux-fsdevel, containers,
	linux-security-module, linux-api, Christian Brauner
In-Reply-To: <20200214183554.1133805-1-christian.brauner@ubuntu.com>

Switch privileged_wrt_inode_uidgid() to lookup fsids in the fsid mappings. If
no fsid mappings are setup the behavior is unchanged, i.e. fsids are looked up
in the id mappings.

Filesystems that share a superblock in all user namespaces they are mounted in
will retain their old semantics even with the introduction of fsidmappings.

Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
---
/* v2 */
unchanged
---
 kernel/capability.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/kernel/capability.c b/kernel/capability.c
index 1444f3954d75..2b0c1dc992e2 100644
--- a/kernel/capability.c
+++ b/kernel/capability.c
@@ -19,6 +19,8 @@
 #include <linux/pid_namespace.h>
 #include <linux/user_namespace.h>
 #include <linux/uaccess.h>
+#include <linux/fsuidgid.h>
+#include <linux/fs.h>
 
 /*
  * Leveraged for setting/resetting capabilities
@@ -486,8 +488,12 @@ EXPORT_SYMBOL(file_ns_capable);
  */
 bool privileged_wrt_inode_uidgid(struct user_namespace *ns, const struct inode *inode)
 {
-	return kuid_has_mapping(ns, inode->i_uid) &&
-		kgid_has_mapping(ns, inode->i_gid);
+	if (is_userns_visible(inode->i_sb->s_iflags))
+		return kuid_has_mapping(ns, inode->i_uid) &&
+		       kgid_has_mapping(ns, inode->i_gid);
+
+	return kfsuid_has_mapping(ns, inode->i_uid) &&
+	       kfsgid_has_mapping(ns, inode->i_gid);
 }
 
 /**
-- 
2.25.0


^ permalink raw reply related

* [PATCH v2 15/28] fs: add is_userns_visible() helper
From: Christian Brauner @ 2020-02-14 18:35 UTC (permalink / raw)
  To: Stéphane Graber, Eric W. Biederman, Aleksa Sarai, Jann Horn
  Cc: smbarber, Seth Forshee, Alexander Viro, Alexey Dobriyan,
	Serge Hallyn, James Morris, Kees Cook, Jonathan Corbet,
	Phil Estes, linux-kernel, linux-fsdevel, containers,
	linux-security-module, linux-api, Christian Brauner
In-Reply-To: <20200214183554.1133805-1-christian.brauner@ubuntu.com>

Introduce a helper which makes it possible to detect fileystems whose
superblock is visible in multiple user namespace. This currently only
means proc and sys. Such filesystems usually have special semantics so their
behavior will not be changed with the introduction of fsid mappings.

Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
---
/* v2 */
unchanged
---
 include/linux/fs.h | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 3cd4fe6b845e..fdc8fb2d786b 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -3651,4 +3651,9 @@ static inline int inode_drain_writes(struct inode *inode)
 	return filemap_write_and_wait(inode->i_mapping);
 }
 
+static inline bool is_userns_visible(unsigned long iflags)
+{
+	return (iflags & SB_I_USERNS_VISIBLE);
+}
+
 #endif /* _LINUX_FS_H */
-- 
2.25.0


^ permalink raw reply related

* [PATCH v2 16/28] namei: may_{o_}create(): handle fsid mappings
From: Christian Brauner @ 2020-02-14 18:35 UTC (permalink / raw)
  To: Stéphane Graber, Eric W. Biederman, Aleksa Sarai, Jann Horn
  Cc: smbarber, Seth Forshee, Alexander Viro, Alexey Dobriyan,
	Serge Hallyn, James Morris, Kees Cook, Jonathan Corbet,
	Phil Estes, linux-kernel, linux-fsdevel, containers,
	linux-security-module, linux-api, Christian Brauner
In-Reply-To: <20200214183554.1133805-1-christian.brauner@ubuntu.com>

Switch may_{o_}create() to lookup fsids in the fsid mappings. If no fsid
mappings are setup the behavior is unchanged, i.e. fsids are looked up in the
id mappings.

Filesystems that share a superblock in all user namespaces they are mounted in
will retain their old semantics even with the introduction of fsidmappings.

Cc: Jann Horn <jannh@google.com>
Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
---
/* v2 */
- Jann Horn <jannh@google.com>:
  - Ensure that the correct fsid is used when dealing with userns visible
    filesystems like proc.
---
 fs/namei.c | 36 ++++++++++++++++++++++++++++--------
 1 file changed, 28 insertions(+), 8 deletions(-)

diff --git a/fs/namei.c b/fs/namei.c
index db6565c99825..c5b014000f13 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -39,6 +39,7 @@
 #include <linux/bitops.h>
 #include <linux/init_task.h>
 #include <linux/uaccess.h>
+#include <linux/fsuidgid.h>
 
 #include "internal.h"
 #include "mount.h"
@@ -287,6 +288,13 @@ static int check_acl(struct inode *inode, int mask)
 	return -EAGAIN;
 }
 
+static inline kuid_t get_current_fsuid(const struct inode *inode)
+{
+	if (is_userns_visible(inode->i_sb->s_iflags))
+		return current_kfsuid();
+	return current_fsuid();
+}
+
 /*
  * This does the basic permission checking
  */
@@ -294,7 +302,7 @@ static int acl_permission_check(struct inode *inode, int mask)
 {
 	unsigned int mode = inode->i_mode;
 
-	if (likely(uid_eq(current_fsuid(), inode->i_uid)))
+	if (likely(uid_eq(get_current_fsuid(inode), inode->i_uid)))
 		mode >>= 6;
 	else {
 		if (IS_POSIXACL(inode) && (mode & S_IRWXG)) {
@@ -980,7 +988,7 @@ static inline int may_follow_link(struct nameidata *nd)
 
 	/* Allowed if owner and follower match. */
 	inode = nd->link_inode;
-	if (uid_eq(current_cred()->fsuid, inode->i_uid))
+	if (uid_eq(get_current_fsuid(inode), inode->i_uid))
 		return 0;
 
 	/* Allowed if parent directory not sticky and world-writable. */
@@ -1097,7 +1105,7 @@ static int may_create_in_sticky(umode_t dir_mode, kuid_t dir_uid,
 	    (!sysctl_protected_regular && S_ISREG(inode->i_mode)) ||
 	    likely(!(dir_mode & S_ISVTX)) ||
 	    uid_eq(inode->i_uid, dir_uid) ||
-	    uid_eq(current_fsuid(), inode->i_uid))
+	    uid_eq(get_current_fsuid(inode), inode->i_uid))
 		return 0;
 
 	if (likely(dir_mode & 0002) ||
@@ -2832,7 +2840,7 @@ EXPORT_SYMBOL(kern_path_mountpoint);
 
 int __check_sticky(struct inode *dir, struct inode *inode)
 {
-	kuid_t fsuid = current_fsuid();
+	kuid_t fsuid = get_current_fsuid(inode);
 
 	if (uid_eq(inode->i_uid, fsuid))
 		return 0;
@@ -2902,6 +2910,20 @@ static int may_delete(struct inode *dir, struct dentry *victim, bool isdir)
 	return 0;
 }
 
+static bool fsid_has_mapping(struct user_namespace *ns, struct super_block *sb)
+{
+	if (is_userns_visible(sb->s_iflags)) {
+		if (!kuid_has_mapping(ns, current_kfsuid()) ||
+		    !kgid_has_mapping(ns, current_kfsgid()))
+			return false;
+	} else if (!kfsuid_has_mapping(ns, current_fsuid()) ||
+		   !kfsgid_has_mapping(ns, current_fsgid())) {
+		return false;
+	}
+
+	return true;
+}
+
 /*	Check whether we can create an object with dentry child in directory
  *  dir.
  *  1. We can't do it if child already exists (open has special treatment for
@@ -2920,8 +2942,7 @@ static inline int may_create(struct inode *dir, struct dentry *child)
 	if (IS_DEADDIR(dir))
 		return -ENOENT;
 	s_user_ns = dir->i_sb->s_user_ns;
-	if (!kuid_has_mapping(s_user_ns, current_fsuid()) ||
-	    !kgid_has_mapping(s_user_ns, current_fsgid()))
+	if (!fsid_has_mapping(s_user_ns, dir->i_sb))
 		return -EOVERFLOW;
 	return inode_permission(dir, MAY_WRITE | MAY_EXEC);
 }
@@ -3103,8 +3124,7 @@ static int may_o_create(const struct path *dir, struct dentry *dentry, umode_t m
 		return error;
 
 	s_user_ns = dir->dentry->d_sb->s_user_ns;
-	if (!kuid_has_mapping(s_user_ns, current_fsuid()) ||
-	    !kgid_has_mapping(s_user_ns, current_fsgid()))
+	if (!fsid_has_mapping(s_user_ns, dir->dentry->d_sb))
 		return -EOVERFLOW;
 
 	error = inode_permission(dir->dentry->d_inode, MAY_WRITE | MAY_EXEC);
-- 
2.25.0


^ permalink raw reply related

* [PATCH v2 17/28] inode: inode_owner_or_capable(): handle fsid mappings
From: Christian Brauner @ 2020-02-14 18:35 UTC (permalink / raw)
  To: Stéphane Graber, Eric W. Biederman, Aleksa Sarai, Jann Horn
  Cc: smbarber, Seth Forshee, Alexander Viro, Alexey Dobriyan,
	Serge Hallyn, James Morris, Kees Cook, Jonathan Corbet,
	Phil Estes, linux-kernel, linux-fsdevel, containers,
	linux-security-module, linux-api, Christian Brauner
In-Reply-To: <20200214183554.1133805-1-christian.brauner@ubuntu.com>

Switch inode_owner_or_capable() to lookup fsids in the fsid mappings. If no
fsid mappings are setup the behavior is unchanged, i.e. fsids are looked up in
the id mappings.

Filesystems that share a superblock in all user namespaces they are mounted in
will retain their old semantics even with the introduction of fsidmappings.

Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
---
/* v2 */
unchanged
---
 fs/inode.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/fs/inode.c b/fs/inode.c
index 7d57068b6b7a..81d7a30b381d 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -21,6 +21,7 @@
 #include <linux/ratelimit.h>
 #include <linux/list_lru.h>
 #include <linux/iversion.h>
+#include <linux/fsuidgid.h>
 #include <trace/events/writeback.h>
 #include "internal.h"
 
@@ -2087,8 +2088,12 @@ bool inode_owner_or_capable(const struct inode *inode)
 		return true;
 
 	ns = current_user_ns();
-	if (kuid_has_mapping(ns, inode->i_uid) && ns_capable(ns, CAP_FOWNER))
+	if (is_userns_visible(inode->i_sb->s_iflags)) {
+		if (kuid_has_mapping(ns, inode->i_uid) && ns_capable(ns, CAP_FOWNER))
+			return true;
+	} else if (kfsuid_has_mapping(ns, inode->i_uid) && ns_capable(ns, CAP_FOWNER)) {
 		return true;
+	}
 	return false;
 }
 EXPORT_SYMBOL(inode_owner_or_capable);
-- 
2.25.0


^ permalink raw reply related

* [PATCH v2 19/28] stat: handle fsid mappings
From: Christian Brauner @ 2020-02-14 18:35 UTC (permalink / raw)
  To: Stéphane Graber, Eric W. Biederman, Aleksa Sarai, Jann Horn
  Cc: smbarber, Seth Forshee, Alexander Viro, Alexey Dobriyan,
	Serge Hallyn, James Morris, Kees Cook, Jonathan Corbet,
	Phil Estes, linux-kernel, linux-fsdevel, containers,
	linux-security-module, linux-api, Christian Brauner
In-Reply-To: <20200214183554.1133805-1-christian.brauner@ubuntu.com>

Switch attribute functions looking up fsids to them up in the fsid mappings. If
no fsid mappings are setup the behavior is unchanged, i.e. fsids are looked up
in the id mappings.

Filesystems that share a superblock in all user namespaces they are mounted in
will retain their old semantics even with the introduction of fsidmappings.

Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
---
/* v2 */
unchanged
---
 fs/stat.c            | 48 +++++++++++++++++++++++++++++++++++---------
 include/linux/stat.h |  1 +
 2 files changed, 39 insertions(+), 10 deletions(-)

diff --git a/fs/stat.c b/fs/stat.c
index 030008796479..edd45678c4ed 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -10,6 +10,7 @@
 #include <linux/errno.h>
 #include <linux/file.h>
 #include <linux/highuid.h>
+#include <linux/fsuidgid.h>
 #include <linux/fs.h>
 #include <linux/namei.h>
 #include <linux/security.h>
@@ -79,6 +80,8 @@ int vfs_getattr_nosec(const struct path *path, struct kstat *stat,
 	if (IS_AUTOMOUNT(inode))
 		stat->attributes |= STATX_ATTR_AUTOMOUNT;
 
+	stat->userns_visible = is_userns_visible(inode->i_sb->s_iflags);
+
 	if (inode->i_op->getattr)
 		return inode->i_op->getattr(path, stat, request_mask,
 					    query_flags);
@@ -239,8 +242,13 @@ static int cp_old_stat(struct kstat *stat, struct __old_kernel_stat __user * sta
 	tmp.st_nlink = stat->nlink;
 	if (tmp.st_nlink != stat->nlink)
 		return -EOVERFLOW;
-	SET_UID(tmp.st_uid, from_kuid_munged(current_user_ns(), stat->uid));
-	SET_GID(tmp.st_gid, from_kgid_munged(current_user_ns(), stat->gid));
+	if (stat->userns_visible) {
+		SET_UID(tmp.st_uid, from_kuid_munged(current_user_ns(), stat->uid));
+		SET_GID(tmp.st_gid, from_kgid_munged(current_user_ns(), stat->gid));
+	} else {
+		SET_UID(tmp.st_uid, from_kfsuid_munged(current_user_ns(), stat->uid));
+		SET_GID(tmp.st_gid, from_kfsgid_munged(current_user_ns(), stat->gid));
+	}
 	tmp.st_rdev = old_encode_dev(stat->rdev);
 #if BITS_PER_LONG == 32
 	if (stat->size > MAX_NON_LFS)
@@ -327,8 +335,13 @@ static int cp_new_stat(struct kstat *stat, struct stat __user *statbuf)
 	tmp.st_nlink = stat->nlink;
 	if (tmp.st_nlink != stat->nlink)
 		return -EOVERFLOW;
-	SET_UID(tmp.st_uid, from_kuid_munged(current_user_ns(), stat->uid));
-	SET_GID(tmp.st_gid, from_kgid_munged(current_user_ns(), stat->gid));
+	if (stat->userns_visible) {
+		SET_UID(tmp.st_uid, from_kuid_munged(current_user_ns(), stat->uid));
+		SET_GID(tmp.st_gid, from_kgid_munged(current_user_ns(), stat->gid));
+	} else {
+		SET_UID(tmp.st_uid, from_kfsuid_munged(current_user_ns(), stat->uid));
+		SET_GID(tmp.st_gid, from_kfsgid_munged(current_user_ns(), stat->gid));
+	}
 	tmp.st_rdev = encode_dev(stat->rdev);
 	tmp.st_size = stat->size;
 	tmp.st_atime = stat->atime.tv_sec;
@@ -471,8 +484,13 @@ static long cp_new_stat64(struct kstat *stat, struct stat64 __user *statbuf)
 #endif
 	tmp.st_mode = stat->mode;
 	tmp.st_nlink = stat->nlink;
-	tmp.st_uid = from_kuid_munged(current_user_ns(), stat->uid);
-	tmp.st_gid = from_kgid_munged(current_user_ns(), stat->gid);
+	if (stat->userns_visible) {
+		tmp.st_uid, from_kuid_munged(current_user_ns(), stat->uid);
+		tmp.st_gid, from_kgid_munged(current_user_ns(), stat->gid);
+	} else {
+		tmp.st_uid, from_kfsuid_munged(current_user_ns(), stat->uid);
+		tmp.st_gid, from_kfsgid_munged(current_user_ns(), stat->gid);
+	}
 	tmp.st_atime = stat->atime.tv_sec;
 	tmp.st_atime_nsec = stat->atime.tv_nsec;
 	tmp.st_mtime = stat->mtime.tv_sec;
@@ -544,8 +562,13 @@ cp_statx(const struct kstat *stat, struct statx __user *buffer)
 	tmp.stx_blksize = stat->blksize;
 	tmp.stx_attributes = stat->attributes;
 	tmp.stx_nlink = stat->nlink;
-	tmp.stx_uid = from_kuid_munged(current_user_ns(), stat->uid);
-	tmp.stx_gid = from_kgid_munged(current_user_ns(), stat->gid);
+	if (stat->userns_visible) {
+		tmp.stx_uid = from_kuid_munged(current_user_ns(), stat->uid);
+		tmp.stx_gid = from_kgid_munged(current_user_ns(), stat->gid);
+	} else {
+		tmp.stx_uid = from_kfsuid_munged(current_user_ns(), stat->uid);
+		tmp.stx_gid = from_kfsgid_munged(current_user_ns(), stat->gid);
+	}
 	tmp.stx_mode = stat->mode;
 	tmp.stx_ino = stat->ino;
 	tmp.stx_size = stat->size;
@@ -615,8 +638,13 @@ static int cp_compat_stat(struct kstat *stat, struct compat_stat __user *ubuf)
 	tmp.st_nlink = stat->nlink;
 	if (tmp.st_nlink != stat->nlink)
 		return -EOVERFLOW;
-	SET_UID(tmp.st_uid, from_kuid_munged(current_user_ns(), stat->uid));
-	SET_GID(tmp.st_gid, from_kgid_munged(current_user_ns(), stat->gid));
+	if (stat->userns_visible) {
+		SET_UID(tmp.st_uid, from_kuid_munged(current_user_ns(), stat->uid));
+		SET_GID(tmp.st_gid, from_kgid_munged(current_user_ns(), stat->gid));
+	} else {
+		SET_UID(tmp.st_uid, from_kfsuid_munged(current_user_ns(), stat->uid));
+		SET_GID(tmp.st_gid, from_kfsgid_munged(current_user_ns(), stat->gid));
+	}
 	tmp.st_rdev = old_encode_dev(stat->rdev);
 	if ((u64) stat->size > MAX_NON_LFS)
 		return -EOVERFLOW;
diff --git a/include/linux/stat.h b/include/linux/stat.h
index 528c4baad091..e6d4ba73a970 100644
--- a/include/linux/stat.h
+++ b/include/linux/stat.h
@@ -47,6 +47,7 @@ struct kstat {
 	struct timespec64 ctime;
 	struct timespec64 btime;			/* File creation time */
 	u64		blocks;
+	bool		userns_visible;
 };
 
 #endif
-- 
2.25.0


^ permalink raw reply related

* [RFC v2 6/6] hw/arm/virt: vTPM support
From: Eric Auger @ 2020-02-14 18:37 UTC (permalink / raw)
  To: eric.auger.pro, eric.auger, stefanb, qemu-devel, qemu-arm,
	peter.maydell
  Cc: marcandre.lureau, lersek, ardb, philmd
In-Reply-To: <20200214183704.14389-1-eric.auger@redhat.com>

Let the TPM TIS SYSBUS device be dynamically instantiable
in ARM virt.  A device tree node is dynamically created
(TPM via MMIO).

The TPM Physical Presence interface (PPI) is not supported.

To run with the swtmp TPM emulator, the qemu command line must
be augmented with:

        -chardev socket,id=chrtpm,path=swtpm-sock \
        -tpmdev emulator,id=tpm0,chardev=chrtpm \
        -device tpm-tis-device,tpmdev=tpm0 \

swtpm/libtpms command line example:

swtpm socket --tpm2 -t -d --tpmstate dir=/tmp/tpm \
--ctrl type=unixio,path=swtpm-sock

Signed-off-by: Eric Auger <eric.auger@redhat.com>
---
 hw/arm/Kconfig      |  1 +
 hw/arm/sysbus-fdt.c | 36 ++++++++++++++++++++++++++++++++++++
 hw/arm/virt.c       |  7 +++++++
 3 files changed, 44 insertions(+)

diff --git a/hw/arm/Kconfig b/hw/arm/Kconfig
index 3d86691ae0..b6f03f7f53 100644
--- a/hw/arm/Kconfig
+++ b/hw/arm/Kconfig
@@ -5,6 +5,7 @@ config ARM_VIRT
     imply VFIO_AMD_XGBE
     imply VFIO_PLATFORM
     imply VFIO_XGMAC
+    imply TPM_TIS_SYSBUS
     select A15MPCORE
     select ACPI
     select ARM_SMMUV3
diff --git a/hw/arm/sysbus-fdt.c b/hw/arm/sysbus-fdt.c
index 022fc97ecd..adf50444c2 100644
--- a/hw/arm/sysbus-fdt.c
+++ b/hw/arm/sysbus-fdt.c
@@ -30,6 +30,7 @@
 #include "hw/arm/sysbus-fdt.h"
 #include "qemu/error-report.h"
 #include "sysemu/device_tree.h"
+#include "sysemu/tpm.h"
 #include "hw/platform-bus.h"
 #include "hw/vfio/vfio-platform.h"
 #include "hw/vfio/vfio-calxeda-xgmac.h"
@@ -434,6 +435,40 @@ static bool vfio_platform_match(SysBusDevice *sbdev,
 #define VFIO_PLATFORM_BINDING(compat, add_fn) \
     {TYPE_VFIO_PLATFORM, (compat), (add_fn), vfio_platform_match}
 
+/*
+ * add_tpm_tis_fdt_node: Create a DT node for TPM TIS
+ *
+ * See kernel documentation:
+ * Documentation/devicetree/bindings/security/tpm/tpm_tis_mmio.txt
+ * Optional interrupt for command completion is not exposed
+ */
+static int add_tpm_tis_fdt_node(SysBusDevice *sbdev, void *opaque)
+{
+    PlatformBusFDTData *data = opaque;
+    PlatformBusDevice *pbus = data->pbus;
+    void *fdt = data->fdt;
+    const char *parent_node = data->pbus_node_name;
+    int compat_str_len;
+    char *nodename;
+    uint32_t reg_attr[2];
+    uint64_t mmio_base;
+
+    mmio_base = platform_bus_get_mmio_addr(pbus, sbdev, 0);
+    nodename = g_strdup_printf("%s/tpm_tis@%" PRIx64, parent_node, mmio_base);
+    qemu_fdt_add_subnode(fdt, nodename);
+
+    compat_str_len = strlen("tcg,tpm-tis-mmio") + 1;
+    qemu_fdt_setprop(fdt, nodename, "compatible", "tcg,tpm-tis-mmio",
+                     compat_str_len);
+
+    reg_attr[0] = cpu_to_be32(mmio_base);
+    reg_attr[1] = cpu_to_be32(0x5000);
+    qemu_fdt_setprop(fdt, nodename, "reg", reg_attr, 2 * sizeof(uint32_t));
+
+    g_free(nodename);
+    return 0;
+}
+
 #endif /* CONFIG_LINUX */
 
 static int no_fdt_node(SysBusDevice *sbdev, void *opaque)
@@ -455,6 +490,7 @@ static const BindingEntry bindings[] = {
     TYPE_BINDING(TYPE_VFIO_CALXEDA_XGMAC, add_calxeda_midway_xgmac_fdt_node),
     TYPE_BINDING(TYPE_VFIO_AMD_XGBE, add_amd_xgbe_fdt_node),
     VFIO_PLATFORM_BINDING("amd,xgbe-seattle-v1a", add_amd_xgbe_fdt_node),
+    TYPE_BINDING(TYPE_TPM_TIS_SYSBUS, add_tpm_tis_fdt_node),
 #endif
     TYPE_BINDING(TYPE_RAMFB_DEVICE, no_fdt_node),
     TYPE_BINDING("", NULL), /* last element */
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index f788fe27d6..4b967e39d1 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -47,6 +47,7 @@
 #include "sysemu/numa.h"
 #include "sysemu/runstate.h"
 #include "sysemu/sysemu.h"
+#include "sysemu/tpm.h"
 #include "sysemu/kvm.h"
 #include "hw/loader.h"
 #include "exec/address-spaces.h"
@@ -2041,6 +2042,7 @@ static void virt_machine_class_init(ObjectClass *oc, void *data)
     machine_class_allow_dynamic_sysbus_dev(mc, TYPE_VFIO_AMD_XGBE);
     machine_class_allow_dynamic_sysbus_dev(mc, TYPE_RAMFB_DEVICE);
     machine_class_allow_dynamic_sysbus_dev(mc, TYPE_VFIO_PLATFORM);
+    machine_class_allow_dynamic_sysbus_dev(mc, TYPE_TPM_TIS_SYSBUS);
     mc->block_default_type = IF_VIRTIO;
     mc->no_cdrom = 1;
     mc->pci_allow_0_address = true;
@@ -2153,6 +2155,11 @@ type_init(machvirt_machine_init);
 
 static void virt_machine_5_0_options(MachineClass *mc)
 {
+    static GlobalProperty compat[] = {
+        { TYPE_TPM_TIS_SYSBUS, "ppi", "false" },
+    };
+
+    compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat));
 }
 DEFINE_VIRT_MACHINE_AS_LATEST(5, 0)
 
-- 
2.20.1


^ permalink raw reply related


This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.