Linux Security Modules development
 help / color / mirror / Atom feed
* [PATCH v1 2/4] [RFC] x86/trampfd: Provide support for the trampoline file descriptor
From: madvenka @ 2020-07-28 13:10 UTC (permalink / raw)
  To: kernel-hardening, linux-api, linux-arm-kernel, linux-fsdevel,
	linux-integrity, linux-kernel, linux-security-module, oleg, x86,
	madvenka
In-Reply-To: <20200728131050.24443-1-madvenka@linux.microsoft.com>

From: "Madhavan T. Venkataraman" <madvenka@linux.microsoft.com>

Implement 32-bit and 64-bit X86 support for the trampoline file descriptor.

	- Define architecture specific register names
	- Handle the trampoline invocation page fault
	- Setup the user register context on trampoline invocation
	- Setup the user stack context on trampoline invocation

Signed-off-by: Madhavan T. Venkataraman <madvenka@linux.microsoft.com>
---
 arch/x86/entry/syscalls/syscall_32.tbl |   1 +
 arch/x86/entry/syscalls/syscall_64.tbl |   1 +
 arch/x86/include/uapi/asm/ptrace.h     |  38 +++
 arch/x86/kernel/Makefile               |   2 +
 arch/x86/kernel/trampfd.c              | 313 +++++++++++++++++++++++++
 arch/x86/mm/fault.c                    |  11 +
 6 files changed, 366 insertions(+)
 create mode 100644 arch/x86/kernel/trampfd.c

diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
index d8f8a1a69ed1..77eb50414591 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -443,3 +443,4 @@
 437	i386	openat2			sys_openat2
 438	i386	pidfd_getfd		sys_pidfd_getfd
 439	i386	faccessat2		sys_faccessat2
+440	i386	trampfd_create		sys_trampfd_create
diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
index 78847b32e137..9d962de1d21f 100644
--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
@@ -360,6 +360,7 @@
 437	common	openat2			sys_openat2
 438	common	pidfd_getfd		sys_pidfd_getfd
 439	common	faccessat2		sys_faccessat2
+440	common	trampfd_create		sys_trampfd_create
 
 #
 # x32-specific system call numbers start at 512 to avoid cache impact
diff --git a/arch/x86/include/uapi/asm/ptrace.h b/arch/x86/include/uapi/asm/ptrace.h
index 85165c0edafc..b031598f857e 100644
--- a/arch/x86/include/uapi/asm/ptrace.h
+++ b/arch/x86/include/uapi/asm/ptrace.h
@@ -9,6 +9,44 @@
 
 #ifndef __ASSEMBLY__
 
+/*
+ * These register names are to be used by 32-bit applications.
+ */
+enum reg_32_name {
+	x32_eax,
+	x32_ebx,
+	x32_ecx,
+	x32_edx,
+	x32_esi,
+	x32_edi,
+	x32_ebp,
+	x32_eip,
+	x32_max,
+};
+
+/*
+ * These register names are to be used by 64-bit applications.
+ */
+enum reg_64_name {
+	x64_rax = x32_max,
+	x64_rbx,
+	x64_rcx,
+	x64_rdx,
+	x64_rsi,
+	x64_rdi,
+	x64_rbp,
+	x64_r8,
+	x64_r9,
+	x64_r10,
+	x64_r11,
+	x64_r12,
+	x64_r13,
+	x64_r14,
+	x64_r15,
+	x64_rip,
+	x64_max,
+};
+
 #ifdef __i386__
 /* this struct defines the way the registers are stored on the
    stack during a system call. */
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index e77261db2391..5d968ac4c7d9 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -157,3 +157,5 @@ ifeq ($(CONFIG_X86_64),y)
 endif
 
 obj-$(CONFIG_IMA_SECURE_AND_OR_TRUSTED_BOOT)	+= ima_arch.o
+
+obj-$(CONFIG_TRAMPFD)			+= trampfd.o
diff --git a/arch/x86/kernel/trampfd.c b/arch/x86/kernel/trampfd.c
new file mode 100644
index 000000000000..f6b5507134d2
--- /dev/null
+++ b/arch/x86/kernel/trampfd.c
@@ -0,0 +1,313 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Trampoline File Descriptor - X86 support.
+ *
+ * Author: Madhavan T. Venkataraman (madvenka@linux.microsoft.com)
+ *
+ * Copyright (c) 2020, Microsoft Corporation.
+ */
+
+#include <linux/thread_info.h>
+#include <linux/mm_types.h>
+#include <linux/trampfd.h>
+#include <linux/uaccess.h>
+
+/* ---------------------------- Register Context ---------------------------- */
+
+static inline bool is_compat(void)
+{
+	return (IS_ENABLED(CONFIG_X86_32) ||
+		(IS_ENABLED(CONFIG_COMPAT) && test_thread_flag(TIF_ADDR32)));
+}
+
+static void set_reg_32(struct pt_regs *pt_regs, u32 name, u64 value)
+{
+	switch (name) {
+	case x32_eax:
+		pt_regs->ax = (unsigned long)value;
+		break;
+	case x32_ebx:
+		pt_regs->bx = (unsigned long)value;
+		break;
+	case x32_ecx:
+		pt_regs->cx = (unsigned long)value;
+		break;
+	case x32_edx:
+		pt_regs->dx = (unsigned long)value;
+		break;
+	case x32_esi:
+		pt_regs->si = (unsigned long)value;
+		break;
+	case x32_edi:
+		pt_regs->di = (unsigned long)value;
+		break;
+	case x32_ebp:
+		pt_regs->bp = (unsigned long)value;
+		break;
+	case x32_eip:
+		pt_regs->ip = (unsigned long)value;
+		break;
+	default:
+		WARN(1, "%s: Illegal register name %d\n", __func__, name);
+		break;
+	}
+}
+
+#ifdef __i386__
+
+static void set_reg_64(struct pt_regs *pt_regs, u32 name, u64 value)
+{
+}
+
+#else
+
+static void set_reg_64(struct pt_regs *pt_regs, u32 name, u64 value)
+{
+	switch (name) {
+	case x64_rax:
+		pt_regs->ax = (unsigned long)value;
+		break;
+	case x64_rbx:
+		pt_regs->bx = (unsigned long)value;
+		break;
+	case x64_rcx:
+		pt_regs->cx = (unsigned long)value;
+		break;
+	case x64_rdx:
+		pt_regs->dx = (unsigned long)value;
+		break;
+	case x64_rsi:
+		pt_regs->si = (unsigned long)value;
+		break;
+	case x64_rdi:
+		pt_regs->di = (unsigned long)value;
+		break;
+	case x64_rbp:
+		pt_regs->bp = (unsigned long)value;
+		break;
+	case x64_r8:
+		pt_regs->r8 = (unsigned long)value;
+		break;
+	case x64_r9:
+		pt_regs->r9 = (unsigned long)value;
+		break;
+	case x64_r10:
+		pt_regs->r10 = (unsigned long)value;
+		break;
+	case x64_r11:
+		pt_regs->r11 = (unsigned long)value;
+		break;
+	case x64_r12:
+		pt_regs->r12 = (unsigned long)value;
+		break;
+	case x64_r13:
+		pt_regs->r13 = (unsigned long)value;
+		break;
+	case x64_r14:
+		pt_regs->r14 = (unsigned long)value;
+		break;
+	case x64_r15:
+		pt_regs->r15 = (unsigned long)value;
+		break;
+	case x64_rip:
+		pt_regs->ip = (unsigned long)value;
+		break;
+	default:
+		WARN(1, "%s: Illegal register name %d\n", __func__, name);
+		break;
+	}
+}
+
+#endif /* __i386__ */
+
+static void set_regs(struct pt_regs *pt_regs, struct trampfd_regs *tregs)
+{
+	struct trampfd_reg	*reg = tregs->regs;
+	struct trampfd_reg	*reg_end = reg + tregs->nregs;
+	bool			compat = is_compat();
+
+	for (; reg < reg_end; reg++) {
+		if (compat)
+			set_reg_32(pt_regs, reg->name, reg->value);
+		else
+			set_reg_64(pt_regs, reg->name, reg->value);
+	}
+}
+
+/*
+ * Check if the register names are valid. Check if the user PC has been set.
+ */
+bool trampfd_valid_regs(struct trampfd_regs *tregs)
+{
+	struct trampfd_reg	*reg = tregs->regs;
+	struct trampfd_reg	*reg_end = reg + tregs->nregs;
+	int			min, max, pc_name;
+	bool			pc_set = false;
+
+	if (is_compat()) {
+		min = 0;
+		pc_name = x32_eip;
+		max = x32_max;
+	} else {
+		min = x32_max;
+		pc_name = x64_rip;
+		max = x64_max;
+	}
+
+	for (; reg < reg_end; reg++) {
+		if (reg->name < min || reg->name >= max || reg->reserved)
+			return false;
+		if (reg->name == pc_name && reg->value)
+			pc_set = true;
+	}
+	return pc_set;
+}
+EXPORT_SYMBOL_GPL(trampfd_valid_regs);
+
+/*
+ * Check if the PC specified in a register context is allowed.
+ */
+bool trampfd_allowed_pc(struct trampfd *trampfd, struct trampfd_regs *tregs)
+{
+	struct trampfd_reg	*reg = tregs->regs;
+	struct trampfd_reg	*reg_end = reg + tregs->nregs;
+	struct trampfd_values	*allowed_pcs = trampfd->allowed_pcs;
+	u64			*allowed_values, pc_value = 0;
+	u32			nvalues, pc_name;
+	int			i;
+
+	if (!allowed_pcs)
+		return true;
+
+	pc_name = is_compat() ? x32_eip : x64_rip;
+
+	/*
+	 * Find the PC register and its value. If the PC register has been
+	 * specified multiple times, only the last one counts.
+	 */
+	for (; reg < reg_end; reg++) {
+		if (reg->name == pc_name)
+			pc_value = reg->value;
+	}
+
+	allowed_values = allowed_pcs->values;
+	nvalues = allowed_pcs->nvalues;
+
+	for (i = 0; i < nvalues; i++) {
+		if (pc_value == allowed_values[i])
+			return true;
+	}
+	return false;
+}
+EXPORT_SYMBOL_GPL(trampfd_allowed_pc);
+
+/* ---------------------------- Stack Context ---------------------------- */
+
+static int push_data(struct pt_regs *pt_regs, struct trampfd_stack *tstack)
+{
+	unsigned long	sp;
+
+	sp = user_stack_pointer(pt_regs) - tstack->size - tstack->offset;
+	if (tstack->flags & TRAMPFD_SET_SP) {
+		if (is_compat())
+			sp = ((sp + 4) & -16ul) - 4;
+		else
+			sp = round_down(sp, 16) - 8;
+	}
+
+	if (!access_ok(sp, user_stack_pointer(pt_regs) - sp))
+		return -EFAULT;
+
+	if (copy_to_user(USERPTR(sp), tstack->data, tstack->size))
+		return -EFAULT;
+
+	if (tstack->flags & TRAMPFD_SET_SP)
+		user_stack_pointer_set(pt_regs, sp);
+
+	return 0;
+}
+
+/* ---------------------------- Fault Handlers ---------------------------- */
+
+static int trampfd_user_fault(struct trampfd *trampfd,
+			      struct vm_area_struct *vma,
+			      struct pt_regs *pt_regs)
+{
+	char			buf[TRAMPFD_MAX_STACK_SIZE];
+	struct trampfd_regs	*tregs;
+	struct trampfd_stack	*tstack = NULL;
+	unsigned long		addr;
+	size_t			size;
+	int			rc = 0;
+
+	mutex_lock(&trampfd->lock);
+
+	/*
+	 * Execution of the trampoline must start at the offset specfied by
+	 * the kernel.
+	 */
+	addr = vma->vm_start + trampfd->map.ioffset;
+	if (addr != pt_regs->ip) {
+		rc = -EINVAL;
+		goto unlock;
+	}
+
+	/*
+	 * At a minimum, the user PC register must be specified for a
+	 * user trampoline.
+	 */
+	tregs = trampfd->regs;
+	if (!tregs) {
+		rc = -EINVAL;
+		goto unlock;
+	}
+
+	/*
+	 * Set the register context for the trampoline.
+	 */
+	set_regs(pt_regs, tregs);
+
+	if (trampfd->stack) {
+		/*
+		 * Copy the stack context into a local buffer and push stack
+		 * data after dropping the lock.
+		 */
+		size = sizeof(*trampfd->stack) + trampfd->stack->size;
+		tstack = (struct trampfd_stack *) buf;
+		memcpy(tstack, trampfd->stack, size);
+	}
+unlock:
+	mutex_unlock(&trampfd->lock);
+
+	if (!rc && tstack) {
+		mmap_read_unlock(vma->vm_mm);
+		rc = push_data(pt_regs, tstack);
+		mmap_read_lock(vma->vm_mm);
+	}
+	return rc;
+}
+
+/*
+ * Handle it if it is a trampoline fault.
+ */
+bool trampfd_fault(struct vm_area_struct *vma, struct pt_regs *pt_regs)
+{
+	struct trampfd		*trampfd;
+
+	if (!is_trampfd_vma(vma))
+		return false;
+	trampfd = vma->vm_private_data;
+
+	if (trampfd->type == TRAMPFD_USER)
+		return !trampfd_user_fault(trampfd, vma, pt_regs);
+	return false;
+}
+EXPORT_SYMBOL_GPL(trampfd_fault);
+
+/* ------------------------- Arch Initialization ------------------------- */
+
+int trampfd_check_arch(struct trampfd *trampfd)
+{
+	return 0;
+}
+EXPORT_SYMBOL_GPL(trampfd_check_arch);
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 1ead568c0101..a1432ee2a1a2 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -18,6 +18,7 @@
 #include <linux/uaccess.h>		/* faulthandler_disabled()	*/
 #include <linux/efi.h>			/* efi_recover_from_page_fault()*/
 #include <linux/mm_types.h>
+#include <linux/trampfd.h>		/* trampoline invocation */
 
 #include <asm/cpufeature.h>		/* boot_cpu_has, ...		*/
 #include <asm/traps.h>			/* dotraplinkage, ...		*/
@@ -1142,6 +1143,7 @@ void do_user_addr_fault(struct pt_regs *regs,
 	struct mm_struct *mm;
 	vm_fault_t fault, major = 0;
 	unsigned int flags = FAULT_FLAG_DEFAULT;
+	unsigned long tflags = X86_PF_INSTR | X86_PF_USER;
 
 	tsk = current;
 	mm = tsk->mm;
@@ -1275,6 +1277,15 @@ void do_user_addr_fault(struct pt_regs *regs,
 	 */
 good_area:
 	if (unlikely(access_error(hw_error_code, vma))) {
+		/*
+		 * If it is a user execute fault, it could be a trampoline
+		 * invocation.
+		 */
+		if ((hw_error_code & tflags) == tflags &&
+		    trampfd_fault(vma, regs)) {
+			mmap_read_unlock(mm);
+			return;
+		}
 		bad_area_access_error(regs, hw_error_code, address, vma);
 		return;
 	}
-- 
2.17.1


^ permalink raw reply related

* [PATCH v1 4/4] [RFC] arm/trampfd: Provide support for the trampoline file descriptor
From: madvenka @ 2020-07-28 13:10 UTC (permalink / raw)
  To: kernel-hardening, linux-api, linux-arm-kernel, linux-fsdevel,
	linux-integrity, linux-kernel, linux-security-module, oleg, x86,
	madvenka
In-Reply-To: <20200728131050.24443-1-madvenka@linux.microsoft.com>

From: "Madhavan T. Venkataraman" <madvenka@linux.microsoft.com>

Implement 32-bit ARM support for the trampoline file descriptor.

	- Define architecture specific register names
	- Handle the trampoline invocation page fault
	- Setup the user register context on trampoline invocation
	- Setup the user stack context on trampoline invocation

Signed-off-by: Madhavan T. Venkataraman <madvenka@linux.microsoft.com>
---
 arch/arm/include/uapi/asm/ptrace.h |  20 +++
 arch/arm/kernel/Makefile           |   1 +
 arch/arm/kernel/trampfd.c          | 214 +++++++++++++++++++++++++++++
 arch/arm/mm/fault.c                |  12 +-
 arch/arm/tools/syscall.tbl         |   1 +
 5 files changed, 246 insertions(+), 2 deletions(-)
 create mode 100644 arch/arm/kernel/trampfd.c

diff --git a/arch/arm/include/uapi/asm/ptrace.h b/arch/arm/include/uapi/asm/ptrace.h
index e61c65b4018d..47b1c5e2f32c 100644
--- a/arch/arm/include/uapi/asm/ptrace.h
+++ b/arch/arm/include/uapi/asm/ptrace.h
@@ -151,6 +151,26 @@ struct pt_regs {
 #define ARM_r0		uregs[0]
 #define ARM_ORIG_r0	uregs[17]
 
+/*
+ * These register names are to be used by 32-bit applications.
+ */
+enum reg_32_name {
+	arm_r0,
+	arm_r1,
+	arm_r2,
+	arm_r3,
+	arm_r4,
+	arm_r5,
+	arm_r6,
+	arm_r7,
+	arm_r8,
+	arm_r9,
+	arm_r10,
+	arm_ip,
+	arm_pc,
+	arm_max,
+};
+
 /*
  * The size of the user-visible VFP state as seen by PTRACE_GET/SETVFPREGS
  * and core dumps.
diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile
index 89e5d864e923..652c54c2f19a 100644
--- a/arch/arm/kernel/Makefile
+++ b/arch/arm/kernel/Makefile
@@ -105,5 +105,6 @@ obj-$(CONFIG_SMP)		+= psci_smp.o
 endif
 
 obj-$(CONFIG_HAVE_ARM_SMCCC)	+= smccc-call.o
+obj-$(CONFIG_TRAMPFD)		+= trampfd.o
 
 extra-y := $(head-y) vmlinux.lds
diff --git a/arch/arm/kernel/trampfd.c b/arch/arm/kernel/trampfd.c
new file mode 100644
index 000000000000..50fc5706e85b
--- /dev/null
+++ b/arch/arm/kernel/trampfd.c
@@ -0,0 +1,214 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Trampoline File Descriptor - ARM support.
+ *
+ * Author: Madhavan T. Venkataraman (madvenka@linux.microsoft.com)
+ *
+ * Copyright (c) 2020, Microsoft Corporation.
+ */
+
+#include <linux/trampfd.h>
+#include <linux/mm_types.h>
+#include <linux/uaccess.h>
+
+/* ---------------------------- Register Context ---------------------------- */
+
+static void set_reg(long *uregs, u32 name, u64 value)
+{
+	switch (name) {
+	case arm_r0:
+	case arm_r1:
+	case arm_r2:
+	case arm_r3:
+	case arm_r4:
+	case arm_r5:
+	case arm_r6:
+	case arm_r7:
+	case arm_r8:
+	case arm_r9:
+	case arm_r10:
+		uregs[name] = (__u64)value;
+		break;
+	case arm_ip:
+		ARM_ip = (__u64)value;
+		break;
+	case arm_pc:
+		ARM_pc = (__u64)value;
+		break;
+	default:
+		WARN(1, "%s: Illegal register name %d\n", __func__, name);
+		break;
+	}
+}
+
+static void set_regs(long *uregs, struct trampfd_regs *tregs)
+{
+	struct trampfd_reg	*reg = tregs->regs;
+	struct trampfd_reg	*reg_end = reg + tregs->nregs;
+
+	for (; reg < reg_end; reg++)
+		set_reg(uregs, reg->name, reg->value);
+}
+
+/*
+ * Check if the register names are valid. Check if the user PC has been set.
+ */
+bool trampfd_valid_regs(struct trampfd_regs *tregs)
+{
+	struct trampfd_reg	*reg = tregs->regs;
+	struct trampfd_reg	*reg_end = reg + tregs->nregs;
+	bool			pc_set = false;
+
+	for (; reg < reg_end; reg++) {
+		if (reg->name >= arm_max || reg->reserved)
+			return false;
+		if (reg->name == arm_pc && reg->value)
+			pc_set = true;
+	}
+	return pc_set;
+}
+EXPORT_SYMBOL_GPL(trampfd_valid_regs);
+
+/*
+ * Check if the PC specified in a register context is allowed.
+ */
+bool trampfd_allowed_pc(struct trampfd *trampfd, struct trampfd_regs *tregs)
+{
+	struct trampfd_reg	*reg = tregs->regs;
+	struct trampfd_reg	*reg_end = reg + tregs->nregs;
+	struct trampfd_values	*allowed_pcs = trampfd->allowed_pcs;
+	u64			*allowed_values, pc_value = 0;
+	u32			nvalues, pc_name;
+	int			i;
+
+	if (!allowed_pcs)
+		return true;
+
+	pc_name = arm_pc;
+
+	/*
+	 * Find the PC register and its value. If the PC register has been
+	 * specified multiple times, only the last one counts.
+	 */
+	for (; reg < reg_end; reg++) {
+		if (reg->name == pc_name)
+			pc_value = reg->value;
+	}
+
+	allowed_values = allowed_pcs->values;
+	nvalues = allowed_pcs->nvalues;
+
+	for (i = 0; i < nvalues; i++) {
+		if (pc_value == allowed_values[i])
+			return true;
+	}
+	return false;
+}
+EXPORT_SYMBOL_GPL(trampfd_allowed_pc);
+
+/* ---------------------------- Stack Context ---------------------------- */
+
+static int push_data(long *uregs, struct trampfd_stack *tstack)
+{
+	unsigned long	sp;
+
+	sp = ARM_sp - tstack->size - tstack->offset;
+	if (tstack->flags & TRAMPFD_SET_SP)
+		sp &= ~7;
+
+	if (!access_ok(sp, ARM_sp - sp))
+		return -EFAULT;
+
+	if (copy_to_user(USERPTR(sp), tstack->data, tstack->size))
+		return -EFAULT;
+
+	if (tstack->flags & TRAMPFD_SET_SP)
+		ARM_sp = sp;
+	return 0;
+}
+
+/* ---------------------------- Fault Handlers ---------------------------- */
+
+static int trampfd_user_fault(struct trampfd *trampfd,
+			      struct vm_area_struct *vma,
+			      long *uregs)
+{
+	char			buf[TRAMPFD_MAX_STACK_SIZE];
+	struct trampfd_regs	*tregs;
+	struct trampfd_stack	*tstack = NULL;
+	unsigned long		addr;
+	size_t			size;
+	int			rc;
+
+	mutex_lock(&trampfd->lock);
+
+	/*
+	 * Execution of the trampoline must start at the offset specfied by
+	 * the kernel.
+	 */
+	addr = vma->vm_start + trampfd->map.ioffset;
+	if (addr != ARM_pc) {
+		rc = -EINVAL;
+		goto unlock;
+	}
+
+	/*
+	 * At a minimum, the user PC register must be specified for a
+	 * user trampoline.
+	 */
+	tregs = trampfd->regs;
+	if (!tregs) {
+		rc = -EINVAL;
+		goto unlock;
+	}
+
+	/*
+	 * Set the register context for the trampoline.
+	 */
+	set_regs(uregs, tregs);
+
+	if (trampfd->stack) {
+		/*
+		 * Copy the stack context into a local buffer and push stack
+		 * data after dropping the lock.
+		 */
+		size = sizeof(*trampfd->stack) + trampfd->stack->size;
+		tstack = (struct trampfd_stack *) buf;
+		memcpy(tstack, trampfd->stack, size);
+	}
+unlock:
+	mutex_unlock(&trampfd->lock);
+
+	if (!rc && tstack) {
+		mmap_read_unlock(vma->vm_mm);
+		rc = push_data(uregs, tstack);
+		mmap_read_lock(vma->vm_mm);
+	}
+	return rc;
+}
+
+/*
+ * Handle it if it is a trampoline fault.
+ */
+bool trampfd_fault(struct vm_area_struct *vma, struct pt_regs *pt_regs)
+{
+	struct trampfd		*trampfd;
+	unsigned long		*uregs = pt_regs->uregs;
+
+	if (!is_trampfd_vma(vma))
+		return false;
+	trampfd = vma->vm_private_data;
+
+	if (trampfd->type == TRAMPFD_USER)
+		return !trampfd_user_fault(trampfd, vma, uregs);
+	return false;
+}
+EXPORT_SYMBOL_GPL(trampfd_fault);
+
+/* ---------------------------- Miscellaneous ---------------------------- */
+
+int trampfd_check_arch(struct trampfd *trampfd)
+{
+	return 0;
+}
+EXPORT_SYMBOL_GPL(trampfd_check_arch);
diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c
index c6550eddfce1..21a81d19336b 100644
--- a/arch/arm/mm/fault.c
+++ b/arch/arm/mm/fault.c
@@ -17,6 +17,7 @@
 #include <linux/sched/debug.h>
 #include <linux/highmem.h>
 #include <linux/perf_event.h>
+#include <linux/trampfd.h>
 
 #include <asm/system_misc.h>
 #include <asm/system_info.h>
@@ -202,7 +203,8 @@ static inline bool access_error(unsigned int fsr, struct vm_area_struct *vma)
 
 static vm_fault_t __kprobes
 __do_page_fault(struct mm_struct *mm, unsigned long addr, unsigned int fsr,
-		unsigned int flags, struct task_struct *tsk)
+		unsigned int flags, struct task_struct *tsk,
+		struct pt_regs *regs)
 {
 	struct vm_area_struct *vma;
 	vm_fault_t fault;
@@ -220,6 +222,12 @@ __do_page_fault(struct mm_struct *mm, unsigned long addr, unsigned int fsr,
 	 */
 good_area:
 	if (access_error(fsr, vma)) {
+		/*
+		 * If it is an execute fault, it could be a trampoline
+		 * invocation.
+		 */
+		if ((fsr & FSR_LNX_PF) && trampfd_fault(vma, regs))
+			return 0;
 		fault = VM_FAULT_BADACCESS;
 		goto out;
 	}
@@ -290,7 +298,7 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
 #endif
 	}
 
-	fault = __do_page_fault(mm, addr, fsr, flags, tsk);
+	fault = __do_page_fault(mm, addr, fsr, flags, tsk, regs);
 
 	/* If we need to retry but a fatal signal is pending, handle the
 	 * signal first. We do not need to release the mmap_lock because
diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl
index d5cae5ffede0..88cf4c45069a 100644
--- a/arch/arm/tools/syscall.tbl
+++ b/arch/arm/tools/syscall.tbl
@@ -452,3 +452,4 @@
 437	common	openat2				sys_openat2
 438	common	pidfd_getfd			sys_pidfd_getfd
 439	common	faccessat2			sys_faccessat2
+440	common	trampfd_create			sys_trampfd_create
-- 
2.17.1


^ permalink raw reply related

* [PATCH v1 0/4] [RFC] Implement Trampoline File Descriptor
From: madvenka @ 2020-07-28 13:10 UTC (permalink / raw)
  To: kernel-hardening, linux-api, linux-arm-kernel, linux-fsdevel,
	linux-integrity, linux-kernel, linux-security-module, oleg, x86,
	madvenka
In-Reply-To: <aefc85852ea518982e74b233e11e16d2e707bc32>

From: "Madhavan T. Venkataraman" <madvenka@linux.microsoft.com>

Introduction
------------

Trampolines are used in many different user applications. Trampoline
code is often generated at runtime. Trampoline code can also just be a
pre-defined sequence of machine instructions in a data buffer.

Trampoline code is placed either in a data page or in a stack page. In
order to execute a trampoline, the page it resides in needs to be mapped
with execute permissions. Writable pages with execute permissions provide
an attack surface for hackers. Attackers can use this to inject malicious
code, modify existing code or do other harm.

To mitigate this, LSMs such as SELinux may not allow pages to have both
write and execute permissions. This prevents trampolines from executing
and blocks applications that use trampolines. To allow genuine applications
to run, exceptions have to be made for them (by setting execmem, etc).
In this case, the attack surface is just the pages of such applications.

An application that is not allowed to have writable executable pages
may try to load trampoline code into a file and map the file with execute
permissions. In this case, the attack surface is just the buffer that
contains trampoline code. However, a successful exploit may provide the
hacker with means to load his own code in a file, map it and execute it.

LSMs (such as the IPE proposal [1]) may allow only properly signed object
files to be mapped with execute permissions. This will prevent trampoline
files from being mapped. Again, exceptions have to be made for genuine
applications.

We need a way to execute trampolines without making security exceptions
where possible and to reduce the attack surface even further.

Examples of trampolines
-----------------------

libffi (A Portable Foreign Function Interface Library):

libffi allows a user to define functions with an arbitrary list of
arguments and return value through a feature called "Closures".
Closures use trampolines to jump to ABI handlers that handle calling
conventions and call a target function. libffi is used by a lot
of different applications. To name a few:

	- Python
	- Java
	- Javascript
	- Ruby FFI
	- Lisp
	- Objective C

GCC nested functions:

GCC has traditionally used trampolines for implementing nested
functions. The trampoline is placed on the user stack. So, the stack
needs to be executable.

Currently available solution
----------------------------

One solution that has been proposed to allow trampolines to be executed
without making security exceptions is Trampoline Emulation. See:

https://pax.grsecurity.net/docs/emutramp.txt

In this solution, the kernel recognizes certain sequences of instructions
as "well-known" trampolines. When such a trampoline is executed, a page
fault happens because the trampoline page does not have execute permission.
The kernel recognizes the trampoline and emulates it. Basically, the
kernel does the work of the trampoline on behalf of the application.

Here, the attack surface is the buffer that contains the trampoline.
The attack surface is narrower than before. A hacker may still be able to
modify what gets loaded in the registers or modify the target PC to point
to arbitrary locations.

Currently, the emulated trampolines are the ones used in libffi and GCC
nested functions. To my knowledge, only X86 is supported at this time.

As noted in emutramp.txt, this is not a generic solution. For every new
trampoline that needs to be supported, new instruction sequences need to
be recognized by the kernel and emulated. And this has to be done for
every architecture that needs to be supported.

emutramp.txt notes the following:

"... the real solution is not in emulation but by designing a kernel API
for runtime code generation and modifying userland to make use of it."

Trampoline File Descriptor (trampfd)
--------------------------

I am proposing a kernel API using anonymous file descriptors that
can be used to create and execute trampolines with the help of the
kernel. In this solution also, the kernel does the work of the trampoline.
The API is described in patch 1/4 of this patchset. I provide a
summary here:

Trampolines commonly execute the following sequence:

	- Load some values in some registers and/or
	- Push some values on the stack
	- Jump to a target PC

libffi and GCC nested function trampolines fit into this model.

Using the kernel API, applications and libraries can:

	- Create a trampoline object
	- Associate a register context with the trampoline (including
	  a target PC)
	- Associate a stack context with the trampoline
	- Map the trampoline into a process address space
	- Execute the trampoline by executing at the trampoline address

The kernel creates the trampoline mapping without any permissions. When
the trampoline is executed by user code, a page fault happens and the
kernel gets control. The kernel recognizes that this is a trampoline
invocation. It sets up the user registers based on the specified
register context, and/or pushes values on the user stack based on the
specified stack context, and sets the user PC to the requested target
PC. When the kernel returns, execution continues at the target PC.
So, the kernel does the work of the trampoline on behalf of the
application.

In this case, the attack surface is the context buffer. A hacker may
attack an application with a vulnerability and may be able to modify the
context buffer. So, when the register or stack context is set for
a trampoline, the values may have been tampered with. From an attack
surface perspective, this is similar to Trampoline Emulation. But
with trampfd, user code can retrieve a trampoline's context from the
kernel and add defensive checks to see if the context has been
tampered with.

As for the target PC, trampfd implements a measure called the
"Allowed PCs" context (see Advantages) to prevent a hacker from making
the target PC point to arbitrary locations. So, the attack surface is
narrower than Trampoline Emulation.

Advantages of the Trampoline File Descriptor approach
-----------------------------------------------------

- trampfd is customizable. The user can specify any combination of
  allowed register name-value pairs in the register context and the kernel
  will set it up accordingly. This allows different user trampolines to be
  converted to use trampfd.

- trampfd allows a stack context to be set up so that trampolines that
  need to push values on the user stack can do that.

- The initial work is targeted for X86 and ARM. But the implementation
  leverages small portions of existing signal delivery code. Specifically,
  it uses pt_regs for setting up user registers and copy_to_user()
  to push values on the stack. So, this can be very easily ported to other
  architectures.

- trampfd provides a basic framework. In the future, new trampoline types
  can be implemented, new contexts can be defined, and additional rules
  can be implemented for security purposes.

- For instance, trampfd defines an "Allowed PCs" context in this initial
  work. As an example, libffi can create a read-only array of all ABI
  handlers for an architecture at build time. This array can be used to
  set the list of allowed PCs for a trampoline. This will mean that a hacker
  cannot hack the PC part of the register context and make it point to
  arbitrary locations.

- An SELinux setting called "exectramp" can be implemented along the
  lines of "execmem", "execstack" and "execheap" to selectively allow the
  use of trampolines on a per application basis.

- User code can add defensive checks in the code before invoking a
  trampoline to make sure that a hacker has not modified the context data.
  It can do this by getting the trampoline context from the kernel and
  double checking it.

- In the future, if the kernel can be enhanced to use a safe code
  generation component, that code can be placed in the trampoline mapping
  pages. Then, the trampoline invocation does not have to incur a trip
  into the kernel.

- Also, if the kernel can be enhanced to use a safe code generation
  component, other forms of dynamic code such as JIT code can be
  addressed by the trampfd framework.

- Trampolines can be shared across processes which can give rise to
  interesting uses in the future.

- Trampfd can be used for other purposes to extend the kernel's
  functionality.

libffi
------

I have implemented my solution for libffi and provided the changes for
X86 and ARM, 32-bit and 64-bit. Here is the reference patch:

http://linux.microsoft.com/~madvenka/libffi/libffi.txt

If the trampfd patchset gets accepted, I will send the libffi changes
to the maintainers for a review. BTW, I have also successfully executed
the libffi self tests.

Work that is pending
--------------------

- I am working on implementing an SELinux setting called "exectramp"
  similar to "execmem" to allow the use of trampfd on a per application
  basis.

- I have a comprehensive test program to test the kernel API. I am
  working on adding it to selftests.

References
----------

[1] https://microsoft.github.io/ipe/
---
Madhavan T. Venkataraman (4):
  fs/trampfd: Implement the trampoline file descriptor API
  x86/trampfd: Support for the trampoline file descriptor
  arm64/trampfd: Support for the trampoline file descriptor
  arm/trampfd: Support for the trampoline file descriptor

 arch/arm/include/uapi/asm/ptrace.h     |  20 ++
 arch/arm/kernel/Makefile               |   1 +
 arch/arm/kernel/trampfd.c              | 214 +++++++++++++++++
 arch/arm/mm/fault.c                    |  12 +-
 arch/arm/tools/syscall.tbl             |   1 +
 arch/arm64/include/asm/ptrace.h        |   9 +
 arch/arm64/include/asm/unistd.h        |   2 +-
 arch/arm64/include/asm/unistd32.h      |   2 +
 arch/arm64/include/uapi/asm/ptrace.h   |  57 +++++
 arch/arm64/kernel/Makefile             |   2 +
 arch/arm64/kernel/trampfd.c            | 278 ++++++++++++++++++++++
 arch/arm64/mm/fault.c                  |  15 +-
 arch/x86/entry/syscalls/syscall_32.tbl |   1 +
 arch/x86/entry/syscalls/syscall_64.tbl |   1 +
 arch/x86/include/uapi/asm/ptrace.h     |  38 +++
 arch/x86/kernel/Makefile               |   2 +
 arch/x86/kernel/trampfd.c              | 313 +++++++++++++++++++++++++
 arch/x86/mm/fault.c                    |  11 +
 fs/Makefile                            |   1 +
 fs/trampfd/Makefile                    |   6 +
 fs/trampfd/trampfd_data.c              |  43 ++++
 fs/trampfd/trampfd_fops.c              | 131 +++++++++++
 fs/trampfd/trampfd_map.c               |  78 ++++++
 fs/trampfd/trampfd_pcs.c               |  95 ++++++++
 fs/trampfd/trampfd_regs.c              | 137 +++++++++++
 fs/trampfd/trampfd_stack.c             | 131 +++++++++++
 fs/trampfd/trampfd_stubs.c             |  41 ++++
 fs/trampfd/trampfd_syscall.c           |  92 ++++++++
 include/linux/syscalls.h               |   3 +
 include/linux/trampfd.h                |  82 +++++++
 include/uapi/asm-generic/unistd.h      |   4 +-
 include/uapi/linux/trampfd.h           | 171 ++++++++++++++
 init/Kconfig                           |   8 +
 kernel/sys_ni.c                        |   3 +
 34 files changed, 1998 insertions(+), 7 deletions(-)
 create mode 100644 arch/arm/kernel/trampfd.c
 create mode 100644 arch/arm64/kernel/trampfd.c
 create mode 100644 arch/x86/kernel/trampfd.c
 create mode 100644 fs/trampfd/Makefile
 create mode 100644 fs/trampfd/trampfd_data.c
 create mode 100644 fs/trampfd/trampfd_fops.c
 create mode 100644 fs/trampfd/trampfd_map.c
 create mode 100644 fs/trampfd/trampfd_pcs.c
 create mode 100644 fs/trampfd/trampfd_regs.c
 create mode 100644 fs/trampfd/trampfd_stack.c
 create mode 100644 fs/trampfd/trampfd_stubs.c
 create mode 100644 fs/trampfd/trampfd_syscall.c
 create mode 100644 include/linux/trampfd.h
 create mode 100644 include/uapi/linux/trampfd.h

-- 
2.17.1


^ permalink raw reply

* Re: [PATCH V3fix ghak120] audit: initialize context values in case of mandatory events
From: Paul Moore @ 2020-07-28  2:14 UTC (permalink / raw)
  To: Richard Guy Briggs
  Cc: Linux-Audit Mailing List, LKML, Linux Security Module list,
	Eric Paris
In-Reply-To: <e3e9c9da9115fd233c5a7895dbb4a698a365b1b0.1595884394.git.rgb@redhat.com>

On Mon, Jul 27, 2020 at 5:30 PM Richard Guy Briggs <rgb@redhat.com> wrote:
>
> Issue ghak120 enabled syscall records to accompany required records when
> no rules are present to trigger the storage of syscall context.  A
> reported issue showed that the cwd was not always initialized.  That
> issue was already resolved ...

Yes and no.  Yes, it appears to be resolved in v5.8-rc1 and above, but
the problematic commit is in v5.7 and I'm not sure backporting the fix
in v5.8-rcX plus this patch is the right thing to do for a released
kernel.  The lowest risk fix for v5.7 at this point is to do a revert;
regardless of what happens with this patch and v5.8-rcX please post a
revert for the audit/stable-5.7 tree as soon as you can.

> ... but a review of all other records that could
> be triggered at the time of a syscall record revealed other potential
> values that could be missing or misleading.  Initialize them.
>
> The fds array is reset to -1 after the first syscall to indicate it
> isn't valid any more, but was never set to -1 when the context was
> allocated to indicate it wasn't yet valid.
>
> The audit_inode* functions can be called without going through
> getname_flags() or getname_kernel() that sets audit_names and cwd, so
> set the cwd if it has not already been done so due to audit_names being
> valid.
>
> The LSM dump_common_audit_data() LSM_AUDIT_DATA_NET:AF_UNIX case was
> missed with the ghak96 patch, so add that case here.
>
> Please see issue https://github.com/linux-audit/audit-kernel/issues/120
> Please see issue https://github.com/linux-audit/audit-kernel/issues/96
> Passes audit-testsuite.
>
> Signed-off-by: Richard Guy Briggs <rgb@redhat.com>
> ---
>  kernel/auditsc.c     | 3 +++
>  security/lsm_audit.c | 1 +
>  2 files changed, 4 insertions(+)
>
> diff --git a/kernel/auditsc.c b/kernel/auditsc.c
> index 6884b50069d1..2f97618e6a34 100644
> --- a/kernel/auditsc.c
> +++ b/kernel/auditsc.c
> @@ -929,6 +929,7 @@ static inline struct audit_context *audit_alloc_context(enum audit_state state)
>         context->prio = state == AUDIT_RECORD_CONTEXT ? ~0ULL : 0;
>         INIT_LIST_HEAD(&context->killed_trees);
>         INIT_LIST_HEAD(&context->names_list);
> +       context->fds[0] = -1;
>         return context;
>  }
>
> @@ -2076,6 +2077,7 @@ void __audit_inode(struct filename *name, const struct dentry *dentry,
>         }
>         handle_path(dentry);
>         audit_copy_inode(n, dentry, inode, flags & AUDIT_INODE_NOEVAL);
> +       _audit_getcwd(context);
>  }
>
>  void __audit_file(const struct file *file)
> @@ -2194,6 +2196,7 @@ void __audit_inode_child(struct inode *parent,
>                 audit_copy_inode(found_child, dentry, inode, 0);
>         else
>                 found_child->ino = AUDIT_INO_UNSET;
> +       _audit_getcwd(context);
>  }
>  EXPORT_SYMBOL_GPL(__audit_inode_child);
>
> diff --git a/security/lsm_audit.c b/security/lsm_audit.c
> index 53d0d183db8f..e93077612246 100644
> --- a/security/lsm_audit.c
> +++ b/security/lsm_audit.c
> @@ -369,6 +369,7 @@ static void dump_common_audit_data(struct audit_buffer *ab,
>                                         audit_log_untrustedstring(ab, p);
>                                 else
>                                         audit_log_n_hex(ab, p, len);
> +                               audit_getcwd();
>                                 break;
>                         }
>                 }

I understand the "fds[0] = -1" fix in audit_alloc_context()
(ironically, the kzalloc() which is supposed to help with cases like
this, hurts us with this particular field), but I'm still not quite
seeing why we need to sprinkle audit_getcwd() calls everywhere to fix
this bug (this seems more like a feature add than a bigfix).  Yes,
they may fix the problem but it seems like simply adding a
context->pwd test in audit_log_name() similar to what we do in
audit_log_exit() is the correct fix.

We are currently at -rc7 and this really needs to land before v5.8 is
released, presumably this weekend; this means a small and limited bug
fix patch is what is needed.

-- 
paul moore
www.paul-moore.com

^ permalink raw reply

* Re: [RFC PATCH bpf-next] bpf: POC on local_storage charge and uncharge map_ops
From: Martin KaFai Lau @ 2020-07-27 21:43 UTC (permalink / raw)
  To: KP Singh
  Cc: KP Singh, bpf, linux-kernel, linux-security-module,
	Alexei Starovoitov, Daniel Borkmann, Paul Turner, Jann Horn,
	Florent Revest
In-Reply-To: <c3c422c7-b15e-32ee-4156-b9d26896f7a0@chromium.org>

On Mon, Jul 27, 2020 at 10:26:53PM +0200, KP Singh wrote:
> Thanks for this, I was able to update the series with this patch and it works.
> One minor comment though.
> 
> I was wondering how should I send it as a part of the series. I will keep the
> original commit description + mention this thread and add your Co-Developed-by:
> tag and then you can add your Signed-off-by: as well.
Sounds good to me.

Thanks for verifying the idea.  Feel free to make changes or clean up on
this RFC.

> I am not sure of the 
> canonical way here and am open to suggestions :)
> 
> - KP
> 
> On 25.07.20 03:30, Martin KaFai Lau wrote:
> > It is a direct replacement of the patch 3 in discussion [1]
> > and to test out the idea on adding
> > map_local_storage_charge, map_local_storage_uncharge,
> > and map_owner_storage_ptr.
> > 
> > It is only compiler tested to demo the idea.
> > 
> > [1]: https://urldefense.proofpoint.com/v2/url?u=https-3A__patchwork.ozlabs.org_project_netdev_patch_20200723115032.460770-2D4-2Dkpsingh-40chromium.org_&d=DwICaQ&c=5VD0RTtNlTh3ycd41b3MUw&r=VQnoQ7LvghIj0gVEaiQSUw&m=NZVmomh5sMPlIAqLmFQ_MXlMILuq1Z7TQqntbPoZ0ew&s=MLVevCJz2eNWswxXXF3jFYdAV2UG-xJEi0I1PkLL-fw&e= 
> > 
> > Signed-off-by: Martin KaFai Lau <kafai@fb.com>
> > ---
> >  include/linux/bpf.h            |  10 ++
> >  include/net/bpf_sk_storage.h   |  51 +++++++
> >  include/uapi/linux/bpf.h       |   8 +-
> 
> [...]
> 
> > +
> > +static void sk_storage_uncharge(struct bpf_local_storage_map *smap,
> > +				void *owner, u32 size)
> > +{
> > +	struct sock *sk = owner;
> > +
> > +	atomic_sub(size, &sk->sk_omem_alloc);
> > +}
> > +
> > +static struct bpf_local_storage __rcu **
> > +sk_storage_ptr(struct bpf_local_storage_map *smap, void *owner)
> 
> Do we need an smap pointer here? It's not being used and is also not
> used for inode as well.
You are correct.  No, it is not needed.
I threw in there merely because it is a map_ops.  It is unused
and can be removed.

> > +{
> > +	struct sock *sk = owner;
> > +
> > +	return &sk->sk_bpf_storage;
> > +}
> > +

^ permalink raw reply

* [PATCH V3fix ghak120] audit: initialize context values in case of mandatory events
From: Richard Guy Briggs @ 2020-07-27 21:29 UTC (permalink / raw)
  To: Linux-Audit Mailing List, LKML, Linux Security Module list
  Cc: Paul Moore, eparis, Richard Guy Briggs

Issue ghak120 enabled syscall records to accompany required records when
no rules are present to trigger the storage of syscall context.  A
reported issue showed that the cwd was not always initialized.  That
issue was already resolved, but a review of all other records that could
be triggered at the time of a syscall record revealed other potential
values that could be missing or misleading.  Initialize them.

The fds array is reset to -1 after the first syscall to indicate it
isn't valid any more, but was never set to -1 when the context was
allocated to indicate it wasn't yet valid.

The audit_inode* functions can be called without going through
getname_flags() or getname_kernel() that sets audit_names and cwd, so
set the cwd if it has not already been done so due to audit_names being
valid.

The LSM dump_common_audit_data() LSM_AUDIT_DATA_NET:AF_UNIX case was
missed with the ghak96 patch, so add that case here.

Please see issue https://github.com/linux-audit/audit-kernel/issues/120
Please see issue https://github.com/linux-audit/audit-kernel/issues/96
Passes audit-testsuite.

Signed-off-by: Richard Guy Briggs <rgb@redhat.com>
---
 kernel/auditsc.c     | 3 +++
 security/lsm_audit.c | 1 +
 2 files changed, 4 insertions(+)

diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 6884b50069d1..2f97618e6a34 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -929,6 +929,7 @@ static inline struct audit_context *audit_alloc_context(enum audit_state state)
 	context->prio = state == AUDIT_RECORD_CONTEXT ? ~0ULL : 0;
 	INIT_LIST_HEAD(&context->killed_trees);
 	INIT_LIST_HEAD(&context->names_list);
+	context->fds[0] = -1;
 	return context;
 }
 
@@ -2076,6 +2077,7 @@ void __audit_inode(struct filename *name, const struct dentry *dentry,
 	}
 	handle_path(dentry);
 	audit_copy_inode(n, dentry, inode, flags & AUDIT_INODE_NOEVAL);
+	_audit_getcwd(context);
 }
 
 void __audit_file(const struct file *file)
@@ -2194,6 +2196,7 @@ void __audit_inode_child(struct inode *parent,
 		audit_copy_inode(found_child, dentry, inode, 0);
 	else
 		found_child->ino = AUDIT_INO_UNSET;
+	_audit_getcwd(context);
 }
 EXPORT_SYMBOL_GPL(__audit_inode_child);
 
diff --git a/security/lsm_audit.c b/security/lsm_audit.c
index 53d0d183db8f..e93077612246 100644
--- a/security/lsm_audit.c
+++ b/security/lsm_audit.c
@@ -369,6 +369,7 @@ static void dump_common_audit_data(struct audit_buffer *ab,
 					audit_log_untrustedstring(ab, p);
 				else
 					audit_log_n_hex(ab, p, len);
+				audit_getcwd();
 				break;
 			}
 		}
-- 
1.8.3.1


^ permalink raw reply related

* Re: [PATCH v19 02/23] LSM: Create and manage the lsmblob data structure.
From: Casey Schaufler @ 2020-07-27 21:04 UTC (permalink / raw)
  To: Stephen Smalley
  Cc: Casey Schaufler, James Morris, LSM List, SElinux list,
	John Johansen, linux-audit, Stephen Smalley, Casey Schaufler
In-Reply-To: <CAEjxPJ453W-8PmB4WPq2vZHfvvG6yWFmoqnuHtHvz5Y5MYaj1g@mail.gmail.com>

On 7/27/2020 9:12 AM, Stephen Smalley wrote:
> On Fri, Jul 24, 2020 at 4:35 PM Casey Schaufler <casey@schaufler-ca.com> wrote:
>> When more than one security module is exporting data to
>> audit and networking sub-systems a single 32 bit integer
>> is no longer sufficient to represent the data. Add a
>> structure to be used instead.
>>
>> The lsmblob structure is currently an array of
>> u32 "secids". There is an entry for each of the
>> security modules built into the system that would
>> use secids if active. The system assigns the module
>> a "slot" when it registers hooks. If modules are
>> compiled in but not registered there will be unused
>> slots.
>>
>> A new lsm_id structure, which contains the name
>> of the LSM and its slot number, is created. There
>> is an instance for each LSM, which assigns the name
>> and passes it to the infrastructure to set the slot.
>>
>> The audit rules data is expanded to use an array of
>> security module data rather than a single instance.
>> Because IMA uses the audit rule functions it is
>> affected as well.
>>
>> Acked-by: Stephen Smalley <sds@tycho.nsa.gov>
>> Acked-by: Paul Moore <paul@paul-moore.com>
>> Signed-off-by: Casey Schaufler <casey@schaufler-ca.com>
> With CONFIG_BPF_LSM=y:

Thanks. I am surprised that this config option isn't
under security. No problem, an easy fix.

>
> security/bpf/hooks.c: In function ‘bpf_lsm_init’:
> security/bpf/hooks.c:18:63: error: passing argument 3 of
> ‘security_add_hooks’ from incompatible pointer type
> [-Werror=incompatible-pointer-types]
>    18 |  security_add_hooks(bpf_lsm_hooks, ARRAY_SIZE(bpf_lsm_hooks), "bpf");
>       |                                                               ^~~~~
>       |                                                               |
>       |                                                               char *
> In file included from security/bpf/hooks.c:6:
> ./include/linux/lsm_hooks.h:1592:26: note: expected ‘struct lsm_id *’
> but argument is of type ‘char *’
>  1592 |           struct lsm_id *lsmid);
>       |           ~~~~~~~~~~~~~~~^~~~~

^ permalink raw reply

* Re: [PATCH] integrity: remove redundant initialization of variable ret
From: Mimi Zohar @ 2020-07-27 20:57 UTC (permalink / raw)
  To: James Morris, Colin King, Mimi Zohar
  Cc: Serge E . Hallyn, linux-security-module, kernel-janitors,
	linux-kernel
In-Reply-To: <alpine.LRH.2.21.2007280405340.18670@namei.org>

On Tue, 2020-07-28 at 04:05 +1000, James Morris wrote:
> On Wed, 1 Jul 2020, Colin King wrote:
> 
> > From: Colin Ian King <colin.king@canonical.com>
> > 
> > The variable ret is being initialized with a value that is never read
> > and it is being updated later with a new value.  The initialization is
> > redundant and can be removed.
> > 
> > Addresses-Coverity: ("Unused value")
> > Signed-off-by: Colin Ian King <colin.king@canonical.com>
> > ---
> >  security/integrity/digsig_asymmetric.c | 2 +-
> >  1 file changed, 1 insertion(+), 1 deletion(-)
> > 
> > diff --git a/security/integrity/digsig_asymmetric.c b/security/integrity/digsig_asymmetric.c
> > index 4e0d6778277e..cfa4127d0518 100644
> > --- a/security/integrity/digsig_asymmetric.c
> > +++ b/security/integrity/digsig_asymmetric.c
> > @@ -79,7 +79,7 @@ int asymmetric_verify(struct key *keyring, const char *sig,
> >  	struct public_key_signature pks;
> >  	struct signature_v2_hdr *hdr = (struct signature_v2_hdr *)sig;
> >  	struct key *key;
> > -	int ret = -ENOMEM;
> > +	int ret;
> 
> Assuming Mimi will grab this.
> 
> 
> Acked-by: James Morris <jamorris@linux.microsoft.com>

Yes, thank you for the reminder.

Mimi

^ permalink raw reply

* Re: [PATCH v19 13/23] LSM: Specify which LSM to display
From: John Johansen @ 2020-07-27 20:40 UTC (permalink / raw)
  To: James Morris, Casey Schaufler
  Cc: casey.schaufler, linux-security-module, selinux, Audit-ML,
	Kees Cook, Tetsuo Handa, Paul Moore, Stephen Smalley
In-Reply-To: <alpine.LRH.2.21.2007280636040.18670@namei.org>

On 7/27/20 1:36 PM, James Morris wrote:
> On Fri, 24 Jul 2020, Casey Schaufler wrote:
> 
>> Create a new entry "display" in the procfs attr directory for
>> controlling which LSM security information is displayed for a
>> process. A process can only read or write its own display value.
>>
>> The name of an active LSM that supplies hooks for
>> human readable data may be written to "display" to set the
>> value. The name of the LSM currently in use can be read from
>> "display". At this point there can only be one LSM capable
>> of display active. A helper function lsm_task_display() is
>> provided to get the display slot for a task_struct.
>>
>> Setting the "display" requires that all security modules using
>> setprocattr hooks allow the action. Each security module is
>> responsible for defining its policy.
>>
>> AppArmor hook provided by John Johansen <john.johansen@canonical.com>
>> SELinux hook provided by Stephen Smalley <sds@tycho.nsa.gov>
>>
>> Reviewed-by: Kees Cook <keescook@chromium.org>
>> Acked-by: Stephen Smalley <sds@tycho.nsa.gov>
>> Acked-by: Paul Moore <paul@paul-moore.com>
>> Signed-off-by: Casey Schaufler <casey@schaufler-ca.com>
> 
> jj: do you have any review/feedback on this?
> 
yeah, I am working my way through it today


^ permalink raw reply

* Re: [PATCH v19 21/23] Audit: Add a new record for multiple object LSM  attributes
From: James Morris @ 2020-07-27 20:40 UTC (permalink / raw)
  To: Casey Schaufler, Paul Moore
  Cc: casey.schaufler, linux-security-module, selinux, Audit-ML,
	Kees Cook, John Johansen, Tetsuo Handa, Stephen Smalley
In-Reply-To: <20200724203226.16374-22-casey@schaufler-ca.com>

On Fri, 24 Jul 2020, Casey Schaufler wrote:

> Create a new audit record type to contain the object information
> when there are multiple security modules that require such data.
> This record is emitted before the other records for the event, but
> is linked with the same timestamp and serial number.
> 
> Signed-off-by: Casey Schaufler <casey@schaufler-ca.com>
> Cc: linux-audit@redhat.com

These audit patches will need ack/review from Paul.

-- 
James Morris
<jmorris@namei.org>


^ permalink raw reply

* Re: [PATCH v19 17/23] LSM: security_secid_to_secctx in netlink netfilter
From: James Morris @ 2020-07-27 20:37 UTC (permalink / raw)
  To: Casey Schaufler, Paul Moore
  Cc: casey.schaufler, linux-security-module, selinux, Audit-ML,
	Kees Cook, John Johansen, Tetsuo Handa, Stephen Smalley, netdev
In-Reply-To: <20200724203226.16374-18-casey@schaufler-ca.com>

On Fri, 24 Jul 2020, Casey Schaufler wrote:

> Change netlink netfilter interfaces to use lsmcontext
> pointers, and remove scaffolding.
> 
> Reviewed-by: Kees Cook <keescook@chromium.org>
> Reviewed-by: John Johansen <john.johansen@canonical.com>
> Acked-by: Stephen Smalley <sds@tycho.nsa.gov>
> Signed-off-by: Casey Schaufler <casey@schaufler-ca.com>
> cc: netdev@vger.kernel.org

I'd like to see Paul's acks on any networking related changes.

-- 
James Morris
<jmorris@namei.org>


^ permalink raw reply

* Re: [PATCH v19 13/23] LSM: Specify which LSM to display
From: James Morris @ 2020-07-27 20:36 UTC (permalink / raw)
  To: Casey Schaufler, John Johansen
  Cc: casey.schaufler, linux-security-module, selinux, Audit-ML,
	Kees Cook, Tetsuo Handa, Paul Moore, Stephen Smalley
In-Reply-To: <20200724203226.16374-14-casey@schaufler-ca.com>

On Fri, 24 Jul 2020, Casey Schaufler wrote:

> Create a new entry "display" in the procfs attr directory for
> controlling which LSM security information is displayed for a
> process. A process can only read or write its own display value.
> 
> The name of an active LSM that supplies hooks for
> human readable data may be written to "display" to set the
> value. The name of the LSM currently in use can be read from
> "display". At this point there can only be one LSM capable
> of display active. A helper function lsm_task_display() is
> provided to get the display slot for a task_struct.
> 
> Setting the "display" requires that all security modules using
> setprocattr hooks allow the action. Each security module is
> responsible for defining its policy.
> 
> AppArmor hook provided by John Johansen <john.johansen@canonical.com>
> SELinux hook provided by Stephen Smalley <sds@tycho.nsa.gov>
> 
> Reviewed-by: Kees Cook <keescook@chromium.org>
> Acked-by: Stephen Smalley <sds@tycho.nsa.gov>
> Acked-by: Paul Moore <paul@paul-moore.com>
> Signed-off-by: Casey Schaufler <casey@schaufler-ca.com>

jj: do you have any review/feedback on this?

-- 
James Morris
<jmorris@namei.org>


^ permalink raw reply

* Re: [PATCH bpf-next v6 3/7] bpf: Generalize bpf_sk_storage
From: KP Singh @ 2020-07-27 20:28 UTC (permalink / raw)
  To: Martin KaFai Lau
  Cc: linux-kernel, bpf, linux-security-module, Alexei Starovoitov,
	Daniel Borkmann, Paul Turner, Jann Horn, Florent Revest
In-Reply-To: <20200725011329.ymvhmbb2y5yqzy3k@kafai-mbp>



On 25.07.20 03:13, Martin KaFai Lau wrote:
> On Thu, Jul 23, 2020 at 01:50:28PM +0200, KP Singh wrote:
>> From: KP Singh <kpsingh@google.com>
>>
>> Refactor the functionality in bpf_sk_storage.c so that concept of
>> storage linked to kernel objects can be extended to other objects like
>> inode, task_struct etc.
>>
>> Each new local storage will still be a separate map and provide its own
>> set of helpers. This allows for future object specific extensions and
>> still share a lot of the underlying implementation.
>>
> 
> [ ... ]
> 
>> @@ -386,54 +407,28 @@ static int sk_storage_alloc(struct sock *sk,
>>   * Otherwise, it will become a leak (and other memory issues
>>   * during map destruction).
>>   */
>> -static struct bpf_local_storage_data *
>> -bpf_local_storage_update(struct sock *sk, struct bpf_map *map, void *value,
>> +struct bpf_local_storage_data *
>> +bpf_local_storage_update(void *owner, struct bpf_map *map,
>> +			 struct bpf_local_storage *local_storage, void *value,
>>  			 u64 map_flags)
>>  {
>>  	struct bpf_local_storage_data *old_sdata = NULL;
>>  	struct bpf_local_storage_elem *selem;
>> -	struct bpf_local_storage *local_storage;
>>  	struct bpf_local_storage_map *smap;
>>  	int err;
>>  
>> -	/* BPF_EXIST and BPF_NOEXIST cannot be both set */
>> -	if (unlikely((map_flags & ~BPF_F_LOCK) > BPF_EXIST) ||
>> -	    /* BPF_F_LOCK can only be used in a value with spin_lock */
>> -	    unlikely((map_flags & BPF_F_LOCK) && !map_value_has_spin_lock(map)))
>> -		return ERR_PTR(-EINVAL);
>> -
>>  	smap = (struct bpf_local_storage_map *)map;
>> -	local_storage = rcu_dereference(sk->sk_bpf_storage);
>> -	if (!local_storage || hlist_empty(&local_storage->list)) {
>> -		/* Very first elem for this object */
>> -		err = check_flags(NULL, map_flags);
> This check_flags here is missing in the later sk_storage_update().
> 
>> -		if (err)
>> -			return ERR_PTR(err);
>> -
>> -		selem = bpf_selem_alloc(smap, sk, value, true);
>> -		if (!selem)
>> -			return ERR_PTR(-ENOMEM);
>> -
>> -		err = sk_storage_alloc(sk, smap, selem);
>> -		if (err) {
>> -			kfree(selem);
>> -			atomic_sub(smap->elem_size, &sk->sk_omem_alloc);
>> -			return ERR_PTR(err);
>> -		}
>> -
>> -		return SDATA(selem);
>> -	}
>>  
>>  	if ((map_flags & BPF_F_LOCK) && !(map_flags & BPF_NOEXIST)) {
>>  		/* Hoping to find an old_sdata to do inline update
>>  		 * such that it can avoid taking the local_storage->lock
>>  		 * and changing the lists.
>>  		 */
>> -		old_sdata =
>> -			bpf_local_storage_lookup(local_storage, smap, false);
>> +		old_sdata = bpf_local_storage_lookup(local_storage, smap, false);
>>  		err = check_flags(old_sdata, map_flags);
>>  		if (err)
>>  			return ERR_PTR(err);
>> +
>>  		if (old_sdata && selem_linked_to_storage(SELEM(old_sdata))) {
>>  			copy_map_value_locked(map, old_sdata->data,
>>  					      value, false);
> 
> [ ... ]
> 
>> +static struct bpf_local_storage_data *
>> +sk_storage_update(void *owner, struct bpf_map *map, void *value, u64 map_flags)
>> +{
>> +	struct bpf_local_storage_data *old_sdata = NULL;
>> +	struct bpf_local_storage_elem *selem;
>> +	struct bpf_local_storage *local_storage;
>> +	struct bpf_local_storage_map *smap;
>> +	struct sock *sk;
>> +	int err;
>> +
>> +	err = bpf_local_storage_check_update_flags(map, map_flags);
>> +	if (err)
>> +		return ERR_PTR(err);
>> +
>> +	sk = owner;
>> +	local_storage = rcu_dereference(sk->sk_bpf_storage);
>> +	smap = (struct bpf_local_storage_map *)map;
>> +
>> +	if (!local_storage || hlist_empty(&local_storage->list)) {
> 
> "check_flags(NULL, map_flags);" is gone in this refactoring.
> 
> This part of code is copied into the inode_storage_update()
> in the latter patch which then has the same issue.
> 
>> +		/* Very first elem */
>> +		selem = map->ops->map_selem_alloc(smap, owner, value, !old_sdata);
>> +		if (!selem)
>> +			return ERR_PTR(-ENOMEM);
> 
>>  static int sk_storage_map_btf_id;
>>  const struct bpf_map_ops sk_storage_map_ops = {
>> -	.map_alloc_check = bpf_sk_storage_map_alloc_check,
>> -	.map_alloc = bpf_local_storage_map_alloc,
>> -	.map_free = bpf_local_storage_map_free,
>> +	.map_alloc_check = bpf_local_storage_map_alloc_check,
>> +	.map_alloc = sk_storage_map_alloc,
>> +	.map_free = sk_storage_map_free,
>>  	.map_get_next_key = notsupp_get_next_key,
>>  	.map_lookup_elem = bpf_fd_sk_storage_lookup_elem,
>>  	.map_update_elem = bpf_fd_sk_storage_update_elem,
>>  	.map_delete_elem = bpf_fd_sk_storage_delete_elem,
>> -	.map_check_btf = bpf_sk_storage_map_check_btf,
>> +	.map_check_btf = bpf_local_storage_map_check_btf,
>>  	.map_btf_name = "bpf_local_storage_map",
>>  	.map_btf_id = &sk_storage_map_btf_id,
>> +	.map_selem_alloc = sk_selem_alloc,
>> +	.map_local_storage_update = sk_storage_update,
>> +	.map_local_storage_unlink = unlink_sk_storage,
> I think refactoring codes as map_selem_alloc, map_local_storage_update,
> and map_local_storage_unlink is not the best option.  The sk and inode
> version of the above map_ops are mostly the same.  Fixing the
> issue like the one mentioned above need to fix both sk, inode, and
> the future kernel-object code.
> 
> The only difference is sk charge omem and inode does not.
> I have played around a little.  I think adding the following three ops (pasted at
> the end) is better and should be enough for both sk and inode.  The inode
> does not even have to implement the (un)charge ops at all.
> 
> That should remove the duplication for the followings:
> - (sk|inode)_selem_alloc
> - (sk|inode)_storage_update
> - unlink_(sk|inode)_storage
> - (sk|inode)_storage_alloc
> 
> Another bonus is the new bpf_local_storage_check_update_flags() and
> bpf_local_storage_publish() will be no longer needed too.

I really like this approach. Thank you so much!

> 
> I have hacked up this patch 3 change to compiler-test out this idea.
> I will post in another email.  Let me know wdy> 
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -33,6 +33,9 @@ struct btf;
>  struct btf_type;
>  struct exception_table_entry;
>  struct seq_operations;
> +struct bpf_local_storage;
> +struct bpf_local_storage_map;
> +struct bpf_local_storage_elem;
>  
>  extern struct idr btf_idr;
>  extern spinlock_t btf_idr_lock;
> @@ -93,6 +96,13 @@ struct bpf_map_ops {
>  	__poll_t (*map_poll)(struct bpf_map *map, struct file *filp,
>  			     struct poll_table_struct *pts);
>  
> +	/* Functions called by bpf_local_storage maps */
> +	int (*map_local_storage_charge)(struct bpf_local_storage_map *smap,
> +					void *owner, u32 size);
> +	void (*map_local_storage_uncharge)(struct bpf_local_storage_map *smap,
> +					   void *owner, u32 size);
> +	struct bpf_local_storage __rcu ** (*map_owner_storage_ptr)(struct bpf_local_storage_map *smap,
> +								   void *owner);
>  	/* BTF name and id of struct allocated by map_alloc */
>  	const char * const map_btf_name;
>  	int *map_btf_id;
> 

^ permalink raw reply

* Re: [RFC PATCH bpf-next] bpf: POC on local_storage charge and uncharge map_ops
From: KP Singh @ 2020-07-27 20:26 UTC (permalink / raw)
  To: Martin KaFai Lau, KP Singh
  Cc: bpf, linux-kernel, linux-security-module, Alexei Starovoitov,
	Daniel Borkmann, Paul Turner, Jann Horn, Florent Revest
In-Reply-To: <20200725013047.4006241-1-kafai@fb.com>

Thanks for this, I was able to update the series with this patch and it works.
One minor comment though.

I was wondering how should I send it as a part of the series. I will keep the
original commit description + mention this thread and add your Co-Developed-by:
tag and then you can add your Signed-off-by: as well. I am not sure of the 
canonical way here and am open to suggestions :)

- KP

On 25.07.20 03:30, Martin KaFai Lau wrote:
> It is a direct replacement of the patch 3 in discussion [1]
> and to test out the idea on adding
> map_local_storage_charge, map_local_storage_uncharge,
> and map_owner_storage_ptr.
> 
> It is only compiler tested to demo the idea.
> 
> [1]: https://patchwork.ozlabs.org/project/netdev/patch/20200723115032.460770-4-kpsingh@chromium.org/
> 
> Signed-off-by: Martin KaFai Lau <kafai@fb.com>
> ---
>  include/linux/bpf.h            |  10 ++
>  include/net/bpf_sk_storage.h   |  51 +++++++
>  include/uapi/linux/bpf.h       |   8 +-

[...]

> +
> +static void sk_storage_uncharge(struct bpf_local_storage_map *smap,
> +				void *owner, u32 size)
> +{
> +	struct sock *sk = owner;
> +
> +	atomic_sub(size, &sk->sk_omem_alloc);
> +}
> +
> +static struct bpf_local_storage __rcu **
> +sk_storage_ptr(struct bpf_local_storage_map *smap, void *owner)

Do we need an smap pointer here? It's not being used and is also not
used for inode as well.

- KP

> +{
> +	struct sock *sk = owner;
> +
> +	return &sk->sk_bpf_storage;
> +}
> +

[...]

> -/* BPF_FUNC_sk_storage_get flags */
> +/* BPF_FUNC_<local>_storage_get flags */
>  enum {
> -	BPF_SK_STORAGE_GET_F_CREATE	= (1ULL << 0),
> +	BPF_LOCAL_STORAGE_GET_F_CREATE	= (1ULL << 0),
> +	/* BPF_SK_STORAGE_GET_F_CREATE is only kept for backward compatibility
> +	 * and BPF_LOCAL_STORAGE_GET_F_CREATE must be used instead.
> +	 */
> +	BPF_SK_STORAGE_GET_F_CREATE  = BPF_LOCAL_STORAGE_GET_F_CREATE,
>  };
>  
>  /* BPF_FUNC_read_branch_records flags. */
> 

^ permalink raw reply

* Re: [PATCH v7 4/7] fs: Introduce O_MAYEXEC flag for openat2(2)
From: Mickaël Salaün @ 2020-07-27 19:46 UTC (permalink / raw)
  To: Florian Weimer, Al Viro
  Cc: linux-kernel, Aleksa Sarai, Alexei Starovoitov, Andrew Morton,
	Andy Lutomirski, Christian Brauner, Christian Heimes,
	Daniel Borkmann, Deven Bowers, Dmitry Vyukov, Eric Biggers,
	Eric Chiang, James Morris, Jan Kara, Jann Horn, Jonathan Corbet,
	Kees Cook, Lakshmi Ramasubramanian, Matthew Garrett,
	Matthew Wilcox, Michael Kerrisk, Mimi Zohar,
	Philippe Trébuchet, Scott Shell, Sean Christopherson,
	Shuah Khan, Steve Dower, Steve Grubb, Tetsuo Handa,
	Thibaut Sautereau, Vincent Strubel, kernel-hardening, linux-api,
	linux-integrity, linux-security-module, linux-fsdevel,
	Thibaut Sautereau
In-Reply-To: <87y2n55xzv.fsf@oldenburg2.str.redhat.com>


On 27/07/2020 07:27, Florian Weimer wrote:
> * Al Viro:
> 
>> On Thu, Jul 23, 2020 at 07:12:24PM +0200, Mickaël Salaün wrote:
>>> When the O_MAYEXEC flag is passed, openat2(2) may be subject to
>>> additional restrictions depending on a security policy managed by the
>>> kernel through a sysctl or implemented by an LSM thanks to the
>>> inode_permission hook.  This new flag is ignored by open(2) and
>>> openat(2) because of their unspecified flags handling.  When used with
>>> openat2(2), the default behavior is only to forbid to open a directory.
>>
>> Correct me if I'm wrong, but it looks like you are introducing a magical
>> flag that would mean "let the Linux S&M take an extra special whip
>> for this open()".

There is nothing magic, it doesn't only work with the LSM framework, and
there is nothing painful nor humiliating here (except maybe this language).

>>
>> Why is it done during open?  If the caller is passing it deliberately,
>> why not have an explicit request to apply given torture device to an
>> already opened file?  Why not sys_masochism(int fd, char *hurt_flavour),
>> for that matter?
> 
> While I do not think this is appropriate language for a workplace, Al
> has a point: If the auditing event can be generated on an already-open
> descriptor, it would also cover scenarios like this one:
> 
>   perl < /path/to/script
> 
> Where the process that opens the file does not (and cannot) know that it
> will be used for execution purposes.

The check is done during open because the goal of this patch series is
to address the problem of script execution when opening a script in well
controlled systems (e.g. to enforce a "write xor execute" policy, to do
an atomic integrity check [1], to check specific execute/read
permissions, etc.). As discussed multiple times, controlling other means
to interpret commands (stdin, environment variables, etc.) is out of
scope and should be handled by interpreters (in userspace). Someone
could still extend fcntl(2) to enable to check file descriptors, but it
is an independent change not required for now.
Specific audit features are also out of scope for now [2].

[1] https://lore.kernel.org/lkml/1544699060.6703.11.camel@linux.ibm.com/
[2] https://lore.kernel.org/lkml/202007160822.CCDB5478@keescook/

^ permalink raw reply

* Re: [PATCH v3 00/19] Introduce partial kernel_read_file() support
From: Scott Branden @ 2020-07-27 19:18 UTC (permalink / raw)
  To: Mimi Zohar, Kees Cook, Greg Kroah-Hartman
  Cc: Luis Chamberlain, Jessica Yu, SeongJae Park, KP Singh, linux-efi,
	linux-security-module, linux-integrity, selinux, linux-kselftest,
	linux-kernel
In-Reply-To: <1595848589.4841.78.camel@kernel.org>

Hi Mimi/Kees,

On 2020-07-27 4:16 a.m., Mimi Zohar wrote:
> On Fri, 2020-07-24 at 14:36 -0700, Kees Cook wrote:
>> v3:
>> - add reviews/acks
>> - add "IMA: Add support for file reads without contents" patch
>> - trim CC list, in case that's why vger ignored v2
>> v2: [missing from lkml archives! (CC list too long?) repeating changes here]
>> - fix issues in firmware test suite
>> - add firmware partial read patches
>> - various bug fixes/cleanups
>> v1: https://lore.kernel.org/lkml/20200717174309.1164575-1-keescook@chromium.org/
>>
>> Hi,
>>
>> Here's my tree for adding partial read support in kernel_read_file(),
>> which fixes a number of issues along the way. It's got Scott's firmware
>> and IMA patches ported and everything tests cleanly for me (even with
>> CONFIG_IMA_APPRAISE=y).
> Thanks, Kees.  Other than my comments on the new
> security_kernel_post_load_data() hook, the patch set is really nice.
>
> In addition to compiling with CONFIG_IMA_APPRAISE enabled, have you
> booted the kernel with the ima_policy=tcb?  The tcb policy will add
> measurements to the IMA measurement list and extend the TPM with the
> file or buffer data digest.  Are you seeing the firmware measurements,
> in particular the partial read measurement?
I booted the kernel with ima_policy=tcb.

Unfortunately after enabling the following, fw_run_tests.sh does not run.

mkdir /sys/kernel/security
mount -t securityfs securityfs /sys/kernel/security
echo "measure func=FIRMWARE_CHECK" > /sys/kernel/security/ima/policy
echo "appraise func=FIRMWARE_CHECK appraise_type=imasig" > /sys/kernel/security/ima/policy
./fw_run_tests.sh

[ 1296.258052] test_firmware: loading 'test-firmware.bin'
[ 1296.263903] misc test_firmware: loading /lib/firmware/test-firmware.bin failed with error -13
[ 1296.263905] audit: type=1800 audit(1595905754.266:9): pid=5696 uid=0 auid=4294967295 ses=4294967295 subj=kernel op=appraise_data cause=IMA-signature-required comm="fw_namespace" name="/lib/firmware/test-firmware.bin" dev="tmpfs" ino=4592 res=0
[ 1296.297085] misc test_firmware: Direct firmware load for test-firmware.bin failed with error -13
[ 1296.305947] test_firmware: load of 'test-firmware.bin' failed: -13

>
> thanks,
>
> Mimi


^ permalink raw reply

* Re: [PATCH v19 20/23] Audit: Add new record for multiple process LSM attributes
From: Stephen Smalley @ 2020-07-27 19:04 UTC (permalink / raw)
  To: Casey Schaufler, casey.schaufler, jmorris, linux-security-module,
	selinux
  Cc: john.johansen, linux-audit, sds
In-Reply-To: <20200724203226.16374-21-casey@schaufler-ca.com>

On 7/24/20 4:32 PM, Casey Schaufler wrote:

> Create a new audit record type to contain the subject information
> when there are multiple security modules that require such data.
> This record is linked with the same timestamp and serial number.
> The record is produced only in cases where there is more than one
> security module with a process "context".
>
> Before this change the only audit events that required multiple
> records were syscall events. Several non-syscall events include
> subject contexts, so the use of audit_context data has been expanded
> as necessary.
>
> Signed-off-by: Casey Schaufler <casey@schaufler-ca.com>
> Cc: linux-audit@redhat.com
> ---
> diff --git a/include/net/xfrm.h b/include/net/xfrm.h
> index c7d213c9f9d8..930432c3912e 100644
> --- a/include/net/xfrm.h
> +++ b/include/net/xfrm.h
> @@ -672,11 +672,13 @@ static inline struct audit_buffer *xfrm_audit_start(const char *op)
>   
>   	if (audit_enabled == AUDIT_OFF)
>   		return NULL;
> +	audit_stamp_context(audit_context());
>   	audit_buf = audit_log_start(audit_context(), GFP_ATOMIC,
>   				    AUDIT_MAC_IPSEC_EVENT);
>   	if (audit_buf == NULL)
>   		return NULL;
>   	audit_log_format(audit_buf, "op=%s", op);
> +	audit_log_lsm(NULL, false);

Notice that the audit_log_start() call above specified GFP_ATOMIC. But 
your audit_log_lsm() uses GFP_KERNEL. You'll either need to always use 
GFP_ATOMIC in audit_log_lsm() or pass in the gfp flags there.  Make sure 
you test with CONFIG_DEBUG_ATOMIC_SLEEP=y and check your dmesg output.



^ permalink raw reply

* Re: [PATCH 1/2] LSM: Signal to SafeSetID when in set*gid syscall
From: James Morris @ 2020-07-27 18:44 UTC (permalink / raw)
  To: Micah Morton
  Cc: linux-security-module, keescook, casey, paul,
	stephen.smalley.work, serge, Thomas Cedeno
In-Reply-To: <20200720181156.1461461-1-mortonm@chromium.org>

On Mon, 20 Jul 2020, Micah Morton wrote:

> From: Thomas Cedeno <thomascedeno@google.com>
> 
> For SafeSetID to properly gate set*gid() calls, it needs to know whether
> ns_capable() is being called from within a sys_set*gid() function or is
> being called from elsewhere in the kernel. This allows SafeSetID to deny
> CAP_SETGID to restricted groups when they are attempting to use the
> capability for code paths other than updating GIDs (e.g. setting up
> userns GID mappings). This is the identical approach to what is
> currently done for CAP_SETUID.
> 
> Signed-off-by: Thomas Cedeno <thomascedeno@google.com>
> Signed-off-by: Micah Morton <mortonm@chromium.org>


Acked-by: James Morris <jamorris@linux.microsoft.com>


-- 
James Morris
<jmorris@namei.org>


^ permalink raw reply

* Re: [PATCH] LSM: drop duplicated words in header file comments
From: James Morris @ 2020-07-27 18:39 UTC (permalink / raw)
  To: Randy Dunlap; +Cc: LKML, Serge E. Hallyn, linux-security-module
In-Reply-To: <9299abf4-75e3-6d73-a8b8-c2617208a990@infradead.org>

On Fri, 17 Jul 2020, Randy Dunlap wrote:

> From: Randy Dunlap <rdunlap@infradead.org>
> 
> Drop the doubled words "the" and "and" in comments.
> 
> Signed-off-by: Randy Dunlap <rdunlap@infradead.org>

Applied to
git://git.kernel.org/pub/scm/linux/kernel/git/jmorris/linux-security.git next-general


-- 
James Morris
<jmorris@namei.org>


^ permalink raw reply

* Re: [PATCH] integrity: remove redundant initialization of variable ret
From: James Morris @ 2020-07-27 18:05 UTC (permalink / raw)
  To: Colin King, Mimi Zohar
  Cc: Serge E . Hallyn, linux-security-module, kernel-janitors,
	linux-kernel
In-Reply-To: <20200701134634.549399-1-colin.king@canonical.com>

On Wed, 1 Jul 2020, Colin King wrote:

> From: Colin Ian King <colin.king@canonical.com>
> 
> The variable ret is being initialized with a value that is never read
> and it is being updated later with a new value.  The initialization is
> redundant and can be removed.
> 
> Addresses-Coverity: ("Unused value")
> Signed-off-by: Colin Ian King <colin.king@canonical.com>
> ---
>  security/integrity/digsig_asymmetric.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/security/integrity/digsig_asymmetric.c b/security/integrity/digsig_asymmetric.c
> index 4e0d6778277e..cfa4127d0518 100644
> --- a/security/integrity/digsig_asymmetric.c
> +++ b/security/integrity/digsig_asymmetric.c
> @@ -79,7 +79,7 @@ int asymmetric_verify(struct key *keyring, const char *sig,
>  	struct public_key_signature pks;
>  	struct signature_v2_hdr *hdr = (struct signature_v2_hdr *)sig;
>  	struct key *key;
> -	int ret = -ENOMEM;
> +	int ret;

Assuming Mimi will grab this.


Acked-by: James Morris <jamorris@linux.microsoft.com>

-- 
James Morris
<jmorris@namei.org>


^ permalink raw reply

* Re: [PATCH] smack: fix slab-out-of-bounds by checking for overflow
From: Casey Schaufler @ 2020-07-27 17:38 UTC (permalink / raw)
  To: B K Karthik, James Morris, Serge E. Hallyn, linux-security-module,
	linux-kernel, gregkh, skhan, syzkaller-bugs, Casey Schaufler
In-Reply-To: <20200725125800.vahdiagxilinzoqw@pesu.pes.edu>

On 7/25/2020 5:58 AM, B K Karthik wrote:
> add a barrier to smk_set_cipso() to check for overflow

Thank you for your patch. Dan Carpenter <dan.carpenter@oracle.com>
has already submitted an identical patch.

>
> ==================================================================
> BUG: KASAN: slab-out-of-bounds in vsscanf+0x2666/0x2ef0 lib/vsprintf.c:3321
> Read of size 1 at addr ffff888097d682b8 by task syz-executor980/6804
>
> CPU: 0 PID: 6804 Comm: syz-executor980 Not tainted 5.8.0-rc5-syzkaller #0
> Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
> Call Trace:
>  __dump_stack lib/dump_stack.c:77 [inline]
>  dump_stack+0x1f0/0x31e lib/dump_stack.c:118
>  print_address_description+0x66/0x5a0 mm/kasan/report.c:383
>  __kasan_report mm/kasan/report.c:513 [inline]
>  kasan_report+0x132/0x1d0 mm/kasan/report.c:530
>  vsscanf+0x2666/0x2ef0 lib/vsprintf.c:3321
>  sscanf+0x6c/0x90 lib/vsprintf.c:3527
>  smk_set_cipso+0x374/0x6c0 security/smack/smackfs.c:908
>  vfs_write+0x2dd/0xc70 fs/read_write.c:576
>  ksys_write+0x11b/0x220 fs/read_write.c:631
>  do_syscall_64+0x73/0xe0 arch/x86/entry/common.c:384
>  entry_SYSCALL_64_after_hwframe+0x44/0xa9
> RIP: 0033:0x4402d9
> Code: Bad RIP value.
> RSP: 002b:00007ffe89010db8 EFLAGS: 00000246 ORIG_RAX: 0000000000000001
> RAX: ffffffffffffffda RBX: 00000000004002c8 RCX: 00000000004402d9
> RDX: 0000000000000037 RSI: 0000000020000040 RDI: 0000000000000003
> RBP: 00000000006ca018 R08: 0000000000000014 R09: 00000000004002c8
> R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000401ae0
> R13: 0000000000401b70 R14: 0000000000000000 R15: 0000000000000000
>
> Allocated by task 6804:
>  save_stack mm/kasan/common.c:48 [inline]
>  set_track mm/kasan/common.c:56 [inline]
>  __kasan_kmalloc+0x103/0x140 mm/kasan/common.c:494
>  __do_kmalloc mm/slab.c:3656 [inline]
>  __kmalloc_track_caller+0x249/0x320 mm/slab.c:3671
>  memdup_user_nul+0x26/0xf0 mm/util.c:259
>  smk_set_cipso+0xff/0x6c0 security/smack/smackfs.c:859
>  vfs_write+0x2dd/0xc70 fs/read_write.c:576
>  ksys_write+0x11b/0x220 fs/read_write.c:631
>  do_syscall_64+0x73/0xe0 arch/x86/entry/common.c:384
>  entry_SYSCALL_64_after_hwframe+0x44/0xa9
>
> Freed by task 4906:
>  save_stack mm/kasan/common.c:48 [inline]
>  set_track mm/kasan/common.c:56 [inline]
>  kasan_set_free_info mm/kasan/common.c:316 [inline]
>  __kasan_slab_free+0x114/0x170 mm/kasan/common.c:455
>  __cache_free mm/slab.c:3426 [inline]
>  kfree+0x10a/0x220 mm/slab.c:3757
>  tomoyo_path_number_perm+0x525/0x690 security/tomoyo/file.c:736
>  tomoyo_path_mknod+0x128/0x150 security/tomoyo/tomoyo.c:240
>  security_path_mknod+0xdc/0x160 security/security.c:1077
>  may_o_create fs/namei.c:2919 [inline]
>  lookup_open fs/namei.c:3060 [inline]
>  open_last_lookups fs/namei.c:3169 [inline]
>  path_openat+0xbe8/0x37f0 fs/namei.c:3357
>  do_filp_open+0x191/0x3a0 fs/namei.c:3387
>  do_sys_openat2+0x463/0x770 fs/open.c:1179
>  do_sys_open fs/open.c:1195 [inline]
>  ksys_open include/linux/syscalls.h:1388 [inline]
>  __do_sys_open fs/open.c:1201 [inline]
>  __se_sys_open fs/open.c:1199 [inline]
>  __x64_sys_open+0x1af/0x1e0 fs/open.c:1199
>  do_syscall_64+0x73/0xe0 arch/x86/entry/common.c:384
>  entry_SYSCALL_64_after_hwframe+0x44/0xa9
>
> The buggy address belongs to the object at ffff888097d68280
>  which belongs to the cache kmalloc-64 of size 64
> The buggy address is located 56 bytes inside of
>  64-byte region [ffff888097d68280, ffff888097d682c0)
> The buggy address belongs to the page:
> page:ffffea00025f5a00 refcount:1 mapcount:0 mapping:0000000000000000 index:0xffff888097d68c80
> flags: 0xfffe0000000200(slab)
> raw: 00fffe0000000200 ffffea000288fe08 ffffea00026f38c8 ffff8880aa400380
> raw: ffff888097d68c80 ffff888097d68000 000000010000001e 0000000000000000
> page dumped because: kasan: bad access detected
>
> Memory state around the buggy address:
>  ffff888097d68180: 00 00 00 00 01 fc fc fc fc fc fc fc fc fc fc fc
>  ffff888097d68200: fb fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc
>> ffff888097d68280: 00 00 00 00 00 00 00 fc fc fc fc fc fc fc fc fc
>                                         ^
>  ffff888097d68300: 00 00 00 00 01 fc fc fc fc fc fc fc fc fc fc fc
>  ffff888097d68380: 00 00 00 00 01 fc fc fc fc fc fc fc fc fc fc fc
> ==================================================================
>
> Reported-and-testedby: syzbot+a22c6092d003d6fe1122@syzkaller.appspotmail.com
> Signed-off-by: B K Karthik <bkkarthik@pesu.pes.edu>
> ---
>  security/smack/smackfs.c | 5 +++++
>  1 file changed, 5 insertions(+)
>
> diff --git a/security/smack/smackfs.c b/security/smack/smackfs.c
> index 58d3f43cc8bb..17809310d046 100644
> --- a/security/smack/smackfs.c
> +++ b/security/smack/smackfs.c
> @@ -905,6 +905,11 @@ static ssize_t smk_set_cipso(struct file *file, const char __user *buf,
>  
>  	for (i = 0; i < catlen; i++) {
>  		rule += SMK_DIGITLEN;
> +		if (rule > data + count) {
> +			rc = -EOVERFLOW;
> +			goto out;
> +		}
> +
>  		ret = sscanf(rule, "%u", &cat);
>  		if (ret != 1 || cat > SMACK_CIPSO_MAXCATNUM)
>  			goto out;

^ permalink raw reply

* Re: [PATCH v3 10/19] fs/kernel_read_file: Add file_size output argument
From: Mimi Zohar @ 2020-07-27 16:29 UTC (permalink / raw)
  To: Kees Cook, Greg Kroah-Hartman
  Cc: Scott Branden, Luis Chamberlain, Jessica Yu, SeongJae Park,
	KP Singh, linux-efi, linux-security-module, linux-integrity,
	selinux, linux-kselftest, linux-kernel
In-Reply-To: <20200724213640.389191-11-keescook@chromium.org>

On Fri, 2020-07-24 at 14:36 -0700, Kees Cook wrote:
> In preparation for adding partial read support, add an optional output
> argument to kernel_read_file*() that reports the file size so callers
> can reason more easily about their reading progress.
> 
> Acked-by: Scott Branden <scott.branden@broadcom.com>
> Signed-off-by: Kees Cook <keescook@chromium.org>

Reviewed-by: Mimi Zohar <zohar@linux.ibm.com>

^ permalink raw reply

* Re: [PATCH v3 09/19] fs/kernel_read_file: Switch buffer size arg to size_t
From: Mimi Zohar @ 2020-07-27 16:29 UTC (permalink / raw)
  To: Kees Cook, Greg Kroah-Hartman
  Cc: Scott Branden, Luis Chamberlain, Jessica Yu, SeongJae Park,
	KP Singh, linux-efi, linux-security-module, linux-integrity,
	selinux, linux-kselftest, linux-kernel
In-Reply-To: <20200724213640.389191-10-keescook@chromium.org>

On Fri, 2020-07-24 at 14:36 -0700, Kees Cook wrote:
> In preparation for further refactoring of kernel_read_file*(), rename
> the "max_size" argument to the more accurate "buf_size", and correct
> its type to size_t. Add kerndoc to explain the specifics of how the
> arguments will be used. Note that with buf_size now size_t, it can no
> longer be negative (and was never called with a negative value). Adjust
> callers to use it as a "maximum size" when *buf is NULL.
> 
> Acked-by: Scott Branden <scott.branden@broadcom.com>
> Signed-off-by: Kees Cook <keescook@chromium.org>

Reviewed-by: Mimi Zohar <zohar@linux.ibm.com>

^ permalink raw reply

* Re: [PATCH v3 08/19] fs/kernel_read_file: Remove redundant size argument
From: Mimi Zohar @ 2020-07-27 16:29 UTC (permalink / raw)
  To: Kees Cook, Greg Kroah-Hartman
  Cc: Scott Branden, Luis Chamberlain, Jessica Yu, SeongJae Park,
	KP Singh, linux-efi, linux-security-module, linux-integrity,
	selinux, linux-kselftest, linux-kernel
In-Reply-To: <20200724213640.389191-9-keescook@chromium.org>

On Fri, 2020-07-24 at 14:36 -0700, Kees Cook wrote:
> In preparation for refactoring kernel_read_file*(), remove the redundant
> "size" argument which is not needed: it can be included in the return
> code, with callers adjusted. (VFS reads already cannot be larger than
> INT_MAX.)
> 
> Acked-by: Scott Branden <scott.branden@broadcom.com>
> Signed-off-by: Kees Cook <keescook@chromium.org>

Reviewed-by: Mimi Zohar <zohar@linux.ibm.com>

^ permalink raw reply

* Re: [PATCH v19 02/23] LSM: Create and manage the lsmblob data structure.
From: Stephen Smalley @ 2020-07-27 16:12 UTC (permalink / raw)
  To: Casey Schaufler
  Cc: Casey Schaufler, James Morris, LSM List, SElinux list,
	John Johansen, linux-audit, Stephen Smalley
In-Reply-To: <20200724203226.16374-3-casey@schaufler-ca.com>

On Fri, Jul 24, 2020 at 4:35 PM Casey Schaufler <casey@schaufler-ca.com> wrote:
>
> When more than one security module is exporting data to
> audit and networking sub-systems a single 32 bit integer
> is no longer sufficient to represent the data. Add a
> structure to be used instead.
>
> The lsmblob structure is currently an array of
> u32 "secids". There is an entry for each of the
> security modules built into the system that would
> use secids if active. The system assigns the module
> a "slot" when it registers hooks. If modules are
> compiled in but not registered there will be unused
> slots.
>
> A new lsm_id structure, which contains the name
> of the LSM and its slot number, is created. There
> is an instance for each LSM, which assigns the name
> and passes it to the infrastructure to set the slot.
>
> The audit rules data is expanded to use an array of
> security module data rather than a single instance.
> Because IMA uses the audit rule functions it is
> affected as well.
>
> Acked-by: Stephen Smalley <sds@tycho.nsa.gov>
> Acked-by: Paul Moore <paul@paul-moore.com>
> Signed-off-by: Casey Schaufler <casey@schaufler-ca.com>

With CONFIG_BPF_LSM=y:

security/bpf/hooks.c: In function ‘bpf_lsm_init’:
security/bpf/hooks.c:18:63: error: passing argument 3 of
‘security_add_hooks’ from incompatible pointer type
[-Werror=incompatible-pointer-types]
   18 |  security_add_hooks(bpf_lsm_hooks, ARRAY_SIZE(bpf_lsm_hooks), "bpf");
      |                                                               ^~~~~
      |                                                               |
      |                                                               char *
In file included from security/bpf/hooks.c:6:
./include/linux/lsm_hooks.h:1592:26: note: expected ‘struct lsm_id *’
but argument is of type ‘char *’
 1592 |           struct lsm_id *lsmid);
      |           ~~~~~~~~~~~~~~~^~~~~

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox