LinuxPPC-Dev Archive on lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH stable 4.16 10/14] powerpc/64s: Wire up cpu_show_spectre_v1()
From: Michael Ellerman @ 2018-05-22 14:41 UTC (permalink / raw)
  To: greg; +Cc: stable, tglx, linuxppc-dev
In-Reply-To: <20180522144125.10345-1-mpe@ellerman.id.au>

commit 56986016cb8cd9050e601831fe89f332b4e3c46e upstream.

Add a definition for cpu_show_spectre_v1() to override the generic
version. Currently this just prints "Not affected" or "Vulnerable"
based on the firmware flag.

Although the kernel does have array_index_nospec() in a few places, we
haven't yet audited all the powerpc code to see where it's necessary,
so for now we don't list that as a mitigation.

Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/security.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c
index 865db6f8bcca..0eace3cac818 100644
--- a/arch/powerpc/kernel/security.c
+++ b/arch/powerpc/kernel/security.c
@@ -50,3 +50,11 @@ ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, cha
 
 	return sprintf(buf, "Vulnerable\n");
 }
+
+ssize_t cpu_show_spectre_v1(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	if (!security_ftr_enabled(SEC_FTR_BNDS_CHK_SPEC_BAR))
+		return sprintf(buf, "Not affected\n");
+
+	return sprintf(buf, "Vulnerable\n");
+}
-- 
2.14.1

^ permalink raw reply related

* [PATCH stable 4.16 11/14] powerpc/64s: Wire up cpu_show_spectre_v2()
From: Michael Ellerman @ 2018-05-22 14:41 UTC (permalink / raw)
  To: greg; +Cc: stable, tglx, linuxppc-dev
In-Reply-To: <20180522144125.10345-1-mpe@ellerman.id.au>

commit d6fbe1c55c55c6937cbea3531af7da84ab7473c3 upstream.

Add a definition for cpu_show_spectre_v2() to override the generic
version. This has several permuations, though in practice some may not
occur we cater for any combination.

The most verbose is:

  Mitigation: Indirect branch serialisation (kernel only), Indirect
  branch cache disabled, ori31 speculation barrier enabled

We don't treat the ori31 speculation barrier as a mitigation on its
own, because it has to be *used* by code in order to be a mitigation
and we don't know if userspace is doing that. So if that's all we see
we say:

  Vulnerable, ori31 speculation barrier enabled

Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/security.c | 33 +++++++++++++++++++++++++++++++++
 1 file changed, 33 insertions(+)

diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c
index 0eace3cac818..2cee3dcd231b 100644
--- a/arch/powerpc/kernel/security.c
+++ b/arch/powerpc/kernel/security.c
@@ -58,3 +58,36 @@ ssize_t cpu_show_spectre_v1(struct device *dev, struct device_attribute *attr, c
 
 	return sprintf(buf, "Vulnerable\n");
 }
+
+ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	bool bcs, ccd, ori;
+	struct seq_buf s;
+
+	seq_buf_init(&s, buf, PAGE_SIZE - 1);
+
+	bcs = security_ftr_enabled(SEC_FTR_BCCTRL_SERIALISED);
+	ccd = security_ftr_enabled(SEC_FTR_COUNT_CACHE_DISABLED);
+	ori = security_ftr_enabled(SEC_FTR_SPEC_BAR_ORI31);
+
+	if (bcs || ccd) {
+		seq_buf_printf(&s, "Mitigation: ");
+
+		if (bcs)
+			seq_buf_printf(&s, "Indirect branch serialisation (kernel only)");
+
+		if (bcs && ccd)
+			seq_buf_printf(&s, ", ");
+
+		if (ccd)
+			seq_buf_printf(&s, "Indirect branch cache disabled");
+	} else
+		seq_buf_printf(&s, "Vulnerable");
+
+	if (ori)
+		seq_buf_printf(&s, ", ori31 speculation barrier enabled");
+
+	seq_buf_printf(&s, "\n");
+
+	return s.len;
+}
-- 
2.14.1

^ permalink raw reply related

* [PATCH stable 4.16 12/14] powerpc/pseries: Fix clearing of security feature flags
From: Michael Ellerman @ 2018-05-22 14:41 UTC (permalink / raw)
  To: greg; +Cc: stable, tglx, linuxppc-dev
In-Reply-To: <20180522144125.10345-1-mpe@ellerman.id.au>

From: Mauricio Faria de Oliveira <mauricfo@linux.vnet.ibm.com>

commit 0f9bdfe3c77091e8704d2e510eb7c2c2c6cde524 upstream.

The H_CPU_BEHAV_* flags should be checked for in the 'behaviour' field
of 'struct h_cpu_char_result' -- 'character' is for H_CPU_CHAR_*
flags.

Found by playing around with QEMU's implementation of the hypercall:

  H_CPU_CHAR=0xf000000000000000
  H_CPU_BEHAV=0x0000000000000000

  This clears H_CPU_BEHAV_FAVOUR_SECURITY and H_CPU_BEHAV_L1D_FLUSH_PR
  so pseries_setup_rfi_flush() disables 'rfi_flush'; and it also
  clears H_CPU_CHAR_L1D_THREAD_PRIV flag. So there is no RFI flush
  mitigation at all for cpu_show_meltdown() to report; but currently
  it does:

  Original kernel:

    # cat /sys/devices/system/cpu/vulnerabilities/meltdown
    Mitigation: RFI Flush

  Patched kernel:

    # cat /sys/devices/system/cpu/vulnerabilities/meltdown
    Not affected

  H_CPU_CHAR=0x0000000000000000
  H_CPU_BEHAV=0xf000000000000000

  This sets H_CPU_BEHAV_BNDS_CHK_SPEC_BAR so cpu_show_spectre_v1() should
  report vulnerable; but currently it doesn't:

  Original kernel:

    # cat /sys/devices/system/cpu/vulnerabilities/spectre_v1
    Not affected

  Patched kernel:

    # cat /sys/devices/system/cpu/vulnerabilities/spectre_v1
    Vulnerable

Brown-paper-bag-by: Michael Ellerman <mpe@ellerman.id.au>
Fixes: f636c14790ea ("powerpc/pseries: Set or clear security feature flags")
Signed-off-by: Mauricio Faria de Oliveira <mauricfo@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/platforms/pseries/setup.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index 905e54878fed..6b2152839c48 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -484,13 +484,13 @@ static void init_cpu_char_feature_flags(struct h_cpu_char_result *result)
 	 * The features below are enabled by default, so we instead look to see
 	 * if firmware has *disabled* them, and clear them if so.
 	 */
-	if (!(result->character & H_CPU_BEHAV_FAVOUR_SECURITY))
+	if (!(result->behaviour & H_CPU_BEHAV_FAVOUR_SECURITY))
 		security_ftr_clear(SEC_FTR_FAVOUR_SECURITY);
 
-	if (!(result->character & H_CPU_BEHAV_L1D_FLUSH_PR))
+	if (!(result->behaviour & H_CPU_BEHAV_L1D_FLUSH_PR))
 		security_ftr_clear(SEC_FTR_L1D_FLUSH_PR);
 
-	if (!(result->character & H_CPU_BEHAV_BNDS_CHK_SPEC_BAR))
+	if (!(result->behaviour & H_CPU_BEHAV_BNDS_CHK_SPEC_BAR))
 		security_ftr_clear(SEC_FTR_BNDS_CHK_SPEC_BAR);
 }
 
-- 
2.14.1

^ permalink raw reply related

* [PATCH stable 4.16 13/14] powerpc: Move default security feature flags
From: Michael Ellerman @ 2018-05-22 14:41 UTC (permalink / raw)
  To: greg; +Cc: stable, tglx, linuxppc-dev
In-Reply-To: <20180522144125.10345-1-mpe@ellerman.id.au>

From: Mauricio Faria de Oliveira <mauricfo@linux.vnet.ibm.com>

commit e7347a86830f38dc3e40c8f7e28c04412b12a2e7 upstream.

This moves the definition of the default security feature flags
(i.e., enabled by default) closer to the security feature flags.

This can be used to restore current flags to the default flags.

Signed-off-by: Mauricio Faria de Oliveira <mauricfo@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/security_features.h | 8 ++++++++
 arch/powerpc/kernel/security.c               | 7 +------
 2 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/include/asm/security_features.h b/arch/powerpc/include/asm/security_features.h
index 400a9050e035..fa4d2e1cf772 100644
--- a/arch/powerpc/include/asm/security_features.h
+++ b/arch/powerpc/include/asm/security_features.h
@@ -63,4 +63,12 @@ static inline bool security_ftr_enabled(unsigned long feature)
 // Firmware configuration indicates user favours security over performance
 #define SEC_FTR_FAVOUR_SECURITY		0x0000000000000200ull
 
+
+// Features enabled by default
+#define SEC_FTR_DEFAULT \
+	(SEC_FTR_L1D_FLUSH_HV | \
+	 SEC_FTR_L1D_FLUSH_PR | \
+	 SEC_FTR_BNDS_CHK_SPEC_BAR | \
+	 SEC_FTR_FAVOUR_SECURITY)
+
 #endif /* _ASM_POWERPC_SECURITY_FEATURES_H */
diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c
index 2cee3dcd231b..bab5a27ea805 100644
--- a/arch/powerpc/kernel/security.c
+++ b/arch/powerpc/kernel/security.c
@@ -11,12 +11,7 @@
 #include <asm/security_features.h>
 
 
-unsigned long powerpc_security_features __read_mostly = \
-	SEC_FTR_L1D_FLUSH_HV | \
-	SEC_FTR_L1D_FLUSH_PR | \
-	SEC_FTR_BNDS_CHK_SPEC_BAR | \
-	SEC_FTR_FAVOUR_SECURITY;
-
+unsigned long powerpc_security_features __read_mostly = SEC_FTR_DEFAULT;
 
 ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf)
 {
-- 
2.14.1

^ permalink raw reply related

* [PATCH stable 4.16 14/14] powerpc/64s: Add support for a store forwarding barrier at kernel entry/exit
From: Michael Ellerman @ 2018-05-22 14:41 UTC (permalink / raw)
  To: greg; +Cc: stable, tglx, linuxppc-dev
In-Reply-To: <20180522144125.10345-1-mpe@ellerman.id.au>

From: Nicholas Piggin <npiggin@gmail.com>

commit a048a07d7f4535baa4cbad6bc024f175317ab938 upstream.

On some CPUs we can prevent a vulnerability related to store-to-load
forwarding by preventing store forwarding between privilege domains,
by inserting a barrier in kernel entry and exit paths.

This is known to be the case on at least Power7, Power8 and Power9
powerpc CPUs.

Barriers must be inserted generally before the first load after moving
to a higher privilege, and after the last store before moving to a
lower privilege, HV and PR privilege transitions must be protected.

Barriers are added as patch sections, with all kernel/hypervisor entry
points patched, and the exit points to lower privilge levels patched
similarly to the RFI flush patching.

Firmware advertisement is not implemented yet, so CPU flush types
are hard coded.

Thanks to Michal Suchánek for bug fixes and review.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Mauricio Faria de Oliveira <mauricfo@linux.vnet.ibm.com>
Signed-off-by: Michael Neuling <mikey@neuling.org>
Signed-off-by: Michal Suchánek <msuchanek@suse.de>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/powerpc/include/asm/exception-64s.h     |  29 ++++++
 arch/powerpc/include/asm/feature-fixups.h    |  19 ++++
 arch/powerpc/include/asm/security_features.h |  11 ++
 arch/powerpc/kernel/exceptions-64s.S         |  19 +++-
 arch/powerpc/kernel/security.c               | 149 +++++++++++++++++++++++++++
 arch/powerpc/kernel/vmlinux.lds.S            |  14 +++
 arch/powerpc/lib/feature-fixups.c            | 115 +++++++++++++++++++++
 arch/powerpc/platforms/powernv/setup.c       |   1 +
 arch/powerpc/platforms/pseries/setup.c       |   1 +
 9 files changed, 356 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h
index 471b2274fbeb..c40b4380951c 100644
--- a/arch/powerpc/include/asm/exception-64s.h
+++ b/arch/powerpc/include/asm/exception-64s.h
@@ -74,6 +74,27 @@
  */
 #define EX_R3		EX_DAR
 
+#define STF_ENTRY_BARRIER_SLOT						\
+	STF_ENTRY_BARRIER_FIXUP_SECTION;				\
+	nop;								\
+	nop;								\
+	nop
+
+#define STF_EXIT_BARRIER_SLOT						\
+	STF_EXIT_BARRIER_FIXUP_SECTION;					\
+	nop;								\
+	nop;								\
+	nop;								\
+	nop;								\
+	nop;								\
+	nop
+
+/*
+ * r10 must be free to use, r13 must be paca
+ */
+#define INTERRUPT_TO_KERNEL						\
+	STF_ENTRY_BARRIER_SLOT
+
 /*
  * Macros for annotating the expected destination of (h)rfid
  *
@@ -90,16 +111,19 @@
 	rfid
 
 #define RFI_TO_USER							\
+	STF_EXIT_BARRIER_SLOT;						\
 	RFI_FLUSH_SLOT;							\
 	rfid;								\
 	b	rfi_flush_fallback
 
 #define RFI_TO_USER_OR_KERNEL						\
+	STF_EXIT_BARRIER_SLOT;						\
 	RFI_FLUSH_SLOT;							\
 	rfid;								\
 	b	rfi_flush_fallback
 
 #define RFI_TO_GUEST							\
+	STF_EXIT_BARRIER_SLOT;						\
 	RFI_FLUSH_SLOT;							\
 	rfid;								\
 	b	rfi_flush_fallback
@@ -108,21 +132,25 @@
 	hrfid
 
 #define HRFI_TO_USER							\
+	STF_EXIT_BARRIER_SLOT;						\
 	RFI_FLUSH_SLOT;							\
 	hrfid;								\
 	b	hrfi_flush_fallback
 
 #define HRFI_TO_USER_OR_KERNEL						\
+	STF_EXIT_BARRIER_SLOT;						\
 	RFI_FLUSH_SLOT;							\
 	hrfid;								\
 	b	hrfi_flush_fallback
 
 #define HRFI_TO_GUEST							\
+	STF_EXIT_BARRIER_SLOT;						\
 	RFI_FLUSH_SLOT;							\
 	hrfid;								\
 	b	hrfi_flush_fallback
 
 #define HRFI_TO_UNKNOWN							\
+	STF_EXIT_BARRIER_SLOT;						\
 	RFI_FLUSH_SLOT;							\
 	hrfid;								\
 	b	hrfi_flush_fallback
@@ -254,6 +282,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
 #define __EXCEPTION_PROLOG_1_PRE(area)					\
 	OPT_SAVE_REG_TO_PACA(area+EX_PPR, r9, CPU_FTR_HAS_PPR);		\
 	OPT_SAVE_REG_TO_PACA(area+EX_CFAR, r10, CPU_FTR_CFAR);		\
+	INTERRUPT_TO_KERNEL;						\
 	SAVE_CTR(r10, area);						\
 	mfcr	r9;
 
diff --git a/arch/powerpc/include/asm/feature-fixups.h b/arch/powerpc/include/asm/feature-fixups.h
index 1e82eb3caabd..a9b64df34e2a 100644
--- a/arch/powerpc/include/asm/feature-fixups.h
+++ b/arch/powerpc/include/asm/feature-fixups.h
@@ -187,6 +187,22 @@ label##3:					       	\
 	FTR_ENTRY_OFFSET label##1b-label##3b;		\
 	.popsection;
 
+#define STF_ENTRY_BARRIER_FIXUP_SECTION			\
+953:							\
+	.pushsection __stf_entry_barrier_fixup,"a";	\
+	.align 2;					\
+954:							\
+	FTR_ENTRY_OFFSET 953b-954b;			\
+	.popsection;
+
+#define STF_EXIT_BARRIER_FIXUP_SECTION			\
+955:							\
+	.pushsection __stf_exit_barrier_fixup,"a";	\
+	.align 2;					\
+956:							\
+	FTR_ENTRY_OFFSET 955b-956b;			\
+	.popsection;
+
 #define RFI_FLUSH_FIXUP_SECTION				\
 951:							\
 	.pushsection __rfi_flush_fixup,"a";		\
@@ -199,6 +215,9 @@ label##3:					       	\
 #ifndef __ASSEMBLY__
 #include <linux/types.h>
 
+extern long stf_barrier_fallback;
+extern long __start___stf_entry_barrier_fixup, __stop___stf_entry_barrier_fixup;
+extern long __start___stf_exit_barrier_fixup, __stop___stf_exit_barrier_fixup;
 extern long __start___rfi_flush_fixup, __stop___rfi_flush_fixup;
 
 void apply_feature_fixups(void);
diff --git a/arch/powerpc/include/asm/security_features.h b/arch/powerpc/include/asm/security_features.h
index fa4d2e1cf772..44989b22383c 100644
--- a/arch/powerpc/include/asm/security_features.h
+++ b/arch/powerpc/include/asm/security_features.h
@@ -12,6 +12,17 @@
 extern unsigned long powerpc_security_features;
 extern bool rfi_flush;
 
+/* These are bit flags */
+enum stf_barrier_type {
+	STF_BARRIER_NONE	= 0x1,
+	STF_BARRIER_FALLBACK	= 0x2,
+	STF_BARRIER_EIEIO	= 0x4,
+	STF_BARRIER_SYNC_ORI	= 0x8,
+};
+
+void setup_stf_barrier(void);
+void do_stf_barrier_fixups(enum stf_barrier_type types);
+
 static inline void security_ftr_set(unsigned long feature)
 {
 	powerpc_security_features |= feature;
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 1ecfd8ffb098..bf9b94e376fd 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -833,7 +833,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_TM)
 #endif
 
 
-EXC_REAL_MASKABLE(decrementer, 0x900, 0x80, IRQS_DISABLED)
+EXC_REAL_OOL_MASKABLE(decrementer, 0x900, 0x80, IRQS_DISABLED)
 EXC_VIRT_MASKABLE(decrementer, 0x4900, 0x80, 0x900, IRQS_DISABLED)
 TRAMP_KVM(PACA_EXGEN, 0x900)
 EXC_COMMON_ASYNC(decrementer_common, 0x900, timer_interrupt)
@@ -909,6 +909,7 @@ EXC_COMMON(trap_0b_common, 0xb00, unknown_exception)
 	mtctr	r13;							\
 	GET_PACA(r13);							\
 	std	r10,PACA_EXGEN+EX_R10(r13);				\
+	INTERRUPT_TO_KERNEL;						\
 	KVMTEST_PR(0xc00); /* uses r10, branch to do_kvm_0xc00_system_call */ \
 	HMT_MEDIUM;							\
 	mfctr	r9;
@@ -917,7 +918,8 @@ EXC_COMMON(trap_0b_common, 0xb00, unknown_exception)
 #define SYSCALL_KVMTEST							\
 	HMT_MEDIUM;							\
 	mr	r9,r13;							\
-	GET_PACA(r13);
+	GET_PACA(r13);							\
+	INTERRUPT_TO_KERNEL;
 #endif
 	
 #define LOAD_SYSCALL_HANDLER(reg)					\
@@ -1455,6 +1457,19 @@ masked_##_H##interrupt:					\
 	b	.;					\
 	MASKED_DEC_HANDLER(_H)
 
+TRAMP_REAL_BEGIN(stf_barrier_fallback)
+	std	r9,PACA_EXRFI+EX_R9(r13)
+	std	r10,PACA_EXRFI+EX_R10(r13)
+	sync
+	ld	r9,PACA_EXRFI+EX_R9(r13)
+	ld	r10,PACA_EXRFI+EX_R10(r13)
+	ori	31,31,0
+	.rept 14
+	b	1f
+1:
+	.endr
+	blr
+
 TRAMP_REAL_BEGIN(rfi_flush_fallback)
 	SET_SCRATCH0(r13);
 	GET_PACA(r13);
diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c
index bab5a27ea805..b98a722da915 100644
--- a/arch/powerpc/kernel/security.c
+++ b/arch/powerpc/kernel/security.c
@@ -8,6 +8,7 @@
 #include <linux/device.h>
 #include <linux/seq_buf.h>
 
+#include <asm/debugfs.h>
 #include <asm/security_features.h>
 
 
@@ -86,3 +87,151 @@ ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, c
 
 	return s.len;
 }
+
+/*
+ * Store-forwarding barrier support.
+ */
+
+static enum stf_barrier_type stf_enabled_flush_types;
+static bool no_stf_barrier;
+bool stf_barrier;
+
+static int __init handle_no_stf_barrier(char *p)
+{
+	pr_info("stf-barrier: disabled on command line.");
+	no_stf_barrier = true;
+	return 0;
+}
+
+early_param("no_stf_barrier", handle_no_stf_barrier);
+
+/* This is the generic flag used by other architectures */
+static int __init handle_ssbd(char *p)
+{
+	if (!p || strncmp(p, "auto", 5) == 0 || strncmp(p, "on", 2) == 0 ) {
+		/* Until firmware tells us, we have the barrier with auto */
+		return 0;
+	} else if (strncmp(p, "off", 3) == 0) {
+		handle_no_stf_barrier(NULL);
+		return 0;
+	} else
+		return 1;
+
+	return 0;
+}
+early_param("spec_store_bypass_disable", handle_ssbd);
+
+/* This is the generic flag used by other architectures */
+static int __init handle_no_ssbd(char *p)
+{
+	handle_no_stf_barrier(NULL);
+	return 0;
+}
+early_param("nospec_store_bypass_disable", handle_no_ssbd);
+
+static void stf_barrier_enable(bool enable)
+{
+	if (enable)
+		do_stf_barrier_fixups(stf_enabled_flush_types);
+	else
+		do_stf_barrier_fixups(STF_BARRIER_NONE);
+
+	stf_barrier = enable;
+}
+
+void setup_stf_barrier(void)
+{
+	enum stf_barrier_type type;
+	bool enable, hv;
+
+	hv = cpu_has_feature(CPU_FTR_HVMODE);
+
+	/* Default to fallback in case fw-features are not available */
+	if (cpu_has_feature(CPU_FTR_ARCH_300))
+		type = STF_BARRIER_EIEIO;
+	else if (cpu_has_feature(CPU_FTR_ARCH_207S))
+		type = STF_BARRIER_SYNC_ORI;
+	else if (cpu_has_feature(CPU_FTR_ARCH_206))
+		type = STF_BARRIER_FALLBACK;
+	else
+		type = STF_BARRIER_NONE;
+
+	enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) &&
+		(security_ftr_enabled(SEC_FTR_L1D_FLUSH_PR) ||
+		 (security_ftr_enabled(SEC_FTR_L1D_FLUSH_HV) && hv));
+
+	if (type == STF_BARRIER_FALLBACK) {
+		pr_info("stf-barrier: fallback barrier available\n");
+	} else if (type == STF_BARRIER_SYNC_ORI) {
+		pr_info("stf-barrier: hwsync barrier available\n");
+	} else if (type == STF_BARRIER_EIEIO) {
+		pr_info("stf-barrier: eieio barrier available\n");
+	}
+
+	stf_enabled_flush_types = type;
+
+	if (!no_stf_barrier)
+		stf_barrier_enable(enable);
+}
+
+ssize_t cpu_show_spec_store_bypass(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	if (stf_barrier && stf_enabled_flush_types != STF_BARRIER_NONE) {
+		const char *type;
+		switch (stf_enabled_flush_types) {
+		case STF_BARRIER_EIEIO:
+			type = "eieio";
+			break;
+		case STF_BARRIER_SYNC_ORI:
+			type = "hwsync";
+			break;
+		case STF_BARRIER_FALLBACK:
+			type = "fallback";
+			break;
+		default:
+			type = "unknown";
+		}
+		return sprintf(buf, "Mitigation: Kernel entry/exit barrier (%s)\n", type);
+	}
+
+	if (!security_ftr_enabled(SEC_FTR_L1D_FLUSH_HV) &&
+	    !security_ftr_enabled(SEC_FTR_L1D_FLUSH_PR))
+		return sprintf(buf, "Not affected\n");
+
+	return sprintf(buf, "Vulnerable\n");
+}
+
+#ifdef CONFIG_DEBUG_FS
+static int stf_barrier_set(void *data, u64 val)
+{
+	bool enable;
+
+	if (val == 1)
+		enable = true;
+	else if (val == 0)
+		enable = false;
+	else
+		return -EINVAL;
+
+	/* Only do anything if we're changing state */
+	if (enable != stf_barrier)
+		stf_barrier_enable(enable);
+
+	return 0;
+}
+
+static int stf_barrier_get(void *data, u64 *val)
+{
+	*val = stf_barrier ? 1 : 0;
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_stf_barrier, stf_barrier_get, stf_barrier_set, "%llu\n");
+
+static __init int stf_barrier_debugfs_init(void)
+{
+	debugfs_create_file("stf_barrier", 0600, powerpc_debugfs_root, NULL, &fops_stf_barrier);
+	return 0;
+}
+device_initcall(stf_barrier_debugfs_init);
+#endif /* CONFIG_DEBUG_FS */
diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S
index c8af90ff49f0..b8d82678f8b4 100644
--- a/arch/powerpc/kernel/vmlinux.lds.S
+++ b/arch/powerpc/kernel/vmlinux.lds.S
@@ -133,6 +133,20 @@ SECTIONS
 	RO_DATA(PAGE_SIZE)
 
 #ifdef CONFIG_PPC64
+	. = ALIGN(8);
+	__stf_entry_barrier_fixup : AT(ADDR(__stf_entry_barrier_fixup) - LOAD_OFFSET) {
+		__start___stf_entry_barrier_fixup = .;
+		*(__stf_entry_barrier_fixup)
+		__stop___stf_entry_barrier_fixup = .;
+	}
+
+	. = ALIGN(8);
+	__stf_exit_barrier_fixup : AT(ADDR(__stf_exit_barrier_fixup) - LOAD_OFFSET) {
+		__start___stf_exit_barrier_fixup = .;
+		*(__stf_exit_barrier_fixup)
+		__stop___stf_exit_barrier_fixup = .;
+	}
+
 	. = ALIGN(8);
 	__rfi_flush_fixup : AT(ADDR(__rfi_flush_fixup) - LOAD_OFFSET) {
 		__start___rfi_flush_fixup = .;
diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c
index f61ff5a6bddb..6b3c2d405a6d 100644
--- a/arch/powerpc/lib/feature-fixups.c
+++ b/arch/powerpc/lib/feature-fixups.c
@@ -23,6 +23,7 @@
 #include <asm/page.h>
 #include <asm/sections.h>
 #include <asm/setup.h>
+#include <asm/security_features.h>
 #include <asm/firmware.h>
 
 struct fixup_entry {
@@ -117,6 +118,120 @@ void do_feature_fixups(unsigned long value, void *fixup_start, void *fixup_end)
 }
 
 #ifdef CONFIG_PPC_BOOK3S_64
+void do_stf_entry_barrier_fixups(enum stf_barrier_type types)
+{
+	unsigned int instrs[3], *dest;
+	long *start, *end;
+	int i;
+
+	start = PTRRELOC(&__start___stf_entry_barrier_fixup),
+	end = PTRRELOC(&__stop___stf_entry_barrier_fixup);
+
+	instrs[0] = 0x60000000; /* nop */
+	instrs[1] = 0x60000000; /* nop */
+	instrs[2] = 0x60000000; /* nop */
+
+	i = 0;
+	if (types & STF_BARRIER_FALLBACK) {
+		instrs[i++] = 0x7d4802a6; /* mflr r10		*/
+		instrs[i++] = 0x60000000; /* branch patched below */
+		instrs[i++] = 0x7d4803a6; /* mtlr r10		*/
+	} else if (types & STF_BARRIER_EIEIO) {
+		instrs[i++] = 0x7e0006ac; /* eieio + bit 6 hint */
+	} else if (types & STF_BARRIER_SYNC_ORI) {
+		instrs[i++] = 0x7c0004ac; /* hwsync		*/
+		instrs[i++] = 0xe94d0000; /* ld r10,0(r13)	*/
+		instrs[i++] = 0x63ff0000; /* ori 31,31,0 speculation barrier */
+	}
+
+	for (i = 0; start < end; start++, i++) {
+		dest = (void *)start + *start;
+
+		pr_devel("patching dest %lx\n", (unsigned long)dest);
+
+		patch_instruction(dest, instrs[0]);
+
+		if (types & STF_BARRIER_FALLBACK)
+			patch_branch(dest + 1, (unsigned long)&stf_barrier_fallback,
+				     BRANCH_SET_LINK);
+		else
+			patch_instruction(dest + 1, instrs[1]);
+
+		patch_instruction(dest + 2, instrs[2]);
+	}
+
+	printk(KERN_DEBUG "stf-barrier: patched %d entry locations (%s barrier)\n", i,
+		(types == STF_BARRIER_NONE)                  ? "no" :
+		(types == STF_BARRIER_FALLBACK)              ? "fallback" :
+		(types == STF_BARRIER_EIEIO)                 ? "eieio" :
+		(types == (STF_BARRIER_SYNC_ORI))            ? "hwsync"
+		                                           : "unknown");
+}
+
+void do_stf_exit_barrier_fixups(enum stf_barrier_type types)
+{
+	unsigned int instrs[6], *dest;
+	long *start, *end;
+	int i;
+
+	start = PTRRELOC(&__start___stf_exit_barrier_fixup),
+	end = PTRRELOC(&__stop___stf_exit_barrier_fixup);
+
+	instrs[0] = 0x60000000; /* nop */
+	instrs[1] = 0x60000000; /* nop */
+	instrs[2] = 0x60000000; /* nop */
+	instrs[3] = 0x60000000; /* nop */
+	instrs[4] = 0x60000000; /* nop */
+	instrs[5] = 0x60000000; /* nop */
+
+	i = 0;
+	if (types & STF_BARRIER_FALLBACK || types & STF_BARRIER_SYNC_ORI) {
+		if (cpu_has_feature(CPU_FTR_HVMODE)) {
+			instrs[i++] = 0x7db14ba6; /* mtspr 0x131, r13 (HSPRG1) */
+			instrs[i++] = 0x7db04aa6; /* mfspr r13, 0x130 (HSPRG0) */
+		} else {
+			instrs[i++] = 0x7db243a6; /* mtsprg 2,r13	*/
+			instrs[i++] = 0x7db142a6; /* mfsprg r13,1    */
+	        }
+		instrs[i++] = 0x7c0004ac; /* hwsync		*/
+		instrs[i++] = 0xe9ad0000; /* ld r13,0(r13)	*/
+		instrs[i++] = 0x63ff0000; /* ori 31,31,0 speculation barrier */
+		if (cpu_has_feature(CPU_FTR_HVMODE)) {
+			instrs[i++] = 0x7db14aa6; /* mfspr r13, 0x131 (HSPRG1) */
+		} else {
+			instrs[i++] = 0x7db242a6; /* mfsprg r13,2 */
+		}
+	} else if (types & STF_BARRIER_EIEIO) {
+		instrs[i++] = 0x7e0006ac; /* eieio + bit 6 hint */
+	}
+
+	for (i = 0; start < end; start++, i++) {
+		dest = (void *)start + *start;
+
+		pr_devel("patching dest %lx\n", (unsigned long)dest);
+
+		patch_instruction(dest, instrs[0]);
+		patch_instruction(dest + 1, instrs[1]);
+		patch_instruction(dest + 2, instrs[2]);
+		patch_instruction(dest + 3, instrs[3]);
+		patch_instruction(dest + 4, instrs[4]);
+		patch_instruction(dest + 5, instrs[5]);
+	}
+	printk(KERN_DEBUG "stf-barrier: patched %d exit locations (%s barrier)\n", i,
+		(types == STF_BARRIER_NONE)                  ? "no" :
+		(types == STF_BARRIER_FALLBACK)              ? "fallback" :
+		(types == STF_BARRIER_EIEIO)                 ? "eieio" :
+		(types == (STF_BARRIER_SYNC_ORI))            ? "hwsync"
+		                                           : "unknown");
+}
+
+
+void do_stf_barrier_fixups(enum stf_barrier_type types)
+{
+	do_stf_entry_barrier_fixups(types);
+	do_stf_exit_barrier_fixups(types);
+}
+
 void do_rfi_flush_fixups(enum l1d_flush_type types)
 {
 	unsigned int instrs[3], *dest;
diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c
index 7de050a3736b..fc0412d59149 100644
--- a/arch/powerpc/platforms/powernv/setup.c
+++ b/arch/powerpc/platforms/powernv/setup.c
@@ -131,6 +131,7 @@ static void __init pnv_setup_arch(void)
 	set_arch_panic_timeout(10, ARCH_PANIC_TIMEOUT);
 
 	pnv_setup_rfi_flush();
+	setup_stf_barrier();
 
 	/* Initialize SMP */
 	pnv_smp_init();
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index 6b2152839c48..21fed38bbbd5 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -699,6 +699,7 @@ static void __init pSeries_setup_arch(void)
 	fwnmi_init();
 
 	pseries_setup_rfi_flush();
+	setup_stf_barrier();
 
 	/* By default, only probe PCI (can be overridden by rtas_pci) */
 	pci_add_flags(PCI_PROBE_ONLY);
-- 
2.14.1

^ permalink raw reply related

* Re: [PATCH v7 2/3] powerpc/mm: Only read faulting instruction when necessary in do_page_fault()
From: Christophe LEROY @ 2018-05-22 14:50 UTC (permalink / raw)
  To: Nicholas Piggin
  Cc: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman,
	linux-kernel, linuxppc-dev
In-Reply-To: <20180523003801.43070ddc@roar.ozlabs.ibm.com>



Le 22/05/2018 à 16:38, Nicholas Piggin a écrit :
> On Tue, 22 May 2018 16:02:56 +0200 (CEST)
> Christophe Leroy <christophe.leroy@c-s.fr> wrote:
> 
>> Commit a7a9dcd882a67 ("powerpc: Avoid taking a data miss on every
>> userspace instruction miss") has shown that limiting the read of
>> faulting instruction to likely cases improves performance.
>>
>> This patch goes further into this direction by limiting the read
>> of the faulting instruction to the only cases where it is likely
>> needed.
>>
>> On an MPC885, with the same benchmark app as in the commit referred
>> above, we see a reduction of about 3900 dTLB misses (approx 3%):
>>
>> Before the patch:
>>   Performance counter stats for './fault 500' (10 runs):
>>
>>           683033312      cpu-cycles                                                    ( +-  0.03% )
>>              134538      dTLB-load-misses                                              ( +-  0.03% )
>>               46099      iTLB-load-misses                                              ( +-  0.02% )
>>               19681      faults                                                        ( +-  0.02% )
>>
>>         5.389747878 seconds time elapsed                                          ( +-  0.06% )
>>
>> With the patch:
>>
>>   Performance counter stats for './fault 500' (10 runs):
>>
>>           682112862      cpu-cycles                                                    ( +-  0.03% )
>>              130619      dTLB-load-misses                                              ( +-  0.03% )
>>               46073      iTLB-load-misses                                              ( +-  0.05% )
>>               19681      faults                                                        ( +-  0.01% )
>>
>>         5.381342641 seconds time elapsed                                          ( +-  0.07% )
>>
>> The proper work of the huge stack expansion was tested with the
>> following app:
>>
>> int main(int argc, char **argv)
>> {
>> 	char buf[1024 * 1025];
>>
>> 	sprintf(buf, "Hello world !\n");
>> 	printf(buf);
>>
>> 	exit(0);
>> }
>>
>> Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
>> ---
>>   v7: Following comment from Nicholas on v6 on possibility of the page getting removed from the pagetables
>>       between the fault and the read, I have reworked the patch in order to do the get_user() in
>>       __do_page_fault() directly in order to reduce complexity compared to version v5
> 
> This is looking better, thanks.
> 
>> diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
>> index fcbb34431da2..dc64b8e06477 100644
>> --- a/arch/powerpc/mm/fault.c
>> +++ b/arch/powerpc/mm/fault.c
>> @@ -450,9 +450,6 @@ static int __do_page_fault(struct pt_regs *regs, unsigned long address,
>>   	 * can result in fault, which will cause a deadlock when called with
>>   	 * mmap_sem held
>>   	 */
>> -	if (is_write && is_user)
>> -		get_user(inst, (unsigned int __user *)regs->nip);
>> -
>>   	if (is_user)
>>   		flags |= FAULT_FLAG_USER;
>>   	if (is_write)
>> @@ -498,6 +495,26 @@ static int __do_page_fault(struct pt_regs *regs, unsigned long address,
>>   	if (unlikely(!(vma->vm_flags & VM_GROWSDOWN)))
>>   		return bad_area(regs, address);
>>   
>> +	if (unlikely(is_write && is_user && address + 0x100000 < vma->vm_end &&
>> +		     !inst)) {
>> +		unsigned int __user *nip = (unsigned int __user *)regs->nip;
>> +
>> +		if (likely(access_ok(VERIFY_READ, nip, sizeof(inst)))) {
>> +			int res;
>> +
>> +			pagefault_disable();
>> +			res = __get_user_inatomic(inst, nip);
>> +			pagefault_enable();
>> +			if (unlikely(res)) {
>> +				up_read(&mm->mmap_sem);
>> +				res = __get_user(inst, nip);
>> +				if (!res && inst)
>> +					goto retry;
> 
> You're handling error here but the previous code did not?

The previous code did in store_updates_sp()

When I moved get_user() out of that function in preceeding patch, I did 
consider that if get_user() fails, inst will remain 0, which means that 
store_updates_sp() will return false if ever called.

Now, as the semaphore has been released, we really need to do something, 
because if we goto retry inconditionally, we may end up in an infinite 
loop, and we can't let it continue either as the semaphore is not held 
anymore.

> 
>> +				return bad_area_nosemaphore(regs, address);
>> +			}
>> +		}
>> +	}
> 
> Would it be nicer to move all this up into bad_stack_expansion().
> It would need a way to handle the retry and insn, but I think it
> would still look better.

That's what I did in v5 indeed, but it looked too complex to me at the 
end. Can you have a look at it 
(https://patchwork.ozlabs.org/patch/799053/) and tell me if you feel it 
better than v7, or if you have any suggestion to improve based on v5 
and/or v7 ?

Thanks
Christophe

> 
> Thanks,
> Nick
> 

^ permalink raw reply

* Re: [PATCH] pseries/memory-hotplug: Only update DT once per memory DLPAR request
From: Nathan Fontenot @ 2018-05-22 15:12 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: jallen, Michael Ellerman
In-Reply-To: <152425618836.15614.7060070452480906952.stgit@ltcalpine2-lp14.aus.stglabs.ibm.com>

Hi Michael,

I sent this patch out several weeks ago, just wanted to make sure it hasn't fallen
off your radar.

Thanks,
-Nathan

On 04/20/2018 03:29 PM, Nathan Fontenot wrote:
> The updates to powerpc numa and memory hotplug code now use the
> in-kernel LMB array instead of the device tree. This change
> allows the pseries memory DLPAR code to only update the device
> tree once after successfully handling a DLPAR request.
> 
> Prior to the in-kernel LMB array, the numa code looked up the
> affinity for memory being added in the device tree, the code
> now looks this up in the LMB array. This change means the
> memory hotplug code can just update the affinity for an LMB
> in the LMB array instead of updating the device tree.
> 
> This also provides a savings in kernel memory. When updating the
> device tree old properties are never free'ed since there is no
> usecount on properties. This behavior leads to a new copy of the
> property being allocated every time a LMB is added or removed
> (i.e. a request to add 100 LMBs creates 100 new copies of the
> property). With this update only a single new property is created
> when a DLPAR request completes successfully.
> 
> Signed-off-by: Nathan Fontenot <nfont@linux.vnet.ibm.com>
> ---
>  arch/powerpc/include/asm/drmem.h                |    5 ++
>  arch/powerpc/platforms/pseries/hotplug-memory.c |   55 +++++++----------------
>  2 files changed, 21 insertions(+), 39 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/drmem.h b/arch/powerpc/include/asm/drmem.h
> index ce242b9ea8c6..7c1d8e74b25d 100644
> --- a/arch/powerpc/include/asm/drmem.h
> +++ b/arch/powerpc/include/asm/drmem.h
> @@ -99,4 +99,9 @@ void __init walk_drmem_lmbs_early(unsigned long node,
>  			void (*func)(struct drmem_lmb *, const __be32 **));
>  #endif
> 
> +static inline void invalidate_lmb_associativity_index(struct drmem_lmb *lmb)
> +{
> +	lmb->aa_index = 0xffffffff;
> +}
> +
>  #endif /* _ASM_POWERPC_LMB_H */
> diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c
> index c1578f54c626..9a15d39995e5 100644
> --- a/arch/powerpc/platforms/pseries/hotplug-memory.c
> +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c
> @@ -163,7 +163,7 @@ static u32 find_aa_index(struct device_node *dr_node,
>  	return aa_index;
>  }
> 
> -static u32 lookup_lmb_associativity_index(struct drmem_lmb *lmb)
> +static int update_lmb_associativity_index(struct drmem_lmb *lmb)
>  {
>  	struct device_node *parent, *lmb_node, *dr_node;
>  	struct property *ala_prop;
> @@ -203,43 +203,14 @@ static u32 lookup_lmb_associativity_index(struct drmem_lmb *lmb)
>  	aa_index = find_aa_index(dr_node, ala_prop, lmb_assoc);
> 
>  	dlpar_free_cc_nodes(lmb_node);
> -	return aa_index;
> -}
> -
> -static int dlpar_add_device_tree_lmb(struct drmem_lmb *lmb)
> -{
> -	int rc, aa_index;
> -
> -	lmb->flags |= DRCONF_MEM_ASSIGNED;
> 
> -	aa_index = lookup_lmb_associativity_index(lmb);
>  	if (aa_index < 0) {
> -		pr_err("Couldn't find associativity index for drc index %x\n",
> -		       lmb->drc_index);
> -		return aa_index;
> +		pr_err("Could not find LMB associativity\n");
> +		return -1;
>  	}
> 
>  	lmb->aa_index = aa_index;
> -
> -	rtas_hp_event = true;
> -	rc = drmem_update_dt();
> -	rtas_hp_event = false;
> -
> -	return rc;
> -}
> -
> -static int dlpar_remove_device_tree_lmb(struct drmem_lmb *lmb)
> -{
> -	int rc;
> -
> -	lmb->flags &= ~DRCONF_MEM_ASSIGNED;
> -	lmb->aa_index = 0xffffffff;
> -
> -	rtas_hp_event = true;
> -	rc = drmem_update_dt();
> -	rtas_hp_event = false;
> -
> -	return rc;
> +	return 0;
>  }
> 
>  static struct memory_block *lmb_to_memblock(struct drmem_lmb *lmb)
> @@ -428,7 +399,9 @@ static int dlpar_remove_lmb(struct drmem_lmb *lmb)
>  	/* Update memory regions for memory remove */
>  	memblock_remove(lmb->base_addr, block_sz);
> 
> -	dlpar_remove_device_tree_lmb(lmb);
> +	invalidate_lmb_associativity_index(lmb);
> +	lmb->flags &= ~DRCONF_MEM_ASSIGNED;
> +
>  	return 0;
>  }
> 
> @@ -688,10 +661,8 @@ static int dlpar_add_lmb(struct drmem_lmb *lmb)
>  	if (lmb->flags & DRCONF_MEM_ASSIGNED)
>  		return -EINVAL;
> 
> -	rc = dlpar_add_device_tree_lmb(lmb);
> +	rc = update_lmb_associativity_index(lmb);
>  	if (rc) {
> -		pr_err("Couldn't update device tree for drc index %x\n",
> -		       lmb->drc_index);
>  		dlpar_release_drc(lmb->drc_index);
>  		return rc;
>  	}
> @@ -704,14 +675,14 @@ static int dlpar_add_lmb(struct drmem_lmb *lmb)
>  	/* Add the memory */
>  	rc = add_memory(nid, lmb->base_addr, block_sz);
>  	if (rc) {
> -		dlpar_remove_device_tree_lmb(lmb);
> +		invalidate_lmb_associativity_index(lmb);
>  		return rc;
>  	}
> 
>  	rc = dlpar_online_lmb(lmb);
>  	if (rc) {
>  		remove_memory(nid, lmb->base_addr, block_sz);
> -		dlpar_remove_device_tree_lmb(lmb);
> +		invalidate_lmb_associativity_index(lmb);
>  	} else {
>  		lmb->flags |= DRCONF_MEM_ASSIGNED;
>  	}
> @@ -958,6 +929,12 @@ int dlpar_memory(struct pseries_hp_errorlog *hp_elog)
>  		break;
>  	}
> 
> +	if (!rc) {
> +		rtas_hp_event = true;
> +		rc = drmem_update_dt();
> +		rtas_hp_event = false;
> +	}
> +
>  	unlock_device_hotplug();
>  	return rc;
>  }
> 

^ permalink raw reply

* [PATCH v3 0/3] powerpc/lib: Optimisation of string functions for PPC32 - part 1
From: Christophe Leroy @ 2018-05-22 16:06 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman, segher
  Cc: linux-kernel, linuxppc-dev

This serie intends to optimise string functions for PPC32 in the
same spirit as already done on PPC64.

The first patch moves PPC32 specific functions from string.S into
a dedicated file named string_32.S
The second patch rewrites __clear_user() by using dcbz intruction
The third patch rewrites memcmp() to compare 32 bits words instead
of comparing byte per byte.

As shown in each individual commit log, second and third patches provide
significant improvment.

Changes in v3:
- Fixed the sign of the result returned by memcmp() by using a logical
comparison of u32 words and returning -1, 0 or 1 instead of doing a substract.
- In first patch, replaced PPC_LCMPI by cmpwi
- Fixed licence in string_32.S
- Removed the two last patches from the serie. They will be handled later as
they require further tests and analysis to properly identify their real benefit
in all possible cases.

Changes in v2:
- Moved out the patch removing the hot loop alignment on PPC32
- Squashed the changes related to NUL size verification in a single patch
- Reordered the patches in a more logical order
- Modified the inlining patch to avoid warning about impossibility to version symbols.

Christophe Leroy (3):
  powerpc/lib: move PPC32 specific functions out of string.S
  powerpc/lib: optimise 32 bits __clear_user()
  powerpc/lib: optimise PPC32 memcmp

 arch/powerpc/lib/Makefile    |   5 +-
 arch/powerpc/lib/string.S    |  61 -------------------
 arch/powerpc/lib/string_32.S | 140 +++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 143 insertions(+), 63 deletions(-)
 create mode 100644 arch/powerpc/lib/string_32.S

-- 
2.13.3

^ permalink raw reply

* [PATCH v3 1/3] powerpc/lib: move PPC32 specific functions out of string.S
From: Christophe Leroy @ 2018-05-22 16:06 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman, segher
  Cc: linux-kernel, linuxppc-dev
In-Reply-To: <cover.1527004752.git.christophe.leroy@c-s.fr>

In preparation of optimisation patches, move PPC32 specific
memcmp() and __clear_user() into string_32.S

Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
---
 arch/powerpc/lib/Makefile    |  5 +--
 arch/powerpc/lib/string.S    | 61 -------------------------------------
 arch/powerpc/lib/string_32.S | 72 ++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 75 insertions(+), 63 deletions(-)
 create mode 100644 arch/powerpc/lib/string_32.S

diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
index 653901042ad7..2c9b8c0adf22 100644
--- a/arch/powerpc/lib/Makefile
+++ b/arch/powerpc/lib/Makefile
@@ -26,13 +26,14 @@ obj-$(CONFIG_PPC_BOOK3S_64) += copyuser_power7.o copypage_power7.o \
 			       memcpy_power7.o
 
 obj64-y	+= copypage_64.o copyuser_64.o mem_64.o hweight_64.o \
-	   string_64.o memcpy_64.o memcmp_64.o pmem.o
+	   memcpy_64.o memcmp_64.o pmem.o
 
 obj64-$(CONFIG_SMP)	+= locks.o
 obj64-$(CONFIG_ALTIVEC)	+= vmx-helper.o
 obj64-$(CONFIG_KPROBES_SANITY_TEST) += test_emulate_step.o
 
-obj-y			+= checksum_$(BITS).o checksum_wrappers.o
+obj-y			+= checksum_$(BITS).o checksum_wrappers.o \
+			   string_$(BITS).o
 
 obj-y			+= sstep.o ldstfp.o quad.o
 obj64-y			+= quad.o
diff --git a/arch/powerpc/lib/string.S b/arch/powerpc/lib/string.S
index 0378def28d41..b03c1a6861f4 100644
--- a/arch/powerpc/lib/string.S
+++ b/arch/powerpc/lib/string.S
@@ -56,23 +56,6 @@ _GLOBAL(strncmp)
 	blr
 EXPORT_SYMBOL(strncmp)
 
-#ifdef CONFIG_PPC32
-_GLOBAL(memcmp)
-	PPC_LCMPI 0,r5,0
-	beq-	2f
-	mtctr	r5
-	addi	r6,r3,-1
-	addi	r4,r4,-1
-1:	lbzu	r3,1(r6)
-	lbzu	r0,1(r4)
-	subf.	r3,r0,r3
-	bdnzt	2,1b
-	blr
-2:	li	r3,0
-	blr
-EXPORT_SYMBOL(memcmp)
-#endif
-
 _GLOBAL(memchr)
 	PPC_LCMPI 0,r5,0
 	beq-	2f
@@ -86,47 +69,3 @@ _GLOBAL(memchr)
 2:	li	r3,0
 	blr
 EXPORT_SYMBOL(memchr)
-
-#ifdef CONFIG_PPC32
-_GLOBAL(__clear_user)
-	addi	r6,r3,-4
-	li	r3,0
-	li	r5,0
-	cmplwi	0,r4,4
-	blt	7f
-	/* clear a single word */
-11:	stwu	r5,4(r6)
-	beqlr
-	/* clear word sized chunks */
-	andi.	r0,r6,3
-	add	r4,r0,r4
-	subf	r6,r0,r6
-	srwi	r0,r4,2
-	andi.	r4,r4,3
-	mtctr	r0
-	bdz	7f
-1:	stwu	r5,4(r6)
-	bdnz	1b
-	/* clear byte sized chunks */
-7:	cmpwi	0,r4,0
-	beqlr
-	mtctr	r4
-	addi	r6,r6,3
-8:	stbu	r5,1(r6)
-	bdnz	8b
-	blr
-90:	mr	r3,r4
-	blr
-91:	mfctr	r3
-	slwi	r3,r3,2
-	add	r3,r3,r4
-	blr
-92:	mfctr	r3
-	blr
-
-	EX_TABLE(11b, 90b)
-	EX_TABLE(1b, 91b)
-	EX_TABLE(8b, 92b)
-
-EXPORT_SYMBOL(__clear_user)
-#endif
diff --git a/arch/powerpc/lib/string_32.S b/arch/powerpc/lib/string_32.S
new file mode 100644
index 000000000000..204db8a834fd
--- /dev/null
+++ b/arch/powerpc/lib/string_32.S
@@ -0,0 +1,72 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * String handling functions for PowerPC32
+ *
+ * Copyright (C) 1996 Paul Mackerras.
+ *
+ */
+
+#include <asm/processor.h>
+#include <asm/errno.h>
+#include <asm/ppc_asm.h>
+#include <asm/export.h>
+
+	.text
+
+_GLOBAL(memcmp)
+	cmpwi	cr0, r5, 0
+	beq-	2f
+	mtctr	r5
+	addi	r6,r3,-1
+	addi	r4,r4,-1
+1:	lbzu	r3,1(r6)
+	lbzu	r0,1(r4)
+	subf.	r3,r0,r3
+	bdnzt	2,1b
+	blr
+2:	li	r3,0
+	blr
+EXPORT_SYMBOL(memcmp)
+
+_GLOBAL(__clear_user)
+	addi	r6,r3,-4
+	li	r3,0
+	li	r5,0
+	cmplwi	0,r4,4
+	blt	7f
+	/* clear a single word */
+11:	stwu	r5,4(r6)
+	beqlr
+	/* clear word sized chunks */
+	andi.	r0,r6,3
+	add	r4,r0,r4
+	subf	r6,r0,r6
+	srwi	r0,r4,2
+	andi.	r4,r4,3
+	mtctr	r0
+	bdz	7f
+1:	stwu	r5,4(r6)
+	bdnz	1b
+	/* clear byte sized chunks */
+7:	cmpwi	0,r4,0
+	beqlr
+	mtctr	r4
+	addi	r6,r6,3
+8:	stbu	r5,1(r6)
+	bdnz	8b
+	blr
+90:	mr	r3,r4
+	blr
+91:	mfctr	r3
+	slwi	r3,r3,2
+	add	r3,r3,r4
+	blr
+92:	mfctr	r3
+	blr
+
+	EX_TABLE(11b, 90b)
+	EX_TABLE(1b, 91b)
+	EX_TABLE(8b, 92b)
+
+EXPORT_SYMBOL(__clear_user)
-- 
2.13.3

^ permalink raw reply related

* [PATCH v3 2/3] powerpc/lib: optimise 32 bits __clear_user()
From: Christophe Leroy @ 2018-05-22 16:06 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman, segher
  Cc: linux-kernel, linuxppc-dev
In-Reply-To: <cover.1527004752.git.christophe.leroy@c-s.fr>

Rewrite clear_user() on the same principle as memset(0), making use
of dcbz to clear complete cache lines.

This code is a copy/paste of memset(), with some modifications
in order to retrieve remaining number of bytes to be cleared,
as it needs to be returned in case of error.

On a MPC885, throughput is almost doubled:

Before:
~# dd if=/dev/zero of=/dev/null bs=1M count=1000
1048576000 bytes (1000.0MB) copied, 18.990779 seconds, 52.7MB/s

After:
~# dd if=/dev/zero of=/dev/null bs=1M count=1000
1048576000 bytes (1000.0MB) copied, 9.611468 seconds, 104.0MB/s

On a MPC8321, throughput is multiplied by 2.12:

Before:
root@vgoippro:~# dd if=/dev/zero of=/dev/null bs=1M count=1000
1048576000 bytes (1000.0MB) copied, 6.844352 seconds, 146.1MB/s

After:
root@vgoippro:~# dd if=/dev/zero of=/dev/null bs=1M count=1000
1048576000 bytes (1000.0MB) copied, 3.218854 seconds, 310.7MB/s

Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
---
 arch/powerpc/lib/string_32.S | 85 +++++++++++++++++++++++++++++++-------------
 1 file changed, 60 insertions(+), 25 deletions(-)

diff --git a/arch/powerpc/lib/string_32.S b/arch/powerpc/lib/string_32.S
index 204db8a834fd..40a576d56ac7 100644
--- a/arch/powerpc/lib/string_32.S
+++ b/arch/powerpc/lib/string_32.S
@@ -11,6 +11,7 @@
 #include <asm/errno.h>
 #include <asm/ppc_asm.h>
 #include <asm/export.h>
+#include <asm/cache.h>
 
 	.text
 
@@ -29,44 +30,78 @@ _GLOBAL(memcmp)
 	blr
 EXPORT_SYMBOL(memcmp)
 
+CACHELINE_BYTES = L1_CACHE_BYTES
+LG_CACHELINE_BYTES = L1_CACHE_SHIFT
+CACHELINE_MASK = (L1_CACHE_BYTES-1)
+
 _GLOBAL(__clear_user)
-	addi	r6,r3,-4
-	li	r3,0
-	li	r5,0
-	cmplwi	0,r4,4
+/*
+ * Use dcbz on the complete cache lines in the destination
+ * to set them to zero.  This requires that the destination
+ * area is cacheable.
+ */
+	cmplwi	cr0, r4, 4
+	mr	r10, r3
+	li	r3, 0
 	blt	7f
-	/* clear a single word */
-11:	stwu	r5,4(r6)
+
+11:	stw	r3, 0(r10)
 	beqlr
-	/* clear word sized chunks */
-	andi.	r0,r6,3
-	add	r4,r0,r4
-	subf	r6,r0,r6
-	srwi	r0,r4,2
-	andi.	r4,r4,3
+	andi.	r0, r10, 3
+	add	r11, r0, r4
+	subf	r6, r0, r10
+
+	clrlwi	r7, r6, 32 - LG_CACHELINE_BYTES
+	add	r8, r7, r11
+	srwi	r9, r8, LG_CACHELINE_BYTES
+	addic.	r9, r9, -1	/* total number of complete cachelines */
+	ble	2f
+	xori	r0, r7, CACHELINE_MASK & ~3
+	srwi.	r0, r0, 2
+	beq	3f
+	mtctr	r0
+4:	stwu	r3, 4(r6)
+	bdnz	4b
+3:	mtctr	r9
+	li	r7, 4
+10:	dcbz	r7, r6
+	addi	r6, r6, CACHELINE_BYTES
+	bdnz	10b
+	clrlwi	r11, r8, 32 - LG_CACHELINE_BYTES
+	addi	r11, r11, 4
+
+2:	srwi	r0 ,r11 ,2
 	mtctr	r0
-	bdz	7f
-1:	stwu	r5,4(r6)
+	bdz	6f
+1:	stwu	r3, 4(r6)
 	bdnz	1b
-	/* clear byte sized chunks */
-7:	cmpwi	0,r4,0
+6:	andi.	r11, r11, 3
 	beqlr
-	mtctr	r4
-	addi	r6,r6,3
-8:	stbu	r5,1(r6)
+	mtctr	r11
+	addi	r6, r6, 3
+8:	stbu	r3, 1(r6)
 	bdnz	8b
 	blr
-90:	mr	r3,r4
+
+7:	cmpwi	cr0, r4, 0
+	beqlr
+	mtctr	r4
+	addi	r6, r10, -1
+9:	stbu	r3, 1(r6)
+	bdnz	9b
 	blr
-91:	mfctr	r3
-	slwi	r3,r3,2
-	add	r3,r3,r4
+
+90:	mr	r3, r4
 	blr
-92:	mfctr	r3
+91:	add	r3, r10, r4
+	subf	r3, r6, r3
 	blr
 
 	EX_TABLE(11b, 90b)
+	EX_TABLE(4b, 91b)
+	EX_TABLE(10b, 91b)
 	EX_TABLE(1b, 91b)
-	EX_TABLE(8b, 92b)
+	EX_TABLE(8b, 91b)
+	EX_TABLE(9b, 91b)
 
 EXPORT_SYMBOL(__clear_user)
-- 
2.13.3

^ permalink raw reply related

* [PATCH v3 3/3] powerpc/lib: optimise PPC32 memcmp
From: Christophe Leroy @ 2018-05-22 16:06 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman, segher
  Cc: linux-kernel, linuxppc-dev
In-Reply-To: <cover.1527004752.git.christophe.leroy@c-s.fr>

At the time being, memcmp() compares two chunks of memory
byte per byte.

This patch optimises the comparison by comparing word by word.

A small benchmark performed on an 8xx comparing two chuncks
of 512 bytes performed 100000 times gives:

Before : 5852274 TB ticks
After:   1488638 TB ticks

This is almost 4 times faster

Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
---
 arch/powerpc/lib/string_32.S | 47 +++++++++++++++++++++++++++++++++++++-------
 1 file changed, 40 insertions(+), 7 deletions(-)

diff --git a/arch/powerpc/lib/string_32.S b/arch/powerpc/lib/string_32.S
index 40a576d56ac7..d83b7d996f61 100644
--- a/arch/powerpc/lib/string_32.S
+++ b/arch/powerpc/lib/string_32.S
@@ -18,16 +18,49 @@
 _GLOBAL(memcmp)
 	cmpwi	cr0, r5, 0
 	beq-	2f
-	mtctr	r5
-	addi	r6,r3,-1
-	addi	r4,r4,-1
-1:	lbzu	r3,1(r6)
-	lbzu	r0,1(r4)
-	subf.	r3,r0,r3
-	bdnzt	2,1b
+	srawi.	r7, r5, 2		/* Divide len by 4 */
+	mr	r6, r3
+	beq-	3f
+	mtctr	r7
+	li	r7, 0
+1:
+#ifdef __LITTLE_ENDIAN__
+	lwbrx	r3, r6, r7
+	lwbrx	r0, r4, r7
+#else
+	lwzx	r3, r6, r7
+	lwzx	r0, r4, r7
+#endif
+	addi	r7, r7, 4
+	cmpl	cr0, r3, r0
+	bdnzt	eq, 1b
+	bne	5f
+	andi.	r5, r5, 3
+	li	r3, 0
+	beqlr
+3:	cmplwi	cr1, r5, 2
+	blt-	cr1, 4f
+#ifdef __LITTLE_ENDIAN__
+	lhbrx	r3, r6, r7
+	lhbrx	r0, r4, r7
+#else
+	lhzx	r3, r6, r7
+	lhzx	r0, r4, r7
+#endif
+	addi	r7, r7, 2
+	subf.	r3, r0, r3
+	beqlr	cr1
+	bnelr
+4:	lbzx	r3, r6, r7
+	lbzx	r0, r4, r7
+	subf.	r3, r0, r3
 	blr
 2:	li	r3,0
 	blr
+5:	li	r3, 1
+	bgtlr
+	li	r3, -1
+	blr
 EXPORT_SYMBOL(memcmp)
 
 CACHELINE_BYTES = L1_CACHE_BYTES
-- 
2.13.3

^ permalink raw reply related

* [RFC v4 0/4] powerpc/drcinfo: Fix bugs 'ibm,drc-info' property
From: Michael Bringmann @ 2018-05-22 16:36 UTC (permalink / raw)
  To: linuxppc-dev
  Cc: Michael Bringmann, Nathan Fontenot, John Allen, Tyrel Datwyler,
	Thomas Falcon

This patch set corrects some errors and omissions in the previous
set of patches adding support for the "ibm,drc-info" property to
powerpc systems.

Unfortunately, some errors in the previous patch set break things
in some of the DLPAR operations.  In particular when attempting to
hot-add a new CPU or set of CPUs, the original patch failed to
properly calculate the available resources, and aborted the operation.
In addition, the original set missed several opportunities to compress
and reuse common code, especially, in the area of device processing.

Signed-off-by: Michael W. Bringmann <mwb@linux.vnet.ibm.com>
---
Changes in V4:
  -- Update code for latest kernel checkins.
  -- Fix bug with virtual devices.
  -- Rebase on top of 4.17-rc5 kernel
  -- Simplify parser code for "ibm,drc-info" property

^ permalink raw reply

* [RFC v4 1/4] hotplug/drcinfo: Simplify parse ibm,drc-info structs
From: Michael Bringmann @ 2018-05-22 16:37 UTC (permalink / raw)
  To: linuxppc-dev
  Cc: Michael Bringmann, Nathan Fontenot, John Allen, Tyrel Datwyler,
	Thomas Falcon
In-Reply-To: <47581596-1d6e-a1a9-9490-5b5ff0f5304f@linux.vnet.ibm.com>

Replace use of of_prop_next_u32() in when parsing 'ibm,drc-info'
structure to simplify and reduce parsing code.

Signed-off-by: Michael Bringmann <mwb@linux.vnet.ibm.com>
Fixes: 3f38000eda48 ("powerpc/firmware: Add definitions for new drc-info firmware feature" -- end of patch series applied to powerpc next)
---
Changes in V4:
  -- Rebased patch to 4.17-rc5 kernel
  -- Replace of_prop_next_u32() by of_read_number()
---
 arch/powerpc/platforms/pseries/of_helpers.c     |   20 +++++---------------
 arch/powerpc/platforms/pseries/pseries_energy.c |   10 ++++------
 drivers/pci/hotplug/rpaphp_core.c               |    5 ++---
 3 files changed, 11 insertions(+), 24 deletions(-)

diff --git a/arch/powerpc/platforms/pseries/of_helpers.c b/arch/powerpc/platforms/pseries/of_helpers.c
index 6df192f..11b2ef1 100644
--- a/arch/powerpc/platforms/pseries/of_helpers.c
+++ b/arch/powerpc/platforms/pseries/of_helpers.c
@@ -65,29 +65,19 @@ int of_read_drc_info_cell(struct property **prop, const __be32 **curval,
 
 	/* Get drc-index-start:encode-int */
 	p2 = (const __be32 *)p;
-	p2 = of_prop_next_u32(*prop, p2, &data->drc_index_start);
-	if (!p2)
-		return -EINVAL;
+	data->drc_index_start = of_read_number(p2++, 1);
 
 	/* Get drc-name-suffix-start:encode-int */
-	p2 = of_prop_next_u32(*prop, p2, &data->drc_name_suffix_start);
-	if (!p2)
-		return -EINVAL;
+	data->drc_name_suffix_start = of_read_number(p2++, 1);
 
 	/* Get number-sequential-elements:encode-int */
-	p2 = of_prop_next_u32(*prop, p2, &data->num_sequential_elems);
-	if (!p2)
-		return -EINVAL;
+	data->num_sequential_elems = of_read_number(p2++, 1);
 
 	/* Get sequential-increment:encode-int */
-	p2 = of_prop_next_u32(*prop, p2, &data->sequential_inc);
-	if (!p2)
-		return -EINVAL;
+	data->sequential_inc = of_read_number(p2++, 1);
 
 	/* Get drc-power-domain:encode-int */
-	p2 = of_prop_next_u32(*prop, p2, &data->drc_power_domain);
-	if (!p2)
-		return -EINVAL;
+	data->drc_power_domain = of_read_number(p2++, 1);
 
 	/* Should now know end of current entry */
 	(*curval) = (void *)p2;
diff --git a/arch/powerpc/platforms/pseries/pseries_energy.c b/arch/powerpc/platforms/pseries/pseries_energy.c
index 6ed2212..5261975 100644
--- a/arch/powerpc/platforms/pseries/pseries_energy.c
+++ b/arch/powerpc/platforms/pseries/pseries_energy.c
@@ -61,9 +61,8 @@ static u32 cpu_to_drc_index(int cpu)
 		if (info == NULL)
 			goto err_of_node_put;
 
-		value = of_prop_next_u32(info, NULL, &num_set_entries);
-		if (!value)
-			goto err_of_node_put;
+		value = info->value;
+		num_set_entries = of_read_number(value++, 1);
 
 		for (j = 0; j < num_set_entries; j++) {
 
@@ -123,9 +122,8 @@ static int drc_index_to_cpu(u32 drc_index)
 		if (info == NULL)
 			goto err_of_node_put;
 
-		value = of_prop_next_u32(info, NULL, &num_set_entries);
-		if (!value)
-			goto err_of_node_put;
+		value = info->value;
+		num_set_entries = of_read_number(value++, 1);
 
 		for (j = 0; j < num_set_entries; j++) {
 
diff --git a/drivers/pci/hotplug/rpaphp_core.c b/drivers/pci/hotplug/rpaphp_core.c
index fb5e084..435c1a0 100644
--- a/drivers/pci/hotplug/rpaphp_core.c
+++ b/drivers/pci/hotplug/rpaphp_core.c
@@ -236,9 +236,8 @@ static int rpaphp_check_drc_props_v2(struct device_node *dn, char *drc_name,
 	if (info == NULL)
 		return -EINVAL;
 
-	value = of_prop_next_u32(info, NULL, &entries);
-	if (!value)
-		return -EINVAL;
+	value = info->value;
+	entries = of_read_number(value++, 1);
 
 	for (j = 0; j < entries; j++) {
 		of_read_drc_info_cell(&info, &value, &drc);

^ permalink raw reply related

* [RFC v4 2/4] hotplug/drcinfo: Provide parser with callback
From: Michael Bringmann @ 2018-05-22 16:37 UTC (permalink / raw)
  To: linuxppc-dev
  Cc: Michael Bringmann, Nathan Fontenot, John Allen, Tyrel Datwyler,
	Thomas Falcon
In-Reply-To: <47581596-1d6e-a1a9-9490-5b5ff0f5304f@linux.vnet.ibm.com>

This patch provides a common parse function for the ibm,drc-info
property that can be modified by a callback function.  The caller
provides a pointer to the function and a pointer to their unique
data, and the parser provides the current lmb set from the struct.
The callback function may return codes indicating that the parsing
is complete, or should continue, along with an error code that may
be returned to the caller.

Signed-off-by: Michael Bringmann <mwb@linux.vnet.ibm.com>
Fixes: 3f38000eda48 ("powerpc/firmware: Add definitions for new drc-info firmwar
e feature" -- end of patch series applied to powerpc next)
---
Changes in V4:
  -- Update code to account for latest kernel checkins.
  -- Rebased to 4.17-rc5 kernel
  -- Some patch cleanup including file combination
---
 arch/powerpc/include/asm/prom.h             |    7 +++++
 arch/powerpc/platforms/pseries/of_helpers.c |   37 +++++++++++++++++++++++++++
 2 files changed, 44 insertions(+)

diff --git a/arch/powerpc/include/asm/prom.h b/arch/powerpc/include/asm/prom.h
index b04c5ce..2e947b3 100644
--- a/arch/powerpc/include/asm/prom.h
+++ b/arch/powerpc/include/asm/prom.h
@@ -94,6 +94,13 @@ struct of_drc_info {
 extern int of_read_drc_info_cell(struct property **prop,
 			const __be32 **curval, struct of_drc_info *data);
 
+extern int drc_info_parser(struct device_node *dn,
+			int (*usercb)(struct of_drc_info *drc,
+					void *data,
+					void *optional_data,
+					int *ret_code),
+			char *opt_drc_type,
+			void *data);
 
 /*
  * There are two methods for telling firmware what our capabilities are.
diff --git a/arch/powerpc/platforms/pseries/of_helpers.c b/arch/powerpc/platforms/pseries/of_helpers.c
index 11b2ef1..a588ee6 100644
--- a/arch/powerpc/platforms/pseries/of_helpers.c
+++ b/arch/powerpc/platforms/pseries/of_helpers.c
@@ -6,6 +6,9 @@
 #include <asm/prom.h>
 
 #include "of_helpers.h"
+#include "pseries.h"
+
+#define	MAX_DRC_NAME_LEN 64
 
 /**
  * pseries_of_derive_parent - basically like dirname(1)
@@ -87,3 +90,37 @@ int of_read_drc_info_cell(struct property **prop, const __be32 **curval,
 	return 0;
 }
 EXPORT_SYMBOL(of_read_drc_info_cell);
+
+int drc_info_parser(struct device_node *dn,
+		int (*usercb)(struct of_drc_info *drc,
+				void *data,
+				void *optional_data,
+				int *ret_code),
+		char *opt_drc_type,
+		void *data)
+{
+	struct property *info;
+	unsigned int entries;
+	struct of_drc_info drc;
+	const __be32 *value;
+	int j, done = 0, ret_code = -EINVAL;
+
+	info = of_find_property(dn, "ibm,drc-info", NULL);
+	if (info == NULL)
+		return -EINVAL;
+
+	value = info->value;
+	entries = of_read_number(value++, 1);
+
+	for (j = 0, done = 0; (j < entries) && (!done); j++) {
+		of_read_drc_info_cell(&info, &value, &drc);
+
+		if (opt_drc_type && strcmp(opt_drc_type, drc.drc_type))
+			continue;
+
+		done = usercb(&drc, data, NULL, &ret_code);
+	}
+
+	return ret_code;
+}
+EXPORT_SYMBOL(drc_info_parser);

^ permalink raw reply related

* [RFC v4 3/4] hotplug/drcinfo: Fix hot-add CPU issues
From: Michael Bringmann @ 2018-05-22 16:37 UTC (permalink / raw)
  To: linuxppc-dev
  Cc: Michael Bringmann, Nathan Fontenot, John Allen, Tyrel Datwyler,
	Thomas Falcon
In-Reply-To: <47581596-1d6e-a1a9-9490-5b5ff0f5304f@linux.vnet.ibm.com>

This patch applies a common parse function for the ibm,drc-info
property that can be modified by a callback function to the
hot-add CPU code.  Candidate code is replaced by a call to the
parser including a pointer to a local context-specific functions,
and local data.

In addition, a bug in the release of the previous patch set may
break things in some of the CPU DLPAR operations.  For instance,
when attempting to hot-add a new CPU or set of CPUs, the original
patch failed to always properly calculate the available resources,
and aborted the operation.

Signed-off-by: Michael Bringmann <mwb@linux.vnet.ibm.com>
Fixes: 3f38000eda48 ("powerpc/firmware: Add definitions for new drc-info firmwar
e feature" -- end of patch series applied to powerpc next)
---
Changes in V4:
  -- Update code to account for latest kernel checkins.
  -- Rebased to 4.17-rc5 kernel
  -- Compress some more code
---
 arch/powerpc/platforms/pseries/hotplug-cpu.c    |  118 +++++++++++++++++------
 arch/powerpc/platforms/pseries/pseries_energy.c |  107 +++++++++++----------
 2 files changed, 141 insertions(+), 84 deletions(-)

diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c
index 6ef77ca..ceacad9 100644
--- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
+++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
@@ -411,27 +411,63 @@ static bool dlpar_cpu_exists(struct device_node *parent, u32 drc_index)
 	return found;
 }
 
-static bool valid_cpu_drc_index(struct device_node *parent, u32 drc_index)
+static bool check_cpu_drc_index(struct device_node *parent,
+				int (*cb)(struct of_drc_info *drc,
+					void *data, void *not_used,
+					int *ret_code),
+				void *cdata)
 {
 	bool found = false;
-	int rc, index;
 
-	index = 0;
-	while (!found) {
-		u32 drc;
+	if (firmware_has_feature(FW_FEATURE_DRC_INFO)) {
+		if (drc_info_parser(parent, cb, "CPU", cdata))
+			found = true;
+	} else {
+		int index = 0;
 
-		rc = of_property_read_u32_index(parent, "ibm,drc-indexes",
-						index++, &drc);
-		if (rc)
-			break;
+		while (!found) {
+			u32 drc;
 
-		if (drc == drc_index)
-			found = true;
+			if (of_property_read_u32_index(parent,
+						"ibm,drc-indexes",
+						index++, &drc))
+				break;
+			if (cb(NULL, cdata, &drc, NULL))
+				found = true;
+		}
 	}
 
 	return found;
 }
 
+struct valid_drc_index_struct {
+	u32 targ_drc_index;
+};
+
+static int valid_drc_index_cb(struct of_drc_info *drc, void *idata,
+				void *drc_index, int *ret_code)
+{
+	struct valid_drc_index_struct *cdata = idata;
+
+	if (drc) {
+		if (!((drc->drc_index_start <= cdata->targ_drc_index) &&
+			(cdata->targ_drc_index <= drc->last_drc_index)))
+			return 0;
+	} else {
+		if (*((u32 *)drc_index) != cdata->targ_drc_index)
+			return 0;
+	}
+	(*ret_code) = 1;
+	return 1;
+}
+
+static bool valid_cpu_drc_index(struct device_node *parent, u32 drc_index)
+{
+	struct valid_drc_index_struct cdata = { drc_index };
+
+	return check_cpu_drc_index(parent, valid_drc_index_cb, &cdata);
+}
+
 static ssize_t dlpar_cpu_add(u32 drc_index)
 {
 	struct device_node *dn, *parent;
@@ -721,11 +757,43 @@ static int dlpar_cpu_remove_by_count(u32 cpus_to_remove)
 	return rc;
 }
 
+struct cpus_to_add_struct {
+	struct device_node *parent;
+	u32 *cpu_drcs;
+	u32 cpus_to_add;
+	u32 cpus_found;
+};
+
+static int cpus_to_add_cb(struct of_drc_info *drc, void *idata,
+			void *drc_index, int *ret_code)
+{
+	struct cpus_to_add_struct *cdata = idata;
+
+	if (drc) {
+		int k;
+
+		for (k = 0; (k < drc->num_sequential_elems) &&
+			(cdata->cpus_found < cdata->cpus_to_add); k++) {
+			u32 idrc = drc->drc_index_start +
+				(k * drc->sequential_inc);
+
+			if (dlpar_cpu_exists(cdata->parent, idrc))
+				continue;
+			cdata->cpu_drcs[cdata->cpus_found++] = idrc;
+		}
+	} else {
+		if (!dlpar_cpu_exists(cdata->parent, *((u32 *)drc_index)))
+			cdata->cpu_drcs[cdata->cpus_found++] =
+				*((u32 *)drc_index);
+	}
+	return 0;
+}
+
 static int find_dlpar_cpus_to_add(u32 *cpu_drcs, u32 cpus_to_add)
 {
 	struct device_node *parent;
-	int cpus_found = 0;
-	int index, rc;
+	struct cpus_to_add_struct cdata = {
+		NULL, cpu_drcs, cpus_to_add, 0 };
 
 	parent = of_find_node_by_path("/cpus");
 	if (!parent) {
@@ -734,28 +802,14 @@ static int find_dlpar_cpus_to_add(u32 *cpu_drcs, u32 cpus_to_add)
 		return -1;
 	}
 
-	/* Search the ibm,drc-indexes array for possible CPU drcs to
-	 * add. Note that the format of the ibm,drc-indexes array is
-	 * the number of entries in the array followed by the array
-	 * of drc values so we start looking at index = 1.
+	/* Search the appropriate property for possible CPU drcs to
+	 * add.
 	 */
-	index = 1;
-	while (cpus_found < cpus_to_add) {
-		u32 drc;
-
-		rc = of_property_read_u32_index(parent, "ibm,drc-indexes",
-						index++, &drc);
-		if (rc)
-			break;
-
-		if (dlpar_cpu_exists(parent, drc))
-			continue;
-
-		cpu_drcs[cpus_found++] = drc;
-	}
+	cdata.parent = parent;
+	check_cpu_drc_index(parent, cpus_to_add_cb, &cdata);
 
 	of_node_put(parent);
-	return cpus_found;
+	return cdata.cpus_found;
 }
 
 static int dlpar_cpu_add_by_count(u32 cpus_to_add)
diff --git a/arch/powerpc/platforms/pseries/pseries_energy.c b/arch/powerpc/platforms/pseries/pseries_energy.c
index 5261975..d8d7750 100644
--- a/arch/powerpc/platforms/pseries/pseries_energy.c
+++ b/arch/powerpc/platforms/pseries/pseries_energy.c
@@ -36,6 +36,26 @@
 
 /* Helper Routines to convert between drc_index to cpu numbers */
 
+struct cpu_to_drc_index_struct {
+	u32	thread_index;
+	u32	ret;
+};
+
+static int cpu_to_drc_index_cb(struct of_drc_info *drc, void *idata,
+				void *not_used, int *ret_code)
+{
+	struct cpu_to_drc_index_struct *cdata = idata;
+	int ret = 0;
+
+	if (cdata->thread_index < drc->last_drc_index) {
+		cdata->ret = drc->drc_index_start +
+			(cdata->thread_index * drc->sequential_inc);
+		ret = 1;
+	}
+	(*ret_code) = ret;
+	return ret;
+}
+
 static u32 cpu_to_drc_index(int cpu)
 {
 	struct device_node *dn = NULL;
@@ -51,30 +71,14 @@ static u32 cpu_to_drc_index(int cpu)
 	thread_index = cpu_core_index_of_thread(cpu);
 
 	if (firmware_has_feature(FW_FEATURE_DRC_INFO)) {
-		struct property *info = NULL;
-		struct of_drc_info drc;
-		int j;
-		u32 num_set_entries;
-		const __be32 *value;
-
-		info = of_find_property(dn, "ibm,drc-info", NULL);
-		if (info == NULL)
-			goto err_of_node_put;
+		struct cpu_to_drc_index_struct cdata = {
+			thread_index, 0 };
 
-		value = info->value;
-		num_set_entries = of_read_number(value++, 1);
-
-		for (j = 0; j < num_set_entries; j++) {
-
-			of_read_drc_info_cell(&info, &value, &drc);
-			if (strncmp(drc.drc_type, "CPU", 3))
-				goto err;
-
-			if (thread_index < drc.last_drc_index)
-				break;
-		}
-
-		ret = drc.drc_index_start + (thread_index * drc.sequential_inc);
+		rc = drc_info_parser(dn, &cpu_to_drc_index_cb,
+					"CPU", &cdata);
+		if (rc < 0)
+			goto err_of_node_put;
+		ret = cdata.ret;
 	} else {
 		const __be32 *indexes;
 
@@ -100,11 +104,33 @@ static u32 cpu_to_drc_index(int cpu)
 	return ret;
 }
 
+struct drc_index_to_cpu_struct {
+	u32	drc_index;
+	u32	thread_index;
+	u32	cpu;
+};
+
+static int drc_index_to_cpu_cb(struct of_drc_info *drc,
+				void *idata, void *not_used, int *ret_code)
+{
+	struct drc_index_to_cpu_struct *cdata = idata;
+
+	if (cdata->drc_index > drc->last_drc_index) {
+		cdata->cpu += drc->num_sequential_elems;
+	} else {
+		cdata->cpu += ((cdata->drc_index - drc->drc_index_start) /
+				drc->sequential_inc);
+		cdata->thread_index = cpu_first_thread_of_core(cdata->cpu);
+	}
+	(*ret_code) = 0;
+	return 0;
+}
+
 static int drc_index_to_cpu(u32 drc_index)
 {
 	struct device_node *dn = NULL;
 	const int *indexes;
-	int thread_index = 0, cpu = 0;
+	int thread_index = 0;
 	int rc = 1;
 
 	dn = of_find_node_by_path("/cpus");
@@ -112,36 +138,13 @@ static int drc_index_to_cpu(u32 drc_index)
 		goto err;
 
 	if (firmware_has_feature(FW_FEATURE_DRC_INFO)) {
-		struct property *info = NULL;
-		struct of_drc_info drc;
-		int j;
-		u32 num_set_entries;
-		const __be32 *value;
-
-		info = of_find_property(dn, "ibm,drc-info", NULL);
-		if (info == NULL)
-			goto err_of_node_put;
-
-		value = info->value;
-		num_set_entries = of_read_number(value++, 1);
-
-		for (j = 0; j < num_set_entries; j++) {
+		struct drc_index_to_cpu_struct cdata = {
+			drc_index, 0, 0 };
 
-			of_read_drc_info_cell(&info, &value, &drc);
-			if (strncmp(drc.drc_type, "CPU", 3))
-				goto err;
+		rc = drc_info_parser(dn, &drc_index_to_cpu_cb,
+					"CPU", &cdata);
+		thread_index = cdata.thread_index;
 
-			if (drc_index > drc.last_drc_index) {
-				cpu += drc.num_sequential_elems;
-				continue;
-			}
-			cpu += ((drc_index - drc.drc_index_start) /
-				drc.sequential_inc);
-
-			thread_index = cpu_first_thread_of_core(cpu);
-			rc = 0;
-			break;
-		}
 	} else {
 		unsigned long int i;
 

^ permalink raw reply related

* [RFC v4 4/4] hotplug/drcinfo: Code cleanup for devices
From: Michael Bringmann @ 2018-05-22 16:37 UTC (permalink / raw)
  To: linuxppc-dev
  Cc: Michael Bringmann, Nathan Fontenot, John Allen, Tyrel Datwyler,
	Thomas Falcon
In-Reply-To: <47581596-1d6e-a1a9-9490-5b5ff0f5304f@linux.vnet.ibm.com>

This patch extends the use of a common parse function for the
ibm,drc-info property that can be modified by a callback function
to the hotplug device processing.  Candidate code is replaced by
a call to the parser including a pointer to a local context-specific
functions, and local data.

In addition, several more opportunities to compress and reuse
common code between the old and new property parsers were applied.

Finally, a bug with the registration of slots was observed on some
systems, and the code was rewritten to prevent its reoccurrence.

Signed-off-by: Michael Bringmann <mwb@linux.vnet.ibm.com>
Fixes: 3f38000eda48 ("powerpc/firmware: Add definitions for new drc-info firmwar
e feature" -- end of patch series applied to powerpc next)
---
Changes in V4:
  -- Update code to account for latest kernel checkins.
  -- Fix bug searching for virtual device slots.
  -- Rebased to 4.17-rc5 kernel
  -- Patch cleanup
---
 drivers/pci/hotplug/rpaphp_core.c |  181 ++++++++++++++++++++++++++-----------
 1 file changed, 126 insertions(+), 55 deletions(-)

diff --git a/drivers/pci/hotplug/rpaphp_core.c b/drivers/pci/hotplug/rpaphp_core.c
index 435c1a0..dc4ec68 100644
--- a/drivers/pci/hotplug/rpaphp_core.c
+++ b/drivers/pci/hotplug/rpaphp_core.c
@@ -222,47 +222,51 @@ static int rpaphp_check_drc_props_v1(struct device_node *dn, char *drc_name,
 	return -EINVAL;
 }
 
-static int rpaphp_check_drc_props_v2(struct device_node *dn, char *drc_name,
-				char *drc_type, unsigned int my_index)
+struct check_drc_props_v2_struct {
+	char *drc_name;
+	char *drc_type;
+	unsigned int my_index;
+};
+
+static int check_drc_props_v2_cb(struct of_drc_info *drc, void *idata,
+				void *not_used, int *ret_code)
 {
-	struct property *info;
-	unsigned int entries;
-	struct of_drc_info drc;
-	const __be32 *value;
+	struct check_drc_props_v2_struct *cdata = idata;
 	char cell_drc_name[MAX_DRC_NAME_LEN];
-	int j, fndit;
 
-	info = of_find_property(dn->parent, "ibm,drc-info", NULL);
-	if (info == NULL)
-		return -EINVAL;
+	(*ret_code) = -EINVAL;
 
-	value = info->value;
-	entries = of_read_number(value++, 1);
-
-	for (j = 0; j < entries; j++) {
-		of_read_drc_info_cell(&info, &value, &drc);
-
-		/* Should now know end of current entry */
-
-		if (my_index > drc.last_drc_index)
-			continue;
+	if (cdata->my_index > drc->last_drc_index)
+		return 0;
 
-		fndit = 1;
-		break;
+	/* Found drc_index.  Now match the rest. */
+	sprintf(cell_drc_name, "%s%d", drc->drc_name_prefix,
+		cdata->my_index - drc->drc_index_start +
+		drc->drc_name_suffix_start);
+
+	if (((cdata->drc_name == NULL) ||
+	     (cdata->drc_name && !strcmp(cdata->drc_name, cell_drc_name))) &&
+	    ((cdata->drc_type == NULL) ||
+	     (cdata->drc_type && !strcmp(cdata->drc_type, drc->drc_type)))) {
+		(*ret_code) = 0;
+		return 1;
 	}
-	/* Found it */
 
-	if (fndit)
-		sprintf(cell_drc_name, "%s%d", drc.drc_name_prefix, 
-			my_index);
+	return 0;
+}
 
-	if (((drc_name == NULL) ||
-	     (drc_name && !strcmp(drc_name, cell_drc_name))) &&
-	    ((drc_type == NULL) ||
-	     (drc_type && !strcmp(drc_type, drc.drc_type))))
-		return 0;
+static int rpaphp_check_drc_props_v2(struct device_node *dn, char *drc_name,
+				char *drc_type, unsigned int my_index)
+{
+	struct device_node *root = dn;
+	struct check_drc_props_v2_struct cdata = {
+		drc_name, drc_type, be32_to_cpu(my_index) };
 
-	return -EINVAL;
+	if (!drc_type || (drc_type && strcmp(drc_type, "SLOT")))
+		root = dn->parent;
+
+	return drc_info_parser(root, check_drc_props_v2_cb,
+				drc_type, &cdata);
 }
 
 int rpaphp_check_drc_props(struct device_node *dn, char *drc_name,
@@ -285,7 +289,6 @@ int rpaphp_check_drc_props(struct device_node *dn, char *drc_name,
 }
 EXPORT_SYMBOL_GPL(rpaphp_check_drc_props);
 
-
 static int is_php_type(char *drc_type)
 {
 	unsigned long value;
@@ -345,17 +348,41 @@ static int is_php_dn(struct device_node *dn, const int **indexes,
  *
  * To remove a slot, it suffices to call rpaphp_deregister_slot().
  */
-int rpaphp_add_slot(struct device_node *dn)
+
+static int rpaphp_add_slot_common(struct device_node *dn,
+			u32 drc_index, char *drc_name, char *drc_type,
+			u32 drc_power_domain)
 {
 	struct slot *slot;
 	int retval = 0;
-	int i;
+
+	slot = alloc_slot_struct(dn, drc_index, drc_name, drc_power_domain);
+	if (!slot)
+		return -ENOMEM;
+
+	slot->type = simple_strtoul(drc_type, NULL, 10);
+	if (retval)
+		return -EINVAL;
+
+	dbg("Found drc-index:0x%x drc-name:%s drc-type:%s\n",
+		drc_index, drc_name, drc_type);
+
+	retval = rpaphp_enable_slot(slot);
+	if (!retval)
+		retval = rpaphp_register_slot(slot);
+
+	if (retval)
+		dealloc_slot_struct(slot);
+
+	return retval;
+}
+
+static int rpaphp_add_slot_v1(struct device_node *dn)
+{
+	int i, retval = 0;
 	const int *indexes, *names, *types, *power_domains;
 	char *name, *type;
 
-	if (!dn->name || strcmp(dn->name, "pci"))
-		return 0;
-
 	/* If this is not a hotplug slot, return without doing anything. */
 	if (!is_php_dn(dn, &indexes, &names, &types, &power_domains))
 		return 0;
@@ -366,25 +393,12 @@ int rpaphp_add_slot(struct device_node *dn)
 	name = (char *) &names[1];
 	type = (char *) &types[1];
 	for (i = 0; i < be32_to_cpu(indexes[0]); i++) {
-		int index;
-
-		index = be32_to_cpu(indexes[i + 1]);
-		slot = alloc_slot_struct(dn, index, name,
-					 be32_to_cpu(power_domains[i + 1]));
-		if (!slot)
-			return -ENOMEM;
-
-		slot->type = simple_strtoul(type, NULL, 10);
 
-		dbg("Found drc-index:0x%x drc-name:%s drc-type:%s\n",
-				index, name, type);
-
-		retval = rpaphp_enable_slot(slot);
+		retval = rpaphp_add_slot_common(dn,
+				be32_to_cpu(indexes[i + 1]), name, type,
+				be32_to_cpu(power_domains[i + 1]));
 		if (!retval)
-			retval = rpaphp_register_slot(slot);
-
-		if (retval)
-			dealloc_slot_struct(slot);
+			return retval;
 
 		name += strlen(name) + 1;
 		type += strlen(type) + 1;
@@ -394,6 +408,63 @@ int rpaphp_add_slot(struct device_node *dn)
 	/* XXX FIXME: reports a failure only if last entry in loop failed */
 	return retval;
 }
+
+struct rpaphp_add_slot_v2_struct {
+	struct device_node *dn;
+};
+
+static int rpaphp_add_slot_v2_cb(struct of_drc_info *drc, void *idata,
+				void *not_used, int *ret_code)
+{
+	struct rpaphp_add_slot_v2_struct *cdata = idata;
+	u32 drc_index;
+	char drc_name[MAX_DRC_NAME_LEN];
+	int i, retval;
+
+	(*ret_code) = -EINVAL;
+
+	if (!is_php_type((char *) drc->drc_type)) {
+		(*ret_code) = 0;
+		return 1;
+	}
+
+	for (i = 0, drc_index = drc->drc_index_start;
+		i < drc->num_sequential_elems; i++, drc_index++) {
+
+		sprintf(drc_name, "%s%d", drc->drc_name_prefix,
+			drc_index - drc->drc_index_start +
+			drc->drc_name_suffix_start);
+
+		retval = rpaphp_add_slot_common(cdata->dn,
+				drc_index, drc_name, drc->drc_type,
+				drc->drc_power_domain);
+		if (!retval) {
+			(*ret_code) = retval;
+			return 1;
+		}
+	}
+
+	(*ret_code) = retval;
+	return 0;
+}
+
+static int rpaphp_add_slot_v2(struct device_node *dn)
+{
+	struct rpaphp_add_slot_v2_struct cdata = { dn };
+
+	return drc_info_parser(dn, rpaphp_add_slot_v2_cb, NULL, &cdata);
+}
+
+int rpaphp_add_slot(struct device_node *dn)
+{
+	if (!dn->name || strcmp(dn->name, "pci"))
+		return 0;
+
+	if (firmware_has_feature(FW_FEATURE_DRC_INFO))
+		return rpaphp_add_slot_v2(dn);
+	else
+		return rpaphp_add_slot_v1(dn);
+}
 EXPORT_SYMBOL_GPL(rpaphp_add_slot);
 
 static void __exit cleanup_slots(void)

^ permalink raw reply related

* [PATCH bpf-next v3 00/10] bpf: enhancements for multi-function programs
From: Sandipan Das @ 2018-05-22 17:16 UTC (permalink / raw)
  To: ast, daniel; +Cc: netdev, linuxppc-dev, mpe, naveen.n.rao, jakub.kicinski

v3:
 - Change base tree tag to bpf-next.
 - Incorporate review comments from Alexei, Daniel and Jakub.
 - Make sure that the JITed image does not grow or shrink after
   the last pass due to the way the instruction sequence used
   to load a callee's address maybe optimized.
 - Make additional changes to the bpf system call and bpftool to
   make multi-function JITed dumps easier to correlate.

v2:
 - Incorporate review comments from Jakub.

Sandipan Das (10):
  bpf: support 64-bit offsets for bpf function calls
  bpf: powerpc64: pad function address loads with NOPs
  bpf: powerpc64: add JIT support for multi-function programs
  bpf: get kernel symbol addresses via syscall
  tools: bpf: sync bpf uapi header
  tools: bpftool: resolve calls without using imm field
  bpf: fix multi-function JITed dump obtained via syscall
  bpf: get JITed image lengths of functions via syscall
  tools: bpf: sync bpf uapi header
  tools: bpftool: add delimiters to multi-function JITed dumps

 arch/powerpc/net/bpf_jit_comp64.c | 110 ++++++++++++++++++++++++++++++--------
 include/uapi/linux/bpf.h          |   4 ++
 kernel/bpf/syscall.c              |  81 ++++++++++++++++++++++++++--
 kernel/bpf/verifier.c             |  22 +++++---
 tools/bpf/bpftool/prog.c          |  75 +++++++++++++++++++++++++-
 tools/bpf/bpftool/xlated_dumper.c |  14 +++--
 tools/bpf/bpftool/xlated_dumper.h |   3 ++
 tools/include/uapi/linux/bpf.h    |   4 ++
 8 files changed, 278 insertions(+), 35 deletions(-)

-- 
2.14.3

^ permalink raw reply

* [PATCH bpf-next v3 01/10] bpf: support 64-bit offsets for bpf function calls
From: Sandipan Das @ 2018-05-22 17:16 UTC (permalink / raw)
  To: ast, daniel; +Cc: netdev, linuxppc-dev, mpe, naveen.n.rao, jakub.kicinski
In-Reply-To: <cover.1527008646.git.sandipan@linux.vnet.ibm.com>

The imm field of a bpf instruction is a signed 32-bit integer.
For JITed bpf-to-bpf function calls, it holds the offset of the
start address of the callee's JITed image from __bpf_call_base.

For some architectures, such as powerpc64, this offset may be
as large as 64 bits and cannot be accomodated in the imm field
without truncation.

We resolve this by:

[1] Additionally using the auxillary data of each function to
    keep a list of start addresses of the JITed images for all
    functions determined by the verifier.

[2] Retaining the subprog id inside the off field of the call
    instructions and using it to index into the list mentioned
    above and lookup the callee's address.

To make sure that the existing JIT compilers continue to work
without requiring changes, we keep the imm field as it is.

Signed-off-by: Sandipan Das <sandipan@linux.vnet.ibm.com>
---
 kernel/bpf/verifier.c | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index a9e4b1372da6..559cb74ba29e 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -5383,11 +5383,24 @@ static int jit_subprogs(struct bpf_verifier_env *env)
 			    insn->src_reg != BPF_PSEUDO_CALL)
 				continue;
 			subprog = insn->off;
-			insn->off = 0;
 			insn->imm = (u64 (*)(u64, u64, u64, u64, u64))
 				func[subprog]->bpf_func -
 				__bpf_call_base;
 		}
+
+		/* we use the aux data to keep a list of the start addresses
+		 * of the JITed images for each function in the program
+		 *
+		 * for some architectures, such as powerpc64, the imm field
+		 * might not be large enough to hold the offset of the start
+		 * address of the callee's JITed image from __bpf_call_base
+		 *
+		 * in such cases, we can lookup the start address of a callee
+		 * by using its subprog id, available from the off field of
+		 * the call instruction, as an index for this list
+		 */
+		func[i]->aux->func = func;
+		func[i]->aux->func_cnt = env->subprog_cnt;
 	}
 	for (i = 0; i < env->subprog_cnt; i++) {
 		old_bpf_func = func[i]->bpf_func;
-- 
2.14.3

^ permalink raw reply related

* [PATCH bpf-next v3 02/10] bpf: powerpc64: pad function address loads with NOPs
From: Sandipan Das @ 2018-05-22 17:16 UTC (permalink / raw)
  To: ast, daniel; +Cc: netdev, linuxppc-dev, mpe, naveen.n.rao, jakub.kicinski
In-Reply-To: <cover.1527008646.git.sandipan@linux.vnet.ibm.com>

For multi-function programs, loading the address of a callee
function to a register requires emitting instructions whose
count varies from one to five depending on the nature of the
address.

Since we come to know of the callee's address only before the
extra pass, the number of instructions required to load this
address may vary from what was previously generated. This can
make the JITed image grow or shrink.

To avoid this, we should generate a constant five-instruction
when loading function addresses by padding the optimized load
sequence with NOPs.

Signed-off-by: Sandipan Das <sandipan@linux.vnet.ibm.com>
---
 arch/powerpc/net/bpf_jit_comp64.c | 34 +++++++++++++++++++++++-----------
 1 file changed, 23 insertions(+), 11 deletions(-)

diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
index 1bdb1aff0619..e4582744a31d 100644
--- a/arch/powerpc/net/bpf_jit_comp64.c
+++ b/arch/powerpc/net/bpf_jit_comp64.c
@@ -167,25 +167,37 @@ static void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx)
 
 static void bpf_jit_emit_func_call(u32 *image, struct codegen_context *ctx, u64 func)
 {
+	unsigned int i, ctx_idx = ctx->idx;
+
+	/* Load function address into r12 */
+	PPC_LI64(12, func);
+
+	/* For bpf-to-bpf function calls, the callee's address is unknown
+	 * until the last extra pass. As seen above, we use PPC_LI64() to
+	 * load the callee's address, but this may optimize the number of
+	 * instructions required based on the nature of the address.
+	 *
+	 * Since we don't want the number of instructions emitted to change,
+	 * we pad the optimized PPC_LI64() call with NOPs to guarantee that
+	 * we always have a five-instruction sequence, which is the maximum
+	 * that PPC_LI64() can emit.
+	 */
+	for (i = ctx->idx - ctx_idx; i < 5; i++)
+		PPC_NOP();
+
 #ifdef PPC64_ELF_ABI_v1
-	/* func points to the function descriptor */
-	PPC_LI64(b2p[TMP_REG_2], func);
-	/* Load actual entry point from function descriptor */
-	PPC_BPF_LL(b2p[TMP_REG_1], b2p[TMP_REG_2], 0);
-	/* ... and move it to LR */
-	PPC_MTLR(b2p[TMP_REG_1]);
 	/*
 	 * Load TOC from function descriptor at offset 8.
 	 * We can clobber r2 since we get called through a
 	 * function pointer (so caller will save/restore r2)
 	 * and since we don't use a TOC ourself.
 	 */
-	PPC_BPF_LL(2, b2p[TMP_REG_2], 8);
-#else
-	/* We can clobber r12 */
-	PPC_FUNC_ADDR(12, func);
-	PPC_MTLR(12);
+	PPC_BPF_LL(2, 12, 8);
+	/* Load actual entry point from function descriptor */
+	PPC_BPF_LL(12, 12, 0);
 #endif
+
+	PPC_MTLR(12);
 	PPC_BLRL();
 }
 
-- 
2.14.3

^ permalink raw reply related

* [PATCH bpf-next v3 03/10] bpf: powerpc64: add JIT support for multi-function programs
From: Sandipan Das @ 2018-05-22 17:16 UTC (permalink / raw)
  To: ast, daniel; +Cc: netdev, linuxppc-dev, mpe, naveen.n.rao, jakub.kicinski
In-Reply-To: <cover.1527008646.git.sandipan@linux.vnet.ibm.com>

This adds support for bpf-to-bpf function calls in the powerpc64
JIT compiler. The JIT compiler converts the bpf call instructions
to native branch instructions. After a round of the usual passes,
the start addresses of the JITed images for the callee functions
are known. Finally, to fixup the branch target addresses, we need
to perform an extra pass.

Because of the address range in which JITed images are allocated
on powerpc64, the offsets of the start addresses of these images
from __bpf_call_base are as large as 64 bits. So, for a function
call, we cannot use the imm field of the instruction to determine
the callee's address. Instead, we use the alternative method of
getting it from the list of function addresses in the auxiliary
data of the caller by using the off field as an index.

Signed-off-by: Sandipan Das <sandipan@linux.vnet.ibm.com>
---
v3:
 - Fix memory leak for jit_data when we fail to allocated addrs.
 - Remove unnecessary bpf_jit_binary_lock_ro() call.
---
 arch/powerpc/net/bpf_jit_comp64.c | 76 +++++++++++++++++++++++++++++++++------
 1 file changed, 66 insertions(+), 10 deletions(-)

diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
index e4582744a31d..f1c95779843b 100644
--- a/arch/powerpc/net/bpf_jit_comp64.c
+++ b/arch/powerpc/net/bpf_jit_comp64.c
@@ -268,7 +268,7 @@ static void bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32
 /* Assemble the body code between the prologue & epilogue */
 static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
 			      struct codegen_context *ctx,
-			      u32 *addrs)
+			      u32 *addrs, bool extra_pass)
 {
 	const struct bpf_insn *insn = fp->insnsi;
 	int flen = fp->len;
@@ -724,11 +724,25 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
 			break;
 
 		/*
-		 * Call kernel helper
+		 * Call kernel helper or bpf function
 		 */
 		case BPF_JMP | BPF_CALL:
 			ctx->seen |= SEEN_FUNC;
-			func = (u8 *) __bpf_call_base + imm;
+
+			/* bpf function call */
+			if (insn[i].src_reg == BPF_PSEUDO_CALL)
+				if (!extra_pass)
+					func = NULL;
+				else if (fp->aux->func && off < fp->aux->func_cnt)
+					/* use the subprog id from the off
+					 * field to lookup the callee address
+					 */
+					func = (u8 *) fp->aux->func[off]->bpf_func;
+				else
+					return -EINVAL;
+			/* kernel helper call */
+			else
+				func = (u8 *) __bpf_call_base + imm;
 
 			bpf_jit_emit_func_call(image, ctx, (u64)func);
 
@@ -876,6 +890,14 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
 	return 0;
 }
 
+struct powerpc64_jit_data {
+	struct bpf_binary_header *header;
+	u32 *addrs;
+	u8 *image;
+	u32 proglen;
+	struct codegen_context ctx;
+};
+
 struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
 {
 	u32 proglen;
@@ -883,6 +905,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
 	u8 *image = NULL;
 	u32 *code_base;
 	u32 *addrs;
+	struct powerpc64_jit_data *jit_data;
 	struct codegen_context cgctx;
 	int pass;
 	int flen;
@@ -890,6 +913,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
 	struct bpf_prog *org_fp = fp;
 	struct bpf_prog *tmp_fp;
 	bool bpf_blinded = false;
+	bool extra_pass = false;
 
 	if (!fp->jit_requested)
 		return org_fp;
@@ -903,11 +927,32 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
 		fp = tmp_fp;
 	}
 
+	jit_data = fp->aux->jit_data;
+	if (!jit_data) {
+		jit_data = kzalloc(sizeof(*jit_data), GFP_KERNEL);
+		if (!jit_data) {
+			fp = org_fp;
+			goto out;
+		}
+		fp->aux->jit_data = jit_data;
+	}
+
 	flen = fp->len;
+	addrs = jit_data->addrs;
+	if (addrs) {
+		cgctx = jit_data->ctx;
+		image = jit_data->image;
+		bpf_hdr = jit_data->header;
+		proglen = jit_data->proglen;
+		alloclen = proglen + FUNCTION_DESCR_SIZE;
+		extra_pass = true;
+		goto skip_init_ctx;
+	}
+
 	addrs = kzalloc((flen+1) * sizeof(*addrs), GFP_KERNEL);
 	if (addrs == NULL) {
 		fp = org_fp;
-		goto out;
+		goto out_addrs;
 	}
 
 	memset(&cgctx, 0, sizeof(struct codegen_context));
@@ -916,10 +961,10 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
 	cgctx.stack_size = round_up(fp->aux->stack_depth, 16);
 
 	/* Scouting faux-generate pass 0 */
-	if (bpf_jit_build_body(fp, 0, &cgctx, addrs)) {
+	if (bpf_jit_build_body(fp, 0, &cgctx, addrs, false)) {
 		/* We hit something illegal or unsupported. */
 		fp = org_fp;
-		goto out;
+		goto out_addrs;
 	}
 
 	/*
@@ -937,9 +982,10 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
 			bpf_jit_fill_ill_insns);
 	if (!bpf_hdr) {
 		fp = org_fp;
-		goto out;
+		goto out_addrs;
 	}
 
+skip_init_ctx:
 	code_base = (u32 *)(image + FUNCTION_DESCR_SIZE);
 
 	/* Code generation passes 1-2 */
@@ -947,7 +993,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
 		/* Now build the prologue, body code & epilogue for real. */
 		cgctx.idx = 0;
 		bpf_jit_build_prologue(code_base, &cgctx);
-		bpf_jit_build_body(fp, code_base, &cgctx, addrs);
+		bpf_jit_build_body(fp, code_base, &cgctx, addrs, extra_pass);
 		bpf_jit_build_epilogue(code_base, &cgctx);
 
 		if (bpf_jit_enable > 1)
@@ -973,10 +1019,20 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
 	fp->jited_len = alloclen;
 
 	bpf_flush_icache(bpf_hdr, (u8 *)bpf_hdr + (bpf_hdr->pages * PAGE_SIZE));
+	if (!fp->is_func || extra_pass) {
+out_addrs:
+		kfree(addrs);
+		kfree(jit_data);
+		fp->aux->jit_data = NULL;
+	} else {
+		jit_data->addrs = addrs;
+		jit_data->ctx = cgctx;
+		jit_data->proglen = proglen;
+		jit_data->image = image;
+		jit_data->header = bpf_hdr;
+	}
 
 out:
-	kfree(addrs);
-
 	if (bpf_blinded)
 		bpf_jit_prog_release_other(fp, fp == org_fp ? tmp_fp : org_fp);
 
-- 
2.14.3

^ permalink raw reply related

* [PATCH bpf-next v3 05/10] tools: bpf: sync bpf uapi header
From: Sandipan Das @ 2018-05-22 17:16 UTC (permalink / raw)
  To: ast, daniel; +Cc: netdev, linuxppc-dev, mpe, naveen.n.rao, jakub.kicinski
In-Reply-To: <cover.1527008646.git.sandipan@linux.vnet.ibm.com>

Syncing the bpf.h uapi header with tools so that struct
bpf_prog_info has the two new fields for passing on the
addresses of the kernel symbols corresponding to each
function in a program.

Signed-off-by: Sandipan Das <sandipan@linux.vnet.ibm.com>
---
v3:
 - Move new fields to the end of bpf_prog_info to avoid
   breaking userspace.
---
 tools/include/uapi/linux/bpf.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 97446bbe2ca5..c44105f27da9 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -2205,6 +2205,8 @@ struct bpf_prog_info {
 	__u32 gpl_compatible:1;
 	__u64 netns_dev;
 	__u64 netns_ino;
+	__u32 nr_jited_ksyms;
+	__aligned_u64 jited_ksyms;
 } __attribute__((aligned(8)));
 
 struct bpf_map_info {
-- 
2.14.3

^ permalink raw reply related

* [PATCH bpf-next v3 06/10] tools: bpftool: resolve calls without using imm field
From: Sandipan Das @ 2018-05-22 17:16 UTC (permalink / raw)
  To: ast, daniel; +Cc: netdev, linuxppc-dev, mpe, naveen.n.rao, jakub.kicinski
In-Reply-To: <cover.1527008646.git.sandipan@linux.vnet.ibm.com>

Currently, we resolve the callee's address for a JITed function
call by using the imm field of the call instruction as an offset
from __bpf_call_base. If bpf_jit_kallsyms is enabled, we further
use this address to get the callee's kernel symbol's name.

For some architectures, such as powerpc64, the imm field is not
large enough to hold this offset. So, instead of assigning this
offset to the imm field, the verifier now assigns the subprog
id. Also, a list of kernel symbol addresses for all the JITed
functions is provided in the program info. We now use the imm
field as an index for this list to lookup a callee's symbol's
address and resolve its name.

Suggested-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Sandipan Das <sandipan@linux.vnet.ibm.com>
---
v3:
 - Avoid using redundant pointers.
 - Fix indentation.

v2:
 - Order variables from longest to shortest.
 - Make sure that ksyms_ptr and ksyms_len are always initialized.
 - Simplify code.
---
 tools/bpf/bpftool/prog.c          | 24 ++++++++++++++++++++++++
 tools/bpf/bpftool/xlated_dumper.c | 10 +++++++++-
 tools/bpf/bpftool/xlated_dumper.h |  2 ++
 3 files changed, 35 insertions(+), 1 deletion(-)

diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index 9bdfdf2d3fbe..e05ab58d39e2 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -420,7 +420,9 @@ static int do_show(int argc, char **argv)
 
 static int do_dump(int argc, char **argv)
 {
+	unsigned long *func_ksyms = NULL;
 	struct bpf_prog_info info = {};
+	unsigned int nr_func_ksyms;
 	struct dump_data dd = {};
 	__u32 len = sizeof(info);
 	unsigned int buf_size;
@@ -496,10 +498,22 @@ static int do_dump(int argc, char **argv)
 		return -1;
 	}
 
+	nr_func_ksyms = info.nr_jited_ksyms;
+	if (nr_func_ksyms) {
+		func_ksyms = malloc(nr_func_ksyms * sizeof(__u64));
+		if (!func_ksyms) {
+			p_err("mem alloc failed");
+			close(fd);
+			goto err_free;
+		}
+	}
+
 	memset(&info, 0, sizeof(info));
 
 	*member_ptr = ptr_to_u64(buf);
 	*member_len = buf_size;
+	info.jited_ksyms = ptr_to_u64(func_ksyms);
+	info.nr_jited_ksyms = nr_func_ksyms;
 
 	err = bpf_obj_get_info_by_fd(fd, &info, &len);
 	close(fd);
@@ -513,6 +527,11 @@ static int do_dump(int argc, char **argv)
 		goto err_free;
 	}
 
+	if (info.nr_jited_ksyms > nr_func_ksyms) {
+		p_err("too many addresses returned");
+		goto err_free;
+	}
+
 	if ((member_len == &info.jited_prog_len &&
 	     info.jited_prog_insns == 0) ||
 	    (member_len == &info.xlated_prog_len &&
@@ -558,6 +577,9 @@ static int do_dump(int argc, char **argv)
 			dump_xlated_cfg(buf, *member_len);
 	} else {
 		kernel_syms_load(&dd);
+		dd.nr_jited_ksyms = info.nr_jited_ksyms;
+		dd.jited_ksyms = (__u64 *) info.jited_ksyms;
+
 		if (json_output)
 			dump_xlated_json(&dd, buf, *member_len, opcodes);
 		else
@@ -566,10 +588,12 @@ static int do_dump(int argc, char **argv)
 	}
 
 	free(buf);
+	free(func_ksyms);
 	return 0;
 
 err_free:
 	free(buf);
+	free(func_ksyms);
 	return -1;
 }
 
diff --git a/tools/bpf/bpftool/xlated_dumper.c b/tools/bpf/bpftool/xlated_dumper.c
index 7a3173b76c16..efdc8fecf2bb 100644
--- a/tools/bpf/bpftool/xlated_dumper.c
+++ b/tools/bpf/bpftool/xlated_dumper.c
@@ -174,7 +174,11 @@ static const char *print_call_pcrel(struct dump_data *dd,
 				    unsigned long address,
 				    const struct bpf_insn *insn)
 {
-	if (sym)
+	if (!dd->nr_jited_ksyms)
+		/* Do not show address for interpreted programs */
+		snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
+			"%+d", insn->off);
+	else if (sym)
 		snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
 			 "%+d#%s", insn->off, sym->name);
 	else
@@ -203,6 +207,10 @@ static const char *print_call(void *private_data,
 	unsigned long address = dd->address_call_base + insn->imm;
 	struct kernel_sym *sym;
 
+	if (insn->src_reg == BPF_PSEUDO_CALL &&
+	    (__u32) insn->imm < dd->nr_jited_ksyms)
+		address = dd->jited_ksyms[insn->imm];
+
 	sym = kernel_syms_search(dd, address);
 	if (insn->src_reg == BPF_PSEUDO_CALL)
 		return print_call_pcrel(dd, sym, address, insn);
diff --git a/tools/bpf/bpftool/xlated_dumper.h b/tools/bpf/bpftool/xlated_dumper.h
index b34affa7ef2d..eafbb49c8d0b 100644
--- a/tools/bpf/bpftool/xlated_dumper.h
+++ b/tools/bpf/bpftool/xlated_dumper.h
@@ -49,6 +49,8 @@ struct dump_data {
 	unsigned long address_call_base;
 	struct kernel_sym *sym_mapping;
 	__u32 sym_count;
+	__u64 *jited_ksyms;
+	__u32 nr_jited_ksyms;
 	char scratch_buff[SYM_MAX_NAME + 8];
 };
 
-- 
2.14.3

^ permalink raw reply related

* [PATCH bpf-next v3 04/10] bpf: get kernel symbol addresses via syscall
From: Sandipan Das @ 2018-05-22 17:16 UTC (permalink / raw)
  To: ast, daniel; +Cc: netdev, linuxppc-dev, mpe, naveen.n.rao, jakub.kicinski
In-Reply-To: <cover.1527008646.git.sandipan@linux.vnet.ibm.com>

This adds new two new fields to struct bpf_prog_info. For
multi-function programs, these fields can be used to pass
a list of kernel symbol addresses for all functions in a
given program to userspace using the bpf system call with
the BPF_OBJ_GET_INFO_BY_FD command.

When bpf_jit_kallsyms is enabled, we can get the address
of the corresponding kernel symbol for a callee function
and resolve the symbol's name. The address is determined
by adding the value of the call instruction's imm field
to __bpf_call_base. This offset gets assigned to the imm
field by the verifier.

For some architectures, such as powerpc64, the imm field
is not large enough to hold this offset.

We resolve this by:

[1] Assigning the subprog id to the imm field of a call
    instruction in the verifier instead of the offset of
    the callee's symbol's address from __bpf_call_base.

[2] Determining the address of a callee's corresponding
    symbol by using the imm field as an index for the
    list of kernel symbol addresses now available from
    the program info.

Suggested-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Sandipan Das <sandipan@linux.vnet.ibm.com>
---
v3:
 - Copy addresses to jited_ksyms only if bpf_dump_raw_ok()
   is true.
 - Move new fields to the end of bpf_prog_info to avoid
   breaking userspace.
---
 include/uapi/linux/bpf.h |  2 ++
 kernel/bpf/syscall.c     | 25 +++++++++++++++++++++++++
 kernel/bpf/verifier.c    |  7 +------
 3 files changed, 28 insertions(+), 6 deletions(-)

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 97446bbe2ca5..c44105f27da9 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -2205,6 +2205,8 @@ struct bpf_prog_info {
 	__u32 gpl_compatible:1;
 	__u64 netns_dev;
 	__u64 netns_ino;
+	__u32 nr_jited_ksyms;
+	__aligned_u64 jited_ksyms;
 } __attribute__((aligned(8)));
 
 struct bpf_map_info {
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index bfcde949c7f8..f0ad4b5f0224 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1933,6 +1933,7 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog,
 	if (!capable(CAP_SYS_ADMIN)) {
 		info.jited_prog_len = 0;
 		info.xlated_prog_len = 0;
+		info.nr_jited_ksyms = 0;
 		goto done;
 	}
 
@@ -1981,6 +1982,30 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog,
 		}
 	}
 
+	ulen = info.nr_jited_ksyms;
+	info.nr_jited_ksyms = prog->aux->func_cnt;
+	if (info.nr_jited_ksyms && ulen) {
+		if (bpf_dump_raw_ok()) {
+			u64 __user *user_ksyms;
+			ulong ksym_addr;
+			u32 i;
+
+			/* copy the address of the kernel symbol
+			 * corresponding to each function
+			 */
+			ulen = min_t(u32, info.nr_jited_ksyms, ulen);
+			user_ksyms = u64_to_user_ptr(info.jited_ksyms);
+			for (i = 0; i < ulen; i++) {
+				ksym_addr = (ulong) prog->aux->func[i]->bpf_func;
+				ksym_addr &= PAGE_MASK;
+				if (put_user((u64) ksym_addr, &user_ksyms[i]))
+					return -EFAULT;
+			}
+		} else {
+			info.jited_ksyms = 0;
+		}
+	}
+
 done:
 	if (copy_to_user(uinfo, &info, info_len) ||
 	    put_user(info_len, &uattr->info.info_len))
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 559cb74ba29e..8c4d9d0fd3ab 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -5426,17 +5426,12 @@ static int jit_subprogs(struct bpf_verifier_env *env)
 	 * later look the same as if they were interpreted only.
 	 */
 	for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
-		unsigned long addr;
-
 		if (insn->code != (BPF_JMP | BPF_CALL) ||
 		    insn->src_reg != BPF_PSEUDO_CALL)
 			continue;
 		insn->off = env->insn_aux_data[i].call_imm;
 		subprog = find_subprog(env, i + insn->off + 1);
-		addr  = (unsigned long)func[subprog]->bpf_func;
-		addr &= PAGE_MASK;
-		insn->imm = (u64 (*)(u64, u64, u64, u64, u64))
-			    addr - __bpf_call_base;
+		insn->imm = subprog;
 	}
 
 	prog->jited = 1;
-- 
2.14.3

^ permalink raw reply related

* [PATCH bpf-next v3 07/10] bpf: fix multi-function JITed dump obtained via syscall
From: Sandipan Das @ 2018-05-22 17:16 UTC (permalink / raw)
  To: ast, daniel; +Cc: netdev, linuxppc-dev, mpe, naveen.n.rao, jakub.kicinski
In-Reply-To: <cover.1527008646.git.sandipan@linux.vnet.ibm.com>

Currently, for multi-function programs, we cannot get the JITed
instructions using the bpf system call's BPF_OBJ_GET_INFO_BY_FD
command. Because of this, userspace tools such as bpftool fail
to identify a multi-function program as being JITed or not.

With the JIT enabled and the test program running, this can be
verified as follows:

  # cat /proc/sys/net/core/bpf_jit_enable
  1

Before applying this patch:

  # bpftool prog list
  1: kprobe  name foo  tag b811aab41a39ad3d  gpl
          loaded_at 2018-05-16T11:43:38+0530  uid 0
          xlated 216B  not jited  memlock 65536B
  ...

  # bpftool prog dump jited id 1
  no instructions returned

After applying this patch:

  # bpftool prog list
  1: kprobe  name foo  tag b811aab41a39ad3d  gpl
          loaded_at 2018-05-16T12:13:01+0530  uid 0
          xlated 216B  jited 308B  memlock 65536B
  ...

  # bpftool prog dump jited id 1
     0:   nop
     4:   nop
     8:   mflr    r0
     c:   std     r0,16(r1)
    10:   stdu    r1,-112(r1)
    14:   std     r31,104(r1)
    18:   addi    r31,r1,48
    1c:   li      r3,10
  ...

Signed-off-by: Sandipan Das <sandipan@linux.vnet.ibm.com>
---
 kernel/bpf/syscall.c | 36 +++++++++++++++++++++++++++++++++---
 1 file changed, 33 insertions(+), 3 deletions(-)

diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index f0ad4b5f0224..1c4cba91e523 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1970,13 +1970,43 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog,
 	 * for offload.
 	 */
 	ulen = info.jited_prog_len;
-	info.jited_prog_len = prog->jited_len;
+	if (prog->aux->func_cnt) {
+		u32 i;
+
+		info.jited_prog_len = 0;
+		for (i = 0; i < prog->aux->func_cnt; i++)
+			info.jited_prog_len += prog->aux->func[i]->jited_len;
+	} else {
+		info.jited_prog_len = prog->jited_len;
+	}
+
 	if (info.jited_prog_len && ulen) {
 		if (bpf_dump_raw_ok()) {
 			uinsns = u64_to_user_ptr(info.jited_prog_insns);
 			ulen = min_t(u32, info.jited_prog_len, ulen);
-			if (copy_to_user(uinsns, prog->bpf_func, ulen))
-				return -EFAULT;
+
+			/* for multi-function programs, copy the JITed
+			 * instructions for all the functions
+			 */
+			if (prog->aux->func_cnt) {
+				u32 len, free, i;
+				u8 *img;
+
+				free = ulen;
+				for (i = 0; i < prog->aux->func_cnt; i++) {
+					len = prog->aux->func[i]->jited_len;
+					img = (u8 *) prog->aux->func[i]->bpf_func;
+					if (len > free)
+						break;
+					if (copy_to_user(uinsns, img, len))
+						return -EFAULT;
+					uinsns += len;
+					free -= len;
+				}
+			} else {
+				if (copy_to_user(uinsns, prog->bpf_func, ulen))
+					return -EFAULT;
+			}
 		} else {
 			info.jited_prog_insns = 0;
 		}
-- 
2.14.3

^ permalink raw reply related

* [PATCH bpf-next v3 08/10] bpf: get JITed image lengths of functions via syscall
From: Sandipan Das @ 2018-05-22 17:16 UTC (permalink / raw)
  To: ast, daniel; +Cc: netdev, linuxppc-dev, mpe, naveen.n.rao, jakub.kicinski
In-Reply-To: <cover.1527008646.git.sandipan@linux.vnet.ibm.com>

This adds new two new fields to struct bpf_prog_info. For
multi-function programs, these fields can be used to pass
a list of the JITed image lengths of each function for a
given program to userspace using the bpf system call with
the BPF_OBJ_GET_INFO_BY_FD command.

This can be used by userspace applications like bpftool
to split up the contiguous JITed dump, also obtained via
the system call, into more relatable chunks corresponding
to each function.

Signed-off-by: Sandipan Das <sandipan@linux.vnet.ibm.com>
---
 include/uapi/linux/bpf.h |  2 ++
 kernel/bpf/syscall.c     | 20 ++++++++++++++++++++
 2 files changed, 22 insertions(+)

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index c44105f27da9..8c3109b5d6d3 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -2206,7 +2206,9 @@ struct bpf_prog_info {
 	__u64 netns_dev;
 	__u64 netns_ino;
 	__u32 nr_jited_ksyms;
+	__u32 nr_jited_func_lens;
 	__aligned_u64 jited_ksyms;
+	__aligned_u64 jited_func_lens;
 } __attribute__((aligned(8)));
 
 struct bpf_map_info {
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 1c4cba91e523..faadbcd90191 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -2036,6 +2036,26 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog,
 		}
 	}
 
+	ulen = info.nr_jited_func_lens;
+	info.nr_jited_func_lens = prog->aux->func_cnt;
+	if (info.nr_jited_func_lens && ulen) {
+		if (bpf_dump_raw_ok()) {
+			u32 __user *user_lens;
+			u32 func_len, i;
+
+			/* copy the JITed image lengths for each function */
+			ulen = min_t(u32, info.nr_jited_func_lens, ulen);
+			user_lens = u64_to_user_ptr(info.jited_func_lens);
+			for (i = 0; i < ulen; i++) {
+				func_len = prog->aux->func[i]->jited_len;
+				if (put_user(func_len, &user_lens[i]))
+					return -EFAULT;
+			}
+		} else {
+			info.jited_func_lens = 0;
+		}
+	}
+
 done:
 	if (copy_to_user(uinfo, &info, info_len) ||
 	    put_user(info_len, &uattr->info.info_len))
-- 
2.14.3

^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox