public inbox for linux-snps-arc@lists.infradead.org
 help / color / mirror / Atom feed
From: Sergey Matyukevich <geomatsi@gmail.com>
To: linux-snps-arc@lists.infradead.org
Cc: Vineet Gupta <vgupta@kernel.org>,
	Vladimir Isaev <isaev@synopsys.com>,
	Sergey Matyukevich <geomatsi@gmail.com>,
	Sergey Matyukevich <sergey.matyukevich@synopsys.com>
Subject: [RFC PATCH 04/13] ARC: uaccess: elide ZOL, use double load/stores
Date: Tue, 22 Feb 2022 17:14:57 +0300	[thread overview]
Message-ID: <20220222141506.4003433-5-geomatsi@gmail.com> (raw)
In-Reply-To: <20220222141506.4003433-1-geomatsi@gmail.com>

From: Vineet Gupta <vgupta@kernel.org>

Upcoming ARCv3 lacks ZOL support, so provide alternative
uaccess implementations based on 64-bit memory operations.

Signed-off-by: Vineet Gupta <vgupta@kernel.org>
---
 arch/arc/include/asm/asm-macro-ll64-emul.h |  28 ++++
 arch/arc/include/asm/asm-macro-ll64.h      |  20 +++
 arch/arc/include/asm/assembler.h           |  12 ++
 arch/arc/include/asm/uaccess.h             |  12 ++
 arch/arc/lib/Makefile                      |   2 +
 arch/arc/lib/uaccess.S                     | 144 +++++++++++++++++++++
 6 files changed, 218 insertions(+)
 create mode 100644 arch/arc/include/asm/asm-macro-ll64-emul.h
 create mode 100644 arch/arc/include/asm/asm-macro-ll64.h
 create mode 100644 arch/arc/lib/uaccess.S

diff --git a/arch/arc/include/asm/asm-macro-ll64-emul.h b/arch/arc/include/asm/asm-macro-ll64-emul.h
new file mode 100644
index 000000000000..886320cc74ad
--- /dev/null
+++ b/arch/arc/include/asm/asm-macro-ll64-emul.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+/*
+ * Abstraction for 64-bit load/store, emulated with two 32-bit accesses:
+ *   - The caller names only the first register; the second one is implied
+ *     (r4 -> r5, r6 -> r7). For any other register passed as \d only a
+ *     single 32-bit access with offset \incr / 2 is emitted.
+ */
+
+.macro ST64.ab d, s, incr
+	st.ab	\d, [\s, \incr / 2]
+	.ifeqs	"\d", "r4"
+		st.ab	r5, [\s, \incr / 2]
+	.endif
+	.ifeqs	"\d", "r6"
+		st.ab	r7, [\s, \incr / 2]
+	.endif
+.endm
+
+.macro LD64.ab d, s, incr
+	ld.ab	\d, [\s, \incr / 2]
+	.ifeqs	"\d", "r4"
+		ld.ab	r5, [\s, \incr / 2]
+	.endif
+	.ifeqs	"\d", "r6"
+		ld.ab	r7, [\s, \incr / 2]
+	.endif
+.endm
diff --git a/arch/arc/include/asm/asm-macro-ll64.h b/arch/arc/include/asm/asm-macro-ll64.h
new file mode 100644
index 000000000000..89e05c923a26
--- /dev/null
+++ b/arch/arc/include/asm/asm-macro-ll64.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+/*
+ * Abstraction for 64-bit load/store:
+ *   - Each macro expands to a single double-word instruction (ldd/std)
+ *   - The register pair is r<N>:r<N+1>, but only the first register
+ *     needs to be specified; plain and .ab variants are generated by .irp
+ */
+
+.irp xx,,.ab
+.macro ST64\xx d, s, off=0
+	std\xx	\d, [\s, \off]
+.endm
+.endr
+
+.irp xx,,.ab
+.macro LD64\xx d, s, off=0
+	ldd\xx	\d, [\s, \off]
+.endm
+.endr
diff --git a/arch/arc/include/asm/assembler.h b/arch/arc/include/asm/assembler.h
index 426488ef27d4..1d69390c22ba 100644
--- a/arch/arc/include/asm/assembler.h
+++ b/arch/arc/include/asm/assembler.h
@@ -5,6 +5,12 @@
 
 #ifdef __ASSEMBLY__
 
+#ifdef CONFIG_ARC_HAS_LL64
+#include <asm/asm-macro-ll64.h>
+#else
+#include <asm/asm-macro-ll64-emul.h>
+#endif
+
 #ifdef CONFIG_ARC_LACKS_ZOL
 #include <asm/asm-macro-dbnz.h>
 #else
@@ -13,6 +19,12 @@
 
 #else	/* !__ASSEMBLY__ */
 
+#ifdef CONFIG_ARC_HAS_LL64
+asm(".include \"asm/asm-macro-ll64.h\"\n");
+#else
+asm(".include \"asm/asm-macro-ll64-emul.h\"\n");
+#endif
+
 /*
  * ARCv2 cores have both LPcc and DBNZ instructions (starting 3.5a release).
  * But in this context, LP present implies DBNZ not available (ARCompact ISA)
diff --git a/arch/arc/include/asm/uaccess.h b/arch/arc/include/asm/uaccess.h
index 9b009e64e79c..f5b97d977c1b 100644
--- a/arch/arc/include/asm/uaccess.h
+++ b/arch/arc/include/asm/uaccess.h
@@ -163,6 +163,7 @@
 	: "+r" (ret)				\
 	: "r" (src), "r" (dst), "ir" (-EFAULT))
 
+#ifndef CONFIG_ARC_LACKS_ZOL
 
 static inline unsigned long
 raw_copy_from_user(void *to, const void __user *from, unsigned long n)
@@ -660,6 +661,17 @@ static inline unsigned long __clear_user(void __user *to, unsigned long n)
 #define INLINE_COPY_TO_USER
 #define INLINE_COPY_FROM_USER
 
+#else
+
+extern unsigned long raw_copy_from_user(void *to, const void __user *from,
+					  unsigned long n);
+extern unsigned long raw_copy_to_user(void __user *to, const void *from,
+					unsigned long n);
+/* Like the copy routines above, returns the number of bytes not done (0 on success) */
+extern unsigned long __clear_user(void __user *to, unsigned long n);
+
+#endif
+
 #define __clear_user		__clear_user
 
 #include <asm/segment.h>
diff --git a/arch/arc/lib/Makefile b/arch/arc/lib/Makefile
index 30158ae69fd4..87d18f5013dc 100644
--- a/arch/arc/lib/Makefile
+++ b/arch/arc/lib/Makefile
@@ -13,3 +13,5 @@ lib-$(CONFIG_ISA_ARCV2)		+=memcpy-archs-unaligned.o
 else
 lib-$(CONFIG_ISA_ARCV2)		+=memcpy-archs.o
 endif
+
+lib-$(CONFIG_ARC_LACKS_ZOL)	+= uaccess.o
diff --git a/arch/arc/lib/uaccess.S b/arch/arc/lib/uaccess.S
new file mode 100644
index 000000000000..5093160a72d3
--- /dev/null
+++ b/arch/arc/lib/uaccess.S
@@ -0,0 +1,144 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * uaccess for ARCv3: avoids ZOL, uses 64-bit memory ops
+ *   ASSUMES unaligned access
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+#ifndef CONFIG_ARC_USE_UNALIGNED_MEM_ACCESS
+#error "Unaligned access support needed"
+#endif
+
+; Input
+;  - r0: dest, kernel
+;  - r1: src, user
+;  - r2: sz
+; Output
+;  - r0: Num bytes left to copy, 0 on success
+
+ENTRY_CFI(raw_copy_from_user)
+	; r8 = dest + sz: end of the kernel buffer, used for the residual at .L1done
+	add    r8, r0, r2
+
+	lsr.f  r3, r2, 4
+	bz     .L1dobytes
+
+	; chunks of 16 bytes; only the user loads (10, 11) have __ex_table entries
+10:	LD64.ab r4, r1, 8
+11:	LD64.ab r6, r1, 8
+	ST64.ab r4, r0, 8
+	ST64.ab r6, r0, 8
+	DBNZR  r3, 10b
+
+.L1dobytes:
+	; last 1-15 bytes
+	and.f  r3, r2, 0xf
+	bz     .L1done
+
+12:	ldb.ab r4, [r1, 1]
+	stb.ab r4, [r0, 1]
+	DBNZR  r3, 12b
+
+.L1done:
+	; bytes not copied = orig_dst + sz - curr_dst (r8 - r0)
+	j.d    [blink]
+	sub    r0, r8, r0
+END_CFI(raw_copy_from_user)
+; NOTE(review): emulated LD64.ab is two loads, labels 10/11 mark only the first - confirm the second is covered
+.section __ex_table, "a"
+	.word 10b, .L1done
+	.word 11b, .L1done
+	.word 12b, .L1done
+.previous
+
+; Input
+;  - r0: dest, user
+;  - r1: src, kernel
+;  - r2: sz
+; Output
+;  - r0: Num bytes left to copy, 0 on success
+
+ENTRY_CFI(raw_copy_to_user)
+	; r8 = end of user dest: only the stores can fault, so count from dest
+	add    r8, r0, r2
+
+	lsr.f  r3, r2, 4
+	bz     .L2dobytes
+
+	; chunks of 16 bytes; only the user stores (20, 21) have __ex_table entries
+2:	LD64.ab r4, r1, 8
+	LD64.ab r6, r1, 8
+20:	ST64.ab r4, r0, 8
+21:	ST64.ab r6, r0, 8
+	DBNZR  r3, 2b
+
+.L2dobytes:
+	; last 1-15 bytes
+	and.f  r3, r2, 0xf
+	bz     .L2done
+
+2:	ldb.ab r4, [r1, 1]
+22:	stb.ab r4, [r0, 1]
+	DBNZR  r3, 2b
+
+.L2done:
+	; bytes not copied = orig_dst + sz - curr_dst (src runs ahead of a faulting store)
+	j.d    [blink]
+	sub    r0, r8, r0
+
+END_CFI(raw_copy_to_user)
+; NOTE(review): emulated ST64.ab is two stores, labels 20/21 mark only the first - confirm the second is covered
+.section __ex_table, "a"
+	.word 20b, .L2done
+	.word 21b, .L2done
+	.word 22b, .L2done
+.previous
+
+ENTRY_CFI(__clear_user)
+	add    r8, r0, r1
+	; r0: user dest, r1: sz; r8 = dest + sz (end of range); r4:r5 = zeroes to store
+	mov    r4, 0
+	mov    r5, 0
+
+	lsr.f  r3, r1, 4
+	bz     .L3dobytes
+
+	; chunks of 16 bytes
+30:	ST64.ab r4, r0, 8
+31:	ST64.ab r4, r0, 8
+	DBNZR  r3, 30b
+
+.L3dobytes:
+	; last 1-15 bytes
+	and.f  r3, r1, 0xf
+	bz     .L3done
+
+32:	stb.ab r4, [r0, 1]
+	DBNZR  r3, 32b
+
+.L3done:
+	; bytes not cleared = orig_dst + sz - curr_dst (r8 - r0)
+	j.d    [blink]
+	sub    r0, r8, r0
+
+END_CFI(__clear_user)
+
+; Note that .fixup section is missing and that is not an omission
+;
+; .fixup is a level of indirection for user fault handling: it does some extra
+; work before jumping to a safe instruction (past the faulting LD/ST) in the
+; uaccess code, e.g. setting up -EFAULT in the return register for the caller.
+; When that is not needed (as above, where the number of bytes not copied is
+; computed into return reg r0) and the fault handler only needs a valid PC to
+; resume at, that label can be placed directly in the __ex_table entry.
+; do_page_fault() -> fixup_exception() use that to setup pt_regs->ret, which the
+; CPU exception handler resumes to. This also makes the handling more efficient
+; by removing a level of indirection.
+; NOTE(review): emulated ST64.ab is two stores, labels 30/31 mark only the first - confirm the second is covered
+.section __ex_table, "a"
+	.word 30b, .L3done
+	.word 31b, .L3done
+	.word 32b, .L3done
+.previous
-- 
2.25.1


_______________________________________________
linux-snps-arc mailing list
linux-snps-arc@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-snps-arc

  parent reply	other threads:[~2022-02-22 14:15 UTC|newest]

Thread overview: 17+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-02-22 14:14 [RFC PATCH 00/13] ARC: handle the lack of ZOL support Sergey Matyukevich
2022-02-22 14:14 ` [RFC PATCH 01/13] ARC: uaccess: elide unaligned handling if hardware supports Sergey Matyukevich
2022-02-22 14:14 ` [RFC PATCH 02/13] ARC: Kconfig: introduce option to disable ZOL Sergey Matyukevich
2022-02-22 14:14 ` [RFC PATCH 03/13] ARC: uaccess: drop CC_OPTIMIZE_FOR_SIZE Sergey Matyukevich
2022-02-22 14:14 ` Sergey Matyukevich [this message]
2022-02-22 14:14 ` [RFC PATCH 05/13] ARCv2: memset: don't prefetch for len == 0 which happens a lot Sergey Matyukevich
2022-02-22 14:14 ` [RFC PATCH 06/13] ARCv2: memset: elide unaligned handling if hardware supports Sergey Matyukevich
2022-02-22 14:15 ` [RFC PATCH 07/13] ARCv2: memset: rewrite using double load/stores Sergey Matyukevich
2022-02-22 14:15 ` [RFC PATCH 08/13] ARC: string: use generic C code if no ZOL support Sergey Matyukevich
2022-02-22 14:15 ` [RFC PATCH 09/13] ARC: delay: elide ZOL Sergey Matyukevich
2022-02-22 14:15 ` [RFC PATCH 10/13] ARC: checksum: " Sergey Matyukevich
2022-02-22 14:15 ` [RFC PATCH 11/13] ARC: head: " Sergey Matyukevich
2022-02-22 14:15 ` [RFC PATCH 12/13] ARC: build: inhibit ZOL generation by compiler Sergey Matyukevich
2022-02-22 14:15 ` [RFC PATCH 13/13] ARC: pt_regs: handle the case when ZOL is not supported Sergey Matyukevich
2022-02-28  2:09 ` [RFC PATCH 00/13] ARC: handle the lack of ZOL support Vineet Gupta
2022-03-03 19:22   ` Sergey Matyukevich
2022-03-23 10:09   ` [RFC PATCH 00/13] ARC: handle the lack of ZOL supporty Sergey Matyukevich

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220222141506.4003433-5-geomatsi@gmail.com \
    --to=geomatsi@gmail.com \
    --cc=isaev@synopsys.com \
    --cc=linux-snps-arc@lists.infradead.org \
    --cc=sergey.matyukevich@synopsys.com \
    --cc=vgupta@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox