From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
To: linux-crypto@vger.kernel.org
Cc: Mark Rutland <mark.rutland@arm.com>,
herbert@gondor.apana.org.au,
Ard Biesheuvel <ard.biesheuvel@linaro.org>,
Peter Zijlstra <peterz@infradead.org>,
Catalin Marinas <catalin.marinas@arm.com>,
Sebastian Andrzej Siewior <bigeasy@linutronix.de>,
Will Deacon <will.deacon@arm.com>,
Russell King - ARM Linux <linux@armlinux.org.uk>,
Steven Rostedt <rostedt@goodmis.org>,
Thomas Gleixner <tglx@linutronix.de>,
Dave Martin <Dave.Martin@arm.com>,
linux-arm-kernel@lists.infradead.org,
linux-rt-users@vger.kernel.org
Subject: [PATCH v5 06/23] crypto: arm64/aes-blk - remove configurable interleave
Date: Sat, 10 Mar 2018 15:21:51 +0000 [thread overview]
Message-ID: <20180310152208.10369-7-ard.biesheuvel@linaro.org> (raw)
In-Reply-To: <20180310152208.10369-1-ard.biesheuvel@linaro.org>
The AES block mode implementation using Crypto Extensions or plain NEON
was written before real hardware existed, and so its interleave factor
was made build time configurable (as well as an option to instantiate
all interleaved sequences inline rather than as subroutines)
We ended up using INTERLEAVE=4 with inlining disabled for both flavors
of the core AES routines, so let's stick with that, and remove the option
to configure this at build time. This makes the code easier to modify,
which is nice now that we're adding yield support.
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
---
arch/arm64/crypto/Makefile | 3 -
arch/arm64/crypto/aes-modes.S | 237 ++++----------------
2 files changed, 40 insertions(+), 200 deletions(-)
diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile
index cee9b8d9830b..b6b624602582 100644
--- a/arch/arm64/crypto/Makefile
+++ b/arch/arm64/crypto/Makefile
@@ -59,9 +59,6 @@ aes-arm64-y := aes-cipher-core.o aes-cipher-glue.o
obj-$(CONFIG_CRYPTO_AES_ARM64_BS) += aes-neon-bs.o
aes-neon-bs-y := aes-neonbs-core.o aes-neonbs-glue.o
-AFLAGS_aes-ce.o := -DINTERLEAVE=4
-AFLAGS_aes-neon.o := -DINTERLEAVE=4
-
CFLAGS_aes-glue-ce.o := -DUSE_V8_CRYPTO_EXTENSIONS
$(obj)/aes-glue-%.o: $(src)/aes-glue.c FORCE
diff --git a/arch/arm64/crypto/aes-modes.S b/arch/arm64/crypto/aes-modes.S
index 65b273667b34..27a235b2ddee 100644
--- a/arch/arm64/crypto/aes-modes.S
+++ b/arch/arm64/crypto/aes-modes.S
@@ -13,44 +13,6 @@
.text
.align 4
-/*
- * There are several ways to instantiate this code:
- * - no interleave, all inline
- * - 2-way interleave, 2x calls out of line (-DINTERLEAVE=2)
- * - 2-way interleave, all inline (-DINTERLEAVE=2 -DINTERLEAVE_INLINE)
- * - 4-way interleave, 4x calls out of line (-DINTERLEAVE=4)
- * - 4-way interleave, all inline (-DINTERLEAVE=4 -DINTERLEAVE_INLINE)
- *
- * Macros imported by this code:
- * - enc_prepare - setup NEON registers for encryption
- * - dec_prepare - setup NEON registers for decryption
- * - enc_switch_key - change to new key after having prepared for encryption
- * - encrypt_block - encrypt a single block
- * - decrypt block - decrypt a single block
- * - encrypt_block2x - encrypt 2 blocks in parallel (if INTERLEAVE == 2)
- * - decrypt_block2x - decrypt 2 blocks in parallel (if INTERLEAVE == 2)
- * - encrypt_block4x - encrypt 4 blocks in parallel (if INTERLEAVE == 4)
- * - decrypt_block4x - decrypt 4 blocks in parallel (if INTERLEAVE == 4)
- */
-
-#if defined(INTERLEAVE) && !defined(INTERLEAVE_INLINE)
-#define FRAME_PUSH stp x29, x30, [sp,#-16]! ; mov x29, sp
-#define FRAME_POP ldp x29, x30, [sp],#16
-
-#if INTERLEAVE == 2
-
-aes_encrypt_block2x:
- encrypt_block2x v0, v1, w3, x2, x8, w7
- ret
-ENDPROC(aes_encrypt_block2x)
-
-aes_decrypt_block2x:
- decrypt_block2x v0, v1, w3, x2, x8, w7
- ret
-ENDPROC(aes_decrypt_block2x)
-
-#elif INTERLEAVE == 4
-
aes_encrypt_block4x:
encrypt_block4x v0, v1, v2, v3, w3, x2, x8, w7
ret
@@ -61,48 +23,6 @@ aes_decrypt_block4x:
ret
ENDPROC(aes_decrypt_block4x)
-#else
-#error INTERLEAVE should equal 2 or 4
-#endif
-
- .macro do_encrypt_block2x
- bl aes_encrypt_block2x
- .endm
-
- .macro do_decrypt_block2x
- bl aes_decrypt_block2x
- .endm
-
- .macro do_encrypt_block4x
- bl aes_encrypt_block4x
- .endm
-
- .macro do_decrypt_block4x
- bl aes_decrypt_block4x
- .endm
-
-#else
-#define FRAME_PUSH
-#define FRAME_POP
-
- .macro do_encrypt_block2x
- encrypt_block2x v0, v1, w3, x2, x8, w7
- .endm
-
- .macro do_decrypt_block2x
- decrypt_block2x v0, v1, w3, x2, x8, w7
- .endm
-
- .macro do_encrypt_block4x
- encrypt_block4x v0, v1, v2, v3, w3, x2, x8, w7
- .endm
-
- .macro do_decrypt_block4x
- decrypt_block4x v0, v1, v2, v3, w3, x2, x8, w7
- .endm
-
-#endif
-
/*
* aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
* int blocks)
@@ -111,28 +31,21 @@ ENDPROC(aes_decrypt_block4x)
*/
AES_ENTRY(aes_ecb_encrypt)
- FRAME_PUSH
+ stp x29, x30, [sp, #-16]!
+ mov x29, sp
enc_prepare w3, x2, x5
.LecbencloopNx:
-#if INTERLEAVE >= 2
- subs w4, w4, #INTERLEAVE
+ subs w4, w4, #4
bmi .Lecbenc1x
-#if INTERLEAVE == 2
- ld1 {v0.16b-v1.16b}, [x1], #32 /* get 2 pt blocks */
- do_encrypt_block2x
- st1 {v0.16b-v1.16b}, [x0], #32
-#else
ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 pt blocks */
- do_encrypt_block4x
+ bl aes_encrypt_block4x
st1 {v0.16b-v3.16b}, [x0], #64
-#endif
b .LecbencloopNx
.Lecbenc1x:
- adds w4, w4, #INTERLEAVE
+ adds w4, w4, #4
beq .Lecbencout
-#endif
.Lecbencloop:
ld1 {v0.16b}, [x1], #16 /* get next pt block */
encrypt_block v0, w3, x2, x5, w6
@@ -140,34 +53,27 @@ AES_ENTRY(aes_ecb_encrypt)
subs w4, w4, #1
bne .Lecbencloop
.Lecbencout:
- FRAME_POP
+ ldp x29, x30, [sp], #16
ret
AES_ENDPROC(aes_ecb_encrypt)
AES_ENTRY(aes_ecb_decrypt)
- FRAME_PUSH
+ stp x29, x30, [sp, #-16]!
+ mov x29, sp
dec_prepare w3, x2, x5
.LecbdecloopNx:
-#if INTERLEAVE >= 2
- subs w4, w4, #INTERLEAVE
+ subs w4, w4, #4
bmi .Lecbdec1x
-#if INTERLEAVE == 2
- ld1 {v0.16b-v1.16b}, [x1], #32 /* get 2 ct blocks */
- do_decrypt_block2x
- st1 {v0.16b-v1.16b}, [x0], #32
-#else
ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 ct blocks */
- do_decrypt_block4x
+ bl aes_decrypt_block4x
st1 {v0.16b-v3.16b}, [x0], #64
-#endif
b .LecbdecloopNx
.Lecbdec1x:
- adds w4, w4, #INTERLEAVE
+ adds w4, w4, #4
beq .Lecbdecout
-#endif
.Lecbdecloop:
ld1 {v0.16b}, [x1], #16 /* get next ct block */
decrypt_block v0, w3, x2, x5, w6
@@ -175,7 +81,7 @@ AES_ENTRY(aes_ecb_decrypt)
subs w4, w4, #1
bne .Lecbdecloop
.Lecbdecout:
- FRAME_POP
+ ldp x29, x30, [sp], #16
ret
AES_ENDPROC(aes_ecb_decrypt)
@@ -204,30 +110,20 @@ AES_ENDPROC(aes_cbc_encrypt)
AES_ENTRY(aes_cbc_decrypt)
- FRAME_PUSH
+ stp x29, x30, [sp, #-16]!
+ mov x29, sp
ld1 {v7.16b}, [x5] /* get iv */
dec_prepare w3, x2, x6
.LcbcdecloopNx:
-#if INTERLEAVE >= 2
- subs w4, w4, #INTERLEAVE
+ subs w4, w4, #4
bmi .Lcbcdec1x
-#if INTERLEAVE == 2
- ld1 {v0.16b-v1.16b}, [x1], #32 /* get 2 ct blocks */
- mov v2.16b, v0.16b
- mov v3.16b, v1.16b
- do_decrypt_block2x
- eor v0.16b, v0.16b, v7.16b
- eor v1.16b, v1.16b, v2.16b
- mov v7.16b, v3.16b
- st1 {v0.16b-v1.16b}, [x0], #32
-#else
ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 ct blocks */
mov v4.16b, v0.16b
mov v5.16b, v1.16b
mov v6.16b, v2.16b
- do_decrypt_block4x
+ bl aes_decrypt_block4x
sub x1, x1, #16
eor v0.16b, v0.16b, v7.16b
eor v1.16b, v1.16b, v4.16b
@@ -235,12 +131,10 @@ AES_ENTRY(aes_cbc_decrypt)
eor v2.16b, v2.16b, v5.16b
eor v3.16b, v3.16b, v6.16b
st1 {v0.16b-v3.16b}, [x0], #64
-#endif
b .LcbcdecloopNx
.Lcbcdec1x:
- adds w4, w4, #INTERLEAVE
+ adds w4, w4, #4
beq .Lcbcdecout
-#endif
.Lcbcdecloop:
ld1 {v1.16b}, [x1], #16 /* get next ct block */
mov v0.16b, v1.16b /* ...and copy to v0 */
@@ -251,8 +145,8 @@ AES_ENTRY(aes_cbc_decrypt)
subs w4, w4, #1
bne .Lcbcdecloop
.Lcbcdecout:
- FRAME_POP
st1 {v7.16b}, [x5] /* return iv */
+ ldp x29, x30, [sp], #16
ret
AES_ENDPROC(aes_cbc_decrypt)
@@ -263,34 +157,19 @@ AES_ENDPROC(aes_cbc_decrypt)
*/
AES_ENTRY(aes_ctr_encrypt)
- FRAME_PUSH
+ stp x29, x30, [sp, #-16]!
+ mov x29, sp
enc_prepare w3, x2, x6
ld1 {v4.16b}, [x5]
umov x6, v4.d[1] /* keep swabbed ctr in reg */
rev x6, x6
-#if INTERLEAVE >= 2
cmn w6, w4 /* 32 bit overflow? */
bcs .Lctrloop
.LctrloopNx:
- subs w4, w4, #INTERLEAVE
+ subs w4, w4, #4
bmi .Lctr1x
-#if INTERLEAVE == 2
- mov v0.8b, v4.8b
- mov v1.8b, v4.8b
- rev x7, x6
- add x6, x6, #1
- ins v0.d[1], x7
- rev x7, x6
- add x6, x6, #1
- ins v1.d[1], x7
- ld1 {v2.16b-v3.16b}, [x1], #32 /* get 2 input blocks */
- do_encrypt_block2x
- eor v0.16b, v0.16b, v2.16b
- eor v1.16b, v1.16b, v3.16b
- st1 {v0.16b-v1.16b}, [x0], #32
-#else
ldr q8, =0x30000000200000001 /* addends 1,2,3[,0] */
dup v7.4s, w6
mov v0.16b, v4.16b
@@ -303,23 +182,21 @@ AES_ENTRY(aes_ctr_encrypt)
mov v2.s[3], v8.s[1]
mov v3.s[3], v8.s[2]
ld1 {v5.16b-v7.16b}, [x1], #48 /* get 3 input blocks */
- do_encrypt_block4x
+ bl aes_encrypt_block4x
eor v0.16b, v5.16b, v0.16b
ld1 {v5.16b}, [x1], #16 /* get 1 input block */
eor v1.16b, v6.16b, v1.16b
eor v2.16b, v7.16b, v2.16b
eor v3.16b, v5.16b, v3.16b
st1 {v0.16b-v3.16b}, [x0], #64
- add x6, x6, #INTERLEAVE
-#endif
+ add x6, x6, #4
rev x7, x6
ins v4.d[1], x7
cbz w4, .Lctrout
b .LctrloopNx
.Lctr1x:
- adds w4, w4, #INTERLEAVE
+ adds w4, w4, #4
beq .Lctrout
-#endif
.Lctrloop:
mov v0.16b, v4.16b
encrypt_block v0, w3, x2, x8, w7
@@ -339,12 +216,12 @@ AES_ENTRY(aes_ctr_encrypt)
.Lctrout:
st1 {v4.16b}, [x5] /* return next CTR value */
- FRAME_POP
+ ldp x29, x30, [sp], #16
ret
.Lctrtailblock:
st1 {v0.16b}, [x0]
- FRAME_POP
+ ldp x29, x30, [sp], #16
ret
.Lctrcarry:
@@ -378,7 +255,9 @@ CPU_LE( .quad 1, 0x87 )
CPU_BE( .quad 0x87, 1 )
AES_ENTRY(aes_xts_encrypt)
- FRAME_PUSH
+ stp x29, x30, [sp, #-16]!
+ mov x29, sp
+
ld1 {v4.16b}, [x6]
cbz w7, .Lxtsencnotfirst
@@ -394,25 +273,8 @@ AES_ENTRY(aes_xts_encrypt)
ldr q7, .Lxts_mul_x
next_tweak v4, v4, v7, v8
.LxtsencNx:
-#if INTERLEAVE >= 2
- subs w4, w4, #INTERLEAVE
+ subs w4, w4, #4
bmi .Lxtsenc1x
-#if INTERLEAVE == 2
- ld1 {v0.16b-v1.16b}, [x1], #32 /* get 2 pt blocks */
- next_tweak v5, v4, v7, v8
- eor v0.16b, v0.16b, v4.16b
- eor v1.16b, v1.16b, v5.16b
- do_encrypt_block2x
- eor v0.16b, v0.16b, v4.16b
- eor v1.16b, v1.16b, v5.16b
- st1 {v0.16b-v1.16b}, [x0], #32
- cbz w4, .LxtsencoutNx
- next_tweak v4, v5, v7, v8
- b .LxtsencNx
-.LxtsencoutNx:
- mov v4.16b, v5.16b
- b .Lxtsencout
-#else
ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 pt blocks */
next_tweak v5, v4, v7, v8
eor v0.16b, v0.16b, v4.16b
@@ -421,7 +283,7 @@ AES_ENTRY(aes_xts_encrypt)
eor v2.16b, v2.16b, v6.16b
next_tweak v7, v6, v7, v8
eor v3.16b, v3.16b, v7.16b
- do_encrypt_block4x
+ bl aes_encrypt_block4x
eor v3.16b, v3.16b, v7.16b
eor v0.16b, v0.16b, v4.16b
eor v1.16b, v1.16b, v5.16b
@@ -430,11 +292,9 @@ AES_ENTRY(aes_xts_encrypt)
mov v4.16b, v7.16b
cbz w4, .Lxtsencout
b .LxtsencloopNx
-#endif
.Lxtsenc1x:
- adds w4, w4, #INTERLEAVE
+ adds w4, w4, #4
beq .Lxtsencout
-#endif
.Lxtsencloop:
ld1 {v1.16b}, [x1], #16
eor v0.16b, v1.16b, v4.16b
@@ -447,13 +307,15 @@ AES_ENTRY(aes_xts_encrypt)
b .Lxtsencloop
.Lxtsencout:
st1 {v4.16b}, [x6]
- FRAME_POP
+ ldp x29, x30, [sp], #16
ret
AES_ENDPROC(aes_xts_encrypt)
AES_ENTRY(aes_xts_decrypt)
- FRAME_PUSH
+ stp x29, x30, [sp, #-16]!
+ mov x29, sp
+
ld1 {v4.16b}, [x6]
cbz w7, .Lxtsdecnotfirst
@@ -469,25 +331,8 @@ AES_ENTRY(aes_xts_decrypt)
ldr q7, .Lxts_mul_x
next_tweak v4, v4, v7, v8
.LxtsdecNx:
-#if INTERLEAVE >= 2
- subs w4, w4, #INTERLEAVE
+ subs w4, w4, #4
bmi .Lxtsdec1x
-#if INTERLEAVE == 2
- ld1 {v0.16b-v1.16b}, [x1], #32 /* get 2 ct blocks */
- next_tweak v5, v4, v7, v8
- eor v0.16b, v0.16b, v4.16b
- eor v1.16b, v1.16b, v5.16b
- do_decrypt_block2x
- eor v0.16b, v0.16b, v4.16b
- eor v1.16b, v1.16b, v5.16b
- st1 {v0.16b-v1.16b}, [x0], #32
- cbz w4, .LxtsdecoutNx
- next_tweak v4, v5, v7, v8
- b .LxtsdecNx
-.LxtsdecoutNx:
- mov v4.16b, v5.16b
- b .Lxtsdecout
-#else
ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 ct blocks */
next_tweak v5, v4, v7, v8
eor v0.16b, v0.16b, v4.16b
@@ -496,7 +341,7 @@ AES_ENTRY(aes_xts_decrypt)
eor v2.16b, v2.16b, v6.16b
next_tweak v7, v6, v7, v8
eor v3.16b, v3.16b, v7.16b
- do_decrypt_block4x
+ bl aes_decrypt_block4x
eor v3.16b, v3.16b, v7.16b
eor v0.16b, v0.16b, v4.16b
eor v1.16b, v1.16b, v5.16b
@@ -505,11 +350,9 @@ AES_ENTRY(aes_xts_decrypt)
mov v4.16b, v7.16b
cbz w4, .Lxtsdecout
b .LxtsdecloopNx
-#endif
.Lxtsdec1x:
- adds w4, w4, #INTERLEAVE
+ adds w4, w4, #4
beq .Lxtsdecout
-#endif
.Lxtsdecloop:
ld1 {v1.16b}, [x1], #16
eor v0.16b, v1.16b, v4.16b
@@ -522,7 +365,7 @@ AES_ENTRY(aes_xts_decrypt)
b .Lxtsdecloop
.Lxtsdecout:
st1 {v4.16b}, [x6]
- FRAME_POP
+ ldp x29, x30, [sp], #16
ret
AES_ENDPROC(aes_xts_decrypt)
--
2.15.1
next prev parent reply other threads:[~2018-03-10 15:21 UTC|newest]
Thread overview: 29+ messages / expand[flat|nested] mbox.gz Atom feed top
2018-03-10 15:21 [PATCH v5 00/23] crypto: arm64 - play nice with CONFIG_PREEMPT Ard Biesheuvel
2018-03-10 15:21 ` [PATCH v5 01/23] crypto: testmgr - add a new test case for CRC-T10DIF Ard Biesheuvel
2018-03-10 15:21 ` [PATCH v5 02/23] crypto: arm64/aes-ce-ccm - move kernel mode neon en/disable into loop Ard Biesheuvel
2018-03-10 15:21 ` [PATCH v5 03/23] crypto: arm64/aes-blk " Ard Biesheuvel
2018-03-10 15:21 ` [PATCH v5 04/23] crypto: arm64/aes-bs " Ard Biesheuvel
2018-03-10 15:21 ` [PATCH v5 05/23] crypto: arm64/chacha20 " Ard Biesheuvel
2018-03-10 15:21 ` Ard Biesheuvel [this message]
2018-03-10 15:21 ` [PATCH v5 07/23] crypto: arm64/aes-blk - add 4 way interleave to CBC encrypt path Ard Biesheuvel
2018-03-10 15:21 ` [PATCH v5 08/23] crypto: arm64/aes-blk - add 4 way interleave to CBC-MAC " Ard Biesheuvel
2018-03-10 15:21 ` [PATCH v5 09/23] crypto: arm64/sha256-neon - play nice with CONFIG_PREEMPT kernels Ard Biesheuvel
2018-03-10 15:21 ` [PATCH v5 10/23] arm64: assembler: add utility macros to push/pop stack frames Ard Biesheuvel
2018-03-10 15:21 ` [PATCH v5 11/23] arm64: assembler: add macros to conditionally yield the NEON under PREEMPT Ard Biesheuvel
2018-03-10 15:21 ` [PATCH v5 12/23] crypto: arm64/sha1-ce - yield NEON after every block of input Ard Biesheuvel
2018-03-10 15:21 ` [PATCH v5 13/23] crypto: arm64/sha2-ce " Ard Biesheuvel
2018-03-10 15:21 ` [PATCH v5 14/23] crypto: arm64/aes-ccm " Ard Biesheuvel
2018-03-10 15:22 ` [PATCH v5 15/23] crypto: arm64/aes-blk " Ard Biesheuvel
2018-03-10 15:22 ` [PATCH v5 16/23] crypto: arm64/aes-bs " Ard Biesheuvel
2018-03-10 15:22 ` [PATCH v5 17/23] crypto: arm64/aes-ghash " Ard Biesheuvel
2018-03-10 15:22 ` [PATCH v5 18/23] crypto: arm64/crc32-ce " Ard Biesheuvel
2018-03-10 15:22 ` [PATCH v5 19/23] crypto: arm64/crct10dif-ce " Ard Biesheuvel
2018-03-10 15:22 ` [PATCH v5 20/23] crypto: arm64/sha3-ce " Ard Biesheuvel
2018-03-10 15:22 ` [PATCH v5 21/23] crypto: arm64/sha512-ce " Ard Biesheuvel
2018-03-10 15:22 ` [PATCH v5 22/23] crypto: arm64/sm3-ce " Ard Biesheuvel
2018-03-10 15:22 ` [PATCH v5 23/23] DO NOT MERGE Ard Biesheuvel
2018-03-11 5:16 ` [PATCH v5 00/23] crypto: arm64 - play nice with CONFIG_PREEMPT Vakul Garg
2018-03-11 8:55 ` Ard Biesheuvel
2018-03-16 15:57 ` Herbert Xu
2018-03-19 15:31 ` Ard Biesheuvel
2018-03-19 23:36 ` Herbert Xu
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20180310152208.10369-7-ard.biesheuvel@linaro.org \
--to=ard.biesheuvel@linaro.org \
--cc=Dave.Martin@arm.com \
--cc=bigeasy@linutronix.de \
--cc=catalin.marinas@arm.com \
--cc=herbert@gondor.apana.org.au \
--cc=linux-arm-kernel@lists.infradead.org \
--cc=linux-crypto@vger.kernel.org \
--cc=linux-rt-users@vger.kernel.org \
--cc=linux@armlinux.org.uk \
--cc=mark.rutland@arm.com \
--cc=peterz@infradead.org \
--cc=rostedt@goodmis.org \
--cc=tglx@linutronix.de \
--cc=will.deacon@arm.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).