From: Glenn Miles <milesg@linux.vnet.ibm.com>
To: Nicholas Piggin <npiggin@gmail.com>
Cc: qemu-devel@nongnu.org,
"Daniel Henrique Barboza" <danielhb413@gmail.com>,
"Cédric Le Goater" <clg@kaod.org>,
"David Gibson" <david@gibson.dropbear.id.au>,
"Greg Kurz" <groug@kaod.org>,
"open list:PowerPC TCG CPUs" <qemu-ppc@nongnu.org>
Subject: Re: [PATCH 2/4] target/ppc: Add recording of taken branches to BHRB
Date: Tue, 19 Sep 2023 16:35:59 -0500 [thread overview]
Message-ID: <955aa084c1e56559943f17514d7bed73@linux.vnet.ibm.com> (raw)
In-Reply-To: <CVJ33XKZ3RO0.22BRA1VHY26PJ@wheely>
On 2023-09-14 20:02, Nicholas Piggin wrote:
> On Wed Sep 13, 2023 at 6:24 AM AEST, Glenn Miles wrote:
>> This commit continues adding support for the Branch History
>> Rolling Buffer (BHRB) as is provided starting with the P8
>> processor and continuing with its successors. This commit
>> is limited to the recording and filtering of taken branches.
>>
>> The following changes were made:
>>
>> - Added a BHRB buffer for storing branch instruction and
>> target addresses for taken branches
>> - Renamed gen_update_cfar to gen_update_branch_history and
>> added a 'target' parameter to hold the branch target
>> address and 'inst_type' parameter to use for filtering
>> - Added a combination of jit-time and run-time checks to
>> gen_update_branch_history for determining if a branch
>> should be recorded
>> - Added TCG code to gen_update_branch_history that stores
>> data to the BHRB and updates the BHRB offset.
>> - Added BHRB resource initialization and reset functions
>> - Enabled functionality for P8, P9 and P10 processors.
>>
>> Signed-off-by: Glenn Miles <milesg@linux.vnet.ibm.com>
>> ---
>> target/ppc/cpu.h | 18 +++-
>> target/ppc/cpu_init.c | 41 ++++++++-
>> target/ppc/helper_regs.c | 32 +++++++
>> target/ppc/helper_regs.h | 1 +
>> target/ppc/power8-pmu.c | 2 +
>> target/ppc/power8-pmu.h | 7 ++
>> target/ppc/translate.c | 114
>> +++++++++++++++++++++++--
>> target/ppc/translate/branch-impl.c.inc | 2 +-
>> 8 files changed, 205 insertions(+), 12 deletions(-)
>>
>> diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
>> index 20ae1466a5..bda1afb700 100644
>> --- a/target/ppc/cpu.h
>> +++ b/target/ppc/cpu.h
>> @@ -454,8 +454,9 @@ FIELD(MSR, LE, MSR_LE, 1)
>> #define MMCR2_UREG_MASK (MMCR2_FC1P0 | MMCR2_FC2P0 | MMCR2_FC3P0 | \
>> MMCR2_FC4P0 | MMCR2_FC5P0 | MMCR2_FC6P0)
>>
>> -#define MMCRA_BHRBRD PPC_BIT(26) /* BHRB Recording
>> Disable */
>> -
>> +#define MMCRA_BHRBRD PPC_BIT(26) /* BHRB Recording Disable
>> */
>
> Fold this tidying into patch 1.
Done.
>
>> +#define MMCRA_IFM_MASK PPC_BITMASK(32, 33) /* BHRB Instruction
>> Filtering */
>> +#define MMCRA_IFM_SHIFT PPC_BIT_NR(33)
>>
>> #define MMCR1_EVT_SIZE 8
>> /* extract64() does a right shift before extracting */
>> @@ -682,6 +683,8 @@ enum {
>> POWERPC_FLAG_SMT = 0x00400000,
>> /* Using "LPAR per core" mode (as opposed to per-thread)
>> */
>> POWERPC_FLAG_SMT_1LPAR = 0x00800000,
>> + /* Has BHRB */
>> + POWERPC_FLAG_BHRB = 0x01000000,
>> };
>
> Interesting question of which patch to add different flags. I'm
> strongly in add when you add code that uses them like this one,
> but it's a matter of taste and not always practical to be an
> absolute rule. I don't mind too much what others do, but maybe
> this and the pcc->flags init should go in patch 1 since that's adding
> flags that aren't yet used?
>
I think I prefer keeping it in patch 2 since patch 1 is more about
the hflags, which seems unrelated to this flag.
>>
>> /*
>> @@ -1110,6 +1113,9 @@ DEXCR_ASPECT(PHIE, 6)
>> #define PPC_CPU_OPCODES_LEN 0x40
>> #define PPC_CPU_INDIRECT_OPCODES_LEN 0x20
>>
>> +#define BHRB_MAX_NUM_ENTRIES_LOG2 (5)
>> +#define BHRB_MAX_NUM_ENTRIES (1 << BHRB_MAX_NUM_ENTRIES_LOG2)
>> +
>> struct CPUArchState {
>> /* Most commonly used resources during translated code execution
>> first */
>> target_ulong gpr[32]; /* general purpose registers */
>> @@ -1196,6 +1202,14 @@ struct CPUArchState {
>> int dcache_line_size;
>> int icache_line_size;
>>
>> + /* Branch History Rolling Buffer (BHRB) resources */
>> + target_ulong bhrb_num_entries;
>> + target_ulong bhrb_base;
>> + target_ulong bhrb_filter;
>> + target_ulong bhrb_offset;
>> + target_ulong bhrb_offset_mask;
>> + uint64_t bhrb[BHRB_MAX_NUM_ENTRIES];
>
> Put these under ifdef TARGET_PPC64?
>
Ok.
>> +
>> /* These resources are used during exception processing */
>> /* CPU model definition */
>> target_ulong msr_mask;
>> diff --git a/target/ppc/cpu_init.c b/target/ppc/cpu_init.c
>> index 568f9c3b88..19d7505a73 100644
>> --- a/target/ppc/cpu_init.c
>> +++ b/target/ppc/cpu_init.c
>> @@ -6100,6 +6100,28 @@ POWERPC_FAMILY(POWER7)(ObjectClass *oc, void
>> *data)
>> pcc->l1_icache_size = 0x8000;
>> }
>>
>> +static void bhrb_init_state(CPUPPCState *env, target_long
>> num_entries_log2)
>> +{
>> + if (env->flags & POWERPC_FLAG_BHRB) {
>> + if (num_entries_log2 > BHRB_MAX_NUM_ENTRIES_LOG2) {
>> + num_entries_log2 = BHRB_MAX_NUM_ENTRIES_LOG2;
>> + }
>> + env->bhrb_num_entries = 1 << num_entries_log2;
>> + env->bhrb_base = (target_long)&env->bhrb[0];
>> + env->bhrb_offset_mask = (env->bhrb_num_entries *
>> sizeof(uint64_t)) - 1;
>> + }
>> +}
>> +
>> +static void bhrb_reset_state(CPUPPCState *env)
>> +{
>> + if (env->flags & POWERPC_FLAG_BHRB) {
>> + env->bhrb_offset = 0;
>> + env->bhrb_filter = 0;
>> + memset(env->bhrb, 0, sizeof(env->bhrb));
>> + }
>> +}
>> +
>> +#define POWER8_BHRB_ENTRIES_LOG2 5
>> static void init_proc_POWER8(CPUPPCState *env)
>> {
>> /* Common Registers */
>> @@ -6141,6 +6163,8 @@ static void init_proc_POWER8(CPUPPCState *env)
>> env->dcache_line_size = 128;
>> env->icache_line_size = 128;
>>
>> + bhrb_init_state(env, POWER8_BHRB_ENTRIES_LOG2);
>> +
>> /* Allocate hardware IRQ controller */
>> init_excp_POWER8(env);
>> ppcPOWER7_irq_init(env_archcpu(env));
>> @@ -6241,7 +6265,8 @@ POWERPC_FAMILY(POWER8)(ObjectClass *oc, void
>> *data)
>> pcc->flags = POWERPC_FLAG_VRE | POWERPC_FLAG_SE |
>> POWERPC_FLAG_BE | POWERPC_FLAG_PMM |
>> POWERPC_FLAG_BUS_CLK | POWERPC_FLAG_CFAR |
>> - POWERPC_FLAG_VSX | POWERPC_FLAG_TM;
>> + POWERPC_FLAG_VSX | POWERPC_FLAG_TM |
>> + POWERPC_FLAG_BHRB;
>> pcc->l1_dcache_size = 0x8000;
>> pcc->l1_icache_size = 0x8000;
>> }
>> @@ -6265,6 +6290,7 @@ static struct ppc_radix_page_info
>> POWER9_radix_page_info = {
>> };
>> #endif /* CONFIG_USER_ONLY */
>>
>> +#define POWER9_BHRB_ENTRIES_LOG2 5
>> static void init_proc_POWER9(CPUPPCState *env)
>> {
>> /* Common Registers */
>> @@ -6315,6 +6341,8 @@ static void init_proc_POWER9(CPUPPCState *env)
>> env->dcache_line_size = 128;
>> env->icache_line_size = 128;
>>
>> + bhrb_init_state(env, POWER9_BHRB_ENTRIES_LOG2);
>> +
>> /* Allocate hardware IRQ controller */
>> init_excp_POWER9(env);
>> ppcPOWER9_irq_init(env_archcpu(env));
>> @@ -6434,7 +6462,8 @@ POWERPC_FAMILY(POWER9)(ObjectClass *oc, void
>> *data)
>> pcc->flags = POWERPC_FLAG_VRE | POWERPC_FLAG_SE |
>> POWERPC_FLAG_BE | POWERPC_FLAG_PMM |
>> POWERPC_FLAG_BUS_CLK | POWERPC_FLAG_CFAR |
>> - POWERPC_FLAG_VSX | POWERPC_FLAG_TM |
>> POWERPC_FLAG_SCV;
>> + POWERPC_FLAG_VSX | POWERPC_FLAG_TM |
>> POWERPC_FLAG_SCV |
>> + POWERPC_FLAG_BHRB;
>> pcc->l1_dcache_size = 0x8000;
>> pcc->l1_icache_size = 0x8000;
>> }
>> @@ -6458,6 +6487,7 @@ static struct ppc_radix_page_info
>> POWER10_radix_page_info = {
>> };
>> #endif /* !CONFIG_USER_ONLY */
>>
>> +#define POWER10_BHRB_ENTRIES_LOG2 5
>> static void init_proc_POWER10(CPUPPCState *env)
>> {
>> /* Common Registers */
>> @@ -6505,6 +6535,8 @@ static void init_proc_POWER10(CPUPPCState *env)
>> env->dcache_line_size = 128;
>> env->icache_line_size = 128;
>>
>> + bhrb_init_state(env, POWER10_BHRB_ENTRIES_LOG2);
>> +
>> /* Allocate hardware IRQ controller */
>> init_excp_POWER10(env);
>> ppcPOWER9_irq_init(env_archcpu(env));
>> @@ -6610,7 +6642,8 @@ POWERPC_FAMILY(POWER10)(ObjectClass *oc, void
>> *data)
>> pcc->flags = POWERPC_FLAG_VRE | POWERPC_FLAG_SE |
>> POWERPC_FLAG_BE | POWERPC_FLAG_PMM |
>> POWERPC_FLAG_BUS_CLK | POWERPC_FLAG_CFAR |
>> - POWERPC_FLAG_VSX | POWERPC_FLAG_TM |
>> POWERPC_FLAG_SCV;
>> + POWERPC_FLAG_VSX | POWERPC_FLAG_TM |
>> POWERPC_FLAG_SCV |
>> + POWERPC_FLAG_BHRB;
>> pcc->l1_dcache_size = 0x8000;
>> pcc->l1_icache_size = 0x8000;
>> }
>> @@ -7178,6 +7211,8 @@ static void ppc_cpu_reset_hold(Object *obj)
>> }
>> env->spr[i] = spr->default_value;
>> }
>> +
>> + bhrb_reset_state(env);
>> }
>>
>> #ifndef CONFIG_USER_ONLY
>> diff --git a/target/ppc/helper_regs.c b/target/ppc/helper_regs.c
>> index 4ff054063d..68b27196d9 100644
>> --- a/target/ppc/helper_regs.c
>> +++ b/target/ppc/helper_regs.c
>> @@ -752,3 +752,35 @@ void register_usprgh_sprs(CPUPPCState *env)
>> &spr_read_ureg, SPR_NOACCESS,
>> 0x00000000);
>> }
>> +
>> +void hreg_bhrb_filter_update(CPUPPCState *env)
>> +{
>> + target_long ifm;
>> +
>> + if (!(env->spr[SPR_POWER_MMCR0] & MMCR0_PMAE)) {
>> + /* disable recording to BHRB */
>> + env->bhrb_filter = BHRB_TYPE_NORECORD;
>> + return;
>> + }
>> +
>> + ifm = (env->spr[SPR_POWER_MMCRA] & MMCRA_IFM_MASK) >>
>> MMCRA_IFM_SHIFT;
>> + switch (ifm) {
>> + case 0:
>> + /* record all branches */
>> + env->bhrb_filter = -1;
>> + break;
>> + case 1:
>> + /* only record calls (LK = 1) */
>> + env->bhrb_filter = BHRB_TYPE_CALL;
>> + break;
>> + case 2:
>> + /* only record indirect branches */
>> + env->bhrb_filter = BHRB_TYPE_INDIRECT;
>> + break;
>> + case 3:
>> + /* only record conditional branches */
>> + env->bhrb_filter = BHRB_TYPE_COND;
>> + break;
>> + }
>> +}
>> +
>> diff --git a/target/ppc/helper_regs.h b/target/ppc/helper_regs.h
>> index 8196c1346d..ab6aba1c24 100644
>> --- a/target/ppc/helper_regs.h
>> +++ b/target/ppc/helper_regs.h
>> @@ -25,6 +25,7 @@ void hreg_compute_hflags(CPUPPCState *env);
>> void hreg_update_pmu_hflags(CPUPPCState *env);
>> void cpu_interrupt_exittb(CPUState *cs);
>> int hreg_store_msr(CPUPPCState *env, target_ulong value, int
>> alter_hv);
>> +void hreg_bhrb_filter_update(CPUPPCState *env);
>>
>> #ifdef CONFIG_USER_ONLY
>> static inline void check_tlb_flush(CPUPPCState *env, bool global) { }
>> diff --git a/target/ppc/power8-pmu.c b/target/ppc/power8-pmu.c
>> index 6f5d4e1256..18149a15b2 100644
>> --- a/target/ppc/power8-pmu.c
>> +++ b/target/ppc/power8-pmu.c
>> @@ -261,6 +261,7 @@ void helper_store_mmcr0(CPUPPCState *env,
>> target_ulong value)
>> env->spr[SPR_POWER_MMCR0] = value;
>>
>> pmu_mmcr01a_updated(env);
>> + hreg_bhrb_filter_update(env);
>>
>> /* Update cycle overflow timers with the current MMCR0 state */
>> pmu_update_overflow_timers(env);
>> @@ -280,6 +281,7 @@ void helper_store_mmcrA(CPUPPCState *env, uint64_t
>> value)
>> env->spr[SPR_POWER_MMCRA] = value;
>>
>> pmu_mmcr01a_updated(env);
>> + hreg_bhrb_filter_update(env);
>> }
>>
>
> Can these just be moved into pmu_mmcr01a_updated? AFAIKS they only
> depend on MMCR0/A.
>
Done.
>> target_ulong helper_read_pmc(CPUPPCState *env, uint32_t sprn)
>> diff --git a/target/ppc/power8-pmu.h b/target/ppc/power8-pmu.h
>> index 87fa8c9334..a887094045 100644
>> --- a/target/ppc/power8-pmu.h
>> +++ b/target/ppc/power8-pmu.h
>> @@ -17,6 +17,13 @@
>>
>> #define PMC_COUNTER_NEGATIVE_VAL 0x80000000UL
>>
>> +#define BHRB_TYPE_NORECORD 0x00
>> +#define BHRB_TYPE_CALL 0x01
>> +#define BHRB_TYPE_INDIRECT 0x02
>> +#define BHRB_TYPE_COND 0x04
>> +#define BHRB_TYPE_OTHER 0x08
>> +#define BHRB_TYPE_XL_FORM 0x10
>> +
>> void cpu_ppc_pmu_init(CPUPPCState *env);
>> void pmu_mmcr01a_updated(CPUPPCState *env);
>> #else
>> diff --git a/target/ppc/translate.c b/target/ppc/translate.c
>> index d93fbd4574..7824475f54 100644
>> --- a/target/ppc/translate.c
>> +++ b/target/ppc/translate.c
>> @@ -177,6 +177,7 @@ struct DisasContext {
>> #if defined(TARGET_PPC64)
>> bool sf_mode;
>> bool has_cfar;
>> + bool has_bhrb;
>> #endif
>> bool fpu_enabled;
>> bool altivec_enabled;
>> @@ -4104,12 +4105,100 @@ static void gen_rvwinkle(DisasContext *ctx)
>> }
>> #endif /* #if defined(TARGET_PPC64) */
>>
>> -static inline void gen_update_cfar(DisasContext *ctx, target_ulong
>> nip)
>> +
>> +static inline void gen_update_branch_history(DisasContext *ctx,
>> + target_ulong nip,
>> + TCGv target,
>> + target_long inst_type)
>> {
>> #if defined(TARGET_PPC64)
>> + TCGv t0;
>> + TCGv t1;
>> + TCGv t2;
>> + TCGv t3;
>> + TCGLabel *no_update;
>> +
>> if (ctx->has_cfar) {
>> tcg_gen_movi_tl(cpu_cfar, nip);
>> }
>> +
>> + if (!ctx->has_bhrb || inst_type == BHRB_TYPE_NORECORD) {
>> + return;
>> + }
>> +
>> + /* ISA 3.1 adds the PMCRA[BRHBRD] and problem state checks */
>> + if ((ctx->insns_flags2 & PPC2_ISA310) && (ctx->mmcra_bhrbrd ||
>> !ctx->pr)) {
>> + return;
>> + }
>> +
>> + /* Check for BHRB "frozen" conditions */
>> + if (ctx->mmcr0_fcpc) {
>> + if (ctx->mmcr0_fcp) {
>> + if ((ctx->hv) && (ctx->pr)) {
>> + return;
>> + }
>> + } else if (!(ctx->hv) && (ctx->pr)) {
>> + return;
>> + }
>> + } else if ((ctx->mmcr0_fcp) && (ctx->pr)) {
>> + return;
>> + }
>> +
>> + t0 = tcg_temp_new();
>> + t1 = tcg_temp_new();
>> + t2 = tcg_temp_new();
>> + t3 = tcg_temp_new();
>> + no_update = gen_new_label();
>> +
>> + /* check for bhrb filtering */
>> + tcg_gen_ld_tl(t0, cpu_env, offsetof(CPUPPCState, bhrb_filter));
>> + tcg_gen_andi_tl(t0, t0, inst_type);
>> + tcg_gen_brcondi_tl(TCG_COND_EQ, t0, 0, no_update);
>> +
>> + /* load bhrb base address into t2 */
>> + tcg_gen_ld_tl(t2, cpu_env, offsetof(CPUPPCState, bhrb_base));
>> +
>> + /* load current bhrb_offset into t0 */
>> + tcg_gen_ld_tl(t0, cpu_env, offsetof(CPUPPCState, bhrb_offset));
>> +
>> + /* load a BHRB offset mask into t3 */
>> + tcg_gen_ld_tl(t3, cpu_env, offsetof(CPUPPCState,
>> bhrb_offset_mask));
>> +
>> + /* add t2 to t0 to get address of bhrb entry */
>> + tcg_gen_add_tl(t1, t0, t2);
>> +
>> + /* store nip into bhrb at bhrb_offset */
>> + tcg_gen_st_i64(tcg_constant_i64(nip), (TCGv_ptr)t1, 0);
>> +
>> + /* add 8 to current bhrb_offset */
>> + tcg_gen_addi_tl(t0, t0, 8);
>> +
>> + /* apply offset mask */
>> + tcg_gen_and_tl(t0, t0, t3);
>
> For some cases where the value remains live for a time and not reused,
> maybe use a more descriptive name? I know existing code is pretty lazy
> with names (and I share some blame). t0,t2,t3 could be base, offset,
> mask respectively, and t1 could be tmp that is used for filter and
> address generation. Just a suggestion?
>
Yeah, I agree. That is normally what I would have done, but changed
my mind after seeing how everyone else seemed to be doing it :)
Changing for readability.
>
>> +
>> + if (inst_type & BHRB_TYPE_XL_FORM) {
>> + /* Also record the target address for XL-Form branches */
>> +
>> + /* add t2 to t0 to get address of bhrb entry */
>> + tcg_gen_add_tl(t1, t0, t2);
>> +
>> + /* Set the 'T' bit for target entries */
>> + tcg_gen_ori_tl(t2, target, 0x2);
>> +
>> + /* Store target address in bhrb */
>> + tcg_gen_st_tl(t2, (TCGv_ptr)t1, 0);
>> +
>> + /* add 8 to current bhrb_offset */
>> + tcg_gen_addi_tl(t0, t0, 8);
>> +
>> + /* apply offset mask */
>> + tcg_gen_and_tl(t0, t0, t3);
>> + }
>
> I would say this is bordering on warranting a new function since it
> mostly duplicates previous block. You could have it take base, mask,
> offset and value to store, and have it return new offset.
>
I agree. Will make the changes.
>> +
>> + /* save updated bhrb_offset for next time */
>> + tcg_gen_st_tl(t0, cpu_env, offsetof(CPUPPCState, bhrb_offset));
>> +
>> + gen_set_label(no_update);
>> #endif
>> }
>
> It all looks pretty good otherwise. I do worry about POWER8/9 which
> do not have BHRB disable bit. How much do they slow down I wonder?
That is a good question! I'll see if I can get some linux boot times
with and without the changes on P9.
>
> Thanks,
> Nick
Thanks for taking the time to review my code!
Glenn
next prev parent reply other threads:[~2023-09-19 21:39 UTC|newest]
Thread overview: 18+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-09-12 19:21 [PATCH 0/4] Add BHRB Facility Support Glenn Miles
2023-09-12 20:23 ` [PATCH 1/4] target/ppc: Add new hflags to support BHRB Glenn Miles
2023-09-15 0:39 ` Nicholas Piggin
2023-09-19 21:19 ` Glenn Miles
2023-09-12 20:24 ` [PATCH 2/4] target/ppc: Add recording of taken branches to BHRB Glenn Miles
2023-09-15 1:02 ` Nicholas Piggin
2023-09-19 21:35 ` Glenn Miles [this message]
2023-09-21 15:45 ` Glenn Miles
2023-09-12 20:24 ` [PATCH 3/4] target/ppc: Add clrbhrb and mfbhrbe instructions Glenn Miles
2023-09-15 1:13 ` Nicholas Piggin
2023-09-19 21:40 ` Glenn Miles
2023-09-12 20:25 ` [PATCH 4/4] target/ppc: Add migration support for BHRB Glenn Miles
2023-09-15 1:20 ` Nicholas Piggin
2023-09-19 22:01 ` Glenn Miles
2023-09-12 20:57 ` [PATCH 3/4] target/ppc: Add clrbhrb and mfbhrbe instructions Glenn Miles
2023-09-12 21:05 ` [PATCH 4/4] target/ppc: Add migration support for BHRB Glenn Miles
2023-09-12 21:27 ` Glenn Miles
-- strict thread matches above, loose matches on Subject: below --
2023-09-11 21:23 [PATCH 0/4] Add BHRB Facility Support Glenn Miles
2023-09-11 21:23 ` [PATCH 2/4] target/ppc: Add recording of taken branches to BHRB Glenn Miles
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=955aa084c1e56559943f17514d7bed73@linux.vnet.ibm.com \
--to=milesg@linux.vnet.ibm.com \
--cc=clg@kaod.org \
--cc=danielhb413@gmail.com \
--cc=david@gibson.dropbear.id.au \
--cc=groug@kaod.org \
--cc=npiggin@gmail.com \
--cc=qemu-devel@nongnu.org \
--cc=qemu-ppc@nongnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).