public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Peter Zijlstra <peterz@infradead.org>
To: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Cc: Andy Lutomirski <luto@kernel.org>,
	Thomas Gleixner <tglx@linutronix.de>,
	LKML <linux-kernel@vger.kernel.org>, X86 ML <x86@kernel.org>
Subject: Re: [RFC][PATCH 0/4] x86/entry: disallow #DB more
Date: Mon, 25 May 2020 13:01:01 +0200	[thread overview]
Message-ID: <20200525110101.GG325303@hirez.programming.kicks-ass.net> (raw)
In-Reply-To: <20200525104038.GC317569@hirez.programming.kicks-ass.net>

On Mon, May 25, 2020 at 12:40:38PM +0200, Peter Zijlstra wrote:
> On Mon, May 25, 2020 at 12:02:48PM +0200, Rasmus Villemoes wrote:
> 
> > Naive question: did you check disassembly to see whether gcc threw your
> > native_get_debugreg() away, given that the asm isn't volatile and the
> > result is not used for anything? Testing here only shows a "mov
> > %r9,%db7", but the read did seem to get thrown away.
> 
> Argh.. no I did not. Writing it all in asm gets me:
> 
> [    1.627405] XXX: 3900 8304 22632
> 
> which is a lot worse...

+	u64 empty = 0, read = 0, write = 0, cpu = 0, cpu1 = 0;
+	unsigned long dr7;
+
+	for (i=0; i<100; i++) {
+		u64 s;
+
+		s = rdtsc();
+		asm volatile ("lfence; lfence;");
+		empty += rdtsc() - s;
+
+		s = rdtsc();
+		asm volatile ("lfence; mov %%db7, %0; lfence;" : "=r" (dr7));
+		read += rdtsc() - s;
+
+		s = rdtsc();
+		asm volatile ("lfence; mov %0, %%db7; lfence;" :: "r" (dr7));
+		write += rdtsc() - s;
+
+		s = rdtsc();
+		asm volatile ("lfence; mov %0, %%db7; lfence;" :: "r" (dr7));
+		write += rdtsc() - s;
+
+		clflush(this_cpu_ptr(&cpu_dr7));
+
+		s = rdtsc();
+		asm volatile ("lfence;");
+		dr7 = this_cpu_read(cpu_dr7);
+		asm volatile ("lfence;");
+		cpu += rdtsc() - s;
+
+		s = rdtsc();
+		asm volatile ("lfence;");
+		dr7 = this_cpu_read(cpu_dr7);
+		asm volatile ("lfence;");
+		cpu1 += rdtsc() - s;
+	}
+
+	printk("XXX: %ld %ld %ld %ld %ld\n", empty, read, write, cpu, cpu1);

[    1.628252] XXX: 3820 8224 45516 35560 4800

Which still seems to suggest that using DR7 directly is probably a good
thing. It's slower than an L1 hit, but massively faster than a full miss.

---

11f:   0f 31                   rdtsc
121:   48 89 d1                mov    %rdx,%rcx
124:   48 89 c6                mov    %rax,%rsi
127:   0f ae e8                lfence
12a:   0f ae e8                lfence
12d:   0f 31                   rdtsc
12f:   48 c1 e2 20             shl    $0x20,%rdx
133:   48 c1 e1 20             shl    $0x20,%rcx
137:   48 09 c2                or     %rax,%rdx
13a:   48 09 f1                or     %rsi,%rcx
13d:   48 29 ca                sub    %rcx,%rdx
140:   48 01 d3                add    %rdx,%rbx
143:   0f 31                   rdtsc
145:   48 89 d1                mov    %rdx,%rcx
148:   48 89 c6                mov    %rax,%rsi
14b:   0f ae e8                lfence
14e:   41 0f 21 fb             mov    %db7,%r11
152:   0f ae e8                lfence
155:   0f 31                   rdtsc
157:   48 c1 e2 20             shl    $0x20,%rdx
15b:   48 c1 e1 20             shl    $0x20,%rcx
15f:   48 09 c2                or     %rax,%rdx
162:   48 09 f1                or     %rsi,%rcx
165:   48 29 ca                sub    %rcx,%rdx
168:   48 01 d5                add    %rdx,%rbp
16b:   0f 31                   rdtsc
16d:   48 89 d6                mov    %rdx,%rsi
170:   49 89 c1                mov    %rax,%r9
173:   0f ae e8                lfence
176:   41 0f 23 fb             mov    %r11,%db7
17a:   0f ae e8                lfence
17d:   0f 31                   rdtsc
17f:   48 89 d7                mov    %rdx,%rdi
182:   49 89 c2                mov    %rax,%r10
185:   0f 31                   rdtsc
187:   48 89 d1                mov    %rdx,%rcx
18a:   49 89 c0                mov    %rax,%r8
18d:   0f ae e8                lfence
190:   41 0f 23 fb             mov    %r11,%db7
194:   0f ae e8                lfence
197:   0f 31                   rdtsc
199:   48 c1 e2 20             shl    $0x20,%rdx
19d:   48 c1 e6 20             shl    $0x20,%rsi
1a1:   48 09 c2                or     %rax,%rdx
1a4:   48 89 f8                mov    %rdi,%rax
1a7:   48 c1 e1 20             shl    $0x20,%rcx
1ab:   48 c1 e0 20             shl    $0x20,%rax
1af:   49 09 f1                or     %rsi,%r9
1b2:   49 09 c8                or     %rcx,%r8
1b5:   49 09 c2                or     %rax,%r10
1b8:   4a 8d 04 12             lea    (%rdx,%r10,1),%rax
1bc:   48 c7 c2 00 00 00 00    mov    $0x0,%rdx
			1bf: R_X86_64_32S       cpu_dr7
1c3:   4c 29 c8                sub    %r9,%rax
1c6:   4c 29 c0                sub    %r8,%rax
1c9:   49 01 c4                add    %rax,%r12
1cc:   48 89 14 24             mov    %rdx,(%rsp)
1d0:   48 89 54 24 08          mov    %rdx,0x8(%rsp)
1d5:   e8 00 00 00 00          callq  1da <sched_init+0xe1>
			1d6: R_X86_64_PLT32     debug_smp_processor_id-0x4
1da:   48 c7 c1 00 00 00 00    mov    $0x0,%rcx
			1dd: R_X86_64_32S       __per_cpu_offset
1e1:   48 8b 14 24             mov    (%rsp),%rdx
1e5:   89 c0                   mov    %eax,%eax
1e7:   48 03 14 c1             add    (%rcx,%rax,8),%rdx
1eb:   0f ae 3a                clflush (%rdx)
1ee:   0f 31                   rdtsc
1f0:   48 89 d1                mov    %rdx,%rcx
1f3:   48 89 c6                mov    %rax,%rsi
1f6:   0f ae e8                lfence
1f9:   65 48 8b 05 00 00 00    mov    %gs:0x0(%rip),%rax        # 201 <sched_init+0x108>
200:   00
			1fd: R_X86_64_PC32      cpu_dr7-0x4
201:   0f ae e8                lfence
204:   0f 31                   rdtsc
206:   48 c1 e2 20             shl    $0x20,%rdx
20a:   48 c1 e1 20             shl    $0x20,%rcx
20e:   48 09 c2                or     %rax,%rdx
211:   48 09 f1                or     %rsi,%rcx
214:   48 29 ca                sub    %rcx,%rdx
217:   49 01 d5                add    %rdx,%r13
21a:   0f 31                   rdtsc
21c:   48 89 d1                mov    %rdx,%rcx
21f:   48 89 c6                mov    %rax,%rsi
222:   0f ae e8                lfence
225:   65 48 8b 05 00 00 00    mov    %gs:0x0(%rip),%rax        # 22d <sched_init+0x134>
22c:   00
			229: R_X86_64_PC32      cpu_dr7-0x4
22d:   0f ae e8                lfence
230:   0f 31                   rdtsc
232:   48 c1 e2 20             shl    $0x20,%rdx
236:   48 c1 e1 20             shl    $0x20,%rcx
23a:   48 09 c2                or     %rax,%rdx
23d:   48 09 f1                or     %rsi,%rcx
240:   48 29 ca                sub    %rcx,%rdx
243:   49 01 d6                add    %rdx,%r14
246:   41 ff cf                dec    %r15d
249:   0f 85 d0 fe ff ff       jne    11f <sched_init+0x26>



  reply	other threads:[~2020-05-25 11:01 UTC|newest]

Thread overview: 15+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-05-22 20:47 [RFC][PATCH 0/4] x86/entry: disallow #DB more Peter Zijlstra
2020-05-22 20:47 ` [RFC][PATCH 1/4] x86/entry: Introduce local_db_{save,restore}() Peter Zijlstra
2020-05-22 20:47 ` [RFC][PATCH 2/4] x86/entry, nmi: Disable #DB Peter Zijlstra
2020-05-22 20:47 ` [RFC][PATCH 3/4] x86/entry: Remove debug IST frobbing Peter Zijlstra
2020-05-22 20:47 ` [RFC][PATCH 4/4] x86/entry, mce: Disallow #DB during #MC Peter Zijlstra
2020-05-22 22:13 ` [RFC][PATCH 0/4] x86/entry: disallow #DB more Andy Lutomirski
2020-05-22 22:20   ` Sean Christopherson
2020-05-22 22:43     ` Andy Lutomirski
2020-05-23 12:59   ` Peter Zijlstra
2020-05-23 21:32     ` Peter Zijlstra
2020-05-25 10:02       ` Rasmus Villemoes
2020-05-25 10:40         ` Peter Zijlstra
2020-05-25 11:01           ` Peter Zijlstra [this message]
2020-05-25 17:19             ` Andy Lutomirski
2020-05-25 18:08               ` Peter Zijlstra

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200525110101.GG325303@hirez.programming.kicks-ass.net \
    --to=peterz@infradead.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux@rasmusvillemoes.dk \
    --cc=luto@kernel.org \
    --cc=tglx@linutronix.de \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox