public inbox for kvm@vger.kernel.org
 help / color / mirror / Atom feed
From: "Radim Krčmář" <rkrcmar@redhat.com>
To: Paolo Bonzini <pbonzini@redhat.com>
Cc: linux-kernel@vger.kernel.org, kvm@vger.kernel.org,
	Nadav Amit <namit@cs.technion.ac.il>,
	Gleb Natapov <gleb@kernel.org>
Subject: Re: [PATCH 8/8] KVM: x86: simplify kvm_apic_map
Date: Fri, 30 Jan 2015 17:57:47 +0100	[thread overview]
Message-ID: <20150130165747.GD27414@potion.redhat.com> (raw)
In-Reply-To: <54CBA1F9.6070206@redhat.com>

2015-01-30 16:23+0100, Paolo Bonzini:
> On 30/01/2015 16:14, Radim Krčmář wrote:
> > > > +	case KVM_APIC_MODE_XAPIC_FLAT:
> > > > +		*cid = 0;
> > > > +		*lid = ldr & 0xff;
> > > > +		return true;
> > > > +	case KVM_APIC_MODE_XAPIC_CLUSTER:
> > > > +		*cid = (ldr >> 4) & 0xf;
> > > > +		*lid = ldr & 0xf;
> > > > +		return true;
> > > > +	case KVM_APIC_MODE_X2APIC:
> > > > +		*cid = ldr >> 16;
> > > > +		*lid = ldr & 0xffff;
> > > > +		return true;
> > > > +	}
> > 
> >> > 	lid_bits = mode;
> >> > 	cid_bits = mode & (16 | 4);
> >> > 	lid_mask = (1 << lid_bits) - 1;
> >> > 	cid_mask = (1 << cid_bits) - 1;
> >> > 
> >> > 	*cid = (ldr >> lid_bits) & cid_mask;
> >> > 	*lid = ldr & lid_mask;
> > Would jump predictor fail on the switch?  Or is size of the code that
> > important?  This code is shorter, but is going to execute far more
> > operations, so I think it would be slower ... (And harder to read.)
> 
> Considering the additional comparisons for the switch, I don't think
> it's going to execute far more operations...

(A quick assembly comparison [1].)

As optimizations go, we could drop the "&" on cid as "cid < 16" is
checked later, so mode=4 practically does nothing ... Not the best for
future bugs, but still pretty safe -- only x2APIC can set a value that
would require the "&" and it can't have valid XAPIC mode, so u32 still
protects us enough.

  *cid = ldr >> mode;
  *lid = ldr & ((1 << mode) - 1);

Which is probably faster a solution where we don't shuffle switch cases
to jump to x2APIC first.  A comparison is at the very bottom [2].

Would that be ok in v2?


---
1:  To make it easier, it wasn't inlined, sorry.

    There are four parts, first one compares the whole functions and
    three subsequent compare each switch case, with common head/tail
    omitted.  (We don't care about performance when the map is invalid.)

    The optimized version is always first and next to it is the old one.

0000000000001700 <apic_logical_id>:  (ALI from now on)
  1700:	callq  1705 <ALI+0x5>     1700:	callq  1705 <ALI+0x5>
  1705:	push   %rbp               1705:	movzbl 0x10(%rdi),%eax
  1706:	movzbl 0x10(%rdi),%r8d    1709:	push   %rbp
  170b:	mov    %rcx,%r10          170a:	mov    %rsp,%rbp
  170e:	xor    %eax,%eax          170d:	cmp    $0x8,%al
  1710:	mov    %rsp,%rbp          170f:	je     1758 <ALI+0x58>
  1713:	lea    -0x1(%r8),%ecx     1711:	cmp    $0x10,%al
  1717:	test   %r8d,%ecx          1713:	je     1740 <ALI+0x40>
  171a:	jne    1752 <ALI+0x52>    1715:	cmp    $0x4,%al
  171c:	mov    %r8d,%edi          1717:	je     1720 <ALI+0x20>
  171f:	mov    $0x1,%eax          1719:	xor    %eax,%eax
  1724:	mov    %esi,%r8d          171b:	pop    %rbp
  1727:	mov    %edi,%ecx          171c:	retq   
  1729:	mov    %eax,%r9d          171d:	nopl   (%rax)
  172c:	shr    %cl,%r8d           1720:	mov    %esi,%eax
  172f:	and    $0x14,%ecx         1722:	and    $0xf,%esi
  1732:	shl    %cl,%r9d           1725:	shr    $0x4,%eax
  1735:	mov    %edi,%ecx          1728:	and    $0xf,%eax
  1737:	shl    %cl,%eax           172b:	mov    %ax,(%rdx)
  1739:	sub    $0x1,%r9d          172e:	mov    $0x1,%eax
  173d:	sub    $0x1,%eax          1733:	mov    %si,(%rcx)
  1740:	and    %r9d,%r8d          1736:	pop    %rbp
  1743:	and    %esi,%eax          1737:	retq   
  1745:	mov    %r8w,(%rdx)        1738:	nopl   0x0(%rax,%rax,1)
  1749:	mov    %ax,(%r10)         173f:	
  174d:	mov    $0x1,%eax          1740:	mov    %esi,%eax
  1752:	pop    %rbp               1742:	shr    $0x10,%eax
  1753:	retq                      1745:	mov    %ax,(%rdx)
       	                          1748:	mov    $0x1,%eax
       	                          174d:	mov    %si,(%rcx)
       	                          1750:	pop    %rbp
       	                          1751:	retq   
       	                          1752:	nopw   0x0(%rax,%rax,1)
       	                          1758:	xor    %eax,%eax
       	                          175a:	and    $0xff,%si
       	                          175f:	mov    %ax,(%rdx)
       	                          1762:	mov    $0x1,%eax
       	                          1767:	mov    %si,(%rcx)
       	                          176a:	pop    %rbp
       	                          176b:	retq   



  170b:	mov    %rcx,%r10          170a:	mov    %rsp,%rbp
  170e:	xor    %eax,%eax          170d:	cmp    $0x8,%al
  1710:	mov    %rsp,%rbp          170f:	je     1758 <ALI+0x58>
  1713:	lea    -0x1(%r8),%ecx         [jump]
  1717:	test   %r8d,%ecx          1758:	xor    %eax,%eax
  171a:	jne    1752 <ALI+0x52>    175a:	and    $0xff,%si
  171c:	mov    %r8d,%edi          175f:	mov    %ax,(%rdx)
  171f:	mov    $0x1,%eax          1762:	mov    $0x1,%eax
  1724:	mov    %esi,%r8d          1767:	mov    %si,(%rcx)
  1727:	mov    %edi,%ecx          
  1729:	mov    %eax,%r9d          
  172c:	shr    %cl,%r8d           
  172f:	and    $0x14,%ecx         
  1732:	shl    %cl,%r9d           
  1735:	mov    %edi,%ecx          
  1737:	shl    %cl,%eax           
  1739:	sub    $0x1,%r9d          
  173d:	sub    $0x1,%eax          
  1740:	and    %r9d,%r8d          
  1743:	and    %esi,%eax          
  1745:	mov    %r8w,(%rdx)        
  1749:	mov    %ax,(%r10)         
  174d:	mov    $0x1,%eax          


  170b:	mov    %rcx,%r10          170a:	mov    %rsp,%rbp
  170e:	xor    %eax,%eax          170d:	cmp    $0x8,%al
  1710:	mov    %rsp,%rbp          170f:	je     1758 <ALI+0x58>
  1713:	lea    -0x1(%r8),%ecx     1711:	cmp    $0x10,%al
  1717:	test   %r8d,%ecx          1713:	je     1740 <ALI+0x40>
  171a:	jne    1752 <ALI+0x52>        [jump]
  171c:	mov    %r8d,%edi          1740:	mov    %esi,%eax
  171f:	mov    $0x1,%eax          1742:	shr    $0x10,%eax
  1724:	mov    %esi,%r8d          1745:	mov    %ax,(%rdx)
  1727:	mov    %edi,%ecx          1748:	mov    $0x1,%eax
  1729:	mov    %eax,%r9d          174d:	mov    %si,(%rcx)
  172c:	shr    %cl,%r8d           
  172f:	and    $0x14,%ecx         
  1732:	shl    %cl,%r9d           
  1735:	mov    %edi,%ecx          
  1737:	shl    %cl,%eax           
  1739:	sub    $0x1,%r9d          
  173d:	sub    $0x1,%eax          
  1740:	and    %r9d,%r8d          
  1743:	and    %esi,%eax          
  1745:	mov    %r8w,(%rdx)        
  1749:	mov    %ax,(%r10)         
  174d:	mov    $0x1,%eax          


  170b:	mov    %rcx,%r10          170a:	mov    %rsp,%rbp        
  170e:	xor    %eax,%eax          170d:	cmp    $0x8,%al
  1710:	mov    %rsp,%rbp          170f:	je     1758 <ALI+0x58>
  1713:	lea    -0x1(%r8),%ecx     1711:	cmp    $0x10,%al
  1717:	test   %r8d,%ecx          1713:	je     1740 <ALI+0x40>
  171a:	jne    1752 <ALI+0x52>    1715:	cmp    $0x4,%al
  171c:	mov    %r8d,%edi          1717:	je     1720 <ALI+0x20>
  171f:	mov    $0x1,%eax              [jump]
  1724:	mov    %esi,%r8d          1720:	mov    %esi,%eax
  1727:	mov    %edi,%ecx          1722:	and    $0xf,%esi
  1729:	mov    %eax,%r9d          1725:	shr    $0x4,%eax
  172c:	shr    %cl,%r8d           1728:	and    $0xf,%eax
  172f:	and    $0x14,%ecx         172b:	mov    %ax,(%rdx)
  1732:	shl    %cl,%r9d           172e:	mov    $0x1,%eax
  1735:	mov    %edi,%ecx          1733:	mov    %si,(%rcx)
  1737:	shl    %cl,%eax           
  1739:	sub    $0x1,%r9d          
  173d:	sub    $0x1,%eax          
  1740:	and    %r9d,%r8d          
  1743:	and    %esi,%eax          
  1745:	mov    %r8w,(%rdx)        
  1749:	mov    %ax,(%r10)         
  174d:	mov    $0x1,%eax          


2: A comparison of aggressively optimized ALI with the worst case
   scenario, where x2APIC is the third jump.

  16e5:	mov    %rcx,%r9           170a:	mov    %rsp,%rbp        
  16ed:	xor    %eax,%eax          170d:	cmp    $0x8,%al
  16ef:	mov    %rsp,%rbp          170f:	je     1758 <ALI+0x58>
  16f2:	lea    -0x1(%rcx),%r8d    1711:	cmp    $0x10,%al
  16f6:	test   %ecx,%r8d          1713:	je     1740 <ALI+0x40>
  16f9:	jne    1717 <ALI+0x37>    1715:	cmp    $0x4,%al
  16fb:	mov    %esi,%eax          1717:	je     1720 <ALI+0x20>
  16fd:	shr    %cl,%eax               [jump]
  16ff:	mov    %ax,(%rdx)         1720:	mov    %esi,%eax
  1702:	mov    $0x1,%eax          1722:	and    $0xf,%esi
  1707:	shl    %cl,%eax           1725:	shr    $0x4,%eax
  1709:	sub    $0x1,%eax          1728:	and    $0xf,%eax
  170c:	and    %esi,%eax          172b:	mov    %ax,(%rdx)
  170e:	mov    %ax,(%r9)          172e:	mov    $0x1,%eax
  1712:	mov    $0x1,%eax          1733:	mov    %si,(%rcx)

  reply	other threads:[~2015-01-30 16:57 UTC|newest]

Thread overview: 34+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-01-29 21:48 [PATCH 0/8] KVM: minor APIC fixes and cleanups Radim Krčmář
2015-01-29 21:48 ` [PATCH 1/8] KVM: x86: return bool from kvm_apic_match*() Radim Krčmář
2015-01-29 22:10   ` Joe Perches
2015-01-30  8:50     ` Paolo Bonzini
2015-01-29 21:48 ` [PATCH 2/8] KVM: x86: cleanup kvm_apic_match_*() Radim Krčmář
2015-01-30  8:52   ` Paolo Bonzini
2015-01-30 13:06     ` Radim Krčmář
2015-02-02 14:26     ` Radim Krčmář
2015-02-02 14:28       ` Paolo Bonzini
2015-02-02 14:30         ` Radim Krčmář
2015-02-02 14:29       ` Radim Krčmář
2015-01-29 21:48 ` [PATCH 3/8] KVM: x86: replace 0 with APIC_DEST_PHYSICAL Radim Krčmář
2015-01-29 21:48 ` [PATCH 4/8] KVM: x86: fix x2apic logical address matching Radim Krčmář
2015-01-29 21:48 ` [PATCH 5/8] KVM: x86: use MDA for interrupt matching Radim Krčmář
2015-01-30  9:03   ` Paolo Bonzini
2015-01-30 13:09     ` Radim Krčmář
2015-01-29 21:48 ` [PATCH 6/8] KVM: x86: allow mixed APIC mode broadcast Radim Krčmář
2015-01-29 21:48 ` [PATCH 7/8] KVM: x86: avoid logical_map when it is invalid Radim Krčmář
2015-01-30  9:19   ` Paolo Bonzini
2015-01-30 14:21     ` Radim Krčmář
2015-01-30 14:21       ` Paolo Bonzini
2015-01-30  9:38   ` Paolo Bonzini
2015-01-30 14:56     ` Radim Krčmář
2015-01-30 15:10       ` Paolo Bonzini
2015-01-30 17:09         ` Radim Krčmář
2015-01-29 21:48 ` [PATCH 8/8] KVM: x86: simplify kvm_apic_map Radim Krčmář
2015-01-30  9:18   ` Paolo Bonzini
2015-01-30 15:14     ` Radim Krčmář
2015-01-30 15:23       ` Paolo Bonzini
2015-01-30 16:57         ` Radim Krčmář [this message]
2015-01-30 21:15           ` Paolo Bonzini
2015-01-30  9:22 ` [PATCH 0/8] KVM: minor APIC fixes and cleanups Paolo Bonzini
2015-01-30 15:20   ` Radim Krčmář
2015-01-30 15:24     ` Paolo Bonzini

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20150130165747.GD27414@potion.redhat.com \
    --to=rkrcmar@redhat.com \
    --cc=gleb@kernel.org \
    --cc=kvm@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=namit@cs.technion.ac.il \
    --cc=pbonzini@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox