From: David Howells <dhowells@redhat.com>
To: Linus Torvalds <torvalds@linux-foundation.org>
Cc: dhowells@redhat.com, kernel test robot <oliver.sang@intel.com>,
oe-lkp@lists.linux.dev, lkp@intel.com,
linux-kernel@vger.kernel.org,
Christian Brauner <brauner@kernel.org>,
Alexander Viro <viro@zeniv.linux.org.uk>,
Jens Axboe <axboe@kernel.dk>, Christoph Hellwig <hch@lst.de>,
Christian Brauner <christian@brauner.io>,
Matthew Wilcox <willy@infradead.org>,
David Laight <David.Laight@aculab.com>,
ying.huang@intel.com, feng.tang@intel.com, fengwei.yin@intel.com
Subject: Re: [linus:master] [iov_iter] c9eec08bac: vm-scalability.throughput -16.9% regression
Date: Wed, 15 Nov 2023 18:35:34 +0000 [thread overview]
Message-ID: <4007890.1700073334@warthog.procyon.org.uk> (raw)
In-Reply-To: <CAHk-=wjCUckvZUQf7gqp2ziJUWxVpikM_6srFdbcNdBJTxExRg@mail.gmail.com>
Linus Torvalds <torvalds@linux-foundation.org> wrote:
> which makes *zero* sense. It first checks that the length is at
> least 8 bytes, then it moves *one* word by hand, then it aligns the
> code to 8 bytes remaining, and does the remaining (possibly
> overlapping at the beginning) words as one "rep movsq",
That's not what I see. See attached for a dump of _copy_from_iter from my
kernel. It's just using REP MOVSB.
For reference, the compiler is gcc-13.2.1-3.fc39.x86_64 with
binutils-2.40-13.fc39.x86_64.
David
---
(gdb) disas _copy_from_iter
Dump of assembler code for function _copy_from_iter:
<+0>: push %r15
<+2>: push %r14
<+4>: push %r13
<+6>: push %r12
<+8>: push %rbp
<+9>: push %rbx
<+10>: sub $0x40,%rsp
<+14>: mov %gs:0x28,%rax
<+23>: mov %rax,0x38(%rsp)
<+28>: xor %eax,%eax
<+30>: cmpb $0x0,0x3(%rdx)
<+34>: je 0xffffffff81770334 <_copy_from_iter+50>
<+36>: cmpb $0x0,0x1(%rdx)
<+40>: mov %rdi,%r12
<+43>: mov %rdx,%rbx
<+46>: je 0xffffffff81770364 <_copy_from_iter+98>
<+48>: jmp 0xffffffff8177033d <_copy_from_iter+59>
<+50>: ud2
<+52>: xor %ebp,%ebp
<+54>: jmp 0xffffffff8177067e <_copy_from_iter+892>
<+59>: mov 0x38(%rsp),%rax
<+64>: sub %gs:0x28,%rax
<+73>: jne 0xffffffff8177068e <_copy_from_iter+908>
<+79>: add $0x40,%rsp
<+83>: pop %rbx
<+84>: pop %rbp
<+85>: pop %r12
<+87>: pop %r13
<+89>: pop %r14
<+91>: pop %r15
<+93>: jmp 0xffffffff8176ee3f <__copy_from_iter_mc>
<+98>: mov 0x18(%rdx),%rax
<+102>: cmp %rax,%rsi
<+105>: cmova %rax,%rsi
<+109>: test %rsi,%rsi
<+112>: mov %rsi,%rbp
<+115>: je 0xffffffff8177067e <_copy_from_iter+892>
<+121>: mov (%rdx),%dl
<+123>: test %dl,%dl # ITER_UBUF
<+125>: jne 0xffffffff817703cc <_copy_from_iter+202>
<+127>: mov 0x8(%rbx),%rsi
<+131>: mov %rbp,%rcx
<+134>: add 0x10(%rbx),%rsi
<+138>: mov %rsi,%rdx
<+141>: mov %rbp,%rsi
<+144>: mov %rdx,%rdi
<+147>: call 0xffffffff8176ec9e <__access_ok>
<+152>: test %al,%al
<+154>: je 0xffffffff817703af <_copy_from_iter+173>
<+156>: nop
<+157>: nop
<+158>: nop
<+159>: mov %r12,%rdi
<+162>: mov %rdx,%rsi
<+165>: rep movsb %ds:(%rsi),%es:(%rdi)
<+167>: nop
<+168>: nop
<+169>: nop
<+170>: nop
<+171>: nop
<+172>: nop
<+173>: mov %rbp,%rax
<+176>: sub %rcx,%rax
<+179>: add 0x18(%rbx),%rcx
<+183>: add %rax,0x8(%rbx)
<+187>: sub %rbp,%rcx
<+190>: mov %rax,%rbp
<+193>: mov %rcx,0x18(%rbx)
<+197>: jmp 0xffffffff8177067e <_copy_from_iter+892>
<+202>: cmp $0x1,%dl # ITER_IOVEC
<+205>: jne 0xffffffff8177044c <_copy_from_iter+330>
<+207>: mov 0x10(%rbx),%r9
<+211>: mov %rsi,%r8
<+214>: xor %ebp,%ebp
<+216>: mov 0x8(%rbx),%r10
<+220>: mov 0x8(%r9),%rdx
<+224>: sub %r10,%rdx
<+227>: cmp %r8,%rdx
<+230>: cmova %r8,%rdx
<+234>: test %rdx,%rdx
<+237>: je 0xffffffff81770433 <_copy_from_iter+305>
<+239>: mov (%r9),%r11
<+242>: mov %rdx,%rsi
<+245>: mov %rdx,%rcx
<+248>: add %r10,%r11
<+251>: mov %r11,%rdi
<+254>: call 0xffffffff8176ec9e <__access_ok>
<+259>: test %al,%al
<+261>: je 0xffffffff8177041b <_copy_from_iter+281>
<+263>: nop
<+264>: nop
<+265>: nop
<+266>: lea (%r12,%rbp,1),%rdi
<+270>: mov %r11,%rsi
<+273>: rep movsb %ds:(%rsi),%es:(%rdi)
<+275>: nop
<+276>: nop
<+277>: nop
<+278>: nop
<+279>: nop
<+280>: nop
<+281>: mov %rdx,%rax
<+284>: sub %rdx,%r8
<+287>: sub %rcx,%rax
<+290>: add %rcx,%r8
<+293>: add %rax,%rbp
<+296>: add %rax,%r10
<+299>: cmp 0x8(%r9),%r10
<+303>: jb 0xffffffff81770444 <_copy_from_iter+322>
<+305>: add $0x10,%r9
<+309>: xor %r10d,%r10d
<+312>: test %r8,%r8
<+315>: jne 0xffffffff817703de <_copy_from_iter+220>
<+317>: jmp 0xffffffff81770544 <_copy_from_iter+578>
<+322>: mov %r10,%r8
<+325>: jmp 0xffffffff81770544 <_copy_from_iter+578>
<+330>: cmp $0x2,%dl # ITER_BVEC
<+333>: jne 0xffffffff817704ee <_copy_from_iter+492>
<+339>: mov 0x10(%rbx),%r8
<+343>: mov %rsi,%r11
<+346>: xor %ebp,%ebp
<+348>: mov $0x1000,%r10d
<+354>: mov 0x8(%rbx),%r9
<+358>: mov 0xc(%r8),%ecx
<+362>: add %r9,%rcx
<+365>: mov %rcx,%rdi
<+368>: and $0xfff,%ecx
<+374>: shr $0xc,%rdi
<+378>: shl $0x6,%rdi
<+382>: add (%r8),%rdi
<+385>: call 0xffffffff8176e72e <kmap_local_page>
<+390>: mov %r10,%rdx
<+393>: mov %rax,%rsi
<+396>: mov 0x8(%r8),%eax
<+400>: sub %r9,%rax
<+403>: cmp %r11,%rax
<+406>: cmova %r11,%rax
<+410>: sub %rcx,%rdx
<+413>: cmp %rdx,%rax
<+416>: cmova %rdx,%rax
<+420>: add %rcx,%rsi
<+423>: lea (%r12,%rbp,1),%rdx
<+427>: mov %eax,%ecx
<+429>: sub %rax,%r11
<+432>: mov %rdx,%rdi
<+435>: add %rax,%rbp
<+438>: add %rax,%r9
<+441>: rep movsb %ds:(%rsi),%es:(%rdi)
<+443>: mov 0x8(%r8),%eax
<+447>: cmp %rax,%r9
<+450>: jb 0xffffffff817704cd <_copy_from_iter+459>
<+452>: add $0x10,%r8
<+456>: xor %r9d,%r9d
<+459>: test %r11,%r11
<+462>: jne 0xffffffff81770468 <_copy_from_iter+358>
<+464>: mov %r8,%rax
<+467>: sub 0x10(%rbx),%rax
<+471>: mov %r9,0x8(%rbx)
<+475>: mov %r8,0x10(%rbx)
<+479>: sar $0x4,%rax
<+483>: sub %rax,0x20(%rbx)
<+487>: jmp 0xffffffff81770671 <_copy_from_iter+879>
<+492>: cmp $0x3,%dl # ITER_KVEC
<+495>: jne 0xffffffff81770560 <_copy_from_iter+606>
<+497>: mov 0x10(%rbx),%r9
<+501>: mov %rsi,%r8
<+504>: xor %ebp,%ebp
<+506>: mov 0x8(%rbx),%rdx
<+510>: mov 0x8(%r9),%rax
<+514>: sub %rdx,%rax
<+517>: cmp %r8,%rax
<+520>: cmova %r8,%rax
<+524>: test %rax,%rax
<+527>: je 0xffffffff81770534 <_copy_from_iter+562>
<+529>: mov (%r9),%rsi
<+532>: lea (%r12,%rbp,1),%r10
<+536>: mov %rax,%rcx
<+539>: add %rax,%rbp
<+542>: mov %r10,%rdi
<+545>: sub %rax,%r8
<+548>: add %rdx,%rsi
<+551>: add %rax,%rdx
<+554>: rep movsb %ds:(%rsi),%es:(%rdi)
<+556>: cmp 0x8(%r9),%rdx
<+560>: jb 0xffffffff81770541 <_copy_from_iter+575>
<+562>: add $0x10,%r9
<+566>: xor %edx,%edx
<+568>: test %r8,%r8
<+571>: jne 0xffffffff81770500 <_copy_from_iter+510>
<+573>: jmp 0xffffffff81770544 <_copy_from_iter+578>
<+575>: mov %rdx,%r8
<+578>: mov %r9,%rax
<+581>: sub 0x10(%rbx),%rax
<+585>: mov %r8,0x8(%rbx)
<+589>: mov %r9,0x10(%rbx)
<+593>: sar $0x4,%rax
<+597>: sub %rax,0x20(%rbx)
<+601>: jmp 0xffffffff81770671 <_copy_from_iter+879>
<+606>: cmp $0x4,%dl # ITER_XARRAY
<+609>: jne 0xffffffff81770677 <_copy_from_iter+885>
<+615>: movq $0x3,0x18(%rsp)
<+624>: mov 0x10(%rbx),%rax
<+628>: xor %edx,%edx
<+630>: mov 0x8(%rbx),%r14
<+634>: mov %rdx,0x20(%rsp)
<+639>: add 0x20(%rbx),%r14
<+643>: mov %rdx,0x28(%rsp)
<+648>: mov %rdx,0x30(%rsp)
<+653>: mov %rax,(%rsp)
<+657>: mov %r14,%rax
<+660>: shr $0xc,%rax
<+664>: mov %rax,0x8(%rsp)
<+669>: xor %eax,%eax
<+671>: mov %eax,0x10(%rsp)
<+675>: or $0xffffffffffffffff,%rsi
<+679>: mov %rsp,%rdi
<+682>: mov %rbp,%r13
<+685>: call 0xffffffff81d617d1 <xas_find>
<+690>: xor %ebp,%ebp
<+692>: mov $0x1000,%r15d
<+698>: mov %rax,%r8
<+701>: test %r8,%r8
<+704>: je 0xffffffff8177066d <_copy_from_iter+875>
<+710>: mov %r8,%rsi
<+713>: mov %rsp,%rdi
<+716>: call 0xffffffff8176e7a8 <xas_retry>
<+721>: test %al,%al
<+723>: jne 0xffffffff8177065d <_copy_from_iter+859>
<+729>: test $0x1,%r8d
<+736>: jne 0xffffffff81770614 <_copy_from_iter+786>
<+738>: mov %r8,%rdi
<+741>: call 0xffffffff8176e6a4 <folio_test_hugetlb>
<+746>: test %al,%al
<+748>: jne 0xffffffff81770618 <_copy_from_iter+790>
<+750>: call 0xffffffff8176e714 <folio_size>
<+755>: lea (%r14,%rbp,1),%rdx
<+759>: lea -0x1(%rax),%r10
<+763>: call 0xffffffff8176e714 <folio_size>
<+768>: and %rdx,%r10
<+771>: sub %r10,%rax
<+774>: cmp %r13,%rax
<+777>: cmova %r13,%rax
<+781>: mov %rax,%r9
<+784>: jmp 0xffffffff81770658 <_copy_from_iter+854>
<+786>: ud2
<+788>: jmp 0xffffffff8177066d <_copy_from_iter+875>
<+790>: ud2
<+792>: jmp 0xffffffff8177066d <_copy_from_iter+875>
<+794>: lea (%r12,%rbp,1),%r11
<+798>: mov %r10,%rsi
<+801>: mov %r8,%rdi
<+804>: call 0xffffffff8176e753 <kmap_local_folio>
<+809>: mov %r15,%rdx
<+812>: mov %r11,%rdi
<+815>: mov %rax,%rsi
<+818>: mov %r10,%rax
<+821>: and $0xfff,%eax
<+826>: sub %rax,%rdx
<+829>: cmp %r9,%rdx
<+832>: cmova %r9,%rdx
<+836>: add %rdx,%rbp
<+839>: sub %rdx,%r13
<+842>: mov %edx,%ecx
<+844>: rep movsb %ds:(%rsi),%es:(%rdi)
<+846>: je 0xffffffff8177066d <_copy_from_iter+875>
<+848>: sub %rdx,%r9
<+851>: add %rdx,%r10
<+854>: test %r9,%r9
<+857>: jne 0xffffffff8177061c <_copy_from_iter+794>
<+859>: mov %rsp,%rdi
<+862>: call 0xffffffff8176f307 <xas_next_entry>
<+867>: mov %rax,%r8
<+870>: jmp 0xffffffff817705bf <_copy_from_iter+701>
<+875>: add %rbp,0x8(%rbx)
<+879>: sub %rbp,0x18(%rbx)
<+883>: jmp 0xffffffff8177067e <_copy_from_iter+892>
# ITER_DISCARD / default
<+885>: sub %rsi,%rax
<+888>: mov %rax,0x18(%rbx)
<+892>: mov 0x38(%rsp),%rax
<+897>: sub %gs:0x28,%rax
<+906>: je 0xffffffff81770693 <_copy_from_iter+913>
<+908>: call 0xffffffff81d67d8d <__stack_chk_fail>
<+913>: add $0x40,%rsp
<+917>: mov %rbp,%rax
<+920>: pop %rbx
<+921>: pop %rbp
<+922>: pop %r12
<+924>: pop %r13
<+926>: pop %r14
<+928>: pop %r15
<+930>: jmp 0xffffffff81d745a0 <__x86_return_thunk>
next prev parent reply other threads:[~2023-11-15 18:35 UTC|newest]
Thread overview: 49+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-11-07 1:40 [linus:master] [iov_iter] c9eec08bac: vm-scalability.throughput -16.9% regression kernel test robot
2023-11-15 12:48 ` David Howells
2023-11-15 13:18 ` David Howells
2023-11-15 15:20 ` David Howells
2023-11-15 16:53 ` Linus Torvalds
2023-11-15 17:38 ` Linus Torvalds
2023-11-15 18:35 ` David Howells [this message]
2023-11-15 18:45 ` Linus Torvalds
2023-11-15 19:09 ` Linus Torvalds
2023-11-15 20:54 ` David Howells
2023-11-15 18:38 ` Linus Torvalds
2023-11-15 19:09 ` Borislav Petkov
2023-11-15 19:15 ` Linus Torvalds
2023-11-15 20:07 ` Linus Torvalds
2023-11-16 10:07 ` David Laight
2023-11-16 10:14 ` David Howells
2023-11-16 11:38 ` David Laight
2023-11-15 19:26 ` Linus Torvalds
2023-11-16 15:44 ` Borislav Petkov
2023-11-16 16:44 ` David Howells
2023-11-17 11:35 ` Borislav Petkov
2023-11-17 14:12 ` David Howells
2023-11-17 16:09 ` Borislav Petkov
2023-11-17 16:32 ` Linus Torvalds
2023-11-17 16:44 ` Linus Torvalds
2023-11-17 19:12 ` Borislav Petkov
2023-11-17 21:57 ` Linus Torvalds
2023-11-20 13:32 ` David Howells
2023-11-20 16:06 ` Linus Torvalds
2023-11-20 16:09 ` David Laight
2023-11-16 16:48 ` Linus Torvalds
2023-11-16 16:58 ` David Laight
2023-11-17 11:44 ` Borislav Petkov
2023-11-17 12:09 ` Jakub Jelinek
2023-11-17 12:18 ` Borislav Petkov
2023-11-17 13:09 ` David Laight
2023-11-17 13:36 ` Linus Torvalds
2023-11-17 15:20 ` David Laight
2023-11-15 21:43 ` David Howells
2023-11-15 21:50 ` Linus Torvalds
2023-11-15 21:59 ` Borislav Petkov
2023-11-15 22:59 ` David Howells
2023-11-16 3:26 ` Linus Torvalds
2023-11-16 16:55 ` David Laight
2023-11-16 17:24 ` Linus Torvalds
2023-11-16 22:53 ` David Laight
2023-11-16 21:09 ` David Howells
2023-11-16 22:36 ` Linus Torvalds
2023-11-20 11:52 ` Borislav Petkov
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=4007890.1700073334@warthog.procyon.org.uk \
--to=dhowells@redhat.com \
--cc=David.Laight@aculab.com \
--cc=axboe@kernel.dk \
--cc=brauner@kernel.org \
--cc=christian@brauner.io \
--cc=feng.tang@intel.com \
--cc=fengwei.yin@intel.com \
--cc=hch@lst.de \
--cc=linux-kernel@vger.kernel.org \
--cc=lkp@intel.com \
--cc=oe-lkp@lists.linux.dev \
--cc=oliver.sang@intel.com \
--cc=torvalds@linux-foundation.org \
--cc=viro@zeniv.linux.org.uk \
--cc=willy@infradead.org \
--cc=ying.huang@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.