LinuxPPC-Dev Archive on lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v3 6/6] ibmvfc: advertise client support for targetWWPN using v2 commands
From: Tyrel Datwyler @ 2020-11-18  1:11 UTC (permalink / raw)
  To: james.bottomley
  Cc: Tyrel Datwyler, martin.petersen, linux-scsi, linux-kernel, brking,
	linuxppc-dev
In-Reply-To: <20201118011104.296999-1-tyreld@linux.ibm.com>

The previous patch added support for the targetWWPN field in version 2
MADs and vfcFrame structures.

Set the IBMVFC_CAN_SEND_VF_WWPN bit in our capabilities flag during NPIV
Login to inform the VIOS that this client supports the feature.

Signed-off-by: Tyrel Datwyler <tyreld@linux.ibm.com>
---
 drivers/scsi/ibmvscsi/ibmvfc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c
index 83627e11e85e..42e4d35e0d35 100644
--- a/drivers/scsi/ibmvscsi/ibmvfc.c
+++ b/drivers/scsi/ibmvscsi/ibmvfc.c
@@ -1255,7 +1255,7 @@ static void ibmvfc_set_login_info(struct ibmvfc_host *vhost)
 		login_info->flags |= cpu_to_be16(IBMVFC_CLIENT_MIGRATED);
 
 	login_info->max_cmds = cpu_to_be32(max_requests + IBMVFC_NUM_INTERNAL_REQ);
-	login_info->capabilities = cpu_to_be64(IBMVFC_CAN_MIGRATE);
+	login_info->capabilities = cpu_to_be64(IBMVFC_CAN_MIGRATE | IBMVFC_CAN_SEND_VF_WWPN);
 	login_info->async.va = cpu_to_be64(vhost->async_crq.msg_token);
 	login_info->async.len = cpu_to_be32(vhost->async_crq.size * sizeof(*vhost->async_crq.msgs));
 	strncpy(login_info->partition_name, vhost->partition_name, IBMVFC_MAX_NAME);
-- 
2.27.0


^ permalink raw reply related

* [powerpc:merge] BUILD SUCCESS 9d1aa2f025c6cc516125c42c70f6a9ce087c49ea
From: kernel test robot @ 2020-11-18  1:47 UTC (permalink / raw)
  To: Michael Ellerman; +Cc: linuxppc-dev

tree/branch: https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git  merge
branch HEAD: 9d1aa2f025c6cc516125c42c70f6a9ce087c49ea  Automatic merge of 'fixes' into merge (2020-11-17 22:09)

elapsed time: 859m

configs tested: 171
configs skipped: 2

The following configs have been built successfully.
More configs may be tested in the coming days.

gcc tested configs:
arm                                 defconfig
arm64                            allyesconfig
arm64                               defconfig
arm                              allyesconfig
arm                              allmodconfig
arm                           efm32_defconfig
powerpc                    sam440ep_defconfig
arm                       imx_v4_v5_defconfig
mips                         tb0219_defconfig
um                           x86_64_defconfig
powerpc                 mpc8315_rdb_defconfig
nios2                         10m50_defconfig
powerpc                       eiger_defconfig
m68k                        mvme147_defconfig
xtensa                          iss_defconfig
mips                           ci20_defconfig
c6x                        evmc6678_defconfig
m68k                        m5407c3_defconfig
arc                 nsimosci_hs_smp_defconfig
ia64                      gensparse_defconfig
powerpc64                        alldefconfig
mips                malta_kvm_guest_defconfig
powerpc                     redwood_defconfig
arm                        cerfcube_defconfig
arc                              alldefconfig
sh                            hp6xx_defconfig
s390                       zfcpdump_defconfig
openrisc                         alldefconfig
powerpc                 xes_mpc85xx_defconfig
arc                         haps_hs_defconfig
arc                    vdk_hs38_smp_defconfig
arm                          pxa910_defconfig
sh                          sdk7786_defconfig
powerpc                     tqm8555_defconfig
powerpc                     tqm8560_defconfig
arm                         orion5x_defconfig
powerpc                      mgcoge_defconfig
mips                           mtx1_defconfig
sh                          landisk_defconfig
sh                         apsh4a3a_defconfig
powerpc                        warp_defconfig
arm                            u300_defconfig
arm                      tct_hammer_defconfig
c6x                         dsk6455_defconfig
csky                             alldefconfig
sparc64                             defconfig
arm                           viper_defconfig
m68k                          hp300_defconfig
arm                          gemini_defconfig
mips                        bcm47xx_defconfig
arm                       mainstone_defconfig
mips                       bmips_be_defconfig
xtensa                       common_defconfig
arm                        neponset_defconfig
arm                            zeus_defconfig
arm                        clps711x_defconfig
sh                         ap325rxa_defconfig
mips                     cu1000-neo_defconfig
sh                  sh7785lcr_32bit_defconfig
arm                          iop32x_defconfig
mips                      loongson3_defconfig
powerpc                     stx_gp3_defconfig
arc                     nsimosci_hs_defconfig
mips                      bmips_stb_defconfig
powerpc                   lite5200b_defconfig
sh                        apsh4ad0a_defconfig
s390                             alldefconfig
arm                           sunxi_defconfig
s390                          debug_defconfig
sh                            titan_defconfig
arm                        oxnas_v6_defconfig
mips                        workpad_defconfig
powerpc                     ppa8548_defconfig
mips                         cobalt_defconfig
microblaze                    nommu_defconfig
mips                         mpc30x_defconfig
powerpc                     skiroot_defconfig
ia64                             allmodconfig
ia64                                defconfig
ia64                             allyesconfig
m68k                             allmodconfig
m68k                                defconfig
m68k                             allyesconfig
nios2                               defconfig
arc                              allyesconfig
nds32                             allnoconfig
c6x                              allyesconfig
nds32                               defconfig
nios2                            allyesconfig
csky                                defconfig
alpha                               defconfig
alpha                            allyesconfig
xtensa                           allyesconfig
h8300                            allyesconfig
arc                                 defconfig
sh                               allmodconfig
parisc                              defconfig
s390                             allyesconfig
parisc                           allyesconfig
s390                                defconfig
i386                             allyesconfig
sparc                            allyesconfig
sparc                               defconfig
i386                                defconfig
mips                             allyesconfig
mips                             allmodconfig
powerpc                          allyesconfig
powerpc                          allmodconfig
powerpc                           allnoconfig
i386                 randconfig-a006-20201117
i386                 randconfig-a005-20201117
i386                 randconfig-a001-20201117
i386                 randconfig-a002-20201117
i386                 randconfig-a004-20201117
i386                 randconfig-a003-20201117
i386                 randconfig-a006-20201116
i386                 randconfig-a005-20201116
i386                 randconfig-a001-20201116
i386                 randconfig-a002-20201116
i386                 randconfig-a004-20201116
i386                 randconfig-a003-20201116
x86_64               randconfig-a015-20201115
x86_64               randconfig-a011-20201115
x86_64               randconfig-a014-20201115
x86_64               randconfig-a013-20201115
x86_64               randconfig-a016-20201115
x86_64               randconfig-a012-20201115
x86_64               randconfig-a003-20201116
x86_64               randconfig-a005-20201116
x86_64               randconfig-a004-20201116
x86_64               randconfig-a002-20201116
x86_64               randconfig-a001-20201116
x86_64               randconfig-a006-20201116
i386                 randconfig-a012-20201116
i386                 randconfig-a014-20201116
i386                 randconfig-a016-20201116
i386                 randconfig-a011-20201116
i386                 randconfig-a015-20201116
i386                 randconfig-a013-20201116
i386                 randconfig-a012-20201117
i386                 randconfig-a014-20201117
i386                 randconfig-a016-20201117
i386                 randconfig-a011-20201117
i386                 randconfig-a015-20201117
i386                 randconfig-a013-20201117
riscv                    nommu_k210_defconfig
riscv                            allyesconfig
riscv                    nommu_virt_defconfig
riscv                             allnoconfig
riscv                               defconfig
riscv                          rv32_defconfig
riscv                            allmodconfig
x86_64                                   rhel
x86_64                           allyesconfig
x86_64                    rhel-7.6-kselftests
x86_64                              defconfig
x86_64                               rhel-8.3
x86_64                                  kexec

clang tested configs:
x86_64               randconfig-a003-20201115
x86_64               randconfig-a015-20201116
x86_64               randconfig-a011-20201116
x86_64               randconfig-a014-20201116
x86_64               randconfig-a013-20201116
x86_64               randconfig-a016-20201116
x86_64               randconfig-a012-20201116
x86_64               randconfig-a003-20201117
x86_64               randconfig-a005-20201117
x86_64               randconfig-a004-20201117
x86_64               randconfig-a002-20201117
x86_64               randconfig-a001-20201117
x86_64               randconfig-a006-20201117

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org

^ permalink raw reply

* [powerpc:fixes-test] BUILD SUCCESS e02152ba2810f7c88cb54e71cda096268dfa9241
From: kernel test robot @ 2020-11-18  1:47 UTC (permalink / raw)
  To: Michael Ellerman; +Cc: linuxppc-dev

tree/branch: https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git  fixes-test
branch HEAD: e02152ba2810f7c88cb54e71cda096268dfa9241  powerpc: Drop -me200 addition to build flags

elapsed time: 860m

configs tested: 194
configs skipped: 103

The following configs have been built successfully.
More configs may be tested in the coming days.

gcc tested configs:
arm                                 defconfig
arm64                            allyesconfig
arm64                               defconfig
arm                              allyesconfig
arm                              allmodconfig
arm                           efm32_defconfig
powerpc                    sam440ep_defconfig
arm                       imx_v4_v5_defconfig
mips                         tb0219_defconfig
um                           x86_64_defconfig
powerpc                 mpc8315_rdb_defconfig
h8300                     edosk2674_defconfig
powerpc                 mpc8313_rdb_defconfig
sh                               j2_defconfig
sparc                       sparc64_defconfig
mips                       bmips_be_defconfig
ia64                            zx1_defconfig
powerpc                 mpc836x_rdk_defconfig
xtensa                generic_kc705_defconfig
powerpc                     tqm8540_defconfig
sh                          rsk7201_defconfig
nios2                         10m50_defconfig
powerpc                       eiger_defconfig
m68k                        mvme147_defconfig
xtensa                          iss_defconfig
mips                           ci20_defconfig
c6x                        evmc6678_defconfig
m68k                        m5407c3_defconfig
arc                 nsimosci_hs_smp_defconfig
ia64                      gensparse_defconfig
powerpc64                        alldefconfig
mips                malta_kvm_guest_defconfig
powerpc                     redwood_defconfig
arm                        cerfcube_defconfig
arc                              alldefconfig
sh                            hp6xx_defconfig
i386                             alldefconfig
xtensa                    smp_lx200_defconfig
parisc                generic-32bit_defconfig
s390                       zfcpdump_defconfig
openrisc                         alldefconfig
powerpc                 xes_mpc85xx_defconfig
arc                         haps_hs_defconfig
arc                    vdk_hs38_smp_defconfig
arm                          pxa910_defconfig
sh                           se7751_defconfig
sh                        sh7785lcr_defconfig
mips                       lemote2f_defconfig
sh                          sdk7786_defconfig
powerpc                     tqm8555_defconfig
powerpc                     tqm8560_defconfig
arm                         orion5x_defconfig
mips                             allyesconfig
powerpc                      mgcoge_defconfig
mips                           mtx1_defconfig
m68k                             allmodconfig
sh                          landisk_defconfig
sh                         apsh4a3a_defconfig
powerpc                        warp_defconfig
arm                            u300_defconfig
arm                      tct_hammer_defconfig
c6x                         dsk6455_defconfig
csky                             alldefconfig
sparc64                             defconfig
arm                           viper_defconfig
powerpc                    amigaone_defconfig
mips                  maltasmvp_eva_defconfig
mips                      bmips_stb_defconfig
arm                     am200epdkit_defconfig
c6x                              allyesconfig
arm                       mainstone_defconfig
xtensa                       common_defconfig
arm                        neponset_defconfig
mips                          rb532_defconfig
h8300                    h8300h-sim_defconfig
arc                          axs103_defconfig
arc                                 defconfig
arm                            zeus_defconfig
arm                        clps711x_defconfig
sh                         ap325rxa_defconfig
mips                     cu1000-neo_defconfig
sh                  sh7785lcr_32bit_defconfig
arm                          iop32x_defconfig
mips                      loongson3_defconfig
powerpc                     stx_gp3_defconfig
arc                     nsimosci_hs_defconfig
powerpc                   lite5200b_defconfig
sh                        apsh4ad0a_defconfig
s390                             alldefconfig
arm                           sunxi_defconfig
s390                          debug_defconfig
sh                            titan_defconfig
arm                        oxnas_v6_defconfig
arm                         lpc18xx_defconfig
powerpc                    klondike_defconfig
arm                          prima2_defconfig
sparc64                          alldefconfig
mips                 decstation_r4k_defconfig
arm                          imote2_defconfig
mips                        workpad_defconfig
powerpc                     ppa8548_defconfig
mips                         cobalt_defconfig
microblaze                    nommu_defconfig
csky                                defconfig
mips                         mpc30x_defconfig
riscv                               defconfig
powerpc                     skiroot_defconfig
ia64                             allmodconfig
ia64                                defconfig
ia64                             allyesconfig
m68k                                defconfig
m68k                             allyesconfig
nios2                               defconfig
arc                              allyesconfig
nds32                             allnoconfig
nds32                               defconfig
nios2                            allyesconfig
alpha                               defconfig
alpha                            allyesconfig
xtensa                           allyesconfig
h8300                            allyesconfig
sh                               allmodconfig
parisc                              defconfig
s390                             allyesconfig
parisc                           allyesconfig
s390                                defconfig
i386                             allyesconfig
sparc                            allyesconfig
sparc                               defconfig
i386                                defconfig
mips                             allmodconfig
powerpc                          allyesconfig
powerpc                          allmodconfig
powerpc                           allnoconfig
i386                 randconfig-a006-20201117
i386                 randconfig-a005-20201117
i386                 randconfig-a001-20201117
i386                 randconfig-a002-20201117
i386                 randconfig-a004-20201117
i386                 randconfig-a003-20201117
i386                 randconfig-a006-20201116
i386                 randconfig-a005-20201116
i386                 randconfig-a001-20201116
i386                 randconfig-a002-20201116
i386                 randconfig-a004-20201116
i386                 randconfig-a003-20201116
x86_64               randconfig-a015-20201115
x86_64               randconfig-a011-20201115
x86_64               randconfig-a014-20201115
x86_64               randconfig-a013-20201115
x86_64               randconfig-a016-20201115
x86_64               randconfig-a012-20201115
x86_64               randconfig-a003-20201116
x86_64               randconfig-a005-20201116
x86_64               randconfig-a004-20201116
x86_64               randconfig-a002-20201116
x86_64               randconfig-a001-20201116
x86_64               randconfig-a006-20201116
i386                 randconfig-a012-20201116
i386                 randconfig-a014-20201116
i386                 randconfig-a016-20201116
i386                 randconfig-a011-20201116
i386                 randconfig-a015-20201116
i386                 randconfig-a013-20201116
i386                 randconfig-a012-20201117
i386                 randconfig-a014-20201117
i386                 randconfig-a016-20201117
i386                 randconfig-a011-20201117
i386                 randconfig-a015-20201117
i386                 randconfig-a013-20201117
riscv                    nommu_k210_defconfig
riscv                            allyesconfig
riscv                    nommu_virt_defconfig
riscv                             allnoconfig
riscv                          rv32_defconfig
riscv                            allmodconfig
x86_64                                   rhel
x86_64                           allyesconfig
x86_64                    rhel-7.6-kselftests
x86_64                              defconfig
x86_64                               rhel-8.3
x86_64                                  kexec

clang tested configs:
x86_64               randconfig-a003-20201117
x86_64               randconfig-a005-20201117
x86_64               randconfig-a004-20201117
x86_64               randconfig-a002-20201117
x86_64               randconfig-a001-20201117
x86_64               randconfig-a006-20201117
x86_64               randconfig-a015-20201116
x86_64               randconfig-a011-20201116
x86_64               randconfig-a014-20201116
x86_64               randconfig-a013-20201116
x86_64               randconfig-a016-20201116
x86_64               randconfig-a012-20201116

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org

^ permalink raw reply

* Re: [PATCH v2 7/8] powerpc/mm: remove linear mapping if __add_pages() fails in arch_add_memory()
From: Michael Ellerman @ 2020-11-18  2:00 UTC (permalink / raw)
  To: David Hildenbrand, Oscar Salvador
  Cc: Michal Hocko, Wei Yang, linux-kernel, linux-mm, Paul Mackerras,
	Rashmica Gupta, linuxppc-dev, Andrew Morton, Mike Rapoport
In-Reply-To: <dac16bcc-8e37-8cb2-ac61-912a17ab5985@redhat.com>

David Hildenbrand <david@redhat.com> writes:
> On 17.11.20 16:51, Oscar Salvador wrote:
>> On Wed, Nov 11, 2020 at 03:53:21PM +0100, David Hildenbrand wrote:
>>> Let's revert what we did in case seomthing goes wrong and we return an
>> "something" :-)
>
> Thanks! :)
>
> @Michael, I assume if I don't have to resend, this can be fixed up?

Yep, I fixed it up.

cheers

^ permalink raw reply

* Re: [PATCH 2/2] kbuild: Disable CONFIG_LD_ORPHAN_WARN for ld.lld 10.0.1
From: Nathan Chancellor @ 2020-11-18  3:12 UTC (permalink / raw)
  To: Nick Desaulniers
  Cc: Michal Marek, Kees Cook, kernelci . org bot,
	Linux Kbuild mailing list, Mark Brown, Catalin Marinas,
	Masahiro Yamada, maintainer:X86 ARCHITECTURE (32-BIT AND 64-BIT),
	Russell King, LKML, linuxppc-dev, Arvind Sankar, Ingo Molnar,
	Borislav Petkov, clang-built-linux, Thomas Gleixner, Will Deacon,
	Linux ARM
In-Reply-To: <CAKwvOdni24b_70xm+xK_7r2N77WrsOk4_OgoLiwSzZ5f+6vqfA@mail.gmail.com>

On Tue, Nov 17, 2020 at 11:41:15AM -0800, Nick Desaulniers wrote:
> On Fri, Nov 13, 2020 at 11:56 AM Nathan Chancellor
> <natechancellor@gmail.com> wrote:
> >
> > ld.lld 10.0.1 spews a bunch of various warnings about .rela sections,
> > along with a few others. Newer versions of ld.lld do not have these
> > warnings. As a result, do not add '--orphan-handling=warn' to
> > LDFLAGS_vmlinux if ld.lld's version is not new enough.
> >
> > Reported-by: Arvind Sankar <nivedita@alum.mit.edu>
> > Reported-by: kernelci.org bot <bot@kernelci.org>
> > Reported-by: Mark Brown <broonie@kernel.org>
> > Link: https://github.com/ClangBuiltLinux/linux/issues/1187
> > Link: https://github.com/ClangBuiltLinux/linux/issues/1193
> > Signed-off-by: Nathan Chancellor <natechancellor@gmail.com>
> > ---
> >  MAINTAINERS            |  1 +
> >  init/Kconfig           |  6 +++++-
> >  scripts/lld-version.sh | 20 ++++++++++++++++++++
> >  3 files changed, 26 insertions(+), 1 deletion(-)
> >  create mode 100755 scripts/lld-version.sh
> >
> > diff --git a/MAINTAINERS b/MAINTAINERS
> > index 3da6d8c154e4..4b83d3591ec7 100644
> > --- a/MAINTAINERS
> > +++ b/MAINTAINERS
> > @@ -4284,6 +4284,7 @@ B:        https://github.com/ClangBuiltLinux/linux/issues
> >  C:     irc://chat.freenode.net/clangbuiltlinux
> >  F:     Documentation/kbuild/llvm.rst
> >  F:     scripts/clang-tools/
> > +F:     scripts/lld-version.sh
> >  K:     \b(?i:clang|llvm)\b
> >
> >  CLEANCACHE API
> > diff --git a/init/Kconfig b/init/Kconfig
> > index a270716562de..40c9ca60ac1d 100644
> > --- a/init/Kconfig
> > +++ b/init/Kconfig
> > @@ -47,6 +47,10 @@ config CLANG_VERSION
> >         int
> >         default $(shell,$(srctree)/scripts/clang-version.sh $(CC))
> >
> > +config LLD_VERSION
> > +       int
> > +       default $(shell,$(srctree)/scripts/lld-version.sh $(LD))
> > +
> >  config CC_CAN_LINK
> >         bool
> >         default $(success,$(srctree)/scripts/cc-can-link.sh $(CC) $(CLANG_FLAGS) $(m64-flag)) if 64BIT
> > @@ -1349,7 +1353,7 @@ config LD_DEAD_CODE_DATA_ELIMINATION
> >           own risk.
> >
> >  config LD_ORPHAN_WARN
> > -       def_bool ARCH_WANT_LD_ORPHAN_WARN && $(ld-option,--orphan-handling=warn)
> > +       def_bool ARCH_WANT_LD_ORPHAN_WARN && $(ld-option,--orphan-handling=warn) && (!LD_IS_LLD || LLD_VERSION >= 110000)
> >
> >  config SYSCTL
> >         bool
> > diff --git a/scripts/lld-version.sh b/scripts/lld-version.sh
> > new file mode 100755
> > index 000000000000..cc779f412e39
> > --- /dev/null
> > +++ b/scripts/lld-version.sh
> > @@ -0,0 +1,20 @@
> > +#!/bin/sh
> > +# SPDX-License-Identifier: GPL-2.0
> > +#
> > +# ld.lld-version ld.lld-command
> 
> ^ it looks like this format was copied from scripts/gcc-version, but
> it's kind of curious/cryptic to me for a comment about usage.  Is it
> necessary?  A comment in the form:
> 
> # Usage: ./scripts/lld-version.sh ld.lld
> 
> Would be clearer to me.
> 
> > +#
> > +# Print the linker version of `ld.lld-command' in a 5 or 6-digit form
> > +# such as `100001' for ld.lld 10.0.1 etc.
> > +
> > +linker="$*"
> > +
> > +if ! ( $linker --version | grep -q LLD ); then
> > +       echo 0
> > +       exit 1
> > +fi
> > +
> > +VERSION=$($linker --version | cut -d ' ' -f 2)
> 
> This is going to invoke the linker potentially twice if it's LLD.
> Would it be nicer to capture the output of `$linker --version`, check
> which linker it is, then slice that up via `cut` to get the version?
> 
> This version is fine to me, but if you're going to send a v2, that
> might be a nice slight cleanup.  Otherwise,
> 
> Reviewed-by: Nick Desaulniers <ndesaulniers@google.com>
> Tested-by: Nick Desaulniers <ndesaulniers@google.com>
> 
> (Please drop those tags if you modify this for v2 and I'll rereview/retest).

Below is the impending v2 in case you want to give it an early test. I plan
to send it along formally Thursday morning, with all of the feedback so far
addressed.

Cheers,
Nathan

======================================================================

From 1ef9b12daf2b19ed6687423483d5bb1f5cf82e13 Mon Sep 17 00:00:00 2001
From: Nathan Chancellor <natechancellor@gmail.com>
Date: Tue, 17 Nov 2020 20:11:26 -0700
Subject: [PATCH] kbuild: Disable CONFIG_LD_ORPHAN_WARN for ld.lld 10.0.1

ld.lld 10.0.1 spews a bunch of various warnings about .rela sections,
along with a few others. Newer versions of ld.lld do not have these
warnings. As a result, do not add '--orphan-handling=warn' to
LDFLAGS_vmlinux if ld.lld's version is not new enough.

Link: https://github.com/ClangBuiltLinux/linux/issues/1187
Link: https://github.com/ClangBuiltLinux/linux/issues/1193
Reported-by: Arvind Sankar <nivedita@alum.mit.edu>
Reported-by: kernelci.org bot <bot@kernelci.org>
Reported-by: Mark Brown <broonie@kernel.org>
Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Nathan Chancellor <natechancellor@gmail.com>
---
 MAINTAINERS            |  1 +
 init/Kconfig           |  9 ++++++++-
 scripts/lld-version.sh | 20 ++++++++++++++++++++
 3 files changed, 29 insertions(+), 1 deletion(-)
 create mode 100755 scripts/lld-version.sh

diff --git a/MAINTAINERS b/MAINTAINERS
index e451dcce054f..e6f74f130ae1 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4284,6 +4284,7 @@ B:	https://github.com/ClangBuiltLinux/linux/issues
 C:	irc://chat.freenode.net/clangbuiltlinux
 F:	Documentation/kbuild/llvm.rst
 F:	scripts/clang-tools/
+F:	scripts/lld-version.sh
 K:	\b(?i:clang|llvm)\b
 
 CLEANCACHE API
diff --git a/init/Kconfig b/init/Kconfig
index a270716562de..b9037d6c5ab3 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -47,6 +47,10 @@ config CLANG_VERSION
 	int
 	default $(shell,$(srctree)/scripts/clang-version.sh $(CC))
 
+config LLD_VERSION
+	int
+	default $(shell,$(srctree)/scripts/lld-version.sh $(LD))
+
 config CC_CAN_LINK
 	bool
 	default $(success,$(srctree)/scripts/cc-can-link.sh $(CC) $(CLANG_FLAGS) $(m64-flag)) if 64BIT
@@ -1349,7 +1353,10 @@ config LD_DEAD_CODE_DATA_ELIMINATION
 	  own risk.
 
 config LD_ORPHAN_WARN
-	def_bool ARCH_WANT_LD_ORPHAN_WARN && $(ld-option,--orphan-handling=warn)
+	def_bool y
+	depends on ARCH_WANT_LD_ORPHAN_WARN
+	depends on !LD_IS_LLD || LLD_VERSION >= 110000
+	depends on $(ld-option,--orphan-handling=warn)
 
 config SYSCTL
 	bool
diff --git a/scripts/lld-version.sh b/scripts/lld-version.sh
new file mode 100755
index 000000000000..d70edb4d8a4f
--- /dev/null
+++ b/scripts/lld-version.sh
@@ -0,0 +1,20 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+#
+# Usage: $ ./scripts/lld-version.sh ld.lld
+#
+# Print the linker version of `ld.lld' in a 5 or 6-digit form
+# such as `100001' for ld.lld 10.0.1 etc.
+
+linker_string="$($* --version)"
+
+if ! ( echo $linker_string | grep -q LLD ); then
+	echo 0
+	exit 1
+fi
+
+VERSION=$(echo $linker_string | cut -d ' ' -f 2)
+MAJOR=$(echo $VERSION | cut -d . -f 1)
+MINOR=$(echo $VERSION | cut -d . -f 2)
+PATCHLEVEL=$(echo $VERSION | cut -d . -f 3)
+printf "%d%02d%02d\\n" $MAJOR $MINOR $PATCHLEVEL
-- 
2.29.2


^ permalink raw reply related

* Re: [PATCH 2/2] kbuild: Disable CONFIG_LD_ORPHAN_WARN for ld.lld 10.0.1
From: Nathan Chancellor @ 2020-11-18  3:12 UTC (permalink / raw)
  To: Kees Cook
  Cc: Michal Marek, kernelci . org bot, linux-kbuild, Mark Brown,
	Catalin Marinas, Masahiro Yamada, x86, Nick Desaulniers,
	Russell King, linux-kernel, linuxppc-dev, Arvind Sankar,
	Ingo Molnar, Borislav Petkov, clang-built-linux, Thomas Gleixner,
	Will Deacon, linux-arm-kernel
In-Reply-To: <202011171350.F95127F@keescook>

On Tue, Nov 17, 2020 at 01:51:43PM -0800, Kees Cook wrote:
> On Fri, Nov 13, 2020 at 12:55:53PM -0700, Nathan Chancellor wrote:
> >  config LD_ORPHAN_WARN
> > -	def_bool ARCH_WANT_LD_ORPHAN_WARN && $(ld-option,--orphan-handling=warn)
> > +	def_bool ARCH_WANT_LD_ORPHAN_WARN && $(ld-option,--orphan-handling=warn) && (!LD_IS_LLD || LLD_VERSION >= 110000)
> 
> Readability nit-pick... I prefer separate "depends" lines to make things
> a little easier to parse, change, etc:
> 
> config LD_ORPHAN_WARN
> 	def_bool y
> 	depends on ARCH_WANT_LD_ORPHAN_WARN
> 	depends on !LD_IS_LLD || LLD_VERSION >= 110000
> 	depends on $(ld-option,--orphan-handling=warn)
> 
> Otherwise, yeah, looks good to me. With this and the other suggestions,
> please consider it:
> 
> Reviewed-by: Kees Cook <keescook@chromium.org>

Thank you, I have updated it locally for v2!

Cheers,
Nathan

^ permalink raw reply

* Re: [PATCH 2/4] powerpc/perf: Update the PMU group constraints for l2l3 and threshold events in power10
From: Michael Ellerman @ 2020-11-18  4:32 UTC (permalink / raw)
  To: Athira Rajeev; +Cc: mikey, maddy, linuxppc-dev
In-Reply-To: <1605069189-2740-3-git-send-email-atrajeev@linux.vnet.ibm.com>

Athira Rajeev <atrajeev@linux.vnet.ibm.com> writes:
> In Power9, L2/L3 bus events are always available as a
> "bank" of 4 events. To obtain the counts for any of the
> l2/l3 bus events in a given bank, the user will have to
> program PMC4 with corresponding l2/l3 bus event for that
> bank.
>
> Commit 59029136d750 ("powerpc/perf: Add constraints for power9 l2/l3 bus events")
> enforced this rule in Power9. But this is not valid for
> Power10, since in Power10 Monitor Mode Control Register2
> (MMCR2) has bits to configure l2/l3 event bits. Hence remove
> this PMC4 constraint check from power10.
>
> Since the l2/l3 bits in MMCR2 are not per-pmc, patch handles
> group constraint checks for l2/l3 bits in MMCR2.

> Patch also updates constraints for threshold events in power10.

That should be done in a separate patch please.

cheers

^ permalink raw reply

* Re: [PATCH 3/4] powerpc/perf: Fix to update l2l3 events and generic event codes for power10
From: Michael Ellerman @ 2020-11-18  4:36 UTC (permalink / raw)
  To: Athira Rajeev; +Cc: mikey, maddy, linuxppc-dev
In-Reply-To: <1605069189-2740-4-git-send-email-atrajeev@linux.vnet.ibm.com>

Athira Rajeev <atrajeev@linux.vnet.ibm.com> writes:
> Fix the event code for events: branch-instructions (to PM_BR_FIN),
> branch-misses (to PM_BR_MPRED_FIN) and cache-misses (to
> PM_LD_DEMAND_MISS_L1_FIN) for power10 PMU. Update the
> list of generic events with this modified event code.

That should be one patch.

> Export l2l3 events (PM_L2_ST_MISS and PM_L2_ST) and LLC-prefetches
> (PM_L3_PF_MISS_L3) via sysfs, and also add these to cache_events.

That should be another patch.

> To maintain the current event code work with DD1, rename
> existing array of generic_events, cache_events and pmu_attr_groups
> with suffix _dd1. Update the power10 pmu init code to pick the
> dd1 list while registering the power PMU, based on the pvr
> (Processor Version Register) value.

And that should be a third patch.

cheers

> diff --git a/arch/powerpc/perf/power10-events-list.h b/arch/powerpc/perf/power10-events-list.h
> index 60c1b81..9e0b3c9 100644
> --- a/arch/powerpc/perf/power10-events-list.h
> +++ b/arch/powerpc/perf/power10-events-list.h
> @@ -15,6 +15,9 @@
>  EVENT(PM_RUN_INST_CMPL,				0x500fa);
>  EVENT(PM_BR_CMPL,                               0x4d05e);
>  EVENT(PM_BR_MPRED_CMPL,                         0x400f6);
> +EVENT(PM_BR_FIN,				0x2f04a);
> +EVENT(PM_BR_MPRED_FIN,				0x35884);
> +EVENT(PM_LD_DEMAND_MISS_L1_FIN,			0x400f0);
>  
>  /* All L1 D cache load references counted at finish, gated by reject */
>  EVENT(PM_LD_REF_L1,				0x100fc);
> @@ -36,6 +39,12 @@
>  EVENT(PM_DATA_FROM_L3,				0x01340000001c040);
>  /* Demand LD - L3 Miss (not L2 hit and not L3 hit) */
>  EVENT(PM_DATA_FROM_L3MISS,			0x300fe);
> +/* All successful D-side store dispatches for this thread */
> +EVENT(PM_L2_ST,					0x010000046080);
> +/* All successful D-side store dispatches for this thread that were L2 Miss */
> +EVENT(PM_L2_ST_MISS,				0x26880);
> +/* Total HW L3 prefetches(Load+store) */
> +EVENT(PM_L3_PF_MISS_L3,				0x100000016080);
>  /* Data PTEG reload */
>  EVENT(PM_DTLB_MISS,				0x300fc);
>  /* ITLB Reloaded */
> diff --git a/arch/powerpc/perf/power10-pmu.c b/arch/powerpc/perf/power10-pmu.c
> index cf44fb7..86665ad 100644
> --- a/arch/powerpc/perf/power10-pmu.c
> +++ b/arch/powerpc/perf/power10-pmu.c
> @@ -114,6 +114,9 @@ static int power10_get_alternatives(u64 event, unsigned int flags, u64 alt[])
>  GENERIC_EVENT_ATTR(cache-misses,		PM_LD_MISS_L1);
>  GENERIC_EVENT_ATTR(mem-loads,			MEM_LOADS);
>  GENERIC_EVENT_ATTR(mem-stores,			MEM_STORES);
> +GENERIC_EVENT_ATTR(branch-instructions,         PM_BR_FIN);
> +GENERIC_EVENT_ATTR(branch-misses,               PM_BR_MPRED_FIN);
> +GENERIC_EVENT_ATTR(cache-misses,		PM_LD_DEMAND_MISS_L1_FIN);
>  
>  CACHE_EVENT_ATTR(L1-dcache-load-misses,		PM_LD_MISS_L1);
>  CACHE_EVENT_ATTR(L1-dcache-loads,		PM_LD_REF_L1);
> @@ -124,12 +127,15 @@ static int power10_get_alternatives(u64 event, unsigned int flags, u64 alt[])
>  CACHE_EVENT_ATTR(L1-icache-prefetches,		PM_IC_PREF_REQ);
>  CACHE_EVENT_ATTR(LLC-load-misses,		PM_DATA_FROM_L3MISS);
>  CACHE_EVENT_ATTR(LLC-loads,			PM_DATA_FROM_L3);
> +CACHE_EVENT_ATTR(LLC-prefetches,		PM_L3_PF_MISS_L3);
> +CACHE_EVENT_ATTR(LLC-store-misses,		PM_L2_ST_MISS);
> +CACHE_EVENT_ATTR(LLC-stores,			PM_L2_ST);
>  CACHE_EVENT_ATTR(branch-load-misses,		PM_BR_MPRED_CMPL);
>  CACHE_EVENT_ATTR(branch-loads,			PM_BR_CMPL);
>  CACHE_EVENT_ATTR(dTLB-load-misses,		PM_DTLB_MISS);
>  CACHE_EVENT_ATTR(iTLB-load-misses,		PM_ITLB_MISS);
>  
> -static struct attribute *power10_events_attr[] = {
> +static struct attribute *power10_events_attr_dd1[] = {
>  	GENERIC_EVENT_PTR(PM_RUN_CYC),
>  	GENERIC_EVENT_PTR(PM_RUN_INST_CMPL),
>  	GENERIC_EVENT_PTR(PM_BR_CMPL),
> @@ -154,11 +160,44 @@ static int power10_get_alternatives(u64 event, unsigned int flags, u64 alt[])
>  	NULL
>  };
>  
> +static struct attribute *power10_events_attr[] = {
> +	GENERIC_EVENT_PTR(PM_RUN_CYC),
> +	GENERIC_EVENT_PTR(PM_RUN_INST_CMPL),
> +	GENERIC_EVENT_PTR(PM_BR_FIN),
> +	GENERIC_EVENT_PTR(PM_BR_MPRED_FIN),
> +	GENERIC_EVENT_PTR(PM_LD_REF_L1),
> +	GENERIC_EVENT_PTR(PM_LD_DEMAND_MISS_L1_FIN),
> +	GENERIC_EVENT_PTR(MEM_LOADS),
> +	GENERIC_EVENT_PTR(MEM_STORES),
> +	CACHE_EVENT_PTR(PM_LD_MISS_L1),
> +	CACHE_EVENT_PTR(PM_LD_REF_L1),
> +	CACHE_EVENT_PTR(PM_LD_PREFETCH_CACHE_LINE_MISS),
> +	CACHE_EVENT_PTR(PM_ST_MISS_L1),
> +	CACHE_EVENT_PTR(PM_L1_ICACHE_MISS),
> +	CACHE_EVENT_PTR(PM_INST_FROM_L1),
> +	CACHE_EVENT_PTR(PM_IC_PREF_REQ),
> +	CACHE_EVENT_PTR(PM_DATA_FROM_L3MISS),
> +	CACHE_EVENT_PTR(PM_DATA_FROM_L3),
> +	CACHE_EVENT_PTR(PM_L3_PF_MISS_L3),
> +	CACHE_EVENT_PTR(PM_L2_ST_MISS),
> +	CACHE_EVENT_PTR(PM_L2_ST),
> +	CACHE_EVENT_PTR(PM_BR_MPRED_CMPL),
> +	CACHE_EVENT_PTR(PM_BR_CMPL),
> +	CACHE_EVENT_PTR(PM_DTLB_MISS),
> +	CACHE_EVENT_PTR(PM_ITLB_MISS),
> +	NULL
> +};
> +
>  static struct attribute_group power10_pmu_events_group = {
>  	.name = "events",
>  	.attrs = power10_events_attr,
>  };
>  
> +static struct attribute_group power10_pmu_events_group_dd1 = {
> +	.name = "events",
> +	.attrs = power10_events_attr_dd1,
> +};
> +
>  PMU_FORMAT_ATTR(event,          "config:0-59");
>  PMU_FORMAT_ATTR(pmcxsel,        "config:0-7");
>  PMU_FORMAT_ATTR(mark,           "config:8");
> @@ -211,7 +250,13 @@ static int power10_get_alternatives(u64 event, unsigned int flags, u64 alt[])
>  	NULL,
>  };
>  
> -static int power10_generic_events[] = {
> +static const struct attribute_group *power10_pmu_attr_groups_dd1[] = {
> +	&power10_pmu_format_group,
> +	&power10_pmu_events_group_dd1,
> +	NULL,
> +};
> +
> +static int power10_generic_events_dd1[] = {
>  	[PERF_COUNT_HW_CPU_CYCLES] =			PM_RUN_CYC,
>  	[PERF_COUNT_HW_INSTRUCTIONS] =			PM_RUN_INST_CMPL,
>  	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] =		PM_BR_CMPL,
> @@ -220,6 +265,15 @@ static int power10_get_alternatives(u64 event, unsigned int flags, u64 alt[])
>  	[PERF_COUNT_HW_CACHE_MISSES] =			PM_LD_MISS_L1,
>  };
>  
> +static int power10_generic_events[] = {
> +	[PERF_COUNT_HW_CPU_CYCLES] =			PM_RUN_CYC,
> +	[PERF_COUNT_HW_INSTRUCTIONS] =			PM_RUN_INST_CMPL,
> +	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] =		PM_BR_FIN,
> +	[PERF_COUNT_HW_BRANCH_MISSES] =			PM_BR_MPRED_FIN,
> +	[PERF_COUNT_HW_CACHE_REFERENCES] =		PM_LD_REF_L1,
> +	[PERF_COUNT_HW_CACHE_MISSES] =			PM_LD_DEMAND_MISS_L1_FIN,
> +};
> +
>  static u64 power10_bhrb_filter_map(u64 branch_sample_type)
>  {
>  	u64 pmu_bhrb_filter = 0;
> @@ -311,6 +365,107 @@ static void power10_config_bhrb(u64 pmu_bhrb_filter)
>  			[C(RESULT_MISS)] = PM_DATA_FROM_L3MISS,
>  		},
>  		[C(OP_WRITE)] = {
> +			[C(RESULT_ACCESS)] = PM_L2_ST,
> +			[C(RESULT_MISS)] = PM_L2_ST_MISS,
> +		},
> +		[C(OP_PREFETCH)] = {
> +			[C(RESULT_ACCESS)] = PM_L3_PF_MISS_L3,
> +			[C(RESULT_MISS)] = 0,
> +		},
> +	},
> +	 [C(DTLB)] = {
> +		[C(OP_READ)] = {
> +			[C(RESULT_ACCESS)] = 0,
> +			[C(RESULT_MISS)] = PM_DTLB_MISS,
> +		},
> +		[C(OP_WRITE)] = {
> +			[C(RESULT_ACCESS)] = -1,
> +			[C(RESULT_MISS)] = -1,
> +		},
> +		[C(OP_PREFETCH)] = {
> +			[C(RESULT_ACCESS)] = -1,
> +			[C(RESULT_MISS)] = -1,
> +		},
> +	},
> +	[C(ITLB)] = {
> +		[C(OP_READ)] = {
> +			[C(RESULT_ACCESS)] = 0,
> +			[C(RESULT_MISS)] = PM_ITLB_MISS,
> +		},
> +		[C(OP_WRITE)] = {
> +			[C(RESULT_ACCESS)] = -1,
> +			[C(RESULT_MISS)] = -1,
> +		},
> +		[C(OP_PREFETCH)] = {
> +			[C(RESULT_ACCESS)] = -1,
> +			[C(RESULT_MISS)] = -1,
> +		},
> +	},
> +	[C(BPU)] = {
> +		[C(OP_READ)] = {
> +			[C(RESULT_ACCESS)] = PM_BR_CMPL,
> +			[C(RESULT_MISS)] = PM_BR_MPRED_CMPL,
> +		},
> +		[C(OP_WRITE)] = {
> +			[C(RESULT_ACCESS)] = -1,
> +			[C(RESULT_MISS)] = -1,
> +		},
> +		[C(OP_PREFETCH)] = {
> +			[C(RESULT_ACCESS)] = -1,
> +			[C(RESULT_MISS)] = -1,
> +		},
> +	},
> +	[C(NODE)] = {
> +		[C(OP_READ)] = {
> +			[C(RESULT_ACCESS)] = -1,
> +			[C(RESULT_MISS)] = -1,
> +		},
> +		[C(OP_WRITE)] = {
> +			[C(RESULT_ACCESS)] = -1,
> +			[C(RESULT_MISS)] = -1,
> +		},
> +		[C(OP_PREFETCH)] = {
> +			[C(RESULT_ACCESS)] = -1,
> +			[C(RESULT_MISS)] = -1,
> +		},
> +	},
> +};
> +
> +static u64 power10_cache_events_dd1[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
> +	[C(L1D)] = {
> +		[C(OP_READ)] = {
> +			[C(RESULT_ACCESS)] = PM_LD_REF_L1,
> +			[C(RESULT_MISS)] = PM_LD_MISS_L1,
> +		},
> +		[C(OP_WRITE)] = {
> +			[C(RESULT_ACCESS)] = 0,
> +			[C(RESULT_MISS)] = PM_ST_MISS_L1,
> +		},
> +		[C(OP_PREFETCH)] = {
> +			[C(RESULT_ACCESS)] = PM_LD_PREFETCH_CACHE_LINE_MISS,
> +			[C(RESULT_MISS)] = 0,
> +		},
> +	},
> +	[C(L1I)] = {
> +		[C(OP_READ)] = {
> +			[C(RESULT_ACCESS)] = PM_INST_FROM_L1,
> +			[C(RESULT_MISS)] = PM_L1_ICACHE_MISS,
> +		},
> +		[C(OP_WRITE)] = {
> +			[C(RESULT_ACCESS)] = PM_INST_FROM_L1MISS,
> +			[C(RESULT_MISS)] = -1,
> +		},
> +		[C(OP_PREFETCH)] = {
> +			[C(RESULT_ACCESS)] = PM_IC_PREF_REQ,
> +			[C(RESULT_MISS)] = 0,
> +		},
> +	},
> +	[C(LL)] = {
> +		[C(OP_READ)] = {
> +			[C(RESULT_ACCESS)] = PM_DATA_FROM_L3,
> +			[C(RESULT_MISS)] = PM_DATA_FROM_L3MISS,
> +		},
> +		[C(OP_WRITE)] = {
>  			[C(RESULT_ACCESS)] = -1,
>  			[C(RESULT_MISS)] = -1,
>  		},
> @@ -407,6 +562,7 @@ static void power10_config_bhrb(u64 pmu_bhrb_filter)
>  int init_power10_pmu(void)
>  {
>  	int rc;
> +	unsigned int pvr = mfspr(SPRN_PVR);
>  
>  	/* Comes from cpu_specs[] */
>  	if (!cur_cpu_spec->oprofile_cpu_type ||
> @@ -416,6 +572,12 @@ int init_power10_pmu(void)
>  	/* Set the PERF_REG_EXTENDED_MASK here */
>  	PERF_REG_EXTENDED_MASK = PERF_REG_PMU_MASK_31;
>  
> +	if ((PVR_MAJ(pvr) == 1)) {
> +		power10_pmu.generic_events = power10_generic_events_dd1;
> +		power10_pmu.attr_groups = power10_pmu_attr_groups_dd1;
> +		power10_pmu.cache_events = &power10_cache_events_dd1;
> +	}
> +
>  	rc = register_power_pmu(&power10_pmu);
>  	if (rc)
>  		return rc;
> -- 
> 1.8.3.1

^ permalink raw reply

* Re: [PATCH 2/4] powerpc/perf: Update the PMU group constraints for l2l3 and threshold events in power10
From: Athira Rajeev @ 2020-11-18  5:21 UTC (permalink / raw)
  To: Michael Ellerman; +Cc: Michael Neuling, Madhavan Srinivasan, linuxppc-dev
In-Reply-To: <878saz2sl7.fsf@mpe.ellerman.id.au>



> On 18-Nov-2020, at 10:02 AM, Michael Ellerman <mpe@ellerman.id.au> wrote:
> 
> Athira Rajeev <atrajeev@linux.vnet.ibm.com> writes:
>> In Power9, L2/L3 bus events are always available as a
>> "bank" of 4 events. To obtain the counts for any of the
>> l2/l3 bus events in a given bank, the user will have to
>> program PMC4 with corresponding l2/l3 bus event for that
>> bank.
>> 
>> Commit 59029136d750 ("powerpc/perf: Add constraints for power9 l2/l3 bus events")
>> enforced this rule in Power9. But this is not valid for
>> Power10, since in Power10 Monitor Mode Control Register2
>> (MMCR2) has bits to configure l2/l3 event bits. Hence remove
>> this PMC4 constraint check from power10.
>> 
>> Since the l2/l3 bits in MMCR2 are not per-pmc, patch handles
>> group constraint checks for l2/l3 bits in MMCR2.
> 
>> Patch also updates constraints for threshold events in power10.
> 
> That should be done in a separate patch please.

Thanks mpe for checking the patch set.
 
Sure, 
I will make the threshold constraint changes a separate patch and send the next version.


> 
> cheers


^ permalink raw reply

* Re: [PATCH 3/4] powerpc/perf: Fix to update l2l3 events and generic event codes for power10
From: Athira Rajeev @ 2020-11-18  5:23 UTC (permalink / raw)
  To: Michael Ellerman; +Cc: Michael Neuling, Madhavan Srinivasan, linuxppc-dev
In-Reply-To: <875z632sdt.fsf@mpe.ellerman.id.au>



> On 18-Nov-2020, at 10:06 AM, Michael Ellerman <mpe@ellerman.id.au> wrote:
> 
> Athira Rajeev <atrajeev@linux.vnet.ibm.com> writes:
>> Fix the event code for events: branch-instructions (to PM_BR_FIN),
>> branch-misses (to PM_BR_MPRED_FIN) and cache-misses (to
>> PM_LD_DEMAND_MISS_L1_FIN) for power10 PMU. Update the
>> list of generic events with this modified event code.
> 
> That should be one patch.

Ok, 
> 
>> Export l2l3 events (PM_L2_ST_MISS and PM_L2_ST) and LLC-prefetches
>> (PM_L3_PF_MISS_L3) via sysfs, and also add these to cache_events.
> 
> That should be another patch.

Ok, 
> 
>> To maintain the current event code work with DD1, rename
>> existing array of generic_events, cache_events and pmu_attr_groups
>> with suffix _dd1. Update the power10 pmu init code to pick the
>> dd1 list while registering the power PMU, based on the pvr
>> (Processor Version Register) value.
> 
> And that should be a third patch.
> 

Ok, I will make these changes in the next version

Thanks
Athira
> cheers
> 
>> diff --git a/arch/powerpc/perf/power10-events-list.h b/arch/powerpc/perf/power10-events-list.h
>> index 60c1b81..9e0b3c9 100644
>> --- a/arch/powerpc/perf/power10-events-list.h
>> +++ b/arch/powerpc/perf/power10-events-list.h
>> @@ -15,6 +15,9 @@
>> EVENT(PM_RUN_INST_CMPL,				0x500fa);
>> EVENT(PM_BR_CMPL,                               0x4d05e);
>> EVENT(PM_BR_MPRED_CMPL,                         0x400f6);
>> +EVENT(PM_BR_FIN,				0x2f04a);
>> +EVENT(PM_BR_MPRED_FIN,				0x35884);
>> +EVENT(PM_LD_DEMAND_MISS_L1_FIN,			0x400f0);
>> 
>> /* All L1 D cache load references counted at finish, gated by reject */
>> EVENT(PM_LD_REF_L1,				0x100fc);
>> @@ -36,6 +39,12 @@
>> EVENT(PM_DATA_FROM_L3,				0x01340000001c040);
>> /* Demand LD - L3 Miss (not L2 hit and not L3 hit) */
>> EVENT(PM_DATA_FROM_L3MISS,			0x300fe);
>> +/* All successful D-side store dispatches for this thread */
>> +EVENT(PM_L2_ST,					0x010000046080);
>> +/* All successful D-side store dispatches for this thread that were L2 Miss */
>> +EVENT(PM_L2_ST_MISS,				0x26880);
>> +/* Total HW L3 prefetches(Load+store) */
>> +EVENT(PM_L3_PF_MISS_L3,				0x100000016080);
>> /* Data PTEG reload */
>> EVENT(PM_DTLB_MISS,				0x300fc);
>> /* ITLB Reloaded */
>> diff --git a/arch/powerpc/perf/power10-pmu.c b/arch/powerpc/perf/power10-pmu.c
>> index cf44fb7..86665ad 100644
>> --- a/arch/powerpc/perf/power10-pmu.c
>> +++ b/arch/powerpc/perf/power10-pmu.c
>> @@ -114,6 +114,9 @@ static int power10_get_alternatives(u64 event, unsigned int flags, u64 alt[])
>> GENERIC_EVENT_ATTR(cache-misses,		PM_LD_MISS_L1);
>> GENERIC_EVENT_ATTR(mem-loads,			MEM_LOADS);
>> GENERIC_EVENT_ATTR(mem-stores,			MEM_STORES);
>> +GENERIC_EVENT_ATTR(branch-instructions,         PM_BR_FIN);
>> +GENERIC_EVENT_ATTR(branch-misses,               PM_BR_MPRED_FIN);
>> +GENERIC_EVENT_ATTR(cache-misses,		PM_LD_DEMAND_MISS_L1_FIN);
>> 
>> CACHE_EVENT_ATTR(L1-dcache-load-misses,		PM_LD_MISS_L1);
>> CACHE_EVENT_ATTR(L1-dcache-loads,		PM_LD_REF_L1);
>> @@ -124,12 +127,15 @@ static int power10_get_alternatives(u64 event, unsigned int flags, u64 alt[])
>> CACHE_EVENT_ATTR(L1-icache-prefetches,		PM_IC_PREF_REQ);
>> CACHE_EVENT_ATTR(LLC-load-misses,		PM_DATA_FROM_L3MISS);
>> CACHE_EVENT_ATTR(LLC-loads,			PM_DATA_FROM_L3);
>> +CACHE_EVENT_ATTR(LLC-prefetches,		PM_L3_PF_MISS_L3);
>> +CACHE_EVENT_ATTR(LLC-store-misses,		PM_L2_ST_MISS);
>> +CACHE_EVENT_ATTR(LLC-stores,			PM_L2_ST);
>> CACHE_EVENT_ATTR(branch-load-misses,		PM_BR_MPRED_CMPL);
>> CACHE_EVENT_ATTR(branch-loads,			PM_BR_CMPL);
>> CACHE_EVENT_ATTR(dTLB-load-misses,		PM_DTLB_MISS);
>> CACHE_EVENT_ATTR(iTLB-load-misses,		PM_ITLB_MISS);
>> 
>> -static struct attribute *power10_events_attr[] = {
>> +static struct attribute *power10_events_attr_dd1[] = {
>> 	GENERIC_EVENT_PTR(PM_RUN_CYC),
>> 	GENERIC_EVENT_PTR(PM_RUN_INST_CMPL),
>> 	GENERIC_EVENT_PTR(PM_BR_CMPL),
>> @@ -154,11 +160,44 @@ static int power10_get_alternatives(u64 event, unsigned int flags, u64 alt[])
>> 	NULL
>> };
>> 
>> +static struct attribute *power10_events_attr[] = {
>> +	GENERIC_EVENT_PTR(PM_RUN_CYC),
>> +	GENERIC_EVENT_PTR(PM_RUN_INST_CMPL),
>> +	GENERIC_EVENT_PTR(PM_BR_FIN),
>> +	GENERIC_EVENT_PTR(PM_BR_MPRED_FIN),
>> +	GENERIC_EVENT_PTR(PM_LD_REF_L1),
>> +	GENERIC_EVENT_PTR(PM_LD_DEMAND_MISS_L1_FIN),
>> +	GENERIC_EVENT_PTR(MEM_LOADS),
>> +	GENERIC_EVENT_PTR(MEM_STORES),
>> +	CACHE_EVENT_PTR(PM_LD_MISS_L1),
>> +	CACHE_EVENT_PTR(PM_LD_REF_L1),
>> +	CACHE_EVENT_PTR(PM_LD_PREFETCH_CACHE_LINE_MISS),
>> +	CACHE_EVENT_PTR(PM_ST_MISS_L1),
>> +	CACHE_EVENT_PTR(PM_L1_ICACHE_MISS),
>> +	CACHE_EVENT_PTR(PM_INST_FROM_L1),
>> +	CACHE_EVENT_PTR(PM_IC_PREF_REQ),
>> +	CACHE_EVENT_PTR(PM_DATA_FROM_L3MISS),
>> +	CACHE_EVENT_PTR(PM_DATA_FROM_L3),
>> +	CACHE_EVENT_PTR(PM_L3_PF_MISS_L3),
>> +	CACHE_EVENT_PTR(PM_L2_ST_MISS),
>> +	CACHE_EVENT_PTR(PM_L2_ST),
>> +	CACHE_EVENT_PTR(PM_BR_MPRED_CMPL),
>> +	CACHE_EVENT_PTR(PM_BR_CMPL),
>> +	CACHE_EVENT_PTR(PM_DTLB_MISS),
>> +	CACHE_EVENT_PTR(PM_ITLB_MISS),
>> +	NULL
>> +};
>> +
>> static struct attribute_group power10_pmu_events_group = {
>> 	.name = "events",
>> 	.attrs = power10_events_attr,
>> };
>> 
>> +static struct attribute_group power10_pmu_events_group_dd1 = {
>> +	.name = "events",
>> +	.attrs = power10_events_attr_dd1,
>> +};
>> +
>> PMU_FORMAT_ATTR(event,          "config:0-59");
>> PMU_FORMAT_ATTR(pmcxsel,        "config:0-7");
>> PMU_FORMAT_ATTR(mark,           "config:8");
>> @@ -211,7 +250,13 @@ static int power10_get_alternatives(u64 event, unsigned int flags, u64 alt[])
>> 	NULL,
>> };
>> 
>> -static int power10_generic_events[] = {
>> +static const struct attribute_group *power10_pmu_attr_groups_dd1[] = {
>> +	&power10_pmu_format_group,
>> +	&power10_pmu_events_group_dd1,
>> +	NULL,
>> +};
>> +
>> +static int power10_generic_events_dd1[] = {
>> 	[PERF_COUNT_HW_CPU_CYCLES] =			PM_RUN_CYC,
>> 	[PERF_COUNT_HW_INSTRUCTIONS] =			PM_RUN_INST_CMPL,
>> 	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] =		PM_BR_CMPL,
>> @@ -220,6 +265,15 @@ static int power10_get_alternatives(u64 event, unsigned int flags, u64 alt[])
>> 	[PERF_COUNT_HW_CACHE_MISSES] =			PM_LD_MISS_L1,
>> };
>> 
>> +static int power10_generic_events[] = {
>> +	[PERF_COUNT_HW_CPU_CYCLES] =			PM_RUN_CYC,
>> +	[PERF_COUNT_HW_INSTRUCTIONS] =			PM_RUN_INST_CMPL,
>> +	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] =		PM_BR_FIN,
>> +	[PERF_COUNT_HW_BRANCH_MISSES] =			PM_BR_MPRED_FIN,
>> +	[PERF_COUNT_HW_CACHE_REFERENCES] =		PM_LD_REF_L1,
>> +	[PERF_COUNT_HW_CACHE_MISSES] =			PM_LD_DEMAND_MISS_L1_FIN,
>> +};
>> +
>> static u64 power10_bhrb_filter_map(u64 branch_sample_type)
>> {
>> 	u64 pmu_bhrb_filter = 0;
>> @@ -311,6 +365,107 @@ static void power10_config_bhrb(u64 pmu_bhrb_filter)
>> 			[C(RESULT_MISS)] = PM_DATA_FROM_L3MISS,
>> 		},
>> 		[C(OP_WRITE)] = {
>> +			[C(RESULT_ACCESS)] = PM_L2_ST,
>> +			[C(RESULT_MISS)] = PM_L2_ST_MISS,
>> +		},
>> +		[C(OP_PREFETCH)] = {
>> +			[C(RESULT_ACCESS)] = PM_L3_PF_MISS_L3,
>> +			[C(RESULT_MISS)] = 0,
>> +		},
>> +	},
>> +	 [C(DTLB)] = {
>> +		[C(OP_READ)] = {
>> +			[C(RESULT_ACCESS)] = 0,
>> +			[C(RESULT_MISS)] = PM_DTLB_MISS,
>> +		},
>> +		[C(OP_WRITE)] = {
>> +			[C(RESULT_ACCESS)] = -1,
>> +			[C(RESULT_MISS)] = -1,
>> +		},
>> +		[C(OP_PREFETCH)] = {
>> +			[C(RESULT_ACCESS)] = -1,
>> +			[C(RESULT_MISS)] = -1,
>> +		},
>> +	},
>> +	[C(ITLB)] = {
>> +		[C(OP_READ)] = {
>> +			[C(RESULT_ACCESS)] = 0,
>> +			[C(RESULT_MISS)] = PM_ITLB_MISS,
>> +		},
>> +		[C(OP_WRITE)] = {
>> +			[C(RESULT_ACCESS)] = -1,
>> +			[C(RESULT_MISS)] = -1,
>> +		},
>> +		[C(OP_PREFETCH)] = {
>> +			[C(RESULT_ACCESS)] = -1,
>> +			[C(RESULT_MISS)] = -1,
>> +		},
>> +	},
>> +	[C(BPU)] = {
>> +		[C(OP_READ)] = {
>> +			[C(RESULT_ACCESS)] = PM_BR_CMPL,
>> +			[C(RESULT_MISS)] = PM_BR_MPRED_CMPL,
>> +		},
>> +		[C(OP_WRITE)] = {
>> +			[C(RESULT_ACCESS)] = -1,
>> +			[C(RESULT_MISS)] = -1,
>> +		},
>> +		[C(OP_PREFETCH)] = {
>> +			[C(RESULT_ACCESS)] = -1,
>> +			[C(RESULT_MISS)] = -1,
>> +		},
>> +	},
>> +	[C(NODE)] = {
>> +		[C(OP_READ)] = {
>> +			[C(RESULT_ACCESS)] = -1,
>> +			[C(RESULT_MISS)] = -1,
>> +		},
>> +		[C(OP_WRITE)] = {
>> +			[C(RESULT_ACCESS)] = -1,
>> +			[C(RESULT_MISS)] = -1,
>> +		},
>> +		[C(OP_PREFETCH)] = {
>> +			[C(RESULT_ACCESS)] = -1,
>> +			[C(RESULT_MISS)] = -1,
>> +		},
>> +	},
>> +};
>> +
>> +static u64 power10_cache_events_dd1[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
>> +	[C(L1D)] = {
>> +		[C(OP_READ)] = {
>> +			[C(RESULT_ACCESS)] = PM_LD_REF_L1,
>> +			[C(RESULT_MISS)] = PM_LD_MISS_L1,
>> +		},
>> +		[C(OP_WRITE)] = {
>> +			[C(RESULT_ACCESS)] = 0,
>> +			[C(RESULT_MISS)] = PM_ST_MISS_L1,
>> +		},
>> +		[C(OP_PREFETCH)] = {
>> +			[C(RESULT_ACCESS)] = PM_LD_PREFETCH_CACHE_LINE_MISS,
>> +			[C(RESULT_MISS)] = 0,
>> +		},
>> +	},
>> +	[C(L1I)] = {
>> +		[C(OP_READ)] = {
>> +			[C(RESULT_ACCESS)] = PM_INST_FROM_L1,
>> +			[C(RESULT_MISS)] = PM_L1_ICACHE_MISS,
>> +		},
>> +		[C(OP_WRITE)] = {
>> +			[C(RESULT_ACCESS)] = PM_INST_FROM_L1MISS,
>> +			[C(RESULT_MISS)] = -1,
>> +		},
>> +		[C(OP_PREFETCH)] = {
>> +			[C(RESULT_ACCESS)] = PM_IC_PREF_REQ,
>> +			[C(RESULT_MISS)] = 0,
>> +		},
>> +	},
>> +	[C(LL)] = {
>> +		[C(OP_READ)] = {
>> +			[C(RESULT_ACCESS)] = PM_DATA_FROM_L3,
>> +			[C(RESULT_MISS)] = PM_DATA_FROM_L3MISS,
>> +		},
>> +		[C(OP_WRITE)] = {
>> 			[C(RESULT_ACCESS)] = -1,
>> 			[C(RESULT_MISS)] = -1,
>> 		},
>> @@ -407,6 +562,7 @@ static void power10_config_bhrb(u64 pmu_bhrb_filter)
>> int init_power10_pmu(void)
>> {
>> 	int rc;
>> +	unsigned int pvr = mfspr(SPRN_PVR);
>> 
>> 	/* Comes from cpu_specs[] */
>> 	if (!cur_cpu_spec->oprofile_cpu_type ||
>> @@ -416,6 +572,12 @@ int init_power10_pmu(void)
>> 	/* Set the PERF_REG_EXTENDED_MASK here */
>> 	PERF_REG_EXTENDED_MASK = PERF_REG_PMU_MASK_31;
>> 
>> +	if ((PVR_MAJ(pvr) == 1)) {
>> +		power10_pmu.generic_events = power10_generic_events_dd1;
>> +		power10_pmu.attr_groups = power10_pmu_attr_groups_dd1;
>> +		power10_pmu.cache_events = &power10_cache_events_dd1;
>> +	}
>> +
>> 	rc = register_power_pmu(&power10_pmu);
>> 	if (rc)
>> 		return rc;
>> -- 
>> 1.8.3.1


^ permalink raw reply

* Re: [PATCH v2 0/3] PPC: Fix -Wimplicit-fallthrough for clang
From: Gustavo A. R. Silva @ 2020-11-18 20:27 UTC (permalink / raw)
  To: Nick Desaulniers
  Cc: clang-built-linux, linux-kernel, Miguel Ojeda, Arvind Sankar,
	Paul Mackerras, Nathan Chancellor, linuxppc-dev
In-Reply-To: <20201118000751.845172-1-ndesaulniers@google.com>

Nick,

On Tue, Nov 17, 2020 at 04:07:48PM -0800, Nick Desaulniers wrote:
> While cleaning up the last few -Wimplicit-fallthrough warnings in tree
> for Clang, I noticed
> commit 6a9dc5fd6170d ("lib: Revert use of fallthrough pseudo-keyword in lib/")
> which seemed to undo a bunch of fixes in lib/ due to breakage in
> arch/powerpc/boot/ not including compiler_types.h.  We don't need
> compiler_types.h for the definition of `fallthrough`, simply
> compiler_attributes.h.  Include that, revert the revert to lib/, and fix
> the last remaining cases I observed for powernv_defconfig.

I've added the series to my -next tree, together with Miguel's
suggestions.

Thanks for the Acks and comments, Michael.

--
Gustavo

^ permalink raw reply

* [PATCH 1/3] powerpc/wrapper: add "-z notext" flag to disable diagnostic
From: Bill Wendling @ 2020-11-18 22:35 UTC (permalink / raw)
  To: Michael Ellerman, linuxppc-dev
  Cc: Nick Desaulniers, Bill Wendling, Fangrui Song, Alan Modra
In-Reply-To: <20201017004752.415054-3-morbo@google.com>

The "-z notext" flag disables reporting an error if DT_TEXTREL is set.

  ld.lld: error: can't create dynamic relocation R_PPC64_ADDR64 against
    symbol: _start in readonly segment; recompile object files with
    -fPIC or pass '-Wl,-z,notext' to allow text relocations in the
    output
  >>> defined in
  >>> referenced by crt0.o:(.text+0x8) in archive arch/powerpc/boot/wrapper.a

The BFD linker disables this by default (though it's configurable in
current versions). LLD enables this by default. So we add the flag to
keep LLD from emitting the error.

Cc: Fangrui Song <maskray@google.com>
Cc: Alan Modra <amodra@gmail.com>
Signed-off-by: Bill Wendling <morbo@google.com>
---
 arch/powerpc/boot/wrapper | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/boot/wrapper b/arch/powerpc/boot/wrapper
index e1194955adbb..41fa0a8715e3 100755
--- a/arch/powerpc/boot/wrapper
+++ b/arch/powerpc/boot/wrapper
@@ -46,6 +46,7 @@ compression=.gz
 uboot_comp=gzip
 pie=
 format=
+notext=
 rodynamic=
 
 # cross-compilation prefix
@@ -354,6 +355,7 @@ epapr)
     platformo="$object/pseries-head.o $object/epapr.o $object/epapr-wrapper.o"
     link_address='0x20000000'
     pie=-pie
+    notext='-z notext'
     rodynamic=$(if ${CROSS}ld -V 2>&1 | grep -q LLD ; then echo "-z rodynamic"; fi)
     ;;
 mvme5100)
@@ -495,7 +497,7 @@ if [ "$platform" != "miboot" ]; then
         text_start="-Ttext $link_address"
     fi
 #link everything
-    ${CROSS}ld -m $format -T $lds $text_start $pie $nodl $rodynamic -o "$ofile" $map \
+    ${CROSS}ld -m $format -T $lds $text_start $pie $nodl $rodynamic $notext -o "$ofile" $map \
 	$platformo $tmp $object/wrapper.a
     rm $tmp
 fi
-- 
2.29.2.454.gaff20da3a2-goog


^ permalink raw reply related

* [PATCH 0/3] PPC: fixes for clang support
From: Bill Wendling @ 2020-11-18 22:35 UTC (permalink / raw)
  To: Michael Ellerman, linuxppc-dev; +Cc: Nick Desaulniers, Bill Wendling
In-Reply-To: <20201017004752.415054-3-morbo@google.com>

This series of patches includes fixes for clang issues that arose. The
"powerpc/64s" patch was "inspired" by a similar patch for ARM:

eb7c11ee3c5ce arm64: alternative: Work around .inst assembler bugs

Bill Wendling (3):
  powerpc/wrapper: add "-z notext" flag to disable diagnostic
  powerpc/boot: Use clang when CC is clang
  powerpc/64s: feature: work around inline asm issues

 arch/powerpc/boot/Makefile                |  4 ++++
 arch/powerpc/boot/wrapper                 |  4 +++-
 arch/powerpc/include/asm/feature-fixups.h | 19 ++++++++++++++-----
 3 files changed, 21 insertions(+), 6 deletions(-)

-- 
2.29.2.454.gaff20da3a2-goog


^ permalink raw reply

* [PATCH 3/3] powerpc/64s: feature: work around inline asm issues
From: Bill Wendling @ 2020-11-18 22:35 UTC (permalink / raw)
  To: Michael Ellerman, linuxppc-dev; +Cc: Nick Desaulniers, Bill Wendling
In-Reply-To: <20201017004752.415054-3-morbo@google.com>

The clang toolchain treats inline assembly a bit differently than
straight assembly code. In particular, inline assembly doesn't have the
complete context available to resolve expressions. This is intentional
to avoid divergence in the resulting assembly code.

We can work around this issue by borrowing a workaround done for ARM,
i.e. not directly testing the labels themselves, but by moving the
current output pointer by a value that should always be zero. If this
value is not zero, then we will trigger a backward move, which is
explicitly forbidden.

Signed-off-by: Bill Wendling <morbo@google.com>
---
 arch/powerpc/include/asm/feature-fixups.h | 19 ++++++++++++++-----
 1 file changed, 14 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/include/asm/feature-fixups.h b/arch/powerpc/include/asm/feature-fixups.h
index b0af97add751..34331c4ba61a 100644
--- a/arch/powerpc/include/asm/feature-fixups.h
+++ b/arch/powerpc/include/asm/feature-fixups.h
@@ -36,6 +36,18 @@ label##2:						\
 	.align 2;					\
 label##3:
 
+/*
+ * If the .org directive fails, it means that the feature instructions
+ * are smaller than the alternate instructions. This used to be written
+ * as
+ *
+ * .ifgt (label##4b-label##3b) - (label##2b-label##1b)
+ *      .error "Feature section else case larger than body"
+ * .endif
+ *
+ * but clang's assembler complains about the expression being non-absolute
+ * when the code appears in an inline assembly statement.
+ */
 #define MAKE_FTR_SECTION_ENTRY(msk, val, label, sect)		\
 label##4:							\
 	.popsection;						\
@@ -48,11 +60,8 @@ label##5:							\
 	FTR_ENTRY_OFFSET label##2b-label##5b;			\
 	FTR_ENTRY_OFFSET label##3b-label##5b;			\
 	FTR_ENTRY_OFFSET label##4b-label##5b;			\
-	.ifgt (label##4b- label##3b)-(label##2b- label##1b);	\
-	.error "Feature section else case larger than body";	\
-	.endif;							\
-	.popsection;
-
+	.popsection;						\
+	.org . - ((label##4b-label##3b) > (label##2b-label##1b));
 
 /* CPU feature dependent sections */
 #define BEGIN_FTR_SECTION_NESTED(label)	START_FTR_SECTION(label)
-- 
2.29.2.454.gaff20da3a2-goog


^ permalink raw reply related

* [PATCH 2/3] powerpc/boot: Use clang when CC is clang
From: Bill Wendling @ 2020-11-18 22:35 UTC (permalink / raw)
  To: Michael Ellerman, linuxppc-dev; +Cc: Nick Desaulniers, Bill Wendling
In-Reply-To: <20201017004752.415054-3-morbo@google.com>

The gcc compiler may not be available if CC is clang.

Signed-off-by: Bill Wendling <morbo@google.com>
---
 arch/powerpc/boot/Makefile | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile
index f8ce6d2dde7b..68a7534454cd 100644
--- a/arch/powerpc/boot/Makefile
+++ b/arch/powerpc/boot/Makefile
@@ -21,7 +21,11 @@
 all: $(obj)/zImage
 
 ifdef CROSS32_COMPILE
+ifdef CONFIG_CC_IS_CLANG
+    BOOTCC := $(CROSS32_COMPILE)clang
+else
     BOOTCC := $(CROSS32_COMPILE)gcc
+endif
     BOOTAR := $(CROSS32_COMPILE)ar
 else
     BOOTCC := $(CC)
-- 
2.29.2.454.gaff20da3a2-goog


^ permalink raw reply related

* [PATCH] powerpc/wrapper: add "-z rodynamic" when using LLD
From: Bill Wendling @ 2020-11-18 22:39 UTC (permalink / raw)
  To: Michael Ellerman, linuxppc-dev
  Cc: Nick Desaulniers, Bill Wendling, Fangrui Song, Alan Modra
In-Reply-To: <20201017000151.150788-1-morbo@google.com>

Normally all read-only sections precede SHF_WRITE sections. .dynamic and
.got have the SHF_WRITE flag; .dynamic probably because of DT_DEBUG. LLD
emits an error when this happens, so use "-z rodynamic" to mark .dynamic
as read-only.

Cc: Fangrui Song <maskray@google.com>
Cc: Alan Modra <amodra@gmail.com>
Signed-off-by: Bill Wendling <morbo@google.com>
---
 arch/powerpc/boot/wrapper | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/boot/wrapper b/arch/powerpc/boot/wrapper
index cd58a62e810d..e1194955adbb 100755
--- a/arch/powerpc/boot/wrapper
+++ b/arch/powerpc/boot/wrapper
@@ -46,6 +46,7 @@ compression=.gz
 uboot_comp=gzip
 pie=
 format=
+rodynamic=
 
 # cross-compilation prefix
 CROSS=
@@ -353,6 +354,7 @@ epapr)
     platformo="$object/pseries-head.o $object/epapr.o $object/epapr-wrapper.o"
     link_address='0x20000000'
     pie=-pie
+    rodynamic=$(if ${CROSS}ld -V 2>&1 | grep -q LLD ; then echo "-z rodynamic"; fi)
     ;;
 mvme5100)
     platformo="$object/fixed-head.o $object/mvme5100.o"
@@ -493,7 +495,7 @@ if [ "$platform" != "miboot" ]; then
         text_start="-Ttext $link_address"
     fi
 #link everything
-    ${CROSS}ld -m $format -T $lds $text_start $pie $nodl -o "$ofile" $map \
+    ${CROSS}ld -m $format -T $lds $text_start $pie $nodl $rodynamic -o "$ofile" $map \
 	$platformo $tmp $object/wrapper.a
     rm $tmp
 fi
-- 
2.29.2.454.gaff20da3a2-goog


^ permalink raw reply related

* Re: [PATCH] powerpc/wrapper: add "-z rodynamic" when using LLD
From: Fangrui Song @ 2020-11-18 22:55 UTC (permalink / raw)
  To: Bill Wendling; +Cc: Alan Modra, Nick Desaulniers, linuxppc-dev
In-Reply-To: <20201118223910.2711337-1-morbo@google.com>

We could wait for https://lkml.org/lkml/2020/11/13/19
"[PATCH] kbuild: Always link with '-z norelro'"

Then we would not need -z rodynamic to work around a -z relro issue.

(The issue is that some sections don't strictly follow the normal
R/RX/RW(RELRO)/RW(non-RELRO) section flag partition. As a linker person
I would suggest that we don't create multiple clusters with the same
section flags (e.g. RW in two separate places), but this is my very
minor complaint.)

On 2020-11-18, Bill Wendling wrote:
>Normally all read-only sections precede SHF_WRITE sections. .dynamic and
>.got have the SHF_WRITE flag; .dynamic probably because of DT_DEBUG. LLD
>emits an error when this happens, so use "-z rodynamic" to mark .dynamic
>as read-only.
>
>Cc: Fangrui Song <maskray@google.com>
>Cc: Alan Modra <amodra@gmail.com>
>Signed-off-by: Bill Wendling <morbo@google.com>
>---
> arch/powerpc/boot/wrapper | 4 +++-
> 1 file changed, 3 insertions(+), 1 deletion(-)
>
>diff --git a/arch/powerpc/boot/wrapper b/arch/powerpc/boot/wrapper
>index cd58a62e810d..e1194955adbb 100755
>--- a/arch/powerpc/boot/wrapper
>+++ b/arch/powerpc/boot/wrapper
>@@ -46,6 +46,7 @@ compression=.gz
> uboot_comp=gzip
> pie=
> format=
>+rodynamic=
>
> # cross-compilation prefix
> CROSS=
>@@ -353,6 +354,7 @@ epapr)
>     platformo="$object/pseries-head.o $object/epapr.o $object/epapr-wrapper.o"
>     link_address='0x20000000'
>     pie=-pie
>+    rodynamic=$(if ${CROSS}ld -V 2>&1 | grep -q LLD ; then echo "-z rodynamic"; fi)
>     ;;
> mvme5100)
>     platformo="$object/fixed-head.o $object/mvme5100.o"
>@@ -493,7 +495,7 @@ if [ "$platform" != "miboot" ]; then
>         text_start="-Ttext $link_address"
>     fi
> #link everything
>-    ${CROSS}ld -m $format -T $lds $text_start $pie $nodl -o "$ofile" $map \
>+    ${CROSS}ld -m $format -T $lds $text_start $pie $nodl $rodynamic -o "$ofile" $map \
> 	$platformo $tmp $object/wrapper.a
>     rm $tmp
> fi
>-- 
>2.29.2.454.gaff20da3a2-goog
>

^ permalink raw reply

* [powerpc:next-test] BUILD SUCCESS a1062188413df416db21b02ffe4bd60228ad6240
From: kernel test robot @ 2020-11-19  1:11 UTC (permalink / raw)
  To: Michael Ellerman; +Cc: linuxppc-dev

tree/branch: https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git  next-test
branch HEAD: a1062188413df416db21b02ffe4bd60228ad6240  powerpc: fix -Wimplicit-fallthrough

elapsed time: 726m

configs tested: 155
configs skipped: 3

The following configs have been built successfully.
More configs may be tested in the coming days.

gcc tested configs:
arm                                 defconfig
arm64                            allyesconfig
arm64                               defconfig
arm                              allyesconfig
arm                              allmodconfig
sh                         ap325rxa_defconfig
mips                  maltasmvp_eva_defconfig
mips                      pic32mzda_defconfig
powerpc                      bamboo_defconfig
powerpc                     tqm8560_defconfig
m68k                          sun3x_defconfig
arc                        nsim_700_defconfig
mips                      loongson3_defconfig
arm                           sunxi_defconfig
nios2                            alldefconfig
powerpc                 xes_mpc85xx_defconfig
powerpc                      ppc6xx_defconfig
powerpc                     taishan_defconfig
powerpc                     skiroot_defconfig
powerpc                    adder875_defconfig
sh                ecovec24-romimage_defconfig
m68k                            mac_defconfig
sh                           sh2007_defconfig
sh                          polaris_defconfig
arm                       aspeed_g5_defconfig
arm                           stm32_defconfig
powerpc                     sbc8548_defconfig
arm                   milbeaut_m10v_defconfig
sh                   sh7724_generic_defconfig
arm                         s3c6400_defconfig
m68k                        mvme16x_defconfig
mips                        omega2p_defconfig
mips                    maltaup_xpa_defconfig
mips                       bmips_be_defconfig
s390                          debug_defconfig
mips                            e55_defconfig
sh                     sh7710voipgw_defconfig
mips                 decstation_r4k_defconfig
powerpc                      arches_defconfig
arm                          pcm027_defconfig
mips                           ip32_defconfig
mips                  cavium_octeon_defconfig
ia64                        generic_defconfig
mips                        nlm_xlr_defconfig
powerpc                     tqm8540_defconfig
mips                        bcm63xx_defconfig
powerpc                  mpc885_ads_defconfig
sh                           se7722_defconfig
powerpc                 mpc8313_rdb_defconfig
powerpc                     stx_gp3_defconfig
powerpc                 mpc85xx_cds_defconfig
powerpc                      pcm030_defconfig
powerpc                      ppc64e_defconfig
sh                         ecovec24_defconfig
arm                         socfpga_defconfig
sh                        edosk7760_defconfig
sh                               j2_defconfig
arm                          exynos_defconfig
xtensa                  cadence_csp_defconfig
mips                         rt305x_defconfig
sh                               alldefconfig
arm                      tct_hammer_defconfig
sh                             sh03_defconfig
nios2                         10m50_defconfig
um                           x86_64_defconfig
arm                       netwinder_defconfig
arm                         s3c2410_defconfig
mips                      bmips_stb_defconfig
i386                                defconfig
s390                             alldefconfig
arm                           h5000_defconfig
arm                        neponset_defconfig
arc                                 defconfig
riscv                    nommu_virt_defconfig
powerpc                mpc7448_hpc2_defconfig
arm                       spear13xx_defconfig
sh                           se7705_defconfig
arm                         cm_x300_defconfig
sh                          r7780mp_defconfig
arc                              alldefconfig
powerpc                     mpc83xx_defconfig
mips                         tb0219_defconfig
microblaze                          defconfig
ia64                             allmodconfig
ia64                                defconfig
ia64                             allyesconfig
m68k                             allmodconfig
m68k                                defconfig
m68k                             allyesconfig
nios2                               defconfig
arc                              allyesconfig
nds32                             allnoconfig
c6x                              allyesconfig
nds32                               defconfig
nios2                            allyesconfig
csky                                defconfig
alpha                               defconfig
alpha                            allyesconfig
xtensa                           allyesconfig
h8300                            allyesconfig
sh                               allmodconfig
parisc                              defconfig
s390                             allyesconfig
parisc                           allyesconfig
s390                                defconfig
i386                             allyesconfig
sparc                            allyesconfig
sparc                               defconfig
mips                             allyesconfig
mips                             allmodconfig
powerpc                          allyesconfig
powerpc                          allmodconfig
powerpc                           allnoconfig
x86_64               randconfig-a005-20201118
x86_64               randconfig-a003-20201118
x86_64               randconfig-a004-20201118
x86_64               randconfig-a002-20201118
x86_64               randconfig-a006-20201118
x86_64               randconfig-a001-20201118
i386                 randconfig-a006-20201118
i386                 randconfig-a005-20201118
i386                 randconfig-a002-20201118
i386                 randconfig-a001-20201118
i386                 randconfig-a003-20201118
i386                 randconfig-a004-20201118
i386                 randconfig-a006-20201119
i386                 randconfig-a005-20201119
i386                 randconfig-a002-20201119
i386                 randconfig-a001-20201119
i386                 randconfig-a003-20201119
i386                 randconfig-a004-20201119
i386                 randconfig-a012-20201118
i386                 randconfig-a014-20201118
i386                 randconfig-a016-20201118
i386                 randconfig-a011-20201118
i386                 randconfig-a013-20201118
i386                 randconfig-a015-20201118
riscv                    nommu_k210_defconfig
riscv                            allyesconfig
riscv                             allnoconfig
riscv                               defconfig
riscv                          rv32_defconfig
riscv                            allmodconfig
x86_64                                   rhel
x86_64                           allyesconfig
x86_64                    rhel-7.6-kselftests
x86_64                              defconfig
x86_64                               rhel-8.3
x86_64                                  kexec

clang tested configs:
x86_64               randconfig-a015-20201118
x86_64               randconfig-a014-20201118
x86_64               randconfig-a011-20201118
x86_64               randconfig-a013-20201118
x86_64               randconfig-a016-20201118
x86_64               randconfig-a012-20201118

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org

^ permalink raw reply

* [powerpc:fixes] BUILD SUCCESS cd81acc600a9684ea4b4d25a47900d38a3890eab
From: kernel test robot @ 2020-11-19  1:11 UTC (permalink / raw)
  To: Michael Ellerman; +Cc: linuxppc-dev

tree/branch: https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git  fixes
branch HEAD: cd81acc600a9684ea4b4d25a47900d38a3890eab  powerpc/64s/exception: KVM Fix for host DSI being taken in HPT guest MMU context

elapsed time: 726m

configs tested: 161
configs skipped: 92

The following configs have been built successfully.
More configs may be tested in the coming days.

gcc tested configs:
arm                                 defconfig
arm64                            allyesconfig
arm64                               defconfig
arm                              allyesconfig
arm                              allmodconfig
sh                         ap325rxa_defconfig
mips                  maltasmvp_eva_defconfig
mips                      pic32mzda_defconfig
powerpc                      bamboo_defconfig
powerpc                     tqm8560_defconfig
m68k                          sun3x_defconfig
arc                        nsim_700_defconfig
mips                      loongson3_defconfig
arm                           sunxi_defconfig
nios2                            alldefconfig
powerpc                 xes_mpc85xx_defconfig
powerpc                      ppc6xx_defconfig
powerpc                     taishan_defconfig
powerpc                     skiroot_defconfig
powerpc                    adder875_defconfig
sh                ecovec24-romimage_defconfig
m68k                            mac_defconfig
sh                           sh2007_defconfig
arm                    vt8500_v6_v7_defconfig
sh                   secureedge5410_defconfig
sh                   sh7724_generic_defconfig
arm                         s3c6400_defconfig
m68k                        mvme16x_defconfig
mips                        omega2p_defconfig
mips                    maltaup_xpa_defconfig
mips                       bmips_be_defconfig
s390                          debug_defconfig
mips                            e55_defconfig
sh                     sh7710voipgw_defconfig
mips                 decstation_r4k_defconfig
powerpc                      arches_defconfig
arm                          pcm027_defconfig
mips                           ip32_defconfig
mips                  cavium_octeon_defconfig
ia64                        generic_defconfig
mips                        nlm_xlr_defconfig
powerpc                     tqm8540_defconfig
mips                        bcm63xx_defconfig
sh                          urquell_defconfig
powerpc                      obs600_defconfig
arm                             mxs_defconfig
sh                   sh7770_generic_defconfig
sh                           se7206_defconfig
powerpc                     pq2fads_defconfig
parisc                generic-64bit_defconfig
arm                             rpc_defconfig
powerpc                      katmai_defconfig
s390                       zfcpdump_defconfig
powerpc                      pmac32_defconfig
powerpc                 mpc8315_rdb_defconfig
powerpc                  mpc885_ads_defconfig
sh                           se7722_defconfig
powerpc                 mpc8313_rdb_defconfig
powerpc                     stx_gp3_defconfig
powerpc                 mpc85xx_cds_defconfig
powerpc                      pcm030_defconfig
powerpc                      ppc64e_defconfig
sh                         ecovec24_defconfig
arm                           efm32_defconfig
arm                            zeus_defconfig
arm                           corgi_defconfig
arm                   milbeaut_m10v_defconfig
powerpc                 mpc837x_rdb_defconfig
arm                         socfpga_defconfig
sh                        edosk7760_defconfig
sh                               j2_defconfig
arm                          exynos_defconfig
xtensa                  cadence_csp_defconfig
mips                         rt305x_defconfig
sh                               alldefconfig
arm                      tct_hammer_defconfig
sh                             sh03_defconfig
nios2                         10m50_defconfig
um                           x86_64_defconfig
arm                       netwinder_defconfig
arm                         s3c2410_defconfig
arc                                 defconfig
riscv                    nommu_virt_defconfig
powerpc                mpc7448_hpc2_defconfig
arm                       spear13xx_defconfig
sh                          rsk7264_defconfig
mips                      maltasmvp_defconfig
sh                           se7705_defconfig
arm                         cm_x300_defconfig
sh                          r7780mp_defconfig
arc                              alldefconfig
powerpc                     mpc83xx_defconfig
mips                         tb0219_defconfig
microblaze                          defconfig
ia64                             allmodconfig
ia64                                defconfig
ia64                             allyesconfig
m68k                             allmodconfig
m68k                                defconfig
m68k                             allyesconfig
nios2                               defconfig
arc                              allyesconfig
nds32                             allnoconfig
c6x                              allyesconfig
nds32                               defconfig
nios2                            allyesconfig
csky                                defconfig
alpha                               defconfig
alpha                            allyesconfig
xtensa                           allyesconfig
h8300                            allyesconfig
sh                               allmodconfig
parisc                              defconfig
s390                             allyesconfig
parisc                           allyesconfig
s390                                defconfig
i386                             allyesconfig
sparc                            allyesconfig
sparc                               defconfig
i386                                defconfig
mips                             allyesconfig
mips                             allmodconfig
powerpc                          allyesconfig
powerpc                          allmodconfig
powerpc                           allnoconfig
x86_64               randconfig-a005-20201118
x86_64               randconfig-a003-20201118
x86_64               randconfig-a004-20201118
x86_64               randconfig-a002-20201118
x86_64               randconfig-a006-20201118
x86_64               randconfig-a001-20201118
i386                 randconfig-a006-20201118
i386                 randconfig-a005-20201118
i386                 randconfig-a002-20201118
i386                 randconfig-a001-20201118
i386                 randconfig-a003-20201118
i386                 randconfig-a004-20201118
i386                 randconfig-a012-20201118
i386                 randconfig-a014-20201118
i386                 randconfig-a016-20201118
i386                 randconfig-a011-20201118
i386                 randconfig-a013-20201118
i386                 randconfig-a015-20201118
riscv                    nommu_k210_defconfig
riscv                            allyesconfig
riscv                             allnoconfig
riscv                               defconfig
riscv                          rv32_defconfig
riscv                            allmodconfig
x86_64                                   rhel
x86_64                           allyesconfig
x86_64                    rhel-7.6-kselftests
x86_64                              defconfig
x86_64                               rhel-8.3
x86_64                                  kexec

clang tested configs:
x86_64               randconfig-a015-20201118
x86_64               randconfig-a014-20201118
x86_64               randconfig-a011-20201118
x86_64               randconfig-a013-20201118
x86_64               randconfig-a016-20201118
x86_64               randconfig-a012-20201118

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org

^ permalink raw reply

* [PATCH net-next v2 0/9] ibmvnic: Performance improvements and other updates
From: Thomas Falcon @ 2020-11-19  1:12 UTC (permalink / raw)
  To: kuba
  Cc: cforno12, netdev, ljp, ricklind, dnbanerg, tlfalcon, drt, brking,
	sukadev, linuxppc-dev

The first three patches utilize a hypervisor call allowing multiple 
TX and RX buffer replenishment descriptors to be sent in one operation,
which significantly reduces hypervisor call overhead. The xmit_more
and Byte Queue Limit APIs are leveraged to provide this support
for TX descriptors.

The subsequent two patches remove superfluous code and members in
the TX completion handling function and the TX buffer structure,
respectively, and remove unused routines.

Finally, four patches provided by Dwip Banerjee ensure that device
queue memory is cache-line aligned, resolving slowdowns observed in
PCI traces, and optimize the driver's NAPI polling function and
RX buffer replenishment.

This series provides significant performance improvements, allowing
the driver to fully utilize 100Gb NICs.

v2 updates:

1) Removed three patches from the original series which
   were bug fixes and thus better suited for the net tree,
   suggested by Jakub Kicinski.
2) Fixed error handling when initializing device queues,
   suggested by Jakub Kicinski.
3) Fixed bug where queued entries were not flushed after a
   dropped frame, also suggested by Jakub. Two functions,
   ibmvnic_tx_scrq_flush and its helper ibmvnic_tx_scrq_clean_buffer,
   were introduced to ensure that queued frames are either submitted
   to firmware or, if that is not successful, freed as dropped and
   associated data structures are updated with the new device queue state.

Dwip N. Banerjee (4):
  ibmvnic: Ensure that device queue memory is cache-line aligned
  ibmvnic: Correctly re-enable interrupts in NAPI polling routine
  ibmvnic: Use netdev_alloc_skb instead of alloc_skb to replenish RX
    buffers
  ibmvnic: Do not replenish RX buffers after every polling loop

Thomas Falcon (5):
  ibmvnic: Introduce indirect subordinate Command Response Queue buffer
  ibmvnic: Introduce batched RX buffer descriptor transmission
  ibmvnic: Introduce xmit_more support using batched subCRQ hcalls
  ibmvnic: Clean up TX code and TX buffer data structure
  ibmvnic: Remove send_subcrq function

 drivers/net/ethernet/ibm/ibmvnic.c | 398 ++++++++++++++++++-----------
 drivers/net/ethernet/ibm/ibmvnic.h |  27 +-
 2 files changed, 256 insertions(+), 169 deletions(-)

-- 
2.26.2


^ permalink raw reply

* [PATCH net-next v2 1/9] ibmvnic: Introduce indirect subordinate Command Response Queue buffer
From: Thomas Falcon @ 2020-11-19  1:12 UTC (permalink / raw)
  To: kuba
  Cc: cforno12, netdev, ljp, ricklind, dnbanerg, tlfalcon, drt, brking,
	sukadev, linuxppc-dev
In-Reply-To: <1605748345-32062-1-git-send-email-tlfalcon@linux.ibm.com>

This patch introduces the infrastructure to send batched subordinate
Command Response Queue descriptors, which are used by the ibmvnic
driver to send TX frame and RX buffer descriptors.

Signed-off-by: Thomas Falcon <tlfalcon@linux.ibm.com>
---
 drivers/net/ethernet/ibm/ibmvnic.c | 23 +++++++++++++++++++++++
 drivers/net/ethernet/ibm/ibmvnic.h |  9 +++++++++
 2 files changed, 32 insertions(+)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index da15913879f8..3884f8a683a7 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -2858,6 +2858,7 @@ static int reset_one_sub_crq_queue(struct ibmvnic_adapter *adapter,
 	memset(scrq->msgs, 0, 4 * PAGE_SIZE);
 	atomic_set(&scrq->used, 0);
 	scrq->cur = 0;
+	scrq->ind_buf.index = 0;
 
 	rc = h_reg_sub_crq(adapter->vdev->unit_address, scrq->msg_token,
 			   4 * PAGE_SIZE, &scrq->crq_num, &scrq->hw_irq);
@@ -2909,6 +2910,11 @@ static void release_sub_crq_queue(struct ibmvnic_adapter *adapter,
 		}
 	}
 
+	dma_free_coherent(dev,
+			  IBMVNIC_IND_ARR_SZ,
+			  scrq->ind_buf.indir_arr,
+			  scrq->ind_buf.indir_dma);
+
 	dma_unmap_single(dev, scrq->msg_token, 4 * PAGE_SIZE,
 			 DMA_BIDIRECTIONAL);
 	free_pages((unsigned long)scrq->msgs, 2);
@@ -2955,6 +2961,17 @@ static struct ibmvnic_sub_crq_queue *init_sub_crq_queue(struct ibmvnic_adapter
 
 	scrq->adapter = adapter;
 	scrq->size = 4 * PAGE_SIZE / sizeof(*scrq->msgs);
+	scrq->ind_buf.index = 0;
+
+	scrq->ind_buf.indir_arr =
+		dma_alloc_coherent(dev,
+				   IBMVNIC_IND_ARR_SZ,
+				   &scrq->ind_buf.indir_dma,
+				   GFP_KERNEL);
+
+	if (!scrq->ind_buf.indir_arr)
+		goto indir_failed;
+
 	spin_lock_init(&scrq->lock);
 
 	netdev_dbg(adapter->netdev,
@@ -2963,6 +2980,12 @@ static struct ibmvnic_sub_crq_queue *init_sub_crq_queue(struct ibmvnic_adapter
 
 	return scrq;
 
+indir_failed:
+	do {
+		rc = plpar_hcall_norets(H_FREE_SUB_CRQ,
+					adapter->vdev->unit_address,
+					scrq->crq_num);
+	} while (rc == H_BUSY || rc == H_IS_LONG_BUSY(rc));
 reg_failed:
 	dma_unmap_single(dev, scrq->msg_token, 4 * PAGE_SIZE,
 			 DMA_BIDIRECTIONAL);
diff --git a/drivers/net/ethernet/ibm/ibmvnic.h b/drivers/net/ethernet/ibm/ibmvnic.h
index 217dcc7ded70..4a63e9886719 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.h
+++ b/drivers/net/ethernet/ibm/ibmvnic.h
@@ -31,6 +31,8 @@
 #define IBMVNIC_BUFFS_PER_POOL	100
 #define IBMVNIC_MAX_QUEUES	16
 #define IBMVNIC_MAX_QUEUE_SZ   4096
+#define IBMVNIC_MAX_IND_DESCS  128
+#define IBMVNIC_IND_ARR_SZ	(IBMVNIC_MAX_IND_DESCS * 32)
 
 #define IBMVNIC_TSO_BUF_SZ	65536
 #define IBMVNIC_TSO_BUFS	64
@@ -861,6 +863,12 @@ union sub_crq {
 	struct ibmvnic_rx_buff_add_desc rx_add;
 };
 
+struct ibmvnic_ind_xmit_queue {
+	union sub_crq *indir_arr;
+	dma_addr_t indir_dma;
+	int index;
+};
+
 struct ibmvnic_sub_crq_queue {
 	union sub_crq *msgs;
 	int size, cur;
@@ -873,6 +881,7 @@ struct ibmvnic_sub_crq_queue {
 	spinlock_t lock;
 	struct sk_buff *rx_skb_top;
 	struct ibmvnic_adapter *adapter;
+	struct ibmvnic_ind_xmit_queue ind_buf;
 	atomic_t used;
 	char name[32];
 	u64 handle;
-- 
2.26.2


^ permalink raw reply related

* [PATCH net-next v2 2/9] ibmvnic: Introduce batched RX buffer descriptor transmission
From: Thomas Falcon @ 2020-11-19  1:12 UTC (permalink / raw)
  To: kuba
  Cc: cforno12, netdev, ljp, ricklind, dnbanerg, tlfalcon, drt, brking,
	sukadev, linuxppc-dev
In-Reply-To: <1605748345-32062-1-git-send-email-tlfalcon@linux.ibm.com>

Utilize the H_SEND_SUB_CRQ_INDIRECT hypervisor call to send
multiple RX buffer descriptors to the device in one hypervisor
call operation. This change will reduce the number of hypervisor
calls and thus hypervisor call overhead needed to transmit
RX buffer descriptors to the device.

Signed-off-by: Thomas Falcon <tlfalcon@linux.ibm.com>
---
 drivers/net/ethernet/ibm/ibmvnic.c | 57 +++++++++++++++++++-----------
 1 file changed, 37 insertions(+), 20 deletions(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index 3884f8a683a7..17ba6db6f5f9 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -306,9 +306,11 @@ static void replenish_rx_pool(struct ibmvnic_adapter *adapter,
 	int count = pool->size - atomic_read(&pool->available);
 	u64 handle = adapter->rx_scrq[pool->index]->handle;
 	struct device *dev = &adapter->vdev->dev;
+	struct ibmvnic_ind_xmit_queue *ind_bufp;
+	struct ibmvnic_sub_crq_queue *rx_scrq;
+	union sub_crq *sub_crq;
 	int buffers_added = 0;
 	unsigned long lpar_rc;
-	union sub_crq sub_crq;
 	struct sk_buff *skb;
 	unsigned int offset;
 	dma_addr_t dma_addr;
@@ -320,6 +322,8 @@ static void replenish_rx_pool(struct ibmvnic_adapter *adapter,
 	if (!pool->active)
 		return;
 
+	rx_scrq = adapter->rx_scrq[pool->index];
+	ind_bufp = &rx_scrq->ind_buf;
 	for (i = 0; i < count; ++i) {
 		skb = alloc_skb(pool->buff_size, GFP_ATOMIC);
 		if (!skb) {
@@ -346,12 +350,13 @@ static void replenish_rx_pool(struct ibmvnic_adapter *adapter,
 		pool->rx_buff[index].pool_index = pool->index;
 		pool->rx_buff[index].size = pool->buff_size;
 
-		memset(&sub_crq, 0, sizeof(sub_crq));
-		sub_crq.rx_add.first = IBMVNIC_CRQ_CMD;
-		sub_crq.rx_add.correlator =
+		sub_crq = &ind_bufp->indir_arr[ind_bufp->index++];
+		memset(sub_crq, 0, sizeof(*sub_crq));
+		sub_crq->rx_add.first = IBMVNIC_CRQ_CMD;
+		sub_crq->rx_add.correlator =
 		    cpu_to_be64((u64)&pool->rx_buff[index]);
-		sub_crq.rx_add.ioba = cpu_to_be32(dma_addr);
-		sub_crq.rx_add.map_id = pool->long_term_buff.map_id;
+		sub_crq->rx_add.ioba = cpu_to_be32(dma_addr);
+		sub_crq->rx_add.map_id = pool->long_term_buff.map_id;
 
 		/* The length field of the sCRQ is defined to be 24 bits so the
 		 * buffer size needs to be left shifted by a byte before it is
@@ -361,15 +366,20 @@ static void replenish_rx_pool(struct ibmvnic_adapter *adapter,
 #ifdef __LITTLE_ENDIAN__
 		shift = 8;
 #endif
-		sub_crq.rx_add.len = cpu_to_be32(pool->buff_size << shift);
-
-		lpar_rc = send_subcrq(adapter, handle, &sub_crq);
-		if (lpar_rc != H_SUCCESS)
-			goto failure;
-
-		buffers_added++;
-		adapter->replenish_add_buff_success++;
+		sub_crq->rx_add.len = cpu_to_be32(pool->buff_size << shift);
 		pool->next_free = (pool->next_free + 1) % pool->size;
+		if (ind_bufp->index == IBMVNIC_MAX_IND_DESCS ||
+		    i == count - 1) {
+			lpar_rc =
+				send_subcrq_indirect(adapter, handle,
+						     (u64)ind_bufp->indir_dma,
+						     (u64)ind_bufp->index);
+			if (lpar_rc != H_SUCCESS)
+				goto failure;
+			buffers_added += ind_bufp->index;
+			adapter->replenish_add_buff_success += ind_bufp->index;
+			ind_bufp->index = 0;
+		}
 	}
 	atomic_add(buffers_added, &pool->available);
 	return;
@@ -377,13 +387,20 @@ static void replenish_rx_pool(struct ibmvnic_adapter *adapter,
 failure:
 	if (lpar_rc != H_PARAMETER && lpar_rc != H_CLOSED)
 		dev_err_ratelimited(dev, "rx: replenish packet buffer failed\n");
-	pool->free_map[pool->next_free] = index;
-	pool->rx_buff[index].skb = NULL;
-
-	dev_kfree_skb_any(skb);
-	adapter->replenish_add_buff_failure++;
-	atomic_add(buffers_added, &pool->available);
+	for (i = ind_bufp->index - 1; i >= 0; --i) {
+		struct ibmvnic_rx_buff *rx_buff;
 
+		pool->next_free = pool->next_free == 0 ?
+				  pool->size - 1 : pool->next_free - 1;
+		sub_crq = &ind_bufp->indir_arr[i];
+		rx_buff = (struct ibmvnic_rx_buff *)
+				be64_to_cpu(sub_crq->rx_add.correlator);
+		index = (int)(rx_buff - pool->rx_buff);
+		pool->free_map[pool->next_free] = index;
+		dev_kfree_skb_any(pool->rx_buff[index].skb);
+		pool->rx_buff[index].skb = NULL;
+	}
+	ind_bufp->index = 0;
 	if (lpar_rc == H_CLOSED || adapter->failover_pending) {
 		/* Disable buffer pool replenishment and report carrier off if
 		 * queue is closed or pending failover.
-- 
2.26.2


^ permalink raw reply related

* [PATCH net-next v2 3/9] ibmvnic: Introduce xmit_more support using batched subCRQ hcalls
From: Thomas Falcon @ 2020-11-19  1:12 UTC (permalink / raw)
  To: kuba
  Cc: cforno12, netdev, ljp, ricklind, dnbanerg, tlfalcon, drt, brking,
	sukadev, linuxppc-dev
In-Reply-To: <1605748345-32062-1-git-send-email-tlfalcon@linux.ibm.com>

Include support for the xmit_more feature utilizing the
H_SEND_SUB_CRQ_INDIRECT hypervisor call which allows the sending
of multiple subordinate Command Response Queue descriptors in one
hypervisor call via a DMA-mapped buffer. This update reduces hypervisor
calls and thus hypervisor call overhead per TX descriptor.

Signed-off-by: Thomas Falcon <tlfalcon@linux.ibm.com>
---
 drivers/net/ethernet/ibm/ibmvnic.c | 204 ++++++++++++++++++++---------
 1 file changed, 139 insertions(+), 65 deletions(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index 17ba6db6f5f9..650aaf100d65 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -1165,6 +1165,7 @@ static int __ibmvnic_open(struct net_device *netdev)
 		if (prev_state == VNIC_CLOSED)
 			enable_irq(adapter->tx_scrq[i]->irq);
 		enable_scrq_irq(adapter, adapter->tx_scrq[i]);
+		netdev_tx_reset_queue(netdev_get_tx_queue(netdev, i));
 	}
 
 	rc = set_link_state(adapter, IBMVNIC_LOGICAL_LNK_UP);
@@ -1523,16 +1524,93 @@ static int ibmvnic_xmit_workarounds(struct sk_buff *skb,
 	return 0;
 }
 
+static void ibmvnic_tx_scrq_clean_buffer(struct ibmvnic_adapter *adapter,
+					 struct ibmvnic_sub_crq_queue *tx_scrq)
+{
+	struct ibmvnic_ind_xmit_queue *ind_bufp;
+	struct ibmvnic_tx_buff *tx_buff;
+	struct ibmvnic_tx_pool *tx_pool;
+	union sub_crq tx_scrq_entry;
+	int queue_num;
+	int entries;
+	int index;
+	int i;
+
+	ind_bufp = &tx_scrq->ind_buf;
+	entries = (u64)ind_bufp->index;
+	queue_num = tx_scrq->pool_index;
+
+	for (i = entries - 1; i >= 0; --i) {
+		tx_scrq_entry = ind_bufp->indir_arr[i];
+		if (tx_scrq_entry.v1.type != IBMVNIC_TX_DESC)
+			continue;
+		index = be32_to_cpu(tx_scrq_entry.v1.correlator);
+		if (index & IBMVNIC_TSO_POOL_MASK) {
+			tx_pool = &adapter->tso_pool[queue_num];
+			index &= ~IBMVNIC_TSO_POOL_MASK;
+		} else {
+			tx_pool = &adapter->tx_pool[queue_num];
+		}
+		tx_pool->free_map[tx_pool->consumer_index] = index;
+		tx_pool->consumer_index = tx_pool->consumer_index == 0 ?
+					  tx_pool->num_buffers - 1 :
+					  tx_pool->consumer_index - 1;
+		tx_buff = &tx_pool->tx_buff[index];
+		adapter->netdev->stats.tx_packets--;
+		adapter->netdev->stats.tx_bytes -= tx_buff->skb->len;
+		adapter->tx_stats_buffers[queue_num].packets--;
+		adapter->tx_stats_buffers[queue_num].bytes -=
+						tx_buff->skb->len;
+		dev_kfree_skb_any(tx_buff->skb);
+		tx_buff->skb = NULL;
+		adapter->netdev->stats.tx_dropped++;
+	}
+	ind_bufp->index = 0;
+	if (atomic_sub_return(entries, &tx_scrq->used) <=
+	    (adapter->req_tx_entries_per_subcrq / 2) &&
+	    __netif_subqueue_stopped(adapter->netdev, queue_num)) {
+		netif_wake_subqueue(adapter->netdev, queue_num);
+		netdev_dbg(adapter->netdev, "Started queue %d\n",
+			   queue_num);
+	}
+}
+
+static int ibmvnic_tx_scrq_flush(struct ibmvnic_adapter *adapter,
+				 struct ibmvnic_sub_crq_queue *tx_scrq)
+{
+	struct ibmvnic_ind_xmit_queue *ind_bufp;
+	u64 dma_addr;
+	u64 entries;
+	u64 handle;
+	int rc;
+
+	ind_bufp = &tx_scrq->ind_buf;
+	dma_addr = (u64)ind_bufp->indir_dma;
+	entries = (u64)ind_bufp->index;
+	handle = tx_scrq->handle;
+
+	if (!entries)
+		return 0;
+	rc = send_subcrq_indirect(adapter, handle, dma_addr, entries);
+	if (rc)
+		ibmvnic_tx_scrq_clean_buffer(adapter, tx_scrq);
+	else
+		ind_bufp->index = 0;
+	return 0;
+}
+
 static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
 {
 	struct ibmvnic_adapter *adapter = netdev_priv(netdev);
 	int queue_num = skb_get_queue_mapping(skb);
 	u8 *hdrs = (u8 *)&adapter->tx_rx_desc_req;
 	struct device *dev = &adapter->vdev->dev;
+	struct ibmvnic_ind_xmit_queue *ind_bufp;
 	struct ibmvnic_tx_buff *tx_buff = NULL;
 	struct ibmvnic_sub_crq_queue *tx_scrq;
 	struct ibmvnic_tx_pool *tx_pool;
 	unsigned int tx_send_failed = 0;
+	netdev_tx_t ret = NETDEV_TX_OK;
 	unsigned int tx_map_failed = 0;
 	unsigned int tx_dropped = 0;
 	unsigned int tx_packets = 0;
@@ -1546,8 +1624,10 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
 	unsigned char *dst;
 	int index = 0;
 	u8 proto = 0;
-	u64 handle;
-	netdev_tx_t ret = NETDEV_TX_OK;
+
+	tx_scrq = adapter->tx_scrq[queue_num];
+	txq = netdev_get_tx_queue(netdev, queue_num);
+	ind_bufp = &tx_scrq->ind_buf;
 
 	if (test_bit(0, &adapter->resetting)) {
 		if (!netif_subqueue_stopped(netdev, skb))
@@ -1557,6 +1637,7 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
 		tx_send_failed++;
 		tx_dropped++;
 		ret = NETDEV_TX_OK;
+		ibmvnic_tx_scrq_flush(adapter, tx_scrq);
 		goto out;
 	}
 
@@ -1564,6 +1645,7 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
 		tx_dropped++;
 		tx_send_failed++;
 		ret = NETDEV_TX_OK;
+		ibmvnic_tx_scrq_flush(adapter, tx_scrq);
 		goto out;
 	}
 	if (skb_is_gso(skb))
@@ -1571,10 +1653,6 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
 	else
 		tx_pool = &adapter->tx_pool[queue_num];
 
-	tx_scrq = adapter->tx_scrq[queue_num];
-	txq = netdev_get_tx_queue(netdev, skb_get_queue_mapping(skb));
-	handle = tx_scrq->handle;
-
 	index = tx_pool->free_map[tx_pool->consumer_index];
 
 	if (index == IBMVNIC_INVALID_MAP) {
@@ -1582,6 +1660,7 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
 		tx_send_failed++;
 		tx_dropped++;
 		ret = NETDEV_TX_OK;
+		ibmvnic_tx_scrq_flush(adapter, tx_scrq);
 		goto out;
 	}
 
@@ -1666,55 +1745,29 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
 		tx_crq.v1.mss = cpu_to_be16(skb_shinfo(skb)->gso_size);
 		hdrs += 2;
 	}
-	/* determine if l2/3/4 headers are sent to firmware */
-	if ((*hdrs >> 7) & 1) {
+
+	if ((*hdrs >> 7) & 1)
 		build_hdr_descs_arr(tx_buff, &num_entries, *hdrs);
-		tx_crq.v1.n_crq_elem = num_entries;
-		tx_buff->num_entries = num_entries;
-		tx_buff->indir_arr[0] = tx_crq;
-		tx_buff->indir_dma = dma_map_single(dev, tx_buff->indir_arr,
-						    sizeof(tx_buff->indir_arr),
-						    DMA_TO_DEVICE);
-		if (dma_mapping_error(dev, tx_buff->indir_dma)) {
-			dev_kfree_skb_any(skb);
-			tx_buff->skb = NULL;
-			if (!firmware_has_feature(FW_FEATURE_CMO))
-				dev_err(dev, "tx: unable to map descriptor array\n");
-			tx_map_failed++;
-			tx_dropped++;
-			ret = NETDEV_TX_OK;
-			goto tx_err_out;
-		}
-		lpar_rc = send_subcrq_indirect(adapter, handle,
-					       (u64)tx_buff->indir_dma,
-					       (u64)num_entries);
-		dma_unmap_single(dev, tx_buff->indir_dma,
-				 sizeof(tx_buff->indir_arr), DMA_TO_DEVICE);
-	} else {
-		tx_buff->num_entries = num_entries;
-		lpar_rc = send_subcrq(adapter, handle,
-				      &tx_crq);
-	}
-	if (lpar_rc != H_SUCCESS) {
-		if (lpar_rc != H_CLOSED && lpar_rc != H_PARAMETER)
-			dev_err_ratelimited(dev, "tx: send failed\n");
-		dev_kfree_skb_any(skb);
-		tx_buff->skb = NULL;
 
-		if (lpar_rc == H_CLOSED || adapter->failover_pending) {
-			/* Disable TX and report carrier off if queue is closed
-			 * or pending failover.
-			 * Firmware guarantees that a signal will be sent to the
-			 * driver, triggering a reset or some other action.
-			 */
-			netif_tx_stop_all_queues(netdev);
-			netif_carrier_off(netdev);
-		}
+	tx_crq.v1.n_crq_elem = num_entries;
+	tx_buff->num_entries = num_entries;
+	/* flush buffer if current entry can not fit */
+	if (num_entries + ind_bufp->index > IBMVNIC_MAX_IND_DESCS) {
+		lpar_rc = ibmvnic_tx_scrq_flush(adapter, tx_scrq);
+		if (lpar_rc != H_SUCCESS)
+			goto tx_flush_err;
+	}
 
-		tx_send_failed++;
-		tx_dropped++;
-		ret = NETDEV_TX_OK;
-		goto tx_err_out;
+	tx_buff->indir_arr[0] = tx_crq;
+	memcpy(&ind_bufp->indir_arr[ind_bufp->index], tx_buff->indir_arr,
+	       num_entries * sizeof(struct ibmvnic_generic_scrq));
+	ind_bufp->index += num_entries;
+	if (__netdev_tx_sent_queue(txq, skb->len,
+				   netdev_xmit_more() &&
+				   ind_bufp->index < IBMVNIC_MAX_IND_DESCS)) {
+		lpar_rc = ibmvnic_tx_scrq_flush(adapter, tx_scrq);
+		if (lpar_rc != H_SUCCESS)
+			goto tx_err;
 	}
 
 	if (atomic_add_return(num_entries, &tx_scrq->used)
@@ -1729,14 +1782,26 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
 	ret = NETDEV_TX_OK;
 	goto out;
 
-tx_err_out:
-	/* roll back consumer index and map array*/
-	if (tx_pool->consumer_index == 0)
-		tx_pool->consumer_index =
-			tx_pool->num_buffers - 1;
-	else
-		tx_pool->consumer_index--;
-	tx_pool->free_map[tx_pool->consumer_index] = index;
+tx_flush_err:
+	dev_kfree_skb_any(skb);
+	tx_buff->skb = NULL;
+	tx_pool->consumer_index = tx_pool->consumer_index == 0 ?
+				  tx_pool->num_buffers - 1 :
+				  tx_pool->consumer_index - 1;
+	tx_dropped++;
+tx_err:
+	if (lpar_rc != H_CLOSED && lpar_rc != H_PARAMETER)
+		dev_err_ratelimited(dev, "tx: send failed\n");
+
+	if (lpar_rc == H_CLOSED || adapter->failover_pending) {
+		/* Disable TX and report carrier off if queue is closed
+		 * or pending failover.
+		 * Firmware guarantees that a signal will be sent to the
+		 * driver, triggering a reset or some other action.
+		 */
+		netif_tx_stop_all_queues(netdev);
+		netif_carrier_off(netdev);
+	}
 out:
 	netdev->stats.tx_dropped += tx_dropped;
 	netdev->stats.tx_bytes += tx_bytes;
@@ -3117,6 +3182,7 @@ static int ibmvnic_complete_tx(struct ibmvnic_adapter *adapter,
 	struct device *dev = &adapter->vdev->dev;
 	struct ibmvnic_tx_pool *tx_pool;
 	struct ibmvnic_tx_buff *txbuff;
+	struct netdev_queue *txq;
 	union sub_crq *next;
 	int index;
 	int i, j;
@@ -3125,6 +3191,8 @@ static int ibmvnic_complete_tx(struct ibmvnic_adapter *adapter,
 	while (pending_scrq(adapter, scrq)) {
 		unsigned int pool = scrq->pool_index;
 		int num_entries = 0;
+		int total_bytes = 0;
+		int num_packets = 0;
 
 		next = ibmvnic_next_scrq(adapter, scrq);
 		for (i = 0; i < next->tx_comp.num_comps; i++) {
@@ -3150,13 +3218,16 @@ static int ibmvnic_complete_tx(struct ibmvnic_adapter *adapter,
 				txbuff->data_dma[j] = 0;
 			}
 
-			if (txbuff->last_frag) {
-				dev_kfree_skb_any(txbuff->skb);
+			num_packets++;
+			num_entries += txbuff->num_entries;
+			if (txbuff->skb) {
+				total_bytes += txbuff->skb->len;
+				dev_consume_skb_irq(txbuff->skb);
 				txbuff->skb = NULL;
+			} else {
+				netdev_warn(adapter->netdev,
+					    "TX completion received with NULL socket buffer\n");
 			}
-
-			num_entries += txbuff->num_entries;
-
 			tx_pool->free_map[tx_pool->producer_index] = index;
 			tx_pool->producer_index =
 				(tx_pool->producer_index + 1) %
@@ -3165,6 +3236,9 @@ static int ibmvnic_complete_tx(struct ibmvnic_adapter *adapter,
 		/* remove tx_comp scrq*/
 		next->tx_comp.first = 0;
 
+		txq = netdev_get_tx_queue(adapter->netdev, scrq->pool_index);
+		netdev_tx_completed_queue(txq, num_packets, total_bytes);
+
 		if (atomic_sub_return(num_entries, &scrq->used) <=
 		    (adapter->req_tx_entries_per_subcrq / 2) &&
 		    __netif_subqueue_stopped(adapter->netdev,
-- 
2.26.2


^ permalink raw reply related

* [PATCH net-next v2 4/9] ibmvnic: Clean up TX code and TX buffer data structure
From: Thomas Falcon @ 2020-11-19  1:12 UTC (permalink / raw)
  To: kuba
  Cc: cforno12, netdev, ljp, ricklind, dnbanerg, tlfalcon, drt, brking,
	sukadev, linuxppc-dev
In-Reply-To: <1605748345-32062-1-git-send-email-tlfalcon@linux.ibm.com>

Remove unused and superfluous code and members in the
existing TX implementation and its data structures.

Signed-off-by: Thomas Falcon <tlfalcon@linux.ibm.com>
---
 drivers/net/ethernet/ibm/ibmvnic.c | 31 +++++++++++-------------------
 drivers/net/ethernet/ibm/ibmvnic.h |  8 --------
 2 files changed, 11 insertions(+), 28 deletions(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index 650aaf100d65..2aace693559f 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -1496,17 +1496,18 @@ static int create_hdr_descs(u8 hdr_field, u8 *hdr_data, int len, int *hdr_len,
  * L2/L3/L4 packet header descriptors to be sent by send_subcrq_indirect.
  */
 
-static void build_hdr_descs_arr(struct ibmvnic_tx_buff *txbuff,
+static void build_hdr_descs_arr(struct sk_buff *skb,
+				union sub_crq *indir_arr,
 				int *num_entries, u8 hdr_field)
 {
 	int hdr_len[3] = {0, 0, 0};
+	u8 hdr_data[140] = {0};
 	int tot_len;
-	u8 *hdr_data = txbuff->hdr_data;
 
-	tot_len = build_hdr_data(hdr_field, txbuff->skb, hdr_len,
-				 txbuff->hdr_data);
+	tot_len = build_hdr_data(hdr_field, skb, hdr_len,
+				 hdr_data);
 	*num_entries += create_hdr_descs(hdr_field, hdr_data, tot_len, hdr_len,
-			 txbuff->indir_arr + 1);
+					 indir_arr + 1);
 }
 
 static int ibmvnic_xmit_workarounds(struct sk_buff *skb,
@@ -1612,6 +1613,7 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
 	unsigned int tx_send_failed = 0;
 	netdev_tx_t ret = NETDEV_TX_OK;
 	unsigned int tx_map_failed = 0;
+	union sub_crq indir_arr[16];
 	unsigned int tx_dropped = 0;
 	unsigned int tx_packets = 0;
 	unsigned int tx_bytes = 0;
@@ -1696,11 +1698,8 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
 
 	tx_buff = &tx_pool->tx_buff[index];
 	tx_buff->skb = skb;
-	tx_buff->data_dma[0] = data_dma_addr;
-	tx_buff->data_len[0] = skb->len;
 	tx_buff->index = index;
 	tx_buff->pool_index = queue_num;
-	tx_buff->last_frag = true;
 
 	memset(&tx_crq, 0, sizeof(tx_crq));
 	tx_crq.v1.first = IBMVNIC_CRQ_CMD;
@@ -1747,7 +1746,7 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
 	}
 
 	if ((*hdrs >> 7) & 1)
-		build_hdr_descs_arr(tx_buff, &num_entries, *hdrs);
+		build_hdr_descs_arr(skb, indir_arr, &num_entries, *hdrs);
 
 	tx_crq.v1.n_crq_elem = num_entries;
 	tx_buff->num_entries = num_entries;
@@ -1758,8 +1757,8 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
 			goto tx_flush_err;
 	}
 
-	tx_buff->indir_arr[0] = tx_crq;
-	memcpy(&ind_bufp->indir_arr[ind_bufp->index], tx_buff->indir_arr,
+	indir_arr[0] = tx_crq;
+	memcpy(&ind_bufp->indir_arr[ind_bufp->index], &indir_arr[0],
 	       num_entries * sizeof(struct ibmvnic_generic_scrq));
 	ind_bufp->index += num_entries;
 	if (__netdev_tx_sent_queue(txq, skb->len,
@@ -3185,7 +3184,7 @@ static int ibmvnic_complete_tx(struct ibmvnic_adapter *adapter,
 	struct netdev_queue *txq;
 	union sub_crq *next;
 	int index;
-	int i, j;
+	int i;
 
 restart_loop:
 	while (pending_scrq(adapter, scrq)) {
@@ -3210,14 +3209,6 @@ static int ibmvnic_complete_tx(struct ibmvnic_adapter *adapter,
 			}
 
 			txbuff = &tx_pool->tx_buff[index];
-
-			for (j = 0; j < IBMVNIC_MAX_FRAGS_PER_CRQ; j++) {
-				if (!txbuff->data_dma[j])
-					continue;
-
-				txbuff->data_dma[j] = 0;
-			}
-
 			num_packets++;
 			num_entries += txbuff->num_entries;
 			if (txbuff->skb) {
diff --git a/drivers/net/ethernet/ibm/ibmvnic.h b/drivers/net/ethernet/ibm/ibmvnic.h
index 4a63e9886719..16d892c3db0f 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.h
+++ b/drivers/net/ethernet/ibm/ibmvnic.h
@@ -226,8 +226,6 @@ struct ibmvnic_tx_comp_desc {
 #define IBMVNIC_TCP_CHKSUM		0x20
 #define IBMVNIC_UDP_CHKSUM		0x08
 
-#define IBMVNIC_MAX_FRAGS_PER_CRQ 3
-
 struct ibmvnic_tx_desc {
 	u8 first;
 	u8 type;
@@ -896,14 +894,8 @@ struct ibmvnic_long_term_buff {
 
 struct ibmvnic_tx_buff {
 	struct sk_buff *skb;
-	dma_addr_t data_dma[IBMVNIC_MAX_FRAGS_PER_CRQ];
-	unsigned int data_len[IBMVNIC_MAX_FRAGS_PER_CRQ];
 	int index;
 	int pool_index;
-	bool last_frag;
-	union sub_crq indir_arr[6];
-	u8 hdr_data[140];
-	dma_addr_t indir_dma;
 	int num_entries;
 };
 
-- 
2.26.2


^ permalink raw reply related

* [PATCH net-next v2 6/9] ibmvnic: Ensure that device queue memory is cache-line aligned
From: Thomas Falcon @ 2020-11-19  1:12 UTC (permalink / raw)
  To: kuba
  Cc: cforno12, netdev, ljp, ricklind, dnbanerg, tlfalcon, drt, brking,
	sukadev, linuxppc-dev
In-Reply-To: <1605748345-32062-1-git-send-email-tlfalcon@linux.ibm.com>

From: "Dwip N. Banerjee" <dnbanerg@us.ibm.com>

PCI bus slowdowns were observed on IBM VNIC devices as a result
of partial cache line writes and full cache line writes that are not
cache-line aligned. Ensure that packet data buffers are cache-line
aligned to avoid these slowdowns.

Signed-off-by: Dwip N. Banerjee <dnbanerg@us.ibm.com>
---
 drivers/net/ethernet/ibm/ibmvnic.c |  9 ++++++---
 drivers/net/ethernet/ibm/ibmvnic.h | 10 +++++-----
 2 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index e9b0cb6dfd9d..85df91c9861b 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -498,7 +498,7 @@ static int reset_rx_pools(struct ibmvnic_adapter *adapter)
 
 		if (rx_pool->buff_size != buff_size) {
 			free_long_term_buff(adapter, &rx_pool->long_term_buff);
-			rx_pool->buff_size = buff_size;
+			rx_pool->buff_size = ALIGN(buff_size, L1_CACHE_BYTES);
 			rc = alloc_long_term_buff(adapter,
 						  &rx_pool->long_term_buff,
 						  rx_pool->size *
@@ -592,7 +592,7 @@ static int init_rx_pools(struct net_device *netdev)
 
 		rx_pool->size = adapter->req_rx_add_entries_per_subcrq;
 		rx_pool->index = i;
-		rx_pool->buff_size = buff_size;
+		rx_pool->buff_size = ALIGN(buff_size, L1_CACHE_BYTES);
 		rx_pool->active = 1;
 
 		rx_pool->free_map = kcalloc(rx_pool->size, sizeof(int),
@@ -745,6 +745,7 @@ static int init_tx_pools(struct net_device *netdev)
 {
 	struct ibmvnic_adapter *adapter = netdev_priv(netdev);
 	int tx_subcrqs;
+	u64 buff_size;
 	int i, rc;
 
 	tx_subcrqs = adapter->num_active_tx_scrqs;
@@ -761,9 +762,11 @@ static int init_tx_pools(struct net_device *netdev)
 	adapter->num_active_tx_pools = tx_subcrqs;
 
 	for (i = 0; i < tx_subcrqs; i++) {
+		buff_size = adapter->req_mtu + VLAN_HLEN;
+		buff_size = ALIGN(buff_size, L1_CACHE_BYTES);
 		rc = init_one_tx_pool(netdev, &adapter->tx_pool[i],
 				      adapter->req_tx_entries_per_subcrq,
-				      adapter->req_mtu + VLAN_HLEN);
+				      buff_size);
 		if (rc) {
 			release_tx_pools(adapter);
 			return rc;
diff --git a/drivers/net/ethernet/ibm/ibmvnic.h b/drivers/net/ethernet/ibm/ibmvnic.h
index 16d892c3db0f..9911d926dd7f 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.h
+++ b/drivers/net/ethernet/ibm/ibmvnic.h
@@ -883,7 +883,7 @@ struct ibmvnic_sub_crq_queue {
 	atomic_t used;
 	char name[32];
 	u64 handle;
-};
+} ____cacheline_aligned;
 
 struct ibmvnic_long_term_buff {
 	unsigned char *buff;
@@ -907,7 +907,7 @@ struct ibmvnic_tx_pool {
 	struct ibmvnic_long_term_buff long_term_buff;
 	int num_buffers;
 	int buf_size;
-};
+} ____cacheline_aligned;
 
 struct ibmvnic_rx_buff {
 	struct sk_buff *skb;
@@ -928,7 +928,7 @@ struct ibmvnic_rx_pool {
 	int next_alloc;
 	int active;
 	struct ibmvnic_long_term_buff long_term_buff;
-};
+} ____cacheline_aligned;
 
 struct ibmvnic_vpd {
 	unsigned char *buff;
@@ -1015,8 +1015,8 @@ struct ibmvnic_adapter {
 	atomic_t running_cap_crqs;
 	bool wait_capability;
 
-	struct ibmvnic_sub_crq_queue **tx_scrq;
-	struct ibmvnic_sub_crq_queue **rx_scrq;
+	struct ibmvnic_sub_crq_queue **tx_scrq ____cacheline_aligned;
+	struct ibmvnic_sub_crq_queue **rx_scrq ____cacheline_aligned;
 
 	/* rx structs */
 	struct napi_struct *napi;
-- 
2.26.2


^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox