LinuxPPC-Dev Archive on lore.kernel.org

LinuxPPC-Dev Archive on lore.kernel.org
 help / color / mirror / Atom feed

* [powerpc:next] BUILD SUCCESS c9344769e2b46ba28b947bec7a8a8f0a091ecd57
From: kernel test robot @ 2020-12-04 22:26 UTC (permalink / raw)
  To: Michael Ellerman; +Cc: linuxppc-dev

tree/branch: https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git  next
branch HEAD: c9344769e2b46ba28b947bec7a8a8f0a091ecd57  selftests/powerpc: Fix uninitialized variable warning

elapsed time: 1926m

configs tested: 161
configs skipped: 4

The following configs have been built successfully.
More configs may be tested in the coming days.

gcc tested configs:
arm                                 defconfig
arm64                            allyesconfig
arm64                               defconfig
arm                              allyesconfig
arm                              allmodconfig
arm                         shannon_defconfig
mips                     decstation_defconfig
mips                         mpc30x_defconfig
mips                           ci20_defconfig
nios2                            alldefconfig
arc                        vdk_hs38_defconfig
powerpc                      cm5200_defconfig
arm                          tango4_defconfig
mips                 decstation_r4k_defconfig
mips                           mtx1_defconfig
mips                        qi_lb60_defconfig
arm                           omap1_defconfig
arm                           h5000_defconfig
m68k                          amiga_defconfig
sh                          r7785rp_defconfig
sh                         microdev_defconfig
m68k                       m5275evb_defconfig
nds32                            alldefconfig
mips                      fuloong2e_defconfig
sh                           se7724_defconfig
powerpc                      obs600_defconfig
arm                          pcm027_defconfig
arm                            dove_defconfig
mips                         tb0219_defconfig
m68k                       m5475evb_defconfig
powerpc                     ep8248e_defconfig
powerpc                     mpc512x_defconfig
arm                         orion5x_defconfig
arm                        shmobile_defconfig
arm                           u8500_defconfig
powerpc                      ppc6xx_defconfig
arm                         vf610m4_defconfig
sh                           se7750_defconfig
powerpc                      mgcoge_defconfig
ia64                          tiger_defconfig
mips                        jmr3927_defconfig
xtensa                          iss_defconfig
arm                          ixp4xx_defconfig
powerpc                   motionpro_defconfig
nds32                               defconfig
mips                           jazz_defconfig
powerpc                        cell_defconfig
ia64                             alldefconfig
powerpc                     tqm5200_defconfig
arc                         haps_hs_defconfig
arm                           spitz_defconfig
arm                          exynos_defconfig
powerpc                     asp8347_defconfig
arm                         nhk8815_defconfig
arm                  colibri_pxa300_defconfig
powerpc                      chrp32_defconfig
sh                          urquell_defconfig
arm                       netwinder_defconfig
mips                            gpr_defconfig
powerpc                     skiroot_defconfig
arm                       aspeed_g4_defconfig
arm                         hackkit_defconfig
sparc                       sparc64_defconfig
powerpc                         wii_defconfig
arm                       versatile_defconfig
arc                     nsimosci_hs_defconfig
mips                  maltasmvp_eva_defconfig
arm                        trizeps4_defconfig
arm                        spear3xx_defconfig
powerpc                     redwood_defconfig
m68k                       m5208evb_defconfig
arm                           stm32_defconfig
powerpc                     pseries_defconfig
arm                          prima2_defconfig
sh                            titan_defconfig
powerpc                       eiger_defconfig
sh                          lboxre2_defconfig
mips                           ip32_defconfig
arm                         s3c2410_defconfig
xtensa                              defconfig
c6x                        evmc6474_defconfig
powerpc                    amigaone_defconfig
powerpc               mpc834x_itxgp_defconfig
arm                    vt8500_v6_v7_defconfig
parisc                              defconfig
arm                        keystone_defconfig
sh                           se7343_defconfig
powerpc                     tqm8540_defconfig
arm                             pxa_defconfig
arm                       omap2plus_defconfig
powerpc                    socrates_defconfig
xtensa                    smp_lx200_defconfig
c6x                        evmc6472_defconfig
ia64                             allmodconfig
ia64                                defconfig
ia64                             allyesconfig
m68k                             allmodconfig
m68k                                defconfig
m68k                             allyesconfig
nios2                               defconfig
arc                              allyesconfig
nds32                             allnoconfig
c6x                              allyesconfig
nios2                            allyesconfig
csky                                defconfig
alpha                               defconfig
alpha                            allyesconfig
xtensa                           allyesconfig
h8300                            allyesconfig
arc                                 defconfig
sh                               allmodconfig
s390                             allyesconfig
parisc                           allyesconfig
s390                                defconfig
i386                             allyesconfig
sparc                            allyesconfig
sparc                               defconfig
i386                               tinyconfig
i386                                defconfig
mips                             allyesconfig
mips                             allmodconfig
powerpc                          allyesconfig
powerpc                          allmodconfig
powerpc                           allnoconfig
x86_64               randconfig-a004-20201204
x86_64               randconfig-a006-20201204
x86_64               randconfig-a002-20201204
x86_64               randconfig-a001-20201204
x86_64               randconfig-a005-20201204
x86_64               randconfig-a003-20201204
i386                 randconfig-a005-20201204
i386                 randconfig-a004-20201204
i386                 randconfig-a001-20201204
i386                 randconfig-a002-20201204
i386                 randconfig-a006-20201204
i386                 randconfig-a003-20201204
i386                 randconfig-a014-20201204
i386                 randconfig-a013-20201204
i386                 randconfig-a011-20201204
i386                 randconfig-a015-20201204
i386                 randconfig-a012-20201204
i386                 randconfig-a016-20201204
riscv                    nommu_k210_defconfig
riscv                            allyesconfig
riscv                    nommu_virt_defconfig
riscv                             allnoconfig
riscv                               defconfig
riscv                          rv32_defconfig
riscv                            allmodconfig
x86_64                                   rhel
x86_64                           allyesconfig
x86_64                    rhel-7.6-kselftests
x86_64                              defconfig
x86_64                                  kexec
x86_64                               rhel-8.3

clang tested configs:
x86_64               randconfig-a016-20201204
x86_64               randconfig-a012-20201204
x86_64               randconfig-a014-20201204
x86_64               randconfig-a013-20201204
x86_64               randconfig-a015-20201204
x86_64               randconfig-a011-20201204

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org

^ permalink raw reply

* Re: [PATCH v3 18/18] ibmvfc: drop host lock when completing commands in CRQ
From: Brian King @ 2020-12-04 21:35 UTC (permalink / raw)
  To: Tyrel Datwyler, james.bottomley
  Cc: brking, linuxppc-dev, linux-scsi, martin.petersen, linux-kernel
In-Reply-To: <20201203020806.14747-19-tyreld@linux.ibm.com>

On 12/2/20 8:08 PM, Tyrel Datwyler wrote:
> The legacy CRQ holds the host lock the even while completing commands.
> This presents a problem when in legacy single queue mode and
> nr_hw_queues is greater than one since calling scsi_done() introduces
> the potential for deadlock.
> 
> If nr_hw_queues is greater than one drop the hostlock in the legacy CRQ
> path when completing a command.
> 
> Signed-off-by: Tyrel Datwyler <tyreld@linux.ibm.com>
> ---
>  drivers/scsi/ibmvscsi/ibmvfc.c | 8 +++++++-
>  1 file changed, 7 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c
> index e499599662ec..e2200bdff2be 100644
> --- a/drivers/scsi/ibmvscsi/ibmvfc.c
> +++ b/drivers/scsi/ibmvscsi/ibmvfc.c
> @@ -2969,6 +2969,7 @@ static void ibmvfc_handle_crq(struct ibmvfc_crq *crq, struct ibmvfc_host *vhost)
>  {
>  	long rc;
>  	struct ibmvfc_event *evt = (struct ibmvfc_event *)be64_to_cpu(crq->ioba);
> +	unsigned long flags;
>  
>  	switch (crq->valid) {
>  	case IBMVFC_CRQ_INIT_RSP:
> @@ -3039,7 +3040,12 @@ static void ibmvfc_handle_crq(struct ibmvfc_crq *crq, struct ibmvfc_host *vhost)
>  	del_timer(&evt->timer);
>  	list_del(&evt->queue);
>  	ibmvfc_trc_end(evt);
> -	evt->done(evt);
> +	if (nr_scsi_hw_queues > 1) {
> +		spin_unlock_irqrestore(vhost->host->host_lock, flags);
> +		evt->done(evt);
> +		spin_lock_irqsave(vhost->host->host_lock, flags);
> +	} else
> +		evt->done(evt);

Similar comment here as previously. The flags parameter is an output for
spin_lock_irqsave but an input for spin_unlock_irqrestore. You'll need
to rethink the locking here. You could just do a spin_unlock_irq / spin_lock_irq
here and that would probably be OK, but probably isn't the best. 

>  }
>  
>  /**
> 


-- 
Brian King
Power Linux I/O
IBM Linux Technology Center


^ permalink raw reply

* Re: [PATCH v3 17/18] ibmvfc: provide modules parameters for MQ settings
From: Brian King @ 2020-12-04 21:28 UTC (permalink / raw)
  To: Tyrel Datwyler, james.bottomley
  Cc: brking, linuxppc-dev, linux-scsi, martin.petersen, linux-kernel
In-Reply-To: <20201203020806.14747-18-tyreld@linux.ibm.com>

Reviewed-by: Brian King <brking@linux.vnet.ibm.com>


-- 
Brian King
Power Linux I/O
IBM Linux Technology Center


^ permalink raw reply

* Re: [PATCH v3 15/18] ibmvfc: send Cancel MAD down each hw scsi channel
From: Brian King @ 2020-12-04 21:26 UTC (permalink / raw)
  To: Tyrel Datwyler, james.bottomley
  Cc: brking, linuxppc-dev, linux-scsi, martin.petersen, linux-kernel
In-Reply-To: <20201203020806.14747-16-tyreld@linux.ibm.com>

On 12/2/20 8:08 PM, Tyrel Datwyler wrote:
> In general the client needs to send Cancel MADs and task management
> commands down the same channel as the command(s) intended to cancel or
> abort. The client assigns cancel keys per LUN and thus must send a
> Cancel down each channel commands were submitted for that LUN. Further,
> the client then must wait for those cancel completions prior to
> submitting a LUN RESET or ABORT TASK SET.
> 
> Add a cancel event pointer and cancel rsp iu storage to the
> ibmvfc_sub_queue struct such that the cancel routine can assign a cancel
> event to each applicable queue. When in legacy CRQ mode we fake treating
> it as a subqueue by using a subqueue struct allocated on the stack. Wait
> for completion of each submitted cancel.
> 
> Signed-off-by: Tyrel Datwyler <tyreld@linux.ibm.com>
> ---
>  drivers/scsi/ibmvscsi/ibmvfc.c | 104 ++++++++++++++++++++++-----------
>  drivers/scsi/ibmvscsi/ibmvfc.h |  38 ++++++------
>  2 files changed, 90 insertions(+), 52 deletions(-)
> 
> diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c
> index ec3db5a6baf3..e353b9e88104 100644
> --- a/drivers/scsi/ibmvscsi/ibmvfc.c
> +++ b/drivers/scsi/ibmvscsi/ibmvfc.c
> @@ -2339,67 +2339,103 @@ static int ibmvfc_cancel_all(struct scsi_device *sdev, int type)
>  {
>  	struct ibmvfc_host *vhost = shost_priv(sdev->host);
>  	struct ibmvfc_event *evt, *found_evt;
> -	union ibmvfc_iu rsp;
> -	int rsp_rc = -EBUSY;
> +	struct ibmvfc_sub_queue *scrqs;
> +	struct ibmvfc_sub_queue legacy_crq;
> +	int rsp_rc = 0;
>  	unsigned long flags;
>  	u16 status;
> +	int cancel_cnt = 0;
> +	int num_hwq;
> +	int ret = 0;
> +	int i;
>  
>  	ENTER;
>  	spin_lock_irqsave(vhost->host->host_lock, flags);
> -	found_evt = NULL;
> -	list_for_each_entry(evt, &vhost->sent, queue) {
> -		if (evt->cmnd && evt->cmnd->device == sdev) {
> -			found_evt = evt;
> +	if (vhost->using_channels && vhost->scsi_scrqs.active_queues) {
> +		num_hwq = vhost->scsi_scrqs.active_queues;
> +		scrqs = vhost->scsi_scrqs.scrqs;
> +	} else {
> +		/* Use ibmvfc_sub_queue on the stack to fake legacy CRQ as a subqueue */
> +		num_hwq = 1;
> +		scrqs = &legacy_crq;
> +	}
> +
> +	for (i = 0; i < num_hwq; i++) {
> +		scrqs[i].cancel_event = NULL;
> +		found_evt = NULL;
> +		list_for_each_entry(evt, &vhost->sent, queue) {
> +			if (evt->cmnd && evt->cmnd->device == sdev && evt->hwq == i) {
> +				found_evt = evt;
> +				cancel_cnt++;
> +				break;
> +			}
> +		}
> +
> +		if (!found_evt)
> +			continue;
> +
> +		if (vhost->logged_in) {
> +			scrqs[i].cancel_event = ibmvfc_init_tmf(vhost, sdev, type);
> +			scrqs[i].cancel_event->hwq = i;
> +			scrqs[i].cancel_event->sync_iu = &scrqs[i].cancel_rsp;
> +			rsp_rc = ibmvfc_send_event(scrqs[i].cancel_event, vhost, default_timeout);
> +			if (rsp_rc)
> +				break;

It looks like if you have two outstanding commands, on two different hwqs, and you succeed
in sending a cancel for the first hwq but fail sending it for the second hwq due to
something happening like a xport event of some sort, then you would end up falling down
into free_events where you'd call ibmvfc_free_event which will do a list_add_tail to add
the event to the free list without having even pulled the event off the sent list, which
will result in list corruption as now the free list and sent list will be intermingled.
It would probably be better to only free the events if you never sent them or if you
are sure they completed. So, you might need to have to wait for the completion of
the cancel events that did get sent, which would likely be completed via purge_all.


> +		} else {
> +			rsp_rc = -EBUSY;
>  			break;
>  		}
>  	}
>  
> -	if (!found_evt) {
> +	spin_unlock_irqrestore(vhost->host->host_lock, flags);
> +
> +	if (!cancel_cnt) {
>  		if (vhost->log_level > IBMVFC_DEFAULT_LOG_LEVEL)
>  			sdev_printk(KERN_INFO, sdev, "No events found to cancel\n");
> -		spin_unlock_irqrestore(vhost->host->host_lock, flags);
>  		return 0;
>  	}
>  
> -	if (vhost->logged_in) {
> -		evt = ibmvfc_init_tmf(vhost, sdev, type);
> -		evt->sync_iu = &rsp;
> -		rsp_rc = ibmvfc_send_event(evt, vhost, default_timeout);
> -	}
> -
> -	spin_unlock_irqrestore(vhost->host->host_lock, flags);
> -
>  	if (rsp_rc != 0) {
>  		sdev_printk(KERN_ERR, sdev, "Failed to send cancel event. rc=%d\n", rsp_rc);
>  		/* If failure is received, the host adapter is most likely going
>  		 through reset, return success so the caller will wait for the command
>  		 being cancelled to get returned */
> -		return 0;
> +		goto free_events;
>  	}
>  
>  	sdev_printk(KERN_INFO, sdev, "Cancelling outstanding commands.\n");
>  
> -	wait_for_completion(&evt->comp);
> -	status = be16_to_cpu(rsp.mad_common.status);
> -	spin_lock_irqsave(vhost->host->host_lock, flags);
> -	ibmvfc_free_event(evt);
> -	spin_unlock_irqrestore(vhost->host->host_lock, flags);
> +	for (i = 0; i < num_hwq; i++) {
> +		if (!scrqs[i].cancel_event)
> +			continue;
>  
> -	if (status != IBMVFC_MAD_SUCCESS) {
> -		sdev_printk(KERN_WARNING, sdev, "Cancel failed with rc=%x\n", status);
> -		switch (status) {
> -		case IBMVFC_MAD_DRIVER_FAILED:
> -		case IBMVFC_MAD_CRQ_ERROR:
> -			/* Host adapter most likely going through reset, return success to
> -			 the caller will wait for the command being cancelled to get returned */
> -			return 0;
> -		default:
> -			return -EIO;
> -		};
> +		wait_for_completion(&scrqs[i].cancel_event->comp);
> +		status = be16_to_cpu(scrqs[i].cancel_rsp.mad_common.status);
> +
> +		if (status != IBMVFC_MAD_SUCCESS) {
> +			sdev_printk(KERN_WARNING, sdev, "Cancel failed with rc=%x\n", status);
> +			switch (status) {
> +			case IBMVFC_MAD_DRIVER_FAILED:
> +			case IBMVFC_MAD_CRQ_ERROR:
> +				/* Host adapter most likely going through reset, return success to
> +				 the caller will wait for the command being cancelled to get returned */
> +				goto free_events;

Similar comment here... What about the rest of the outstanding cancel commands? Do you need
to wait for those to complete before freeing them?

> +			default:
> +				ret = -EIO;
> +				goto free_events;
> +			};
> +		}
>  	}
>  
>  	sdev_printk(KERN_INFO, sdev, "Successfully cancelled outstanding commands\n");
> -	return 0;
> +free_events:
> +	spin_lock_irqsave(vhost->host->host_lock, flags);
> +	for (i = 0; i < num_hwq; i++)
> +		if (scrqs[i].cancel_event)
> +			ibmvfc_free_event(scrqs[i].cancel_event);
> +	spin_unlock_irqrestore(vhost->host->host_lock, flags);
> +
> +	return ret;
>  }
>  
>  /**



-- 
Brian King
Power Linux I/O
IBM Linux Technology Center


^ permalink raw reply

* Re: [PATCH kernel v2] powerpc/kuap: Restore AMR after replaying soft interrupts
From: kernel test robot @ 2020-12-04 21:02 UTC (permalink / raw)
  To: Alexey Kardashevskiy, linuxppc-dev
  Cc: Alexey Kardashevskiy, clang-built-linux, kbuild-all,
	Nicholas Piggin
In-Reply-To: <20201203054724.44838-1-aik@ozlabs.ru>

[-- Attachment #1: Type: text/plain, Size: 7629 bytes --]

Hi Alexey,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on linus/master]
[also build test ERROR on v5.10-rc6 next-20201204]
[cannot apply to powerpc/next scottwood/next]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Alexey-Kardashevskiy/powerpc-kuap-Restore-AMR-after-replaying-soft-interrupts/20201203-135010
base:   https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git 34816d20f173a90389c8a7e641166d8ea9dce70a
config: powerpc64-randconfig-r023-20201204 (attached as .config)
compiler: clang version 12.0.0 (https://github.com/llvm/llvm-project 32c501dd88b62787d3a5ffda7aabcf4650dbe3cd)
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # install powerpc64 cross compiling tool for clang build
        # apt-get install binutils-powerpc64-linux-gnu
        # https://github.com/0day-ci/linux/commit/d2a99260ef6d241abe6fb961ee3ed84bbc5db7f5
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Alexey-Kardashevskiy/powerpc-kuap-Restore-AMR-after-replaying-soft-interrupts/20201203-135010
        git checkout d2a99260ef6d241abe6fb961ee3ed84bbc5db7f5
        # save the attached .config to linux build tree
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross ARCH=powerpc64 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

   In file included from arch/powerpc/kernel/asm-offsets.c:14:
   In file included from include/linux/compat.h:17:
   In file included from include/linux/fs.h:33:
   In file included from include/linux/percpu-rwsem.h:7:
   In file included from include/linux/rcuwait.h:6:
   In file included from include/linux/sched/signal.h:9:
   In file included from include/linux/sched/task.h:11:
   In file included from include/linux/uaccess.h:11:
   In file included from arch/powerpc/include/asm/uaccess.h:9:
>> arch/powerpc/include/asm/kup.h:71:29: error: redefinition of 'get_kuap'
   static inline unsigned long get_kuap(void)
                               ^
   arch/powerpc/include/asm/book3s/64/kup-radix.h:152:29: note: previous definition is here
   static inline unsigned long get_kuap(void)
                               ^
   In file included from arch/powerpc/kernel/asm-offsets.c:14:
   In file included from include/linux/compat.h:17:
   In file included from include/linux/fs.h:33:
   In file included from include/linux/percpu-rwsem.h:7:
   In file included from include/linux/rcuwait.h:6:
   In file included from include/linux/sched/signal.h:9:
   In file included from include/linux/sched/task.h:11:
   In file included from include/linux/uaccess.h:11:
   In file included from arch/powerpc/include/asm/uaccess.h:9:
>> arch/powerpc/include/asm/kup.h:76:20: error: redefinition of 'set_kuap'
   static inline void set_kuap(unsigned long value) { }
                      ^
   arch/powerpc/include/asm/book3s/64/kup-radix.h:157:20: note: previous definition is here
   static inline void set_kuap(unsigned long value) { }
                      ^
   In file included from arch/powerpc/kernel/asm-offsets.c:21:
   include/linux/mman.h:155:9: warning: division by zero is undefined [-Wdivision-by-zero]
                  _calc_vm_trans(flags, MAP_LOCKED,     VM_LOCKED    ) |
                  ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   include/linux/mman.h:133:21: note: expanded from macro '_calc_vm_trans'
      : ((x) & (bit1)) / ((bit1) / (bit2))))
                       ^ ~~~~~~~~~~~~~~~~~
   include/linux/mman.h:156:9: warning: division by zero is undefined [-Wdivision-by-zero]
                  _calc_vm_trans(flags, MAP_SYNC,       VM_SYNC      ) |
                  ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   include/linux/mman.h:133:21: note: expanded from macro '_calc_vm_trans'
      : ((x) & (bit1)) / ((bit1) / (bit2))))
                       ^ ~~~~~~~~~~~~~~~~~
   2 warnings and 2 errors generated.
--
   In file included from arch/powerpc/kernel/asm-offsets.c:14:
   In file included from include/linux/compat.h:17:
   In file included from include/linux/fs.h:33:
   In file included from include/linux/percpu-rwsem.h:7:
   In file included from include/linux/rcuwait.h:6:
   In file included from include/linux/sched/signal.h:9:
   In file included from include/linux/sched/task.h:11:
   In file included from include/linux/uaccess.h:11:
   In file included from arch/powerpc/include/asm/uaccess.h:9:
>> arch/powerpc/include/asm/kup.h:71:29: error: redefinition of 'get_kuap'
   static inline unsigned long get_kuap(void)
                               ^
   arch/powerpc/include/asm/book3s/64/kup-radix.h:152:29: note: previous definition is here
   static inline unsigned long get_kuap(void)
                               ^
   In file included from arch/powerpc/kernel/asm-offsets.c:14:
   In file included from include/linux/compat.h:17:
   In file included from include/linux/fs.h:33:
   In file included from include/linux/percpu-rwsem.h:7:
   In file included from include/linux/rcuwait.h:6:
   In file included from include/linux/sched/signal.h:9:
   In file included from include/linux/sched/task.h:11:
   In file included from include/linux/uaccess.h:11:
   In file included from arch/powerpc/include/asm/uaccess.h:9:
>> arch/powerpc/include/asm/kup.h:76:20: error: redefinition of 'set_kuap'
   static inline void set_kuap(unsigned long value) { }
                      ^
   arch/powerpc/include/asm/book3s/64/kup-radix.h:157:20: note: previous definition is here
   static inline void set_kuap(unsigned long value) { }
                      ^
   In file included from arch/powerpc/kernel/asm-offsets.c:21:
   include/linux/mman.h:155:9: warning: division by zero is undefined [-Wdivision-by-zero]
                  _calc_vm_trans(flags, MAP_LOCKED,     VM_LOCKED    ) |
                  ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   include/linux/mman.h:133:21: note: expanded from macro '_calc_vm_trans'
      : ((x) & (bit1)) / ((bit1) / (bit2))))
                       ^ ~~~~~~~~~~~~~~~~~
   include/linux/mman.h:156:9: warning: division by zero is undefined [-Wdivision-by-zero]
                  _calc_vm_trans(flags, MAP_SYNC,       VM_SYNC      ) |
                  ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   include/linux/mman.h:133:21: note: expanded from macro '_calc_vm_trans'
      : ((x) & (bit1)) / ((bit1) / (bit2))))
                       ^ ~~~~~~~~~~~~~~~~~
   2 warnings and 2 errors generated.
   make[2]: *** [scripts/Makefile.build:117: arch/powerpc/kernel/asm-offsets.s] Error 1
   make[2]: Target '__build' not remade because of errors.
   make[1]: *** [Makefile:1198: prepare0] Error 2
   make[1]: Target 'prepare' not remade because of errors.
   make: *** [Makefile:185: __sub-make] Error 2
   make: Target 'prepare' not remade because of errors.

vim +/get_kuap +71 arch/powerpc/include/asm/kup.h

    69	
    70	static inline void kuap_check_amr(void) { }
  > 71	static inline unsigned long get_kuap(void)
    72	{
    73		return AMR_KUAP_BLOCKED;
    74	}
    75	
  > 76	static inline void set_kuap(unsigned long value) { }
    77	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 41007 bytes --]

^ permalink raw reply

* Re: [PATCH 01/29] powerpc/rtas: move rtas_call_reentrant() out of pseries guards
From: Nathan Lynch @ 2020-12-04 20:37 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: tyreld, ajd, mmc, cforno12, drt, brking
In-Reply-To: <20201030011805.1224603-2-nathanl@linux.ibm.com>

Nathan Lynch <nathanl@linux.ibm.com> writes:

> rtas_call_reentrant() isn't platform-dependent; move it out of
> CONFIG_PPC_PSERIES-guarded code.

As reported by the 0-day CI, this is mistaken, and it breaks non-pseries
builds:

>> arch/powerpc/kernel/rtas.c:938:21: error: no member named 'rtas_args_reentrant' in 'struct paca_struct'
           args = local_paca->rtas_args_reentrant;
                  ~~~~~~~~~~  ^
   1 error generated.

https://lore.kernel.org/linuxppc-dev/202012050432.SFbbjWMw-lkp@intel.com/

I'll drop this and reroll the series.

^ permalink raw reply

* Re: [RFC v2 1/2] [NEEDS HELP] x86/mm: Handle unlazying membarrier core sync in the arch code
From: Mathieu Desnoyers @ 2020-12-04 20:39 UTC (permalink / raw)
  To: Nadav Amit, Peter Zijlstra
  Cc: linux-arch, Arnd Bergmann, riel, Will Deacon, x86, Dave Hansen,
	linux-kernel, Nicholas Piggin, linux-mm, Andy Lutomirski,
	Catalin Marinas, Jann Horn, linuxppc-dev
In-Reply-To: <A61977A7-F0B2-4492-AB6D-06E24417FA59@gmail.com>

----- On Dec 4, 2020, at 3:17 AM, Nadav Amit nadav.amit@gmail.com wrote:

> I am not very familiar with membarrier, but here are my 2 cents while trying
> to answer your questions.
> 
>> On Dec 3, 2020, at 9:26 PM, Andy Lutomirski <luto@kernel.org> wrote:
>> @@ -496,6 +497,8 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct
>> mm_struct *next,
>> 		 * from one thread in a process to another thread in the same
>> 		 * process. No TLB flush required.
>> 		 */
>> +
>> +		// XXX: why is this okay wrt membarrier?
>> 		if (!was_lazy)
>> 			return;
> 
> I am confused.
> 
> On one hand, it seems that membarrier_private_expedited() would issue an IPI
> to that core, as it would find that this core’s cpu_rq(cpu)->curr->mm is the
> same as the one that the membarrier applies to.

If the scheduler switches from one thread to another which both have the same mm,
it means cpu_rq(cpu)->curr->mm is invariant, even though ->curr changes. So there
is no need to issue a memory barrier or sync core for membarrier in this case,
because there is no way the IPI can be missed.

> But… (see below)
> 
> 
>> @@ -508,12 +511,24 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct
>> mm_struct *next,
>> 		smp_mb();
>> 		next_tlb_gen = atomic64_read(&next->context.tlb_gen);
>> 		if (this_cpu_read(cpu_tlbstate.ctxs[prev_asid].tlb_gen) ==
>> -				next_tlb_gen)
>> +		    next_tlb_gen) {
>> +			/*
>> +			 * We're reactivating an mm, and membarrier might
>> +			 * need to serialize.  Tell membarrier.
>> +			 */
>> +
>> +			// XXX: I can't understand the logic in
>> +			// membarrier_mm_sync_core_before_usermode().  What's
>> +			// the mm check for?
>> +			membarrier_mm_sync_core_before_usermode(next);
> 
> On the other hand the reason for this mm check that you mention contradicts
> my previous understanding as the git log says:
> 
> commit 2840cf02fae627860156737e83326df354ee4ec6
> Author: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
> Date:   Thu Sep 19 13:37:01 2019 -0400
> 
>    sched/membarrier: Call sync_core only before usermode for same mm
>    
>    When the prev and next task's mm change, switch_mm() provides the core
>    serializing guarantees before returning to usermode. The only case
>    where an explicit core serialization is needed is when the scheduler
>    keeps the same mm for prev and next.

Hrm, so your point here is that if the scheduler keeps the same mm for
prev and next, it means membarrier will have observed the same rq->curr->mm,
and therefore the IPI won't be missed. I wonder if that
membarrier_mm_sync_core_before_usermode is needed at all then or if we
have just been too careful and did not consider that all the scenarios which
need to be core-sync'd are indeed taken care of ?

I see here that my prior commit message indeed discusses prev and next task's
mm, but in reality, we are comparing current->mm with rq->prev_mm. So from
a lazy TLB perspective, this probably matters, and we may still need a core sync
in some lazy TLB scenarios.

> 
>> 	/*
>> 	 * When switching through a kernel thread, the loop in
>> 	 * membarrier_{private,global}_expedited() may have observed that
>> 	 * kernel thread and not issued an IPI. It is therefore possible to
>> 	 * schedule between user->kernel->user threads without passing though
>> 	 * switch_mm(). Membarrier requires a barrier after storing to
>> -	 * rq->curr, before returning to userspace, so provide them here:
>> +	 * rq->curr, before returning to userspace, and mmdrop() provides
>> +	 * this barrier.
>> 	 *
>> -	 * - a full memory barrier for {PRIVATE,GLOBAL}_EXPEDITED, implicitly
>> -	 *   provided by mmdrop(),
>> -	 * - a sync_core for SYNC_CORE.
>> +	 * XXX: I don't think mmdrop() actually does this.  There's no
>> +	 * smp_mb__before/after_atomic() in there.
> 
> I presume that since x86 is the only one that needs
> membarrier_mm_sync_core_before_usermode(), nobody noticed the missing
> smp_mb__before/after_atomic(). These are anyhow a compiler barrier in x86,
> and such a barrier would take place before the return to userspace.

mmdrop already provides the memory barriers for membarrer, as I documented within the
function.

Thanks,

Mathieu

-- 
Mathieu Desnoyers
EfficiOS Inc.
http://www.efficios.com

^ permalink raw reply

* Re: [RFC v2 1/2] [NEEDS HELP] x86/mm: Handle unlazying membarrier core sync in the arch code
From: Mathieu Desnoyers @ 2020-12-04 20:24 UTC (permalink / raw)
  To: Andy Lutomirski, Peter Zijlstra
  Cc: linux-arch, Nadav Amit, Arnd Bergmann, riel, Will Deacon, x86,
	Dave Hansen, linux-kernel, Nicholas Piggin, linux-mm,
	Catalin Marinas, Jann Horn, linuxppc-dev
In-Reply-To: <203d39d11562575fd8bd6a094d97a3a332d8b265.1607059162.git.luto@kernel.org>

----- On Dec 4, 2020, at 12:26 AM, Andy Lutomirski luto@kernel.org wrote:

> The core scheduler isn't a great place for
> membarrier_mm_sync_core_before_usermode() -- the core scheduler doesn't
> actually know whether we are lazy.  With the old code, if a CPU is
> running a membarrier-registered task, goes idle, gets unlazied via a TLB
> shootdown IPI, and switches back to the membarrier-registered task, it
> will do an unnecessary core sync.
> 
> Conveniently, x86 is the only architecture that does anything in this
> hook, so we can just move the code.
> 
> XXX: there are some comments in swich_mm_irqs_off() that seem to be
> trying to document what barriers are expected, and it's not clear to me
> that these barriers are actually present in all paths through the
> code.  So I think this change makes the code more comprehensible and
> has no effect on the code's correctness, but I'm not at all convinced
> that the code is correct.
> 
> Signed-off-by: Andy Lutomirski <luto@kernel.org>
> ---
> arch/x86/mm/tlb.c   | 17 ++++++++++++++++-
> kernel/sched/core.c | 14 +++++++-------
> 2 files changed, 23 insertions(+), 8 deletions(-)
> 
> diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
> index 3338a1feccf9..23df035b80e8 100644
> --- a/arch/x86/mm/tlb.c
> +++ b/arch/x86/mm/tlb.c
> @@ -8,6 +8,7 @@
> #include <linux/export.h>
> #include <linux/cpu.h>
> #include <linux/debugfs.h>
> +#include <linux/sched/mm.h>
> 
> #include <asm/tlbflush.h>
> #include <asm/mmu_context.h>
> @@ -496,6 +497,8 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct
> mm_struct *next,
> 		 * from one thread in a process to another thread in the same
> 		 * process. No TLB flush required.
> 		 */
> +
> +		// XXX: why is this okay wrt membarrier?
> 		if (!was_lazy)
> 			return;

As I recall, when the scheduler switches between threads which belong to
the same mm, it does not have to provide explicit memory barriers for
membarrier because it does not change the "rq->curr->mm" value which is
used as condition in the membarrier loop to send the IPI.

> 
> @@ -508,12 +511,24 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct
> mm_struct *next,
> 		smp_mb();
> 		next_tlb_gen = atomic64_read(&next->context.tlb_gen);
> 		if (this_cpu_read(cpu_tlbstate.ctxs[prev_asid].tlb_gen) ==
> -				next_tlb_gen)
> +		    next_tlb_gen) {
> +			/*
> +			 * We're reactivating an mm, and membarrier might
> +			 * need to serialize.  Tell membarrier.
> +			 */
> +
> +			// XXX: I can't understand the logic in
> +			// membarrier_mm_sync_core_before_usermode().  What's
> +			// the mm check for?
> +			membarrier_mm_sync_core_before_usermode(next);

I think you mean the:

        if (current->mm != mm)
                return;

check at the beginning.

We have to look at it from the scheduler context from which this function is called
(yeah, I know, it's not so great to mix up scheduler and mm states like this).

in finish_task_switch() we have:

        struct mm_struct *mm = rq->prev_mm;
[...]
        if (mm) {
                membarrier_mm_sync_core_before_usermode(mm);
                mmdrop(mm);
        }

I recall that this current->mm vs rq->prev_mm check is just there to
figure out whether we are in lazy tlb mode, and don't sync core in lazy
tlb mode. Hopefully I'm not stating anything stupid here, maybe Peter
could enlighten us. And you should definitely be careful when calling this
helper from other contexts, as it was originally crafted only for that
single use in the scheduler.


> 			return;
> +		}
> 
> 		/*
> 		 * TLB contents went out of date while we were in lazy
> 		 * mode. Fall through to the TLB switching code below.
> +		 * No need for an explicit membarrier invocation -- the CR3
> +		 * write will serialize.
> 		 */
> 		new_asid = prev_asid;
> 		need_flush = true;
> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index 2d95dc3f4644..6c4b76147166 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -3619,22 +3619,22 @@ static struct rq *finish_task_switch(struct task_struct
> *prev)
> 	kcov_finish_switch(current);
> 
> 	fire_sched_in_preempt_notifiers(current);
> +
> 	/*
> 	 * When switching through a kernel thread, the loop in
> 	 * membarrier_{private,global}_expedited() may have observed that
> 	 * kernel thread and not issued an IPI. It is therefore possible to
> 	 * schedule between user->kernel->user threads without passing though
> 	 * switch_mm(). Membarrier requires a barrier after storing to
> -	 * rq->curr, before returning to userspace, so provide them here:
> +	 * rq->curr, before returning to userspace, and mmdrop() provides
> +	 * this barrier.
> 	 *
> -	 * - a full memory barrier for {PRIVATE,GLOBAL}_EXPEDITED, implicitly
> -	 *   provided by mmdrop(),
> -	 * - a sync_core for SYNC_CORE.
> +	 * XXX: I don't think mmdrop() actually does this.  There's no
> +	 * smp_mb__before/after_atomic() in there.

I recall mmdrop providing a memory barrier. It looks like I event went though the
trouble of documenting it myself. ;-)

static inline void mmdrop(struct mm_struct *mm)
{
        /*
         * The implicit full barrier implied by atomic_dec_and_test() is
         * required by the membarrier system call before returning to
         * user-space, after storing to rq->curr.
         */
        if (unlikely(atomic_dec_and_test(&mm->mm_count)))
                __mmdrop(mm);
}


> 	 */
> -	if (mm) {
> -		membarrier_mm_sync_core_before_usermode(mm);

OK so here is the meat. The current code is using the (possibly incomplete)
lazy TLB state known by the scheduler to sync core, and it appears it may be
a bit more heavy that what is strictly needed.

Your change instead rely on the internal knowledge of lazy TLB within x86
switch_mm_irqs_off to achieve this, which overall seems like an improvement.

I agree with Nick's comment that it should go on top of his exit_lazy_mm
patches.

Thanks,

Mathieu


> +	if (mm)
> 		mmdrop(mm);
> -	}
> +
> 	if (unlikely(prev_state == TASK_DEAD)) {
> 		if (prev->sched_class->task_dead)
> 			prev->sched_class->task_dead(prev);
> --
> 2.28.0

-- 
Mathieu Desnoyers
EfficiOS Inc.
http://www.efficios.com

^ permalink raw reply

* Re: powerpc 5.10-rcN boot failures with RCU_SCALE_TEST=m
From: Uladzislau Rezki @ 2020-12-04 20:23 UTC (permalink / raw)
  To: Michael Ellerman, Paul E . McKenney; +Cc: rcu, linuxppc-dev, Daniel Axtens
In-Reply-To: <20201203143445.GA22204@pc636>

On Thu, Dec 03, 2020 at 03:34:45PM +0100, Uladzislau Rezki wrote:
> On Thu, Dec 03, 2020 at 05:22:20PM +1100, Michael Ellerman wrote:
> > Uladzislau Rezki <urezki@gmail.com> writes:
> > > On Thu, Dec 03, 2020 at 01:03:32AM +1100, Michael Ellerman wrote:
> > ...
> > >> 
> > >> The SMP bringup stalls because _cpu_up() is blocked trying to take
> > >> cpu_hotplug_lock for writing:
> > >> 
> > >> [  401.403132][    T0] task:swapper/0       state:D stack:12512 pid:    1 ppid:     0 flags:0x00000800
> > >> [  401.403502][    T0] Call Trace:
> > >> [  401.403907][    T0] [c0000000062c37d0] [c0000000062c3830] 0xc0000000062c3830 (unreliable)
> > >> [  401.404068][    T0] [c0000000062c39b0] [c000000000019d70] __switch_to+0x2e0/0x4a0
> > >> [  401.404189][    T0] [c0000000062c3a10] [c000000000b87228] __schedule+0x288/0x9b0
> > >> [  401.404257][    T0] [c0000000062c3ad0] [c000000000b879b8] schedule+0x68/0x120
> > >> [  401.404324][    T0] [c0000000062c3b00] [c000000000184ad4] percpu_down_write+0x164/0x170
> > >> [  401.404390][    T0] [c0000000062c3b50] [c000000000116b68] _cpu_up+0x68/0x280
> > >> [  401.404475][    T0] [c0000000062c3bb0] [c000000000116e70] cpu_up+0xf0/0x140
> > >> [  401.404546][    T0] [c0000000062c3c30] [c00000000011776c] bringup_nonboot_cpus+0xac/0xf0
> > >> [  401.404643][    T0] [c0000000062c3c80] [c000000000eea1b8] smp_init+0x40/0xcc
> > >> [  401.404727][    T0] [c0000000062c3ce0] [c000000000ec43dc] kernel_init_freeable+0x1e0/0x3a0
> > >> [  401.404799][    T0] [c0000000062c3db0] [c000000000011ec4] kernel_init+0x24/0x150
> > >> [  401.404958][    T0] [c0000000062c3e20] [c00000000000daf0] ret_from_kernel_thread+0x5c/0x6c
> > >> 
> > >> It can't get it because kprobe_optimizer() has taken it for read and is now
> > >> blocked waiting for synchronize_rcu_tasks():
> > >> 
> > >> [  401.418808][    T0] task:kworker/0:1     state:D stack:13392 pid:   12 ppid:     2 flags:0x00000800
> > >> [  401.418951][    T0] Workqueue: events kprobe_optimizer
> > >> [  401.419078][    T0] Call Trace:
> > >> [  401.419121][    T0] [c0000000062ef650] [c0000000062ef710] 0xc0000000062ef710 (unreliable)
> > >> [  401.419213][    T0] [c0000000062ef830] [c000000000019d70] __switch_to+0x2e0/0x4a0
> > >> [  401.419281][    T0] [c0000000062ef890] [c000000000b87228] __schedule+0x288/0x9b0
> > >> [  401.419347][    T0] [c0000000062ef950] [c000000000b879b8] schedule+0x68/0x120
> > >> [  401.419415][    T0] [c0000000062ef980] [c000000000b8e664] schedule_timeout+0x2a4/0x340
> > >> [  401.419484][    T0] [c0000000062efa80] [c000000000b894ec] wait_for_completion+0x9c/0x170
> > >> [  401.419552][    T0] [c0000000062efae0] [c0000000001ac85c] __wait_rcu_gp+0x19c/0x210
> > >> [  401.419619][    T0] [c0000000062efb40] [c0000000001ac90c] synchronize_rcu_tasks_generic+0x3c/0x70
> > >> [  401.419690][    T0] [c0000000062efbe0] [c00000000022a3dc] kprobe_optimizer+0x1dc/0x470
> > >> [  401.419757][    T0] [c0000000062efc60] [c000000000136684] process_one_work+0x2f4/0x530
> > >> [  401.419823][    T0] [c0000000062efd20] [c000000000138d28] worker_thread+0x78/0x570
> > >> [  401.419891][    T0] [c0000000062efdb0] [c000000000142424] kthread+0x194/0x1a0
> > >> [  401.419976][    T0] [c0000000062efe20] [c00000000000daf0] ret_from_kernel_thread+0x5c/0x6c
> > >> 
> > >> But why is the synchronize_rcu_tasks() not completing?
OK. Seems i have understood why the synchronize_rcu_tasks() is not doing progress
waiting on complition. Actually the GP kthreads are not spawned by the time when
early_initcall callbacks gets invoked.

It means that callbacks will not be processed because GP kthreads do not exist,
so wakeme_after_rcu() is not invoked, thus does not signal about that a grace
period has elapsed.

<snip>
diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h
index 35bdcfd84d42..c5422bba7fe7 100644  
--- a/kernel/rcu/tasks.h
+++ b/kernel/rcu/tasks.h
@@ -241,7 +241,7 @@ static int __noreturn rcu_tasks_kthread(void *arg)
        }
 }

-/* Spawn RCU-tasks grace-period kthread, e.g., at core_initcall() time. */
+/* Spawn RCU-tasks grace-period kthread at early_initcall() time. */
 static void __init rcu_spawn_tasks_kthread_generic(struct rcu_tasks *rtp)
 {
        struct task_struct *t;
@@ -564,7 +564,7 @@ static int __init rcu_spawn_tasks_kthread(void)
        rcu_spawn_tasks_kthread_generic(&rcu_tasks);
        return 0;
 }
-core_initcall(rcu_spawn_tasks_kthread);
+early_initcall(rcu_spawn_tasks_kthread);
 
 #if !defined(CONFIG_TINY_RCU)
 void show_rcu_tasks_classic_gp_kthread(void)
@@ -692,7 +692,7 @@ static int __init rcu_spawn_tasks_rude_kthread(void)
        rcu_spawn_tasks_kthread_generic(&rcu_tasks_rude);
        return 0;
 }
-core_initcall(rcu_spawn_tasks_rude_kthread);
+early_initcall(rcu_spawn_tasks_rude_kthread);
 
 #if !defined(CONFIG_TINY_RCU)
 void show_rcu_tasks_rude_gp_kthread(void)
@@ -1193,7 +1193,7 @@ static int __init rcu_spawn_tasks_trace_kthread(void)
        rcu_spawn_tasks_kthread_generic(&rcu_tasks_trace);
        return 0;
 }
-core_initcall(rcu_spawn_tasks_trace_kthread);
+early_initcall(rcu_spawn_tasks_trace_kthread);
 
 #if !defined(CONFIG_TINY_RCU)
 void show_rcu_tasks_trace_gp_kthread(void)
diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
index 39334d2d2b37..a251fc705abd 100644  
--- a/kernel/rcu/update.c
+++ b/kernel/rcu/update.c
@@ -133,7 +133,7 @@ EXPORT_SYMBOL(rcu_read_lock_sched_held);
  * that if the user specifies both rcu_expedited and rcu_normal, then
  * rcu_normal wins.  (Except during the time period during boot from
  * when the first task is spawned until the rcu_set_runtime_mode()
- * core_initcall() is invoked, at which point everything is expedited.)
+ * early_initcall() is invoked, at which point everything is expedited.)
  */

ol rcu_gp_is_normal(void)
 {
@@ -235,7 +235,7 @@ static int __init rcu_set_runtime_mode(void)
        rcu_test_sync_prims();
        return 0;
 }
-core_initcall(rcu_set_runtime_mode);
+early_initcall(rcu_set_runtime_mode);   

 #endif /* #if !defined(CONFIG_TINY_RCU) || defined(CONFIG_SRCU) */
<snip>

Appreciate if you can test on your setup above patch!

--
Vlad Rezki

^ permalink raw reply related

* [powerpc:next-test 192/220] arch/powerpc/kernel/rtas.c:938:21: error: no member named 'rtas_args_reentrant' in 'struct paca_struct'
From: kernel test robot @ 2020-12-04 20:05 UTC (permalink / raw)
  To: Nathan Lynch; +Cc: clang-built-linux, kbuild-all, linuxppc-dev

[-- Attachment #1: Type: text/plain, Size: 5568 bytes --]

tree:   https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git next-test
head:   4e4ed87981c764498942c52004c620bb8f104eac
commit: e2f67efc6c8654d7aed885e943499f00a29eecdc [192/220] powerpc/rtas: move rtas_call_reentrant() out of pseries guards
config: powerpc-randconfig-r016-20201204 (attached as .config)
compiler: clang version 12.0.0 (https://github.com/llvm/llvm-project 32c501dd88b62787d3a5ffda7aabcf4650dbe3cd)
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # install powerpc cross compiling tool for clang build
        # apt-get install binutils-powerpc-linux-gnu
        # https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git/commit/?id=e2f67efc6c8654d7aed885e943499f00a29eecdc
        git remote add powerpc https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git
        git fetch --no-tags powerpc next-test
        git checkout e2f67efc6c8654d7aed885e943499f00a29eecdc
        # save the attached .config to linux build tree
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross ARCH=powerpc 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

>> arch/powerpc/kernel/rtas.c:938:21: error: no member named 'rtas_args_reentrant' in 'struct paca_struct'
           args = local_paca->rtas_args_reentrant;
                  ~~~~~~~~~~  ^
   1 error generated.

vim +938 arch/powerpc/kernel/rtas.c

b664db8e3f976d Leonardo Bras 2020-05-18  906  
b664db8e3f976d Leonardo Bras 2020-05-18  907  /**
b664db8e3f976d Leonardo Bras 2020-05-18  908   * rtas_call_reentrant() - Used for reentrant rtas calls
b664db8e3f976d Leonardo Bras 2020-05-18  909   * @token:	Token for desired reentrant RTAS call
b664db8e3f976d Leonardo Bras 2020-05-18  910   * @nargs:	Number of Input Parameters
b664db8e3f976d Leonardo Bras 2020-05-18  911   * @nret:	Number of Output Parameters
b664db8e3f976d Leonardo Bras 2020-05-18  912   * @outputs:	Array of outputs
b664db8e3f976d Leonardo Bras 2020-05-18  913   * @...:	Inputs for desired RTAS call
b664db8e3f976d Leonardo Bras 2020-05-18  914   *
b664db8e3f976d Leonardo Bras 2020-05-18  915   * According to LoPAR documentation, only "ibm,int-on", "ibm,int-off",
b664db8e3f976d Leonardo Bras 2020-05-18  916   * "ibm,get-xive" and "ibm,set-xive" are currently reentrant.
b664db8e3f976d Leonardo Bras 2020-05-18  917   * Reentrant calls need their own rtas_args buffer, so not using rtas.args, but
b664db8e3f976d Leonardo Bras 2020-05-18  918   * PACA one instead.
b664db8e3f976d Leonardo Bras 2020-05-18  919   *
b664db8e3f976d Leonardo Bras 2020-05-18  920   * Return:	-1 on error,
b664db8e3f976d Leonardo Bras 2020-05-18  921   *		First output value of RTAS call if (nret > 0),
b664db8e3f976d Leonardo Bras 2020-05-18  922   *		0 otherwise,
b664db8e3f976d Leonardo Bras 2020-05-18  923   */
b664db8e3f976d Leonardo Bras 2020-05-18  924  int rtas_call_reentrant(int token, int nargs, int nret, int *outputs, ...)
b664db8e3f976d Leonardo Bras 2020-05-18  925  {
b664db8e3f976d Leonardo Bras 2020-05-18  926  	va_list list;
b664db8e3f976d Leonardo Bras 2020-05-18  927  	struct rtas_args *args;
b664db8e3f976d Leonardo Bras 2020-05-18  928  	unsigned long flags;
b664db8e3f976d Leonardo Bras 2020-05-18  929  	int i, ret = 0;
b664db8e3f976d Leonardo Bras 2020-05-18  930  
b664db8e3f976d Leonardo Bras 2020-05-18  931  	if (!rtas.entry || token == RTAS_UNKNOWN_SERVICE)
b664db8e3f976d Leonardo Bras 2020-05-18  932  		return -1;
b664db8e3f976d Leonardo Bras 2020-05-18  933  
b664db8e3f976d Leonardo Bras 2020-05-18  934  	local_irq_save(flags);
b664db8e3f976d Leonardo Bras 2020-05-18  935  	preempt_disable();
b664db8e3f976d Leonardo Bras 2020-05-18  936  
b664db8e3f976d Leonardo Bras 2020-05-18  937  	/* We use the per-cpu (PACA) rtas args buffer */
b664db8e3f976d Leonardo Bras 2020-05-18 @938  	args = local_paca->rtas_args_reentrant;
b664db8e3f976d Leonardo Bras 2020-05-18  939  
b664db8e3f976d Leonardo Bras 2020-05-18  940  	va_start(list, outputs);
b664db8e3f976d Leonardo Bras 2020-05-18  941  	va_rtas_call_unlocked(args, token, nargs, nret, list);
b664db8e3f976d Leonardo Bras 2020-05-18  942  	va_end(list);
b664db8e3f976d Leonardo Bras 2020-05-18  943  
b664db8e3f976d Leonardo Bras 2020-05-18  944  	if (nret > 1 && outputs)
b664db8e3f976d Leonardo Bras 2020-05-18  945  		for (i = 0; i < nret - 1; ++i)
b664db8e3f976d Leonardo Bras 2020-05-18  946  			outputs[i] = be32_to_cpu(args->rets[i + 1]);
b664db8e3f976d Leonardo Bras 2020-05-18  947  
b664db8e3f976d Leonardo Bras 2020-05-18  948  	if (nret > 0)
b664db8e3f976d Leonardo Bras 2020-05-18  949  		ret = be32_to_cpu(args->rets[0]);
b664db8e3f976d Leonardo Bras 2020-05-18  950  
b664db8e3f976d Leonardo Bras 2020-05-18  951  	local_irq_restore(flags);
b664db8e3f976d Leonardo Bras 2020-05-18  952  	preempt_enable();
b664db8e3f976d Leonardo Bras 2020-05-18  953  
b664db8e3f976d Leonardo Bras 2020-05-18  954  	return ret;
b664db8e3f976d Leonardo Bras 2020-05-18  955  }
b664db8e3f976d Leonardo Bras 2020-05-18  956  

:::::: The code at line 938 was first introduced by commit
:::::: b664db8e3f976d9233cc9ea5e3f8a8c0bcabeb48 powerpc/rtas: Implement reentrant rtas call

:::::: TO: Leonardo Bras <leobras.c@gmail.com>
:::::: CC: Michael Ellerman <mpe@ellerman.id.au>

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 21980 bytes --]

^ permalink raw reply

* [PATCH] powerpc/powermac: Fix low_sleep_handler with CONFIG_VMAP_STACK
From: Christophe Leroy @ 2020-12-04 18:55 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman,
	giuseppe
  Cc: linuxppc-dev, linux-kernel

low_sleep_handler() can't restore the context from standard
stack because the stack can hardly be accessed with MMU OFF.

Store everything in a global storage area instead of storing
a pointer to the stack in that global storage area.

To avoid a complete churn of the function, still use r1 as
the pointer to the storage area during restore.

Reported-by: Giuseppe Sacco <giuseppe@sguazz.it>
Fixes: cd08f109e262 ("powerpc/32s: Enable CONFIG_VMAP_STACK")
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
This is only build tested. Giuseppe can you test it ? Thanks.
---
 arch/powerpc/platforms/Kconfig.cputype  |   2 +-
 arch/powerpc/platforms/powermac/sleep.S | 130 +++++++++++-------------
 2 files changed, 59 insertions(+), 73 deletions(-)

diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index c194c4ae8bc7..32a9c4c09b98 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -36,7 +36,7 @@ config PPC_BOOK3S_6xx
 	select PPC_HAVE_PMU_SUPPORT
 	select PPC_HAVE_KUEP
 	select PPC_HAVE_KUAP
-	select HAVE_ARCH_VMAP_STACK if !ADB_PMU
+	select HAVE_ARCH_VMAP_STACK
 
 config PPC_85xx
 	bool "Freescale 85xx"
diff --git a/arch/powerpc/platforms/powermac/sleep.S b/arch/powerpc/platforms/powermac/sleep.S
index 7e0f8ba6e54a..ce56082a392a 100644
--- a/arch/powerpc/platforms/powermac/sleep.S
+++ b/arch/powerpc/platforms/powermac/sleep.S
@@ -44,7 +44,8 @@
 #define SL_TB		0xa0
 #define SL_R2		0xa8
 #define SL_CR		0xac
-#define SL_R12		0xb0	/* r12 to r31 */
+#define SL_LR		0xb0
+#define SL_R12		0xb4	/* r12 to r31 */
 #define SL_SIZE		(SL_R12 + 80)
 
 	.section .text
@@ -63,105 +64,107 @@ _GLOBAL(low_sleep_handler)
 	blr
 #else
 	mflr	r0
-	stw	r0,4(r1)
-	stwu	r1,-SL_SIZE(r1)
+	lis	r11,sleep_storage@ha
+	addis	r11,r11,sleep_storage@l
+	stw	r0,SL_LR(r11)
 	mfcr	r0
-	stw	r0,SL_CR(r1)
-	stw	r2,SL_R2(r1)
-	stmw	r12,SL_R12(r1)
+	stw	r0,SL_CR(r11)
+	stw	r1,SL_SP(r11)
+	stw	r2,SL_R2(r11)
+	stmw	r12,SL_R12(r11)
 
 	/* Save MSR & SDR1 */
 	mfmsr	r4
-	stw	r4,SL_MSR(r1)
+	stw	r4,SL_MSR(r11)
 	mfsdr1	r4
-	stw	r4,SL_SDR1(r1)
+	stw	r4,SL_SDR1(r11)
 
 	/* Get a stable timebase and save it */
 1:	mftbu	r4
-	stw	r4,SL_TB(r1)
+	stw	r4,SL_TB(r11)
 	mftb	r5
-	stw	r5,SL_TB+4(r1)
+	stw	r5,SL_TB+4(r11)
 	mftbu	r3
 	cmpw	r3,r4
 	bne	1b
 
 	/* Save SPRGs */
 	mfsprg	r4,0
-	stw	r4,SL_SPRG0(r1)
+	stw	r4,SL_SPRG0(r11)
 	mfsprg	r4,1
-	stw	r4,SL_SPRG0+4(r1)
+	stw	r4,SL_SPRG0+4(r11)
 	mfsprg	r4,2
-	stw	r4,SL_SPRG0+8(r1)
+	stw	r4,SL_SPRG0+8(r11)
 	mfsprg	r4,3
-	stw	r4,SL_SPRG0+12(r1)
+	stw	r4,SL_SPRG0+12(r11)
 
 	/* Save BATs */
 	mfdbatu	r4,0
-	stw	r4,SL_DBAT0(r1)
+	stw	r4,SL_DBAT0(r11)
 	mfdbatl	r4,0
-	stw	r4,SL_DBAT0+4(r1)
+	stw	r4,SL_DBAT0+4(r11)
 	mfdbatu	r4,1
-	stw	r4,SL_DBAT1(r1)
+	stw	r4,SL_DBAT1(r11)
 	mfdbatl	r4,1
-	stw	r4,SL_DBAT1+4(r1)
+	stw	r4,SL_DBAT1+4(r11)
 	mfdbatu	r4,2
-	stw	r4,SL_DBAT2(r1)
+	stw	r4,SL_DBAT2(r11)
 	mfdbatl	r4,2
-	stw	r4,SL_DBAT2+4(r1)
+	stw	r4,SL_DBAT2+4(r11)
 	mfdbatu	r4,3
-	stw	r4,SL_DBAT3(r1)
+	stw	r4,SL_DBAT3(r11)
 	mfdbatl	r4,3
-	stw	r4,SL_DBAT3+4(r1)
+	stw	r4,SL_DBAT3+4(r11)
 	mfibatu	r4,0
-	stw	r4,SL_IBAT0(r1)
+	stw	r4,SL_IBAT0(r11)
 	mfibatl	r4,0
-	stw	r4,SL_IBAT0+4(r1)
+	stw	r4,SL_IBAT0+4(r11)
 	mfibatu	r4,1
-	stw	r4,SL_IBAT1(r1)
+	stw	r4,SL_IBAT1(r11)
 	mfibatl	r4,1
-	stw	r4,SL_IBAT1+4(r1)
+	stw	r4,SL_IBAT1+4(r11)
 	mfibatu	r4,2
-	stw	r4,SL_IBAT2(r1)
+	stw	r4,SL_IBAT2(r11)
 	mfibatl	r4,2
-	stw	r4,SL_IBAT2+4(r1)
+	stw	r4,SL_IBAT2+4(r11)
 	mfibatu	r4,3
-	stw	r4,SL_IBAT3(r1)
+	stw	r4,SL_IBAT3(r11)
 	mfibatl	r4,3
-	stw	r4,SL_IBAT3+4(r1)
+	stw	r4,SL_IBAT3+4(r11)
 
 BEGIN_MMU_FTR_SECTION
 	mfspr	r4,SPRN_DBAT4U
-	stw	r4,SL_DBAT4(r1)
+	stw	r4,SL_DBAT4(r11)
 	mfspr	r4,SPRN_DBAT4L
-	stw	r4,SL_DBAT4+4(r1)
+	stw	r4,SL_DBAT4+4(r11)
 	mfspr	r4,SPRN_DBAT5U
-	stw	r4,SL_DBAT5(r1)
+	stw	r4,SL_DBAT5(r11)
 	mfspr	r4,SPRN_DBAT5L
-	stw	r4,SL_DBAT5+4(r1)
+	stw	r4,SL_DBAT5+4(r11)
 	mfspr	r4,SPRN_DBAT6U
-	stw	r4,SL_DBAT6(r1)
+	stw	r4,SL_DBAT6(r11)
 	mfspr	r4,SPRN_DBAT6L
-	stw	r4,SL_DBAT6+4(r1)
+	stw	r4,SL_DBAT6+4(r11)
 	mfspr	r4,SPRN_DBAT7U
-	stw	r4,SL_DBAT7(r1)
+	stw	r4,SL_DBAT7(r11)
 	mfspr	r4,SPRN_DBAT7L
-	stw	r4,SL_DBAT7+4(r1)
+	stw	r4,SL_DBAT7+4(r11)
 	mfspr	r4,SPRN_IBAT4U
-	stw	r4,SL_IBAT4(r1)
+	stw	r4,SL_IBAT4(r11)
 	mfspr	r4,SPRN_IBAT4L
-	stw	r4,SL_IBAT4+4(r1)
+	stw	r4,SL_IBAT4+4(r11)
 	mfspr	r4,SPRN_IBAT5U
-	stw	r4,SL_IBAT5(r1)
+	stw	r4,SL_IBAT5(r11)
 	mfspr	r4,SPRN_IBAT5L
-	stw	r4,SL_IBAT5+4(r1)
+	stw	r4,SL_IBAT5+4(r11)
 	mfspr	r4,SPRN_IBAT6U
-	stw	r4,SL_IBAT6(r1)
+	stw	r4,SL_IBAT6(r11)
 	mfspr	r4,SPRN_IBAT6L
-	stw	r4,SL_IBAT6+4(r1)
+	stw	r4,SL_IBAT6+4(r11)
 	mfspr	r4,SPRN_IBAT7U
-	stw	r4,SL_IBAT7(r1)
+	stw	r4,SL_IBAT7(r11)
 	mfspr	r4,SPRN_IBAT7L
-	stw	r4,SL_IBAT7+4(r1)
+	stw	r4,SL_IBAT7+4(r11)
 END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS)
 
 	/* Backup various CPU config stuffs */
@@ -180,9 +183,9 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS)
 	lis	r5,grackle_wake_up@ha
 	addi	r5,r5,grackle_wake_up@l
 	tophys(r5,r5)
-	stw	r5,SL_PC(r1)
+	stw	r5,SL_PC(r11)
 	lis	r4,KERNELBASE@h
-	tophys(r5,r1)
+	tophys(r5,r11)
 	addi	r5,r5,SL_PC
 	lis	r6,MAGIC@ha
 	addi	r6,r6,MAGIC@l
@@ -194,12 +197,6 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS)
 	tophys(r3,r3)
 	stw	r3,0x80(r4)
 	stw	r5,0x84(r4)
-	/* Store a pointer to our backup storage into
-	 * a kernel global
-	 */
-	lis r3,sleep_storage@ha
-	addi r3,r3,sleep_storage@l
-	stw r5,0(r3)
 
 	.globl	low_cpu_offline_self
 low_cpu_offline_self:
@@ -279,7 +276,7 @@ _GLOBAL(core99_wake_up)
 	lis	r3,sleep_storage@ha
 	addi	r3,r3,sleep_storage@l
 	tophys(r3,r3)
-	lwz	r1,0(r3)
+	addi	r1,r3,SL_PC
 
 	/* Pass thru to older resume code ... */
 _ASM_NOKPROBE_SYMBOL(core99_wake_up)
@@ -399,13 +396,6 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS)
 	blt	1b
 	sync
 
-	/* restore the MSR and turn on the MMU */
-	lwz	r3,SL_MSR(r1)
-	bl	turn_on_mmu
-
-	/* get back the stack pointer */
-	tovirt(r1,r1)
-
 	/* Restore TB */
 	li	r3,0
 	mttbl	r3
@@ -419,28 +409,24 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS)
 	mtcr	r0
 	lwz	r2,SL_R2(r1)
 	lmw	r12,SL_R12(r1)
-	addi	r1,r1,SL_SIZE
-	lwz	r0,4(r1)
-	mtlr	r0
-	blr
-_ASM_NOKPROBE_SYMBOL(grackle_wake_up)
 
-turn_on_mmu:
-	mflr	r4
-	tovirt(r4,r4)
+	/* restore the MSR and SP and turn on the MMU and return */
+	lwz	r3,SL_MSR(r1)
+	lwz	r4,SL_LR(r1)
+	lwz	r1,SL_SP(r1)
 	mtsrr0	r4
 	mtsrr1	r3
 	sync
 	isync
 	rfi
-_ASM_NOKPROBE_SYMBOL(turn_on_mmu)
+_ASM_NOKPROBE_SYMBOL(grackle_wake_up)
 
 #endif /* defined(CONFIG_PM) || defined(CONFIG_CPU_FREQ) */
 
 	.section .data
 	.balign	L1_CACHE_BYTES
 sleep_storage:
-	.long 0
+	.space SL_SIZE
 	.balign	L1_CACHE_BYTES, 0
 
 #endif /* CONFIG_PPC_BOOK3S_32 */
-- 
2.25.0


^ permalink raw reply related

* Re: [PATCH v8 11/12] mm/vmalloc: Hugepage vmalloc mappings
From: Edgecombe, Rick P @ 2020-12-04 18:33 UTC (permalink / raw)
  To: npiggin@gmail.com, linux-mm@kvack.org, akpm@linux-foundation.org
  Cc: linux-arch@vger.kernel.org, linux-kernel@vger.kernel.org,
	hch@infradead.org, lizefan@huawei.com,
	Jonathan.Cameron@Huawei.com, linuxppc-dev@lists.ozlabs.org
In-Reply-To: <1607068679.lfd133za4h.astroid@bobo.none>

On Fri, 2020-12-04 at 18:12 +1000, Nicholas Piggin wrote:
> Excerpts from Edgecombe, Rick P's message of December 1, 2020 6:21
> am:
> > On Sun, 2020-11-29 at 01:25 +1000, Nicholas Piggin wrote:
> > > Support huge page vmalloc mappings. Config option
> > > HAVE_ARCH_HUGE_VMALLOC
> > > enables support on architectures that define HAVE_ARCH_HUGE_VMAP
> > > and
> > > supports PMD sized vmap mappings.
> > > 
> > > vmalloc will attempt to allocate PMD-sized pages if allocating
> > > PMD
> > > size
> > > or larger, and fall back to small pages if that was unsuccessful.
> > > 
> > > Allocations that do not use PAGE_KERNEL prot are not permitted to
> > > use
> > > huge pages, because not all callers expect this (e.g., module
> > > allocations vs strict module rwx).
> > 
> > Several architectures (x86, arm64, others?) allocate modules
> > initially
> > with PAGE_KERNEL and so I think this test will not exclude module
> > allocations in those cases.
> 
> Ah, thanks. I guess archs must additionally ensure that their
> PAGE_KERNEL allocations are suitable for huge page mappings before
> enabling the option.
> 
> If there is interest from those archs to support this, I have an
> early (un-posted) patch that adds an explicit VM_HUGE flag that could
> override the pessemistic arch default. It's not much trouble to add
> this 
> to the large system hash allocations. It's very out of date now but
> I 
> can at least give what I have to anyone doing an arch support that
> wants it.

Ahh, sorry, I totally missed that this was only enabled for powerpc.

That patch might be useful for me actually. Or maybe a VM_NOHUGE, since
there are only a few places where executable vmallocs are created? I'm
not sure what the other issues are.

I am endeavoring to have small module allocations share large pages, so
this infrastructure is a big help already.
https://lore.kernel.org/lkml/20201120202426.18009-1-rick.p.edgecombe@intel.com/

Thanks!

^ permalink raw reply

* Re: [PATCH] powerpc/book3s_hv_uvmem: Check for failed page migration
From: Ram Pai @ 2020-12-04 16:52 UTC (permalink / raw)
  To: Bharata B Rao; +Cc: Alistair Popple, linuxppc-dev
In-Reply-To: <20201204101841.GA621541@in.ibm.com>

On Fri, Dec 04, 2020 at 03:48:41PM +0530, Bharata B Rao wrote:
> On Thu, Dec 03, 2020 at 04:08:12PM +1100, Alistair Popple wrote:
> > migrate_vma_pages() may still clear MIGRATE_PFN_MIGRATE on pages which
> > are not able to be migrated. Drivers may safely copy data prior to
> > calling migrate_vma_pages() however a remote mapping must not be
> > established until after migrate_vma_pages() has returned as the
> > migration could still fail.
> > 
> > UV_PAGE_IN_in both copies and maps the data page, therefore it should
> > only be called after checking the results of migrate_vma_pages().
> > 
> > Signed-off-by: Alistair Popple <alistair@popple.id.au>
> > ---
> >  arch/powerpc/kvm/book3s_hv_uvmem.c | 7 ++++---
> >  1 file changed, 4 insertions(+), 3 deletions(-)
> > 
> > diff --git a/arch/powerpc/kvm/book3s_hv_uvmem.c b/arch/powerpc/kvm/book3s_hv_uvmem.c
> > index 84e5a2dc8be5..08aa6a90c525 100644
> > --- a/arch/powerpc/kvm/book3s_hv_uvmem.c
> > +++ b/arch/powerpc/kvm/book3s_hv_uvmem.c
> > @@ -762,7 +762,10 @@ static int kvmppc_svm_page_in(struct vm_area_struct *vma,
> >  		goto out_finalize;
> >  	}
> >  
> > -	if (pagein) {
> > +	*mig.dst = migrate_pfn(page_to_pfn(dpage)) | MIGRATE_PFN_LOCKED;
> > +	migrate_vma_pages(&mig);
> > +
> > +	if ((*mig.src & MIGRATE_PFN_MIGRATE) && pagein) {
> >  		pfn = *mig.src >> MIGRATE_PFN_SHIFT;
> >  		spage = migrate_pfn_to_page(*mig.src);
> >  		if (spage) {
> > @@ -773,8 +776,6 @@ static int kvmppc_svm_page_in(struct vm_area_struct *vma,
> >  		}
> >  	}
> >  
> > -	*mig.dst = migrate_pfn(page_to_pfn(dpage)) | MIGRATE_PFN_LOCKED;
> > -	migrate_vma_pages(&mig);
> >  out_finalize:
> >  	migrate_vma_finalize(&mig);
> >  	return ret;

This patch certainly looks like the problem, that has been hurting
us for a while.  Let me run this patch through my SVM tests.  Looks very
promising.

BTW: The code does a similar thing while paging out.  It pages out from the
UV, and then does the migration. Is there a bug there aswell?

RP

^ permalink raw reply

* Re: [PATCH 16/29] powerpc/rtas: dispatch partition migration requests to pseries
From: Nathan Lynch @ 2020-12-04 16:04 UTC (permalink / raw)
  To: Michael Ellerman, linuxppc-dev; +Cc: tyreld, ajd, mmc, cforno12, drt, brking
In-Reply-To: <874kl1vilh.fsf@mpe.ellerman.id.au>

Michael Ellerman <mpe@ellerman.id.au> writes:
> Nathan Lynch <nathanl@linux.ibm.com> writes:
>> sys_rtas() cannot call ibm,suspend-me directly in the same way it
>> handles other inputs. Instead it must dispatch the request to code
>> that can first perform the H_JOIN sequence before any call to
>> ibm,suspend-me can succeed. Over time kernel/rtas.c has accreted a fair
>> amount of platform-specific code to implement this.
>>
>> Since a different, more robust implementation of the suspend sequence
>> is now in the pseries platform code, we want to dispatch the request
>> there while minimizing additional dependence on pseries.
>>
>> Use a weak function that only pseries overrides.
>
> Over the years weak functions have caused their fair share of problems.
>
> There are cases where they are the cleanest option, but for intra-arch
> code like this I think and ifdef is much simpler.

Fair enough, I wasn't all that confident about this anyway.


>> diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h
>> index fdefe6a974eb..be0fc2536673 100644
>> --- a/arch/powerpc/include/asm/rtas.h
>> +++ b/arch/powerpc/include/asm/rtas.h
>> @@ -260,6 +260,7 @@ extern int rtas_suspend_cpu(struct rtas_suspend_me_data *data);
>>  extern int rtas_suspend_last_cpu(struct rtas_suspend_me_data *data);
>>  int rtas_ibm_suspend_me_unsafe(u64 handle);
>>  int rtas_ibm_suspend_me(int *fw_status);
>> +int rtas_syscall_dispatch_ibm_suspend_me(u64 handle);
>
> ie. we'd just do:
>
> #ifdef CONFIG_PPC_PSERIES
> int rtas_syscall_dispatch_ibm_suspend_me(u64 handle);
> #else
> int rtas_syscall_dispatch_ibm_suspend_me(u64 handle)
> {
> 	return -EINVAL;
> }
> #endif

Yep will do.

^ permalink raw reply

* Re: [PATCH 13/29] powerpc/pseries/mobility: use stop_machine for join/suspend
From: Nathan Lynch @ 2020-12-04 16:01 UTC (permalink / raw)
  To: Michael Ellerman, linuxppc-dev; +Cc: tyreld, ajd, mmc, cforno12, drt, brking
In-Reply-To: <875z5hvilq.fsf@mpe.ellerman.id.au>

Hi Michael,

Michael Ellerman <mpe@ellerman.id.au> writes:
> Nathan Lynch <nathanl@linux.ibm.com> writes:
>> The partition suspend sequence as specified in the platform
>> architecture requires that all active processor threads call
>> H_JOIN, which:
> ...
>> diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c
>> index 1b8ae221b98a..44ca7d4e143d 100644
>> --- a/arch/powerpc/platforms/pseries/mobility.c
>> +++ b/arch/powerpc/platforms/pseries/mobility.c
>> @@ -412,6 +414,128 @@ static int wait_for_vasi_session_suspending(u64 handle)
> ...
>
>> +
>> +static int do_join(void *arg)
>> +{
>> +	atomic_t *counter = arg;
>> +	long hvrc;
>> +	int ret;
>> +
>> +	/* Must ensure MSR.EE off for H_JOIN. */
>> +	hard_irq_disable();
>
> Didn't stop_machine() already do that for us?
>
> In the state machine in multi_cpu_stop().

Yes, but I didn't want to rely on something that seems like an
implementation detail of stop_machine(). I assumed it's benign and in
keeping with hard_irq_disable()'s intended semantics to make multiple
calls to it within a critical section.

I don't feel strongly about this though. If I remove this
hard_irq_disable() I'll modify the comment to indicate that this
function relies on stop_machine() to satisfy this condition for H_JOIN.


>> +	hvrc = plpar_hcall_norets(H_JOIN);
>> +
>> +	switch (hvrc) {
>> +	case H_CONTINUE:
>> +		/*
>> +		 * All other CPUs are offline or in H_JOIN. This CPU
>> +		 * attempts the suspend.
>> +		 */
>> +		ret = do_suspend();
>> +		break;
>> +	case H_SUCCESS:
>> +		/*
>> +		 * The suspend is complete and this cpu has received a
>> +		 * prod.
>> +		 */
>> +		ret = 0;
>> +		break;
>> +	case H_BAD_MODE:
>> +	case H_HARDWARE:
>> +	default:
>> +		ret = -EIO;
>> +		pr_err_ratelimited("H_JOIN error %ld on CPU %i\n",
>> +				   hvrc, smp_processor_id());
>> +		break;
>> +	}
>> +
>> +	if (atomic_inc_return(counter) == 1) {
>> +		pr_info("CPU %u waking all threads\n", smp_processor_id());
>> +		prod_others();
>> +	}
>
> Do we even need the counter? IIUC only one CPU receives H_CONTINUE. So
> couldn't we just have that CPU do the prodding of others?

CPUs may exit H_JOIN due to system reset interrupt at any time, and
H_JOIN may return H_HARDWARE to a caller after other CPUs have entered
the join state successfully. In these cases the counter ensures exactly
one thread performs the prod sequence.

>
>> +	/*
>> +	 * Execution may have been suspended for several seconds, so
>> +	 * reset the watchdog.
>> +	 */
>> +	touch_nmi_watchdog();
>> +	return ret;
>> +}
>> +
>> +static int pseries_migrate_partition(u64 handle)
>> +{
>> +	atomic_t counter = ATOMIC_INIT(0);
>> +	int ret;
>> +
>> +	ret = wait_for_vasi_session_suspending(handle);
>> +	if (ret)
>> +		goto out;
>
> Direct return would be clearer IMHO.

OK, I can change this.

^ permalink raw reply

* Re: [PATCH] powerpc/xmon: Change printk() to pr_cont()
From: Joe Perches @ 2020-12-04 15:55 UTC (permalink / raw)
  To: Michael Ellerman, Christophe Leroy, Benjamin Herrenschmidt,
	Paul Mackerras
  Cc: linuxppc-dev, linux-kernel
In-Reply-To: <87h7p1vnym.fsf@mpe.ellerman.id.au>

On Fri, 2020-12-04 at 21:56 +1100, Michael Ellerman wrote:
> Christophe Leroy <christophe.leroy@csgroup.eu> writes:
> > Since some time now, printk() adds carriage return, leading to
> > unusable xmon output:
> > 
> > [   54.288722] sysrq: Entering xmon
> > [   54.292209] Vector: 0  at [cace3d2c]
> > [   54.292274]     pc:
> > [   54.292331] c0023650
> 
> ...
> 
> > diff --git a/arch/powerpc/xmon/nonstdio.c b/arch/powerpc/xmon/nonstdio.c
> > index 5c1a50912229..9b0d85bff021 100644
> > --- a/arch/powerpc/xmon/nonstdio.c
> > +++ b/arch/powerpc/xmon/nonstdio.c
> > @@ -178,7 +178,7 @@ void xmon_printf(const char *format, ...)
> >  
> > 
> >  	if (n && rc == 0) {
> >  		/* No udbg hooks, fallback to printk() - dangerous */
> > -		printk("%s", xmon_outbuf);
> > +		pr_cont("%s", xmon_outbuf);
> >  	}
> 
> Ah OK, in the case where there's no udbg backend. We basically always
> have a udbg backend on 64-bit, via hvc console. Which explains why we
> haven't noticed it.
> 
> Will pick up the patch.
> 
> cheers

Perhaps all of these bare printks should be inspected for defects:

$ git grep -P -n '\bprintk\s*\(\s*(?!KERN_\w+)"[^\\n]*"' arch/powerpc
arch/powerpc/kernel/process.c:1475:     printk("NIP:  "REG" LR: "REG" CTR: "REG"\n",
arch/powerpc/kernel/process.c:1479:     printk("MSR:  "REG" ", regs->msr);
arch/powerpc/kernel/process.c:1513:             printk("NIP ["REG"] %pS\n", regs->nip, (void *)regs->nip);
arch/powerpc/kernel/process.c:1514:             printk("LR ["REG"] %pS\n", regs->link, (void *)regs->link);
arch/powerpc/kernel/process.c:2157:                     printk("%s["REG"] ["REG"] %pS",
arch/powerpc/kernel/traps.c:621:        printk("Caused by (from MCSR=%lx): ", reason);
arch/powerpc/kernel/traps.c:726:        printk("Caused by (from MCSR=%lx): ", reason);
arch/powerpc/kernel/traps.c:766:        printk("Caused by (from MCSR=%lx): ", reason);
arch/powerpc/kernel/traps.c:791:        printk("Caused by (from SRR1=%lx): ", reason);
arch/powerpc/kernel/udbg.c:95:          printk("%s", s);
arch/powerpc/math-emu/fabs.c:13:        printk("%s: D %p, B %p: ", __func__, frD, frB);
arch/powerpc/math-emu/fctiw.c:22:       printk("%s: D %p, B %p: ", __func__, frD, frB);
arch/powerpc/math-emu/fctiwz.c:29:      printk("%s: D %p, B %p: ", __func__, frD, frB);
arch/powerpc/math-emu/fmr.c:13: printk("%s: D %p, B %p: ", __func__, frD, frB);
arch/powerpc/math-emu/fnabs.c:13:       printk("%s: D %p, B %p: ", __func__, frD, frB);
arch/powerpc/math-emu/fneg.c:13:        printk("%s: D %p, B %p: ", __func__, frD, frB);
arch/powerpc/math-emu/lfd.c:15: printk("%s: D %p, ea %p: ", __func__, frD, ea);
arch/powerpc/math-emu/stfd.c:11:        printk("%s: S %p, ea %p: ", __func__, frS, ea);
arch/powerpc/mm/nohash/44x.c:192:                                       printk("%d ", i);
arch/powerpc/platforms/4xx/machine_check.c:19:          printk("Data");
arch/powerpc/platforms/chrp/pci.c:256:                  printk(" at %llx", (unsigned long long)r.start);
arch/powerpc/platforms/embedded6xx/ls_uart.c:47:                                        printk("%c", in_8(avr_addr + UART_RX));
arch/powerpc/platforms/powermac/pfunc_core.c:83:        printk("%s", title);
arch/powerpc/platforms/powermac/pfunc_core.c:85:                printk("%02x ", *((u8 *)blob));
arch/powerpc/platforms/powernv/pci-ioda.c:81:   printk("%spci %s: [PE# %.2x] %pV",
arch/powerpc/sysdev/tsi108_pci.c:63:    printk("PCI CFG write : ");
arch/powerpc/sysdev/tsi108_pci.c:64:    printk("%d:0x%x:0x%x ", bus->number, devfunc, offset);
arch/powerpc/sysdev/tsi108_pci.c:65:    printk("%d ADDR=0x%08x ", len, (uint) cfg_addr);
arch/powerpc/sysdev/tsi108_pci.c:164:           printk("PCI CFG read : ");
arch/powerpc/sysdev/tsi108_pci.c:165:           printk("%d:0x%x:0x%x ", bus->number, devfn, offset);
arch/powerpc/sysdev/tsi108_pci.c:166:           printk("%d ADDR=0x%08x ", len, (uint) cfg_addr);
arch/powerpc/sysdev/tsi108_pci.c:315:           printk("cfg_ctl=0x%08x ", temp);
arch/powerpc/xmon/nonstdio.c:181:               printk("%s", xmon_outbuf);



^ permalink raw reply

* Re: [PATCH v3 06/18] ibmvfc: add handlers to drain and complete Sub-CRQ responses
From: Brian King @ 2020-12-04 14:51 UTC (permalink / raw)
  To: Tyrel Datwyler, james.bottomley
  Cc: brking, linuxppc-dev, linux-scsi, martin.petersen, linux-kernel
In-Reply-To: <20201203020806.14747-7-tyreld@linux.ibm.com>

On 12/2/20 8:07 PM, Tyrel Datwyler wrote:
> The logic for iterating over the Sub-CRQ responses is similiar to that
> of the primary CRQ. Add the necessary handlers for processing those
> responses.
> 
> Signed-off-by: Tyrel Datwyler <tyreld@linux.ibm.com>
> ---
>  drivers/scsi/ibmvscsi/ibmvfc.c | 80 ++++++++++++++++++++++++++++++++++
>  1 file changed, 80 insertions(+)
> 
> diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c
> index e082935f56cf..b61ae1df21e5 100644
> --- a/drivers/scsi/ibmvscsi/ibmvfc.c
> +++ b/drivers/scsi/ibmvscsi/ibmvfc.c
> @@ -3381,6 +3381,86 @@ static int ibmvfc_toggle_scrq_irq(struct ibmvfc_sub_queue *scrq, int enable)
>  	return rc;
>  }
>  
> +static void ibmvfc_handle_scrq(struct ibmvfc_crq *crq, struct ibmvfc_host *vhost)
> +{
> +	struct ibmvfc_event *evt = (struct ibmvfc_event *)be64_to_cpu(crq->ioba);
> +	unsigned long flags;
> +
> +	switch (crq->valid) {
> +	case IBMVFC_CRQ_CMD_RSP:
> +		break;
> +	case IBMVFC_CRQ_XPORT_EVENT:
> +		return;
> +	default:
> +		dev_err(vhost->dev, "Got and invalid message type 0x%02x\n", crq->valid);
> +		return;
> +	}
> +
> +	/* The only kind of payload CRQs we should get are responses to
> +	 * things we send. Make sure this response is to something we
> +	 * actually sent
> +	 */
> +	if (unlikely(!ibmvfc_valid_event(&vhost->pool, evt))) {
> +		dev_err(vhost->dev, "Returned correlation_token 0x%08llx is invalid!\n",
> +			crq->ioba);
> +		return;
> +	}
> +
> +	if (unlikely(atomic_read(&evt->free))) {
> +		dev_err(vhost->dev, "Received duplicate correlation_token 0x%08llx!\n",
> +			crq->ioba);
> +		return;
> +	}
> +
> +	del_timer(&evt->timer);
> +	list_del(&evt->queue);
> +	ibmvfc_trc_end(evt);> +	spin_unlock_irqrestore(vhost->host->host_lock, flags);

You can't do this here... You are grabbing the host lock in ibmvfc_drain_sub_crq
and saving the irqflags to a local in that function, then doing a spin_unlock_irqrestore
and restoring irqflags using an uninitialized local in this function...

I'm assuming this will get sorted out with the locking changes we've been discussing off-list...


> +	evt->done(evt);
> +	spin_lock_irqsave(vhost->host->host_lock, flags);
> +}
> +
> +static struct ibmvfc_crq *ibmvfc_next_scrq(struct ibmvfc_sub_queue *scrq)
> +{
> +	struct ibmvfc_crq *crq;
> +
> +	crq = &scrq->msgs[scrq->cur].crq;
> +	if (crq->valid & 0x80) {
> +		if (++scrq->cur == scrq->size)
> +			scrq->cur = 0;
> +		rmb();
> +	} else
> +		crq = NULL;
> +
> +	return crq;
> +}
> +
> +static void ibmvfc_drain_sub_crq(struct ibmvfc_sub_queue *scrq)
> +{
> +	struct ibmvfc_crq *crq;
> +	unsigned long flags;
> +	int done = 0;
> +
> +	spin_lock_irqsave(scrq->vhost->host->host_lock, flags);
> +	while (!done) {
> +		while ((crq = ibmvfc_next_scrq(scrq)) != NULL) {
> +			ibmvfc_handle_scrq(crq, scrq->vhost);
> +			crq->valid = 0;
> +			wmb();
> +		}
> +
> +		ibmvfc_toggle_scrq_irq(scrq, 1);
> +		if ((crq = ibmvfc_next_scrq(scrq)) != NULL) {
> +			ibmvfc_toggle_scrq_irq(scrq, 0);
> +			ibmvfc_handle_scrq(crq, scrq->vhost);
> +			crq->valid = 0;
> +			wmb();
> +		} else
> +			done = 1;
> +	}
> +	spin_unlock_irqrestore(scrq->vhost->host->host_lock, flags);
> +}
> +
>  /**
>   * ibmvfc_init_tgt - Set the next init job step for the target
>   * @tgt:		ibmvfc target struct
> 


-- 
Brian King
Power Linux I/O
IBM Linux Technology Center


^ permalink raw reply

* Re: [PATCH v3 04/18] ibmvfc: add alloc/dealloc routines for SCSI Sub-CRQ Channels
From: Brian King @ 2020-12-04 14:47 UTC (permalink / raw)
  To: Tyrel Datwyler, james.bottomley
  Cc: brking, linuxppc-dev, linux-scsi, martin.petersen, linux-kernel
In-Reply-To: <20201203020806.14747-5-tyreld@linux.ibm.com>

On 12/2/20 8:07 PM, Tyrel Datwyler wrote:
> @@ -4983,6 +4993,118 @@ static int ibmvfc_init_crq(struct ibmvfc_host *vhost)
>  	return retrc;
>  }
>  
> +static int ibmvfc_register_scsi_channel(struct ibmvfc_host *vhost,
> +				  int index)
> +{
> +	struct device *dev = vhost->dev;
> +	struct vio_dev *vdev = to_vio_dev(dev);
> +	struct ibmvfc_sub_queue *scrq = &vhost->scsi_scrqs.scrqs[index];
> +	int rc = -ENOMEM;
> +
> +	ENTER;
> +
> +	scrq->msgs = (struct ibmvfc_sub_crq *)get_zeroed_page(GFP_KERNEL);
> +	if (!scrq->msgs)
> +		return rc;
> +
> +	scrq->size = PAGE_SIZE / sizeof(*scrq->msgs);
> +	scrq->msg_token = dma_map_single(dev, scrq->msgs, PAGE_SIZE,
> +					 DMA_BIDIRECTIONAL);
> +
> +	if (dma_mapping_error(dev, scrq->msg_token))
> +		goto dma_map_failed;
> +
> +	rc = h_reg_sub_crq(vdev->unit_address, scrq->msg_token, PAGE_SIZE,
> +			   &scrq->cookie, &scrq->hw_irq);
> +
> +	if (rc) {
> +		dev_warn(dev, "Error registering sub-crq: %d\n", rc);
> +		if (rc == H_PARAMETER)
> +			dev_warn_once(dev, "Firmware may not support MQ\n");
> +		goto reg_failed;
> +	}
> +
> +	scrq->hwq_id = index;
> +	scrq->vhost = vhost;
> +
> +	LEAVE;
> +	return 0;
> +
> +reg_failed:
> +	dma_unmap_single(dev, scrq->msg_token, PAGE_SIZE, DMA_BIDIRECTIONAL);
> +dma_map_failed:
> +	free_page((unsigned long)scrq->msgs);
> +	LEAVE;
> +	return rc;
> +}
> +
> +static void ibmvfc_deregister_scsi_channel(struct ibmvfc_host *vhost, int index)
> +{
> +	struct device *dev = vhost->dev;
> +	struct vio_dev *vdev = to_vio_dev(dev);
> +	struct ibmvfc_sub_queue *scrq = &vhost->scsi_scrqs.scrqs[index];
> +	long rc;
> +
> +	ENTER;
> +
> +	do {
> +		rc = plpar_hcall_norets(H_FREE_SUB_CRQ, vdev->unit_address,
> +					scrq->cookie);
> +	} while (rc == H_BUSY || H_IS_LONG_BUSY(rc));
> +
> +	if (rc)
> +		dev_err(dev, "Failed to free sub-crq[%d]: rc=%ld\n", index, rc);
> +
> +	dma_unmap_single(dev, scrq->msg_token, PAGE_SIZE, DMA_BIDIRECTIONAL);
> +	free_page((unsigned long)scrq->msgs);
> +	LEAVE;
> +}
> +
> +static int ibmvfc_init_sub_crqs(struct ibmvfc_host *vhost)
> +{
> +	int i, j;
> +
> +	ENTER;
> +
> +	vhost->scsi_scrqs.scrqs = kcalloc(IBMVFC_SCSI_HW_QUEUES,
> +					  sizeof(*vhost->scsi_scrqs.scrqs),
> +					  GFP_KERNEL);
> +	if (!vhost->scsi_scrqs.scrqs)
> +		return -1;
> +
> +	for (i = 0; i < IBMVFC_SCSI_HW_QUEUES; i++) {
> +		if (ibmvfc_register_scsi_channel(vhost, i)) {
> +			for (j = i; j > 0; j--)
> +				ibmvfc_deregister_scsi_channel(vhost, j - 1);
> +			kfree(vhost->scsi_scrqs.scrqs);
> +			vhost->scsi_scrqs.scrqs = NULL;
> +			vhost->scsi_scrqs.active_queues = 0;
> +			LEAVE;
> +			return -1;
> +		}
> +	}
> +
> +	LEAVE;
> +	return 0;
> +}
> +
> +static void ibmvfc_release_sub_crqs(struct ibmvfc_host *vhost)
> +{
> +	int i;
> +
> +	ENTER;
> +	if (!vhost->scsi_scrqs.scrqs)
> +		return;
> +
> +	for (i = 0; i < IBMVFC_SCSI_HW_QUEUES; i++)
> +		ibmvfc_deregister_scsi_channel(vhost, i);
> +
> +	kfree(vhost->scsi_scrqs.scrqs);
> +	vhost->scsi_scrqs.scrqs = NULL;
> +	vhost->scsi_scrqs.active_queues = 0;
> +	LEAVE;
> +}
> +
>  /**
>   * ibmvfc_free_mem - Free memory for vhost
>   * @vhost:	ibmvfc host struct
> @@ -5239,6 +5361,12 @@ static int ibmvfc_probe(struct vio_dev *vdev, const struct vio_device_id *id)
>  		goto remove_shost;
>  	}
>  
> +	if (vhost->mq_enabled) {
> +		rc = ibmvfc_init_sub_crqs(vhost);
> +		if (rc)
> +			dev_warn(dev, "Failed to allocate Sub-CRQs. rc=%d\n", rc);

So, I think if you end up down this path, you will have:

vhost->scsi_scrqs.scrqs == NULL
vhost->scsi_scrqs.active_queues == 0

And you proceed with discovery. You will proceed with enquiry and channel setup.
Then, I think you could end up in queuecommand doing this:

evt->hwq = hwq % vhost->scsi_scrqs.active_queues;

And that is a divide by zero...

I wonder if it would be better in this scenario where registering the sub crqs fails,
if you just did:

vhost->do_enquiry = 0;
vhost->mq_enabled = 0;
vhost->using_channels = 0;

It looks like you only try to allocate the subcrqs in probe, so if that fails, we'd
never end up using mq, so just disabling in this case seems reasonable.

Thanks,

Brian

-- 
Brian King
Power Linux I/O
IBM Linux Technology Center


^ permalink raw reply

* Re: [PATCH 12/29] powerpc/pseries/mobility: extract VASI session polling logic
From: Nathan Lynch @ 2020-12-04 14:46 UTC (permalink / raw)
  To: Michael Ellerman, linuxppc-dev; +Cc: tyreld, ajd, mmc, cforno12, drt, brking
In-Reply-To: <878sadvio4.fsf@mpe.ellerman.id.au>

Michael Ellerman <mpe@ellerman.id.au> writes:
> Nathan Lynch <nathanl@linux.ibm.com> writes:
>> The behavior of rtas_ibm_suspend_me_unsafe() is to return -EAGAIN to
>> the caller until the specified VASI suspend session state makes the
>> transition from H_VASI_ENABLED to H_VASI_SUSPENDING. In the interest
>> of separating concerns to prepare for a new implementation of the
>> join/suspend sequence, extract VASI session polling logic into a
>> couple of local functions. Waiting for the session state to reach
>> H_VASI_SUSPENDING before calling rtas_ibm_suspend_me_unsafe() ensures
>> that we will never get an EAGAIN result necessitating a retry. No
>> user-visible change in behavior is intended.
>>
>> Signed-off-by: Nathan Lynch <nathanl@linux.ibm.com>
>> ---
>>  arch/powerpc/platforms/pseries/mobility.c | 76 +++++++++++++++++++++--
>>  1 file changed, 71 insertions(+), 5 deletions(-)
>>
>> diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c
>> index dc6abf164db7..1b8ae221b98a 100644
>> --- a/arch/powerpc/platforms/pseries/mobility.c
>> +++ b/arch/powerpc/platforms/pseries/mobility.c
>> @@ -345,6 +345,73 @@ void post_mobility_fixup(void)
> ...
>
>> +
>> +static int wait_for_vasi_session_suspending(u64 handle)
>> +{
>> +	unsigned long state;
>> +	bool keep_polling;
>> +	int ret;
>> +
>> +	/*
>> +	 * Wait for transition from H_VASI_ENABLED to
>> +	 * H_VASI_SUSPENDING. Treat anything else as an error.
>> +	 */
>> +	do {
>> +		keep_polling = false;
>> +		ret = poll_vasi_state(handle, &state);
>> +		if (ret != 0)
>> +			break;
>> +
>> +		switch (state) {
>> +		case H_VASI_SUSPENDING:
>> +			break;
>> +		case H_VASI_ENABLED:
>> +			keep_polling = true;
>> +			ssleep(1);
>> +			break;
>> +		default:
>> +			pr_err("unexpected H_VASI_STATE result %lu\n", state);
>> +			ret = -EIO;
>> +			break;
>> +		}
>> +	} while (keep_polling);
>
> This seems like it could be simpler?
>
> eg:
>
> 	while (true) {
> 		ret = poll_vasi_state(handle, &state);
>
> 		if (ret != 0 || state == H_VASI_SUSPENDING)
> 			break;
> 		else if (state == H_VASI_ENABLED)
> 			ssleep(1);
> 		else {
> 			pr_err("unexpected H_VASI_STATE result %lu\n", state);
> 			ret = -EIO;
> 			break;
> 		}
> 	}
>
>
> Or did I miss something?

No I think you're right, thanks.

^ permalink raw reply

* Re: [PATCH 03/29] powerpc/rtas: complete ibm,suspend-me status codes
From: Nathan Lynch @ 2020-12-04 14:40 UTC (permalink / raw)
  To: Michael Ellerman, linuxppc-dev; +Cc: tyreld, ajd, mmc, cforno12, drt, brking
In-Reply-To: <877dpxvimm.fsf@mpe.ellerman.id.au>

Michael Ellerman <mpe@ellerman.id.au> writes:
> Nathan Lynch <nathanl@linux.ibm.com> writes:
>> We don't completely account for the possible return codes for
>> ibm,suspend-me. Add definitions for these.
>>
>> Signed-off-by: Nathan Lynch <nathanl@linux.ibm.com>
>> ---
>>  arch/powerpc/include/asm/rtas.h | 7 ++++++-
>>  1 file changed, 6 insertions(+), 1 deletion(-)
>>
>> diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h
>> index 55f9a154c95d..f060181a0d32 100644
>> --- a/arch/powerpc/include/asm/rtas.h
>> +++ b/arch/powerpc/include/asm/rtas.h
>> @@ -23,11 +23,16 @@
>>  #define RTAS_RMOBUF_MAX (64 * 1024)
>>  
>>  /* RTAS return status codes */
>> -#define RTAS_NOT_SUSPENDABLE	-9004
>>  #define RTAS_BUSY		-2    /* RTAS Busy */
>>  #define RTAS_EXTENDED_DELAY_MIN	9900
>>  #define RTAS_EXTENDED_DELAY_MAX	9905
>>  
>> +/* statuses specific to ibm,suspend-me */
>> +#define RTAS_SUSPEND_ABORTED     9000 /* Suspension aborted */
>
> This made me ... pause.
>
> But it really is positive 9000, I checked PAPR.

Yes, 9000 falls within the "vendor-specific success codes" range in the
RTAS status value table. I guess aborting a suspend is
operator-initiated and it's not considered an error condition from that
point of view.

^ permalink raw reply

* Re: [RFC v2 2/2] [MOCKUP] sched/mm: Lightweight lazy mm refcounting
From: Andy Lutomirski @ 2020-12-04 14:37 UTC (permalink / raw)
  To: Nicholas Piggin
  Cc: linux-arch, Nadav Amit, X86 ML, Arnd Bergmann, Jann Horn,
	Catalin Marinas, Rik van Riel, LKML, Linux-MM, Dave Hansen,
	Will Deacon, Mathieu Desnoyers, Andy Lutomirski, linuxppc-dev
In-Reply-To: <1607065599.ecww2w3xq3.astroid@bobo.none>

> On Dec 3, 2020, at 11:54 PM, Nicholas Piggin <npiggin@gmail.com> wrote:
> 
> Excerpts from Andy Lutomirski's message of December 4, 2020 3:26 pm:
>> This is a mockup.  It's designed to illustrate the algorithm and how the
>> code might be structured.  There are several things blatantly wrong with
>> it:
>> 
>> The coding stype is not up to kernel standards.  I have prototypes in the
>> wrong places and other hacks.
>> 
>> There's a problem with mm_cpumask() not being reliable.
> 
> Interesting, this might be a way to reduce those IPIs with fairly 
> minimal fast path cost. Would be interesting to see how much performance 
> advantage it has over my dumb simple shoot-lazies.

My real motivation isn’t really performance per se. I think there’s considerable value in keeping the core algorithms the same across all architectures, and I think my approach can manage that with only a single hint from the architecture as to which CPUs to scan.

With shoot-lazies, in contrast, enabling it everywhere would either malfunction or have very poor performance or even DoS issues on arches like arm64 and s390x that don’t track mm_cpumask at all.  I’m sure we could come up with some way to mitigate that, but I think that my approach may be better overall for keeping the core code uniform and relatively straightforward.

> 
> For powerpc I don't think we'd be inclined to go that way, so don't feel 
> the need to add this complexity for us alone -- we'd be more inclined to 
> move the exit lazy to the final TLB shootdown path, which we're slowly 
> getting more infrastructure in place to do.
> 

> 
> There's a few nits but I don't think I can see a fundamental problem 
> yet.

Thanks!

I can polish the patch, but I want to be sure the memory ordering parts are clear.

> 
> Thanks,
> Nick

^ permalink raw reply

* Re: [PATCH v2 01/17] ibmvfc: add vhost fields and defaults for MQ enablement
From: Brian King @ 2020-12-04 14:26 UTC (permalink / raw)
  To: Tyrel Datwyler, james.bottomley
  Cc: brking, linuxppc-dev, linux-scsi, martin.petersen, linux-kernel
In-Reply-To: <38903a4f-9253-0b4b-6f67-af78ec86175f@linux.ibm.com>

On 12/2/20 11:27 AM, Tyrel Datwyler wrote:
> On 12/2/20 7:14 AM, Brian King wrote:
>> On 12/1/20 6:53 PM, Tyrel Datwyler wrote:
>>> Introduce several new vhost fields for managing MQ state of the adapter
>>> as well as initial defaults for MQ enablement.
>>>
>>> Signed-off-by: Tyrel Datwyler <tyreld@linux.ibm.com>
>>> ---
>>>  drivers/scsi/ibmvscsi/ibmvfc.c |  9 ++++++++-
>>>  drivers/scsi/ibmvscsi/ibmvfc.h | 13 +++++++++++--
>>>  2 files changed, 19 insertions(+), 3 deletions(-)
>>>
>>> diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c
>>> index 42e4d35e0d35..f1d677a7423d 100644
>>> --- a/drivers/scsi/ibmvscsi/ibmvfc.c
>>> +++ b/drivers/scsi/ibmvscsi/ibmvfc.c
>>> @@ -5161,12 +5161,13 @@ static int ibmvfc_probe(struct vio_dev *vdev, const struct vio_device_id *id)
>>>  	}
>>>  
>>>  	shost->transportt = ibmvfc_transport_template;
>>> -	shost->can_queue = max_requests;
>>> +	shost->can_queue = (max_requests / IBMVFC_SCSI_HW_QUEUES);
>>
>> This doesn't look right. can_queue is the SCSI host queue depth, not the MQ queue depth.
> 
> Our max_requests is the total number commands allowed across all queues. From
> what I understand is can_queue is the total number of commands in flight allowed
> for each hw queue.
> 
>         /*
>          * In scsi-mq mode, the number of hardware queues supported by the LLD.
>          *
>          * Note: it is assumed that each hardware queue has a queue depth of
>          * can_queue. In other words, the total queue depth per host
>          * is nr_hw_queues * can_queue. However, for when host_tagset is set,
>          * the total queue depth is can_queue.
>          */
> 
> We currently don't use the host wide shared tagset.

Ok. I missed that bit... In that case, since we allocate by default only 100
event structs. If we slice that across IBMVFC_SCSI_HW_QUEUES (16) queues, then
we end up with only about 6 commands that can be outstanding per queue,
which is going to really hurt performance... I'd suggest bumping up
IBMVFC_MAX_REQUESTS_DEFAULT from 100 to 1000 as a starting point.

Thanks,

Brian


-- 
Brian King
Power Linux I/O
IBM Linux Technology Center


^ permalink raw reply

* [powerpc:next-test 54/220] arch/powerpc/kernel/vdso32/vgettimeofday.c:13:5: warning: no previous prototype for function '__c_kernel_clock_gettime64'
From: kernel test robot @ 2020-12-04 14:23 UTC (permalink / raw)
  To: Christophe Leroy; +Cc: clang-built-linux, kbuild-all, linuxppc-dev

[-- Attachment #1: Type: text/plain, Size: 3030 bytes --]

tree:   https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git next-test
head:   4e4ed87981c764498942c52004c620bb8f104eac
commit: d0e3fc69d00d1f50d22d6b6acfc555ccda80ad1e [54/220] powerpc/vdso: Provide __kernel_clock_gettime64() on vdso32
config: powerpc64-randconfig-r011-20201204 (attached as .config)
compiler: clang version 12.0.0 (https://github.com/llvm/llvm-project 32c501dd88b62787d3a5ffda7aabcf4650dbe3cd)
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # install powerpc64 cross compiling tool for clang build
        # apt-get install binutils-powerpc64-linux-gnu
        # https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git/commit/?id=d0e3fc69d00d1f50d22d6b6acfc555ccda80ad1e
        git remote add powerpc https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git
        git fetch --no-tags powerpc next-test
        git checkout d0e3fc69d00d1f50d22d6b6acfc555ccda80ad1e
        # save the attached .config to linux build tree
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross ARCH=powerpc64 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All warnings (new ones prefixed by >>):

   arch/powerpc/kernel/vdso32/vgettimeofday.c:7:5: error: conflicting types for '__c_kernel_clock_gettime'
   int __c_kernel_clock_gettime(clockid_t clock, struct old_timespec32 *ts,
       ^
   arch/powerpc/include/asm/vdso/gettimeofday.h:183:5: note: previous declaration is here
   int __c_kernel_clock_gettime(clockid_t clock, struct __kernel_timespec *ts,
       ^
>> arch/powerpc/kernel/vdso32/vgettimeofday.c:13:5: warning: no previous prototype for function '__c_kernel_clock_gettime64' [-Wmissing-prototypes]
   int __c_kernel_clock_gettime64(clockid_t clock, struct __kernel_timespec *ts,
       ^
   arch/powerpc/kernel/vdso32/vgettimeofday.c:13:1: note: declare 'static' if the function is not intended to be used outside of this translation unit
   int __c_kernel_clock_gettime64(clockid_t clock, struct __kernel_timespec *ts,
   ^
   static 
   arch/powerpc/kernel/vdso32/vgettimeofday.c:25:5: error: conflicting types for '__c_kernel_clock_getres'
   int __c_kernel_clock_getres(clockid_t clock_id, struct old_timespec32 *res,
       ^
   arch/powerpc/include/asm/vdso/gettimeofday.h:185:5: note: previous declaration is here
   int __c_kernel_clock_getres(clockid_t clock_id, struct __kernel_timespec *res,
       ^
   1 warning and 2 errors generated.

vim +/__c_kernel_clock_gettime64 +13 arch/powerpc/kernel/vdso32/vgettimeofday.c

    12	
  > 13	int __c_kernel_clock_gettime64(clockid_t clock, struct __kernel_timespec *ts,
    14				       const struct vdso_data *vd)
    15	{
    16		return __cvdso_clock_gettime_data(vd, clock, ts);
    17	}
    18	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 31372 bytes --]

^ permalink raw reply

* Re: [PATCH 16/29] powerpc/rtas: dispatch partition migration requests to pseries
From: Michael Ellerman @ 2020-12-04 12:52 UTC (permalink / raw)
  To: Nathan Lynch, linuxppc-dev; +Cc: tyreld, ajd, mmc, cforno12, drt, brking
In-Reply-To: <20201030011805.1224603-17-nathanl@linux.ibm.com>

Nathan Lynch <nathanl@linux.ibm.com> writes:
> sys_rtas() cannot call ibm,suspend-me directly in the same way it
> handles other inputs. Instead it must dispatch the request to code
> that can first perform the H_JOIN sequence before any call to
> ibm,suspend-me can succeed. Over time kernel/rtas.c has accreted a fair
> amount of platform-specific code to implement this.
>
> Since a different, more robust implementation of the suspend sequence
> is now in the pseries platform code, we want to dispatch the request
> there while minimizing additional dependence on pseries.
>
> Use a weak function that only pseries overrides.

Over the years weak functions have caused their fair share of problems.

There are cases where they are the cleanest option, but for intra-arch
code like this I think and ifdef is much simpler.


> diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h
> index fdefe6a974eb..be0fc2536673 100644
> --- a/arch/powerpc/include/asm/rtas.h
> +++ b/arch/powerpc/include/asm/rtas.h
> @@ -260,6 +260,7 @@ extern int rtas_suspend_cpu(struct rtas_suspend_me_data *data);
>  extern int rtas_suspend_last_cpu(struct rtas_suspend_me_data *data);
>  int rtas_ibm_suspend_me_unsafe(u64 handle);
>  int rtas_ibm_suspend_me(int *fw_status);
> +int rtas_syscall_dispatch_ibm_suspend_me(u64 handle);

ie. we'd just do:

#ifdef CONFIG_PPC_PSERIES
int rtas_syscall_dispatch_ibm_suspend_me(u64 handle);
#else
int rtas_syscall_dispatch_ibm_suspend_me(u64 handle)
{
	return -EINVAL;
}
#endif


cheers

^ permalink raw reply

* Re: [PATCH 13/29] powerpc/pseries/mobility: use stop_machine for join/suspend
From: Michael Ellerman @ 2020-12-04 12:52 UTC (permalink / raw)
  To: Nathan Lynch, linuxppc-dev; +Cc: tyreld, ajd, mmc, cforno12, drt, brking
In-Reply-To: <20201030011805.1224603-14-nathanl@linux.ibm.com>

Hi Nathan,

Just a couple of nits ...

Nathan Lynch <nathanl@linux.ibm.com> writes:
> The partition suspend sequence as specified in the platform
> architecture requires that all active processor threads call
> H_JOIN, which:
...
> diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c
> index 1b8ae221b98a..44ca7d4e143d 100644
> --- a/arch/powerpc/platforms/pseries/mobility.c
> +++ b/arch/powerpc/platforms/pseries/mobility.c
> @@ -412,6 +414,128 @@ static int wait_for_vasi_session_suspending(u64 handle)
...

> +
> +static int do_join(void *arg)
> +{
> +	atomic_t *counter = arg;
> +	long hvrc;
> +	int ret;
> +
> +	/* Must ensure MSR.EE off for H_JOIN. */
> +	hard_irq_disable();

Didn't stop_machine() already do that for us?

In the state machine in multi_cpu_stop().

> +	hvrc = plpar_hcall_norets(H_JOIN);
> +
> +	switch (hvrc) {
> +	case H_CONTINUE:
> +		/*
> +		 * All other CPUs are offline or in H_JOIN. This CPU
> +		 * attempts the suspend.
> +		 */
> +		ret = do_suspend();
> +		break;
> +	case H_SUCCESS:
> +		/*
> +		 * The suspend is complete and this cpu has received a
> +		 * prod.
> +		 */
> +		ret = 0;
> +		break;
> +	case H_BAD_MODE:
> +	case H_HARDWARE:
> +	default:
> +		ret = -EIO;
> +		pr_err_ratelimited("H_JOIN error %ld on CPU %i\n",
> +				   hvrc, smp_processor_id());
> +		break;
> +	}
> +
> +	if (atomic_inc_return(counter) == 1) {
> +		pr_info("CPU %u waking all threads\n", smp_processor_id());
> +		prod_others();
> +	}

Do we even need the counter? IIUC only one CPU receives H_CONTINUE. So
couldn't we just have that CPU do the prodding of others?

> +	/*
> +	 * Execution may have been suspended for several seconds, so
> +	 * reset the watchdog.
> +	 */
> +	touch_nmi_watchdog();
> +	return ret;
> +}
> +
> +static int pseries_migrate_partition(u64 handle)
> +{
> +	atomic_t counter = ATOMIC_INIT(0);
> +	int ret;
> +
> +	ret = wait_for_vasi_session_suspending(handle);
> +	if (ret)
> +		goto out;

Direct return would be clearer IMHO.

> +
> +	ret = stop_machine(do_join, &counter, cpu_online_mask);
> +	if (ret == 0)
> +		post_mobility_fixup();
> +out:
> +	return ret;
> +}
> +

cheers

^ permalink raw reply

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox