LinuxPPC-Dev Archive on lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 18/20] block: refator submit_bio_noacct
From: Christoph Hellwig @ 2020-06-29 19:39 UTC (permalink / raw)
  To: Jens Axboe
  Cc: linux-bcache, linux-xtensa, linux-nvdimm, linux-s390, dm-devel,
	linux-nvme, linux-kernel, linux-raid, linux-m68k, linuxppc-dev,
	drbd-dev
In-Reply-To: <20200629193947.2705954-1-hch@lst.de>

Split out a __submit_bio_noacct helper for the actual de-recursion
algorithm, and simplify the loop by using a continue when we can't
enter the queue for a bio.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 block/blk-core.c | 131 +++++++++++++++++++++++++----------------------
 1 file changed, 71 insertions(+), 60 deletions(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index 1caeb01e127768..b82f48c86e6f7a 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1085,6 +1085,74 @@ static blk_qc_t do_make_request(struct bio *bio)
 	return ret;
 }
 
+/*
+ * The loop in this function may be a bit non-obvious, and so deserves some
+ * explanation:
+ *
+ *  - Before entering the loop, bio->bi_next is NULL (as all callers ensure
+ *    that), so we have a list with a single bio.
+ *  - We pretend that we have just taken it off a longer list, so we assign
+ *    bio_list to a pointer to the bio_list_on_stack, thus initialising the
+ *    bio_list of new bios to be added.  ->submit_bio() may indeed add some more
+ *    bios through a recursive call to submit_bio_noacct.  If it did, we find a
+ *    non-NULL value in bio_list and re-enter the loop from the top.
+ *  - In this case we really did just take the bio of the top of the list (no
+ *    pretending) and so remove it from bio_list, and call into ->submit_bio()
+ *    again.
+ *
+ * bio_list_on_stack[0] contains bios submitted by the current ->submit_bio.
+ * bio_list_on_stack[1] contains bios that were submitted before the current
+ *	->submit_bio_bio, but that haven't been processed yet.
+ */
+static blk_qc_t __submit_bio_noacct(struct bio *bio)
+{
+	struct bio_list bio_list_on_stack[2];
+	blk_qc_t ret = BLK_QC_T_NONE;
+
+	BUG_ON(bio->bi_next);
+
+	bio_list_init(&bio_list_on_stack[0]);
+	current->bio_list = bio_list_on_stack;
+
+	do {
+		struct request_queue *q = bio->bi_disk->queue;
+		struct bio_list lower, same;
+
+		if (unlikely(bio_queue_enter(bio) != 0))
+			continue;
+
+		/*
+		 * Create a fresh bio_list for all subordinate requests.
+		 */
+		bio_list_on_stack[1] = bio_list_on_stack[0];
+		bio_list_init(&bio_list_on_stack[0]);
+
+		ret = do_make_request(bio);
+
+		/*
+		 * Sort new bios into those for a lower level and those for the
+		 * same level.
+		 */
+		bio_list_init(&lower);
+		bio_list_init(&same);
+		while ((bio = bio_list_pop(&bio_list_on_stack[0])) != NULL)
+			if (q == bio->bi_disk->queue)
+				bio_list_add(&same, bio);
+			else
+				bio_list_add(&lower, bio);
+
+		/*
+		 * Now assemble so we handle the lowest level first.
+		 */
+		bio_list_merge(&bio_list_on_stack[0], &lower);
+		bio_list_merge(&bio_list_on_stack[0], &same);
+		bio_list_merge(&bio_list_on_stack[0], &bio_list_on_stack[1]);
+	} while ((bio = bio_list_pop(&bio_list_on_stack[0])));
+
+	current->bio_list = NULL;
+	return ret;
+}
+
 /**
  * submit_bio_noacct - re-submit a bio to the block device layer for I/O
  * @bio:  The bio describing the location in memory and on the device.
@@ -1096,17 +1164,8 @@ static blk_qc_t do_make_request(struct bio *bio)
  */
 blk_qc_t submit_bio_noacct(struct bio *bio)
 {
-	/*
-	 * bio_list_on_stack[0] contains bios submitted by the current
-	 * ->submit_bio.
-	 * bio_list_on_stack[1] contains bios that were submitted before the
-	 * current ->submit_bio_bio, but that haven't been processed yet.
-	 */
-	struct bio_list bio_list_on_stack[2];
-	blk_qc_t ret = BLK_QC_T_NONE;
-
 	if (!submit_bio_checks(bio))
-		goto out;
+		return BLK_QC_T_NONE;
 
 	/*
 	 * We only want one ->submit_bio to be active at a time, else
@@ -1120,58 +1179,10 @@ blk_qc_t submit_bio_noacct(struct bio *bio)
 	 */
 	if (current->bio_list) {
 		bio_list_add(&current->bio_list[0], bio);
-		goto out;
+		return BLK_QC_T_NONE;
 	}
 
-	/* following loop may be a bit non-obvious, and so deserves some
-	 * explanation.
-	 * Before entering the loop, bio->bi_next is NULL (as all callers
-	 * ensure that) so we have a list with a single bio.
-	 * We pretend that we have just taken it off a longer list, so
-	 * we assign bio_list to a pointer to the bio_list_on_stack,
-	 * thus initialising the bio_list of new bios to be
-	 * added.  ->submit_bio() may indeed add some more bios
-	 * through a recursive call to submit_bio_noacct.  If it
-	 * did, we find a non-NULL value in bio_list and re-enter the loop
-	 * from the top.  In this case we really did just take the bio
-	 * of the top of the list (no pretending) and so remove it from
-	 * bio_list, and call into ->submit_bio() again.
-	 */
-	BUG_ON(bio->bi_next);
-	bio_list_init(&bio_list_on_stack[0]);
-	current->bio_list = bio_list_on_stack;
-	do {
-		struct request_queue *q = bio->bi_disk->queue;
-
-		if (likely(bio_queue_enter(bio) == 0)) {
-			struct bio_list lower, same;
-
-			/* Create a fresh bio_list for all subordinate requests */
-			bio_list_on_stack[1] = bio_list_on_stack[0];
-			bio_list_init(&bio_list_on_stack[0]);
-			ret = do_make_request(bio);
-
-			/* sort new bios into those for a lower level
-			 * and those for the same level
-			 */
-			bio_list_init(&lower);
-			bio_list_init(&same);
-			while ((bio = bio_list_pop(&bio_list_on_stack[0])) != NULL)
-				if (q == bio->bi_disk->queue)
-					bio_list_add(&same, bio);
-				else
-					bio_list_add(&lower, bio);
-			/* now assemble so we handle the lowest level first */
-			bio_list_merge(&bio_list_on_stack[0], &lower);
-			bio_list_merge(&bio_list_on_stack[0], &same);
-			bio_list_merge(&bio_list_on_stack[0], &bio_list_on_stack[1]);
-		}
-		bio = bio_list_pop(&bio_list_on_stack[0]);
-	} while (bio);
-	current->bio_list = NULL; /* deactivate */
-
-out:
-	return ret;
+	return __submit_bio_noacct(bio);
 }
 EXPORT_SYMBOL(submit_bio_noacct);
 
-- 
2.26.2


^ permalink raw reply related

* [PATCH 19/20] block: shortcut __submit_bio_noacct for blk-mq drivers
From: Christoph Hellwig @ 2020-06-29 19:39 UTC (permalink / raw)
  To: Jens Axboe
  Cc: linux-bcache, linux-xtensa, linux-nvdimm, linux-s390, dm-devel,
	linux-nvme, linux-kernel, linux-raid, linux-m68k, linuxppc-dev,
	drbd-dev
In-Reply-To: <20200629193947.2705954-1-hch@lst.de>

For blk-mq drivers bios can only be inserted for the same queue.  So
bypass the complicated sorting logic in __submit_bio_noacct with
a blk-mq simpler submission helper.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 block/blk-core.c | 50 +++++++++++++++++++++++++++++++++---------------
 1 file changed, 35 insertions(+), 15 deletions(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index b82f48c86e6f7a..46e3c0a37cc377 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1071,20 +1071,6 @@ static noinline_for_stack bool submit_bio_checks(struct bio *bio)
 	return false;
 }
 
-static blk_qc_t do_make_request(struct bio *bio)
-{
-	struct gendisk *disk = bio->bi_disk;
-	blk_qc_t ret = BLK_QC_T_NONE;
-
-	if (blk_crypto_bio_prep(&bio)) {
-		if (!disk->fops->submit_bio)
-			return blk_mq_submit_bio(bio);
-		ret = disk->fops->submit_bio(bio);
-	}
-	blk_queue_exit(disk->queue);
-	return ret;
-}
-
 /*
  * The loop in this function may be a bit non-obvious, and so deserves some
  * explanation:
@@ -1127,7 +1113,11 @@ static blk_qc_t __submit_bio_noacct(struct bio *bio)
 		bio_list_on_stack[1] = bio_list_on_stack[0];
 		bio_list_init(&bio_list_on_stack[0]);
 
-		ret = do_make_request(bio);
+		if (blk_crypto_bio_prep(&bio))
+			ret = bio->bi_disk->fops->submit_bio(bio);
+		else
+			ret = BLK_QC_T_NONE;
+		blk_queue_exit(q);
 
 		/*
 		 * Sort new bios into those for a lower level and those for the
@@ -1153,6 +1143,34 @@ static blk_qc_t __submit_bio_noacct(struct bio *bio)
 	return ret;
 }
 
+static blk_qc_t __submit_bio_noacct_mq(struct bio *bio)
+{
+	struct gendisk *disk = bio->bi_disk;
+	struct bio_list bio_list;
+	blk_qc_t ret = BLK_QC_T_NONE;
+
+	bio_list_init(&bio_list);
+	current->bio_list = &bio_list;
+
+	do {
+		WARN_ON_ONCE(bio->bi_disk != disk);
+
+		if (unlikely(bio_queue_enter(bio) != 0))
+			continue;
+
+		if (!blk_crypto_bio_prep(&bio)) {
+			blk_queue_exit(disk->queue);
+			ret = BLK_QC_T_NONE;
+			continue;
+		}
+
+		ret = blk_mq_submit_bio(bio);
+	} while ((bio = bio_list_pop(&bio_list)));
+
+	current->bio_list = NULL;
+	return ret;
+}
+
 /**
  * submit_bio_noacct - re-submit a bio to the block device layer for I/O
  * @bio:  The bio describing the location in memory and on the device.
@@ -1182,6 +1200,8 @@ blk_qc_t submit_bio_noacct(struct bio *bio)
 		return BLK_QC_T_NONE;
 	}
 
+	if (bio->bi_disk->queue->mq_ops)
+		return __submit_bio_noacct_mq(bio);
 	return __submit_bio_noacct(bio);
 }
 EXPORT_SYMBOL(submit_bio_noacct);
-- 
2.26.2


^ permalink raw reply related

* [PATCH 20/20] block: remove direct_make_request
From: Christoph Hellwig @ 2020-06-29 19:39 UTC (permalink / raw)
  To: Jens Axboe
  Cc: linux-bcache, linux-xtensa, linux-nvdimm, linux-s390, dm-devel,
	linux-nvme, linux-kernel, linux-raid, linux-m68k, linuxppc-dev,
	drbd-dev
In-Reply-To: <20200629193947.2705954-1-hch@lst.de>

Now that submit_bio_noacct has a decent blk-mq fast path there is no
more need for this bypass.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 block/blk-core.c              | 28 ----------------------------
 drivers/md/dm.c               |  5 +----
 drivers/nvme/host/multipath.c |  2 +-
 include/linux/blkdev.h        |  1 -
 4 files changed, 2 insertions(+), 34 deletions(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index 46e3c0a37cc377..f127d83c4fafa5 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1206,34 +1206,6 @@ blk_qc_t submit_bio_noacct(struct bio *bio)
 }
 EXPORT_SYMBOL(submit_bio_noacct);
 
-/**
- * direct_make_request - hand a buffer directly to its device driver for I/O
- * @bio:  The bio describing the location in memory and on the device.
- *
- * This function behaves like submit_bio_noacct(), but does not protect
- * against recursion.  Must only be used if the called driver is known
- * to be blk-mq based.
- */
-blk_qc_t direct_make_request(struct bio *bio)
-{
-	struct gendisk *disk = bio->bi_disk;
-
-	if (WARN_ON_ONCE(!disk->queue->mq_ops)) {
-		bio_io_error(bio);
-		return BLK_QC_T_NONE;
-	}
-	if (!submit_bio_checks(bio))
-		return BLK_QC_T_NONE;
-	if (unlikely(bio_queue_enter(bio)))
-		return BLK_QC_T_NONE;
-	if (!blk_crypto_bio_prep(&bio)) {
-		blk_queue_exit(disk->queue);
-		return BLK_QC_T_NONE;
-	}
-	return blk_mq_submit_bio(bio);
-}
-EXPORT_SYMBOL_GPL(direct_make_request);
-
 /**
  * submit_bio - submit a bio to the block device layer for I/O
  * @bio: The &struct bio which describes the I/O
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index b32b539dbace56..2cb33896198c4c 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -1302,10 +1302,7 @@ static blk_qc_t __map_bio(struct dm_target_io *tio)
 		/* the bio has been remapped so dispatch it */
 		trace_block_bio_remap(clone->bi_disk->queue, clone,
 				      bio_dev(io->orig_bio), sector);
-		if (md->type == DM_TYPE_NVME_BIO_BASED)
-			ret = direct_make_request(clone);
-		else
-			ret = submit_bio_noacct(clone);
+		ret = submit_bio_noacct(clone);
 		break;
 	case DM_MAPIO_KILL:
 		free_tio(tio);
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index f07fa47c251d9d..a986ac52c4cc7f 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -314,7 +314,7 @@ blk_qc_t nvme_ns_head_submit_bio(struct bio *bio)
 		trace_block_bio_remap(bio->bi_disk->queue, bio,
 				      disk_devt(ns->head->disk),
 				      bio->bi_iter.bi_sector);
-		ret = direct_make_request(bio);
+		ret = submit_bio_noacct(bio);
 	} else if (nvme_available_path(head)) {
 		dev_warn_ratelimited(dev, "no usable path - requeuing I/O\n");
 
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index b73cfa6a5141df..1cc913ffdbe21e 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -853,7 +853,6 @@ static inline void rq_flush_dcache_pages(struct request *rq)
 extern int blk_register_queue(struct gendisk *disk);
 extern void blk_unregister_queue(struct gendisk *disk);
 blk_qc_t submit_bio_noacct(struct bio *bio);
-extern blk_qc_t direct_make_request(struct bio *bio);
 extern void blk_rq_init(struct request_queue *q, struct request *rq);
 extern void blk_put_request(struct request *);
 extern struct request *blk_get_request(struct request_queue *, unsigned int op,
-- 
2.26.2


^ permalink raw reply related

* Re: [PATCH] ASoC: fsl_asrc: Add an option to select internal ratio mode
From: Nicolin Chen @ 2020-06-29 20:08 UTC (permalink / raw)
  To: Shengjiu Wang
  Cc: alsa-devel, timur, Xiubo.Lee, linuxppc-dev, tiwai, lgirdwood,
	perex, broonie, festevam, linux-kernel
In-Reply-To: <1593439115-19282-1-git-send-email-shengjiu.wang@nxp.com>

On Mon, Jun 29, 2020 at 09:58:35PM +0800, Shengjiu Wang wrote:
> The ASRC not only supports ideal ratio mode, but also supports
> internal ratio mode.
> 
> For internal rato mode, the rate of clock source should be divided
> with no remainder by sample rate, otherwise there is sound
> distortion.
> 
> Add function fsl_asrc_select_clk() to find proper clock source for
> internal ratio mode, if the clock source is available then internal
> ratio mode will be selected.
> 
> With change, the ideal ratio mode is not the only option for user.
> 
> Signed-off-by: Shengjiu Wang <shengjiu.wang@nxp.com>
> ---

> +static int fsl_asrc_select_clk(struct fsl_asrc_priv *asrc_priv,
> +			       struct fsl_asrc_pair *pair,
> +			       int in_rate,
> +			       int out_rate)
> +{
> +	struct fsl_asrc_pair_priv *pair_priv = pair->private;
> +	struct asrc_config *config = pair_priv->config;
> +	int rate[2], select_clk[2]; /* Array size 2 means IN and OUT */
> +	int clk_rate, clk_index;
> +	int i = 0, j = 0;
> +	bool clk_sel[2];
> +
> +	rate[0] = in_rate;
> +	rate[1] = out_rate;
> +
> +	/* Select proper clock source for internal ratio mode */
> +	for (j = 0; j < 2; j++) {
> +		for (i = 0; i < ASRC_CLK_MAP_LEN; i++) {
> +			clk_index = asrc_priv->clk_map[j][i];
> +			clk_rate = clk_get_rate(asrc_priv->asrck_clk[clk_index]);

+			/* Only match a perfect clock source with no remainder */

> +			if (clk_rate != 0 && (clk_rate / rate[j]) <= 1024 &&
> +			    (clk_rate % rate[j]) == 0)
> +				break;
> +		}
> +
> +		if (i == ASRC_CLK_MAP_LEN) {
> +			select_clk[j] = OUTCLK_ASRCK1_CLK;
> +			clk_sel[j] = false;
> +		} else {
> +			select_clk[j] = i;
> +			clk_sel[j] = true;
> +		}
> +	}
> +
> +	/* Switch to ideal ratio mode if there is no proper clock source */
> +	if (!clk_sel[IN] || !clk_sel[OUT])
> +		select_clk[IN] = INCLK_NONE;

Could get rid of clk_set:

	for (j) {
		for (i) {
			if (match)
				break;
		}

		clk[j] = i;
	}

	if (clk[IN] == ASRC_CLK_MAP_LEN || clk[OUT] == ASRC_CLK_MAP_LEN)

And it only overrides clk[IN] setting but leaving clk[OUT] to
to the searching result. This means that clk[OUT] may be using
a clock source other than OUTCLK_ASRCK1_CLK if sel[IN] happens
to be false while sel[OUT] happens to be true. Not sure if it
is intended...but I feel it would probably be safer to use the
previous settings: INCLK_NONE + OUTCLK_ASRCK1_CLK?

^ permalink raw reply

* Re: [PATCH v6 4/8] libnvdimm/nvdimm/flush: Allow architecture to override the flush barrier
From: Aneesh Kumar K.V @ 2020-06-29 20:27 UTC (permalink / raw)
  To: kernel test robot, linuxppc-dev, mpe, linux-nvdimm,
	dan.j.williams
  Cc: Jan Kara, Jeff Moyer, msuchanek, kbuild-all, oohall
In-Reply-To: <202006300210.ADlNY4uw%lkp@intel.com>

kernel test robot <lkp@intel.com> writes:

> Hi "Aneesh,
>
> I love your patch! Yet something to improve:
>
> [auto build test ERROR on powerpc/next]
> [also build test ERROR on linux-nvdimm/libnvdimm-for-next v5.8-rc3 next-20200629]
> [cannot apply to scottwood/next]
> [If your patch is applied to the wrong git tree, kindly drop us a note.
> And when submitting patch, we suggest to use  as documented in
> https://git-scm.com/docs/git-format-patch]
>
> url:    https://github.com/0day-ci/linux/commits/Aneesh-Kumar-K-V/Support-new-pmem-flush-and-sync-instructions-for-POWER/20200629-223649
> base:   https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git next
> config: arc-allyesconfig (attached as .config)
> compiler: arc-elf-gcc (GCC) 9.3.0
> reproduce (this is a W=1 build):
>         wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
>         chmod +x ~/bin/make.cross
>         # save the attached .config to linux build tree
>         COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross ARCH=arc 
>
> If you fix the issue, kindly add following tag as appropriate
> Reported-by: kernel test robot <lkp@intel.com>
>
> All errors (new ones prefixed by >>):
>
>    drivers/nvdimm/region_devs.c: In function 'generic_nvdimm_flush':
>>> drivers/nvdimm/region_devs.c:1215:2: error: implicit declaration of function 'arch_pmem_flush_barrier' [-Werror=implicit-function-declaration]
>     1215 |  arch_pmem_flush_barrier();
>          |  ^~~~~~~~~~~~~~~~~~~~~~~
>    cc1: some warnings being treated as errors

Ok let's move the back to include/linux/libnvdimm.h. Not all arch
include asm-generic/cacheflush.h

-aneesh

^ permalink raw reply

* [PATCH updated] libnvdimm/nvdimm/flush: Allow architecture to override the flush barrier
From: Aneesh Kumar K.V @ 2020-06-29 20:29 UTC (permalink / raw)
  To: linuxppc-dev, mpe, linux-nvdimm, dan.j.williams
  Cc: Jan Kara, Jeff Moyer, msuchanek, oohall, Aneesh Kumar K.V
In-Reply-To: <20200629135722.73558-5-aneesh.kumar@linux.ibm.com>

Architectures like ppc64 provide persistent memory specific barriers
that will ensure that all stores for which the modifications are
written to persistent storage by preceding dcbfps and dcbstps
instructions have updated persistent storage before any data
access or data transfer caused by subsequent instructions is initiated.
This is in addition to the ordering done by wmb()

Update nvdimm core such that architecture can use barriers other than
wmb to ensure all previous writes are architecturally visible for
the platform buffer flush.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
---
 drivers/md/dm-writecache.c   | 2 +-
 drivers/nvdimm/region_devs.c | 8 ++++----
 include/linux/libnvdimm.h    | 4 ++++
 3 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c
index 74f3c506f084..8c6b6dce64e2 100644
--- a/drivers/md/dm-writecache.c
+++ b/drivers/md/dm-writecache.c
@@ -536,7 +536,7 @@ static void ssd_commit_superblock(struct dm_writecache *wc)
 static void writecache_commit_flushed(struct dm_writecache *wc, bool wait_for_ios)
 {
 	if (WC_MODE_PMEM(wc))
-		wmb();
+		arch_pmem_flush_barrier();
 	else
 		ssd_commit_flushed(wc, wait_for_ios);
 }
diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c
index 4502f9c4708d..b308ad09b63d 100644
--- a/drivers/nvdimm/region_devs.c
+++ b/drivers/nvdimm/region_devs.c
@@ -1206,13 +1206,13 @@ int generic_nvdimm_flush(struct nd_region *nd_region)
 	idx = this_cpu_add_return(flush_idx, hash_32(current->pid + idx, 8));
 
 	/*
-	 * The first wmb() is needed to 'sfence' all previous writes
-	 * such that they are architecturally visible for the platform
-	 * buffer flush.  Note that we've already arranged for pmem
+	 * The first arch_pmem_flush_barrier() is needed to 'sfence' all
+	 * previous writes such that they are architecturally visible for
+	 * the platform buffer flush. Note that we've already arranged for pmem
 	 * writes to avoid the cache via memcpy_flushcache().  The final
 	 * wmb() ensures ordering for the NVDIMM flush write.
 	 */
-	wmb();
+	arch_pmem_flush_barrier();
 	for (i = 0; i < nd_region->ndr_mappings; i++)
 		if (ndrd_get_flush_wpq(ndrd, i, 0))
 			writeq(1, ndrd_get_flush_wpq(ndrd, i, idx));
diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h
index 18da4059be09..66f6c65bd789 100644
--- a/include/linux/libnvdimm.h
+++ b/include/linux/libnvdimm.h
@@ -286,4 +286,8 @@ static inline void arch_invalidate_pmem(void *addr, size_t size)
 }
 #endif
 
+#ifndef arch_pmem_flush_barrier
+#define arch_pmem_flush_barrier() wmb()
+#endif
+
 #endif /* __LIBNVDIMM_H__ */
-- 
2.26.2


^ permalink raw reply related

* Re: [PATCH v6 6/8] powerpc/pmem: Avoid the barrier in flush routines
From: Aneesh Kumar K.V @ 2020-06-29 20:40 UTC (permalink / raw)
  To: Michal Suchánek
  Cc: Jan Kara, linux-nvdimm, Jeff Moyer, oohall, dan.j.williams,
	linuxppc-dev
In-Reply-To: <20200629160940.GU21462@kitsune.suse.cz>

Michal Suchánek <msuchanek@suse.de> writes:

> Hello,
>
> On Mon, Jun 29, 2020 at 07:27:20PM +0530, Aneesh Kumar K.V wrote:
>> nvdimm expect the flush routines to just mark the cache clean. The barrier
>> that mark the store globally visible is done in nvdimm_flush().
>> 
>> Update the papr_scm driver to a simplified nvdim_flush callback that do
>> only the required barrier.
>> 
>> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
>> ---
>>  arch/powerpc/lib/pmem.c                   |  6 ------
>>  arch/powerpc/platforms/pseries/papr_scm.c | 13 +++++++++++++
>>  2 files changed, 13 insertions(+), 6 deletions(-)
>> 
>> diff --git a/arch/powerpc/lib/pmem.c b/arch/powerpc/lib/pmem.c
>> index 5a61aaeb6930..21210fa676e5 100644
>> --- a/arch/powerpc/lib/pmem.c
>> +++ b/arch/powerpc/lib/pmem.c
>> @@ -19,9 +19,6 @@ static inline void __clean_pmem_range(unsigned long start, unsigned long stop)
>>  
>>  	for (i = 0; i < size >> shift; i++, addr += bytes)
>>  		asm volatile(PPC_DCBSTPS(%0, %1): :"i"(0), "r"(addr): "memory");
>> -
>> -
>> -	asm volatile(PPC_PHWSYNC ::: "memory");
>>  }
>>  
>>  static inline void __flush_pmem_range(unsigned long start, unsigned long stop)
>> @@ -34,9 +31,6 @@ static inline void __flush_pmem_range(unsigned long start, unsigned long stop)
>>  
>>  	for (i = 0; i < size >> shift; i++, addr += bytes)
>>  		asm volatile(PPC_DCBFPS(%0, %1): :"i"(0), "r"(addr): "memory");
>> -
>> -
>> -	asm volatile(PPC_PHWSYNC ::: "memory");
>>  }
>>  
>>  static inline void clean_pmem_range(unsigned long start, unsigned long stop)
>> diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c
>> index 9c569078a09f..9a9a0766f8b6 100644
>> --- a/arch/powerpc/platforms/pseries/papr_scm.c
>> +++ b/arch/powerpc/platforms/pseries/papr_scm.c
>> @@ -630,6 +630,18 @@ static int papr_scm_ndctl(struct nvdimm_bus_descriptor *nd_desc,
>>  
>>  	return 0;
>>  }
>> +/*
>> + * We have made sure the pmem writes are done such that before calling this
>> + * all the caches are flushed/clean. We use dcbf/dcbfps to ensure this. Here
>> + * we just need to add the necessary barrier to make sure the above flushes
>> + * are have updated persistent storage before any data access or data transfer
>> + * caused by subsequent instructions is initiated.
>> + */
>> +static int papr_scm_flush_sync(struct nd_region *nd_region, struct bio *bio)
>> +{
>> +	arch_pmem_flush_barrier();
>> +	return 0;
>> +}
>>  
>>  static ssize_t flags_show(struct device *dev,
>>  			  struct device_attribute *attr, char *buf)
>> @@ -743,6 +755,7 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p)
>>  	ndr_desc.mapping = &mapping;
>>  	ndr_desc.num_mappings = 1;
>>  	ndr_desc.nd_set = &p->nd_set;
>> +	ndr_desc.flush = papr_scm_flush_sync;
>
> AFAICT currently the only device that implements flush is virtio_pmem.
> How does the nfit driver get away without implementing flush?

generic_nvdimm_flush does the required barrier for nfit. The reason for
adding ndr_desc.flush call back for papr_scm was to avoid the usage
of iomem based deep flushing (ndr_region_data.flush_wpq) which is not
supported by papr_scm.

BTW we do return NULL for ndrd_get_flush_wpq() on power. So the upstream
code also does the same thing, but in a different way.


> Also the flush takes arguments that are completely unused but a user of
> the pmem region must assume they are used, and call flush() on the
> region rather than arch_pmem_flush_barrier() directly.

The bio argument can help a pmem driver to do range based flushing in
case of pmem_make_request. If bio is null then we must assume a full
device flush. 

>This may not
> work well with md as discussed with earlier iteration of the patchest.
>

dm-writecache needs some major changes to work with asynchronous pmem
devices. 

-aneesh

^ permalink raw reply

* Re: [PATCH 01/20] nfblock: stop using ->queuedata
From: Geert Uytterhoeven @ 2020-06-29 21:47 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Jens Axboe, open list:TENSILICA XTENSA PORT (xtensa),
	linux-nvdimm@lists.01.org, linux-s390, linux-m68k, linux-nvme,
	Linux Kernel Mailing List, linux-raid, dm-devel, linux-bcache,
	linuxppc-dev, Lars Ellenberg
In-Reply-To: <20200629193947.2705954-2-hch@lst.de>

On Mon, Jun 29, 2020 at 9:40 PM Christoph Hellwig <hch@lst.de> wrote:
> Instead of setting up the queuedata as well just use one private data
> field.
>
> Signed-off-by: Christoph Hellwig <hch@lst.de>

Reviewed-by: Geert Uytterhoeven <geert@linux-m68k.org>
Acked-by: Geert Uytterhoeven <geert@linux-m68k.org>

Gr{oetje,eeting}s,

                        Geert

-- 
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
                                -- Linus Torvalds

^ permalink raw reply

* Re: [PATCH 0/8] mm: cleanup usage of <asm/pgalloc.h>
From: Pekka Enberg @ 2020-06-29 14:01 UTC (permalink / raw)
  To: Mike Rapoport
  Cc: ia64, linux-sh, Peter Zijlstra, linux-mips, Max Filippov,
	Satheesh Rajendran, linux-csky, sparclinux, linux-riscv,
	list@ebiederm.org:DOCUMENTATION <linux-doc@vger.kernel.org>, list@ebiederm.org:MEMORY MANAGEMENT <linux-mm@kvack.org>, ,
	Stephen Rothwell, linux-hexagon, Joerg Roedel, Mike Rapoport,
	Abdul Haleem, linux-snps-arc, linux-xtensa, Arnd Bergmann,
	linux-s390, linux-um, Steven Rostedt, linux-m68k, openrisc,
	Andy Lutomirski, Stafford Horne, linux-arm-kernel, linux-parisc,
	linux-mm@kvack.org, LKML, linux-alpha, Andrew Morton,
	linuxppc-dev
In-Reply-To: <20200627143453.31835-1-rppt@kernel.org>

On Sat, Jun 27, 2020 at 5:35 PM Mike Rapoport <rppt@kernel.org> wrote:
> Most architectures have very similar versions of pXd_alloc_one() and
> pXd_free_one() for intermediate levels of page table.
> These patches add generic versions of these functions in
> <asm-generic/pgalloc.h> and enable use of the generic functions where
> appropriate.

Very nice cleanup series to the page table code!

FWIW:

Reviewed-by: Pekka Enberg <penberg@kernel.org>

^ permalink raw reply

* [PATCH 1/1] MAINTAINERS: Remove self
From: Sam Bobroff @ 2020-06-29 22:50 UTC (permalink / raw)
  To: linuxppc-dev

I'm sorry to say I can no longer maintain this position.

Signed-off-by: Sam Bobroff <sbobroff@linux.ibm.com>
---
 MAINTAINERS | 1 -
 1 file changed, 1 deletion(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 496fd4eafb68..7e954e4a29e1 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -13187,7 +13187,6 @@ F:	tools/pci/
 
 PCI ENHANCED ERROR HANDLING (EEH) FOR POWERPC
 M:	Russell Currey <ruscur@russell.cc>
-M:	Sam Bobroff <sbobroff@linux.ibm.com>
 M:	Oliver O'Halloran <oohall@gmail.com>
 L:	linuxppc-dev@lists.ozlabs.org
 S:	Supported
-- 
2.22.0.216.g00a2a96fc9


^ permalink raw reply related

* [PATCH] xmon: Reset RCU and soft lockup watchdogs
From: Anton Blanchard @ 2020-06-30  0:02 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Paul Mackerras, Nicholas Piggin

I'm seeing RCU warnings when exiting xmon. xmon resets the NMI watchdog,
but does nothing with the RCU stall or soft lockup watchdogs. Add a
helper function that handles all three.

Signed-off-by: Anton Blanchard <anton@ozlabs.org>
---
 arch/powerpc/xmon/xmon.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index 7efe4bc3ccf6..d27944e38b04 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -481,6 +481,13 @@ static inline int unrecoverable_excp(struct pt_regs *regs)
 #endif
 }
 
+static void xmon_touch_watchdogs(void)
+{
+	touch_softlockup_watchdog_sync();
+	rcu_cpu_stall_reset();
+	touch_nmi_watchdog();
+}
+
 static int xmon_core(struct pt_regs *regs, int fromipi)
 {
 	int cmd = 0;
@@ -718,7 +725,7 @@ static int xmon_core(struct pt_regs *regs, int fromipi)
 	else
 		insert_cpu_bpts();
 
-	touch_nmi_watchdog();
+	xmon_touch_watchdogs();
 	local_irq_restore(flags);
 
 	return cmd != 'X' && cmd != EOF;
-- 
2.26.2


^ permalink raw reply related

* Re: [PATCH] powerpc: Warn about use of smt_snooze_delay
From: Joel Stanley @ 2020-06-30  1:05 UTC (permalink / raw)
  To: Gautham R . Shenoy; +Cc: linuxppc-dev, ego
In-Reply-To: <20200629104248.GD20062@in.ibm.com>

On Mon, 29 Jun 2020 at 10:42, Gautham R Shenoy <ego@linux.vnet.ibm.com> wrote:
>
> On Thu, Jun 25, 2020 at 07:33:49PM +0930, Joel Stanley wrote:
> > It's not done anything for a long time. Save the percpu variable, and
> > emit a warning to remind users to not expect it to do anything.
> >
> > Signed-off-by: Joel Stanley <joel@jms.id.au>
>
> The only known user of "smt_snooze_delay" is the "ppc64_cpu" which
> uses the presence of this file to assume that the system is SMT
> capable.
>
> Since we have "/sys/devices/system/cpu/smt/" these days, perhaps the
> userspace utility can use that and we can get rid of the file
> altogether ?

I've sent a change to the userspace tool to stop using the file. It
now uses the device tree parsing that was already present to determine
the smt state.

 https://github.com/ibm-power-utilities/powerpc-utils/pull/43

We will want to wait for the userspace tool to propagate through a
release and to distros before we remove the file all together. I agree
it should be removed in the future.

I've got of this patch v2 that changes the message to be:

         pr_warn_ratelimited("%s (%d) used unsupported
smt_snooze_delay, this has no effect\n",
                            current->comm, current->pid);

I'll send that out today.

Cheers,

Joel

>
> FWIW,
> Acked-by: Gautham R. Shenoy <ego@linux.vnet.ibm.com>
> > ---
> >  arch/powerpc/kernel/sysfs.c | 41 +++++++++++++------------------------
> >  1 file changed, 14 insertions(+), 27 deletions(-)
> >
> > diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
> > index 571b3259697e..530ae92bc46d 100644
> > --- a/arch/powerpc/kernel/sysfs.c
> > +++ b/arch/powerpc/kernel/sysfs.c
> > @@ -32,29 +32,25 @@
> >
> >  static DEFINE_PER_CPU(struct cpu, cpu_devices);
> >
> > -/*
> > - * SMT snooze delay stuff, 64-bit only for now
> > - */
> > -
> >  #ifdef CONFIG_PPC64
> >
> > -/* Time in microseconds we delay before sleeping in the idle loop */
> > -static DEFINE_PER_CPU(long, smt_snooze_delay) = { 100 };
> > +/*
> > + * Snooze delay has not been hooked up since 3fa8cad82b94 ("powerpc/pseries/cpuidle:
> > + * smt-snooze-delay cleanup.") and has been broken even longer. As was foretold in
> > + * 2014:
> > + *
> > + *  "ppc64_util currently utilises it. Once we fix ppc64_util, propose to clean
> > + *  up the kernel code."
> > + *
> > + * At some point in the future this code should be removed.
> > + */
> >
> >  static ssize_t store_smt_snooze_delay(struct device *dev,
> >                                     struct device_attribute *attr,
> >                                     const char *buf,
> >                                     size_t count)
> >  {
> > -     struct cpu *cpu = container_of(dev, struct cpu, dev);
> > -     ssize_t ret;
> > -     long snooze;
> > -
> > -     ret = sscanf(buf, "%ld", &snooze);
> > -     if (ret != 1)
> > -             return -EINVAL;
> > -
> > -     per_cpu(smt_snooze_delay, cpu->dev.id) = snooze;
> > +     WARN_ON_ONCE("smt_snooze_delay sysfs file has no effect\n");
> >       return count;
> >  }
> >
> > @@ -62,9 +58,9 @@ static ssize_t show_smt_snooze_delay(struct device *dev,
> >                                    struct device_attribute *attr,
> >                                    char *buf)
> >  {
> > -     struct cpu *cpu = container_of(dev, struct cpu, dev);
> > +     WARN_ON_ONCE("smt_snooze_delay sysfs file has no effect\n");
> >
> > -     return sprintf(buf, "%ld\n", per_cpu(smt_snooze_delay, cpu->dev.id));
> > +     return sprintf(buf, "100\n");
> >  }
> >
> >  static DEVICE_ATTR(smt_snooze_delay, 0644, show_smt_snooze_delay,
> > @@ -72,16 +68,7 @@ static DEVICE_ATTR(smt_snooze_delay, 0644, show_smt_snooze_delay,
> >
> >  static int __init setup_smt_snooze_delay(char *str)
> >  {
> > -     unsigned int cpu;
> > -     long snooze;
> > -
> > -     if (!cpu_has_feature(CPU_FTR_SMT))
> > -             return 1;
> > -
> > -     snooze = simple_strtol(str, NULL, 10);
> > -     for_each_possible_cpu(cpu)
> > -             per_cpu(smt_snooze_delay, cpu) = snooze;
> > -
> > +     WARN_ON_ONCE("smt-snooze-delay command line option has no effect\n");
> >       return 1;
> >  }
> >  __setup("smt-snooze-delay=", setup_smt_snooze_delay);
> > --
> > 2.27.0
> >

^ permalink raw reply

* Re: [PATCH v2] powerpc/uaccess: Use flexible addressing with __put_user()/__get_user()
From: Michael Ellerman @ 2020-06-30  1:19 UTC (permalink / raw)
  To: Christophe Leroy, Benjamin Herrenschmidt, Paul Mackerras, npiggin,
	segher
  Cc: linuxppc-dev, linux-kernel
In-Reply-To: <878sg6862r.fsf@mpe.ellerman.id.au>

Michael Ellerman <mpe@ellerman.id.au> writes:
> Christophe Leroy <christophe.leroy@csgroup.eu> writes:
>> Hi Michael,
>>
>> I see this patch is marked as "defered" in patchwork, but I can't see 
>> any related discussion. Is it normal ?
>
> Because it uses the "m<>" constraint which didn't work on GCC 4.6.
>
> https://github.com/linuxppc/issues/issues/297
>
> So we should be able to pick it up for v5.9 hopefully.

It seems to break the build with the kernel.org 4.9.4 compiler and
corenet64_smp_defconfig:

+ make -s CC=powerpc64-linux-gnu-gcc -j 160
In file included from /linux/include/linux/uaccess.h:11:0,
                 from /linux/include/linux/sched/task.h:11,
                 from /linux/include/linux/sched/signal.h:9,
                 from /linux/include/linux/rcuwait.h:6,
                 from /linux/include/linux/percpu-rwsem.h:7,
                 from /linux/include/linux/fs.h:33,
                 from /linux/include/linux/huge_mm.h:8,
                 from /linux/include/linux/mm.h:675,
                 from /linux/arch/powerpc/kernel/signal_32.c:17:
/linux/arch/powerpc/kernel/signal_32.c: In function 'save_user_regs.isra.14.constprop':
/linux/arch/powerpc/include/asm/uaccess.h:161:2: error: 'asm' operand has impossible constraints
  __asm__ __volatile__(     \
  ^
/linux/arch/powerpc/include/asm/uaccess.h:197:12: note: in expansion of macro '__put_user_asm'
    case 4: __put_user_asm(x, ptr, retval, "stw"); break; \
            ^
/linux/arch/powerpc/include/asm/uaccess.h:206:2: note: in expansion of macro '__put_user_size_allowed'
  __put_user_size_allowed(x, ptr, size, retval);  \
  ^
/linux/arch/powerpc/include/asm/uaccess.h:220:2: note: in expansion of macro '__put_user_size'
  __put_user_size(__pu_val, __pu_addr, __pu_size, __pu_err); \
  ^
/linux/arch/powerpc/include/asm/uaccess.h:96:2: note: in expansion of macro '__put_user_nocheck'
  __put_user_nocheck((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr)))
  ^
/linux/arch/powerpc/kernel/signal_32.c:120:7: note: in expansion of macro '__put_user'
   if (__put_user((unsigned int)gregs[i], &frame->mc_gregs[i]))
       ^
/linux/scripts/Makefile.build:280: recipe for target 'arch/powerpc/kernel/signal_32.o' failed
make[3]: *** [arch/powerpc/kernel/signal_32.o] Error 1
make[3]: *** Waiting for unfinished jobs....
In file included from /linux/include/linux/uaccess.h:11:0,
                 from /linux/include/linux/sched/task.h:11,
                 from /linux/include/linux/sched/signal.h:9,
                 from /linux/include/linux/rcuwait.h:6,
                 from /linux/include/linux/percpu-rwsem.h:7,
                 from /linux/include/linux/fs.h:33,
                 from /linux/include/linux/huge_mm.h:8,
                 from /linux/include/linux/mm.h:675,
                 from /linux/arch/powerpc/kernel/signal_64.c:12:
/linux/arch/powerpc/kernel/signal_64.c: In function '__se_sys_swapcontext':
/linux/arch/powerpc/include/asm/uaccess.h:319:2: error: 'asm' operand has impossible constraints
  __asm__ __volatile__(    \
  ^
/linux/arch/powerpc/include/asm/uaccess.h:359:10: note: in expansion of macro '__get_user_asm'
  case 1: __get_user_asm(x, (u8 __user *)ptr, retval, "lbz"); break; \
          ^
/linux/arch/powerpc/include/asm/uaccess.h:370:2: note: in expansion of macro '__get_user_size_allowed'
  __get_user_size_allowed(x, ptr, size, retval);  \
  ^
/linux/arch/powerpc/include/asm/uaccess.h:393:3: note: in expansion of macro '__get_user_size'
   __get_user_size(__gu_val, __gu_addr, __gu_size, __gu_err); \
   ^
/linux/arch/powerpc/include/asm/uaccess.h:94:2: note: in expansion of macro '__get_user_nocheck'
  __get_user_nocheck((x), (ptr), sizeof(*(ptr)), true)
  ^
/linux/arch/powerpc/kernel/signal_64.c:672:9: note: in expansion of macro '__get_user'
      || __get_user(tmp, (u8 __user *) new_ctx + ctx_size - 1))
         ^
/linux/scripts/Makefile.build:280: recipe for target 'arch/powerpc/kernel/signal_64.o' failed
make[3]: *** [arch/powerpc/kernel/signal_64.o] Error 1
/linux/scripts/Makefile.build:497: recipe for target 'arch/powerpc/kernel' failed
make[2]: *** [arch/powerpc/kernel] Error 2
/linux/Makefile:1756: recipe for target 'arch/powerpc' failed
make[1]: *** [arch/powerpc] Error 2
Makefile:185: recipe for target '__sub-make' failed
make: *** [__sub-make] Error 2


cheers

^ permalink raw reply

* Re: [PATCH] powerpc: Warn about use of smt_snooze_delay
From: Michael Ellerman @ 2020-06-30  1:22 UTC (permalink / raw)
  To: Joel Stanley, linuxppc-dev; +Cc: ego
In-Reply-To: <20200625100349.2408899-1-joel@jms.id.au>

Joel Stanley <joel@jms.id.au> writes:
> It's not done anything for a long time. Save the percpu variable, and
> emit a warning to remind users to not expect it to do anything.
>
> Signed-off-by: Joel Stanley <joel@jms.id.au>
> ---
>  arch/powerpc/kernel/sysfs.c | 41 +++++++++++++------------------------
>  1 file changed, 14 insertions(+), 27 deletions(-)
>
> diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
> index 571b3259697e..530ae92bc46d 100644
> --- a/arch/powerpc/kernel/sysfs.c
> +++ b/arch/powerpc/kernel/sysfs.c
> @@ -32,29 +32,25 @@
>  
>  static DEFINE_PER_CPU(struct cpu, cpu_devices);
>  
> -/*
> - * SMT snooze delay stuff, 64-bit only for now
> - */
> -
>  #ifdef CONFIG_PPC64
>  
> -/* Time in microseconds we delay before sleeping in the idle loop */
> -static DEFINE_PER_CPU(long, smt_snooze_delay) = { 100 };
> +/*
> + * Snooze delay has not been hooked up since 3fa8cad82b94 ("powerpc/pseries/cpuidle:
> + * smt-snooze-delay cleanup.") and has been broken even longer. As was foretold in
> + * 2014:
> + *
> + *  "ppc64_util currently utilises it. Once we fix ppc64_util, propose to clean
> + *  up the kernel code."
> + *
> + * At some point in the future this code should be removed.
> + */
>  
>  static ssize_t store_smt_snooze_delay(struct device *dev,
>  				      struct device_attribute *attr,
>  				      const char *buf,
>  				      size_t count)
>  {
> -	struct cpu *cpu = container_of(dev, struct cpu, dev);
> -	ssize_t ret;
> -	long snooze;
> -
> -	ret = sscanf(buf, "%ld", &snooze);
> -	if (ret != 1)
> -		return -EINVAL;
> -
> -	per_cpu(smt_snooze_delay, cpu->dev.id) = snooze;
> +	WARN_ON_ONCE("smt_snooze_delay sysfs file has no effect\n");

We shouldn't have user-triggerable WARNs.

I think this should just be a pr_warn_ratelimited(), maybe including
current->comm & pid.

cheers

^ permalink raw reply

* Re: [PATCH 1/3] powerpc: inline doorbell sending functions
From: Michael Ellerman @ 2020-06-30  1:31 UTC (permalink / raw)
  To: kernel test robot, Nicholas Piggin, linuxppc-dev
  Cc: kbuild-all, Nicholas Piggin, kvm-ppc, Anton Blanchard,
	Cédric Le Goater, David Gibson
In-Reply-To: <202006280326.fcRFUNzs%lkp@intel.com>

kernel test robot <lkp@intel.com> writes:
> Hi Nicholas,
>
> I love your patch! Yet something to improve:
>
> [auto build test ERROR on powerpc/next]
> [also build test ERROR on scottwood/next v5.8-rc2 next-20200626]
> [cannot apply to kvm-ppc/kvm-ppc-next]
> [If your patch is applied to the wrong git tree, kindly drop us a note.
> And when submitting patch, we suggest to use  as documented in
> https://git-scm.com/docs/git-format-patch]
>
> url:    https://github.com/0day-ci/linux/commits/Nicholas-Piggin/powerpc-pseries-IPI-doorbell-improvements/20200627-230544
> base:   https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git next
> config: powerpc-randconfig-c003-20200628 (attached as .config)
> compiler: powerpc64-linux-gcc (GCC) 9.3.0

> If you fix the issue, kindly add following tag as appropriate
> Reported-by: kernel test robot <lkp@intel.com>
>
> All error/warnings (new ones prefixed by >>):
>
>    In file included from arch/powerpc/kernel/asm-offsets.c:38:
>    arch/powerpc/include/asm/dbell.h: In function 'doorbell_global_ipi':
>>> arch/powerpc/include/asm/dbell.h:114:12: error: implicit declaration of function 'get_hard_smp_processor_id'; did you mean 'raw_smp_processor_id'? [-Werror=implicit-function-declaration]
>      114 |  u32 tag = get_hard_smp_processor_id(cpu);
>          |            ^~~~~~~~~~~~~~~~~~~~~~~~~
>          |            raw_smp_processor_id
>    arch/powerpc/include/asm/dbell.h: In function 'doorbell_try_core_ipi':
>>> arch/powerpc/include/asm/dbell.h:146:28: error: implicit declaration of function 'cpu_sibling_mask'; did you mean 'cpu_online_mask'? [-Werror=implicit-function-declaration]
>      146 |  if (cpumask_test_cpu(cpu, cpu_sibling_mask(this_cpu))) {
>          |                            ^~~~~~~~~~~~~~~~
>          |                            cpu_online_mask
>>> arch/powerpc/include/asm/dbell.h:146:28: warning: passing argument 2 of 'cpumask_test_cpu' makes pointer from integer without a cast [-Wint-conversion]
>      146 |  if (cpumask_test_cpu(cpu, cpu_sibling_mask(this_cpu))) {
>          |                            ^~~~~~~~~~~~~~~~~~~~~~~~~~

Seems like CONFIG_SMP=n is probably the root cause.

You could try including asm/smp.h, but good chance that will lead to
header soup.

Other option would be to wrap the whole lot in #ifdef CONFIG_SMP?

cheers

^ permalink raw reply

* Re: [PATCH updated] libnvdimm/nvdimm/flush: Allow architecture to override the flush barrier
From: Dan Williams @ 2020-06-30  1:32 UTC (permalink / raw)
  To: Aneesh Kumar K.V
  Cc: Jan Kara, linux-nvdimm, Jeff Moyer, Oliver O'Halloran,
	Michal Suchánek, linuxppc-dev
In-Reply-To: <20200629202901.83516-1-aneesh.kumar@linux.ibm.com>

On Mon, Jun 29, 2020 at 1:29 PM Aneesh Kumar K.V
<aneesh.kumar@linux.ibm.com> wrote:
>
> Architectures like ppc64 provide persistent memory specific barriers
> that will ensure that all stores for which the modifications are
> written to persistent storage by preceding dcbfps and dcbstps
> instructions have updated persistent storage before any data
> access or data transfer caused by subsequent instructions is initiated.
> This is in addition to the ordering done by wmb()
>
> Update nvdimm core such that architecture can use barriers other than
> wmb to ensure all previous writes are architecturally visible for
> the platform buffer flush.
>
> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
> ---
>  drivers/md/dm-writecache.c   | 2 +-
>  drivers/nvdimm/region_devs.c | 8 ++++----
>  include/linux/libnvdimm.h    | 4 ++++
>  3 files changed, 9 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c
> index 74f3c506f084..8c6b6dce64e2 100644
> --- a/drivers/md/dm-writecache.c
> +++ b/drivers/md/dm-writecache.c
> @@ -536,7 +536,7 @@ static void ssd_commit_superblock(struct dm_writecache *wc)
>  static void writecache_commit_flushed(struct dm_writecache *wc, bool wait_for_ios)
>  {
>         if (WC_MODE_PMEM(wc))
> -               wmb();
> +               arch_pmem_flush_barrier();
>         else
>                 ssd_commit_flushed(wc, wait_for_ios);
>  }
> diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c
> index 4502f9c4708d..b308ad09b63d 100644
> --- a/drivers/nvdimm/region_devs.c
> +++ b/drivers/nvdimm/region_devs.c
> @@ -1206,13 +1206,13 @@ int generic_nvdimm_flush(struct nd_region *nd_region)
>         idx = this_cpu_add_return(flush_idx, hash_32(current->pid + idx, 8));
>
>         /*
> -        * The first wmb() is needed to 'sfence' all previous writes
> -        * such that they are architecturally visible for the platform
> -        * buffer flush.  Note that we've already arranged for pmem
> +        * The first arch_pmem_flush_barrier() is needed to 'sfence' all
> +        * previous writes such that they are architecturally visible for
> +        * the platform buffer flush. Note that we've already arranged for pmem
>          * writes to avoid the cache via memcpy_flushcache().  The final
>          * wmb() ensures ordering for the NVDIMM flush write.
>          */
> -       wmb();
> +       arch_pmem_flush_barrier();
>         for (i = 0; i < nd_region->ndr_mappings; i++)
>                 if (ndrd_get_flush_wpq(ndrd, i, 0))
>                         writeq(1, ndrd_get_flush_wpq(ndrd, i, idx));
> diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h
> index 18da4059be09..66f6c65bd789 100644
> --- a/include/linux/libnvdimm.h
> +++ b/include/linux/libnvdimm.h
> @@ -286,4 +286,8 @@ static inline void arch_invalidate_pmem(void *addr, size_t size)
>  }
>  #endif
>
> +#ifndef arch_pmem_flush_barrier
> +#define arch_pmem_flush_barrier() wmb()
> +#endif

I think it is out of place to define this in libnvdimm.h and it is odd
to give it such a long name. The other pmem api helpers like
arch_wb_cache_pmem() and arch_invalidate_pmem() are function calls for
libnvdimm driver operations, this barrier is just an instruction and
is closer to wmb() than the pmem api routine.

Since it is a store fence for pmem, so let's just call it pmem_wmb()
and define the generic version in include/linux/compiler.h. It should
probably also be documented alongside dma_wmb() in
Documentation/memory-barriers.txt about why code would use it over
wmb(), and why a symmetric pmem_rmb() is not needed.

^ permalink raw reply

* Re: [PATCH v6 5/8] powerpc/pmem/of_pmem: Update of_pmem to use the new barrier instruction.
From: Dan Williams @ 2020-06-30  1:38 UTC (permalink / raw)
  To: Aneesh Kumar K.V
  Cc: Jan Kara, linux-nvdimm, Jeff Moyer, Oliver O'Halloran,
	Michal Suchánek, linuxppc-dev
In-Reply-To: <20200629135722.73558-6-aneesh.kumar@linux.ibm.com>

On Mon, Jun 29, 2020 at 6:58 AM Aneesh Kumar K.V
<aneesh.kumar@linux.ibm.com> wrote:
>
> of_pmem on POWER10 can now use phwsync instead of hwsync to ensure
> all previous writes are architecturally visible for the platform
> buffer flush.
>
> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
> ---
>  arch/powerpc/include/asm/cacheflush.h | 7 +++++++
>  1 file changed, 7 insertions(+)
>
> diff --git a/arch/powerpc/include/asm/cacheflush.h b/arch/powerpc/include/asm/cacheflush.h
> index 54764c6e922d..95782f77d768 100644
> --- a/arch/powerpc/include/asm/cacheflush.h
> +++ b/arch/powerpc/include/asm/cacheflush.h
> @@ -98,6 +98,13 @@ static inline void invalidate_dcache_range(unsigned long start,
>         mb();   /* sync */
>  }
>
> +#define arch_pmem_flush_barrier arch_pmem_flush_barrier
> +static inline void  arch_pmem_flush_barrier(void)
> +{
> +       if (cpu_has_feature(CPU_FTR_ARCH_207S))
> +               asm volatile(PPC_PHWSYNC ::: "memory");

Shouldn't this fallback to a compatible store-fence in an else statement?

^ permalink raw reply

* Re: [PATCH v6 6/8] powerpc/pmem: Avoid the barrier in flush routines
From: Dan Williams @ 2020-06-30  1:50 UTC (permalink / raw)
  To: Aneesh Kumar K.V
  Cc: Jan Kara, linux-nvdimm, Jeff Moyer, Oliver O'Halloran,
	Michal Suchánek, linuxppc-dev
In-Reply-To: <87lfk5hahc.fsf@linux.ibm.com>

On Mon, Jun 29, 2020 at 1:41 PM Aneesh Kumar K.V
<aneesh.kumar@linux.ibm.com> wrote:
>
> Michal Suchánek <msuchanek@suse.de> writes:
>
> > Hello,
> >
> > On Mon, Jun 29, 2020 at 07:27:20PM +0530, Aneesh Kumar K.V wrote:
> >> nvdimm expect the flush routines to just mark the cache clean. The barrier
> >> that mark the store globally visible is done in nvdimm_flush().
> >>
> >> Update the papr_scm driver to a simplified nvdim_flush callback that do
> >> only the required barrier.
> >>
> >> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
> >> ---
> >>  arch/powerpc/lib/pmem.c                   |  6 ------
> >>  arch/powerpc/platforms/pseries/papr_scm.c | 13 +++++++++++++
> >>  2 files changed, 13 insertions(+), 6 deletions(-)
> >>
> >> diff --git a/arch/powerpc/lib/pmem.c b/arch/powerpc/lib/pmem.c
> >> index 5a61aaeb6930..21210fa676e5 100644
> >> --- a/arch/powerpc/lib/pmem.c
> >> +++ b/arch/powerpc/lib/pmem.c
> >> @@ -19,9 +19,6 @@ static inline void __clean_pmem_range(unsigned long start, unsigned long stop)
> >>
> >>      for (i = 0; i < size >> shift; i++, addr += bytes)
> >>              asm volatile(PPC_DCBSTPS(%0, %1): :"i"(0), "r"(addr): "memory");
> >> -
> >> -
> >> -    asm volatile(PPC_PHWSYNC ::: "memory");
> >>  }
> >>
> >>  static inline void __flush_pmem_range(unsigned long start, unsigned long stop)
> >> @@ -34,9 +31,6 @@ static inline void __flush_pmem_range(unsigned long start, unsigned long stop)
> >>
> >>      for (i = 0; i < size >> shift; i++, addr += bytes)
> >>              asm volatile(PPC_DCBFPS(%0, %1): :"i"(0), "r"(addr): "memory");
> >> -
> >> -
> >> -    asm volatile(PPC_PHWSYNC ::: "memory");
> >>  }
> >>
> >>  static inline void clean_pmem_range(unsigned long start, unsigned long stop)
> >> diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c
> >> index 9c569078a09f..9a9a0766f8b6 100644
> >> --- a/arch/powerpc/platforms/pseries/papr_scm.c
> >> +++ b/arch/powerpc/platforms/pseries/papr_scm.c
> >> @@ -630,6 +630,18 @@ static int papr_scm_ndctl(struct nvdimm_bus_descriptor *nd_desc,
> >>
> >>      return 0;
> >>  }
> >> +/*
> >> + * We have made sure the pmem writes are done such that before calling this
> >> + * all the caches are flushed/clean. We use dcbf/dcbfps to ensure this. Here
> >> + * we just need to add the necessary barrier to make sure the above flushes
> >> + * are have updated persistent storage before any data access or data transfer
> >> + * caused by subsequent instructions is initiated.
> >> + */
> >> +static int papr_scm_flush_sync(struct nd_region *nd_region, struct bio *bio)
> >> +{
> >> +    arch_pmem_flush_barrier();
> >> +    return 0;
> >> +}
> >>
> >>  static ssize_t flags_show(struct device *dev,
> >>                        struct device_attribute *attr, char *buf)
> >> @@ -743,6 +755,7 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p)
> >>      ndr_desc.mapping = &mapping;
> >>      ndr_desc.num_mappings = 1;
> >>      ndr_desc.nd_set = &p->nd_set;
> >> +    ndr_desc.flush = papr_scm_flush_sync;
> >
> > AFAICT currently the only device that implements flush is virtio_pmem.
> > How does the nfit driver get away without implementing flush?
>
> generic_nvdimm_flush does the required barrier for nfit. The reason for
> adding ndr_desc.flush call back for papr_scm was to avoid the usage
> of iomem based deep flushing (ndr_region_data.flush_wpq) which is not
> supported by papr_scm.
>
> BTW we do return NULL for ndrd_get_flush_wpq() on power. So the upstream
> code also does the same thing, but in a different way.
>
>
> > Also the flush takes arguments that are completely unused but a user of
> > the pmem region must assume they are used, and call flush() on the
> > region rather than arch_pmem_flush_barrier() directly.
>
> The bio argument can help a pmem driver to do range based flushing in
> case of pmem_make_request. If bio is null then we must assume a full
> device flush.

The bio argument isn't for range based flushing, it is for flush
operations that need to complete asynchronously.

There's no mechanism for the block layer to communicate range based
cache flushing, block-device flushing is assumed to be the device's
entire cache. For pmem that would be the entirety of the cpu cache.
Instead of modeling the cpu cache as a storage device cache it is
modeled as page-cache. Once the fs-layer writes back page-cache /
cpu-cache the storage device is only responsible for flushing those
cache-writes into the persistence domain.

Additionally there is a concept of deep-flush that relegates some
power-fail scenarios to a smaller failure domain. For example consider
the difference between a write arriving at the head of a device-queue
and successfully traversing a device-queue to media. The expectation
of pmem applications is that data is persisted once they reach the
equivalent of the x86 ADR domain, deep-flush is past ADR.

^ permalink raw reply

* Re: [PATCH v6 7/8] powerpc/pmem: Add WARN_ONCE to catch the wrong usage of pmem flush functions.
From: Dan Williams @ 2020-06-30  1:52 UTC (permalink / raw)
  To: Aneesh Kumar K.V
  Cc: Jan Kara, linux-nvdimm, Jeff Moyer, Oliver O'Halloran,
	Michal Suchánek, linuxppc-dev
In-Reply-To: <20200629135722.73558-8-aneesh.kumar@linux.ibm.com>

On Mon, Jun 29, 2020 at 6:58 AM Aneesh Kumar K.V
<aneesh.kumar@linux.ibm.com> wrote:
>
> We only support persistent memory on P8 and above. This is enforced by the
> firmware and further checked on virtualzied platform during platform init.
> Add WARN_ONCE in pmem flush routines to catch the wrong usage of these.
>
> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
> ---
>  arch/powerpc/include/asm/cacheflush.h | 2 ++
>  arch/powerpc/lib/pmem.c               | 2 ++
>  2 files changed, 4 insertions(+)
>
> diff --git a/arch/powerpc/include/asm/cacheflush.h b/arch/powerpc/include/asm/cacheflush.h
> index 95782f77d768..1ab0fa660497 100644
> --- a/arch/powerpc/include/asm/cacheflush.h
> +++ b/arch/powerpc/include/asm/cacheflush.h
> @@ -103,6 +103,8 @@ static inline void  arch_pmem_flush_barrier(void)
>  {
>         if (cpu_has_feature(CPU_FTR_ARCH_207S))
>                 asm volatile(PPC_PHWSYNC ::: "memory");
> +       else
> +               WARN_ONCE(1, "Using pmem flush on older hardware.");

This seems too late to be making this determination. I'd expect the
driver to fail to successfully bind default if this constraint is not
met.

^ permalink raw reply

* Re: [PATCH 1/3] powerpc: inline doorbell sending functions
From: Nicholas Piggin @ 2020-06-30  1:58 UTC (permalink / raw)
  To: linuxppc-dev, kernel test robot, Michael Ellerman
  Cc: kbuild-all, Anton Blanchard, Cédric Le Goater, kvm-ppc,
	David Gibson
In-Reply-To: <87zh8l7318.fsf@mpe.ellerman.id.au>

Excerpts from Michael Ellerman's message of June 30, 2020 11:31 am:
> kernel test robot <lkp@intel.com> writes:
>> Hi Nicholas,
>>
>> I love your patch! Yet something to improve:
>>
>> [auto build test ERROR on powerpc/next]
>> [also build test ERROR on scottwood/next v5.8-rc2 next-20200626]
>> [cannot apply to kvm-ppc/kvm-ppc-next]
>> [If your patch is applied to the wrong git tree, kindly drop us a note.
>> And when submitting patch, we suggest to use  as documented in
>> https://git-scm.com/docs/git-format-patch]
>>
>> url:    https://github.com/0day-ci/linux/commits/Nicholas-Piggin/powerpc-pseries-IPI-doorbell-improvements/20200627-230544
>> base:   https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git next
>> config: powerpc-randconfig-c003-20200628 (attached as .config)
>> compiler: powerpc64-linux-gcc (GCC) 9.3.0
> 
>> If you fix the issue, kindly add following tag as appropriate
>> Reported-by: kernel test robot <lkp@intel.com>
>>
>> All error/warnings (new ones prefixed by >>):
>>
>>    In file included from arch/powerpc/kernel/asm-offsets.c:38:
>>    arch/powerpc/include/asm/dbell.h: In function 'doorbell_global_ipi':
>>>> arch/powerpc/include/asm/dbell.h:114:12: error: implicit declaration of function 'get_hard_smp_processor_id'; did you mean 'raw_smp_processor_id'? [-Werror=implicit-function-declaration]
>>      114 |  u32 tag = get_hard_smp_processor_id(cpu);
>>          |            ^~~~~~~~~~~~~~~~~~~~~~~~~
>>          |            raw_smp_processor_id
>>    arch/powerpc/include/asm/dbell.h: In function 'doorbell_try_core_ipi':
>>>> arch/powerpc/include/asm/dbell.h:146:28: error: implicit declaration of function 'cpu_sibling_mask'; did you mean 'cpu_online_mask'? [-Werror=implicit-function-declaration]
>>      146 |  if (cpumask_test_cpu(cpu, cpu_sibling_mask(this_cpu))) {
>>          |                            ^~~~~~~~~~~~~~~~
>>          |                            cpu_online_mask
>>>> arch/powerpc/include/asm/dbell.h:146:28: warning: passing argument 2 of 'cpumask_test_cpu' makes pointer from integer without a cast [-Wint-conversion]
>>      146 |  if (cpumask_test_cpu(cpu, cpu_sibling_mask(this_cpu))) {
>>          |                            ^~~~~~~~~~~~~~~~~~~~~~~~~~
> 
> Seems like CONFIG_SMP=n is probably the root cause.
> 
> You could try including asm/smp.h, but good chance that will lead to
> header soup.

Possibly. dbell.h shouldn't be included by much, but maybe it gets
dragged in.

> 
> Other option would be to wrap the whole lot in #ifdef CONFIG_SMP?

Yeah that might be a better idea.

I'll fix it up and repost if there's no strong objections to
the KVM detection bit.

Thanks,
Nick

^ permalink raw reply

* [PATCH v2] powerpc: Warn about use of smt_snooze_delay
From: Joel Stanley @ 2020-06-30  1:59 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Tyrel Datwyler, Gautham R Shenoy

It's not done anything for a long time. Save the percpu variable, and
emit a warning to remind users to not expect it to do anything.

Fixes: 3fa8cad82b94 ("powerpc/pseries/cpuidle: smt-snooze-delay cleanup.")
Cc: stable@vger.kernel.org # v3.14
Signed-off-by: Joel Stanley <joel@jms.id.au>
--
v2:
 Use pr_warn instead of WARN
 Reword and print proccess name with pid in message
 Leave CPU_FTR_SMT test in
 Add Fixes line

mpe, if you don't agree then feel free to drop the cc stable.

Testing 'ppc64_cpu --smt=off' on a 24 core / 4 SMT system it's quite noisy
as the online/offline loop that ppc64_cpu runs is slow.

This could be fixed by open coding pr_warn_ratelimit with the ratelimit
parameters tweaked if someone was concerned. I'll leave that to someone
else as a future enhancement.

[  237.642088][ T1197] ppc64_cpu (1197) used unsupported smt_snooze_delay, this has no effect
[  237.642175][ T1197] ppc64_cpu (1197) used unsupported smt_snooze_delay, this has no effect
[  237.642261][ T1197] ppc64_cpu (1197) used unsupported smt_snooze_delay, this has no effect
[  237.642345][ T1197] ppc64_cpu (1197) used unsupported smt_snooze_delay, this has no effect
[  237.642430][ T1197] ppc64_cpu (1197) used unsupported smt_snooze_delay, this has no effect
[  237.642516][ T1197] ppc64_cpu (1197) used unsupported smt_snooze_delay, this has no effect
[  237.642625][ T1197] ppc64_cpu (1197) used unsupported smt_snooze_delay, this has no effect
[  237.642709][ T1197] ppc64_cpu (1197) used unsupported smt_snooze_delay, this has no effect
[  237.642793][ T1197] ppc64_cpu (1197) used unsupported smt_snooze_delay, this has no effect
[  237.642878][ T1197] ppc64_cpu (1197) used unsupported smt_snooze_delay, this has no effect
[  254.264030][ T1197] store_smt_snooze_delay: 14 callbacks suppressed
[  254.264033][ T1197] ppc64_cpu (1197) used unsupported smt_snooze_delay, this has no effect
[  254.264048][ T1197] ppc64_cpu (1197) used unsupported smt_snooze_delay, this has no effect
[  254.264062][ T1197] ppc64_cpu (1197) used unsupported smt_snooze_delay, this has no effect
[  254.264075][ T1197] ppc64_cpu (1197) used unsupported smt_snooze_delay, this has no effect
[  254.264089][ T1197] ppc64_cpu (1197) used unsupported smt_snooze_delay, this has no effect
[  254.264103][ T1197] ppc64_cpu (1197) used unsupported smt_snooze_delay, this has no effect
[  254.264116][ T1197] ppc64_cpu (1197) used unsupported smt_snooze_delay, this has no effect
[  254.264130][ T1197] ppc64_cpu (1197) used unsupported smt_snooze_delay, this has no effect
[  254.264143][ T1197] ppc64_cpu (1197) used unsupported smt_snooze_delay, this has no effect
[  254.264157][ T1197] ppc64_cpu (1197) used unsupported smt_snooze_delay, this has no effect

Signed-off-by: Joel Stanley <joel@jms.id.au>
---
 arch/powerpc/kernel/sysfs.c | 41 +++++++++++++++----------------------
 1 file changed, 16 insertions(+), 25 deletions(-)

diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
index 571b3259697e..ba6d4cee19ef 100644
--- a/arch/powerpc/kernel/sysfs.c
+++ b/arch/powerpc/kernel/sysfs.c
@@ -32,29 +32,26 @@
 
 static DEFINE_PER_CPU(struct cpu, cpu_devices);
 
-/*
- * SMT snooze delay stuff, 64-bit only for now
- */
-
 #ifdef CONFIG_PPC64
 
-/* Time in microseconds we delay before sleeping in the idle loop */
-static DEFINE_PER_CPU(long, smt_snooze_delay) = { 100 };
+/*
+ * Snooze delay has not been hooked up since 3fa8cad82b94 ("powerpc/pseries/cpuidle:
+ * smt-snooze-delay cleanup.") and has been broken even longer. As was foretold in
+ * 2014:
+ *
+ *  "ppc64_util currently utilises it. Once we fix ppc64_util, propose to clean
+ *  up the kernel code."
+ *
+ * At some point in the future this code should be removed.
+ */
 
 static ssize_t store_smt_snooze_delay(struct device *dev,
 				      struct device_attribute *attr,
 				      const char *buf,
 				      size_t count)
 {
-	struct cpu *cpu = container_of(dev, struct cpu, dev);
-	ssize_t ret;
-	long snooze;
-
-	ret = sscanf(buf, "%ld", &snooze);
-	if (ret != 1)
-		return -EINVAL;
-
-	per_cpu(smt_snooze_delay, cpu->dev.id) = snooze;
+	pr_warn_ratelimited("%s (%d) used unsupported smt_snooze_delay, this has no effect\n",
+			    current->comm, current->pid);
 	return count;
 }
 
@@ -62,9 +59,9 @@ static ssize_t show_smt_snooze_delay(struct device *dev,
 				     struct device_attribute *attr,
 				     char *buf)
 {
-	struct cpu *cpu = container_of(dev, struct cpu, dev);
-
-	return sprintf(buf, "%ld\n", per_cpu(smt_snooze_delay, cpu->dev.id));
+	pr_warn_ratelimited("%s (%d) used unsupported smt_snooze_delay, this has no effect\n",
+			    current->comm, current->pid);
+	return sprintf(buf, "100\n");
 }
 
 static DEVICE_ATTR(smt_snooze_delay, 0644, show_smt_snooze_delay,
@@ -72,16 +69,10 @@ static DEVICE_ATTR(smt_snooze_delay, 0644, show_smt_snooze_delay,
 
 static int __init setup_smt_snooze_delay(char *str)
 {
-	unsigned int cpu;
-	long snooze;
-
 	if (!cpu_has_feature(CPU_FTR_SMT))
 		return 1;
 
-	snooze = simple_strtol(str, NULL, 10);
-	for_each_possible_cpu(cpu)
-		per_cpu(smt_snooze_delay, cpu) = snooze;
-
+	pr_warn("smt-snooze-delay command line option has no effect\n");
 	return 1;
 }
 __setup("smt-snooze-delay=", setup_smt_snooze_delay);
-- 
2.27.0


^ permalink raw reply related

* Re: [PATCH] kbuild: introduce ccflags-remove-y and asflags-remove-y
From: Masahiro Yamada @ 2020-06-30  2:08 UTC (permalink / raw)
  To: Michael Ellerman
  Cc: Michal Marek, Yoshinori Sato, Linux Kbuild mailing list,
	Linux-sh list, Linux Kernel Mailing List, Steven Rostedt,
	Russell King, linuxppc-dev, Ingo Molnar, Paul Mackerras,
	Sami Tolvanen, Rich Felker, linux-arm-kernel
In-Reply-To: <87imfa8le0.fsf@mpe.ellerman.id.au>

On Mon, Jun 29, 2020 at 2:55 PM Michael Ellerman <mpe@ellerman.id.au> wrote:
>
> Masahiro Yamada <masahiroy@kernel.org> writes:
> > CFLAGS_REMOVE_<file>.o works per object, that is, there is no
> > convenient way to filter out flags for every object in a directory.
> >
> > Add ccflags-remove-y and asflags-remove-y to make it easily.
> >
> > Use ccflags-remove-y to clean up some Makefiles.
> >
> > Suggested-by: Sami Tolvanen <samitolvanen@google.com>
> > Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
> > ---
> >
> >  arch/arm/boot/compressed/Makefile | 6 +-----
> >  arch/powerpc/xmon/Makefile        | 3 +--
> >  arch/sh/boot/compressed/Makefile  | 5 +----
> >  kernel/trace/Makefile             | 4 ++--
> >  lib/Makefile                      | 5 +----
> >  scripts/Makefile.lib              | 4 ++--
> >  6 files changed, 8 insertions(+), 19 deletions(-)
> >
> > diff --git a/arch/powerpc/xmon/Makefile b/arch/powerpc/xmon/Makefile
> > index 89c76ca35640..55cbcdd88ac0 100644
> > --- a/arch/powerpc/xmon/Makefile
> > +++ b/arch/powerpc/xmon/Makefile
> > @@ -7,8 +7,7 @@ UBSAN_SANITIZE := n
> >  KASAN_SANITIZE := n
> >
> >  # Disable ftrace for the entire directory
> > -ORIG_CFLAGS := $(KBUILD_CFLAGS)
> > -KBUILD_CFLAGS = $(subst $(CC_FLAGS_FTRACE),,$(ORIG_CFLAGS))
> > +ccflags-remove-y += $(CC_FLAGS_FTRACE)
>
> This could be:
>
> ccflags-remove-$(CONFIG_FUNCTION_TRACER) += $(CC_FLAGS_FTRACE)
>
> Similar to kernel/trace/Makefile below.


I fixed it up, and applied to linux-kbuild.
Thanks.


> I don't mind though.
>
> Acked-by: Michael Ellerman <mpe@ellerman.id.au> (powerpc)
>
> cheers
>
> > diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
> > index 6575bb0a0434..7492844a8b1b 100644
> > --- a/kernel/trace/Makefile
> > +++ b/kernel/trace/Makefile
> > @@ -2,9 +2,9 @@
> >
> >  # Do not instrument the tracer itself:
> >
> > +ccflags-remove-$(CONFIG_FUNCTION_TRACER) += $(CC_FLAGS_FTRACE)
> > +
> >  ifdef CONFIG_FUNCTION_TRACER
> > -ORIG_CFLAGS := $(KBUILD_CFLAGS)
> > -KBUILD_CFLAGS = $(subst $(CC_FLAGS_FTRACE),,$(ORIG_CFLAGS))
> >
> >  # Avoid recursion due to instrumentation.
> >  KCSAN_SANITIZE := n



-- 
Best Regards
Masahiro Yamada

^ permalink raw reply

* Re: [PATCH 3/3] powerpc/pseries: Add KVM guest doorbell restrictions
From: Paul Mackerras @ 2020-06-30  2:27 UTC (permalink / raw)
  To: Nicholas Piggin
  Cc: kvm-ppc, Anton Blanchard, Cédric Le Goater, linuxppc-dev,
	David Gibson
In-Reply-To: <20200627150428.2525192-4-npiggin@gmail.com>

On Sun, Jun 28, 2020 at 01:04:28AM +1000, Nicholas Piggin wrote:
> KVM guests have certain restrictions and performance quirks when
> using doorbells. This patch tests for KVM environment in doorbell
> setup, and optimises IPI performance:
> 
>  - PowerVM guests may now use doorbells even if they are secure.
> 
>  - KVM guests no longer use doorbells if XIVE is available.

It seems, from the fact that you completely remove
kvm_para_available(), that you perhaps haven't tried building with
CONFIG_KVM_GUEST=y.  Somewhat confusingly, that option is not used or
needed when building for a PAPR guest (i.e. the "pseries" platform)
but is used on non-IBM platforms using the "epapr" hypervisor
interface.

If you did intend to remove support for the epapr hypervisor interface
then that should have been talked about in the commit message (and
would I expect be controversial).

So NAK on the kvm_para_available() removal.

Paul.

^ permalink raw reply

* Re: [PATCH] xmon: Reset RCU and soft lockup watchdogs
From: Nicholas Piggin @ 2020-06-30  2:29 UTC (permalink / raw)
  To: Anton Blanchard, linuxppc-dev; +Cc: Paul Mackerras
In-Reply-To: <20200630100218.62a3c3fb@kryten.localdomain>

Excerpts from Anton Blanchard's message of June 30, 2020 10:02 am:
> I'm seeing RCU warnings when exiting xmon. xmon resets the NMI watchdog,
> but does nothing with the RCU stall or soft lockup watchdogs. Add a
> helper function that handles all three.
> 
> Signed-off-by: Anton Blanchard <anton@ozlabs.org>

Acked-by: Nicholas Piggin <npiggin@gmail.com>

> ---
>  arch/powerpc/xmon/xmon.c | 9 ++++++++-
>  1 file changed, 8 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
> index 7efe4bc3ccf6..d27944e38b04 100644
> --- a/arch/powerpc/xmon/xmon.c
> +++ b/arch/powerpc/xmon/xmon.c
> @@ -481,6 +481,13 @@ static inline int unrecoverable_excp(struct pt_regs *regs)
>  #endif
>  }
>  
> +static void xmon_touch_watchdogs(void)
> +{
> +	touch_softlockup_watchdog_sync();
> +	rcu_cpu_stall_reset();
> +	touch_nmi_watchdog();
> +}
> +
>  static int xmon_core(struct pt_regs *regs, int fromipi)
>  {
>  	int cmd = 0;
> @@ -718,7 +725,7 @@ static int xmon_core(struct pt_regs *regs, int fromipi)
>  	else
>  		insert_cpu_bpts();
>  
> -	touch_nmi_watchdog();
> +	xmon_touch_watchdogs();
>  	local_irq_restore(flags);
>  
>  	return cmd != 'X' && cmd != EOF;
> -- 
> 2.26.2
> 
> 

^ permalink raw reply

* Re: [PATCH 04/11] ppc64/kexec_file: avoid stomping memory used by special regions
From: piliu @ 2020-06-30  3:30 UTC (permalink / raw)
  To: Hari Bathini, Michael Ellerman, Andrew Morton
  Cc: Kexec-ml, Petr Tesarik, Mahesh J Salgaonkar, Sourabh Jain, lkml,
	linuxppc-dev, Mimi Zohar, Thiago Jung Bauermann, Dave Young,
	Vivek Goyal, Eric Biederman
In-Reply-To: <f745de42-297e-6eed-d25b-ea21d6000dc5@linux.ibm.com>



On 06/29/2020 01:55 PM, Hari Bathini wrote:
> 
> 
> On 28/06/20 7:44 am, piliu wrote:
>> Hi Hari,
> 
> Hi Pingfan,
> 
>>
>> After a quick through for this series, I have a few question/comment on
>> this patch for the time being. Pls see comment inline.
>>
>> On 06/27/2020 03:05 AM, Hari Bathini wrote:
>>> crashkernel region could have an overlap with special memory regions
>>> like  opal, rtas, tce-table & such. These regions are referred to as
>>> exclude memory ranges. Setup this ranges during image probe in order
>>> to avoid them while finding the buffer for different kdump segments.
> 
> [...]
> 
>>> +	/*
>>> +	 * Use the locate_mem_hole logic in kexec_add_buffer() for regular
>>> +	 * kexec_file_load syscall
>>> +	 */
>>> +	if (kbuf->image->type != KEXEC_TYPE_CRASH)
>>> +		return 0;
>> Can the ranges overlap [crashk_res.start, crashk_res.end]?  Otherwise
>> there is no requirement for @exclude_ranges.
> 
> The ranges like rtas, opal are loaded by f/w. They almost always overlap with
> crashkernel region. So, @exclude_ranges is required to support kdump.
f/w passes rtas/opal as service, then must f/w mark these ranges as
fdt_reserved_mem in order to make kernel aware not to use these ranges?
Otherwise kernel memory allocation besides kdump can also overwrite
these ranges.

Hmm, revisiting reserve_crashkernel(). It seems not to take any reserved
memory into consider except kernel text. Could it work based on memblock
allocator?

Thanks,
Pingfan
> 
>> I guess you have a design for future. If not true, then it is better to
>> fold the condition "if (kbuf->image->type != KEXEC_TYPE_CRASH)" into the
>> caller and rename this function to better distinguish use cases between
>> kexec and kdump
> 
> Yeah, this condition will be folded. I have a follow-up patch for that explaining
> why kexec case should also be folded. Will try to add that to this series for v2.
> 
> Thanks
> Hari
> 


^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox