* [PATCH 1/4] dt-bindings: sound: fsl, asrc: add properties to select in/out clocks
From: Arnaud Ferraris @ 2020-07-02 14:22 UTC (permalink / raw)
Cc: alsa-devel, devicetree, Arnaud Ferraris, linuxppc-dev, Timur Tabi,
Xiubo Li, linux-kernel, Takashi Iwai, Liam Girdwood, Rob Herring,
Jaroslav Kysela, Nicolin Chen, Mark Brown, kernel, Fabio Estevam
In-Reply-To: <20200702142235.235869-1-arnaud.ferraris@collabora.com>
The ASRC peripheral accepts a wide range of input and output clocks, but
no mechanism exists at the moment to define those as they are currently
hardcoded in the driver.
This commit adds new properties allowing selection of arbitrary input
and output clocks.
Signed-off-by: Arnaud Ferraris <arnaud.ferraris@collabora.com>
---
Documentation/devicetree/bindings/sound/fsl,asrc.txt | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/Documentation/devicetree/bindings/sound/fsl,asrc.txt b/Documentation/devicetree/bindings/sound/fsl,asrc.txt
index 998b4c8a7f78..e26ce9bad617 100644
--- a/Documentation/devicetree/bindings/sound/fsl,asrc.txt
+++ b/Documentation/devicetree/bindings/sound/fsl,asrc.txt
@@ -55,6 +55,12 @@ Optional properties:
Ends, which can replace the fsl,asrc-width.
The value is 2 (S16_LE), or 6 (S24_LE).
+ - fsl,asrc-input-clock : Input clock ID, defaults to INCLK_NONE
+ (see enum asrc_inclk in fsl_asrc.h)
+
+ - fsl,asrc-output-clock : Output clock ID, defaults to OUTCLK_ASRCK1_CLK
+ (see enum asrc_outclk in fsl_asrc.h)
+
Example:
asrc: asrc@2034000 {
@@ -77,4 +83,6 @@ asrc: asrc@2034000 {
"txa", "txb", "txc";
fsl,asrc-rate = <48000>;
fsl,asrc-width = <16>;
+ fsl,asrc-input-clock = <0x3>;
+ fsl,asrc-output-clock = <0xf>;
};
--
2.27.0
^ permalink raw reply related
* [PATCH 2/4] ASoC: fsl_asrc: allow using arbitrary input and output clocks
From: Arnaud Ferraris @ 2020-07-02 14:22 UTC (permalink / raw)
Cc: alsa-devel, devicetree, Arnaud Ferraris, linuxppc-dev, Timur Tabi,
Xiubo Li, linux-kernel, Takashi Iwai, Liam Girdwood, Rob Herring,
Jaroslav Kysela, Nicolin Chen, Mark Brown, kernel, Fabio Estevam
In-Reply-To: <20200702142235.235869-1-arnaud.ferraris@collabora.com>
fsl_asrc currently uses hardcoded input and output clocks, preventing
its use for anything other than S/PDIF output.
This patch adds the ability to select any clock as input or output (by
using new DT properties), making it possible to use this peripheral in a
more advanced way.
Signed-off-by: Arnaud Ferraris <arnaud.ferraris@collabora.com>
---
sound/soc/fsl/fsl_asrc.c | 18 ++++++++++++++++--
sound/soc/fsl/fsl_asrc_common.h | 3 +++
2 files changed, 19 insertions(+), 2 deletions(-)
diff --git a/sound/soc/fsl/fsl_asrc.c b/sound/soc/fsl/fsl_asrc.c
index 95f6a9617b0b..75df220e4b51 100644
--- a/sound/soc/fsl/fsl_asrc.c
+++ b/sound/soc/fsl/fsl_asrc.c
@@ -605,8 +605,8 @@ static int fsl_asrc_dai_hw_params(struct snd_pcm_substream *substream,
config.pair = pair->index;
config.channel_num = channels;
- config.inclk = INCLK_NONE;
- config.outclk = OUTCLK_ASRCK1_CLK;
+ config.inclk = asrc->inclk;
+ config.outclk = asrc->outclk;
if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) {
config.input_format = params_format(params);
@@ -1067,6 +1067,20 @@ static int fsl_asrc_probe(struct platform_device *pdev)
asrc->channel_avail = 10;
+ ret = of_property_read_u32(np, "fsl,asrc-input-clock",
+ &asrc->inclk);
+ if (ret) {
+ dev_info(&pdev->dev, "no input clock specified, using none\n");
+ asrc->inclk = INCLK_NONE;
+ }
+
+ ret = of_property_read_u32(np, "fsl,asrc-output-clock",
+ &asrc->outclk);
+ if (ret) {
+ dev_info(&pdev->dev, "no output clock specified, using default\n");
+ asrc->outclk = OUTCLK_ASRCK1_CLK;
+ }
+
ret = of_property_read_u32(np, "fsl,asrc-rate",
&asrc->asrc_rate);
if (ret) {
diff --git a/sound/soc/fsl/fsl_asrc_common.h b/sound/soc/fsl/fsl_asrc_common.h
index 7e1c13ca37f1..1468878fbaca 100644
--- a/sound/soc/fsl/fsl_asrc_common.h
+++ b/sound/soc/fsl/fsl_asrc_common.h
@@ -89,6 +89,9 @@ struct fsl_asrc {
struct fsl_asrc_pair *pair[PAIR_CTX_NUM];
unsigned int channel_avail;
+ enum asrc_inclk inclk;
+ enum asrc_outclk outclk;
+
int asrc_rate;
snd_pcm_format_t asrc_format;
bool use_edma;
--
2.27.0
^ permalink raw reply related
* [PATCH 3/4] ASoC: fsl_asrc: always use ratio for conversion
From: Arnaud Ferraris @ 2020-07-02 14:22 UTC (permalink / raw)
Cc: alsa-devel, devicetree, Arnaud Ferraris, linuxppc-dev, Timur Tabi,
Xiubo Li, linux-kernel, Takashi Iwai, Liam Girdwood, Rob Herring,
Jaroslav Kysela, Nicolin Chen, Mark Brown, kernel, Fabio Estevam
In-Reply-To: <20200702142235.235869-1-arnaud.ferraris@collabora.com>
Even when not in "Ideal Ratio" mode, ASRC can use an internally measured
ratio, which greatly improves the conversion quality.
This patch ensures we always use at least the internal ratio.
Signed-off-by: Arnaud Ferraris <arnaud.ferraris@collabora.com>
---
sound/soc/fsl/fsl_asrc.c | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/sound/soc/fsl/fsl_asrc.c b/sound/soc/fsl/fsl_asrc.c
index 75df220e4b51..65e7307a3df0 100644
--- a/sound/soc/fsl/fsl_asrc.c
+++ b/sound/soc/fsl/fsl_asrc.c
@@ -451,7 +451,7 @@ static int fsl_asrc_config_pair(struct fsl_asrc_pair *pair, bool use_ideal_rate)
regmap_update_bits(asrc->regmap, REG_ASRCTR,
ASRCTR_ATSi_MASK(index), ASRCTR_ATS(index));
regmap_update_bits(asrc->regmap, REG_ASRCTR,
- ASRCTR_USRi_MASK(index), 0);
+ ASRCTR_USRi_MASK(index), ASRCTR_USR(index));
/* Set the input and output clock sources */
regmap_update_bits(asrc->regmap, REG_ASRCSR,
@@ -493,8 +493,7 @@ static int fsl_asrc_config_pair(struct fsl_asrc_pair *pair, bool use_ideal_rate)
/* Enable Ideal Ratio mode */
regmap_update_bits(asrc->regmap, REG_ASRCTR,
- ASRCTR_IDRi_MASK(index) | ASRCTR_USRi_MASK(index),
- ASRCTR_IDR(index) | ASRCTR_USR(index));
+ ASRCTR_IDRi_MASK(index), ASRCTR_IDR(index));
fsl_asrc_sel_proc(inrate, outrate, &pre_proc, &post_proc);
--
2.27.0
^ permalink raw reply related
* [PATCH 4/4] ASoC: fsl_asrc: swap input and output clocks in capture mode
From: Arnaud Ferraris @ 2020-07-02 14:22 UTC (permalink / raw)
Cc: alsa-devel, devicetree, Arnaud Ferraris, linuxppc-dev, Timur Tabi,
Xiubo Li, linux-kernel, Takashi Iwai, Liam Girdwood, Rob Herring,
Jaroslav Kysela, Nicolin Chen, Mark Brown, kernel, Fabio Estevam
In-Reply-To: <20200702142235.235869-1-arnaud.ferraris@collabora.com>
The input clock is the reference clock we need to convert the stream to,
which therefore has to be the clock of the origin stream/device.
When the stream is bi-directional and we want ASRC to act on both
directions, we need to swap the input and output clocks between the
playback and capture streams.
As some of the clocks have different ID's depending on whether they are
used as input or output, this requires adding a new function to find the
output clock ID corresponding to a given input clock.
Signed-off-by: Arnaud Ferraris <arnaud.ferraris@collabora.com>
---
sound/soc/fsl/fsl_asrc.c | 50 ++++++++++++++++++++++++++++++++++++++--
1 file changed, 48 insertions(+), 2 deletions(-)
diff --git a/sound/soc/fsl/fsl_asrc.c b/sound/soc/fsl/fsl_asrc.c
index 65e7307a3df0..5aeab1fbcdd9 100644
--- a/sound/soc/fsl/fsl_asrc.c
+++ b/sound/soc/fsl/fsl_asrc.c
@@ -506,6 +506,50 @@ static int fsl_asrc_config_pair(struct fsl_asrc_pair *pair, bool use_ideal_rate)
return fsl_asrc_set_ideal_ratio(pair, inrate, outrate);
}
+/**
+ * Select the output clock corresponding to a given input clock (and vice-versa)
+ *
+ * If we want to setup a capture channel, the input and output clocks have to
+ * be swapped.
+ * However, even if most of the clocks have the same index when used as input
+ * or output, some of them (ESAI, SSI* and SPDIF) are different:
+ * - the TX output clock has the index of the corresponding RX input clock
+ * - the RX output clock has the index of the corresponding TX input clock
+ *
+ * This function makes sure that we use the proper clock index when swapping
+ * the input and output clocks.
+ */
+static enum asrc_outclk fsl_asrc_get_capture_clock(enum asrc_inclk inclk)
+{
+ enum asrc_outclk outclk;
+
+ switch (inclk) {
+ case INCLK_ESAI_RX:
+ case INCLK_SSI1_RX:
+ case INCLK_SSI2_RX:
+ case INCLK_SPDIF_RX:
+ outclk = inclk + 0x8;
+ break;
+ case INCLK_SSI3_RX:
+ outclk = OUTCLK_SSI3_RX;
+ break;
+ case INCLK_ESAI_TX:
+ case INCLK_SSI1_TX:
+ case INCLK_SSI2_TX:
+ case INCLK_SPDIF_TX:
+ outclk = inclk - 0x8;
+ break;
+ case INCLK_SSI3_TX:
+ outclk = OUTCLK_SSI3_TX;
+ break;
+ default:
+ outclk = inclk;
+ break;
+ }
+
+ return outclk;
+}
+
/**
* Start the assigned ASRC pair
*
@@ -604,15 +648,17 @@ static int fsl_asrc_dai_hw_params(struct snd_pcm_substream *substream,
config.pair = pair->index;
config.channel_num = channels;
- config.inclk = asrc->inclk;
- config.outclk = asrc->outclk;
if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) {
+ config.inclk = asrc->inclk;
+ config.outclk = asrc->outclk;
config.input_format = params_format(params);
config.output_format = asrc->asrc_format;
config.input_sample_rate = rate;
config.output_sample_rate = asrc->asrc_rate;
} else {
+ config.inclk = fsl_asrc_get_capture_clock(asrc->outclk);
+ config.outclk = fsl_asrc_get_capture_clock(asrc->inclk);
config.input_format = asrc->asrc_format;
config.output_format = params_format(params);
config.input_sample_rate = asrc->asrc_rate;
--
2.27.0
^ permalink raw reply related
* Re: [PATCH 09/20] bcache: stop setting ->queuedata
From: Coly Li @ 2020-07-02 14:47 UTC (permalink / raw)
To: Christoph Hellwig, Jens Axboe
Cc: linux-bcache, linux-xtensa, linux-nvdimm, linux-s390, dm-devel,
linux-nvme, linux-kernel, linux-raid, linux-m68k, linuxppc-dev,
drbd-dev
In-Reply-To: <20200701085947.3354405-10-hch@lst.de>
On 2020/7/1 16:59, Christoph Hellwig wrote:
> Nothing in bcache actually uses the ->queuedata field.
>
> Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: Coly Li <colyli@suse.de>
> ---
> drivers/md/bcache/super.c | 1 -
> 1 file changed, 1 deletion(-)
>
> diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
> index 2014016f9a60d3..21aa168113d30b 100644
> --- a/drivers/md/bcache/super.c
> +++ b/drivers/md/bcache/super.c
> @@ -876,7 +876,6 @@ static int bcache_device_init(struct bcache_device *d, unsigned int block_size,
> return -ENOMEM;
>
> d->disk->queue = q;
> - q->queuedata = d;
> q->backing_dev_info->congested_data = d;
> q->limits.max_hw_sectors = UINT_MAX;
> q->limits.max_sectors = UINT_MAX;
>
^ permalink raw reply
* Re: [PATCH 16/20] block: move ->make_request_fn to struct block_device_operations
From: Coly Li @ 2020-07-02 14:48 UTC (permalink / raw)
To: Christoph Hellwig, Jens Axboe
Cc: linux-bcache, linux-xtensa, linux-nvdimm, linux-s390, dm-devel,
linux-nvme, linux-kernel, linux-raid, linux-m68k, linuxppc-dev,
drbd-dev
In-Reply-To: <20200701085947.3354405-17-hch@lst.de>
On 2020/7/1 16:59, Christoph Hellwig wrote:
> The make_request_fn is a little weird in that it sits directly in
> struct request_queue instead of an operation vector. Replace it with
> a block_device_operations method called submit_bio (which describes much
> better what it does). Also remove the request_queue argument to it, as
> the queue can be derived pretty trivially from the bio.
>
> Signed-off-by: Christoph Hellwig <hch@lst.de>
For the bcache part,
Acked-by: Coly Li <colyli@suse.de>
> ---
> Documentation/block/biodoc.rst | 2 +-
> .../block/writeback_cache_control.rst | 2 +-
> arch/m68k/emu/nfblock.c | 5 +-
> arch/xtensa/platforms/iss/simdisk.c | 5 +-
> block/blk-cgroup.c | 2 +-
> block/blk-core.c | 53 +++++++------------
> block/blk-mq.c | 10 ++--
> block/blk.h | 2 -
> drivers/block/brd.c | 5 +-
> drivers/block/drbd/drbd_int.h | 2 +-
> drivers/block/drbd/drbd_main.c | 9 ++--
> drivers/block/drbd/drbd_req.c | 2 +-
> drivers/block/null_blk_main.c | 17 ++++--
> drivers/block/pktcdvd.c | 11 ++--
> drivers/block/ps3vram.c | 15 +++---
> drivers/block/rsxx/dev.c | 7 ++-
> drivers/block/umem.c | 5 +-
> drivers/block/zram/zram_drv.c | 11 ++--
> drivers/lightnvm/core.c | 8 +--
> drivers/lightnvm/pblk-init.c | 12 +++--
> drivers/md/bcache/request.c | 4 +-
> drivers/md/bcache/request.h | 4 +-
> drivers/md/bcache/super.c | 23 +++++---
> drivers/md/dm.c | 23 ++++----
> drivers/md/md.c | 5 +-
> drivers/nvdimm/blk.c | 5 +-
> drivers/nvdimm/btt.c | 5 +-
> drivers/nvdimm/pmem.c | 5 +-
> drivers/nvme/host/core.c | 1 +
> drivers/nvme/host/multipath.c | 5 +-
> drivers/nvme/host/nvme.h | 1 +
> drivers/s390/block/dcssblk.c | 9 ++--
> drivers/s390/block/xpram.c | 6 +--
> include/linux/blk-mq.h | 2 +-
> include/linux/blkdev.h | 7 +--
> include/linux/lightnvm.h | 3 +-
> 36 files changed, 153 insertions(+), 140 deletions(-)
>
[snipped]
> diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
> index 7acf024e99f351..fc5702b10074d6 100644
> --- a/drivers/md/bcache/request.c
> +++ b/drivers/md/bcache/request.c
> @@ -1158,7 +1158,7 @@ static void quit_max_writeback_rate(struct cache_set *c,
>
> /* Cached devices - read & write stuff */
>
> -blk_qc_t cached_dev_make_request(struct request_queue *q, struct bio *bio)
> +blk_qc_t cached_dev_submit_bio(struct bio *bio)
> {
> struct search *s;
> struct bcache_device *d = bio->bi_disk->private_data;
> @@ -1291,7 +1291,7 @@ static void flash_dev_nodata(struct closure *cl)
> continue_at(cl, search_free, NULL);
> }
>
> -blk_qc_t flash_dev_make_request(struct request_queue *q, struct bio *bio)
> +blk_qc_t flash_dev_submit_bio(struct bio *bio)
> {
> struct search *s;
> struct closure *cl;
> diff --git a/drivers/md/bcache/request.h b/drivers/md/bcache/request.h
> index bb005c93dd7218..82b38366a95deb 100644
> --- a/drivers/md/bcache/request.h
> +++ b/drivers/md/bcache/request.h
> @@ -37,10 +37,10 @@ unsigned int bch_get_congested(const struct cache_set *c);
> void bch_data_insert(struct closure *cl);
>
> void bch_cached_dev_request_init(struct cached_dev *dc);
> -blk_qc_t cached_dev_make_request(struct request_queue *q, struct bio *bio);
> +blk_qc_t cached_dev_submit_bio(struct bio *bio);
>
> void bch_flash_dev_request_init(struct bcache_device *d);
> -blk_qc_t flash_dev_make_request(struct request_queue *q, struct bio *bio);
> +blk_qc_t flash_dev_submit_bio(struct bio *bio);
>
> extern struct kmem_cache *bch_search_cache;
>
> diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
> index 21aa168113d30b..de13f6e916966d 100644
> --- a/drivers/md/bcache/super.c
> +++ b/drivers/md/bcache/super.c
> @@ -680,7 +680,16 @@ static int ioctl_dev(struct block_device *b, fmode_t mode,
> return d->ioctl(d, mode, cmd, arg);
> }
>
> -static const struct block_device_operations bcache_ops = {
> +static const struct block_device_operations bcache_cached_ops = {
> + .submit_bio = cached_dev_submit_bio,
> + .open = open_dev,
> + .release = release_dev,
> + .ioctl = ioctl_dev,
> + .owner = THIS_MODULE,
> +};
> +
> +static const struct block_device_operations bcache_flash_ops = {
> + .submit_bio = flash_dev_submit_bio,
> .open = open_dev,
> .release = release_dev,
> .ioctl = ioctl_dev,
> @@ -820,8 +829,8 @@ static void bcache_device_free(struct bcache_device *d)
> }
>
> static int bcache_device_init(struct bcache_device *d, unsigned int block_size,
> - sector_t sectors, make_request_fn make_request_fn,
> - struct block_device *cached_bdev)
> + sector_t sectors, struct block_device *cached_bdev,
> + const struct block_device_operations *ops)
> {
> struct request_queue *q;
> const size_t max_stripes = min_t(size_t, INT_MAX,
> @@ -868,10 +877,10 @@ static int bcache_device_init(struct bcache_device *d, unsigned int block_size,
>
> d->disk->major = bcache_major;
> d->disk->first_minor = idx_to_first_minor(idx);
> - d->disk->fops = &bcache_ops;
> + d->disk->fops = ops;
> d->disk->private_data = d;
>
> - q = blk_alloc_queue(make_request_fn, NUMA_NO_NODE);
> + q = blk_alloc_queue(NUMA_NO_NODE);
> if (!q)
> return -ENOMEM;
>
> @@ -1355,7 +1364,7 @@ static int cached_dev_init(struct cached_dev *dc, unsigned int block_size)
>
> ret = bcache_device_init(&dc->disk, block_size,
> dc->bdev->bd_part->nr_sects - dc->sb.data_offset,
> - cached_dev_make_request, dc->bdev);
> + dc->bdev, &bcache_cached_ops);
> if (ret)
> return ret;
>
> @@ -1468,7 +1477,7 @@ static int flash_dev_run(struct cache_set *c, struct uuid_entry *u)
> kobject_init(&d->kobj, &bch_flash_dev_ktype);
>
> if (bcache_device_init(d, block_bytes(c), u->sectors,
> - flash_dev_make_request, NULL))
> + NULL, &bcache_flash_ops))
> goto err;
>
> bcache_device_attach(d, c, u - c->uuids);
^ permalink raw reply
* Re: [PATCH 17/20] block: rename generic_make_request to submit_bio_noacct
From: Coly Li @ 2020-07-02 14:51 UTC (permalink / raw)
To: Christoph Hellwig, Jens Axboe
Cc: linux-bcache, linux-xtensa, linux-nvdimm, linux-s390, dm-devel,
linux-nvme, linux-kernel, linux-raid, linux-m68k, linuxppc-dev,
drbd-dev
In-Reply-To: <20200701085947.3354405-18-hch@lst.de>
On 2020/7/1 16:59, Christoph Hellwig wrote:
> generic_make_request has always been very confusingly misnamed, so rename
> it to submit_bio_noacct to make it clear that it is submit_bio minus
> accounting and a few checks.
>
> Signed-off-by: Christoph Hellwig <hch@lst.de>
I will miss generic_make_request(). Anyway, if it is decided, for bcache
part,
Acked-by: Coly Li <colyli@suse.de>
> ---
> Documentation/block/biodoc.rst | 2 +-
> .../fault-injection/fault-injection.rst | 2 +-
> Documentation/trace/ftrace.rst | 4 +--
> block/bio.c | 14 ++++----
> block/blk-core.c | 32 +++++++++----------
> block/blk-crypto-fallback.c | 2 +-
> block/blk-crypto.c | 2 +-
> block/blk-merge.c | 2 +-
> block/blk-throttle.c | 4 +--
> block/bounce.c | 2 +-
> drivers/block/drbd/drbd_int.h | 6 ++--
> drivers/block/drbd/drbd_main.c | 2 +-
> drivers/block/drbd/drbd_receiver.c | 2 +-
> drivers/block/drbd/drbd_req.c | 2 +-
> drivers/block/drbd/drbd_worker.c | 2 +-
> drivers/block/pktcdvd.c | 2 +-
> drivers/lightnvm/pblk-read.c | 2 +-
> drivers/md/bcache/bcache.h | 2 +-
> drivers/md/bcache/btree.c | 2 +-
> drivers/md/bcache/request.c | 7 ++--
> drivers/md/dm-cache-target.c | 6 ++--
> drivers/md/dm-clone-target.c | 10 +++---
> drivers/md/dm-crypt.c | 6 ++--
> drivers/md/dm-delay.c | 2 +-
> drivers/md/dm-era-target.c | 2 +-
> drivers/md/dm-integrity.c | 4 +--
> drivers/md/dm-mpath.c | 2 +-
> drivers/md/dm-raid1.c | 2 +-
> drivers/md/dm-snap-persistent.c | 2 +-
> drivers/md/dm-snap.c | 6 ++--
> drivers/md/dm-thin.c | 4 +--
> drivers/md/dm-verity-target.c | 2 +-
> drivers/md/dm-writecache.c | 2 +-
> drivers/md/dm-zoned-target.c | 2 +-
> drivers/md/dm.c | 10 +++---
> drivers/md/md-faulty.c | 4 +--
> drivers/md/md-linear.c | 4 +--
> drivers/md/md-multipath.c | 4 +--
> drivers/md/raid0.c | 8 ++---
> drivers/md/raid1.c | 14 ++++----
> drivers/md/raid10.c | 28 ++++++++--------
> drivers/md/raid5.c | 10 +++---
> drivers/nvme/host/multipath.c | 2 +-
> include/linux/blkdev.h | 2 +-
> 44 files changed, 115 insertions(+), 118 deletions(-)
>
[snipped]
> diff --git a/drivers/lightnvm/pblk-read.c b/drivers/lightnvm/pblk-read.c
> index 140927ebf41e9a..c28537a489bc10 100644
> --- a/drivers/lightnvm/pblk-read.c
> +++ b/drivers/lightnvm/pblk-read.c
> @@ -320,7 +320,7 @@ void pblk_submit_read(struct pblk *pblk, struct bio *bio)
> split_bio = bio_split(bio, nr_secs * NR_PHY_IN_LOG, GFP_KERNEL,
> &pblk_bio_set);
> bio_chain(split_bio, bio);
> - generic_make_request(bio);
> + submit_bio_noacct(bio);
>
> /* New bio contains first N sectors of the previous one, so
> * we can continue to use existing rqd, but we need to shrink
> diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
> index 221e0191b6870f..3c708e8b5e2d34 100644
> --- a/drivers/md/bcache/bcache.h
> +++ b/drivers/md/bcache/bcache.h
> @@ -929,7 +929,7 @@ static inline void closure_bio_submit(struct cache_set *c,
> bio_endio(bio);
> return;
> }
> - generic_make_request(bio);
> + submit_bio_noacct(bio);
> }
>
> /*
> diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
> index 6548a601edf0e4..d5c51e33204679 100644
> --- a/drivers/md/bcache/btree.c
> +++ b/drivers/md/bcache/btree.c
> @@ -959,7 +959,7 @@ static struct btree *mca_alloc(struct cache_set *c, struct btree_op *op,
> * bch_btree_node_get - find a btree node in the cache and lock it, reading it
> * in from disk if necessary.
> *
> - * If IO is necessary and running under generic_make_request, returns -EAGAIN.
> + * If IO is necessary and running under submit_bio_noacct, returns -EAGAIN.
> *
> * The btree node will have either a read or a write lock held, depending on
> * level and op->lock.
> diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
> index fc5702b10074d6..dd012ebface012 100644
> --- a/drivers/md/bcache/request.c
> +++ b/drivers/md/bcache/request.c
> @@ -1115,7 +1115,7 @@ static void detached_dev_do_request(struct bcache_device *d, struct bio *bio)
> !blk_queue_discard(bdev_get_queue(dc->bdev)))
> bio->bi_end_io(bio);
> else
> - generic_make_request(bio);
> + submit_bio_noacct(bio);
> }
>
> static void quit_max_writeback_rate(struct cache_set *c,
> @@ -1197,7 +1197,7 @@ blk_qc_t cached_dev_submit_bio(struct bio *bio)
> if (!bio->bi_iter.bi_size) {
> /*
> * can't call bch_journal_meta from under
> - * generic_make_request
> + * submit_bio_noacct
> */
> continue_at_nobarrier(&s->cl,
> cached_dev_nodata,
> @@ -1311,8 +1311,7 @@ blk_qc_t flash_dev_submit_bio(struct bio *bio)
>
> if (!bio->bi_iter.bi_size) {
> /*
> - * can't call bch_journal_meta from under
> - * generic_make_request
> + * can't call bch_journal_meta from under submit_bio_noacct
> */
> continue_at_nobarrier(&s->cl,
> flash_dev_nodata,
>
^ permalink raw reply
* Re: [PATCH 1/2] dt-bindings: sound: fsl-asoc-card: add new compatible for I2S slave
From: Arnaud Ferraris @ 2020-07-02 15:28 UTC (permalink / raw)
To: Mark Brown
Cc: devicetree, alsa-devel, linuxppc-dev, Timur Tabi, Xiubo Li,
linux-kernel, Takashi Iwai, Liam Girdwood, Jaroslav Kysela,
Nicolin Chen, Rob Herring, kernel, Fabio Estevam
In-Reply-To: <20200702143145.GG4483@sirena.org.uk>
Hi Mark,
Le 02/07/2020 à 16:31, Mark Brown a écrit :
> On Thu, Jul 02, 2020 at 04:11:14PM +0200, Arnaud Ferraris wrote:
>> fsl-asoc-card currently doesn't support generic codecs with the SoC
>> acting as I2S slave.
>>
>> This commit adds a new `fsl,imx-audio-i2s-slave` for this use-case, as
>> well as the following mandatory properties:
>
> Why require that the CODEC be clock master here - why not make this
> configurable, reusing the properties from the generic and audio graph
> cards?
This is partly because I'm not sure how to do it (yet), but mostly
because I don't have the hardware to test this (the 2 CODECs present on
my only i.MX6 board are both clock master)
Regards,
Arnaud
^ permalink raw reply
* [PATCH] powerpc/powernv: machine check handler for POWER10
From: Nicholas Piggin @ 2020-07-02 23:33 UTC (permalink / raw)
To: linuxppc-dev; +Cc: Mahesh Salgaonkar, Nicholas Piggin
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
arch/powerpc/include/asm/mce.h | 1 +
arch/powerpc/kernel/dt_cpu_ftrs.c | 10 ++++
arch/powerpc/kernel/mce.c | 1 +
arch/powerpc/kernel/mce_power.c | 84 +++++++++++++++++++++++++++++++
4 files changed, 96 insertions(+)
diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h
index 376a395daf32..30b5b03fb11d 100644
--- a/arch/powerpc/include/asm/mce.h
+++ b/arch/powerpc/include/asm/mce.h
@@ -86,6 +86,7 @@ enum MCE_TlbErrorType {
enum MCE_UserErrorType {
MCE_USER_ERROR_INDETERMINATE = 0,
MCE_USER_ERROR_TLBIE = 1,
+ MCE_USER_ERROR_SCV = 2,
};
enum MCE_RaErrorType {
diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c
index 3a409517c031..12cec6919a1a 100644
--- a/arch/powerpc/kernel/dt_cpu_ftrs.c
+++ b/arch/powerpc/kernel/dt_cpu_ftrs.c
@@ -67,6 +67,7 @@ struct dt_cpu_feature {
extern long __machine_check_early_realmode_p8(struct pt_regs *regs);
extern long __machine_check_early_realmode_p9(struct pt_regs *regs);
+extern long __machine_check_early_realmode_p10(struct pt_regs *regs);
static int hv_mode;
@@ -450,6 +451,14 @@ static int __init feat_enable_pmu_power9(struct dt_cpu_feature *f)
return 1;
}
+static int __init feat_enable_mce_power10(struct dt_cpu_feature *f)
+{
+ cur_cpu_spec->platform = "power10";
+ cur_cpu_spec->machine_check_early = __machine_check_early_realmode_p10;
+
+ return 1;
+}
+
static int __init feat_enable_tm(struct dt_cpu_feature *f)
{
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
@@ -638,6 +647,7 @@ static struct dt_cpu_feature_match __initdata
{"group-start-register", feat_enable, 0},
{"pc-relative-addressing", feat_enable, 0},
{"machine-check-power9", feat_enable_mce_power9, 0},
+ {"machine-check-power10", feat_enable_mce_power10, 0},
{"performance-monitor-power9", feat_enable_pmu_power9, 0},
{"event-based-branch-v3", feat_enable, 0},
{"random-number-generator", feat_enable, 0},
diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
index fd90c0eda229..4f96f3c24797 100644
--- a/arch/powerpc/kernel/mce.c
+++ b/arch/powerpc/kernel/mce.c
@@ -370,6 +370,7 @@ void machine_check_print_event_info(struct machine_check_event *evt,
static const char *mc_user_types[] = {
"Indeterminate",
"tlbie(l) invalid",
+ "scv invalid",
};
static const char *mc_ra_types[] = {
"Indeterminate",
diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c
index c3b522bff9b4..b7e173754a2e 100644
--- a/arch/powerpc/kernel/mce_power.c
+++ b/arch/powerpc/kernel/mce_power.c
@@ -243,6 +243,45 @@ static const struct mce_ierror_table mce_p9_ierror_table[] = {
MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
{ 0, 0, 0, 0, 0, 0, 0 } };
+static const struct mce_ierror_table mce_p10_ierror_table[] = {
+{ 0x00000000081c0000, 0x0000000000040000, true,
+ MCE_ERROR_TYPE_UE, MCE_UE_ERROR_IFETCH, MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
+{ 0x00000000081c0000, 0x0000000000080000, true,
+ MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY, MCE_ECLASS_HARD_INDETERMINATE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
+{ 0x00000000081c0000, 0x00000000000c0000, true,
+ MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE,
+ MCE_INITIATOR_CPU, MCE_SEV_WARNING, true },
+{ 0x00000000081c0000, 0x0000000000100000, true,
+ MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE,
+ MCE_INITIATOR_CPU, MCE_SEV_WARNING, true },
+{ 0x00000000081c0000, 0x0000000000140000, true,
+ MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE,
+ MCE_INITIATOR_CPU, MCE_SEV_WARNING, true },
+{ 0x00000000081c0000, 0x0000000000180000, true,
+ MCE_ERROR_TYPE_UE, MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH, MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
+{ 0x00000000081c0000, 0x00000000001c0000, true,
+ MCE_ERROR_TYPE_RA, MCE_RA_ERROR_IFETCH_FOREIGN, MCE_ECLASS_SOFTWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
+{ 0x00000000081c0000, 0x0000000008080000, true,
+ MCE_ERROR_TYPE_USER,MCE_USER_ERROR_SCV, MCE_ECLASS_SOFTWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_WARNING, true },
+{ 0x00000000081c0000, 0x00000000080c0000, true,
+ MCE_ERROR_TYPE_RA, MCE_RA_ERROR_IFETCH, MCE_ECLASS_SOFTWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
+{ 0x00000000081c0000, 0x0000000008100000, true,
+ MCE_ERROR_TYPE_RA, MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH, MCE_ECLASS_SOFTWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
+{ 0x00000000081c0000, 0x0000000008140000, false,
+ MCE_ERROR_TYPE_RA, MCE_RA_ERROR_STORE, MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_FATAL, false }, /* ASYNC is fatal */
+{ 0x00000000081c0000, 0x00000000081c0000, true, MCE_ECLASS_HARDWARE,
+ MCE_ERROR_TYPE_RA, MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH_FOREIGN,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
+{ 0, 0, 0, 0, 0, 0, 0 } };
+
struct mce_derror_table {
unsigned long dsisr_value;
bool dar_valid; /* dar is a valid indicator of faulting address */
@@ -361,6 +400,46 @@ static const struct mce_derror_table mce_p9_derror_table[] = {
MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
{ 0, false, 0, 0, 0, 0, 0 } };
+static const struct mce_derror_table mce_p10_derror_table[] = {
+{ 0x00008000, false,
+ MCE_ERROR_TYPE_UE, MCE_UE_ERROR_LOAD_STORE, MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
+{ 0x00004000, true,
+ MCE_ERROR_TYPE_UE, MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE,
+ MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
+{ 0x00000800, true,
+ MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE,
+ MCE_INITIATOR_CPU, MCE_SEV_WARNING, true },
+{ 0x00000400, true,
+ MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE,
+ MCE_INITIATOR_CPU, MCE_SEV_WARNING, true },
+{ 0x00000200, false,
+ MCE_ERROR_TYPE_USER, MCE_USER_ERROR_TLBIE, MCE_ECLASS_SOFTWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_WARNING, true },
+{ 0x00000080, true,
+ MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, /* Before PARITY */
+ MCE_ECLASS_SOFT_INDETERMINATE,
+ MCE_INITIATOR_CPU, MCE_SEV_WARNING, true },
+{ 0x00000100, true,
+ MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY, MCE_ECLASS_HARD_INDETERMINATE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
+{ 0x00000040, true,
+ MCE_ERROR_TYPE_RA, MCE_RA_ERROR_LOAD, MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
+{ 0x00000020, false,
+ MCE_ERROR_TYPE_RA, MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE,
+ MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
+{ 0x00000010, false,
+ MCE_ERROR_TYPE_RA, MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE_FOREIGN,
+ MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
+{ 0x00000008, false,
+ MCE_ERROR_TYPE_RA, MCE_RA_ERROR_LOAD_STORE_FOREIGN, MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
+{ 0, false, 0, 0, 0, 0, 0 } };
+
static int mce_find_instr_ea_and_phys(struct pt_regs *regs, uint64_t *addr,
uint64_t *phys_addr)
{
@@ -657,3 +736,8 @@ long __machine_check_early_realmode_p9(struct pt_regs *regs)
return mce_handle_error(regs, mce_p9_derror_table, mce_p9_ierror_table);
}
+
+long __machine_check_early_realmode_p10(struct pt_regs *regs)
+{
+ return mce_handle_error(regs, mce_p10_derror_table, mce_p10_ierror_table);
+}
--
2.23.0
^ permalink raw reply related
* Re: [PATCH v2 4/4] mm/vmalloc: Hugepage vmalloc mappings
From: Nicholas Piggin @ 2020-07-03 0:15 UTC (permalink / raw)
To: linux-mm, Zefan Li
Cc: linux-arch, Will Deacon, Catalin Marinas, x86, linux-kernel,
Ingo Molnar, Borislav, Petkov, H. Peter Anvin, Thomas, Gleixner,
linuxppc-dev, linux-arm-kernel
In-Reply-To: <d148f86c-b27b-63fb-31d2-35b8f52ec540@huawei.com>
Excerpts from Zefan Li's message of July 1, 2020 5:10 pm:
>> static void *__vmalloc_node(unsigned long size, unsigned long align,
>> - gfp_t gfp_mask, pgprot_t prot,
>> - int node, const void *caller);
>> + gfp_t gfp_mask, pgprot_t prot, unsigned long vm_flags,
>> + int node, const void *caller);
>> static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
>> - pgprot_t prot, int node)
>> + pgprot_t prot, unsigned int page_shift,
>> + int node)
>> {
>> struct page **pages;
>> + unsigned long addr = (unsigned long)area->addr;
>> + unsigned long size = get_vm_area_size(area);
>> + unsigned int page_order = page_shift - PAGE_SHIFT;
>> unsigned int nr_pages, array_size, i;
>> const gfp_t nested_gfp = (gfp_mask & GFP_RECLAIM_MASK) | __GFP_ZERO;
>> const gfp_t alloc_mask = gfp_mask | __GFP_NOWARN;
>> const gfp_t highmem_mask = (gfp_mask & (GFP_DMA | GFP_DMA32)) ?
>> - 0 :
>> - __GFP_HIGHMEM;
>> + 0 : __GFP_HIGHMEM;
>>
>> - nr_pages = get_vm_area_size(area) >> PAGE_SHIFT;
>> + nr_pages = size >> page_shift;
>
> while try out this patchset, we encountered a BUG_ON in account_kernel_stack()
> in kernel/fork.c.
>
> BUG_ON(vm->nr_pages != THREAD_SIZE / PAGE_SIZE);
>
> which obviously should be updated accordingly.
Thanks for finding that. We may have to change this around a bit so
nr_pages still appears to be in PAGE_SIZE units for anybody looking.
Thanks,
Nick
^ permalink raw reply
* Re: [PATCH 6/8] powerpc/pseries: implement paravirt qspinlocks for SPLPAR
From: Waiman Long @ 2020-07-03 0:36 UTC (permalink / raw)
To: Nicholas Piggin
Cc: kbuild-all, kernel test robot, Peter Zijlstra, linuxppc-dev,
Boqun Feng, linux-kernel, virtualization, Ingo Molnar,
Will Deacon
In-Reply-To: <202007030059.nT5quxzB%lkp@intel.com>
On 7/2/20 12:15 PM, kernel test robot wrote:
> Hi Nicholas,
>
> I love your patch! Yet something to improve:
>
> [auto build test ERROR on powerpc/next]
> [also build test ERROR on tip/locking/core v5.8-rc3 next-20200702]
> [If your patch is applied to the wrong git tree, kindly drop us a note.
> And when submitting patch, we suggest to use as documented in
> https://git-scm.com/docs/git-format-patch]
>
> url: https://github.com/0day-ci/linux/commits/Nicholas-Piggin/powerpc-queued-spinlocks-and-rwlocks/20200702-155158
> base: https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git next
> config: powerpc-allyesconfig (attached as .config)
> compiler: powerpc64-linux-gcc (GCC) 9.3.0
> reproduce (this is a W=1 build):
> wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
> chmod +x ~/bin/make.cross
> # save the attached .config to linux build tree
> COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross ARCH=powerpc
>
> If you fix the issue, kindly add following tag as appropriate
> Reported-by: kernel test robot <lkp@intel.com>
>
> All errors (new ones prefixed by >>):
>
> kernel/locking/lock_events.c:61:16: warning: no previous prototype for 'lockevent_read' [-Wmissing-prototypes]
> 61 | ssize_t __weak lockevent_read(struct file *file, char __user *user_buf,
> | ^~~~~~~~~~~~~~
> kernel/locking/lock_events.c: In function 'skip_lockevent':
>>> kernel/locking/lock_events.c:126:12: error: implicit declaration of function 'pv_is_native_spin_unlock' [-Werror=implicit-function-declaration]
> 126 | pv_on = !pv_is_native_spin_unlock();
> | ^~~~~~~~~~~~~~~~~~~~~~~~
> cc1: some warnings being treated as errors
>
> vim +/pv_is_native_spin_unlock +126 kernel/locking/lock_events.c
I think you will need to add the following into
arch/powerpc/include/asm/qspinlock_paravirt.h:
static inline pv_is_native_spin_unlock(void)
{
return !is_shared_processor();
}
Cheers,
Longman
^ permalink raw reply
* [Bug 208181] BUG: KASAN: stack-out-of-bounds in strcmp+0x58/0xd8
From: bugzilla-daemon @ 2020-07-03 1:12 UTC (permalink / raw)
To: linuxppc-dev
In-Reply-To: <bug-208181-206035@https.bugzilla.kernel.org/>
https://bugzilla.kernel.org/show_bug.cgi?id=208181
--- Comment #14 from Erhard F. (erhard_f@mailbox.org) ---
Ah, I've overlooked that...
To set CONFIG_DATA_SHIFT=25 I needed to set ADVANCED_OPTIONS=y,
DATA_SHIFT_BOOL=y first.
But with CONFIG_DATA_SHIFT=25 this kernel won't boot at all. OpenFirmware shows
for a brief moment that the kernel gets loaded and then I get dropped back to
OF console.
OpenFirmware tells me:
Invalid memory access at %SRR0: a0000000 %SRR1: 00000000
--
You are receiving this mail because:
You are watching the assignee of the bug.
^ permalink raw reply
* [PATCH v2 0/3] remove PROT_SAO support and disable
From: Nicholas Piggin @ 2020-07-03 1:19 UTC (permalink / raw)
To: linuxppc-dev; +Cc: linux-mm, kvm-ppc, Nicholas Piggin, linux-api
It was suggested that I post this to a wider audience on account of
the change to supported userspace features in patch 2 particularly.
Thanks,
Nick
Nicholas Piggin (3):
powerpc: remove stale calc_vm_prot_bits comment
powerpc/64s: remove PROT_SAO support
powerpc/64s/hash: disable subpage_prot syscall by default
arch/powerpc/Kconfig | 7 +++-
arch/powerpc/configs/powernv_defconfig | 1 -
arch/powerpc/configs/pseries_defconfig | 1 -
arch/powerpc/include/asm/book3s/64/pgtable.h | 8 ++--
arch/powerpc/include/asm/cputable.h | 10 ++---
arch/powerpc/include/asm/kvm_book3s_64.h | 5 ++-
arch/powerpc/include/asm/mman.h | 30 ++-----------
arch/powerpc/include/asm/nohash/64/pgtable.h | 2 -
arch/powerpc/include/uapi/asm/mman.h | 2 +-
arch/powerpc/kernel/dt_cpu_ftrs.c | 2 +-
arch/powerpc/mm/book3s64/hash_utils.c | 2 -
include/linux/mm.h | 2 -
include/trace/events/mmflags.h | 2 -
mm/ksm.c | 4 --
tools/testing/selftests/powerpc/mm/.gitignore | 1 -
tools/testing/selftests/powerpc/mm/Makefile | 4 +-
tools/testing/selftests/powerpc/mm/prot_sao.c | 42 -------------------
17 files changed, 25 insertions(+), 100 deletions(-)
delete mode 100644 tools/testing/selftests/powerpc/mm/prot_sao.c
--
2.23.0
^ permalink raw reply
* [PATCH v2 1/3] powerpc: remove stale calc_vm_prot_bits comment
From: Nicholas Piggin @ 2020-07-03 1:19 UTC (permalink / raw)
To: linuxppc-dev; +Cc: linux-mm, kvm-ppc, Nicholas Piggin, linux-api
In-Reply-To: <20200703011958.1166620-1-npiggin@gmail.com>
This comment is wrong, we wouldn't use calc_vm_prot_bits here because
we are being called by calc_vm_prot_bits to modify its behaviour.
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
arch/powerpc/include/asm/mman.h | 4 ----
1 file changed, 4 deletions(-)
diff --git a/arch/powerpc/include/asm/mman.h b/arch/powerpc/include/asm/mman.h
index d610c2e07b28..4ba303ea27f5 100644
--- a/arch/powerpc/include/asm/mman.h
+++ b/arch/powerpc/include/asm/mman.h
@@ -13,10 +13,6 @@
#include <linux/pkeys.h>
#include <asm/cpu_has_feature.h>
-/*
- * This file is included by linux/mman.h, so we can't use cacl_vm_prot_bits()
- * here. How important is the optimization?
- */
static inline unsigned long arch_calc_vm_prot_bits(unsigned long prot,
unsigned long pkey)
{
--
2.23.0
^ permalink raw reply related
* [PATCH v2 2/3] powerpc/64s: remove PROT_SAO support
From: Nicholas Piggin @ 2020-07-03 1:19 UTC (permalink / raw)
To: linuxppc-dev; +Cc: linux-mm, kvm-ppc, Nicholas Piggin, linux-api
In-Reply-To: <20200703011958.1166620-1-npiggin@gmail.com>
ISA v3.1 does not support the SAO storage control attribute required to
implement PROT_SAO. PROT_SAO was used by specialised system software
(Lx86) that has been discontinued for about 7 years, and is not thought
to be used elsewhere, so removal should not cause problems.
We rather remove it than keep support for older processors, because
live migrating guest partitions to newer processors may not be possible
if SAO is in use (or worse allowed with silent races).
- PROT_SAO stays in the uapi header so code using it would still build.
- arch_validate_prot() is removed, the generic version rejects PROT_SAO
so applications would get a failure at mmap() time.
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
arch/powerpc/include/asm/book3s/64/pgtable.h | 8 ++--
arch/powerpc/include/asm/cputable.h | 10 ++---
arch/powerpc/include/asm/kvm_book3s_64.h | 5 ++-
arch/powerpc/include/asm/mman.h | 26 ++----------
arch/powerpc/include/asm/nohash/64/pgtable.h | 2 -
arch/powerpc/include/uapi/asm/mman.h | 2 +-
arch/powerpc/kernel/dt_cpu_ftrs.c | 2 +-
arch/powerpc/mm/book3s64/hash_utils.c | 2 -
include/linux/mm.h | 2 -
include/trace/events/mmflags.h | 2 -
mm/ksm.c | 4 --
tools/testing/selftests/powerpc/mm/.gitignore | 1 -
tools/testing/selftests/powerpc/mm/Makefile | 4 +-
tools/testing/selftests/powerpc/mm/prot_sao.c | 42 -------------------
14 files changed, 20 insertions(+), 92 deletions(-)
delete mode 100644 tools/testing/selftests/powerpc/mm/prot_sao.c
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index 25c3cb8272c0..8e9aca96143b 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -20,9 +20,13 @@
#define _PAGE_RW (_PAGE_READ | _PAGE_WRITE)
#define _PAGE_RWX (_PAGE_READ | _PAGE_WRITE | _PAGE_EXEC)
#define _PAGE_PRIVILEGED 0x00008 /* kernel access only */
-#define _PAGE_SAO 0x00010 /* Strong access order */
+
+#define _PAGE_CACHE_CTL 0x00030 /* Bits for the folowing cache modes */
+ /* No bits set is normal cacheable memory */
+ /* 0x00010 unused, is SAO bit on radix POWER9 */
#define _PAGE_NON_IDEMPOTENT 0x00020 /* non idempotent memory */
#define _PAGE_TOLERANT 0x00030 /* tolerant memory, cache inhibited */
+
#define _PAGE_DIRTY 0x00080 /* C: page changed */
#define _PAGE_ACCESSED 0x00100 /* R: page referenced */
/*
@@ -825,8 +829,6 @@ static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr,
return hash__set_pte_at(mm, addr, ptep, pte, percpu);
}
-#define _PAGE_CACHE_CTL (_PAGE_SAO | _PAGE_NON_IDEMPOTENT | _PAGE_TOLERANT)
-
#define pgprot_noncached pgprot_noncached
static inline pgprot_t pgprot_noncached(pgprot_t prot)
{
diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h
index bac2252c839e..87284750535d 100644
--- a/arch/powerpc/include/asm/cputable.h
+++ b/arch/powerpc/include/asm/cputable.h
@@ -191,7 +191,7 @@ static inline void cpu_feature_keys_init(void) { }
#define CPU_FTR_SPURR LONG_ASM_CONST(0x0000000001000000)
#define CPU_FTR_DSCR LONG_ASM_CONST(0x0000000002000000)
#define CPU_FTR_VSX LONG_ASM_CONST(0x0000000004000000)
-#define CPU_FTR_SAO LONG_ASM_CONST(0x0000000008000000)
+// Free LONG_ASM_CONST(0x0000000008000000)
#define CPU_FTR_CP_USE_DCBTZ LONG_ASM_CONST(0x0000000010000000)
#define CPU_FTR_UNALIGNED_LD_STD LONG_ASM_CONST(0x0000000020000000)
#define CPU_FTR_ASYM_SMT LONG_ASM_CONST(0x0000000040000000)
@@ -435,7 +435,7 @@ static inline void cpu_feature_keys_init(void) { }
CPU_FTR_MMCRA | CPU_FTR_SMT | \
CPU_FTR_COHERENT_ICACHE | \
CPU_FTR_PURR | CPU_FTR_SPURR | CPU_FTR_REAL_LE | \
- CPU_FTR_DSCR | CPU_FTR_SAO | CPU_FTR_ASYM_SMT | \
+ CPU_FTR_DSCR | CPU_FTR_ASYM_SMT | \
CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \
CPU_FTR_CFAR | CPU_FTR_HVMODE | \
CPU_FTR_VMX_COPY | CPU_FTR_HAS_PPR | CPU_FTR_DABRX | CPU_FTR_PKEY)
@@ -444,7 +444,7 @@ static inline void cpu_feature_keys_init(void) { }
CPU_FTR_MMCRA | CPU_FTR_SMT | \
CPU_FTR_COHERENT_ICACHE | \
CPU_FTR_PURR | CPU_FTR_SPURR | CPU_FTR_REAL_LE | \
- CPU_FTR_DSCR | CPU_FTR_SAO | \
+ CPU_FTR_DSCR | \
CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \
CPU_FTR_CFAR | CPU_FTR_HVMODE | CPU_FTR_VMX_COPY | \
CPU_FTR_DBELL | CPU_FTR_HAS_PPR | CPU_FTR_DAWR | \
@@ -455,7 +455,7 @@ static inline void cpu_feature_keys_init(void) { }
CPU_FTR_MMCRA | CPU_FTR_SMT | \
CPU_FTR_COHERENT_ICACHE | \
CPU_FTR_PURR | CPU_FTR_SPURR | CPU_FTR_REAL_LE | \
- CPU_FTR_DSCR | CPU_FTR_SAO | \
+ CPU_FTR_DSCR | \
CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \
CPU_FTR_CFAR | CPU_FTR_HVMODE | CPU_FTR_VMX_COPY | \
CPU_FTR_DBELL | CPU_FTR_HAS_PPR | CPU_FTR_ARCH_207S | \
@@ -473,7 +473,7 @@ static inline void cpu_feature_keys_init(void) { }
CPU_FTR_MMCRA | CPU_FTR_SMT | \
CPU_FTR_COHERENT_ICACHE | \
CPU_FTR_PURR | CPU_FTR_SPURR | CPU_FTR_REAL_LE | \
- CPU_FTR_DSCR | CPU_FTR_SAO | \
+ CPU_FTR_DSCR | \
CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \
CPU_FTR_CFAR | CPU_FTR_HVMODE | CPU_FTR_VMX_COPY | \
CPU_FTR_DBELL | CPU_FTR_HAS_PPR | CPU_FTR_ARCH_207S | \
diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
index 9bb9bb370b53..fac39ff659d4 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -398,9 +398,10 @@ static inline bool hpte_cache_flags_ok(unsigned long hptel, bool is_ci)
{
unsigned int wimg = hptel & HPTE_R_WIMG;
- /* Handle SAO */
+ /* Handle SAO for POWER7,8,9 */
if (wimg == (HPTE_R_W | HPTE_R_I | HPTE_R_M) &&
- cpu_has_feature(CPU_FTR_ARCH_206))
+ cpu_has_feature(CPU_FTR_ARCH_206) &&
+ !cpu_has_feature(CPU_FTR_ARCH_31))
wimg = HPTE_R_M;
if (!is_ci)
diff --git a/arch/powerpc/include/asm/mman.h b/arch/powerpc/include/asm/mman.h
index 4ba303ea27f5..7c07728af300 100644
--- a/arch/powerpc/include/asm/mman.h
+++ b/arch/powerpc/include/asm/mman.h
@@ -13,38 +13,20 @@
#include <linux/pkeys.h>
#include <asm/cpu_has_feature.h>
+#ifdef CONFIG_PPC_MEM_KEYS
static inline unsigned long arch_calc_vm_prot_bits(unsigned long prot,
unsigned long pkey)
{
-#ifdef CONFIG_PPC_MEM_KEYS
- return (((prot & PROT_SAO) ? VM_SAO : 0) | pkey_to_vmflag_bits(pkey));
-#else
- return ((prot & PROT_SAO) ? VM_SAO : 0);
-#endif
+ return pkey_to_vmflag_bits(pkey);
}
#define arch_calc_vm_prot_bits(prot, pkey) arch_calc_vm_prot_bits(prot, pkey)
static inline pgprot_t arch_vm_get_page_prot(unsigned long vm_flags)
{
-#ifdef CONFIG_PPC_MEM_KEYS
- return (vm_flags & VM_SAO) ?
- __pgprot(_PAGE_SAO | vmflag_to_pte_pkey_bits(vm_flags)) :
- __pgprot(0 | vmflag_to_pte_pkey_bits(vm_flags));
-#else
- return (vm_flags & VM_SAO) ? __pgprot(_PAGE_SAO) : __pgprot(0);
-#endif
+ return __pgprot(vmflag_to_pte_pkey_bits(vm_flags));
}
#define arch_vm_get_page_prot(vm_flags) arch_vm_get_page_prot(vm_flags)
-
-static inline bool arch_validate_prot(unsigned long prot, unsigned long addr)
-{
- if (prot & ~(PROT_READ | PROT_WRITE | PROT_EXEC | PROT_SEM | PROT_SAO))
- return false;
- if ((prot & PROT_SAO) && !cpu_has_feature(CPU_FTR_SAO))
- return false;
- return true;
-}
-#define arch_validate_prot arch_validate_prot
+#endif
#endif /* CONFIG_PPC64 */
#endif /* _ASM_POWERPC_MMAN_H */
diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h
index 6cb8aa357191..59ee9fa4ae09 100644
--- a/arch/powerpc/include/asm/nohash/64/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/64/pgtable.h
@@ -82,8 +82,6 @@
*/
#include <asm/nohash/pte-book3e.h>
-#define _PAGE_SAO 0
-
#define PTE_RPN_MASK (~((1UL << PTE_RPN_SHIFT) - 1))
/*
diff --git a/arch/powerpc/include/uapi/asm/mman.h b/arch/powerpc/include/uapi/asm/mman.h
index c0c737215b00..3a700351feca 100644
--- a/arch/powerpc/include/uapi/asm/mman.h
+++ b/arch/powerpc/include/uapi/asm/mman.h
@@ -11,7 +11,7 @@
#include <asm-generic/mman-common.h>
-#define PROT_SAO 0x10 /* Strong Access Ordering */
+#define PROT_SAO 0x10 /* Unsupported since v5.9 */
#define MAP_RENAME MAP_ANONYMOUS /* In SunOS terminology */
#define MAP_NORESERVE 0x40 /* don't reserve swap pages */
diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c
index 3a409517c031..41412c198e70 100644
--- a/arch/powerpc/kernel/dt_cpu_ftrs.c
+++ b/arch/powerpc/kernel/dt_cpu_ftrs.c
@@ -622,7 +622,7 @@ static struct dt_cpu_feature_match __initdata
{"processor-control-facility-v3", feat_enable_dbell, CPU_FTR_DBELL},
{"processor-utilization-of-resources-register", feat_enable_purr, 0},
{"no-execute", feat_enable, 0},
- {"strong-access-ordering", feat_enable, CPU_FTR_SAO},
+ /* strong-access-ordering is unused */
{"cache-inhibited-large-page", feat_enable_large_ci, 0},
{"coprocessor-icswx", feat_enable, 0},
{"hypervisor-virtualization-interrupt", feat_enable_hvi, 0},
diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c
index 468169e33c86..e35d8dae4f84 100644
--- a/arch/powerpc/mm/book3s64/hash_utils.c
+++ b/arch/powerpc/mm/book3s64/hash_utils.c
@@ -232,8 +232,6 @@ unsigned long htab_convert_pte_flags(unsigned long pteflags)
rflags |= HPTE_R_I;
else if ((pteflags & _PAGE_CACHE_CTL) == _PAGE_NON_IDEMPOTENT)
rflags |= (HPTE_R_I | HPTE_R_G);
- else if ((pteflags & _PAGE_CACHE_CTL) == _PAGE_SAO)
- rflags |= (HPTE_R_W | HPTE_R_I | HPTE_R_M);
else
/*
* Add memory coherence if cache inhibited is not set
diff --git a/include/linux/mm.h b/include/linux/mm.h
index dc7b87310c10..6c8333d6c991 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -317,8 +317,6 @@ extern unsigned int kobjsize(const void *objp);
#if defined(CONFIG_X86)
# define VM_PAT VM_ARCH_1 /* PAT reserves whole VMA at once (x86) */
-#elif defined(CONFIG_PPC)
-# define VM_SAO VM_ARCH_1 /* Strong Access Ordering (powerpc) */
#elif defined(CONFIG_PARISC)
# define VM_GROWSUP VM_ARCH_1
#elif defined(CONFIG_IA64)
diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h
index 5fb752034386..939092dbcb8b 100644
--- a/include/trace/events/mmflags.h
+++ b/include/trace/events/mmflags.h
@@ -114,8 +114,6 @@ IF_HAVE_PG_IDLE(PG_idle, "idle" )
#if defined(CONFIG_X86)
#define __VM_ARCH_SPECIFIC_1 {VM_PAT, "pat" }
-#elif defined(CONFIG_PPC)
-#define __VM_ARCH_SPECIFIC_1 {VM_SAO, "sao" }
#elif defined(CONFIG_PARISC) || defined(CONFIG_IA64)
#define __VM_ARCH_SPECIFIC_1 {VM_GROWSUP, "growsup" }
#elif !defined(CONFIG_MMU)
diff --git a/mm/ksm.c b/mm/ksm.c
index 4102034cd55a..d1cfa18689b5 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -2452,10 +2452,6 @@ int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
if (vma_is_dax(vma))
return 0;
-#ifdef VM_SAO
- if (*vm_flags & VM_SAO)
- return 0;
-#endif
#ifdef VM_SPARC_ADI
if (*vm_flags & VM_SPARC_ADI)
return 0;
diff --git a/tools/testing/selftests/powerpc/mm/.gitignore b/tools/testing/selftests/powerpc/mm/.gitignore
index 2ca523255b1b..ff296c94f627 100644
--- a/tools/testing/selftests/powerpc/mm/.gitignore
+++ b/tools/testing/selftests/powerpc/mm/.gitignore
@@ -2,7 +2,6 @@
hugetlb_vs_thp_test
subpage_prot
tempfile
-prot_sao
segv_errors
wild_bctr
large_vm_fork_separation
diff --git a/tools/testing/selftests/powerpc/mm/Makefile b/tools/testing/selftests/powerpc/mm/Makefile
index b9103c4bb414..9b8a7b3069c5 100644
--- a/tools/testing/selftests/powerpc/mm/Makefile
+++ b/tools/testing/selftests/powerpc/mm/Makefile
@@ -2,7 +2,7 @@
noarg:
$(MAKE) -C ../
-TEST_GEN_PROGS := hugetlb_vs_thp_test subpage_prot prot_sao segv_errors wild_bctr \
+TEST_GEN_PROGS := hugetlb_vs_thp_test subpage_prot segv_errors wild_bctr \
large_vm_fork_separation bad_accesses
TEST_GEN_PROGS_EXTENDED := tlbie_test
TEST_GEN_FILES := tempfile
@@ -12,8 +12,6 @@ include ../../lib.mk
$(TEST_GEN_PROGS): ../harness.c
-$(OUTPUT)/prot_sao: ../utils.c
-
$(OUTPUT)/wild_bctr: CFLAGS += -m64
$(OUTPUT)/large_vm_fork_separation: CFLAGS += -m64
$(OUTPUT)/bad_accesses: CFLAGS += -m64
diff --git a/tools/testing/selftests/powerpc/mm/prot_sao.c b/tools/testing/selftests/powerpc/mm/prot_sao.c
deleted file mode 100644
index e2eed65b7735..000000000000
--- a/tools/testing/selftests/powerpc/mm/prot_sao.c
+++ /dev/null
@@ -1,42 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright 2016, Michael Ellerman, IBM Corp.
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/mman.h>
-
-#include <asm/cputable.h>
-
-#include "utils.h"
-
-#define SIZE (64 * 1024)
-
-int test_prot_sao(void)
-{
- char *p;
-
- /* 2.06 or later should support SAO */
- SKIP_IF(!have_hwcap(PPC_FEATURE_ARCH_2_06));
-
- /*
- * Ensure we can ask for PROT_SAO.
- * We can't really verify that it does the right thing, but at least we
- * confirm the kernel will accept it.
- */
- p = mmap(NULL, SIZE, PROT_READ | PROT_WRITE | PROT_SAO,
- MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
- FAIL_IF(p == MAP_FAILED);
-
- /* Write to the mapping, to at least cause a fault */
- memset(p, 0xaa, SIZE);
-
- return 0;
-}
-
-int main(void)
-{
- return test_harness(test_prot_sao, "prot-sao");
-}
--
2.23.0
^ permalink raw reply related
* [PATCH v2 3/3] powerpc/64s/hash: disable subpage_prot syscall by default
From: Nicholas Piggin @ 2020-07-03 1:19 UTC (permalink / raw)
To: linuxppc-dev; +Cc: linux-mm, kvm-ppc, Nicholas Piggin, linux-api
In-Reply-To: <20200703011958.1166620-1-npiggin@gmail.com>
The subpage_prot syscall was added for specialised system software
(Lx86) that has been discontinued for about 7 years, and is not thought
to be used elsewhere, so disable it by default.
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
arch/powerpc/Kconfig | 7 +++++--
arch/powerpc/configs/powernv_defconfig | 1 -
arch/powerpc/configs/pseries_defconfig | 1 -
3 files changed, 5 insertions(+), 4 deletions(-)
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 9fa23eb320ff..04c6ca17661a 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -833,13 +833,16 @@ config FORCE_MAX_ZONEORDER
this in mind when choosing a value for this option.
config PPC_SUBPAGE_PROT
- bool "Support setting protections for 4k subpages"
+ bool "Support setting protections for 4k subpages (subpage_prot syscall)"
+ default n
depends on PPC_BOOK3S_64 && PPC_64K_PAGES
help
- This option adds support for a system call to allow user programs
+ This option adds support for system call to allow user programs
to set access permissions (read/write, readonly, or no access)
on the 4k subpages of each 64k page.
+ If unsure, say N here.
+
config PPC_COPRO_BASE
bool
diff --git a/arch/powerpc/configs/powernv_defconfig b/arch/powerpc/configs/powernv_defconfig
index 2de9aadf0f50..afc0dd73a1e6 100644
--- a/arch/powerpc/configs/powernv_defconfig
+++ b/arch/powerpc/configs/powernv_defconfig
@@ -64,7 +64,6 @@ CONFIG_HWPOISON_INJECT=m
CONFIG_TRANSPARENT_HUGEPAGE=y
CONFIG_DEFERRED_STRUCT_PAGE_INIT=y
CONFIG_PPC_64K_PAGES=y
-CONFIG_PPC_SUBPAGE_PROT=y
CONFIG_SCHED_SMT=y
CONFIG_PM=y
CONFIG_HOTPLUG_PCI=y
diff --git a/arch/powerpc/configs/pseries_defconfig b/arch/powerpc/configs/pseries_defconfig
index dfa4a726333b..894e8d85fb48 100644
--- a/arch/powerpc/configs/pseries_defconfig
+++ b/arch/powerpc/configs/pseries_defconfig
@@ -57,7 +57,6 @@ CONFIG_MEMORY_HOTREMOVE=y
CONFIG_KSM=y
CONFIG_TRANSPARENT_HUGEPAGE=y
CONFIG_PPC_64K_PAGES=y
-CONFIG_PPC_SUBPAGE_PROT=y
CONFIG_SCHED_SMT=y
CONFIG_HOTPLUG_PCI=y
CONFIG_HOTPLUG_PCI_RPA=m
--
2.23.0
^ permalink raw reply related
* [Bug 208181] BUG: KASAN: stack-out-of-bounds in strcmp+0x58/0xd8
From: bugzilla-daemon @ 2020-07-03 4:55 UTC (permalink / raw)
To: linuxppc-dev
In-Reply-To: <bug-208181-206035@https.bugzilla.kernel.org/>
https://bugzilla.kernel.org/show_bug.cgi?id=208181
--- Comment #15 from Christophe Leroy (christophe.leroy@csgroup.eu) ---
Ah yes, having init_text above the 24 bits limit might be a problem for
function calls. I'm surprised that the linker doesn't complain.
Anyway, it is not a problem in itself, and it's unrelated to this bug.
--
You are receiving this mail because:
You are watching the assignee of the bug.
^ permalink raw reply
* Re: objtool clac/stac handling change..
From: Christophe Leroy @ 2020-07-03 5:27 UTC (permalink / raw)
To: Michael Ellerman, Linus Torvalds, Al Viro
Cc: Peter Zijlstra, the arch/x86 maintainers,
linuxppc-dev@lists.ozlabs.org, Linux Kernel Mailing List,
Josh Poimboeuf
In-Reply-To: <87h7up70e5.fsf@mpe.ellerman.id.au>
Le 03/07/2020 à 05:17, Michael Ellerman a écrit :
> Christophe Leroy <christophe.leroy@csgroup.eu> writes:
>> Le 02/07/2020 à 15:34, Michael Ellerman a écrit :
>>> Linus Torvalds <torvalds@linux-foundation.org> writes:
>>>> On Wed, Jul 1, 2020 at 12:59 PM Al Viro <viro@zeniv.linux.org.uk> wrote:
>>>>> On Wed, Jul 01, 2020 at 12:04:36PM -0700, Linus Torvalds wrote:
>>>>>>
>>>>>> That's actually for the access granting. Shutting the access down ends
>>>>>> up always doing the same thing anyway..
>>>>>
>>>>> #define user_read_access_end prevent_current_read_from_user
>>>>> #define user_write_access_end prevent_current_write_to_user
>>>>> static inline void prevent_current_read_from_user(void)
>>>>> {
>>>>> prevent_user_access(NULL, NULL, ~0UL, KUAP_CURRENT_READ);
>>>>> }
>>>>>
>>>>> static inline void prevent_current_write_to_user(void)
>>>>> {
>>>>> prevent_user_access(NULL, NULL, ~0UL, KUAP_CURRENT_WRITE);
>>>>> }
>>>>>
>>>>> and prevent_user_access() has instances that do care about the direction...
>>>>
>>>> Go and look closer.
>>>>
>>>> There are three cases:
>>>>
>>>> (a) the 32-bit book3s case. It looks like it cares, but when you look
>>>> closer, it ends up not caring about the read side, and saving the
>>>> "which address to I allow user writes to" in current->thread.kuap
>>>>
>>>> (b) the nohash 32-bit case - doesn't care
>>>>
>>>> (c) the 64-bit books case - doesn't care
>>>>
>>>> So yes, in the (a) case it does make a difference between reads and
>>>> writes, but at least as far as I can tell, it ignores the read case,
>>>> and has code to avoid the unnecessary "disable user writes" case when
>>>> there was only a read enable done.
>>>
>>> Yeah that's my understanding too.
>>>
>>> Christophe is the expert on that code so I'll defer to him if I'm wrong.
>>>
>>>> Now, it's possible that I'm wrong, but the upshot of that is that even
>>>> on powerpc, I think that if we just made the rule be that "taking a
>>>> user exception should automatically do the 'user_access_end()' for us"
>>>> is trivial.
>>>
>>> I think we can do something to make it work.
>>>
>>> We don't have an equivalent of x86's ex_handler_uaccess(), so it's not
>>> quite as easy as whacking a user_access_end() in there.
>>
>> Isn't it something easy to do in bad_page_fault() ?
>
> We'd need to do it there at least.
>
> But I'm not convinced that's the only place we'd need to do it. We could
> theoretically take a machine check on a user access, and those are
> handled differently on each sub-(sub-sub)-platform, and I think all or
> most of them don't call bad_page_fault().
Indeed, it needs to be done everywhere we do
regs->nip = extable_fixup(entry)
There are half a dozen of places that do that, in additional of
bad_page_fault() that's mainly machine checks, also kprobe.
I think we can create a fixup_exception() function which takes regs and
entry as parameters and does the nip fixup and kuap closuse.
>
>> Not exactly a call to user_access_end() but altering regs->kuap so that
>> user access is not restored on exception exit.
>
> Yes.
>
>>> Probably the simplest option for us is to just handle it in our
>>> unsafe_op_wrap(). I'll try and come up with something tomorrow.
>>
>> unsafe_op_wrap() is not used anymore for unsafe_put_user() as we are now
>> using asm goto.
>
> Sure, but we could change it back to use unsafe_op_wrap().
But the whole purpose of using goto in unsafe_???_user() is to allow the
use of asm goto. See explanations in commit
https://github.com/linuxppc/linux/commit/1bd4403d86a1c06cb6cc9ac87664a0c9d3413d51#diff-eba084de047bb8a9087dac10c06f44bc
>
> I did a quick hack to do that and see no difference in the generated
> code, but your commit adding put_user_goto() did show better code
> generation, so possibly it depends on compiler version, or my example
> wasn't complicated enough (filldir()).
Yes as explained above it should remove the error checking in the caller
so your exemple was most likely too trivial.
Christophe
^ permalink raw reply
* [PATCH v3 0/3] Off-load TLB invalidations to host for !GTSE
From: Bharata B Rao @ 2020-07-03 5:36 UTC (permalink / raw)
To: linuxppc-dev; +Cc: aneesh.kumar, Bharata B Rao, npiggin
Hypervisor may choose not to enable Guest Translation Shootdown Enable
(GTSE) option for the guest. When GTSE isn't ON, the guest OS isn't
permitted to use instructions like tblie and tlbsync directly, but is
expected to make hypervisor calls to get the TLB flushed.
This series enables the TLB flush routines in the radix code to
off-load TLB flushing to hypervisor via the newly proposed hcall
H_RPT_INVALIDATE.
To easily check the availability of GTSE, it is made an MMU feature.
The OV5 handling and H_REGISTER_PROC_TBL hcall are changed to
handle GTSE as an optionally available feature and to not assume GTSE
when radix support is available.
The actual hcall implementation for KVM isn't included in this
patchset and will be posted separately.
Changes in v3
=============
- Fixed a bug in the hcall wrapper code where we were missing setting
H_RPTI_TYPE_NESTED while retrying the failed flush request with
a full flush for the nested case.
- s/psize_to_h_rpti/psize_to_rpti_pgsize
v2: https://lore.kernel.org/linuxppc-dev/20200626131000.5207-1-bharata@linux.ibm.com/T/#t
Bharata B Rao (2):
powerpc/mm: Enable radix GTSE only if supported.
powerpc/pseries: H_REGISTER_PROC_TBL should ask for GTSE only if
enabled
Nicholas Piggin (1):
powerpc/mm/book3s64/radix: Off-load TLB invalidations to host when
!GTSE
.../include/asm/book3s/64/tlbflush-radix.h | 15 ++++
arch/powerpc/include/asm/hvcall.h | 34 +++++++-
arch/powerpc/include/asm/mmu.h | 4 +
arch/powerpc/include/asm/plpar_wrappers.h | 52 ++++++++++++
arch/powerpc/kernel/dt_cpu_ftrs.c | 1 +
arch/powerpc/kernel/prom_init.c | 13 +--
arch/powerpc/mm/book3s64/radix_tlb.c | 82 +++++++++++++++++--
arch/powerpc/mm/init_64.c | 5 +-
arch/powerpc/platforms/pseries/lpar.c | 8 +-
9 files changed, 197 insertions(+), 17 deletions(-)
--
2.21.3
^ permalink raw reply
* [PATCH v3 1/3] powerpc/mm: Enable radix GTSE only if supported.
From: Bharata B Rao @ 2020-07-03 5:36 UTC (permalink / raw)
To: linuxppc-dev; +Cc: aneesh.kumar, Bharata B Rao, npiggin
In-Reply-To: <20200703053608.12884-1-bharata@linux.ibm.com>
Make GTSE an MMU feature and enable it by default for radix.
However for guest, conditionally enable it if hypervisor supports
it via OV5 vector. Let prom_init ask for radix GTSE only if the
support exists.
Having GTSE as an MMU feature will make it easy to enable radix
without GTSE. Currently radix assumes GTSE is enabled by default.
Signed-off-by: Bharata B Rao <bharata@linux.ibm.com>
Reviewed-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
---
arch/powerpc/include/asm/mmu.h | 4 ++++
arch/powerpc/kernel/dt_cpu_ftrs.c | 1 +
arch/powerpc/kernel/prom_init.c | 13 ++++++++-----
arch/powerpc/mm/init_64.c | 5 ++++-
4 files changed, 17 insertions(+), 6 deletions(-)
diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h
index f4ac25d4df05..884d51995934 100644
--- a/arch/powerpc/include/asm/mmu.h
+++ b/arch/powerpc/include/asm/mmu.h
@@ -28,6 +28,9 @@
* Individual features below.
*/
+/* Guest Translation Shootdown Enable */
+#define MMU_FTR_GTSE ASM_CONST(0x00001000)
+
/*
* Support for 68 bit VA space. We added that from ISA 2.05
*/
@@ -173,6 +176,7 @@ enum {
#endif
#ifdef CONFIG_PPC_RADIX_MMU
MMU_FTR_TYPE_RADIX |
+ MMU_FTR_GTSE |
#ifdef CONFIG_PPC_KUAP
MMU_FTR_RADIX_KUAP |
#endif /* CONFIG_PPC_KUAP */
diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c
index a0edeb391e3e..ac650c233cd9 100644
--- a/arch/powerpc/kernel/dt_cpu_ftrs.c
+++ b/arch/powerpc/kernel/dt_cpu_ftrs.c
@@ -336,6 +336,7 @@ static int __init feat_enable_mmu_radix(struct dt_cpu_feature *f)
#ifdef CONFIG_PPC_RADIX_MMU
cur_cpu_spec->mmu_features |= MMU_FTR_TYPE_RADIX;
cur_cpu_spec->mmu_features |= MMU_FTRS_HASH_BASE;
+ cur_cpu_spec->mmu_features |= MMU_FTR_GTSE;
cur_cpu_spec->cpu_user_features |= PPC_FEATURE_HAS_MMU;
return 1;
diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index 90c604d00b7d..cbc605cfdec0 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -1336,12 +1336,15 @@ static void __init prom_check_platform_support(void)
}
}
- if (supported.radix_mmu && supported.radix_gtse &&
- IS_ENABLED(CONFIG_PPC_RADIX_MMU)) {
- /* Radix preferred - but we require GTSE for now */
- prom_debug("Asking for radix with GTSE\n");
+ if (supported.radix_mmu && IS_ENABLED(CONFIG_PPC_RADIX_MMU)) {
+ /* Radix preferred - Check if GTSE is also supported */
+ prom_debug("Asking for radix\n");
ibm_architecture_vec.vec5.mmu = OV5_FEAT(OV5_MMU_RADIX);
- ibm_architecture_vec.vec5.radix_ext = OV5_FEAT(OV5_RADIX_GTSE);
+ if (supported.radix_gtse)
+ ibm_architecture_vec.vec5.radix_ext =
+ OV5_FEAT(OV5_RADIX_GTSE);
+ else
+ prom_debug("Radix GTSE isn't supported\n");
} else if (supported.hash_mmu) {
/* Default to hash mmu (if we can) */
prom_debug("Asking for hash\n");
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index bc73abf0bc25..152aa0200cef 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -407,12 +407,15 @@ static void __init early_check_vec5(void)
if (!(vec5[OV5_INDX(OV5_RADIX_GTSE)] &
OV5_FEAT(OV5_RADIX_GTSE))) {
pr_warn("WARNING: Hypervisor doesn't support RADIX with GTSE\n");
- }
+ cur_cpu_spec->mmu_features &= ~MMU_FTR_GTSE;
+ } else
+ cur_cpu_spec->mmu_features |= MMU_FTR_GTSE;
/* Do radix anyway - the hypervisor said we had to */
cur_cpu_spec->mmu_features |= MMU_FTR_TYPE_RADIX;
} else if (mmu_supported == OV5_FEAT(OV5_MMU_HASH)) {
/* Hypervisor only supports hash - disable radix */
cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX;
+ cur_cpu_spec->mmu_features &= ~MMU_FTR_GTSE;
}
}
--
2.21.3
^ permalink raw reply related
* [PATCH v3 2/3] powerpc/pseries: H_REGISTER_PROC_TBL should ask for GTSE only if enabled
From: Bharata B Rao @ 2020-07-03 5:36 UTC (permalink / raw)
To: linuxppc-dev; +Cc: aneesh.kumar, Bharata B Rao, npiggin
In-Reply-To: <20200703053608.12884-1-bharata@linux.ibm.com>
H_REGISTER_PROC_TBL asks for GTSE by default. GTSE flag bit should
be set only when GTSE is supported.
Signed-off-by: Bharata B Rao <bharata@linux.ibm.com>
Reviewed-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
---
arch/powerpc/platforms/pseries/lpar.c | 8 +++++---
1 file changed, 5 insertions(+), 3 deletions(-)
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index fd26f3d21d7b..f82569a505f1 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -1680,9 +1680,11 @@ static int pseries_lpar_register_process_table(unsigned long base,
if (table_size)
flags |= PROC_TABLE_NEW;
- if (radix_enabled())
- flags |= PROC_TABLE_RADIX | PROC_TABLE_GTSE;
- else
+ if (radix_enabled()) {
+ flags |= PROC_TABLE_RADIX;
+ if (mmu_has_feature(MMU_FTR_GTSE))
+ flags |= PROC_TABLE_GTSE;
+ } else
flags |= PROC_TABLE_HPT_SLB;
for (;;) {
rc = plpar_hcall_norets(H_REGISTER_PROC_TBL, flags, base,
--
2.21.3
^ permalink raw reply related
* [PATCH v3 3/3] powerpc/mm/book3s64/radix: Off-load TLB invalidations to host when !GTSE
From: Bharata B Rao @ 2020-07-03 5:36 UTC (permalink / raw)
To: linuxppc-dev; +Cc: aneesh.kumar, Bharata B Rao, npiggin
In-Reply-To: <20200703053608.12884-1-bharata@linux.ibm.com>
From: Nicholas Piggin <npiggin@gmail.com>
When platform doesn't support GTSE, let TLB invalidation requests
for radix guests be off-loaded to the host using H_RPT_INVALIDATE
hcall.
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Bharata B Rao <bharata@linux.ibm.com>
[hcall wrapper, error path handling and renames]
---
.../include/asm/book3s/64/tlbflush-radix.h | 15 ++++
arch/powerpc/include/asm/hvcall.h | 34 +++++++-
arch/powerpc/include/asm/plpar_wrappers.h | 52 ++++++++++++
arch/powerpc/mm/book3s64/radix_tlb.c | 82 +++++++++++++++++--
4 files changed, 175 insertions(+), 8 deletions(-)
diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
index ca8db193ae38..94439e0cefc9 100644
--- a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
+++ b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
@@ -2,10 +2,25 @@
#ifndef _ASM_POWERPC_TLBFLUSH_RADIX_H
#define _ASM_POWERPC_TLBFLUSH_RADIX_H
+#include <asm/hvcall.h>
+
struct vm_area_struct;
struct mm_struct;
struct mmu_gather;
+static inline u64 psize_to_rpti_pgsize(unsigned long psize)
+{
+ if (psize == MMU_PAGE_4K)
+ return H_RPTI_PAGE_4K;
+ if (psize == MMU_PAGE_64K)
+ return H_RPTI_PAGE_64K;
+ if (psize == MMU_PAGE_2M)
+ return H_RPTI_PAGE_2M;
+ if (psize == MMU_PAGE_1G)
+ return H_RPTI_PAGE_1G;
+ return H_RPTI_PAGE_ALL;
+}
+
static inline int mmu_get_ap(int psize)
{
return mmu_psize_defs[psize].ap;
diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h
index e90c073e437e..43486e773bd6 100644
--- a/arch/powerpc/include/asm/hvcall.h
+++ b/arch/powerpc/include/asm/hvcall.h
@@ -305,7 +305,8 @@
#define H_SCM_UNBIND_ALL 0x3FC
#define H_SCM_HEALTH 0x400
#define H_SCM_PERFORMANCE_STATS 0x418
-#define MAX_HCALL_OPCODE H_SCM_PERFORMANCE_STATS
+#define H_RPT_INVALIDATE 0x448
+#define MAX_HCALL_OPCODE H_RPT_INVALIDATE
/* Scope args for H_SCM_UNBIND_ALL */
#define H_UNBIND_SCOPE_ALL (0x1)
@@ -389,6 +390,37 @@
#define PROC_TABLE_RADIX 0x04
#define PROC_TABLE_GTSE 0x01
+/*
+ * Defines for
+ * H_RPT_INVALIDATE - Invalidate RPT translation lookaside information.
+ */
+
+/* Type of translation to invalidate (type) */
+#define H_RPTI_TYPE_NESTED 0x0001 /* Invalidate nested guest partition-scope */
+#define H_RPTI_TYPE_TLB 0x0002 /* Invalidate TLB */
+#define H_RPTI_TYPE_PWC 0x0004 /* Invalidate Page Walk Cache */
+/* Invalidate Process Table Entries if H_RPTI_TYPE_NESTED is clear */
+#define H_RPTI_TYPE_PRT 0x0008
+/* Invalidate Partition Table Entries if H_RPTI_TYPE_NESTED is set */
+#define H_RPTI_TYPE_PAT 0x0008
+#define H_RPTI_TYPE_ALL (H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC | \
+ H_RPTI_TYPE_PRT)
+#define H_RPTI_TYPE_NESTED_ALL (H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC | \
+ H_RPTI_TYPE_PAT)
+
+/* Invalidation targets (target) */
+#define H_RPTI_TARGET_CMMU 0x01 /* All virtual processors in the partition */
+#define H_RPTI_TARGET_CMMU_LOCAL 0x02 /* Current virtual processor */
+/* All nest/accelerator agents in use by the partition */
+#define H_RPTI_TARGET_NMMU 0x04
+
+/* Page size mask (page sizes) */
+#define H_RPTI_PAGE_4K 0x01
+#define H_RPTI_PAGE_64K 0x02
+#define H_RPTI_PAGE_2M 0x04
+#define H_RPTI_PAGE_1G 0x08
+#define H_RPTI_PAGE_ALL (-1UL)
+
#ifndef __ASSEMBLY__
#include <linux/types.h>
diff --git a/arch/powerpc/include/asm/plpar_wrappers.h b/arch/powerpc/include/asm/plpar_wrappers.h
index 4497c8afb573..4293c5d2ddf4 100644
--- a/arch/powerpc/include/asm/plpar_wrappers.h
+++ b/arch/powerpc/include/asm/plpar_wrappers.h
@@ -334,6 +334,51 @@ static inline long plpar_get_cpu_characteristics(struct h_cpu_char_result *p)
return rc;
}
+/*
+ * Wrapper to H_RPT_INVALIDATE hcall that handles return values appropriately
+ *
+ * - Returns H_SUCCESS on success
+ * - For H_BUSY return value, we retry the hcall.
+ * - For any other hcall failures, attempt a full flush once before
+ * resorting to BUG().
+ *
+ * Note: This hcall is expected to fail only very rarely. The correct
+ * error recovery of killing the process/guest will be eventually
+ * needed.
+ */
+static inline long pseries_rpt_invalidate(u32 pid, u64 target, u64 type,
+ u64 page_sizes, u64 start, u64 end)
+{
+ long rc;
+ unsigned long all;
+
+ while (true) {
+ rc = plpar_hcall_norets(H_RPT_INVALIDATE, pid, target, type,
+ page_sizes, start, end);
+ if (rc == H_BUSY) {
+ cpu_relax();
+ continue;
+ } else if (rc == H_SUCCESS)
+ return rc;
+
+ /* Flush request failed, try with a full flush once */
+ if (type & H_RPTI_TYPE_NESTED)
+ all = H_RPTI_TYPE_NESTED | H_RPTI_TYPE_NESTED_ALL;
+ else
+ all = H_RPTI_TYPE_ALL;
+retry:
+ rc = plpar_hcall_norets(H_RPT_INVALIDATE, pid, target,
+ all, page_sizes, 0, -1UL);
+ if (rc == H_BUSY) {
+ cpu_relax();
+ goto retry;
+ } else if (rc == H_SUCCESS)
+ return rc;
+
+ BUG();
+ }
+}
+
#else /* !CONFIG_PPC_PSERIES */
static inline long plpar_set_ciabr(unsigned long ciabr)
@@ -346,6 +391,13 @@ static inline long plpar_pte_read_4(unsigned long flags, unsigned long ptex,
{
return 0;
}
+
+static inline long pseries_rpt_invalidate(u32 pid, u64 target, u64 type,
+ u64 page_sizes, u64 start, u64 end)
+{
+ return 0;
+}
+
#endif /* CONFIG_PPC_PSERIES */
#endif /* _ASM_POWERPC_PLPAR_WRAPPERS_H */
diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c
index b5cc9b23cf02..0d233763441f 100644
--- a/arch/powerpc/mm/book3s64/radix_tlb.c
+++ b/arch/powerpc/mm/book3s64/radix_tlb.c
@@ -16,6 +16,7 @@
#include <asm/tlbflush.h>
#include <asm/trace.h>
#include <asm/cputhreads.h>
+#include <asm/plpar_wrappers.h>
#define RIC_FLUSH_TLB 0
#define RIC_FLUSH_PWC 1
@@ -694,7 +695,14 @@ void radix__flush_tlb_mm(struct mm_struct *mm)
goto local;
}
- if (cputlb_use_tlbie()) {
+ if (!mmu_has_feature(MMU_FTR_GTSE)) {
+ unsigned long tgt = H_RPTI_TARGET_CMMU;
+
+ if (atomic_read(&mm->context.copros) > 0)
+ tgt |= H_RPTI_TARGET_NMMU;
+ pseries_rpt_invalidate(pid, tgt, H_RPTI_TYPE_TLB,
+ H_RPTI_PAGE_ALL, 0, -1UL);
+ } else if (cputlb_use_tlbie()) {
if (mm_needs_flush_escalation(mm))
_tlbie_pid(pid, RIC_FLUSH_ALL);
else
@@ -727,7 +735,16 @@ static void __flush_all_mm(struct mm_struct *mm, bool fullmm)
goto local;
}
}
- if (cputlb_use_tlbie())
+ if (!mmu_has_feature(MMU_FTR_GTSE)) {
+ unsigned long tgt = H_RPTI_TARGET_CMMU;
+ unsigned long type = H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC |
+ H_RPTI_TYPE_PRT;
+
+ if (atomic_read(&mm->context.copros) > 0)
+ tgt |= H_RPTI_TARGET_NMMU;
+ pseries_rpt_invalidate(pid, tgt, type,
+ H_RPTI_PAGE_ALL, 0, -1UL);
+ } else if (cputlb_use_tlbie())
_tlbie_pid(pid, RIC_FLUSH_ALL);
else
_tlbiel_pid_multicast(mm, pid, RIC_FLUSH_ALL);
@@ -760,7 +777,19 @@ void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
exit_flush_lazy_tlbs(mm);
goto local;
}
- if (cputlb_use_tlbie())
+ if (!mmu_has_feature(MMU_FTR_GTSE)) {
+ unsigned long tgt, pg_sizes, size;
+
+ tgt = H_RPTI_TARGET_CMMU;
+ pg_sizes = psize_to_rpti_pgsize(psize);
+ size = 1UL << mmu_psize_to_shift(psize);
+
+ if (atomic_read(&mm->context.copros) > 0)
+ tgt |= H_RPTI_TARGET_NMMU;
+ pseries_rpt_invalidate(pid, tgt, H_RPTI_TYPE_TLB,
+ pg_sizes, vmaddr,
+ vmaddr + size);
+ } else if (cputlb_use_tlbie())
_tlbie_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
else
_tlbiel_va_multicast(mm, vmaddr, pid, psize, RIC_FLUSH_TLB);
@@ -810,7 +839,14 @@ static inline void _tlbiel_kernel_broadcast(void)
*/
void radix__flush_tlb_kernel_range(unsigned long start, unsigned long end)
{
- if (cputlb_use_tlbie())
+ if (!mmu_has_feature(MMU_FTR_GTSE)) {
+ unsigned long tgt = H_RPTI_TARGET_CMMU | H_RPTI_TARGET_NMMU;
+ unsigned long type = H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC |
+ H_RPTI_TYPE_PRT;
+
+ pseries_rpt_invalidate(0, tgt, type, H_RPTI_PAGE_ALL,
+ start, end);
+ } else if (cputlb_use_tlbie())
_tlbie_pid(0, RIC_FLUSH_ALL);
else
_tlbiel_kernel_broadcast();
@@ -864,7 +900,17 @@ static inline void __radix__flush_tlb_range(struct mm_struct *mm,
nr_pages > tlb_local_single_page_flush_ceiling);
}
- if (full) {
+ if (!mmu_has_feature(MMU_FTR_GTSE) && !local) {
+ unsigned long tgt = H_RPTI_TARGET_CMMU;
+ unsigned long pg_sizes = psize_to_rpti_pgsize(mmu_virtual_psize);
+
+ if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
+ pg_sizes |= psize_to_rpti_pgsize(MMU_PAGE_2M);
+ if (atomic_read(&mm->context.copros) > 0)
+ tgt |= H_RPTI_TARGET_NMMU;
+ pseries_rpt_invalidate(pid, tgt, H_RPTI_TYPE_TLB, pg_sizes,
+ start, end);
+ } else if (full) {
if (local) {
_tlbiel_pid(pid, RIC_FLUSH_TLB);
} else {
@@ -1046,7 +1092,17 @@ static __always_inline void __radix__flush_tlb_range_psize(struct mm_struct *mm,
nr_pages > tlb_local_single_page_flush_ceiling);
}
- if (full) {
+ if (!mmu_has_feature(MMU_FTR_GTSE) && !local) {
+ unsigned long tgt = H_RPTI_TARGET_CMMU;
+ unsigned long type = H_RPTI_TYPE_TLB;
+ unsigned long pg_sizes = psize_to_rpti_pgsize(psize);
+
+ if (also_pwc)
+ type |= H_RPTI_TYPE_PWC;
+ if (atomic_read(&mm->context.copros) > 0)
+ tgt |= H_RPTI_TARGET_NMMU;
+ pseries_rpt_invalidate(pid, tgt, type, pg_sizes, start, end);
+ } else if (full) {
if (local) {
_tlbiel_pid(pid, also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB);
} else {
@@ -1111,7 +1167,19 @@ void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr)
exit_flush_lazy_tlbs(mm);
goto local;
}
- if (cputlb_use_tlbie())
+ if (!mmu_has_feature(MMU_FTR_GTSE)) {
+ unsigned long tgt, type, pg_sizes;
+
+ tgt = H_RPTI_TARGET_CMMU;
+ type = H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC |
+ H_RPTI_TYPE_PRT;
+ pg_sizes = psize_to_rpti_pgsize(mmu_virtual_psize);
+
+ if (atomic_read(&mm->context.copros) > 0)
+ tgt |= H_RPTI_TARGET_NMMU;
+ pseries_rpt_invalidate(pid, tgt, type, pg_sizes,
+ addr, end);
+ } else if (cputlb_use_tlbie())
_tlbie_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
else
_tlbiel_va_range_multicast(mm,
--
2.21.3
^ permalink raw reply related
* [PATCH v3 0/6] Remove default DMA window before creating DDW
From: Leonardo Bras @ 2020-07-03 6:18 UTC (permalink / raw)
To: Michael Ellerman, Benjamin Herrenschmidt, Paul Mackerras,
Alexey Kardashevskiy, Leonardo Bras, Thiago Jung Bauermann,
Ram Pai
Cc: linuxppc-dev, linux-kernel
There are some devices in which a hypervisor may only allow 1 DMA window
to exist at a time, and in those cases, a DDW is never created to them,
since the default DMA window keeps using this resource.
LoPAR recommends this procedure:
1. Remove the default DMA window,
2. Query for which configs the DDW can be created,
3. Create a DDW.
Patch #1:
Create defines for outputs of ibm,ddw-applicable, so it's easier to
identify them.
Patch #2:
- After LoPAR level 2.8, there is an extension that can make
ibm,query-pe-dma-windows to have 6 outputs instead of 5. This changes the
order of the outputs, and that can cause some trouble.
- query_ddw() was updated to check how many outputs the
ibm,query-pe-dma-windows is supposed to have, update the rtas_call() and
deal correctly with the outputs in both cases.
- This patch looks somehow unrelated to the series, but it can avoid future
problems on DDW creation.
Patch #3 moves the window-removing code from remove_ddw() to
remove_dma_window(), creating a way to delete any DMA window, so it can be
used to delete the default DMA window.
Patch #4 makes use of the remove_dma_window() from patch #3 to remove the
default DMA window before query_ddw(). It also implements a new rtas call
to recover the default DMA window, in case anything fails after it was
removed, and a DDW couldn't be created.
Patch #5:
Instead of destroying the created DDW if it doesn't map the whole
partition, make use of it instead of the default DMA window as it improves
performance.
Patch #6:
Does some renaming of 'direct window' to 'dma window', given the DDW
created can now be also used in indirect mapping if direct mapping is not
available.
All patches were tested into an LPAR with an Ethernet VF:
4005:01:00.0 Ethernet controller: Mellanox Technologies MT27700 Family
[ConnectX-4 Virtual Function]
Patch #5 It was tested with a 64GB DDW which did not map the whole
partition (128G). Performance improvement noticed by using the DDW instead
of the default DMA window:
64 thread write throughput: +203.0%
64 thread read throughput: +17.5%
1 thread write throughput: +20.5%
1 thread read throughput: +3.43%
Average write latency: -23.0%
Average read latency: -2.26%
---
Changes since v2:
- Change the way ibm,ddw-extensions is accessed, using a proper function
instead of doing this inline everytime it's used.
- Remove previous patch #6, as it doesn't look like it would be useful.
- Add new patch, for changing names from direct* to dma*, as indirect
mapping can be used from now on.
- Fix some typos, corrects some define usage.
- v2 link: http://patchwork.ozlabs.org/project/linuxppc-dev/list/?series=185433&state=%2A&archive=both
Changes since v1:
- Add defines for ibm,ddw-applicable and ibm,ddw-extensions outputs
- Merge aux function query_ddw_out_sz() into query_ddw()
- Merge reset_dma_window() patch (prev. #2) into remove default DMA
window patch (#4).
- Keep device_node *np name instead of using pdn in remove_*()
- Rename 'device_node *pdn' into 'parent' in new functions
- Rename dfl_win to default_win
- Only remove the default DMA window if there is no window available
in first query.
- Check if default DMA window can be restored before removing it.
- Fix 'unitialized use' (found by travis mpe:ci-test)
- New patches #5 and #6
- v1 link: http://patchwork.ozlabs.org/project/linuxppc-dev/list/?series=184420&state=%2A&archive=both
Special thanks for Alexey Kardashevskiy and Oliver O'Halloran for
the feedback provided!
Leonardo Bras (6):
powerpc/pseries/iommu: Create defines for operations in
ibm,ddw-applicable
powerpc/pseries/iommu: Update call to ibm,query-pe-dma-windows
powerpc/pseries/iommu: Move window-removing part of remove_ddw into
remove_dma_window
powerpc/pseries/iommu: Remove default DMA window before creating DDW
powerpc/pseries/iommu: Make use of DDW even if it does not map the
partition
powerpc/pseries/iommu: Rename "direct window" to "dma window"
arch/powerpc/platforms/pseries/iommu.c | 379 ++++++++++++++++++-------
1 file changed, 269 insertions(+), 110 deletions(-)
--
2.25.4
^ permalink raw reply
* [PATCH v3 1/6] powerpc/pseries/iommu: Create defines for operations in ibm, ddw-applicable
From: Leonardo Bras @ 2020-07-03 6:18 UTC (permalink / raw)
To: Michael Ellerman, Benjamin Herrenschmidt, Paul Mackerras,
Alexey Kardashevskiy, Leonardo Bras, Thiago Jung Bauermann,
Ram Pai
Cc: linuxppc-dev, linux-kernel
In-Reply-To: <20200703061844.111865-1-leobras.c@gmail.com>
Create defines to help handling ibm,ddw-applicable values, avoiding
confusion about the index of given operations.
Signed-off-by: Leonardo Bras <leobras.c@gmail.com>
---
arch/powerpc/platforms/pseries/iommu.c | 43 ++++++++++++++++----------
1 file changed, 26 insertions(+), 17 deletions(-)
diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
index 6d47b4a3ce39..ac0d6376bdad 100644
--- a/arch/powerpc/platforms/pseries/iommu.c
+++ b/arch/powerpc/platforms/pseries/iommu.c
@@ -39,6 +39,14 @@
#include "pseries.h"
+enum {
+ DDW_QUERY_PE_DMA_WIN = 0,
+ DDW_CREATE_PE_DMA_WIN = 1,
+ DDW_REMOVE_PE_DMA_WIN = 2,
+
+ DDW_APPLICABLE_SIZE
+};
+
static struct iommu_table_group *iommu_pseries_alloc_group(int node)
{
struct iommu_table_group *table_group;
@@ -771,12 +779,12 @@ static void remove_ddw(struct device_node *np, bool remove_prop)
{
struct dynamic_dma_window_prop *dwp;
struct property *win64;
- u32 ddw_avail[3];
+ u32 ddw_avail[DDW_APPLICABLE_SIZE];
u64 liobn;
int ret = 0;
ret = of_property_read_u32_array(np, "ibm,ddw-applicable",
- &ddw_avail[0], 3);
+ &ddw_avail[0], DDW_APPLICABLE_SIZE);
win64 = of_find_property(np, DIRECT64_PROPNAME, NULL);
if (!win64)
@@ -798,15 +806,15 @@ static void remove_ddw(struct device_node *np, bool remove_prop)
pr_debug("%pOF successfully cleared tces in window.\n",
np);
- ret = rtas_call(ddw_avail[2], 1, 1, NULL, liobn);
+ ret = rtas_call(ddw_avail[DDW_REMOVE_PE_DMA_WIN], 1, 1, NULL, liobn);
if (ret)
pr_warn("%pOF: failed to remove direct window: rtas returned "
"%d to ibm,remove-pe-dma-window(%x) %llx\n",
- np, ret, ddw_avail[2], liobn);
+ np, ret, ddw_avail[DDW_REMOVE_PE_DMA_WIN], liobn);
else
pr_debug("%pOF: successfully removed direct window: rtas returned "
"%d to ibm,remove-pe-dma-window(%x) %llx\n",
- np, ret, ddw_avail[2], liobn);
+ np, ret, ddw_avail[DDW_REMOVE_PE_DMA_WIN], liobn);
delprop:
if (remove_prop)
@@ -889,11 +897,11 @@ static int query_ddw(struct pci_dev *dev, const u32 *ddw_avail,
buid = pdn->phb->buid;
cfg_addr = ((pdn->busno << 16) | (pdn->devfn << 8));
- ret = rtas_call(ddw_avail[0], 3, 5, (u32 *)query,
- cfg_addr, BUID_HI(buid), BUID_LO(buid));
+ ret = rtas_call(ddw_avail[DDW_QUERY_PE_DMA_WIN], 3, 5, (u32 *)query,
+ cfg_addr, BUID_HI(buid), BUID_LO(buid));
dev_info(&dev->dev, "ibm,query-pe-dma-windows(%x) %x %x %x"
- " returned %d\n", ddw_avail[0], cfg_addr, BUID_HI(buid),
- BUID_LO(buid), ret);
+ " returned %d\n", ddw_avail[DDW_QUERY_PE_DMA_WIN], cfg_addr,
+ BUID_HI(buid), BUID_LO(buid), ret);
return ret;
}
@@ -920,15 +928,16 @@ static int create_ddw(struct pci_dev *dev, const u32 *ddw_avail,
do {
/* extra outputs are LIOBN and dma-addr (hi, lo) */
- ret = rtas_call(ddw_avail[1], 5, 4, (u32 *)create,
- cfg_addr, BUID_HI(buid), BUID_LO(buid),
- page_shift, window_shift);
+ ret = rtas_call(ddw_avail[DDW_CREATE_PE_DMA_WIN], 5, 4,
+ (u32 *)create, cfg_addr, BUID_HI(buid),
+ BUID_LO(buid), page_shift, window_shift);
} while (rtas_busy_delay(ret));
dev_info(&dev->dev,
"ibm,create-pe-dma-window(%x) %x %x %x %x %x returned %d "
- "(liobn = 0x%x starting addr = %x %x)\n", ddw_avail[1],
- cfg_addr, BUID_HI(buid), BUID_LO(buid), page_shift,
- window_shift, ret, create->liobn, create->addr_hi, create->addr_lo);
+ "(liobn = 0x%x starting addr = %x %x)\n",
+ ddw_avail[DDW_CREATE_PE_DMA_WIN], cfg_addr, BUID_HI(buid),
+ BUID_LO(buid), page_shift, window_shift, ret, create->liobn,
+ create->addr_hi, create->addr_lo);
return ret;
}
@@ -996,7 +1005,7 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn)
int page_shift;
u64 dma_addr, max_addr;
struct device_node *dn;
- u32 ddw_avail[3];
+ u32 ddw_avail[DDW_APPLICABLE_SIZE];
struct direct_window *window;
struct property *win64;
struct dynamic_dma_window_prop *ddwprop;
@@ -1029,7 +1038,7 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn)
* the property is actually in the parent, not the PE
*/
ret = of_property_read_u32_array(pdn, "ibm,ddw-applicable",
- &ddw_avail[0], 3);
+ &ddw_avail[0], DDW_APPLICABLE_SIZE);
if (ret)
goto out_failed;
--
2.25.4
^ permalink raw reply related
* [PATCH v3 2/6] powerpc/pseries/iommu: Update call to ibm, query-pe-dma-windows
From: Leonardo Bras @ 2020-07-03 6:18 UTC (permalink / raw)
To: Michael Ellerman, Benjamin Herrenschmidt, Paul Mackerras,
Alexey Kardashevskiy, Leonardo Bras, Thiago Jung Bauermann,
Ram Pai
Cc: linuxppc-dev, linux-kernel
In-Reply-To: <20200703061844.111865-1-leobras.c@gmail.com>
From LoPAR level 2.8, "ibm,ddw-extensions" index 3 can make the number of
outputs from "ibm,query-pe-dma-windows" go from 5 to 6.
This change of output size is meant to expand the address size of
largest_available_block PE TCE from 32-bit to 64-bit, which ends up
shifting page_size and migration_capable.
This ends up requiring the update of
ddw_query_response->largest_available_block from u32 to u64, and manually
assigning the values from the buffer into this struct, according to
output size.
Also, a routine was created for helping reading the ddw extensions as
suggested by LoPAR: First reading the size of the extension array from
index 0, checking if the property exists, and then returning it's value.
Signed-off-by: Leonardo Bras <leobras.c@gmail.com>
---
arch/powerpc/platforms/pseries/iommu.c | 91 +++++++++++++++++++++++---
1 file changed, 81 insertions(+), 10 deletions(-)
diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
index ac0d6376bdad..1a933c4e8bba 100644
--- a/arch/powerpc/platforms/pseries/iommu.c
+++ b/arch/powerpc/platforms/pseries/iommu.c
@@ -47,6 +47,12 @@ enum {
DDW_APPLICABLE_SIZE
};
+enum {
+ DDW_EXT_SIZE = 0,
+ DDW_EXT_RESET_DMA_WIN = 1,
+ DDW_EXT_QUERY_OUT_SIZE = 2
+};
+
static struct iommu_table_group *iommu_pseries_alloc_group(int node)
{
struct iommu_table_group *table_group;
@@ -342,7 +348,7 @@ struct direct_window {
/* Dynamic DMA Window support */
struct ddw_query_response {
u32 windows_available;
- u32 largest_available_block;
+ u64 largest_available_block;
u32 page_size;
u32 migration_capable;
};
@@ -877,14 +883,62 @@ static int find_existing_ddw_windows(void)
}
machine_arch_initcall(pseries, find_existing_ddw_windows);
+/**
+ * ddw_read_ext - Get the value of an DDW extension
+ * @np: device node from which the extension value is to be read.
+ * @extnum: index number of the extension.
+ * @value: pointer to return value, modified when extension is available.
+ *
+ * Checks if "ibm,ddw-extensions" exists for this node, and get the value
+ * on index 'extnum'.
+ * It can be used only to check if a property exists, passing value == NULL.
+ *
+ * Returns:
+ * 0 if extension successfully read
+ * -EINVAL if the "ibm,ddw-extensions" does not exist,
+ * -ENODATA if "ibm,ddw-extensions" does not have a value, and
+ * -EOVERFLOW if "ibm,ddw-extensions" does not contain this extension.
+ */
+static inline int ddw_read_ext(const struct device_node *np, int extnum,
+ u32 *value)
+{
+ static const char propname[] = "ibm,ddw-extensions";
+ u32 count;
+ int ret;
+
+ ret = of_property_read_u32_index(np, propname, DDW_EXT_SIZE, &count);
+ if (ret)
+ return ret;
+
+ if (count < extnum)
+ return -EOVERFLOW;
+
+ if (!value)
+ value = &count;
+
+ return of_property_read_u32_index(np, propname, extnum, value);
+}
+
static int query_ddw(struct pci_dev *dev, const u32 *ddw_avail,
- struct ddw_query_response *query)
+ struct ddw_query_response *query,
+ struct device_node *parent)
{
struct device_node *dn;
struct pci_dn *pdn;
- u32 cfg_addr;
+ u32 cfg_addr, ext_query, query_out[5];
u64 buid;
- int ret;
+ int ret, out_sz;
+
+ /*
+ * From LoPAR level 2.8, "ibm,ddw-extensions" index 3 can rule how many
+ * output parameters ibm,query-pe-dma-windows will have, ranging from
+ * 5 to 6.
+ */
+ ret = ddw_read_ext(parent, DDW_EXT_QUERY_OUT_SIZE, &ext_query);
+ if (!ret && ext_query == 1)
+ out_sz = 6;
+ else
+ out_sz = 5;
/*
* Get the config address and phb buid of the PE window.
@@ -897,11 +951,28 @@ static int query_ddw(struct pci_dev *dev, const u32 *ddw_avail,
buid = pdn->phb->buid;
cfg_addr = ((pdn->busno << 16) | (pdn->devfn << 8));
- ret = rtas_call(ddw_avail[DDW_QUERY_PE_DMA_WIN], 3, 5, (u32 *)query,
+ ret = rtas_call(ddw_avail[DDW_QUERY_PE_DMA_WIN], 3, out_sz, query_out,
cfg_addr, BUID_HI(buid), BUID_LO(buid));
- dev_info(&dev->dev, "ibm,query-pe-dma-windows(%x) %x %x %x"
- " returned %d\n", ddw_avail[DDW_QUERY_PE_DMA_WIN], cfg_addr,
- BUID_HI(buid), BUID_LO(buid), ret);
+ dev_info(&dev->dev, "ibm,query-pe-dma-windows(%x) %x %x %x returned %d\n",
+ ddw_avail[DDW_QUERY_PE_DMA_WIN], cfg_addr, BUID_HI(buid),
+ BUID_LO(buid), ret);
+
+ switch (out_sz) {
+ case 5:
+ query->windows_available = query_out[0];
+ query->largest_available_block = query_out[1];
+ query->page_size = query_out[2];
+ query->migration_capable = query_out[3];
+ break;
+ case 6:
+ query->windows_available = query_out[0];
+ query->largest_available_block = ((u64)query_out[1] << 32) |
+ query_out[2];
+ query->page_size = query_out[3];
+ query->migration_capable = query_out[4];
+ break;
+ }
+
return ret;
}
@@ -1049,7 +1120,7 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn)
* of page sizes: supported and supported for migrate-dma.
*/
dn = pci_device_to_OF_node(dev);
- ret = query_ddw(dev, ddw_avail, &query);
+ ret = query_ddw(dev, ddw_avail, &query, pdn);
if (ret != 0)
goto out_failed;
@@ -1077,7 +1148,7 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn)
/* check largest block * page size > max memory hotplug addr */
max_addr = ddw_memory_hotplug_max();
if (query.largest_available_block < (max_addr >> page_shift)) {
- dev_dbg(&dev->dev, "can't map partition max 0x%llx with %u "
+ dev_dbg(&dev->dev, "can't map partition max 0x%llx with %llu "
"%llu-sized pages\n", max_addr, query.largest_available_block,
1ULL << page_shift);
goto out_failed;
--
2.25.4
^ permalink raw reply related
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox