Linux-ARM-Kernel Archive on lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] KVM: arm64: Ensure CPU PMU probes before pKVM host de-privilege
From: Will Deacon @ 2023-04-20 12:33 UTC (permalink / raw)
  To: linux-arm-kernel
  Cc: kvmarm, Will Deacon, Oliver Upton, Fuad Tabba, Marc Zyngier

Although pKVM supports CPU PMU emulation for non-protected guests since
722625c6f4c5 ("KVM: arm64: Reenable pmu in Protected Mode"), this relies
on the PMU driver probing before the host has de-privileged so that the
'kvm_arm_pmu_available' static key can still be enabled by patching the
hypervisor text.

As it happens, both of these events hang off device_initcall() but the
PMU consistently won the race until 7755cec63ade ("arm64: perf: Move
PMUv3 driver to drivers/perf"). Since then, the host will fail to boot
when pKVM is enabled:

  | hw perfevents: enabled with armv8_pmuv3_0 PMU driver, 7 counters available
  | kvm [1]: nVHE hyp BUG at: [<ffff8000090366e0>] __kvm_nvhe_handle_host_mem_abort+0x270/0x284!
  | kvm [1]: Cannot dump pKVM nVHE stacktrace: !CONFIG_PROTECTED_NVHE_STACKTRACE
  | kvm [1]: Hyp Offset: 0xfffea41fbdf70000
  | Kernel panic - not syncing: HYP panic:
  | PS:a00003c9 PC:0000dbe04b0c66e0 ESR:00000000f2000800
  | FAR:fffffbfffddfcf00 HPFAR:00000000010b0bf0 PAR:0000000000000000
  | VCPU:0000000000000000
  | CPU: 2 PID: 1 Comm: swapper/0 Not tainted 6.3.0-rc7-00083-g0bce6746d154 #1
  | Hardware name: QEMU QEMU Virtual Machine, BIOS 0.0.0 02/06/2015
  | Call trace:
  |  dump_backtrace+0xec/0x108
  |  show_stack+0x18/0x2c
  |  dump_stack_lvl+0x50/0x68
  |  dump_stack+0x18/0x24
  |  panic+0x13c/0x33c
  |  nvhe_hyp_panic_handler+0x10c/0x190
  |  aarch64_insn_patch_text_nosync+0x64/0xc8
  |  arch_jump_label_transform+0x4c/0x5c
  |  __jump_label_update+0x84/0xfc
  |  jump_label_update+0x100/0x134
  |  static_key_enable_cpuslocked+0x68/0xac
  |  static_key_enable+0x20/0x34
  |  kvm_host_pmu_init+0x88/0xa4
  |  armpmu_register+0xf0/0xf4
  |  arm_pmu_acpi_probe+0x2ec/0x368
  |  armv8_pmu_driver_init+0x38/0x44
  |  do_one_initcall+0xcc/0x240

Fix the race properly by deferring the de-privilege step to
device_initcall_sync(). This will also be needed in future when probing
IOMMU devices and allows us to separate the pKVM de-privilege logic from
the core hypervisor initialisation path.

Cc: Oliver Upton <oliver.upton@linux.dev>
Cc: Fuad Tabba <tabba@google.com>
Cc: Marc Zyngier <maz@kernel.org>
Fixes: 7755cec63ade ("arm64: perf: Move PMUv3 driver to drivers/perf")
Signed-off-by: Will Deacon <will@kernel.org>
---

Marc, Oliver -- in practice, this issue only crops with the patches
moving the CPU PMU driver out into drivers/perf/ and so the arm64
for-next/core branch is broken. Please can I queue this in the arm64
tree for 6.4 with your Ack? Thanks.

 arch/arm64/kvm/arm.c  | 45 -----------------------------------------
 arch/arm64/kvm/pkvm.c | 47 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 47 insertions(+), 45 deletions(-)

diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 3bd732eaf087..890f730bc3ab 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -16,7 +16,6 @@
 #include <linux/fs.h>
 #include <linux/mman.h>
 #include <linux/sched.h>
-#include <linux/kmemleak.h>
 #include <linux/kvm.h>
 #include <linux/kvm_irqfd.h>
 #include <linux/irqbypass.h>
@@ -46,7 +45,6 @@
 #include <kvm/arm_psci.h>
 
 static enum kvm_mode kvm_mode = KVM_MODE_DEFAULT;
-DEFINE_STATIC_KEY_FALSE(kvm_protected_mode_initialized);
 
 DECLARE_KVM_HYP_PER_CPU(unsigned long, kvm_hyp_vector);
 
@@ -2105,41 +2103,6 @@ static int __init init_hyp_mode(void)
 	return err;
 }
 
-static void __init _kvm_host_prot_finalize(void *arg)
-{
-	int *err = arg;
-
-	if (WARN_ON(kvm_call_hyp_nvhe(__pkvm_prot_finalize)))
-		WRITE_ONCE(*err, -EINVAL);
-}
-
-static int __init pkvm_drop_host_privileges(void)
-{
-	int ret = 0;
-
-	/*
-	 * Flip the static key upfront as that may no longer be possible
-	 * once the host stage 2 is installed.
-	 */
-	static_branch_enable(&kvm_protected_mode_initialized);
-	on_each_cpu(_kvm_host_prot_finalize, &ret, 1);
-	return ret;
-}
-
-static int __init finalize_hyp_mode(void)
-{
-	if (!is_protected_kvm_enabled())
-		return 0;
-
-	/*
-	 * Exclude HYP sections from kmemleak so that they don't get peeked
-	 * at, which would end badly once inaccessible.
-	 */
-	kmemleak_free_part(__hyp_bss_start, __hyp_bss_end - __hyp_bss_start);
-	kmemleak_free_part_phys(hyp_mem_base, hyp_mem_size);
-	return pkvm_drop_host_privileges();
-}
-
 struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr)
 {
 	struct kvm_vcpu *vcpu;
@@ -2257,14 +2220,6 @@ static __init int kvm_arm_init(void)
 	if (err)
 		goto out_hyp;
 
-	if (!in_hyp_mode) {
-		err = finalize_hyp_mode();
-		if (err) {
-			kvm_err("Failed to finalize Hyp protection\n");
-			goto out_subs;
-		}
-	}
-
 	if (is_protected_kvm_enabled()) {
 		kvm_info("Protected nVHE mode initialized successfully\n");
 	} else if (in_hyp_mode) {
diff --git a/arch/arm64/kvm/pkvm.c b/arch/arm64/kvm/pkvm.c
index cf56958b1492..6e9ece1ebbe7 100644
--- a/arch/arm64/kvm/pkvm.c
+++ b/arch/arm64/kvm/pkvm.c
@@ -4,6 +4,8 @@
  * Author: Quentin Perret <qperret@google.com>
  */
 
+#include <linux/init.h>
+#include <linux/kmemleak.h>
 #include <linux/kvm_host.h>
 #include <linux/memblock.h>
 #include <linux/mutex.h>
@@ -13,6 +15,8 @@
 
 #include "hyp_constants.h"
 
+DEFINE_STATIC_KEY_FALSE(kvm_protected_mode_initialized);
+
 static struct memblock_region *hyp_memory = kvm_nvhe_sym(hyp_memory);
 static unsigned int *hyp_memblock_nr_ptr = &kvm_nvhe_sym(hyp_memblock_nr);
 
@@ -213,3 +217,46 @@ int pkvm_init_host_vm(struct kvm *host_kvm)
 	mutex_init(&host_kvm->lock);
 	return 0;
 }
+
+static void __init _kvm_host_prot_finalize(void *arg)
+{
+	int *err = arg;
+
+	if (WARN_ON(kvm_call_hyp_nvhe(__pkvm_prot_finalize)))
+		WRITE_ONCE(*err, -EINVAL);
+}
+
+static int __init pkvm_drop_host_privileges(void)
+{
+	int ret = 0;
+
+	/*
+	 * Flip the static key upfront as that may no longer be possible
+	 * once the host stage 2 is installed.
+	 */
+	static_branch_enable(&kvm_protected_mode_initialized);
+	on_each_cpu(_kvm_host_prot_finalize, &ret, 1);
+	return ret;
+}
+
+static int __init finalize_pkvm(void)
+{
+	int ret;
+
+	if (!is_protected_kvm_enabled())
+		return 0;
+
+	/*
+	 * Exclude HYP sections from kmemleak so that they don't get peeked
+	 * at, which would end badly once inaccessible.
+	 */
+	kmemleak_free_part(__hyp_bss_start, __hyp_bss_end - __hyp_bss_start);
+	kmemleak_free_part_phys(hyp_mem_base, hyp_mem_size);
+
+	ret = pkvm_drop_host_privileges();
+	if (ret)
+		pr_err("Failed to finalize Hyp protection: %d\n", ret);
+
+	return ret;
+}
+device_initcall_sync(finalize_pkvm);
-- 
2.40.0.634.g4ca3ef3211-goog


_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply related

* [PATCH v2 0/2] Support PWM on MediaTek MT7981
From: Daniel Golle @ 2023-04-20 12:35 UTC (permalink / raw)
  To: devicetree, linux-pwm, linux-mediatek, linux-arm-kernel,
	linux-kernel, Rob Herring, Krzysztof Kozlowski, Thierry Reding,
	Uwe Kleine-König, Matthias Brugger,
	AngeloGioacchino Del Regno, John Crispin

Add support for PWM on the MediaTek MT7981 to pwm-mediatek.c as well
as new mediatek,mt7981-pwm compatible string to the existing bindings.

Changes since v1:
 * use pointer to reg_offset instead of u8 reg_ver and if-else

Daniel Golle (2):
  dt-bindings: pwm: mediatek: Add mediatek,mt7981 compatible
  pwm: mediatek: Add support for MT7981

 .../bindings/pwm/mediatek,mt2712-pwm.yaml     |  1 +
 drivers/pwm/pwm-mediatek.c                    | 39 +++++++++++++++----
 2 files changed, 32 insertions(+), 8 deletions(-)


base-commit: 3cdbc01c40e34c57697f8934f2727a88551696be
-- 
2.40.0


_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply

* [PATCH v2 1/2] dt-bindings: pwm: mediatek: Add mediatek,mt7981 compatible
From: Daniel Golle @ 2023-04-20 12:35 UTC (permalink / raw)
  To: devicetree, linux-pwm, linux-mediatek, linux-arm-kernel,
	linux-kernel, Rob Herring, Krzysztof Kozlowski, Thierry Reding,
	Uwe Kleine-König, Matthias Brugger,
	AngeloGioacchino Del Regno, John Crispin
In-Reply-To: <cover.1681992038.git.daniel@makrotopia.org>

Add compatible string for the PWM unit found of the MediaTek MT7981 SoC.
This is in preparation to adding support in the pwm-mediatek.c driver.

Signed-off-by: Daniel Golle <daniel@makrotopia.org>
Reviewed-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
---
No changes since v1.

 Documentation/devicetree/bindings/pwm/mediatek,mt2712-pwm.yaml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Documentation/devicetree/bindings/pwm/mediatek,mt2712-pwm.yaml b/Documentation/devicetree/bindings/pwm/mediatek,mt2712-pwm.yaml
index 8e176ba7a525f..0fbe8a6469eb2 100644
--- a/Documentation/devicetree/bindings/pwm/mediatek,mt2712-pwm.yaml
+++ b/Documentation/devicetree/bindings/pwm/mediatek,mt2712-pwm.yaml
@@ -22,6 +22,7 @@ properties:
           - mediatek,mt7623-pwm
           - mediatek,mt7628-pwm
           - mediatek,mt7629-pwm
+          - mediatek,mt7981-pwm
           - mediatek,mt7986-pwm
           - mediatek,mt8183-pwm
           - mediatek,mt8365-pwm
-- 
2.40.0


_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply related

* [PATCH v2 2/2] pwm: mediatek: Add support for MT7981
From: Daniel Golle @ 2023-04-20 12:36 UTC (permalink / raw)
  To: linux-pwm, linux-mediatek, linux-arm-kernel, linux-kernel,
	Thierry Reding, Uwe Kleine-König, Matthias Brugger,
	AngeloGioacchino Del Regno, John Crispin
In-Reply-To: <cover.1681992038.git.daniel@makrotopia.org>

The PWM unit on MT7981 uses different register offsets than previous
MediaTek PWM units. Add support for these new offsets and add support
for PWM on MT7981 which has 3 PWM channels, one of them is typically
used for a temperature controlled fan.

Signed-off-by: Daniel Golle <daniel@makrotopia.org>
---
Changes since v1:
 * use pointer to reg_offset instead of u8 reg_ver and if-else

 drivers/pwm/pwm-mediatek.c | 39 ++++++++++++++++++++++++++++++--------
 1 file changed, 31 insertions(+), 8 deletions(-)

diff --git a/drivers/pwm/pwm-mediatek.c b/drivers/pwm/pwm-mediatek.c
index 5b5eeaff35da6..7a51d210a8778 100644
--- a/drivers/pwm/pwm-mediatek.c
+++ b/drivers/pwm/pwm-mediatek.c
@@ -38,6 +38,7 @@ struct pwm_mediatek_of_data {
 	unsigned int num_pwms;
 	bool pwm45_fixup;
 	bool has_ck_26m_sel;
+	const unsigned int *reg_offset;
 };
 
 /**
@@ -59,10 +60,14 @@ struct pwm_mediatek_chip {
 	const struct pwm_mediatek_of_data *soc;
 };
 
-static const unsigned int pwm_mediatek_reg_offset[] = {
+static const unsigned int mtk_pwm_reg_offset_v1[] = {
 	0x0010, 0x0050, 0x0090, 0x00d0, 0x0110, 0x0150, 0x0190, 0x0220
 };
 
+static const unsigned int mtk_pwm_reg_offset_v2[] = {
+	0x0080, 0x00c0, 0x0100, 0x0140, 0x0180, 0x01c0, 0x0200, 0x0240
+};
+
 static inline struct pwm_mediatek_chip *
 to_pwm_mediatek_chip(struct pwm_chip *chip)
 {
@@ -111,7 +116,7 @@ static inline void pwm_mediatek_writel(struct pwm_mediatek_chip *chip,
 				       unsigned int num, unsigned int offset,
 				       u32 value)
 {
-	writel(value, chip->regs + pwm_mediatek_reg_offset[num] + offset);
+	writel(value, chip->regs + chip->soc->reg_offset[num] + offset);
 }
 
 static int pwm_mediatek_config(struct pwm_chip *chip, struct pwm_device *pwm,
@@ -285,60 +290,77 @@ static const struct pwm_mediatek_of_data mt2712_pwm_data = {
 	.num_pwms = 8,
 	.pwm45_fixup = false,
 	.has_ck_26m_sel = false,
+	.reg_offset = mtk_pwm_reg_offset_v1,
 };
 
 static const struct pwm_mediatek_of_data mt6795_pwm_data = {
 	.num_pwms = 7,
 	.pwm45_fixup = false,
 	.has_ck_26m_sel = false,
+	.reg_offset = mtk_pwm_reg_offset_v1,
 };
 
 static const struct pwm_mediatek_of_data mt7622_pwm_data = {
 	.num_pwms = 6,
 	.pwm45_fixup = false,
 	.has_ck_26m_sel = true,
+	.reg_offset = mtk_pwm_reg_offset_v1,
 };
 
 static const struct pwm_mediatek_of_data mt7623_pwm_data = {
 	.num_pwms = 5,
 	.pwm45_fixup = true,
 	.has_ck_26m_sel = false,
+	.reg_offset = mtk_pwm_reg_offset_v1,
 };
 
 static const struct pwm_mediatek_of_data mt7628_pwm_data = {
 	.num_pwms = 4,
 	.pwm45_fixup = true,
 	.has_ck_26m_sel = false,
+	.reg_offset = mtk_pwm_reg_offset_v1,
 };
 
 static const struct pwm_mediatek_of_data mt7629_pwm_data = {
 	.num_pwms = 1,
 	.pwm45_fixup = false,
 	.has_ck_26m_sel = false,
+	.reg_offset = mtk_pwm_reg_offset_v1,
 };
 
-static const struct pwm_mediatek_of_data mt8183_pwm_data = {
-	.num_pwms = 4,
+static const struct pwm_mediatek_of_data mt7981_pwm_data = {
+	.num_pwms = 3,
 	.pwm45_fixup = false,
 	.has_ck_26m_sel = true,
+	.reg_offset = mtk_pwm_reg_offset_v2,
 };
 
-static const struct pwm_mediatek_of_data mt8365_pwm_data = {
-	.num_pwms = 3,
+static const struct pwm_mediatek_of_data mt7986_pwm_data = {
+	.num_pwms = 2,
 	.pwm45_fixup = false,
 	.has_ck_26m_sel = true,
+	.reg_offset = mtk_pwm_reg_offset_v1,
 };
 
-static const struct pwm_mediatek_of_data mt7986_pwm_data = {
-	.num_pwms = 2,
+static const struct pwm_mediatek_of_data mt8183_pwm_data = {
+	.num_pwms = 4,
+	.pwm45_fixup = false,
+	.has_ck_26m_sel = true,
+	.reg_offset = mtk_pwm_reg_offset_v1,
+};
+
+static const struct pwm_mediatek_of_data mt8365_pwm_data = {
+	.num_pwms = 3,
 	.pwm45_fixup = false,
 	.has_ck_26m_sel = true,
+	.reg_offset = mtk_pwm_reg_offset_v1,
 };
 
 static const struct pwm_mediatek_of_data mt8516_pwm_data = {
 	.num_pwms = 5,
 	.pwm45_fixup = false,
 	.has_ck_26m_sel = true,
+	.reg_offset = mtk_pwm_reg_offset_v1,
 };
 
 static const struct of_device_id pwm_mediatek_of_match[] = {
@@ -348,6 +370,7 @@ static const struct of_device_id pwm_mediatek_of_match[] = {
 	{ .compatible = "mediatek,mt7623-pwm", .data = &mt7623_pwm_data },
 	{ .compatible = "mediatek,mt7628-pwm", .data = &mt7628_pwm_data },
 	{ .compatible = "mediatek,mt7629-pwm", .data = &mt7629_pwm_data },
+	{ .compatible = "mediatek,mt7981-pwm", .data = &mt7981_pwm_data },
 	{ .compatible = "mediatek,mt7986-pwm", .data = &mt7986_pwm_data },
 	{ .compatible = "mediatek,mt8183-pwm", .data = &mt8183_pwm_data },
 	{ .compatible = "mediatek,mt8365-pwm", .data = &mt8365_pwm_data },
-- 
2.40.0


_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply related

* Re: [PATCH] perf cs-etm: Add support for coresight trace for any range of CPUs
From: Ganapatrao Kulkarni @ 2023-04-20 12:37 UTC (permalink / raw)
  To: James Clark
  Cc: mathieu.poirier, acme, darren, scott, scclevenger, linux-kernel,
	coresight, linux-arm-kernel, mike.leach, suzuki.poulose
In-Reply-To: <91ba66e7-737f-6526-a703-a755e114f9d4@arm.com>



On 20-04-2023 06:00 pm, James Clark wrote:
> 
> 
> On 20/04/2023 12:47, Ganapatrao Kulkarni wrote:
>>
>> Hi James,
>>
>> On 20-04-2023 03:13 pm, James Clark wrote:
>>>
>>>
>>> On 19/04/2023 18:21, Ganapatrao Kulkarni wrote:
>>>> The current implementation supports coresight trace for a range of
>>>> CPUs, if the first CPU is CPU0.
>>>>
>>>> Adding changes to enable coresight trace for any range of CPUs by
>>>> decoding the first CPU also from the header.
>>>> Later, first CPU id is used instead of CPU0 across the decoder
>>>> functions.
>>>>
>>>> Signed-off-by: Ganapatrao Kulkarni <gankulkarni@os.amperecomputing.com>
>>>> ---
>>>>    .../perf/util/cs-etm-decoder/cs-etm-decoder.c |  4 +-
>>>>    .../perf/util/cs-etm-decoder/cs-etm-decoder.h |  3 +-
>>>>    tools/perf/util/cs-etm.c                      | 62 ++++++++++++-------
>>>>    3 files changed, 42 insertions(+), 27 deletions(-)
>>>>
>>>> diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
>>>> b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
>>>> index 82a27ab90c8b..41ab299b643b 100644
>>>> --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
>>>> +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
>>>> @@ -724,7 +724,7 @@ cs_etm_decoder__create_etm_decoder(struct
>>>> cs_etm_decoder_params *d_params,
>>>>    }
>>>>      struct cs_etm_decoder *
>>>> -cs_etm_decoder__new(int decoders, struct cs_etm_decoder_params
>>>> *d_params,
>>>> +cs_etm_decoder__new(int first_decoder, int decoders, struct
>>>> cs_etm_decoder_params *d_params,
>>>>                struct cs_etm_trace_params t_params[])
>>>>    {
>>>>        struct cs_etm_decoder *decoder;
>>>> @@ -769,7 +769,7 @@ cs_etm_decoder__new(int decoders, struct
>>>> cs_etm_decoder_params *d_params,
>>>>        /* init raw frame logging if required */
>>>>        cs_etm_decoder__init_raw_frame_logging(d_params, decoder);
>>>>    -    for (i = 0; i < decoders; i++) {
>>>> +    for (i = first_decoder; i < decoders; i++) {
>>>>            ret = cs_etm_decoder__create_etm_decoder(d_params,
>>>>                                 &t_params[i],
>>>>                                 decoder);
>>>> diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h
>>>> b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h
>>>> index 92a855fbe5b8..b06193fc75b4 100644
>>>> --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h
>>>> +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h
>>>> @@ -90,7 +90,8 @@ int cs_etm_decoder__process_data_block(struct
>>>> cs_etm_decoder *decoder,
>>>>                           size_t len, size_t *consumed);
>>>>      struct cs_etm_decoder *
>>>> -cs_etm_decoder__new(int num_cpu,
>>>> +cs_etm_decoder__new(int first_decoder,
>>>> +            int decoders,
>>>>                struct cs_etm_decoder_params *d_params,
>>>>                struct cs_etm_trace_params t_params[]);
>>>>    diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
>>>> index 94e2d02009eb..2619513ae088 100644
>>>> --- a/tools/perf/util/cs-etm.c
>>>> +++ b/tools/perf/util/cs-etm.c
>>>> @@ -55,6 +55,8 @@ struct cs_etm_auxtrace {
>>>>        u8 has_virtual_ts; /* Virtual/Kernel timestamps in the trace. */
>>>>          int num_cpu;
>>>> +    int first_cpu;
>>>> +    int last_cpu;
>>>>        u64 latest_kernel_timestamp;
>>>>        u32 auxtrace_type;
>>>>        u64 branches_sample_type;
>>>> @@ -638,14 +640,13 @@ static void cs_etm__set_trace_param_ete(struct
>>>> cs_etm_trace_params *t_params,
>>>>    }
>>>>      static int cs_etm__init_trace_params(struct cs_etm_trace_params
>>>> *t_params,
>>>> -                     struct cs_etm_auxtrace *etm,
>>>> -                     int decoders)
>>>> +                     struct cs_etm_auxtrace *etm)
>>>>    {
>>>>        int i;
>>>>        u32 etmidr;
>>>>        u64 architecture;
>>>>    -    for (i = 0; i < decoders; i++) {
>>>> +    for (i = etm->first_cpu; i < etm->last_cpu; i++) {
>>>>            architecture = etm->metadata[i][CS_ETM_MAGIC];
>>>>              switch (architecture) {
>>>> @@ -817,7 +818,7 @@ static void cs_etm__free(struct perf_session
>>>> *session)
>>>>        /* Then the RB tree itself */
>>>>        intlist__delete(traceid_list);
>>>>    -    for (i = 0; i < aux->num_cpu; i++)
>>>> +    for (i = aux->first_cpu; i < aux->last_cpu; i++)
>>>>            zfree(&aux->metadata[i]);
>>>>          thread__zput(aux->unknown_thread);
>>>> @@ -921,7 +922,8 @@ static struct cs_etm_queue
>>>> *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm,
>>>>         * Each queue can only contain data from one CPU when
>>>> unformatted, so only one decoder is
>>>>         * needed.
>>>>         */
>>>> -    int decoders = formatted ? etm->num_cpu : 1;
>>>> +    int first_decoder = formatted ? etm->first_cpu : 0;
>>>> +    int decoders = first_decoder + (formatted ? etm->num_cpu : 1);
>>>>          etmq = zalloc(sizeof(*etmq));
>>>>        if (!etmq)
>>>> @@ -937,7 +939,7 @@ static struct cs_etm_queue
>>>> *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm,
>>>>        if (!t_params)
>>>>            goto out_free;
>>>>    -    if (cs_etm__init_trace_params(t_params, etm, decoders))
>>>> +    if (cs_etm__init_trace_params(t_params, etm))
>>>>            goto out_free;
>>>>          /* Set decoder parameters to decode trace packets */
>>>> @@ -947,8 +949,7 @@ static struct cs_etm_queue
>>>> *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm,
>>>>                        formatted))
>>>>            goto out_free;
>>>>    -    etmq->decoder = cs_etm_decoder__new(decoders, &d_params,
>>>> -                        t_params);
>>>> +    etmq->decoder = cs_etm_decoder__new(first_decoder, decoders,
>>>> &d_params, t_params);
>>>>          if (!etmq->decoder)
>>>>            goto out_free;
>>>> @@ -2959,11 +2960,11 @@ static int cs_etm__queue_aux_records(struct
>>>> perf_session *session)
>>>>     * Loop through the ETMs and complain if we find at least one where
>>>> ts_source != 1 (virtual
>>>>     * timestamps).
>>>>     */
>>>> -static bool cs_etm__has_virtual_ts(u64 **metadata, int num_cpu)
>>>> +static bool cs_etm__has_virtual_ts(u64 **metadata, struct
>>>> cs_etm_auxtrace *etm)
>>>>    {
>>>>        int j;
>>>>    -    for (j = 0; j < num_cpu; j++) {
>>>> +    for (j = etm->first_cpu; j < etm->last_cpu; j++) {
>>>>            switch (metadata[j][CS_ETM_MAGIC]) {
>>>>            case __perf_cs_etmv4_magic:
>>>>                if (HAS_PARAM(j, ETMV4, TS_SOURCE) ||
>>>> metadata[j][CS_ETMV4_TS_SOURCE] != 1)
>>>> @@ -2982,13 +2983,14 @@ static bool cs_etm__has_virtual_ts(u64
>>>> **metadata, int num_cpu)
>>>>    }
>>>>      /* map trace ids to correct metadata block, from information in
>>>> metadata */
>>>> -static int cs_etm__map_trace_ids_metadata(int num_cpu, u64 **metadata)
>>>> +static int cs_etm__map_trace_ids_metadata(struct cs_etm_auxtrace *etm)
>>>>    {
>>>>        u64 cs_etm_magic;
>>>> +    u64 **metadata = etm->metadata;
>>>>        u8 trace_chan_id;
>>>>        int i, err;
>>>>    -    for (i = 0; i < num_cpu; i++) {
>>>> +    for (i = etm->first_cpu; i < etm->last_cpu; i++) {
>>>>            cs_etm_magic = metadata[i][CS_ETM_MAGIC];
>>>>            switch (cs_etm_magic) {
>>>>            case __perf_cs_etmv3_magic:
>>>> @@ -3015,12 +3017,13 @@ static int cs_etm__map_trace_ids_metadata(int
>>>> num_cpu, u64 **metadata)
>>>>     * If we found AUX_HW_ID packets, then set any metadata marked as
>>>> unused to the
>>>>     * unused value to reduce the number of unneeded decoders created.
>>>>     */
>>>> -static int cs_etm__clear_unused_trace_ids_metadata(int num_cpu, u64
>>>> **metadata)
>>>> +static int cs_etm__clear_unused_trace_ids_metadata(struct
>>>> cs_etm_auxtrace *etm)
>>>>    {
>>>>        u64 cs_etm_magic;
>>>> +    u64 **metadata = etm->metadata;
>>>>        int i;
>>>>    -    for (i = 0; i < num_cpu; i++) {
>>>> +    for (i = etm->first_cpu; i < etm->last_cpu; i++) {
>>>>            cs_etm_magic = metadata[i][CS_ETM_MAGIC];
>>>>            switch (cs_etm_magic) {
>>>>            case __perf_cs_etmv3_magic:
>>>> @@ -3049,7 +3052,7 @@ int cs_etm__process_auxtrace_info_full(union
>>>> perf_event *event,
>>>>        int event_header_size = sizeof(struct perf_event_header);
>>>>        int total_size = auxtrace_info->header.size;
>>>>        int priv_size = 0;
>>>> -    int num_cpu;
>>>> +    int num_cpu, first_cpu = 0, last_cpu;
>>>>        int err = 0;
>>>>        int aux_hw_id_found;
>>>>        int i, j;
>>>> @@ -3068,22 +3071,31 @@ int cs_etm__process_auxtrace_info_full(union
>>>> perf_event *event,
>>>>        /* First the global part */
>>>>        ptr = (u64 *) auxtrace_info->priv;
>>>>        num_cpu = ptr[CS_PMU_TYPE_CPUS] & 0xffffffff;
>>>> -    metadata = zalloc(sizeof(*metadata) * num_cpu);
>>>> +
>>>> +    /* Start parsing after the common part of the header */
>>>> +    i = CS_HEADER_VERSION_MAX;
>>>> +
>>>> +    /*Get CPU id of first event */
>>>> +    first_cpu = ptr[i + CS_ETM_CPU];
>>>> +    last_cpu = first_cpu + num_cpu;
>>>> +
>>>> +    if (first_cpu > cpu__max_cpu().cpu ||
>>>> +            last_cpu > cpu__max_cpu().cpu)
>>>> +        return -EINVAL;
>>>> +
>>>> +    metadata = zalloc(sizeof(*metadata) * last_cpu);
>>>
>>> Hi Ganapatrao,
>>>
>>> I think I see what the problem is, but I'm wondering if a better fix
>>> would be to further decouple the CPU ID from the index in the array.
>>>
>>> With your change it's not clear what happens with sparse recordings, for
>>> example 'perf record -e cs_etm// -C 1,3,5'. And it seems like there is
>>
>> This patch fixes for any range of CPUs.
>> Record with sparse list of CPUs will not work with current code still.
>>
> 
> Is there a major issue that means sparse can't be done? I'm thinking it
> would be best to fix both issues with one change while we understand
> this part rather than a half fix that might have do be completely
> re-understood and re-done later anyway. Unless there is some big blocker
> but I can't see it?
> 
>>> some wastage in the zalloc here for example if only CPU 256 is traced
>>> then we'd still make 256 decoders but 255 of them would be unused?
>>>
>>> I tried to test sparse recordings, but your change doesn't apply to the
>>> latest coresight/next branch. I did notice that 'perf report -D' doesn't
>>> work with them on coresight/next (it just quits), but I couldn't see if
>>> that's fixed with your change.
>>
>> My patch is rebased on 6.3-RC7 codebase with Mike's 3 perf patches
>> related to dynamic id [1] support(queued for 6.4).
>>
>> "perf report -D" works for me.
> 
> I was referring to sparse CPU lists, which I think you mentioned above
> doesn't work even with this patch.
> 
>>
>> [1] https://www.spinics.net/lists/linux-perf-users/msg27452.html
>>
> 
> It should be based on the next branch here:
> git://git.kernel.org/pub/scm/linux/kernel/git/coresight/linux.git

OK.
> 
>>>
>>> Would a better fix not be to keep the metadata loops from 0-N and
>>> instead save the CPU ID in cs_etm_decoder_params or the decoder. That
>>> way it would support both sparse and not starting from 0 cases? I think
>>
>> Yep, I though this initially, it got complicated due to for more
>> for-loops. I will try again and post V2.
>>
> 
> I can't imagine it would need any extra for loops off the top of my
> head. Just saving the CPU ID in a few structs and using it wherever it's
> needed instead of the loop index. I imagine most of the loops would
> actually stay the same rather than be changed like you have in V1.

Working on V2, I will post it at the earliest.
> 
>>> the code would be better if it's worded like "i < recorded_cpus" rather
>>> than "i < cpu" to make it clear that i isn't actually the CPU ID it's
>>> just an index.
>>
>> Yes, makes sense to call it "recorded_cpus".
>>
>>>
>>> Also a new test for this scenario would probably be a good idea.
>>>
>>> Thanks
>>> James
>>>
>> Thanks,
>> Ganapat

Thanks,
Ganapat

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply

* Re: [RFC PATCH 1/2] arm64: amlogic: add new ARCH_AMLIPC for IPC SoC
From: Krzysztof Kozlowski @ 2023-04-20 12:39 UTC (permalink / raw)
  To: Kelvin Zhang, Dmitry Rokosov, Neil Armstrong
  Cc: =Xianwei Zhao, linux-arm-kernel, linux-kernel, linux-amlogic,
	devicetree, Catalin Marinas, Will Deacon, Kevin Hilman,
	Rob Herring, Krzysztof Kozlowski
In-Reply-To: <1c7322c9-8d2d-1cd1-95dc-dd9ec861981f@amlogic.com>

On 20/04/2023 10:43, Kelvin Zhang wrote:
>>>>>>> +config ARCH_AMLIPC
>>>>>> Do we really need a different ARCH for Amlogic IPC?
>>>>>> I can imagine that it's not the Meson architecture at all.
>>>>>> But maybe a better solution is just to rename ARCH_MESON to ARCH_AMLOGIC?
>>>>> It should be changed treewide, and is it worth it ?
>>>> As far as I understand, the A1 and S4 families are not fully compatible
>>>> with the Meson architecture, and we haven't provided additional ARCH_*
>>>> for them.
>>> The GXBB, GXL/GXM, G12A, G12B & SM1 are also not fully compatible,
>>> but they lie under the "MESON" umbrella which covers SoC since the
>>> Meson6 architecture. It's a facility to include/exclude Amlogic
>>> drivers/DT, nothing else.
> GXBB, GXL/GXM, G12A, G12B , SM1 and S4 belong to media box.
> So, "MESON" represents the media box series.
> Up to now, "MESON" works well for all existing chips except A1 and AXG.
>>> If you compare it to BCM or NXP, it's different situation, the
>>> different ARCH_* actually targets totally different SoCs from
>>> completely different Business Units or from companies acquisitions.
> Firstly, the new C series is totally different from previous MESON series.
>  From the perspective of application, the new C series is designed for 
> smart IP camera applications,
> while MESON series is designed for hybrid OTT/ IP Set Top Box  and 
> high-end media box applications.
>  From the perspective of architecture, the new C series integrates the 
> sensor interface, image signal processing unit, Dewarp, video encoder, 
> neural networking processing unit,
> which MESON series does not and will never have.
> Secondly, there are C1 and C2 besides C3.
> Moreover, more other series are on the way, such as T series.
> If we always stick to "MESON", people will get more and more confused.
> Therefore, I think it is the right time to add ARCH_AMLIPC.

Your DTS uses compatibles from meson, so I would argue that it is still
part of the same architecture.

Anyway, this is just config symbol, so it does not matter. There will be
no confusion and no problem of keeping it ARCH_MESON.


>>> We should have named it ARCH_AMLOGIC since the beginning, but we
>>> can't change history.
> Shouldn't we deserve a chance to make it right?
>>>> In my opinion, it's a good time to split the Meson architecture into
>>>> proper subsets, or rename it treewide (maybe only config option
>>>> ARCH_MESON => ARCH_AMLOGIC).
>>> MESON is only a codename to differentiate from other SoC vendors
>>> because Amlogic used it as a codename for a long time.
>>> Compare this to Allwinner's "sunxi" or Qualcomm's "msm".
>>>
>>> This config has no functional mean, it's only a config namespace.
>>>
>>> Renaming it would need renaming it in all subsystems Kconfig/Makefiles
>>> and will certainly break builds with custom kernel configs
>>> in various publicly used builds like Armbian, meta-meson, LibreELEC,
>>> Debian, Suse, ...
> Let's get back to ARCH_AMLIPC.
> We just need to add ARCH_AMLIPC in the necessary subsystems 
> Kconfig/Makefile.
> This change will keep the existing MESON related code,  and will neither 
> involve renaming nor break any builds.

It is also not necessary and not justified. We do not have multiple
top-level subarchs for one architecture. We had such talks already and
there was no consensus to change it.

Best regards,
Krzysztof


_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply

* Re: [PATCH] KVM: arm64: Ensure CPU PMU probes before pKVM host de-privilege
From: Fuad Tabba @ 2023-04-20 12:49 UTC (permalink / raw)
  To: Will Deacon; +Cc: linux-arm-kernel, kvmarm, Oliver Upton, Marc Zyngier
In-Reply-To: <20230420123356.2708-1-will@kernel.org>

Hi Will,

On Thu, Apr 20, 2023 at 1:34 PM Will Deacon <will@kernel.org> wrote:
>
> Although pKVM supports CPU PMU emulation for non-protected guests since
> 722625c6f4c5 ("KVM: arm64: Reenable pmu in Protected Mode"), this relies
> on the PMU driver probing before the host has de-privileged so that the
> 'kvm_arm_pmu_available' static key can still be enabled by patching the
> hypervisor text.
>
> As it happens, both of these events hang off device_initcall() but the
> PMU consistently won the race until 7755cec63ade ("arm64: perf: Move
> PMUv3 driver to drivers/perf"). Since then, the host will fail to boot
> when pKVM is enabled:
>
>   | hw perfevents: enabled with armv8_pmuv3_0 PMU driver, 7 counters available
>   | kvm [1]: nVHE hyp BUG at: [<ffff8000090366e0>] __kvm_nvhe_handle_host_mem_abort+0x270/0x284!
>   | kvm [1]: Cannot dump pKVM nVHE stacktrace: !CONFIG_PROTECTED_NVHE_STACKTRACE
>   | kvm [1]: Hyp Offset: 0xfffea41fbdf70000
>   | Kernel panic - not syncing: HYP panic:
>   | PS:a00003c9 PC:0000dbe04b0c66e0 ESR:00000000f2000800
>   | FAR:fffffbfffddfcf00 HPFAR:00000000010b0bf0 PAR:0000000000000000
>   | VCPU:0000000000000000
>   | CPU: 2 PID: 1 Comm: swapper/0 Not tainted 6.3.0-rc7-00083-g0bce6746d154 #1
>   | Hardware name: QEMU QEMU Virtual Machine, BIOS 0.0.0 02/06/2015
>   | Call trace:
>   |  dump_backtrace+0xec/0x108
>   |  show_stack+0x18/0x2c
>   |  dump_stack_lvl+0x50/0x68
>   |  dump_stack+0x18/0x24
>   |  panic+0x13c/0x33c
>   |  nvhe_hyp_panic_handler+0x10c/0x190
>   |  aarch64_insn_patch_text_nosync+0x64/0xc8
>   |  arch_jump_label_transform+0x4c/0x5c
>   |  __jump_label_update+0x84/0xfc
>   |  jump_label_update+0x100/0x134
>   |  static_key_enable_cpuslocked+0x68/0xac
>   |  static_key_enable+0x20/0x34
>   |  kvm_host_pmu_init+0x88/0xa4
>   |  armpmu_register+0xf0/0xf4
>   |  arm_pmu_acpi_probe+0x2ec/0x368
>   |  armv8_pmu_driver_init+0x38/0x44
>   |  do_one_initcall+0xcc/0x240
>
> Fix the race properly by deferring the de-privilege step to
> device_initcall_sync(). This will also be needed in future when probing
> IOMMU devices and allows us to separate the pKVM de-privilege logic from
> the core hypervisor initialisation path.
>
> Cc: Oliver Upton <oliver.upton@linux.dev>
> Cc: Fuad Tabba <tabba@google.com>
> Cc: Marc Zyngier <maz@kernel.org>
> Fixes: 7755cec63ade ("arm64: perf: Move PMUv3 driver to drivers/perf")
> Signed-off-by: Will Deacon <will@kernel.org>
> ---

Tested-by: Fuad Tabba <tabba@google.com>

Cheers,
/fuad

>
> Marc, Oliver -- in practice, this issue only crops with the patches
> moving the CPU PMU driver out into drivers/perf/ and so the arm64
> for-next/core branch is broken. Please can I queue this in the arm64
> tree for 6.4 with your Ack? Thanks.
>
>  arch/arm64/kvm/arm.c  | 45 -----------------------------------------
>  arch/arm64/kvm/pkvm.c | 47 +++++++++++++++++++++++++++++++++++++++++++
>  2 files changed, 47 insertions(+), 45 deletions(-)
>
> diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
> index 3bd732eaf087..890f730bc3ab 100644
> --- a/arch/arm64/kvm/arm.c
> +++ b/arch/arm64/kvm/arm.c
> @@ -16,7 +16,6 @@
>  #include <linux/fs.h>
>  #include <linux/mman.h>
>  #include <linux/sched.h>
> -#include <linux/kmemleak.h>
>  #include <linux/kvm.h>
>  #include <linux/kvm_irqfd.h>
>  #include <linux/irqbypass.h>
> @@ -46,7 +45,6 @@
>  #include <kvm/arm_psci.h>
>
>  static enum kvm_mode kvm_mode = KVM_MODE_DEFAULT;
> -DEFINE_STATIC_KEY_FALSE(kvm_protected_mode_initialized);
>
>  DECLARE_KVM_HYP_PER_CPU(unsigned long, kvm_hyp_vector);
>
> @@ -2105,41 +2103,6 @@ static int __init init_hyp_mode(void)
>         return err;
>  }
>
> -static void __init _kvm_host_prot_finalize(void *arg)
> -{
> -       int *err = arg;
> -
> -       if (WARN_ON(kvm_call_hyp_nvhe(__pkvm_prot_finalize)))
> -               WRITE_ONCE(*err, -EINVAL);
> -}
> -
> -static int __init pkvm_drop_host_privileges(void)
> -{
> -       int ret = 0;
> -
> -       /*
> -        * Flip the static key upfront as that may no longer be possible
> -        * once the host stage 2 is installed.
> -        */
> -       static_branch_enable(&kvm_protected_mode_initialized);
> -       on_each_cpu(_kvm_host_prot_finalize, &ret, 1);
> -       return ret;
> -}
> -
> -static int __init finalize_hyp_mode(void)
> -{
> -       if (!is_protected_kvm_enabled())
> -               return 0;
> -
> -       /*
> -        * Exclude HYP sections from kmemleak so that they don't get peeked
> -        * at, which would end badly once inaccessible.
> -        */
> -       kmemleak_free_part(__hyp_bss_start, __hyp_bss_end - __hyp_bss_start);
> -       kmemleak_free_part_phys(hyp_mem_base, hyp_mem_size);
> -       return pkvm_drop_host_privileges();
> -}
> -
>  struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr)
>  {
>         struct kvm_vcpu *vcpu;
> @@ -2257,14 +2220,6 @@ static __init int kvm_arm_init(void)
>         if (err)
>                 goto out_hyp;
>
> -       if (!in_hyp_mode) {
> -               err = finalize_hyp_mode();
> -               if (err) {
> -                       kvm_err("Failed to finalize Hyp protection\n");
> -                       goto out_subs;
> -               }
> -       }
> -
>         if (is_protected_kvm_enabled()) {
>                 kvm_info("Protected nVHE mode initialized successfully\n");
>         } else if (in_hyp_mode) {
> diff --git a/arch/arm64/kvm/pkvm.c b/arch/arm64/kvm/pkvm.c
> index cf56958b1492..6e9ece1ebbe7 100644
> --- a/arch/arm64/kvm/pkvm.c
> +++ b/arch/arm64/kvm/pkvm.c
> @@ -4,6 +4,8 @@
>   * Author: Quentin Perret <qperret@google.com>
>   */
>
> +#include <linux/init.h>
> +#include <linux/kmemleak.h>
>  #include <linux/kvm_host.h>
>  #include <linux/memblock.h>
>  #include <linux/mutex.h>
> @@ -13,6 +15,8 @@
>
>  #include "hyp_constants.h"
>
> +DEFINE_STATIC_KEY_FALSE(kvm_protected_mode_initialized);
> +
>  static struct memblock_region *hyp_memory = kvm_nvhe_sym(hyp_memory);
>  static unsigned int *hyp_memblock_nr_ptr = &kvm_nvhe_sym(hyp_memblock_nr);
>
> @@ -213,3 +217,46 @@ int pkvm_init_host_vm(struct kvm *host_kvm)
>         mutex_init(&host_kvm->lock);
>         return 0;
>  }
> +
> +static void __init _kvm_host_prot_finalize(void *arg)
> +{
> +       int *err = arg;
> +
> +       if (WARN_ON(kvm_call_hyp_nvhe(__pkvm_prot_finalize)))
> +               WRITE_ONCE(*err, -EINVAL);
> +}
> +
> +static int __init pkvm_drop_host_privileges(void)
> +{
> +       int ret = 0;
> +
> +       /*
> +        * Flip the static key upfront as that may no longer be possible
> +        * once the host stage 2 is installed.
> +        */
> +       static_branch_enable(&kvm_protected_mode_initialized);
> +       on_each_cpu(_kvm_host_prot_finalize, &ret, 1);
> +       return ret;
> +}
> +
> +static int __init finalize_pkvm(void)
> +{
> +       int ret;
> +
> +       if (!is_protected_kvm_enabled())
> +               return 0;
> +
> +       /*
> +        * Exclude HYP sections from kmemleak so that they don't get peeked
> +        * at, which would end badly once inaccessible.
> +        */
> +       kmemleak_free_part(__hyp_bss_start, __hyp_bss_end - __hyp_bss_start);
> +       kmemleak_free_part_phys(hyp_mem_base, hyp_mem_size);
> +
> +       ret = pkvm_drop_host_privileges();
> +       if (ret)
> +               pr_err("Failed to finalize Hyp protection: %d\n", ret);
> +
> +       return ret;
> +}
> +device_initcall_sync(finalize_pkvm);
> --
> 2.40.0.634.g4ca3ef3211-goog
>

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply

* Re: [PATCH] KVM: arm64: Ensure CPU PMU probes before pKVM host de-privilege
From: Marc Zyngier @ 2023-04-20 12:50 UTC (permalink / raw)
  To: Will Deacon; +Cc: linux-arm-kernel, kvmarm, Oliver Upton, Fuad Tabba
In-Reply-To: <20230420123356.2708-1-will@kernel.org>

On Thu, 20 Apr 2023 13:33:56 +0100,
Will Deacon <will@kernel.org> wrote:
> 
> Although pKVM supports CPU PMU emulation for non-protected guests since
> 722625c6f4c5 ("KVM: arm64: Reenable pmu in Protected Mode"), this relies
> on the PMU driver probing before the host has de-privileged so that the
> 'kvm_arm_pmu_available' static key can still be enabled by patching the
> hypervisor text.
> 
> As it happens, both of these events hang off device_initcall() but the
> PMU consistently won the race until 7755cec63ade ("arm64: perf: Move
> PMUv3 driver to drivers/perf"). Since then, the host will fail to boot
> when pKVM is enabled:
> 
>   | hw perfevents: enabled with armv8_pmuv3_0 PMU driver, 7 counters available
>   | kvm [1]: nVHE hyp BUG at: [<ffff8000090366e0>] __kvm_nvhe_handle_host_mem_abort+0x270/0x284!
>   | kvm [1]: Cannot dump pKVM nVHE stacktrace: !CONFIG_PROTECTED_NVHE_STACKTRACE
>   | kvm [1]: Hyp Offset: 0xfffea41fbdf70000
>   | Kernel panic - not syncing: HYP panic:
>   | PS:a00003c9 PC:0000dbe04b0c66e0 ESR:00000000f2000800
>   | FAR:fffffbfffddfcf00 HPFAR:00000000010b0bf0 PAR:0000000000000000
>   | VCPU:0000000000000000
>   | CPU: 2 PID: 1 Comm: swapper/0 Not tainted 6.3.0-rc7-00083-g0bce6746d154 #1
>   | Hardware name: QEMU QEMU Virtual Machine, BIOS 0.0.0 02/06/2015
>   | Call trace:
>   |  dump_backtrace+0xec/0x108
>   |  show_stack+0x18/0x2c
>   |  dump_stack_lvl+0x50/0x68
>   |  dump_stack+0x18/0x24
>   |  panic+0x13c/0x33c
>   |  nvhe_hyp_panic_handler+0x10c/0x190
>   |  aarch64_insn_patch_text_nosync+0x64/0xc8
>   |  arch_jump_label_transform+0x4c/0x5c
>   |  __jump_label_update+0x84/0xfc
>   |  jump_label_update+0x100/0x134
>   |  static_key_enable_cpuslocked+0x68/0xac
>   |  static_key_enable+0x20/0x34
>   |  kvm_host_pmu_init+0x88/0xa4
>   |  armpmu_register+0xf0/0xf4
>   |  arm_pmu_acpi_probe+0x2ec/0x368
>   |  armv8_pmu_driver_init+0x38/0x44
>   |  do_one_initcall+0xcc/0x240
> 
> Fix the race properly by deferring the de-privilege step to
> device_initcall_sync(). This will also be needed in future when probing
> IOMMU devices and allows us to separate the pKVM de-privilege logic from
> the core hypervisor initialisation path.
> 
> Cc: Oliver Upton <oliver.upton@linux.dev>
> Cc: Fuad Tabba <tabba@google.com>
> Cc: Marc Zyngier <maz@kernel.org>
> Fixes: 7755cec63ade ("arm64: perf: Move PMUv3 driver to drivers/perf")
> Signed-off-by: Will Deacon <will@kernel.org>
> ---
> 
> Marc, Oliver -- in practice, this issue only crops with the patches
> moving the CPU PMU driver out into drivers/perf/ and so the arm64
> for-next/core branch is broken. Please can I queue this in the arm64
> tree for 6.4 with your Ack? Thanks.

It doesn't conflict with the current state of kvmarm/next, and I
actually like that this code is moved into pkvm.c, so:

Acked-by: Marc Zyngier <maz@kernel.org>

Thanks,

	M.

-- 
Without deviation from the norm, progress is not possible.

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply

* Re: [PATCH v2 1/1] arm: dts: sunxi: Add ICnova A20 ADB4006 board support
From: Krzysztof Kozlowski @ 2023-04-20 12:53 UTC (permalink / raw)
  To: Andre Przywara
  Cc: Ludwig Kormann, samuel, jernej.skrabec, wens, robh+dt,
	krzysztof.kozlowski+dt, linux-arm-kernel, devicetree, linux-sunxi,
	linux-kernel
In-Reply-To: <20230420095315.5aaab9eb@donnerap.cambridge.arm.com>

On 20/04/2023 10:53, Andre Przywara wrote:
>> Bindings are always separate patches. checkpatch did not complain?
>>
>>>  arch/arm/boot/dts/Makefile                    |   1 +
>>>  .../boot/dts/sun7i-a20-icnova-a20-adb4006.dts | 137 ++++++++++++++++++
>>>  arch/arm/boot/dts/sun7i-a20-icnova-a20.dtsi   |  63 ++++++++
>>>  4 files changed, 207 insertions(+)
>>>  create mode 100644 arch/arm/boot/dts/sun7i-a20-icnova-a20-adb4006.dts
>>>  create mode 100644 arch/arm/boot/dts/sun7i-a20-icnova-a20.dtsi
>>>
>>> diff --git a/Documentation/devicetree/bindings/arm/sunxi.yaml b/Documentation/devicetree/bindings/arm/sunxi.yaml
>>> index 013821f4a7b8..12f0c236f17b 100644
>>> --- a/Documentation/devicetree/bindings/arm/sunxi.yaml
>>> +++ b/Documentation/devicetree/bindings/arm/sunxi.yaml
>>> @@ -305,6 +305,12 @@ properties:
>>>            - const: allwinner,i12-tvbox
>>>            - const: allwinner,sun7i-a20
>>>  
>>> +      - description: ICNova A20 ADB4006
>>> +        items:
>>> +          - const: incircuit,icnova-a20-adb4006
>>> +          - const: incircuit,icnova-a20
>>> +          - const: allwinner,sun7i-a20
>>> +
>>>        - description: ICNova A20 SWAC
>>>          items:
>>>            - const: incircuit,icnova-a20-swac
>>> diff --git a/arch/arm/boot/dts/Makefile b/arch/arm/boot/dts/Makefile
>>> index 3cc32722c394..b6b408417261 100644
>>> --- a/arch/arm/boot/dts/Makefile
>>> +++ b/arch/arm/boot/dts/Makefile
>>> @@ -1321,6 +1321,7 @@ dtb-$(CONFIG_MACH_SUN7I) += \
>>>  	sun7i-a20-hummingbird.dtb \
>>>  	sun7i-a20-itead-ibox.dtb \
>>>  	sun7i-a20-i12-tvbox.dtb \
>>> +	sun7i-a20-icnova-a20-adb4006.dtb \
>>>  	sun7i-a20-icnova-swac.dtb \
>>>  	sun7i-a20-lamobo-r1.dtb \
>>>  	sun7i-a20-linutronix-testbox-v2.dtb \
>>> diff --git a/arch/arm/boot/dts/sun7i-a20-icnova-a20-adb4006.dts b/arch/arm/boot/dts/sun7i-a20-icnova-a20-adb4006.dts
>>> new file mode 100644
>>> index 000000000000..c1606c085e4e
>>> --- /dev/null
>>> +++ b/arch/arm/boot/dts/sun7i-a20-icnova-a20-adb4006.dts
>>> @@ -0,0 +1,137 @@
>>> +// SPDX-License-Identifier: (GPL-2.0+ OR MIT)  
>>
>> Unusual license. Are you sure you are ok with GPLv5.0?
> 
> Is it really unusual? This is literally the most commonly used dual license
> for DTs, grep counts 252 users in arm and 573 users in arm64.
> 

No, the most commonly used is GPL-2.0 (optionally OR MIT/BSD). GPLv3 and
later appear, but it is actually weird to use it for kernel DTS where in
general we said no to GPLv3.

And my question is not the only one... just grep from responses from
other maintainers.

Best regards,
Krzysztof


_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply

* Re: [PATCH] perf cs-etm: Add support for coresight trace for any range of CPUs
From: Suzuki K Poulose @ 2023-04-20 13:03 UTC (permalink / raw)
  To: Ganapatrao Kulkarni, James Clark
  Cc: mathieu.poirier, acme, darren, scott, scclevenger, linux-kernel,
	coresight, linux-arm-kernel, mike.leach
In-Reply-To: <dea08376-e66b-bacc-7673-c79fe2a8f889@os.amperecomputing.com>

On 20/04/2023 13:37, Ganapatrao Kulkarni wrote:
> 
> 
> On 20-04-2023 06:00 pm, James Clark wrote:
>>
>>
>> On 20/04/2023 12:47, Ganapatrao Kulkarni wrote:
>>>

...

>>> My patch is rebased on 6.3-RC7 codebase with Mike's 3 perf patches
>>> related to dynamic id [1] support(queued for 6.4).
>>>
>>> "perf report -D" works for me.
>>
>> I was referring to sparse CPU lists, which I think you mentioned above
>> doesn't work even with this patch.
>>
>>>
>>> [1] https://www.spinics.net/lists/linux-perf-users/msg27452.html
>>>
>>
>> It should be based on the next branch here:
>> git://git.kernel.org/pub/scm/linux/kernel/git/coresight/linux.git
> 
> OK.

It need not be. Since this patch is purely perf tools patch and has
nothing to do with the kernel drivers, it should be beased on whatever
the tip of the perf tool tree is. Otherwise we risk rebasing to that
eventually.

Cheers
Suzuki


_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply

* Re: [PATCH 1/6] drm: bridge: samsung-dsim: Support multi-lane calculations
From: Lucas Stach @ 2023-04-20 13:06 UTC (permalink / raw)
  To: Adam Ford
  Cc: dri-devel, Krzysztof Kozlowski, aford, Laurent Pinchart,
	Andrzej Hajda, Fabio Estevam, m.szyprowski, marex, Robert Foss,
	David Airlie, Jernej Skrabec, Jagan Teki, NXP Linux Team,
	devicetree, Daniel Vetter, Jonas Karlman, Sascha Hauer, Inki Dae,
	Rob Herring, linux-arm-kernel, Neil Armstrong, linux-kernel,
	Pengutronix Kernel Team, Shawn Guo
In-Reply-To: <CAHCN7xK8K+DsNAFTVAezwJQzZ7RCDb2CjCBZ8dNb=S8d1BmtMA@mail.gmail.com>

Hi Adam,

Am Mittwoch, dem 19.04.2023 um 05:47 -0500 schrieb Adam Ford:
> On Mon, Apr 17, 2023 at 6:55 AM Adam Ford <aford173@gmail.com> wrote:
> > 
> > On Mon, Apr 17, 2023 at 3:43 AM Lucas Stach <l.stach@pengutronix.de> wrote:
> > > 
> > > Hi Adam,
> > > 
> > > Am Samstag, dem 15.04.2023 um 05:40 -0500 schrieb Adam Ford:
> > > > If there is more than one lane, the HFP, HBP, and HSA is calculated in
> > > > bytes/pixel, then they are divided amongst the different lanes with some
> > > > additional overhead. This is necessary to achieve higher resolutions while
> > > > keeping the pixel clocks lower as the number of lanes increase.
> > > > 
> > > 
> > > In the testing I did to come up with my patch "drm: bridge: samsung-
> > > dsim: fix blanking packet size calculation" the number of lanes didn't
> > > make any difference. My testing might be flawed, as I could only
> > > measure the blanking after translation from MIPI DSI to DPI, so I'm
> > > interested to know what others did here. How did you validate the
> > > blanking with your patch? Would you have a chance to test my patch and
> > > see if it works or breaks in your setup?
> 
> Lucas,
> 
> I tried your patch instead of mine.  Yours is dependent on the
> hs_clock being always set to the burst clock which is configured by
> the device tree.  I unrolled a bit of my stuff and replaced it with
> yours.  It worked at 1080p, but when I tried a few other resolutions,
> they did not work.  I assume it's because the DSI clock is fixed and
> not changing based on the pixel clock.  In the version I did, I only
> did that math when the lanes were > 1. In your patch, you divide by 8,
> and in mine, I fetch the bits-per-pixel (which is 8) and I divide by
> that just in case the bpp ever changes from 8.  Overall,  I think our
> patches basically do the same thing.

The calculations in your and my patch are quite different. I'm not
taking into account the number of lanes or the MIPI format. I'm basing
the blanking size purely on the ratio between MIPI DSI byte clock and
the DPI interface clock. It's quite counter-intuitive that the host
would scale the blanking to the number of lanes automatically, but
still require the MIPI packet offset removed, but that's what my
measurements showed to produce the correct blanking after translation
to DPI by the TC358767 bridge chip.

If you dynamically scale the HS clock, then you would need to input the
real used HS clock to the calculation in my patch, instead of the fixed
burst mode rate.

Regards,
Lucas

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply

* Re: [PATCH] KVM: arm64: fix repeated words in comments
From: Mukesh Ojha @ 2023-04-20 13:07 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <20230309075919.169518-1-jingyuwang_vip@163.com>



On 3/9/2023 1:29 PM, Jingyu Wang wrote:
> Delete the redundant word 'to'.
> 
> Signed-off-by: Jingyu Wang <jingyuwang_vip@163.com>

Reviewed-by: Mukesh Ojha <quic_mojha@quicinc.com>

--Mukesh

> ---
>   arch/arm64/kvm/inject_fault.c | 2 +-
>   1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c
> index 64c3aec0d937..0bd93a5f21ce 100644
> --- a/arch/arm64/kvm/inject_fault.c
> +++ b/arch/arm64/kvm/inject_fault.c
> @@ -204,7 +204,7 @@ void kvm_inject_size_fault(struct kvm_vcpu *vcpu)
>   	 * Size Fault at level 0, as if exceeding PARange.
>   	 *
>   	 * Non-LPAE guests will only get the external abort, as there
> -	 * is no way to to describe the ASF.
> +	 * is no way to describe the ASF.
>   	 */
>   	if (vcpu_el1_is_32bit(vcpu) &&
>   	    !(vcpu_read_sys_reg(vcpu, TCR_EL1) & TTBCR_EAE))

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply

* [PATCH 1/2] wifi: mt7601u: delete dead code checking debugfs returns
From: Wang Jikai @ 2023-04-20 13:08 UTC (permalink / raw)
  To: Jakub Kicinski, Kalle Valo, David S. Miller, Eric Dumazet,
	Paolo Abeni, Matthias Brugger, AngeloGioacchino Del Regno
  Cc: hust-os-kernel-patches, Wang Jikai, Jakub Kicinski,
	linux-wireless, netdev, linux-kernel, linux-arm-kernel,
	linux-mediatek

Smatch reports that:
drivers/net/wireless/mediatek/mt7601u/debugfs.c:130
mt7601u_init_debugfs() warn: 'dir' is an error pointer or valid".

Debugfs code is not supposed to need error checking so instead of
changing this to if (IS_ERR()) the correct thing is to just delete
the dead code.

Fixes: c869f77d6abb ("add mt7601u driver")
Signed-off-by: Wang Jikai <wangjikai@hust.edu.cn>
---
The issue is found by static analysis and remains untested.
---
 drivers/net/wireless/mediatek/mt7601u/debugfs.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt7601u/debugfs.c b/drivers/net/wireless/mediatek/mt7601u/debugfs.c
index 230b0e1061a7..dbddf256921b 100644
--- a/drivers/net/wireless/mediatek/mt7601u/debugfs.c
+++ b/drivers/net/wireless/mediatek/mt7601u/debugfs.c
@@ -127,8 +127,6 @@ void mt7601u_init_debugfs(struct mt7601u_dev *dev)
 	struct dentry *dir;
 
 	dir = debugfs_create_dir("mt7601u", dev->hw->wiphy->debugfsdir);
-	if (!dir)
-		return;
 
 	debugfs_create_u8("temperature", 0400, dir, &dev->raw_temp);
 	debugfs_create_u32("temp_mode", 0400, dir, &dev->temp_mode);
-- 
2.34.1


_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply related

* [PATCH 2/2] wifi: mt7601u: remove debugfs directory on disconnect
From: Wang Jikai @ 2023-04-20 13:09 UTC (permalink / raw)
  To: Jakub Kicinski, Kalle Valo, David S. Miller, Eric Dumazet,
	Paolo Abeni, Matthias Brugger, AngeloGioacchino Del Regno
  Cc: hust-os-kernel-patches, Wang Jikai, Jakub Kicinski,
	linux-wireless, netdev, linux-kernel, linux-arm-kernel,
	linux-mediatek

Debugfs is created during device init but not removed.
Add a function mt7601u_remove_debugfs to remove debugfs
when the device disconnects.

Fixes: c869f77d6abb ("add mt7601u driver")
Signed-off-by: Wang Jikai <wangjikai@hust.edu.cn>
---
The issue is found by static analysis and remains untested.
---
 drivers/net/wireless/mediatek/mt7601u/debugfs.c | 5 +++++
 drivers/net/wireless/mediatek/mt7601u/mt7601u.h | 1 +
 drivers/net/wireless/mediatek/mt7601u/usb.c     | 1 +
 3 files changed, 7 insertions(+)

diff --git a/drivers/net/wireless/mediatek/mt7601u/debugfs.c b/drivers/net/wireless/mediatek/mt7601u/debugfs.c
index dbddf256921b..1b87a4854e0e 100644
--- a/drivers/net/wireless/mediatek/mt7601u/debugfs.c
+++ b/drivers/net/wireless/mediatek/mt7601u/debugfs.c
@@ -136,3 +136,8 @@ void mt7601u_init_debugfs(struct mt7601u_dev *dev)
 	debugfs_create_file("ampdu_stat", 0400, dir, dev, &mt7601u_ampdu_stat_fops);
 	debugfs_create_file("eeprom_param", 0400, dir, dev, &mt7601u_eeprom_param_fops);
 }
+
+void mt7601u_remove_debugfs(struct mt7601u_dev *dev)
+{
+	debugfs_lookup_and_remove("mt7601u", dev->hw->wiphy->debugfsdir);
+}
diff --git a/drivers/net/wireless/mediatek/mt7601u/mt7601u.h b/drivers/net/wireless/mediatek/mt7601u/mt7601u.h
index 118d43707853..0216ace4b8e9 100644
--- a/drivers/net/wireless/mediatek/mt7601u/mt7601u.h
+++ b/drivers/net/wireless/mediatek/mt7601u/mt7601u.h
@@ -279,6 +279,7 @@ struct mt7601u_rxwi;
 extern const struct ieee80211_ops mt7601u_ops;
 
 void mt7601u_init_debugfs(struct mt7601u_dev *dev);
+void mt7601u_remove_debugfs(struct mt7601u_dev *dev);
 
 u32 mt7601u_rr(struct mt7601u_dev *dev, u32 offset);
 void mt7601u_wr(struct mt7601u_dev *dev, u32 offset, u32 val);
diff --git a/drivers/net/wireless/mediatek/mt7601u/usb.c b/drivers/net/wireless/mediatek/mt7601u/usb.c
index cc772045d526..d9a93d05f1cf 100644
--- a/drivers/net/wireless/mediatek/mt7601u/usb.c
+++ b/drivers/net/wireless/mediatek/mt7601u/usb.c
@@ -332,6 +332,7 @@ static void mt7601u_disconnect(struct usb_interface *usb_intf)
 
 	ieee80211_unregister_hw(dev->hw);
 	mt7601u_cleanup(dev);
+	mt7601u_remove_debugfs(dev);
 
 	usb_set_intfdata(usb_intf, NULL);
 	usb_put_dev(interface_to_usbdev(usb_intf));
-- 
2.34.1


_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply related

* Re: [PATCH 1/6] drm: bridge: samsung-dsim: Support multi-lane calculations
From: Adam Ford @ 2023-04-20 13:24 UTC (permalink / raw)
  To: Lucas Stach
  Cc: dri-devel, Krzysztof Kozlowski, aford, Laurent Pinchart,
	Andrzej Hajda, Fabio Estevam, m.szyprowski, marex, Robert Foss,
	David Airlie, Jernej Skrabec, Jagan Teki, NXP Linux Team,
	devicetree, Daniel Vetter, Jonas Karlman, Sascha Hauer, Inki Dae,
	Rob Herring, linux-arm-kernel, Neil Armstrong, linux-kernel,
	Pengutronix Kernel Team, Shawn Guo
In-Reply-To: <f42a2a11c1a2df4d773b61a449e8f4d5a9a010d1.camel@pengutronix.de>

On Thu, Apr 20, 2023 at 8:06 AM Lucas Stach <l.stach@pengutronix.de> wrote:
>
> Hi Adam,
>
> Am Mittwoch, dem 19.04.2023 um 05:47 -0500 schrieb Adam Ford:
> > On Mon, Apr 17, 2023 at 6:55 AM Adam Ford <aford173@gmail.com> wrote:
> > >
> > > On Mon, Apr 17, 2023 at 3:43 AM Lucas Stach <l.stach@pengutronix.de> wrote:
> > > >
> > > > Hi Adam,
> > > >
> > > > Am Samstag, dem 15.04.2023 um 05:40 -0500 schrieb Adam Ford:
> > > > > If there is more than one lane, the HFP, HBP, and HSA is calculated in
> > > > > bytes/pixel, then they are divided amongst the different lanes with some
> > > > > additional overhead. This is necessary to achieve higher resolutions while
> > > > > keeping the pixel clocks lower as the number of lanes increase.
> > > > >
> > > >
> > > > In the testing I did to come up with my patch "drm: bridge: samsung-
> > > > dsim: fix blanking packet size calculation" the number of lanes didn't
> > > > make any difference. My testing might be flawed, as I could only
> > > > measure the blanking after translation from MIPI DSI to DPI, so I'm
> > > > interested to know what others did here. How did you validate the
> > > > blanking with your patch? Would you have a chance to test my patch and
> > > > see if it works or breaks in your setup?
> >
> > Lucas,
> >
> > I tried your patch instead of mine.  Yours is dependent on the
> > hs_clock being always set to the burst clock which is configured by
> > the device tree.  I unrolled a bit of my stuff and replaced it with
> > yours.  It worked at 1080p, but when I tried a few other resolutions,
> > they did not work.  I assume it's because the DSI clock is fixed and
> > not changing based on the pixel clock.  In the version I did, I only
> > did that math when the lanes were > 1. In your patch, you divide by 8,
> > and in mine, I fetch the bits-per-pixel (which is 8) and I divide by
> > that just in case the bpp ever changes from 8.  Overall,  I think our
> > patches basically do the same thing.
>
> The calculations in your and my patch are quite different. I'm not
> taking into account the number of lanes or the MIPI format. I'm basing

I was looking more at the division by 8 and the overhead correction of 6.
This number 6 also appears in the NXP downstream kernel [1].  I know
Marek V had some concerns about that.

> the blanking size purely on the ratio between MIPI DSI byte clock and
> the DPI interface clock. It's quite counter-intuitive that the host
> would scale the blanking to the number of lanes automatically, but
> still require the MIPI packet offset removed, but that's what my
> measurements showed to produce the correct blanking after translation
> to DPI by the TC358767 bridge chip.

How many lanes is your DSI interface using?

>
> If you dynamically scale the HS clock, then you would need to input the
> real used HS clock to the calculation in my patch, instead of the fixed
> burst mode rate.

I think what you're saying makes sense.

The code I originally modeled this from was from the NXP downstream
kernel where they define the calculation as being in words [2]. I am
not saying the NXP code is perfect, but the NXP code works.  With this
series, my monitors are able to sync a bunch of different resolutions
from 1080p down to 640x480 and a bunch in between with various refresh
rates too. That was the goal of this series.

Instead of just using your patch as-is, I will adapt yours to use the
scaled clock to see how it behaves and get back to you.  I have
finished reworking the dynamic DPHY settings, and I've fixed up making
the PLL device tree optional. If your patch works, I'll drop my
calculation and just build off what you have to use the scaled HS
clock when I submit the V2 and I'll make sure I CC you.

adam

[1] - https://github.com/nxp-imx/linux-imx/blob/lf-6.1.y/drivers/gpu/drm/bridge/sec-dsim.c#L270
[2] - https://github.com/nxp-imx/linux-imx/blob/lf-6.1.y/drivers/gpu/drm/bridge/sec-dsim.c#L914

>
> Regards,
> Lucas

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply

* Re: [PATCH V3] spi: cadence-quadspi: use macro DEFINE_SIMPLE_DEV_PM_OPS
From: Mark Brown @ 2023-04-20 13:26 UTC (permalink / raw)
  To: Dhruva Gole
  Cc: Vaishnav Achath, Vignesh, Apurva Nandan, linux-arm-kernel,
	linux-spi, linux-kernel, kernel test robot
In-Reply-To: <20230420054257.925092-1-d-gole@ti.com>

On Thu, 20 Apr 2023 11:12:57 +0530, Dhruva Gole wrote:
> Using this macro makes the code more readable.
> It also inits the members of dev_pm_ops in the following manner
> without us explicitly needing to:
> 
> .suspend = cqspi_suspend, \
> .resume = cqspi_resume, \
> .freeze = cqspi_suspend, \
> .thaw = cqspi_resume, \
> .poweroff = cqspi_suspend, \
> .restore = cqspi_resume
> 
> [...]

Applied to

   https://git.kernel.org/pub/scm/linux/kernel/git/broonie/spi.git for-next

Thanks!

[1/1] spi: cadence-quadspi: use macro DEFINE_SIMPLE_DEV_PM_OPS
      commit: be3206e8906e7a93df673ab2e96d69304a008edc

All being well this means that it will be integrated into the linux-next
tree (usually sometime in the next 24 hours) and sent to Linus during
the next merge window (or sooner if it is a bug fix), however if
problems are discovered then the patch may be dropped or reverted.

You may get further e-mails resulting from automated or manual testing
and review of the tree, please engage with people reporting problems and
send followup patches addressing any issues that are reported if needed.

If any updates are required or you are submitting further changes they
should be sent as incremental updates against current git, existing
patches will not be replaced.

Please add any relevant lists and maintainers to the CCs when replying
to this mail.

Thanks,
Mark


_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply

* Re: [PATCH 1/6] drm: bridge: samsung-dsim: Support multi-lane calculations
From: Lucas Stach @ 2023-04-20 13:42 UTC (permalink / raw)
  To: Adam Ford
  Cc: dri-devel, Krzysztof Kozlowski, aford, Laurent Pinchart,
	Andrzej Hajda, Fabio Estevam, m.szyprowski, marex, Robert Foss,
	David Airlie, Jernej Skrabec, Jagan Teki, NXP Linux Team,
	devicetree, Daniel Vetter, Jonas Karlman, Sascha Hauer, Inki Dae,
	Rob Herring, linux-arm-kernel, Neil Armstrong, linux-kernel,
	Pengutronix Kernel Team, Shawn Guo
In-Reply-To: <CAHCN7x+bZHZHxYk=qC3QFS07kLO85w_rj1tOuX1Y3fJXekmvMQ@mail.gmail.com>

Am Donnerstag, dem 20.04.2023 um 08:24 -0500 schrieb Adam Ford:
> On Thu, Apr 20, 2023 at 8:06 AM Lucas Stach <l.stach@pengutronix.de> wrote:
> > 
> > Hi Adam,
> > 
> > Am Mittwoch, dem 19.04.2023 um 05:47 -0500 schrieb Adam Ford:
> > > On Mon, Apr 17, 2023 at 6:55 AM Adam Ford <aford173@gmail.com> wrote:
> > > > 
> > > > On Mon, Apr 17, 2023 at 3:43 AM Lucas Stach <l.stach@pengutronix.de> wrote:
> > > > > 
> > > > > Hi Adam,
> > > > > 
> > > > > Am Samstag, dem 15.04.2023 um 05:40 -0500 schrieb Adam Ford:
> > > > > > If there is more than one lane, the HFP, HBP, and HSA is calculated in
> > > > > > bytes/pixel, then they are divided amongst the different lanes with some
> > > > > > additional overhead. This is necessary to achieve higher resolutions while
> > > > > > keeping the pixel clocks lower as the number of lanes increase.
> > > > > > 
> > > > > 
> > > > > In the testing I did to come up with my patch "drm: bridge: samsung-
> > > > > dsim: fix blanking packet size calculation" the number of lanes didn't
> > > > > make any difference. My testing might be flawed, as I could only
> > > > > measure the blanking after translation from MIPI DSI to DPI, so I'm
> > > > > interested to know what others did here. How did you validate the
> > > > > blanking with your patch? Would you have a chance to test my patch and
> > > > > see if it works or breaks in your setup?
> > > 
> > > Lucas,
> > > 
> > > I tried your patch instead of mine.  Yours is dependent on the
> > > hs_clock being always set to the burst clock which is configured by
> > > the device tree.  I unrolled a bit of my stuff and replaced it with
> > > yours.  It worked at 1080p, but when I tried a few other resolutions,
> > > they did not work.  I assume it's because the DSI clock is fixed and
> > > not changing based on the pixel clock.  In the version I did, I only
> > > did that math when the lanes were > 1. In your patch, you divide by 8,
> > > and in mine, I fetch the bits-per-pixel (which is 8) and I divide by
> > > that just in case the bpp ever changes from 8.  Overall,  I think our
> > > patches basically do the same thing.
> > 
> > The calculations in your and my patch are quite different. I'm not
> > taking into account the number of lanes or the MIPI format. I'm basing
> 
> I was looking more at the division by 8 and the overhead correction of 6.
> This number 6 also appears in the NXP downstream kernel [1].  I know
> Marek V had some concerns about that.
> 
Yea, I don't fully remember the details about the overhead. Need to
page that back in. The division by 8 in my patch is just to get from
the bit to a byte clock, nothing to do with the MIPI format bits per
channel or something like that.

> > the blanking size purely on the ratio between MIPI DSI byte clock and
> > the DPI interface clock. It's quite counter-intuitive that the host
> > would scale the blanking to the number of lanes automatically, but
> > still require the MIPI packet offset removed, but that's what my
> > measurements showed to produce the correct blanking after translation
> > to DPI by the TC358767 bridge chip.
> 
> How many lanes is your DSI interface using?
> 
When I did the measurements to come up with the patch, I varied the
number of lanes between 1 and 4. Different number of lanes didn't make
a difference. In fact trying to compensate for the number of lanes when
calculating the blanking size to program into the controller lead to
wildly wrong blanking on the DPI side of the external bridge.

> > 
> > If you dynamically scale the HS clock, then you would need to input the
> > real used HS clock to the calculation in my patch, instead of the fixed
> > burst mode rate.
> 
> I think what you're saying makes sense.
> 
> The code I originally modeled this from was from the NXP downstream
> kernel where they define the calculation as being in words [2]. I am
> not saying the NXP code is perfect, but the NXP code works.  With this
> series, my monitors are able to sync a bunch of different resolutions
> from 1080p down to 640x480 and a bunch in between with various refresh
> rates too. That was the goal of this series.
> 
> Instead of just using your patch as-is, I will adapt yours to use the
> scaled clock to see how it behaves and get back to you.
> 

Thanks, that would be very much appreciated.

I also don't assert that my calculation is perfect, as I also don't
have any more information and needed to resort to observing the
blanking after translation by the external bridge, so I hope we could
get some better understanding of how things really work by checking
what works for both our systems.

>   I have
> finished reworking the dynamic DPHY settings, and I've fixed up making
> the PLL device tree optional. If your patch works, I'll drop my
> calculation and just build off what you have to use the scaled HS
> clock when I submit the V2 and I'll make sure I CC you.
> 
Thanks, I'm open to re-do my measurements with your new patches.

Regards,
Lucas

> adam
> 
> [1] - https://github.com/nxp-imx/linux-imx/blob/lf-6.1.y/drivers/gpu/drm/bridge/sec-dsim.c#L270
> [2] - https://github.com/nxp-imx/linux-imx/blob/lf-6.1.y/drivers/gpu/drm/bridge/sec-dsim.c#L914
> 
> > 
> > Regards,
> > Lucas


_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply

* Re: [PATCH v2 08/19] KVM: arm64: Save/restore TCR2_EL1
From: Joey Gouly @ 2023-04-20 14:11 UTC (permalink / raw)
  To: Marc Zyngier
  Cc: linux-arm-kernel, nd, broonie, catalin.marinas, james.morse,
	mark.rutland, oliver.upton, suzuki.poulose, will, yuzenghui
In-Reply-To: <86v8hqkjb1.wl-maz@kernel.org>

Hi,

On Thu, Apr 20, 2023 at 10:13:38AM +0100, Marc Zyngier wrote:
> On Thu, 13 Apr 2023 12:05:02 +0100,
> Joey Gouly <joey.gouly@arm.com> wrote:
> > 
> > Define the new system register TCR2_EL1 and context switch it.
> > 
> > Signed-off-by: Joey Gouly <joey.gouly@arm.com>
> > Cc: Marc Zyngier <maz@kernel.org>
> > Cc: Oliver Upton <oliver.upton@linux.dev>
> > Cc: James Morse <james.morse@arm.com>
> > Cc: Suzuki K Poulose <suzuki.poulose@arm.com>
> > Cc: Zenghui Yu <yuzenghui@huawei.com>
> > Cc: Catalin Marinas <catalin.marinas@arm.com>
> > Cc: Will Deacon <will@kernel.org>
> > Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
> > ---
> >  arch/arm64/include/asm/kvm_host.h          | 1 +
> >  arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h | 4 ++++
> >  arch/arm64/kvm/sys_regs.c                  | 1 +
> >  3 files changed, 6 insertions(+)
> > 
> > diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
> > index bcd774d74f34..e1137832a01f 100644
> > --- a/arch/arm64/include/asm/kvm_host.h
> > +++ b/arch/arm64/include/asm/kvm_host.h
> > @@ -269,6 +269,7 @@ enum vcpu_sysreg {
> >  	TTBR0_EL1,	/* Translation Table Base Register 0 */
> >  	TTBR1_EL1,	/* Translation Table Base Register 1 */
> >  	TCR_EL1,	/* Translation Control Register */
> > +	TCR2_EL1,	/* Extended Translation Control Register */
> >  	ESR_EL1,	/* Exception Syndrome Register */
> >  	AFSR0_EL1,	/* Auxiliary Fault Status Register 0 */
> >  	AFSR1_EL1,	/* Auxiliary Fault Status Register 1 */
> > diff --git a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
> > index 699ea1f8d409..16199a107a47 100644
> > --- a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
> > +++ b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
> > @@ -44,6 +44,8 @@ static inline void __sysreg_save_el1_state(struct kvm_cpu_context *ctxt)
> >  	ctxt_sys_reg(ctxt, TTBR0_EL1)	= read_sysreg_el1(SYS_TTBR0);
> >  	ctxt_sys_reg(ctxt, TTBR1_EL1)	= read_sysreg_el1(SYS_TTBR1);
> >  	ctxt_sys_reg(ctxt, TCR_EL1)	= read_sysreg_el1(SYS_TCR);
> > +	if (cpus_have_final_cap(ARM64_HAS_TCR2))
> > +		ctxt_sys_reg(ctxt, TCR2_EL1)	= read_sysreg_el1(SYS_TCR2);
> >  	ctxt_sys_reg(ctxt, ESR_EL1)	= read_sysreg_el1(SYS_ESR);
> >  	ctxt_sys_reg(ctxt, AFSR0_EL1)	= read_sysreg_el1(SYS_AFSR0);
> >  	ctxt_sys_reg(ctxt, AFSR1_EL1)	= read_sysreg_el1(SYS_AFSR1);
> > @@ -114,6 +116,8 @@ static inline void __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt)
> >  	write_sysreg_el1(ctxt_sys_reg(ctxt, CPACR_EL1),	SYS_CPACR);
> >  	write_sysreg_el1(ctxt_sys_reg(ctxt, TTBR0_EL1),	SYS_TTBR0);
> >  	write_sysreg_el1(ctxt_sys_reg(ctxt, TTBR1_EL1),	SYS_TTBR1);
> > +	if (cpus_have_final_cap(ARM64_HAS_TCR2))
> > +		write_sysreg_el1(ctxt_sys_reg(ctxt, TCR2_EL1),	SYS_TCR2);
> >  	write_sysreg_el1(ctxt_sys_reg(ctxt, ESR_EL1),	SYS_ESR);
> >  	write_sysreg_el1(ctxt_sys_reg(ctxt, AFSR0_EL1),	SYS_AFSR0);
> >  	write_sysreg_el1(ctxt_sys_reg(ctxt, AFSR1_EL1),	SYS_AFSR1);
> > diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
> > index 53749d3a0996..5e7e4a433035 100644
> > --- a/arch/arm64/kvm/sys_regs.c
> > +++ b/arch/arm64/kvm/sys_regs.c
> > @@ -1871,6 +1871,7 @@ static const struct sys_reg_desc sys_reg_descs[] = {
> >  	{ SYS_DESC(SYS_TTBR0_EL1), access_vm_reg, reset_unknown, TTBR0_EL1 },
> >  	{ SYS_DESC(SYS_TTBR1_EL1), access_vm_reg, reset_unknown, TTBR1_EL1 },
> >  	{ SYS_DESC(SYS_TCR_EL1), access_vm_reg, reset_val, TCR_EL1, 0 },
> > +	{ SYS_DESC(SYS_TCR2_EL1), NULL, reset_unknown, TCR2_EL1 },
> 
> I'm not convinced reset_unknown is the right thing, at least for the
> bits that are defined as "If EL2 and EL3 is not implemented, this bit
> resets to 0b0 on a reset."
> 
> Given that an EL1 guest isn't in control of EL2, I'm a bit wary that
> we start execution of the guest in a context that isn't well defined.
> My strong preference would be to reset TCR2 just like TCR, unless you
> can provide a explanation of why UNKNOWN is actually more correct.


You're right, I have changed this to reset to 0 like TCR_EL1.

Thanks,
Joey

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply

* Re: [PATCH v1 1/5] mtd: rawnand: meson: fix NAND access for read/write
From: Liang Yang @ 2023-04-20 14:22 UTC (permalink / raw)
  To: Arseniy Krasnov, Miquel Raynal, Richard Weinberger,
	Vignesh Raghavendra, Neil Armstrong, Kevin Hilman, Jerome Brunet,
	Martin Blumenstingl, Yixun Lan, Jianxin Pan
  Cc: oxffffaa, kernel, linux-mtd, linux-arm-kernel, linux-amlogic,
	linux-kernel
In-Reply-To: <ed74240c-5028-c151-7904-c437b87c5f2a@sberdevices.ru>

Hi Arseniy,

Sorry, I am busy on other things and will try it on AXG platform in next 
week.

On 2023/4/20 3:43, Arseniy Krasnov wrote:
> [ EXTERNAL EMAIL ]
> 
> On 17.04.2023 17:10, Arseniy Krasnov wrote:
>>
>>
>> On 17.04.2023 16:54, Liang Yang wrote:
>>> Hi Arseniy,
>>>
>>> On 2023/4/17 14:47, Arseniy Krasnov wrote:
>>>> [ EXTERNAL EMAIL ]
>>>>
>>>>
>>>>
>>>> On 13.04.2023 08:57, Liang Yang wrote:
>>>>> Hi Arseniy,
>>>>>
>>>>> On 2023/4/13 13:10, Arseniy Krasnov wrote:
>>>>>> [ EXTERNAL EMAIL ]
>>>>>>
>>>>>>
>>>>>>
>>>>>> On 12.04.2023 16:30, Liang Yang wrote:
>>>>>>> Hi,
>>>>>>>
>>>>>>> On 2023/4/12 20:03, Arseniy Krasnov wrote:
>>>>>>>> [ EXTERNAL EMAIL ]
>>>>>>>>
>>>>>>>>
>>>>>>>>
>>>>>>>> On 12.04.2023 13:24, Arseniy Krasnov wrote:
>>>>>>>>>
>>>>>>>>>
>>>>>>>>> On 12.04.2023 12:37, Liang Yang wrote:
>>>>>>>>>> Hi Arseniy,
>>>>>>>>>>
>>>>>>>>>> Thanks for pointing out this problem. also comment inline.
>>>>>>>>>>
>>>>>>>>>> On 2023/4/12 14:16, Arseniy Krasnov wrote:
>>>>>>>>>>> [ EXTERNAL EMAIL ]
>>>>>>>>>>>
>>>>>>>>>>> This fixes read/write functionality. New command sequences were ported
>>>>>>>>>>> from old vendor's driver. Without this patch driver works unstable. This
>>>>>>>>>>> change is tested with 'nanddump'/'nandwrite' utilities and mounting
>>>>>>>>>>> JFFS2 filesystem on AXG family (A113X SoC).
>>>>>>>>>>>
>>>>>>>>>>> Fixes: 8fae856c5350 ("mtd: rawnand: meson: add support for Amlogic NAND flash controller")
>>>>>>>>>>> Signed-off-by: Arseniy Krasnov <AVKrasnov@sberdevices.ru>
>>>>>>>>>>> ---
>>>>>>>>>>>       drivers/mtd/nand/raw/meson_nand.c | 116 ++++++++++++++++++++++++++----
>>>>>>>>>>>       1 file changed, 101 insertions(+), 15 deletions(-)
>>>>>>>>>>>
>>>>>>>>>>> diff --git a/drivers/mtd/nand/raw/meson_nand.c b/drivers/mtd/nand/raw/meson_nand.c
>>>>>>>>>>> index 074e14225c06..256c37c76526 100644
>>>>>>>>>>> --- a/drivers/mtd/nand/raw/meson_nand.c
>>>>>>>>>>> +++ b/drivers/mtd/nand/raw/meson_nand.c
>>>>>>>>>>> @@ -26,6 +26,7 @@
>>>>>>>>>>>       #define NFC_CMD_IDLE        (0xc << 14)
>>>>>>>>>>>       #define NFC_CMD_CLE        (0x5 << 14)
>>>>>>>>>>>       #define NFC_CMD_ALE        (0x6 << 14)
>>>>>>>>>>> +#define NFC_CMD_DRD        (0x8 << 14)
>>>>>>>>>>>       #define NFC_CMD_ADL        ((0 << 16) | (3 << 20))
>>>>>>>>>>>       #define NFC_CMD_ADH        ((1 << 16) | (3 << 20))
>>>>>>>>>>>       #define NFC_CMD_AIL        ((2 << 16) | (3 << 20))
>>>>>>>>>>> @@ -84,6 +85,7 @@
>>>>>>>>>>>         #define DMA_BUSY_TIMEOUT    0x100000
>>>>>>>>>>>       #define CMD_FIFO_EMPTY_TIMEOUT    1000
>>>>>>>>>>> +#define DEVICE_READY_TIMEOUT    1000
>>>>>>>>>>>         #define MAX_CE_NUM        2
>>>>>>>>>>>       @@ -255,8 +257,26 @@ static void meson_nfc_select_chip(struct nand_chip *nand, int chip)
>>>>>>>>>>>           }
>>>>>>>>>>>       }
>>>>>>>>>>>       +static int meson_nfc_wait_cmd_finish(struct meson_nfc *nfc,
>>>>>>>>>>> +                     unsigned int timeout_ms)
>>>>>>>>>>> +{
>>>>>>>>>>> +    u32 cmd_size = 0;
>>>>>>>>>>> +    int ret;
>>>>>>>>>>> +
>>>>>>>>>>> +    /* wait cmd fifo is empty */
>>>>>>>>>>> +    ret = readl_relaxed_poll_timeout(nfc->reg_base + NFC_REG_CMD, cmd_size,
>>>>>>>>>>> +                     !NFC_CMD_GET_SIZE(cmd_size),
>>>>>>>>>>> +                     10, timeout_ms * 1000);
>>>>>>>>>>> +    if (ret)
>>>>>>>>>>> +        dev_err(nfc->dev, "wait for empty CMD FIFO timed out\n");
>>>>>>>>>>> +
>>>>>>>>>>> +    return ret;
>>>>>>>>>>> +}
>>>>>>>>>>> +
>>>>>>>>>>>       static void meson_nfc_cmd_idle(struct meson_nfc *nfc, u32 time)
>>>>>>>>>>>       {
>>>>>>>>>>> +    meson_nfc_wait_cmd_finish(nfc, 0);
>>>>>>>>>>> +
>>>>>>>>>>>           writel(nfc->param.chip_select | NFC_CMD_IDLE | (time & 0x3ff),
>>>>>>>>>>>                  nfc->reg_base + NFC_REG_CMD);
>>>>>>>>>>>       }
>>>>>>>>>>> @@ -308,23 +328,9 @@ static void meson_nfc_drain_cmd(struct meson_nfc *nfc)
>>>>>>>>>>>            */
>>>>>>>>>>>           meson_nfc_cmd_idle(nfc, 0);
>>>>>>>>>>>           meson_nfc_cmd_idle(nfc, 0);
>>>>>>>>>>> +    meson_nfc_wait_cmd_finish(nfc, 1000);
>>>>>>>>>>>       }
>>>>>>>>>>>       -static int meson_nfc_wait_cmd_finish(struct meson_nfc *nfc,
>>>>>>>>>>> -                     unsigned int timeout_ms)
>>>>>>>>>>> -{
>>>>>>>>>>> -    u32 cmd_size = 0;
>>>>>>>>>>> -    int ret;
>>>>>>>>>>> -
>>>>>>>>>>> -    /* wait cmd fifo is empty */
>>>>>>>>>>> -    ret = readl_relaxed_poll_timeout(nfc->reg_base + NFC_REG_CMD, cmd_size,
>>>>>>>>>>> -                     !NFC_CMD_GET_SIZE(cmd_size),
>>>>>>>>>>> -                     10, timeout_ms * 1000);
>>>>>>>>>>> -    if (ret)
>>>>>>>>>>> -        dev_err(nfc->dev, "wait for empty CMD FIFO time out\n");
>>>>>>>>>>> -
>>>>>>>>>>> -    return ret;
>>>>>>>>>>> -}
>>>>>>>>>>>         static int meson_nfc_wait_dma_finish(struct meson_nfc *nfc)
>>>>>>>>>>>       {
>>>>>>>>>>> @@ -631,6 +637,48 @@ static int meson_nfc_rw_cmd_prepare_and_execute(struct nand_chip *nand,
>>>>>>>>>>>           return 0;
>>>>>>>>>>>       }
>>>>>>>>>>>       +static uint8_t meson_nfc_read_byte(struct nand_chip *nand)
>>>>>>>>>>> +{
>>>>>>>>>>> +    struct meson_nfc *nfc = nand_get_controller_data(nand);
>>>>>>>>>>> +
>>>>>>>>>>> +    writel(NFC_CMD_DRD, nfc->reg_base + NFC_REG_CMD);
>>>>>>>>>>> +    meson_nfc_cmd_idle(nfc, nfc->timing.twb);
>>>>>>>>>>> +    meson_nfc_drain_cmd(nfc);
>>>>>>>>>>> +
>>>>>>>>>>> +    return readl(nfc->reg_base + NFC_REG_BUF);
>>>>>>>>>>> +}
>>>>>>>>>>> +
>>>>>>>>>>> +static int meson_nfc_wait_dev_ready(struct nand_chip *nand)
>>>>>>>>>>> +{
>>>>>>>>>>> +    struct meson_nfc *nfc = nand_get_controller_data(nand);
>>>>>>>>>>> +    u32 cs = nfc->param.chip_select;
>>>>>>>>>>> +    unsigned long cnt = 0;
>>>>>>>>>>> +
>>>>>>>>>>> +    meson_nfc_drain_cmd(nfc);
>>>>>>>>>>> +
>>>>>>>>>>> +    writel(cs | NFC_CMD_CLE | NAND_CMD_STATUS, nfc->reg_base + NFC_REG_CMD);
>>>>>>>>>>> +
>>>>>>>>>>> +    /* 10 ms. */
>>>>>>>>>>> +    while (cnt < DEVICE_READY_TIMEOUT) {
>>>>>>>>>>> +        uint8_t status;
>>>>>>>>>>> +
>>>>>>>>>>> +        status = meson_nfc_read_byte(nand);
>>>>>>>>>>> +
>>>>>>>>>>> +        if (status & NAND_STATUS_READY)
>>>>>>>>>>> +            break;
>>>>>>>>>>> +
>>>>>>>>>>> +        usleep_range(10, 11);
>>>>>>>>>>> +        cnt++;
>>>>>>>>>>> +    }
>>>>>>>>>>> +
>>>>>>>>>>> +    if (cnt == DEVICE_READY_TIMEOUT) {
>>>>>>>>>>> +        dev_err(nfc->dev, "device ready timeout\n");
>>>>>>>>>>> +        return -ETIMEDOUT;
>>>>>>>>>>> +    }
>>>>>>>>>>> +
>>>>>>>>>>> +    return 0;
>>>>>>>>>>> +}
>>>>>>>>>>> +
>>>>>>>>>>>       static int meson_nfc_write_page_sub(struct nand_chip *nand,
>>>>>>>>>>>                           int page, int raw)
>>>>>>>>>>>       {
>>>>>>>>>>> @@ -643,6 +691,10 @@ static int meson_nfc_write_page_sub(struct nand_chip *nand,
>>>>>>>>>>>           u32 cmd;
>>>>>>>>>>>           int ret;
>>>>>>>>>>>       +    ret = meson_nfc_wait_dev_ready(nand);
>>>>>>>>>>> +    if (ret)
>>>>>>>>>>> +        return ret;
>>>>>>>>>>> +
>>>>>>>>>>>           meson_nfc_select_chip(nand, nand->cur_cs);
>>>>>>>>>>>             data_len =  mtd->writesize + mtd->oobsize;
>>>>>>>>>>> @@ -667,12 +719,20 @@ static int meson_nfc_write_page_sub(struct nand_chip *nand,
>>>>>>>>>>>                            NFC_CMD_SCRAMBLER_DISABLE);
>>>>>>>>>>>           }
>>>>>>>>>>>       +    ret = meson_nfc_wait_dma_finish(nfc);
>>>>>>>>>>> +    if (ret)
>>>>>>>>>>> +        return ret;
>>>>>>>>>>> +
>>>>>>>>>>>           cmd = nfc->param.chip_select | NFC_CMD_CLE | NAND_CMD_PAGEPROG;
>>>>>>>>>>>           writel(cmd, nfc->reg_base + NFC_REG_CMD);
>>>>>>>>>>>           meson_nfc_queue_rb(nfc, PSEC_TO_MSEC(sdr->tPROG_max));
>>>>>>>>>>>             meson_nfc_dma_buffer_release(nand, data_len, info_len, DMA_TO_DEVICE);
>>>>>>>>>>>       +    ret = meson_nfc_wait_dev_ready(nand);
>>>>>>>>>>> +    if (ret)
>>>>>>>>>>> +        return ret;
>>>>>>>>>>> +
>>>>>>>>>>>           return ret;
>>>>>>>>>>>       }
>>>>>>>>>>>       @@ -720,6 +780,21 @@ static void meson_nfc_check_ecc_pages_valid(struct meson_nfc *nfc,
>>>>>>>>>>>           } while (!ret);
>>>>>>>>>>>       }
>>>>>>>>>>>       +static inline int meson_nfc_send_read(struct nand_chip *nand)
>>>>>>>>>>> +{
>>>>>>>>>>> +    struct meson_nfc *nfc = nand_get_controller_data(nand);
>>>>>>>>>>> +    u32 cs = nfc->param.chip_select;
>>>>>>>>>>> +    int ret;
>>>>>>>>>>> +
>>>>>>>>>>> +    ret = meson_nfc_wait_dev_ready(nand);
>>>>>>>>>>> +    if (ret)
>>>>>>>>>>> +        return ret;
>>>>>>>>>>> +
>>>>>>>>>>> +    writel(cs | NFC_CMD_CLE | NAND_CMD_READ0, nfc->reg_base + NFC_REG_CMD);
>>>>>>>>>>> +
>>>>>>>>>>> +    return 0;
>>>>>>>>>>> +}
>>>>>>>>>>> +
>>>>>>>>>>
>>>>>>>>>> it already calls meson_nfc_queue_rb() in meson_nfc_rw_cmd_prepare_and_execute(). Could you implements this in meson_nfc_queue_rb()? and we can use the irq method.
>>>>>>>>>> also without Ready/Busy pin, the meson_nfc_queue_rb() should change like below:
>>>>>>>>>>         ......
>>>>>>>>>>         #define NFC_CMD_RB_INT    ((0xb << 10) | BIT(18))
>>>>>>>>
>>>>>>>> Sorry, I can see this define as (and it is used in the driver):
>>>>>>>> #define NFC_CMD_RB_INT          BIT(14)
>>>>>>>>
>>>>>>>> in drivers/mtd/nand/raw/meson_nand.c
>>>>>>>>
>>>>>>>> Which one is correct ?
>>>>>>>
>>>>>>> we need to modify the define 'NFC_CMD_RB_INT' as ((0xb << 10) | BIT(18)).
>>>>>>>
>>>>>>
>>>>>> Ok, NFC_CMD_RB_INT - it is "Ready/Busy_Interrupt" ? You suppose that currently it is
>>>>>> defined incorrectly, so irq waiting does not work?
>>>>>
>>>>> Previous defined BIT(14) is for having the external Ready/Busy pin of the NAND device connected to the controller. the new define is for reading status by sending read status(70H) command and read status from the data bus(checking the IO6). the both can work on AXG soc.
>>>>> when the controller RB command is sent, the controller automatically checks the level of external Ready/Busy pin or the data bus(IO6) periodicity. and generate the irq signal if status is ready.
>>>>>
>>>>>>
>>>>>> Thanks, Arseniy
>>>>>>
>>>>>>>>
>>>>>>>> Thanks, Arseniy
>>>>>>>>
>>>>>>>>>>
>>>>>>>>>>         meson_nfc_cmd_idle(nfc, 0);
>>>>>>>>>>         cmd = nfc->param.chip_select | NFC_CMD_CLE | NAND_CMD_STATUS;
>>>>>>>>>>         writel(cmd, nfc->reg_base + NFC_REG_CMD);
>>>>>>>>>>         meson_nfc_cmd_idle(nfc, 5);
>>>>>>>>>>         cmd = NFC_CMD_RB | NFC_CMD_RB_INT | nfc->timing.tbers_max;
>>>>>>>>>>         writel(cmd, nfc->reg_base + NFC_REG_CMD);
>>>>>>>>>>
>>>>>>>>>>         ret = wait_for_completion_timeout(&nfc->completion,
>>>>>>>>>>                           msecs_to_jiffies(timeout_ms));
>>>>>>>>>>         if (ret == 0)
>>>>>>>>>>             ret = -1;
>>>>>>>>>>
>>>>>>>>>>         writel(cs | NFC_CMD_CLE | NAND_CMD_READ0, nfc->reg_base + NFC_REG_CMD);
>>>>>>>>>>         ......
>>>>>>>>>>
>>>>
>>>> Hello Liang!
>>>>
>>>> I've got small questions to clarify Your comment. You suggest two thing IIUC:
>>>>
>>>> 1) Send NAND_CMD_READ0 from 'meson_nfc_queue_rb()'. This means that extra argument is needed to
>>>> 'meson_nfc_queue_rb()' which shows that read operation is going to be performed. We can't send
>>>> NAND_CMD_READ0 for write operation?
>>>
>>> it is ok to me. but does NAND_CMD_READ0 really need to send in the controller driver? i don't find the other controller drivers have to send it for the old vendor NAND device.
>>
>> Hm, I found this command in the old driver. For example without it I get the following error:
>>
>> # nanddump -c -s 0 -l 2048 /dev/mtd0 --oob
>> ECC failed: 3975
>> ECC corrected: 47
>> Number of bad blocks: 10
>> Number of bbt blocks: 0
>> Block size 131072, page size 2048, OOB size 64
>> Dumping data starting at 0x00000000 and ending at 0x00000800...
>>
>> But data is not corrupted and seems ok. With this extra NAND_CMD_READ0 everything is ok - data is still valid and
>> there are no ECC errors.
>>
>>>
>>>>
>>>> 2) About code and define above, I tried to replace current body of 'meson_nfc_queue_rb()', but it
>>>> didn't work. May be I did it wrong, because what to do with 'meson_nfc_wait_dev_ready()' and it's
>>>> call sites? It must be removed? Could You please explain Your idea in more details? I'm asking You
>>>> because I don't have doc for this NAND controller, so it is very difficult to me to add valid
>>>> logic to this driver without any references
>>>
>>> Could you please try the following? i have tested it on another SOC (not axg).
>>>
>>> static int meson_nfc_queue_rb(struct meson_nfc *nfc, int timeout_ms)
>>> {
>>>      u32 cmd, cfg;
>>>      int ret = 0;
>>>
>>>      meson_nfc_cmd_idle(nfc, nfc->timing.twb);
>>>      meson_nfc_drain_cmd(nfc);
>>>      meson_nfc_wait_cmd_finish(nfc, CMD_FIFO_EMPTY_TIMEOUT);
>>>
>>>      cfg = readl(nfc->reg_base + NFC_REG_CFG);
>>>      cfg |= NFC_RB_IRQ_EN;
>>>      writel(cfg, nfc->reg_base + NFC_REG_CFG);
>>>
>>>      reinit_completion(&nfc->completion);
>>>
>>>      meson_nfc_cmd_idle(nfc, 0);
>>>      cmd = nfc->param.chip_select | NFC_CMD_CLE | NAND_CMD_STATUS;
>>>      writel(cmd, nfc->reg_base + NFC_REG_CMD);
>>>      meson_nfc_cmd_idle(nfc, 5);
>>>      cmd = NFC_CMD_RB | NFC_CMD_RB_INT
>>>          | nfc->param.chip_select | nfc->timing.tbers_max;
>>>      writel(cmd, nfc->reg_base + NFC_REG_CMD);
>>>      meson_nfc_drain_cmd(nfc);
>>>
>>>      ret = wait_for_completion_timeout(&nfc->completion,
>>>                        msecs_to_jiffies(timeout_ms));
>>>      if (ret == 0)
>>>          ret = -1;
>>>
>>>      writel(1 << 31, nfc->reg_base + NFC_REG_CMD);
>>>
>>>      return ret;
>>> }
>>
>> Ok! Thanks, I'll try it!
>>
>> Thanks, Arseniy
> 
> Hello @Liang, I tried this code, seems with this implementation NAND driver works very slow,
> here is for example output buring bad blocks lookup:
> 
> [    2.060835] Scanning device for bad blocks
> [    3.350085] Bad eraseblock 20 at 0x000000280000
> [    3.536389] Freeing initrd memory: 11808K
> [   39.133952] Bad eraseblock 581 at 0x0000048a0000
> [   44.837917] Bad eraseblock 671 at 0x0000053e0000
> [   45.677964] Bad eraseblock 685 at 0x0000055a0000
> [   83.637917] Bad eraseblock 1279 at 0x000009fe0000
> [  132.833318] modprobe (56) used greatest stack depth: 12672 bytes left
> 
> 
> Take a look at timeouts. I used Your variant of 'meson_nfc_queue_rb()' from above,
> #define NFC_CMD_RB_INT is ((0xb << 10) | BIT(18)). I tested it with my ports of
> of dev ready calls from the old vendor's driver.
> 
> Thanks, Arseniy
> 
>>
>>>
>>> also we need to check and return the return value for meson_nfc_queue_rb() in meson_nfc_rw_cmd_prepare_and_execute() and meson_nfc_write_page_sub().
>>>
>>>>
>>>> May be I can send v2 patchset for review without this change, as v2 already includes udpate for OOB
>>>> handling which is I think more critical?
>>>>
>>>
>>> sure, it is up to you. it is more important, thanks again.
>>>
>>>> Thanks, Arseniy
>>>>
>>>>>>>>>
>>>>>>>>>         Thanks for reply! I'll try this code! One more question about OOB processing in this
>>>>>>>>> driver (as You are author of it):
>>>>>>>>>
>>>>>>>>>        OOB size is 64 bytes, but for example if I have 1K ECC, 2 bytes user bytes and 14
>>>>>>>>>        bytes for ECC code for each 1K. In this case I have access to only 32 bytes of OOB:
>>>>>>>>>        2 x (2 user bytes + 14 ECC bytes). Correct me if i'm wrong, but rest of OOB (next
>>>>>>>>>        32 bytes) become unavailable (in both raw and ECC modes) ?
>>>>>>>>>
>>>>>>>>> Thanks, Arseniy
>>>>>>>>>
>>>>>>>>>>>       static int meson_nfc_read_page_sub(struct nand_chip *nand,
>>>>>>>>>>>                          int page, int raw)
>>>>>>>>>>>       {
>>>>>>>>>>> @@ -734,10 +809,18 @@ static int meson_nfc_read_page_sub(struct nand_chip *nand,
>>>>>>>>>>>           data_len =  mtd->writesize + mtd->oobsize;
>>>>>>>>>>>           info_len = nand->ecc.steps * PER_INFO_BYTE;
>>>>>>>>>>>       +    ret = meson_nfc_wait_dev_ready(nand);
>>>>>>>>>>> +    if (ret)
>>>>>>>>>>> +        return ret;
>>>>>>>>>>> +
>>>>>>>>>>>           ret = meson_nfc_rw_cmd_prepare_and_execute(nand, page, DIRREAD);
>>>>>>>>>>>           if (ret)
>>>>>>>>>>>               return ret;
>>>>>>>>>>>       +    ret = meson_nfc_send_read(nand);
>>>>>>>>>>> +    if (ret)
>>>>>>>>>>> +        return ret;
>>>>>>>>>>> +
>>>>>>>>>>>           ret = meson_nfc_dma_buffer_setup(nand, meson_chip->data_buf,
>>>>>>>>>>>                            data_len, meson_chip->info_buf,
>>>>>>>>>>>                            info_len, DMA_FROM_DEVICE);
>>>>>>>>>>> @@ -754,6 +837,9 @@ static int meson_nfc_read_page_sub(struct nand_chip *nand,
>>>>>>>>>>>           }
>>>>>>>>>>>             ret = meson_nfc_wait_dma_finish(nfc);
>>>>>>>>>>> +    if (ret)
>>>>>>>>>>> +        return ret;
>>>>>>>>>>> +
>>>>>>>>>>>           meson_nfc_check_ecc_pages_valid(nfc, nand, raw);
>>>>>>>>>>>             meson_nfc_dma_buffer_release(nand, data_len, info_len, DMA_FROM_DEVICE);
>>>>>>>>>>
>>>>>>>>
>>>>>>>
>>>>>>
>>>>>
>>>>

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply

* Re: [PATCH] perf cs-etm: Add support for coresight trace for any range of CPUs
From: Ganapatrao Kulkarni @ 2023-04-20 14:30 UTC (permalink / raw)
  To: James Clark
  Cc: mathieu.poirier, acme, darren, scott, scclevenger, linux-kernel,
	linux-arm-kernel, mike.leach, suzuki.poulose
In-Reply-To: <dea08376-e66b-bacc-7673-c79fe2a8f889@os.amperecomputing.com>

Hi James,

On 20-04-2023 06:07 pm, Ganapatrao Kulkarni wrote:
> 
> 
> On 20-04-2023 06:00 pm, James Clark wrote:
>>
>>
>> On 20/04/2023 12:47, Ganapatrao Kulkarni wrote:
>>>
>>> Hi James,
>>>
>>> On 20-04-2023 03:13 pm, James Clark wrote:
>>>>
>>>>
>>>> On 19/04/2023 18:21, Ganapatrao Kulkarni wrote:
>>>>> The current implementation supports coresight trace for a range of
>>>>> CPUs, if the first CPU is CPU0.
>>>>>
>>>>> Adding changes to enable coresight trace for any range of CPUs by
>>>>> decoding the first CPU also from the header.
>>>>> Later, first CPU id is used instead of CPU0 across the decoder
>>>>> functions.
>>>>>
>>>>> Signed-off-by: Ganapatrao Kulkarni 
>>>>> <gankulkarni@os.amperecomputing.com>
>>>>> ---
>>>>>    .../perf/util/cs-etm-decoder/cs-etm-decoder.c |  4 +-
>>>>>    .../perf/util/cs-etm-decoder/cs-etm-decoder.h |  3 +-
>>>>>    tools/perf/util/cs-etm.c                      | 62 
>>>>> ++++++++++++-------
>>>>>    3 files changed, 42 insertions(+), 27 deletions(-)
>>>>>
>>>>> diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
>>>>> b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
>>>>> index 82a27ab90c8b..41ab299b643b 100644
>>>>> --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
>>>>> +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
>>>>> @@ -724,7 +724,7 @@ cs_etm_decoder__create_etm_decoder(struct
>>>>> cs_etm_decoder_params *d_params,
>>>>>    }
>>>>>      struct cs_etm_decoder *
>>>>> -cs_etm_decoder__new(int decoders, struct cs_etm_decoder_params
>>>>> *d_params,
>>>>> +cs_etm_decoder__new(int first_decoder, int decoders, struct
>>>>> cs_etm_decoder_params *d_params,
>>>>>                struct cs_etm_trace_params t_params[])
>>>>>    {
>>>>>        struct cs_etm_decoder *decoder;
>>>>> @@ -769,7 +769,7 @@ cs_etm_decoder__new(int decoders, struct
>>>>> cs_etm_decoder_params *d_params,
>>>>>        /* init raw frame logging if required */
>>>>>        cs_etm_decoder__init_raw_frame_logging(d_params, decoder);
>>>>>    -    for (i = 0; i < decoders; i++) {
>>>>> +    for (i = first_decoder; i < decoders; i++) {
>>>>>            ret = cs_etm_decoder__create_etm_decoder(d_params,
>>>>>                                 &t_params[i],
>>>>>                                 decoder);
>>>>> diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h
>>>>> b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h
>>>>> index 92a855fbe5b8..b06193fc75b4 100644
>>>>> --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h
>>>>> +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h
>>>>> @@ -90,7 +90,8 @@ int cs_etm_decoder__process_data_block(struct
>>>>> cs_etm_decoder *decoder,
>>>>>                           size_t len, size_t *consumed);
>>>>>      struct cs_etm_decoder *
>>>>> -cs_etm_decoder__new(int num_cpu,
>>>>> +cs_etm_decoder__new(int first_decoder,
>>>>> +            int decoders,
>>>>>                struct cs_etm_decoder_params *d_params,
>>>>>                struct cs_etm_trace_params t_params[]);
>>>>>    diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
>>>>> index 94e2d02009eb..2619513ae088 100644
>>>>> --- a/tools/perf/util/cs-etm.c
>>>>> +++ b/tools/perf/util/cs-etm.c
>>>>> @@ -55,6 +55,8 @@ struct cs_etm_auxtrace {
>>>>>        u8 has_virtual_ts; /* Virtual/Kernel timestamps in the 
>>>>> trace. */
>>>>>          int num_cpu;
>>>>> +    int first_cpu;
>>>>> +    int last_cpu;
>>>>>        u64 latest_kernel_timestamp;
>>>>>        u32 auxtrace_type;
>>>>>        u64 branches_sample_type;
>>>>> @@ -638,14 +640,13 @@ static void cs_etm__set_trace_param_ete(struct
>>>>> cs_etm_trace_params *t_params,
>>>>>    }
>>>>>      static int cs_etm__init_trace_params(struct cs_etm_trace_params
>>>>> *t_params,
>>>>> -                     struct cs_etm_auxtrace *etm,
>>>>> -                     int decoders)
>>>>> +                     struct cs_etm_auxtrace *etm)
>>>>>    {
>>>>>        int i;
>>>>>        u32 etmidr;
>>>>>        u64 architecture;
>>>>>    -    for (i = 0; i < decoders; i++) {
>>>>> +    for (i = etm->first_cpu; i < etm->last_cpu; i++) {
>>>>>            architecture = etm->metadata[i][CS_ETM_MAGIC];
>>>>>              switch (architecture) {
>>>>> @@ -817,7 +818,7 @@ static void cs_etm__free(struct perf_session
>>>>> *session)
>>>>>        /* Then the RB tree itself */
>>>>>        intlist__delete(traceid_list);
>>>>>    -    for (i = 0; i < aux->num_cpu; i++)
>>>>> +    for (i = aux->first_cpu; i < aux->last_cpu; i++)
>>>>>            zfree(&aux->metadata[i]);
>>>>>          thread__zput(aux->unknown_thread);
>>>>> @@ -921,7 +922,8 @@ static struct cs_etm_queue
>>>>> *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm,
>>>>>         * Each queue can only contain data from one CPU when
>>>>> unformatted, so only one decoder is
>>>>>         * needed.
>>>>>         */
>>>>> -    int decoders = formatted ? etm->num_cpu : 1;
>>>>> +    int first_decoder = formatted ? etm->first_cpu : 0;
>>>>> +    int decoders = first_decoder + (formatted ? etm->num_cpu : 1);
>>>>>          etmq = zalloc(sizeof(*etmq));
>>>>>        if (!etmq)
>>>>> @@ -937,7 +939,7 @@ static struct cs_etm_queue
>>>>> *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm,
>>>>>        if (!t_params)
>>>>>            goto out_free;
>>>>>    -    if (cs_etm__init_trace_params(t_params, etm, decoders))
>>>>> +    if (cs_etm__init_trace_params(t_params, etm))
>>>>>            goto out_free;
>>>>>          /* Set decoder parameters to decode trace packets */
>>>>> @@ -947,8 +949,7 @@ static struct cs_etm_queue
>>>>> *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm,
>>>>>                        formatted))
>>>>>            goto out_free;
>>>>>    -    etmq->decoder = cs_etm_decoder__new(decoders, &d_params,
>>>>> -                        t_params);
>>>>> +    etmq->decoder = cs_etm_decoder__new(first_decoder, decoders,
>>>>> &d_params, t_params);
>>>>>          if (!etmq->decoder)
>>>>>            goto out_free;
>>>>> @@ -2959,11 +2960,11 @@ static int cs_etm__queue_aux_records(struct
>>>>> perf_session *session)
>>>>>     * Loop through the ETMs and complain if we find at least one where
>>>>> ts_source != 1 (virtual
>>>>>     * timestamps).
>>>>>     */
>>>>> -static bool cs_etm__has_virtual_ts(u64 **metadata, int num_cpu)
>>>>> +static bool cs_etm__has_virtual_ts(u64 **metadata, struct
>>>>> cs_etm_auxtrace *etm)
>>>>>    {
>>>>>        int j;
>>>>>    -    for (j = 0; j < num_cpu; j++) {
>>>>> +    for (j = etm->first_cpu; j < etm->last_cpu; j++) {
>>>>>            switch (metadata[j][CS_ETM_MAGIC]) {
>>>>>            case __perf_cs_etmv4_magic:
>>>>>                if (HAS_PARAM(j, ETMV4, TS_SOURCE) ||
>>>>> metadata[j][CS_ETMV4_TS_SOURCE] != 1)
>>>>> @@ -2982,13 +2983,14 @@ static bool cs_etm__has_virtual_ts(u64
>>>>> **metadata, int num_cpu)
>>>>>    }
>>>>>      /* map trace ids to correct metadata block, from information in
>>>>> metadata */
>>>>> -static int cs_etm__map_trace_ids_metadata(int num_cpu, u64 
>>>>> **metadata)
>>>>> +static int cs_etm__map_trace_ids_metadata(struct cs_etm_auxtrace 
>>>>> *etm)
>>>>>    {
>>>>>        u64 cs_etm_magic;
>>>>> +    u64 **metadata = etm->metadata;
>>>>>        u8 trace_chan_id;
>>>>>        int i, err;
>>>>>    -    for (i = 0; i < num_cpu; i++) {
>>>>> +    for (i = etm->first_cpu; i < etm->last_cpu; i++) {
>>>>>            cs_etm_magic = metadata[i][CS_ETM_MAGIC];
>>>>>            switch (cs_etm_magic) {
>>>>>            case __perf_cs_etmv3_magic:
>>>>> @@ -3015,12 +3017,13 @@ static int cs_etm__map_trace_ids_metadata(int
>>>>> num_cpu, u64 **metadata)
>>>>>     * If we found AUX_HW_ID packets, then set any metadata marked as
>>>>> unused to the
>>>>>     * unused value to reduce the number of unneeded decoders created.
>>>>>     */
>>>>> -static int cs_etm__clear_unused_trace_ids_metadata(int num_cpu, u64
>>>>> **metadata)
>>>>> +static int cs_etm__clear_unused_trace_ids_metadata(struct
>>>>> cs_etm_auxtrace *etm)
>>>>>    {
>>>>>        u64 cs_etm_magic;
>>>>> +    u64 **metadata = etm->metadata;
>>>>>        int i;
>>>>>    -    for (i = 0; i < num_cpu; i++) {
>>>>> +    for (i = etm->first_cpu; i < etm->last_cpu; i++) {
>>>>>            cs_etm_magic = metadata[i][CS_ETM_MAGIC];
>>>>>            switch (cs_etm_magic) {
>>>>>            case __perf_cs_etmv3_magic:
>>>>> @@ -3049,7 +3052,7 @@ int cs_etm__process_auxtrace_info_full(union
>>>>> perf_event *event,
>>>>>        int event_header_size = sizeof(struct perf_event_header);
>>>>>        int total_size = auxtrace_info->header.size;
>>>>>        int priv_size = 0;
>>>>> -    int num_cpu;
>>>>> +    int num_cpu, first_cpu = 0, last_cpu;
>>>>>        int err = 0;
>>>>>        int aux_hw_id_found;
>>>>>        int i, j;
>>>>> @@ -3068,22 +3071,31 @@ int cs_etm__process_auxtrace_info_full(union
>>>>> perf_event *event,
>>>>>        /* First the global part */
>>>>>        ptr = (u64 *) auxtrace_info->priv;
>>>>>        num_cpu = ptr[CS_PMU_TYPE_CPUS] & 0xffffffff;
>>>>> -    metadata = zalloc(sizeof(*metadata) * num_cpu);
>>>>> +
>>>>> +    /* Start parsing after the common part of the header */
>>>>> +    i = CS_HEADER_VERSION_MAX;
>>>>> +
>>>>> +    /*Get CPU id of first event */
>>>>> +    first_cpu = ptr[i + CS_ETM_CPU];
>>>>> +    last_cpu = first_cpu + num_cpu;
>>>>> +
>>>>> +    if (first_cpu > cpu__max_cpu().cpu ||
>>>>> +            last_cpu > cpu__max_cpu().cpu)
>>>>> +        return -EINVAL;
>>>>> +
>>>>> +    metadata = zalloc(sizeof(*metadata) * last_cpu);
>>>>
>>>> Hi Ganapatrao,
>>>>
>>>> I think I see what the problem is, but I'm wondering if a better fix
>>>> would be to further decouple the CPU ID from the index in the array.
>>>>
>>>> With your change it's not clear what happens with sparse recordings, 
>>>> for
>>>> example 'perf record -e cs_etm// -C 1,3,5'. And it seems like there is
>>>
>>> This patch fixes for any range of CPUs.
>>> Record with sparse list of CPUs will not work with current code still.
>>>
>>
>> Is there a major issue that means sparse can't be done? I'm thinking it
>> would be best to fix both issues with one change while we understand
>> this part rather than a half fix that might have do be completely
>> re-understood and re-done later anyway. Unless there is some big blocker
>> but I can't see it?
>>
>>>> some wastage in the zalloc here for example if only CPU 256 is traced
>>>> then we'd still make 256 decoders but 255 of them would be unused?
>>>>
>>>> I tried to test sparse recordings, but your change doesn't apply to the
>>>> latest coresight/next branch. I did notice that 'perf report -D' 
>>>> doesn't
>>>> work with them on coresight/next (it just quits), but I couldn't see if
>>>> that's fixed with your change.
>>>
>>> My patch is rebased on 6.3-RC7 codebase with Mike's 3 perf patches
>>> related to dynamic id [1] support(queued for 6.4).
>>>
>>> "perf report -D" works for me.
>>
>> I was referring to sparse CPU lists, which I think you mentioned above
>> doesn't work even with this patch.
>>
>>>
>>> [1] https://www.spinics.net/lists/linux-perf-users/msg27452.html
>>>
>>
>> It should be based on the next branch here:
>> git://git.kernel.org/pub/scm/linux/kernel/git/coresight/linux.git
> 
> OK.
>>
>>>>
>>>> Would a better fix not be to keep the metadata loops from 0-N and
>>>> instead save the CPU ID in cs_etm_decoder_params or the decoder. That
>>>> way it would support both sparse and not starting from 0 cases? I think
>>>
>>> Yep, I though this initially, it got complicated due to for more
>>> for-loops. I will try again and post V2.
>>>
>>
>> I can't imagine it would need any extra for loops off the top of my
>> head. Just saving the CPU ID in a few structs and using it wherever it's
>> needed instead of the loop index. I imagine most of the loops would
>> actually stay the same rather than be changed like you have in V1.
> 
> Working on V2, I will post it at the earliest.

My codebase is "6.3-RC7 + Mike's patche[1] + Anshuman/Suzuki patches[3]" 
and below diff works for any CPU range and sparse list as well.

I will send the V2 ASAP.

[root@B0-213 perf]# git diff
diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
index 94e2d02009eb..bfed0bb8be98 100644
--- a/tools/perf/util/cs-etm.c
+++ b/tools/perf/util/cs-etm.c
@@ -274,6 +274,22 @@ static int cs_etm__metadata_set_trace_id(u8 
trace_chan_id, u64 *cpu_metadata)
                 (typeof(_mask))(((_reg) & (_mask)) >> __bf_shf(_mask)); \
         })

+
+static u64 *get_cpu_data(struct cs_etm_auxtrace *etm, int cpu)
+{
+       int i;
+       u64 *metadata = NULL;
+
+       for (i = 0; i < etm->num_cpu; i++) {
+               if (etm->metadata[i][CS_ETM_CPU] == (u64)cpu) {
+                       metadata = etm->metadata[i];
+                       break;
+               }
+       }
+
+       return metadata;
+}
+
  /*
   * Handle the PERF_RECORD_AUX_OUTPUT_HW_ID event.
   *
@@ -344,7 +360,7 @@ static int cs_etm__process_aux_output_hw_id(struct 
perf_session *session,
         }

         /* not one we've seen before - lets map it */
-       cpu_data = etm->metadata[cpu];
+       cpu_data = get_cpu_data(etm, cpu);
         err = cs_etm__map_trace_id(trace_chan_id, cpu_data);
         if (err)
                 return err;


I tried below commands and works well with this diff.

timeout 8s ./perf record -e cs_etm// -C 0 dd if=/dev/zero of=/dev/null
timeout 8s ./perf record -e cs_etm// -C 1 dd if=/dev/zero of=/dev/null
timeout 8s ./perf record -e cs_etm// -C 1,3,5 dd if=/dev/zero of=/dev/null
timeout 8s ./perf record -e cs_etm// -C 10,3,12 dd if=/dev/zero of=/dev/null
timeout 8s ./perf record -e cs_etm//u -C 0-10 dd if=/dev/zero of=/dev/null
timeout 8s ./perf record -e cs_etm//u -C 10-20 dd if=/dev/zero of=/dev/nu

However I could not be able to apply to next branch of coresight repo
git://git.kernel.org/pub/scm/linux/kernel/git/coresight/linux.git

Then I looked at the code, it does not have Mike's dynamic ID patches[1] 
which are merged to perf/next.
It seems the code is not the latest or it is next for coresight driver only.

I think either patch should be rebased to most-recent kernel 
RC1/RCx(which I did) or perf-tools-next of [2] for perf tool.

[1] https://www.spinics.net/lists/linux-perf-users/msg27452.html
[2] git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux.git
[3] https://www.spinics.net/lists/arm-kernel/msg1059012.html

>>
>>>> the code would be better if it's worded like "i < recorded_cpus" rather
>>>> than "i < cpu" to make it clear that i isn't actually the CPU ID it's
>>>> just an index.
>>>
>>> Yes, makes sense to call it "recorded_cpus".
>>>
>>>>
>>>> Also a new test for this scenario would probably be a good idea.
>>>>
>>>> Thanks
>>>> James
>>>>
>>> Thanks,
>>> Ganapat
> 
> Thanks,
> Ganapat

Thanks,
Ganapat

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply related

* Re: [PATCH v2 2/2] pwm: mediatek: Add support for MT7981
From: AngeloGioacchino Del Regno @ 2023-04-20 14:32 UTC (permalink / raw)
  To: Daniel Golle, linux-pwm, linux-mediatek, linux-arm-kernel,
	linux-kernel, Thierry Reding, Uwe Kleine-König,
	Matthias Brugger, John Crispin
In-Reply-To: <7c6e31c844642c199f223f4229a04a37b57a34f3.1681992038.git.daniel@makrotopia.org>

Il 20/04/23 14:36, Daniel Golle ha scritto:
> The PWM unit on MT7981 uses different register offsets than previous
> MediaTek PWM units. Add support for these new offsets and add support
> for PWM on MT7981 which has 3 PWM channels, one of them is typically
> used for a temperature controlled fan.
> 
> Signed-off-by: Daniel Golle <daniel@makrotopia.org>

The implementation is good now; there's only one nitpick: you're reordering
the platform data entries and I agree about doing that, as they should be
alphabetically sorted, but you didn't mention that in the commit message.

Please send a v3 that mentions that you're also reordering.

"...while at it, also reorder..."

After which:

Reviewed-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply

* Re: [PATCH 1/2] wifi: mt7601u: delete dead code checking debugfs returns
From: Jakub Kicinski @ 2023-04-20 14:33 UTC (permalink / raw)
  To: Wang Jikai
  Cc: Kalle Valo, David S. Miller, Eric Dumazet, Paolo Abeni,
	Matthias Brugger, AngeloGioacchino Del Regno,
	hust-os-kernel-patches, Jakub Kicinski, linux-wireless, netdev,
	linux-kernel, linux-arm-kernel, linux-mediatek
In-Reply-To: <20230420130815.8425-1-wangjikai@hust.edu.cn>

On Thu, 20 Apr 2023 13:08:14 +0000 Wang Jikai wrote:
> Smatch reports that:
> drivers/net/wireless/mediatek/mt7601u/debugfs.c:130
> mt7601u_init_debugfs() warn: 'dir' is an error pointer or valid".
> 
> Debugfs code is not supposed to need error checking so instead of
> changing this to if (IS_ERR()) the correct thing is to just delete
> the dead code.
> 
> Fixes: c869f77d6abb ("add mt7601u driver")

Don't add a Fixes tag on this cleanup.
one - dead code is not a bug
two - the semantics have changed since the driver was added
      so it's certainly not the right Fixes tag

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply

* Re: [PATCH 2/2] wifi: mt7601u: remove debugfs directory on disconnect
From: Jakub Kicinski @ 2023-04-20 14:35 UTC (permalink / raw)
  To: Wang Jikai
  Cc: Kalle Valo, David S. Miller, Eric Dumazet, Paolo Abeni,
	Matthias Brugger, AngeloGioacchino Del Regno,
	hust-os-kernel-patches, Jakub Kicinski, linux-wireless, netdev,
	linux-kernel, linux-arm-kernel, linux-mediatek
In-Reply-To: <20230420130924.8702-1-wangjikai@hust.edu.cn>

On Thu, 20 Apr 2023 13:09:24 +0000 Wang Jikai wrote:
> Debugfs is created during device init but not removed.
> Add a function mt7601u_remove_debugfs to remove debugfs
> when the device disconnects.

No, the entire wiphy dir gets recursively removed.

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply

* Re: [patch 00/37] cpu/hotplug, x86: Reworked parallel CPU bringup
From: Sean Christopherson @ 2023-04-20 14:51 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: Andrew Cooper, Paul Menzel, linux-kernel, x86, David Woodhouse,
	Brian Gerst, Arjan van de Veen, Paolo Bonzini, Paul McKenney,
	Tom Lendacky, Oleksandr Natalenko, Guilherme G. Piccoli,
	Piotr Gorski, David Woodhouse, Usama Arif, Jürgen Groß,
	Boris Ostrovsky, xen-devel, Russell King, Arnd Bergmann,
	linux-arm-kernel, Catalin Marinas, Will Deacon, Guo Ren,
	linux-csky, Thomas Bogendoerfer, linux-mips,
	James E. J. Bottomley, Helge Deller, linux-parisc, Paul Walmsley,
	Palmer Dabbelt, linux-riscv, Mark Rutland, Sabin Rapan
In-Reply-To: <87y1mm3iqz.ffs@tglx>

On Thu, Apr 20, 2023, Thomas Gleixner wrote:
> On Thu, Apr 20 2023 at 10:23, Andrew Cooper wrote:
> > On 20/04/2023 9:32 am, Thomas Gleixner wrote:
> > > On Wed, Apr 19, 2023, Andrew Cooper wrote:
> > > > This was changed in x2APIC, which made the x2APIC_ID immutable.
>
> >> I'm pondering to simply deny parallel mode if x2APIC is not there.
> >
> > I'm not sure if that will help much.
> 
> Spoilsport.

LOL, well let me pile on then.  x2APIC IDs aren't immutable on AMD hardware.  The
ID is read-only when the CPU is in x2APIC mode, but any changes made to the ID
while the CPU is in xAPIC mode survive the transition to x2APIC.  From the APM:

  A value previously written by software to the 8-bit APIC_ID register (MMIO offset
  30h) is converted by hardware into the appropriate format and reflected into the
  32-bit x2APIC_ID register (MSR 802h).

FWIW, my observations from testing on bare metal are that the xAPIC ID is effectively
read-only (writes are dropped) on Intel CPUs as far back as Haswell, while the above
behavior described in the APM holds true on at least Rome and Milan.

My guess is that Intel's uArch specific behavior of the xAPIC ID being read-only
was introduced when x2APIC came along, but I didn't test farther back than Haswell.

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply

* [PATCH v2 2/3] firmware: arm_ffa: Fix usage of partition info get count flag
From: Sudeep Holla @ 2023-04-20 15:06 UTC (permalink / raw)
  To: linux-arm-kernel
  Cc: Sudeep Holla, Marc Bonnici, Jens Wiklander, Lucian Paul-Trifu
In-Reply-To: <20230419-ffa_fixes_6-4-v2-0-d9108e43a176@arm.com>

Commit bb1be7498500 ("firmware: arm_ffa: Add v1.1 get_partition_info support")
adds support to discovery the UUIDs of the partitions or just fetch the
partition count using the PARTITION_INFO_GET_RETURN_COUNT_ONLY flag.

However the commit doesn't handle the fact that the older version doesn't
understand the flag and must be MBZ which results in firmware returning
invalid parameter error. That results in the failure of the driver probe
which is in correct.

Limit the usage of the PARTITION_INFO_GET_RETURN_COUNT_ONLY flag for the
versions above v1.0(i.e v1.1 and onwards) which fixes the issue.

Fixes: bb1be7498500 ("firmware: arm_ffa: Add v1.1 get_partition_info support")
Reported-by: Jens Wiklander <jens.wiklander@linaro.org>
Reported-by: Marc Bonnici <marc.bonnici@arm.com>
Tested-by: Jens Wiklander <jens.wiklander@linaro.org>
Reviewed-by: Jens Wiklander <jens.wiklander@linaro.org>
Signed-off-by: Sudeep Holla <sudeep.holla@arm.com>
---
 drivers/firmware/arm_ffa/driver.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/firmware/arm_ffa/driver.c b/drivers/firmware/arm_ffa/driver.c
index fa85c64d3ded..4aced2e5b772 100644
--- a/drivers/firmware/arm_ffa/driver.c
+++ b/drivers/firmware/arm_ffa/driver.c
@@ -193,7 +193,8 @@ __ffa_partition_info_get(u32 uuid0, u32 uuid1, u32 uuid2, u32 uuid3,
 	int idx, count, flags = 0, sz, buf_sz;
 	ffa_value_t partition_info;
 
-	if (!buffer || !num_partitions) /* Just get the count for now */
+	if (drv_info->version > FFA_VERSION_1_0 &&
+	    (!buffer || !num_partitions)) /* Just get the count for now */
 		flags = PARTITION_INFO_GET_RETURN_COUNT_ONLY;
 
 	mutex_lock(&drv_info->rx_lock);

-- 
2.40.0


_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox