* [PATCH v2 2/2] KVM: arm64: selftests: Add MMIO sign-extending load test
From: Fuad Tabba @ 2026-06-25 14:48 UTC (permalink / raw)
To: Marc Zyngier, Oliver Upton
Cc: Joey Gouly, Suzuki K Poulose, Zenghui Yu, Steffen Eiden,
Catalin Marinas, Will Deacon, Shuah Khan, Christoffer Dall,
Victor Kamensky, linux-arm-kernel, kvmarm, linux-kernel
In-Reply-To: <20260625144807.2603272-1-fuad.tabba@linux.dev>
Add a test for sign-extending MMIO loads (LDRSB, LDRSH, LDRSW) into Xt
and Wt destinations, with and without the sign bit set. The host supplies
the MMIO data and checks the guest register holds the sign-extended value.
Repeat the loads big-endian on a mixed-endian implementation. Issue those
at EL0: SCTLR_EL1.EE would make an EL1 load big-endian but also walk the
little-endian page tables big-endian, whereas SCTLR_EL1.E0E selects only
EL0 data endianness and leaves the walk little-endian.
Signed-off-by: Fuad Tabba <fuad.tabba@linux.dev>
---
tools/testing/selftests/kvm/Makefile.kvm | 1 +
.../selftests/kvm/arm64/mmio_sign_ext.c | 259 ++++++++++++++++++
2 files changed, 260 insertions(+)
create mode 100644 tools/testing/selftests/kvm/arm64/mmio_sign_ext.c
diff --git a/tools/testing/selftests/kvm/Makefile.kvm b/tools/testing/selftests/kvm/Makefile.kvm
index 9118a5a51b89f..0f5803a1092e1 100644
--- a/tools/testing/selftests/kvm/Makefile.kvm
+++ b/tools/testing/selftests/kvm/Makefile.kvm
@@ -171,6 +171,7 @@ TEST_GEN_PROGS_arm64 += arm64/hello_el2
TEST_GEN_PROGS_arm64 += arm64/host_sve
TEST_GEN_PROGS_arm64 += arm64/hypercalls
TEST_GEN_PROGS_arm64 += arm64/external_aborts
+TEST_GEN_PROGS_arm64 += arm64/mmio_sign_ext
TEST_GEN_PROGS_arm64 += arm64/page_fault_test
TEST_GEN_PROGS_arm64 += arm64/psci_test
TEST_GEN_PROGS_arm64 += arm64/sea_to_user
diff --git a/tools/testing/selftests/kvm/arm64/mmio_sign_ext.c b/tools/testing/selftests/kvm/arm64/mmio_sign_ext.c
new file mode 100644
index 0000000000000..06708ab6db8c0
--- /dev/null
+++ b/tools/testing/selftests/kvm/arm64/mmio_sign_ext.c
@@ -0,0 +1,259 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * mmio_sign_ext - Test sign-extending MMIO load emulation (LDRSB/LDRSH/LDRSW)
+ *
+ * Copyright (c) 2026 Google LLC
+ */
+
+#include <asm/ptrace.h>
+
+#include "processor.h"
+#include "test_util.h"
+
+#define MMIO_ADDR 0x8000000ULL
+
+/* AP[1]: allow unprivileged (EL0) access to a mapping. */
+#define PTE_USER BIT(6)
+
+/* SPSR for ERET to EL0t with DAIF masked. */
+#define SPSR_EL0 (PSR_MODE_EL0t | PSR_D_BIT | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT)
+
+struct mmio_test {
+ const char *name;
+ uint64_t data; /* access-width value, host byte order */
+ uint8_t len;
+ uint64_t expected; /* sign-extended result; same for LE and BE */
+};
+
+/* Paired 1:1, in order, with the loads in guest_loads_le() and el0_be_loads. */
+static const struct mmio_test tests[] = {
+ /* LDRSB Xt: byte sign-extended to 64 bits */
+ { "LDRSB Xt 0xFF", 0xFF, 1, 0xFFFFFFFFFFFFFFFFULL },
+ { "LDRSB Xt 0x7F", 0x7F, 1, 0x7FULL },
+
+ /* LDRSB Wt: byte sign-extended to 32 bits, upper 32 bits zeroed */
+ { "LDRSB Wt 0xFF", 0xFF, 1, 0xFFFFFFFFULL },
+ { "LDRSB Wt 0x7F", 0x7F, 1, 0x7FULL },
+
+ /* LDRSH Xt: halfword sign-extended to 64 bits */
+ { "LDRSH Xt 0x8001", 0x8001, 2, 0xFFFFFFFFFFFF8001ULL },
+ { "LDRSH Xt 0x7FFF", 0x7FFF, 2, 0x7FFFULL },
+
+ /* LDRSH Wt: halfword sign-extended to 32 bits, upper 32 bits zeroed */
+ { "LDRSH Wt 0x8001", 0x8001, 2, 0xFFFF8001ULL },
+ { "LDRSH Wt 0x7FFF", 0x7FFF, 2, 0x7FFFULL },
+
+ /* LDRSW Xt: word sign-extended to 64 bits (no Wt form) */
+ { "LDRSW Xt 0x80000001", 0x80000001, 4, 0xFFFFFFFF80000001ULL },
+ { "LDRSW Xt 0x7FFFFFFF", 0x7FFFFFFF, 4, 0x7FFFFFFFULL },
+};
+
+/* Issue one sign-extending load from MMIO and report the result. */
+#define GUEST_LDRS(load) do { \
+ uint64_t val; \
+ \
+ asm volatile(load : "=r"(val) : "r"(MMIO_ADDR) : "memory"); \
+ GUEST_SYNC(val); \
+} while (0)
+
+/* Little-endian pass: loads issued at EL1. */
+static void guest_loads_le(void)
+{
+ GUEST_LDRS("ldrsb %0, [%1]");
+ GUEST_LDRS("ldrsb %0, [%1]");
+ GUEST_LDRS("ldrsb %w0, [%1]");
+ GUEST_LDRS("ldrsb %w0, [%1]");
+ GUEST_LDRS("ldrsh %0, [%1]");
+ GUEST_LDRS("ldrsh %0, [%1]");
+ GUEST_LDRS("ldrsh %w0, [%1]");
+ GUEST_LDRS("ldrsh %w0, [%1]");
+ GUEST_LDRS("ldrsw %0, [%1]");
+ GUEST_LDRS("ldrsw %0, [%1]");
+}
+
+/*
+ * Run the big-endian loads at EL0, where SCTLR_EL1.E0E flips only the data
+ * endianness; at EL1, SCTLR_EL1.EE would also flip the page-table walk and
+ * fault on the little-endian tables. x0 holds MMIO_ADDR; results return in
+ * x19..x28 (tests[] order) via a single SVC.
+ */
+extern char el0_be_loads[], el0_be_loads_end[];
+asm(
+" .pushsection .text, \"ax\"\n"
+" .global el0_be_loads\n"
+"el0_be_loads:\n"
+" ldrsb x19, [x0]\n"
+" ldrsb x20, [x0]\n"
+" ldrsb w21, [x0]\n"
+" ldrsb w22, [x0]\n"
+" ldrsh x23, [x0]\n"
+" ldrsh x24, [x0]\n"
+" ldrsh w25, [x0]\n"
+" ldrsh w26, [x0]\n"
+" ldrsw x27, [x0]\n"
+" ldrsw x28, [x0]\n"
+" svc #0\n"
+" .global el0_be_loads_end\n"
+"el0_be_loads_end:\n"
+" .popsection\n"
+);
+
+/* EL1 handler for the EL0 SVC: report the results, then finish. */
+static void el0_svc_handler(struct ex_regs *regs)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(tests); i++)
+ GUEST_SYNC(regs->regs[19 + i]);
+
+ GUEST_DONE();
+}
+
+static bool guest_mixed_endian_el0(void)
+{
+ uint64_t mmfr0 = read_sysreg(id_aa64mmfr0_el1);
+
+ return SYS_FIELD_GET(ID_AA64MMFR0_EL1, BIGEND, mmfr0) ||
+ SYS_FIELD_GET(ID_AA64MMFR0_EL1, BIGENDEL0, mmfr0);
+}
+
+static void guest_code(void)
+{
+ guest_loads_le();
+
+ if (guest_mixed_endian_el0()) {
+ write_sysreg(read_sysreg(sctlr_el1) | SCTLR_EL1_E0E, sctlr_el1);
+ isb();
+
+ asm volatile(
+ " msr elr_el1, %[pc]\n"
+ " msr spsr_el1, %[spsr]\n"
+ " mov x0, %[mmio]\n"
+ " isb\n"
+ " eret\n"
+ :
+ : [pc] "r"(el0_be_loads),
+ [spsr] "r"((uint64_t)SPSR_EL0),
+ [mmio] "r"(MMIO_ADDR)
+ : "x0", "memory");
+ __builtin_unreachable(); /* el0_svc_handler ends the test */
+ }
+
+ GUEST_DONE();
+}
+
+static void handle_mmio(struct kvm_run *run, const struct mmio_test *t, bool be)
+{
+ int i;
+
+ TEST_ASSERT_EQ(run->mmio.phys_addr, MMIO_ADDR);
+ TEST_ASSERT(!run->mmio.is_write, "Expected MMIO read for %s", t->name);
+ TEST_ASSERT_EQ(run->mmio.len, t->len);
+
+ memset(run->mmio.data, 0, sizeof(run->mmio.data));
+ if (be) {
+ /* The guest reads the device bytes most-significant first. */
+ for (i = 0; i < t->len; i++)
+ run->mmio.data[i] = t->data >> (8 * (t->len - 1 - i));
+ } else {
+ /* Works because arm64 KVM hosts are always little-endian. */
+ memcpy(run->mmio.data, &t->data, t->len);
+ }
+}
+
+static void expect_sync(struct kvm_vcpu *vcpu, struct ucall *uc,
+ const struct mmio_test *t)
+{
+ switch (get_ucall(vcpu, uc)) {
+ case UCALL_SYNC:
+ TEST_ASSERT(uc->args[1] == t->expected,
+ "%s: got %#lx, want %#lx", t->name,
+ (unsigned long)uc->args[1], (unsigned long)t->expected);
+ break;
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(*uc);
+ break;
+ default:
+ TEST_FAIL("Unexpected ucall for %s", t->name);
+ }
+}
+
+/* OR PTE_USER into the leaf descriptors covering [gva, gva + len). */
+static void make_el0_accessible(struct kvm_vm *vm, uint64_t gva, uint64_t len)
+{
+ uint64_t addr;
+
+ for (addr = gva & ~((uint64_t)vm->page_size - 1); addr < gva + len;
+ addr += vm->page_size)
+ *virt_get_pte_hva(vm, addr) |= PTE_USER;
+}
+
+static bool vcpu_mixed_endian_el0(struct kvm_vcpu *vcpu)
+{
+ uint64_t mmfr0 = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64MMFR0_EL1));
+
+ return SYS_FIELD_GET(ID_AA64MMFR0_EL1, BIGEND, mmfr0) ||
+ SYS_FIELD_GET(ID_AA64MMFR0_EL1, BIGENDEL0, mmfr0);
+}
+
+int main(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ struct ucall uc;
+ unsigned int i;
+ bool be;
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+ virt_map(vm, MMIO_ADDR, MMIO_ADDR, 1);
+
+ vm_init_descriptor_tables(vm);
+ vcpu_init_descriptor_tables(vcpu);
+ vm_install_sync_handler(vm, VECTOR_SYNC_LOWER_64, ESR_ELx_EC_SVC64,
+ el0_svc_handler);
+
+ be = vcpu_mixed_endian_el0(vcpu);
+ if (be) {
+ make_el0_accessible(vm, MMIO_ADDR, vm->page_size);
+ make_el0_accessible(vm, (uint64_t)el0_be_loads,
+ el0_be_loads_end - el0_be_loads);
+ }
+
+ ksft_print_header();
+ ksft_set_plan(ARRAY_SIZE(tests) * (be ? 2 : 1));
+
+ /* Little-endian pass: one load and one result per iteration. */
+ for (i = 0; i < ARRAY_SIZE(tests); i++) {
+ const struct mmio_test *t = &tests[i];
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_MMIO);
+ handle_mmio(vcpu->run, t, false);
+
+ vcpu_run(vcpu);
+ expect_sync(vcpu, &uc, t);
+
+ ksft_test_result_pass("%s\n", t->name);
+ }
+
+ if (be) {
+ /* The EL0 stub issues all the loads, then reports the results. */
+ for (i = 0; i < ARRAY_SIZE(tests); i++) {
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_MMIO);
+ handle_mmio(vcpu->run, &tests[i], true);
+ }
+ for (i = 0; i < ARRAY_SIZE(tests); i++) {
+ vcpu_run(vcpu);
+ expect_sync(vcpu, &uc, &tests[i]);
+ ksft_test_result_pass("BE %s\n", tests[i].name);
+ }
+ }
+
+ vcpu_run(vcpu);
+ TEST_ASSERT(get_ucall(vcpu, &uc) == UCALL_DONE, "Expected UCALL_DONE");
+
+ kvm_vm_free(vm);
+
+ ksft_finished();
+}
--
2.39.5
^ permalink raw reply related
* [PATCH v2 1/2] KVM: arm64: Fix sign-extension of MMIO loads
From: Fuad Tabba @ 2026-06-25 14:48 UTC (permalink / raw)
To: Marc Zyngier, Oliver Upton
Cc: Joey Gouly, Suzuki K Poulose, Zenghui Yu, Steffen Eiden,
Catalin Marinas, Will Deacon, Shuah Khan, Christoffer Dall,
Victor Kamensky, linux-arm-kernel, kvmarm, linux-kernel
In-Reply-To: <20260625144807.2603272-1-fuad.tabba@linux.dev>
A sign-extending load (LDRSB, LDRSH, LDRSW) from MMIO returns a
zero-extended value to the guest. The architecture performs such a load
as a memory read of the access size, then a sign-extension to the
register width. For LDRSH (DDI 0487 M.b C6.2.225, with the Mem accessor
at J1.2.3.111):
data = Mem{16}(address, accdesc);
X{regsize}(t) = SignExtend{regsize}(data);
The byte order is handled inside the Mem accessor, keyed on the access
size; the register width is separate, applied afterwards by SignExtend().
kvm_handle_mmio_return() runs these in the wrong order: it sign-extends
the access-width data, then calls vcpu_data_host_to_guest(), which masks
the value back to the access width (the size-keyed byte-order step). The
mask drops the sign bits that sign-extension produced.
Reorder so vcpu_data_host_to_guest() runs first, with the sign-extension
to register width after it. trace_kvm_mmio() moves with it and now logs
the access-width data before sign-extension.
Fixes: b30070862edbd ("ARM64: KVM: MMIO support BE host running LE code")
Reviewed-by: Oliver Upton <oupton@kernel.org>
Signed-off-by: Fuad Tabba <fuad.tabba@linux.dev>
---
arch/arm64/kvm/mmio.c | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/arch/arm64/kvm/mmio.c b/arch/arm64/kvm/mmio.c
index e2285ed8c91de..d1c3a352d5a22 100644
--- a/arch/arm64/kvm/mmio.c
+++ b/arch/arm64/kvm/mmio.c
@@ -126,6 +126,10 @@ int kvm_handle_mmio_return(struct kvm_vcpu *vcpu)
len = kvm_vcpu_dabt_get_as(vcpu);
data = kvm_mmio_read_buf(run->mmio.data, len);
+ trace_kvm_mmio(KVM_TRACE_MMIO_READ, len, run->mmio.phys_addr,
+ &data);
+ data = vcpu_data_host_to_guest(vcpu, data, len);
+
if (kvm_vcpu_dabt_issext(vcpu) &&
len < sizeof(unsigned long)) {
mask = 1U << ((len * 8) - 1);
@@ -135,9 +139,6 @@ int kvm_handle_mmio_return(struct kvm_vcpu *vcpu)
if (!kvm_vcpu_dabt_issf(vcpu))
data = data & 0xffffffff;
- trace_kvm_mmio(KVM_TRACE_MMIO_READ, len, run->mmio.phys_addr,
- &data);
- data = vcpu_data_host_to_guest(vcpu, data, len);
vcpu_set_reg(vcpu, kvm_vcpu_dabt_get_rd(vcpu), data);
}
--
2.39.5
^ permalink raw reply related
* [PATCH v2 0/2] KVM: arm64: Fix and test MMIO sign-extending loads
From: Fuad Tabba @ 2026-06-25 14:48 UTC (permalink / raw)
To: Marc Zyngier, Oliver Upton
Cc: Joey Gouly, Suzuki K Poulose, Zenghui Yu, Steffen Eiden,
Catalin Marinas, Will Deacon, Shuah Khan, Christoffer Dall,
Victor Kamensky, linux-arm-kernel, kvmarm, linux-kernel
Hi folks,
Changes since v1 [1]:
- Patch 1: rewrote the commit msg in the Arm ARM's terms, with the Mem
accessor performing the access keyed on the access size and SignExtend
handling the register width. No code change. (Marc)
- Patch 2: added a big-endian pass to the test. The big-endian loads run
at EL0 with SCTLR_EL1.E0E set, so the access is big-endian while the
stage-1 walk stays little-endian. (Marc)
Oliver's Reviewed-by is on patch 1 only: the code there is unchanged, while
the test in patch 2 gained the big-endian coverage above.
A sign-extending load (LDRSB/LDRSH/LDRSW) from emulated MMIO returns a
zero-extended value rather than the sign-extended one the architecture
requires; vcpu_data_host_to_guest() strips the sign bits when it masks
the data to the access width.
If my git archeology is right, the masking dates to 2014 (b30070862edbd,
big-endian support) and has been wrong ever since, but sign-extending
loads from device memory are rare enough that nobody hit it. Patch 1
fixes it; patch 2 adds a selftest so it doesn't regress.
Based on Linux 7.1 (8cd9520d35a6c).
Cheers,
/fuad
[1] https://lore.kernel.org/all/20260622190701.2039766-1-fuad.tabba@linux.dev/
Fuad Tabba (2):
KVM: arm64: Fix sign-extension of MMIO loads
KVM: arm64: selftests: Add MMIO sign-extending load test
arch/arm64/kvm/mmio.c | 7 +-
tools/testing/selftests/kvm/Makefile.kvm | 1 +
.../selftests/kvm/arm64/mmio_sign_ext.c | 259 ++++++++++++++++++
3 files changed, 264 insertions(+), 3 deletions(-)
create mode 100644 tools/testing/selftests/kvm/arm64/mmio_sign_ext.c
--
2.39.5
^ permalink raw reply
* Re: [PATCH v2 2/5] docs: media: add documentation for media client usage stats
From: Detlev Casanova @ 2026-06-25 14:36 UTC (permalink / raw)
To: Nicolas Dufresne, Hans Verkuil, Mauro Carvalho Chehab,
Benjamin Gaignard, Philipp Zabel, Ezequiel Garcia, Heiko Stuebner
Cc: linux-media, linux-kernel, linux-rockchip, kernel,
linux-arm-kernel, Christopher Healy
In-Reply-To: <0449f2bf75685e17034ada5ae3961518ceb04345.camel@collabora.com>
Hi !
On 6/19/26 10:04, Nicolas Dufresne wrote:
> Hi,
>
> Le vendredi 19 juin 2026 à 14:58 +0200, Hans Verkuil a écrit :
>> Hi Detlev,
>>
>> Interesting, I had never heard of fdinfo, so if nothing else, I learned something new!
>>
>> On 17/06/2026 20:10, Detlev Casanova wrote:
>>> From: Christopher Healy <healych@amazon.com>
>>>
>>> Document the media fdinfo interface for per-file-descriptor usage
>>> statistics exposed by stateless V4L2 codec drivers via
>>> /proc/<pid>/fdinfo/<fd>.
>>>
>>> This interface is designed for stateless (request API based) codec
>>> devices where the kernel driver has per-job visibility into hardware
>>> execution. Stateful codecs cannot support all of this because their
>>> firmware manages job scheduling opaquely.
>>>
>>> The specification defines media- prefixed keys for engine utilization
>>> time, and operating frequency, following the same conventions as the DRM
>>> fdinfo mechanism documented in drm-usage-stats.rst.
>>>
>>> More fields can be added later.
>>>
>>> Signed-off-by: Christopher Healy <healych@amazon.com>
>>> Signed-off-by: Detlev Casanova <detlev.casanova@collabora.com>
>>> ---
>>> .../userspace-api/media/drivers/index.rst | 1 +
>>> .../media/drivers/media-usage-stats.rst | 85 ++++++++++++++++++++++
>>> 2 files changed, 86 insertions(+)
>>>
>>> diff --git a/Documentation/userspace-api/media/drivers/index.rst b/Documentation/userspace-api/media/drivers/index.rst
>>> index 02967c9b18d6..61879738836c 100644
>>> --- a/Documentation/userspace-api/media/drivers/index.rst
>>> +++ b/Documentation/userspace-api/media/drivers/index.rst
>>> @@ -34,6 +34,7 @@ For more details see the file COPYING in the source distribution of Linux.
>>> imx-uapi
>>> mali-c55
>>> max2175
>>> + media-usage-stats
>>> npcm-video
>>> omap3isp-uapi
>>> thp7312
>>> diff --git a/Documentation/userspace-api/media/drivers/media-usage-stats.rst b/Documentation/userspace-api/media/drivers/media-usage-stats.rst
>>> new file mode 100644
>>> index 000000000000..d3dc07002f62
>>> --- /dev/null
>>> +++ b/Documentation/userspace-api/media/drivers/media-usage-stats.rst
>>> @@ -0,0 +1,85 @@
>>> +.. SPDX-License-Identifier: GPL-2.0
>>> +
>>> +.. _media-usage-stats:
>>> +
>>> +==========================
>>> +Media client usage stats
>> stats -> statistics
>>
>> But are these really statistics? Isn't it just the current status?
The term "statistics" was the first that came to my mind, but I agree
it's not great here.
Thinking of it, the term "metrics" feels more adapted, what do you think ?
>> In many ways this feature looks to me similar to what VIDIOC_LOG_STATUS does,
>> except in a nicer format. When VIDIOC_LOG_STATUS was first added, fdinfo
>> didn't exist yet.
>>
>> And I wouldn't call it 'Media client': it's specific to stateless V4L2
>> codec drivers.
> While it is currently implemented for sateless m2m codec drivers as example
> (because we can't implement this everywhere at once really, its not practical),
> there is no reason why we cannot track to a process fdinfo memory usage
> associated with other video devices. In fact, I would pretty much want to see
> capture devices showing up in v4l2top, as these are using a lot of memory too,
> specially the new camera pipelines. And this is also perfectly suitable for
> stateful codec, converters, everything.
>
> So please, let's agree this is not "stateless V4L2 codec drivers" specific.
Indeed, but I get the confusion as the documentation suggests that
stateless codec drivers fields are mandatory.
I'll rework it to have generic v4l2 fields that are mandatory for all
driver types, and then fields per type (where it will focus on stateless
codecs)
>
> About VIDIOC_LOG_STATUS, this is effectively broken for m2m, only the owning
> session process could call that. Its also pretty bad for tools to have to
> monitor the dmesg and filter it up. fdinfo on the other end, does not require
> opening the device and permissions are very clear, and bound to user process.
>
>>> +==========================
>>> +
>>> +Stateless V4L2 codec drivers can optionally expose per-file-descriptor usage
>>> +statistics via ``/proc/<pid>/fdinfo/<fd>``. This is analogous to the DRM fdinfo
>>> +mechanism documented in :ref:`drm-client-usage-stats`, but uses the ``media-``
>>> +key prefix for V4L2 media devices.
>>> +
>>> +This interface is specific to stateless (request API based) codec devices,
>>> +including both decoders and encoders. With stateless codecs, the kernel driver
>>> +explicitly submits each frame to the hardware and receives a completion
>>> +interrupt, providing a clean per-job boundary that can be attributed to the
>>> +submitting file descriptor.
>>> +
>>> +Stateful codec devices cannot support this interface because their firmware
>>> +manages job scheduling internally. The kernel driver submits bitstream data
>>> +but has no visibility into per-frame hardware execution timing.
>>> +
>>> +Implementation
>>> +==============
>>> +
>>> +The V4L2 core provides the plumbing: drivers implement the ``show_fdinfo``
>>> +callback in ``struct v4l2_file_operations``, and the core wires it into the
>>> +kernel ``struct file_operations`` so that ``/proc/<pid>/fdinfo/<fd>`` output
>>> +includes the driver-provided keys.
>>> +
>>> +File format specification
>>> +=========================
>>> +
>>> +- File shall contain one key value pair per one line of text.
>>> +- Colon character (``:``) must be used to delimit keys and values.
>>> +- All standardised keys shall be prefixed with ``media-``.
>>> +- Driver-specific keys shall be prefixed with ``driver_name-``.
>>> +
>>> +Mandatory keys
>>> +--------------
>>> +
>>> +- media-driver: <valstr>
>> I'd pick 'v4l2-driver'. Since 'media-driver' is too generic for this
>> since that encompasses also DVB/CEC/RC drivers.
v4l2-driver works for me.
>>
>>> +
>>> + String shall contain the name of the media driver.
>> 'V4L2 stateless codec driver'
> With that said, we can refine, but not in that direction.
Then just 'V4L2 driver'
>>> +
>>> +- media-type: <valstr>
>> Poor name.
This fields will basically be the one telling userspace what fields to
expect next.
"v4l2-type" sounds wrong as well. What about "v4l2-driver-type" ?
>>
>>> +
>>> + String shall identify the type of media engine exposed through this file
>>> + descriptor. Standard values are ``decoder`` and ``encoder``.
>> I think I would use 'stateless-decoder' and 'stateless-encoder'. It's more
>> specific than de/encoder since that can be stateful as well.
Yes, I totally agree.
>>
>> So I am missing the big picture here: right now this patch adds support for
>> this for stateless codecs, but what happens in the future if this is also
>> added for regular video capture devices, ISPs, etc.?
> We should work further on the type system, then we'd simply have to extend it.
> Different type of driver will provide different key/value pair. We should well
> document which key are mandatory for the type (or for all type, like giving a
> name for general identification purpose).
>
> Of course, the current proposal is very torward processing cores, hence the one
> to one match with what GPUs present in fdinfo today in mainline, such are
> frequency, core utilization time, etc.
>
> Though, DMA devices such as capture may expose more of a bandwidth kind of
> information. I really don't want to make up too may cases, so owners of these
> don't feel like this is mandated though. As we develop software using our
> drivers, we should be able to figure-out what kind of things are important to
> monitor. For CODECs, we have two majors things. a) core utilization (which can
> be with a timer like this one, but ideally using cycle counters, or firmware
> provided data). b) memory usage.
For core utilization, we can have both (HW cycles and time).
Time is less precise as it is computed by the driver, starting a bit
before the hardware is run and a bit after the interrupt arrives.
HW cycles is very precise, but need the clock rate to be exposed and not
all HW may support it.
We can say that time is mandatory (for stateless codec), as it can
always be computed and HW cycles is optional.
For showing usage like in v4l2top, the userspace can compute it from HW
cycles if available and fallback to less precise "Time" otherwise.
>
>> The naming here matters, it has to have some sort of scheme so it can be
>> extended to other types of drivers. So a 'media' prefix is too generic,
>> and it also looks like it refers to the /dev/mediaX device.
>>
>>> +
>>> +Utilization keys
>>> +----------------
>>> +
>>> +- media-engine-usage: <uint> ns
>> 'Media Engine': very vague.
Thinking of it, this one could be changed to
"v4l2-core-usage-time-<core_id>".
That way we can have "v4l2-core-usage-cycles-<core_id>" too.
Separating by core id show more precise metrics.
>>
>>> +
>>> + Time in nanoseconds that the hardware engine spent busy processing work
>> Cumulative since creating the file handle? Or since the last read?
I implemented cumulative since the file handle was created.
This is closer to what DRM does, so let's keep it. I'll specify it in
the documentation.
>>
>>> + belonging to this file descriptor. The engine being measured is identified
>>> + by the ``media-type`` key.
>>> +
>>> + Values are not required to be constantly monotonic if it makes the driver
>>> + implementation easier, but are required to catch up with the previously
>>> + reported larger value within a reasonable period.
>> Does this make sense for codecs?
>>
>> I can tell that this is heavily influenced by the drm documentation, but that
>> does not necessarily translate to V4L2.
> I would guess this is bound to usage of cycle counter, which we definitely have
> in some codec HW exposing. As the frequency fluctuate, I suppose there is ways
> you can get your calculation a bit off, as this is being sampled. Basically, the
> frequence change is not strictly tracked by our drivers, which is ok. But I also
> don't seen why we wouldn't keep the counters monotonic, its really easy to do.
Yeah, I think we can drop that comment. For each run, we just add a
positive value,
so there is no reason not to be monotonic.
>>> +
>>> +Frequency keys
>>> +--------------
>>> +
>>> +- media-maxfreq: <uint> Hz
>>> +
>>> + Maximum operating frequency of the main engine clock.
>>> +
>>> +- media-curfreq: <uint> Hz
>>> +
>>> + Current operating frequency of the main engine clock.
>> 'Main engine clock'?
> The word "engine" seems indeed alien in this subsystem. It is referring to a
> system where you open one devices, so you have one fdinfo, but multiple cores.
> Each cores may be running at it owns frequency. But I think this is a little too
> vague to my taste.
Main doesn't refer to the main engine, but rather to the main clock of
the engine.
e.g. rkvdec so far can have up to 5 clocks linked to it but only the one
that actually clocks the HW should be exposed.
That being said, you are right that one file handle can have multiple
cores, so the name should be changed to v4l2-maxfreq-<core_id> (and
curfreq) and the text should use "main clock of the core <core_id>".
We also could just expose all clocks with their name, but userspace
still needs to know which one can be used to compute usage %age from HW
cycles.
Detlev.
>
> Hope this feedback does not cross to many wires, but tagging this as specific to
> stateless codec drivers did spark a little in my head. And we are really in need
> for a way to monitor our drivers.
>
> Nicolas
>
>>> +
>>> +Example output
>>> +==============
>>> +
>>> +::
>>> +
>>> + media-driver: hantro-vpu
>>> + media-type: decoder
>>> + media-engine-usage: 123456789 ns
>>> + media-maxfreq: 600000000 Hz
>>> + media-curfreq: 600000000 Hz
>>>
>> Regards,
>>
>> Hans
^ permalink raw reply
* [PATCH v3 4/4] media: qcom: camss: use fwnode_graph_for_each_endpoint_scoped() to simplify code
From: Frank.Li @ 2026-06-25 14:17 UTC (permalink / raw)
To: Andy Shevchenko, Daniel Scally, Heikki Krogerus, Sakari Ailus,
Greg Kroah-Hartman, Rafael J. Wysocki, Danilo Krummrich,
Mauro Carvalho Chehab, Dafna Hirschfeld, Laurent Pinchart,
Heiko Stuebner, Bryan O'Donoghue, Vladimir Zapolskiy,
Loic Poulain
Cc: driver-core, linux-acpi, linux-kernel, linux-media,
linux-rockchip, linux-arm-kernel, linux-arm-msm, imx, Guoniu Zhou,
Frank Li, Guoniu Zhou, Laurent Pinchart
In-Reply-To: <20260625-fw_scoped-v3-0-ffd0868e498d@nxp.com>
From: Frank Li <Frank.Li@nxp.com>
Use fwnode_graph_for_each_endpoint_scoped() to simplify code.
No functional changes.
Reviewed-by: Guoniu Zhou <guoniu.zhou@oss.nxp.com>
Reviewed-by: Loic Poulain <loic.poulain@oss.qualcomm.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reviewed-by: Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
Signed-off-by: Frank Li <Frank.Li@nxp.com>
---
change in v2
- fix typo simplify
- collect andy, gouniou and loic's review tags
---
drivers/media/platform/qcom/camss/camss.c | 17 +++++------------
1 file changed, 5 insertions(+), 12 deletions(-)
diff --git a/drivers/media/platform/qcom/camss/camss.c b/drivers/media/platform/qcom/camss/camss.c
index 2123f6388e3d7..23f3cc30a15a5 100644
--- a/drivers/media/platform/qcom/camss/camss.c
+++ b/drivers/media/platform/qcom/camss/camss.c
@@ -4793,30 +4793,23 @@ static int camss_parse_endpoint_node(struct device *dev,
static int camss_parse_ports(struct camss *camss)
{
struct device *dev = camss->dev;
- struct fwnode_handle *fwnode = dev_fwnode(dev), *ep;
+ struct fwnode_handle *fwnode = dev_fwnode(dev);
int ret;
- fwnode_graph_for_each_endpoint(fwnode, ep) {
+ fwnode_graph_for_each_endpoint_scoped(fwnode, ep) {
struct camss_async_subdev *csd;
csd = v4l2_async_nf_add_fwnode_remote(&camss->notifier, ep,
typeof(*csd));
- if (IS_ERR(csd)) {
- ret = PTR_ERR(csd);
- goto err_cleanup;
- }
+ if (IS_ERR(csd))
+ return PTR_ERR(csd);
ret = camss_parse_endpoint_node(dev, ep, csd);
if (ret < 0)
- goto err_cleanup;
+ return ret;
}
return 0;
-
-err_cleanup:
- fwnode_handle_put(ep);
-
- return ret;
}
/*
--
2.43.0
^ permalink raw reply related
* [PATCH v3 3/4] media: rkisp1: use fwnode_graph_for_each_endpoint_scoped() to simplify code
From: Frank.Li @ 2026-06-25 14:17 UTC (permalink / raw)
To: Andy Shevchenko, Daniel Scally, Heikki Krogerus, Sakari Ailus,
Greg Kroah-Hartman, Rafael J. Wysocki, Danilo Krummrich,
Mauro Carvalho Chehab, Dafna Hirschfeld, Laurent Pinchart,
Heiko Stuebner, Bryan O'Donoghue, Vladimir Zapolskiy,
Loic Poulain
Cc: driver-core, linux-acpi, linux-kernel, linux-media,
linux-rockchip, linux-arm-kernel, linux-arm-msm, imx, Guoniu Zhou,
Frank Li, Laurent Pinchart
In-Reply-To: <20260625-fw_scoped-v3-0-ffd0868e498d@nxp.com>
From: Frank Li <Frank.Li@nxp.com>
Use fwnode_graph_for_each_endpoint_scoped() to simplify code.
No functional changes.
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reviewed-by: Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
Signed-off-by: Frank Li <Frank.Li@nxp.com>
---
Andy: Keep original code because too much break. and I am working on more
generally solution, so the whole function can be replaced.
---
drivers/media/platform/rockchip/rkisp1/rkisp1-dev.c | 4 +---
1 file changed, 1 insertion(+), 3 deletions(-)
diff --git a/drivers/media/platform/rockchip/rkisp1/rkisp1-dev.c b/drivers/media/platform/rockchip/rkisp1/rkisp1-dev.c
index 1791c02a40ae1..f90e01301943c 100644
--- a/drivers/media/platform/rockchip/rkisp1/rkisp1-dev.c
+++ b/drivers/media/platform/rockchip/rkisp1/rkisp1-dev.c
@@ -187,7 +187,6 @@ static int rkisp1_subdev_notifier_register(struct rkisp1_device *rkisp1)
{
struct v4l2_async_notifier *ntf = &rkisp1->notifier;
struct fwnode_handle *fwnode = dev_fwnode(rkisp1->dev);
- struct fwnode_handle *ep;
unsigned int index = 0;
int ret = 0;
@@ -195,7 +194,7 @@ static int rkisp1_subdev_notifier_register(struct rkisp1_device *rkisp1)
ntf->ops = &rkisp1_subdev_notifier_ops;
- fwnode_graph_for_each_endpoint(fwnode, ep) {
+ fwnode_graph_for_each_endpoint_scoped(fwnode, ep) {
struct fwnode_handle *port;
struct v4l2_fwnode_endpoint vep = { };
struct rkisp1_sensor_async *rk_asd;
@@ -286,7 +285,6 @@ static int rkisp1_subdev_notifier_register(struct rkisp1_device *rkisp1)
}
if (ret) {
- fwnode_handle_put(ep);
v4l2_async_nf_cleanup(ntf);
return ret;
}
--
2.43.0
^ permalink raw reply related
* [PATCH v3 2/4] media: mc: use fwnode_graph_for_each_endpoint_scoped() to simpilfy code
From: Frank.Li @ 2026-06-25 14:17 UTC (permalink / raw)
To: Andy Shevchenko, Daniel Scally, Heikki Krogerus, Sakari Ailus,
Greg Kroah-Hartman, Rafael J. Wysocki, Danilo Krummrich,
Mauro Carvalho Chehab, Dafna Hirschfeld, Laurent Pinchart,
Heiko Stuebner, Bryan O'Donoghue, Vladimir Zapolskiy,
Loic Poulain
Cc: driver-core, linux-acpi, linux-kernel, linux-media,
linux-rockchip, linux-arm-kernel, linux-arm-msm, imx, Guoniu Zhou,
Frank Li, Guoniu Zhou, Laurent Pinchart
In-Reply-To: <20260625-fw_scoped-v3-0-ffd0868e498d@nxp.com>
From: Frank Li <Frank.Li@nxp.com>
Use cleanup helper fwnode_graph_for_each_endpoint_scoped() to simpilfy
code.
Reviewed-by: Guoniu Zhou <guoniu.zhou@oss.nxp.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reviewed-by: Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
Signed-off-by: Frank Li <Frank.Li@nxp.com>
---
drivers/media/v4l2-core/v4l2-mc.c | 5 +----
1 file changed, 1 insertion(+), 4 deletions(-)
diff --git a/drivers/media/v4l2-core/v4l2-mc.c b/drivers/media/v4l2-core/v4l2-mc.c
index 937d358697e19..5d7fcd67dc42e 100644
--- a/drivers/media/v4l2-core/v4l2-mc.c
+++ b/drivers/media/v4l2-core/v4l2-mc.c
@@ -324,12 +324,10 @@ EXPORT_SYMBOL_GPL(v4l_vb2q_enable_media_source);
int v4l2_create_fwnode_links_to_pad(struct v4l2_subdev *src_sd,
struct media_pad *sink, u32 flags)
{
- struct fwnode_handle *endpoint;
-
if (!(sink->flags & MEDIA_PAD_FL_SINK))
return -EINVAL;
- fwnode_graph_for_each_endpoint(src_sd->fwnode, endpoint) {
+ fwnode_graph_for_each_endpoint_scoped(src_sd->fwnode, endpoint) {
struct fwnode_handle *remote_ep;
int src_idx, sink_idx, ret;
struct media_pad *src;
@@ -397,7 +395,6 @@ int v4l2_create_fwnode_links_to_pad(struct v4l2_subdev *src_sd,
src_sd->entity.name, src_idx,
sink->entity->name, sink_idx, ret);
- fwnode_handle_put(endpoint);
return ret;
}
}
--
2.43.0
^ permalink raw reply related
* [PATCH v3 1/4] device property: Introduce fwnode_graph_for_each_endpoint_scoped()
From: Frank.Li @ 2026-06-25 14:17 UTC (permalink / raw)
To: Andy Shevchenko, Daniel Scally, Heikki Krogerus, Sakari Ailus,
Greg Kroah-Hartman, Rafael J. Wysocki, Danilo Krummrich,
Mauro Carvalho Chehab, Dafna Hirschfeld, Laurent Pinchart,
Heiko Stuebner, Bryan O'Donoghue, Vladimir Zapolskiy,
Loic Poulain
Cc: driver-core, linux-acpi, linux-kernel, linux-media,
linux-rockchip, linux-arm-kernel, linux-arm-msm, imx, Guoniu Zhou,
Frank Li, Guoniu Zhou, Laurent Pinchart
In-Reply-To: <20260625-fw_scoped-v3-0-ffd0868e498d@nxp.com>
From: Frank Li <Frank.Li@nxp.com>
Similar to recently propose for_each_child_of_node_scoped() this new
version of the loop macro instantiates a new local struct fwnode_handle *
that uses the __free(fwnode_handle) auto cleanup handling so that if a
reference to a node is held on early exit from the loop the reference will
be released. If the loop runs to completion, the child pointer will be NULL
and no action will be taken.
The reason this is useful is that it removes the need for
fwnode_handle_put() on early loop exits. If there is a need to retain the
reference, then return_ptr(child) or no_free_ptr(child) may be used to
safely disable the auto cleanup.
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reviewed-by: Guoniu Zhou <guoniu.zhou@oss.nxp.com>
Reviewed-by: Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
Signed-off-by: Frank Li <Frank.Li@nxp.com>
---
change in v3
- fix missed a tab before \
change in v2
- collect Andy and Guoniu's reviewed-by tags
- fix indention
- remove extra space in commit message
---
include/linux/property.h | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/include/linux/property.h b/include/linux/property.h
index 14c304db46648..c71177ca16a95 100644
--- a/include/linux/property.h
+++ b/include/linux/property.h
@@ -545,6 +545,11 @@ unsigned int fwnode_graph_get_endpoint_count(const struct fwnode_handle *fwnode,
for (child = fwnode_graph_get_next_endpoint(fwnode, NULL); child; \
child = fwnode_graph_get_next_endpoint(fwnode, child))
+#define fwnode_graph_for_each_endpoint_scoped(fwnode, child) \
+ for (struct fwnode_handle *child __free(fwnode_handle) = \
+ fwnode_graph_get_next_endpoint(fwnode, NULL); \
+ child; child = fwnode_graph_get_next_endpoint(fwnode, child))
+
int fwnode_graph_parse_endpoint(const struct fwnode_handle *fwnode,
struct fwnode_endpoint *endpoint);
--
2.43.0
^ permalink raw reply related
* [PATCH v3 0/4] media: add and use fwnode_graph_for_each_endpoint_scoped()
From: Frank.Li @ 2026-06-25 14:17 UTC (permalink / raw)
To: Andy Shevchenko, Daniel Scally, Heikki Krogerus, Sakari Ailus,
Greg Kroah-Hartman, Rafael J. Wysocki, Danilo Krummrich,
Mauro Carvalho Chehab, Dafna Hirschfeld, Laurent Pinchart,
Heiko Stuebner, Bryan O'Donoghue, Vladimir Zapolskiy,
Loic Poulain
Cc: driver-core, linux-acpi, linux-kernel, linux-media,
linux-rockchip, linux-arm-kernel, linux-arm-msm, imx, Guoniu Zhou,
Frank Li, Guoniu Zhou, Laurent Pinchart
Add new helper macro fwnode_graph_for_each_endpoint_scoped() and use it
simplify media code.
Typical example should qualcomm's driver (camss.c), the v4l2_mc.c and
rkisp1-dev.c only silience improvement.
Anyways, *_for_each_*_scoped() already use widely and make code clean.
Build test only.
Sakari Ailus:
when I try to improve the patch
"Add common helper library for 1-to-1 subdev registration", I found need
camss.c pattern, so I create this small improvement firstly.
Signed-off-by: Frank Li <Frank.Li@nxp.com>
---
Changes in v3:
- colllect luarent pinchart's reviewed-by tags
- fix miss tab before \
- Link to v2: https://patch.msgid.link/20260624-fw_scoped-v2-0-0a8db472af4a@nxp.com
Changes in v2:
- colllect review by tags
- fix typo and indent.
- see each patch's change log.
- Link to v1: https://patch.msgid.link/20260622-fw_scoped-v1-0-a37d0aac0a68@nxp.com
---
Frank Li (4):
device property: Introduce fwnode_graph_for_each_endpoint_scoped()
media: mc: use fwnode_graph_for_each_endpoint_scoped() to simpilfy code
media: rkisp1: use fwnode_graph_for_each_endpoint_scoped() to simplify code
media: qcom: camss: use fwnode_graph_for_each_endpoint_scoped() to simplify code
drivers/media/platform/qcom/camss/camss.c | 17 +++++------------
drivers/media/platform/rockchip/rkisp1/rkisp1-dev.c | 4 +---
drivers/media/v4l2-core/v4l2-mc.c | 5 +----
include/linux/property.h | 5 +++++
4 files changed, 12 insertions(+), 19 deletions(-)
---
base-commit: 3ce97bd3c4f18608335e709c24d6a40e7036cab8
change-id: 20260620-fw_scoped-5dab644510a1
Best regards,
--
Frank Li <Frank.Li@nxp.com>
^ permalink raw reply
* Re: [PATCH v6 0/9] media: add new API simple 1to1 subdev register and add imx parallel camera support
From: Frank Li @ 2026-06-25 14:17 UTC (permalink / raw)
To: Sakari Ailus, Mauro Carvalho Chehab, Michael Riesch,
Laurent Pinchart, Frank Li, Martin Kepplinger-Novakovic,
Rui Miguel Silva, Purism Kernel Team, Rob Herring,
Krzysztof Kozlowski, Conor Dooley, Sascha Hauer,
Pengutronix Kernel Team, Fabio Estevam
Cc: linux-media, linux-kernel, imx, Guoniu Zhou, devicetree,
linux-arm-kernel, Alice Yuan, Robert Chiras, Zhipeng Wang,
Krzysztof Kozlowski
In-Reply-To: <20260624-imx8qxp_pcam-v6-0-4b3f45920d2f@nxp.com>
On Wed, Jun 24, 2026 at 04:37:47PM -0400, Frank.Li@oss.nxp.com wrote:
> Base on patches "media: add and use fwnode_graph_for_each_endpoint_scoped()"
> https://lore.kernel.org/imx/20260624200237.GJ851255@killaraus.ideasonboard.com/T/#m7969735b6c236c6b3abc16b9f3f55ec0488dbe89
>
> This patches base on previous' thread "media: imx8qxp: add parallel camera
> support".
>
> Add new API media_async_register_subdev_1to1() to simple 1to1 subdev
> register.
typo here, should be media_async_register_subdev().
fwnode graphic, there two mehtod to connect nodes together.
method 1:
port@0
{
endpoint@0{
-> sensor 0
}
endpoint@1{
-> sensor 1
}
}
method 2:
port@0
{
endpoint {
-> sensor 0
}
}
port@1 {
endpoint {
-> sensor 1
}
}
NXP/Freesclae use method 2, not sure other vendors or history support
mehtod 1
Most system one port have only one endpoint, not sure previous design hope
endpont map to media pad or one port map to media pad.
It is the same if only one endpont under port. So far this version support
all devices, which use method 2.
Frank
>
> Many V4L2 subdev drivers implement the same registration and media pads.
> Assumes a 1:1 mapping between firmware endpoints and media pads.
> During registration it parses the firmware graph, creates media pads for
> all endpoints, and registers common asynchronous notifiers for sink
> endpoints. These notifiers automatically create media links when the
> corresponding remote source devices become available.
>
> The set_pad_by_ep() callback allows drivers to determine the media pad
> associated with a firmware endpoint and identify whether the endpoint
> represents a sink pad.
>
> By centralizing firmware graph parsing, media pad creation, notifier
> registration, and link creation, this helper reduces duplicated code and
> simplifies error handling in V4L2 sub-device drivers.
>
> Add media_async_register_subdev(), a helper to register a V4L2 sub-device
> with the asynchronous sub-device framework.
>
> This reduces code duplication and simplifies the implementation of
> simple bridge and converter drivers.
>
> In subdev driver:
>
> your_device_probe()
> {
> v4l2_subdev_init(sd, &dw_mipi_csi2rx_ops);
> ...
> return media_async_register_subdev_1to1(sd);
> }
>
> ...
> your_device_remove()
> {
> media_async_subdev_cleanup(sd);
> }
>
> This API help reduce over line duplcated code in synopsys/dw-mipi-csi2rx.c.
> And use this API at imx8's parallel CPI driver, which over 90% code now
> hardware related.
>
> And also benefit on going pix format patch
> https://lore.kernel.org/imx/20260525-csi_formatter-v8-0-6b646231224b@oss.nxp.com/
>
> It will also reduce missed media_entity_cleanup() problem at some error path
> https://lore.kernel.org/linux-media/20260614202835.11977-15-birenpandya@gmail.com/
>
> Previous do partial simpilfy at
> https://lore.kernel.org/imx/aaisdJSsFE5-PLx1@lizhi-Precision-Tower-5810/
>
> To: Sakari Ailus <sakari.ailus@linux.intel.com>
> To: Mauro Carvalho Chehab <mchehab@kernel.org>
> To: Michael Riesch <michael.riesch@collabora.com>
> To: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
> To: Frank Li <Frank.Li@nxp.com>
> To: Martin Kepplinger-Novakovic <martink@posteo.de>
> To: Rui Miguel Silva <rmfrfs@gmail.com>
> To: Purism Kernel Team <kernel@puri.sm>
> To: Rob Herring <robh@kernel.org>
> To: Krzysztof Kozlowski <krzk+dt@kernel.org>
> To: Conor Dooley <conor+dt@kernel.org>
> To: Sascha Hauer <s.hauer@pengutronix.de>
> To: Pengutronix Kernel Team <kernel@pengutronix.de>
> To: Fabio Estevam <festevam@gmail.com>
> Cc: linux-media@vger.kernel.org
> Cc: linux-kernel@vger.kernel.org
> Cc: imx@lists.linux.dev
> Cc: Guoniu Zhou <guoniu.zhou@nxp.com>
> Cc: devicetree@vger.kernel.org
> Cc: linux-arm-kernel@lists.infradead.org
>
> Signed-off-by: Frank Li <Frank.Li@nxp.com>
> ---
> Changes in v6:
> - Change API to fix more width user case, assume a media pad have one endpoint
> on dts.
> - other detail change see each patch's change log
> - Link to v5: https://patch.msgid.link/20260617-imx8qxp_pcam-v5-0-7fa6c8e7fba7@nxp.com
>
> Changes in v5:
> - Add media_async_register_subdev_1to1() to simple code.
> - Link to v4: https://lore.kernel.org/r/20250729-imx8qxp_pcam-v4-0-4dfca4ed2f87@nxp.com
>
> Changes in v4:
> - remove imx93 driver support since have not camera sensor module to do test now.
> Add it later
> - Add new patch
> media: v4l2-common: Add helper function v4l_get_required_align_by_bpp()
> - See each patche's change log for detail.
> - Link to v3: https://lore.kernel.org/r/20250708-imx8qxp_pcam-v3-0-c8533e405df1@nxp.com
>
> Changes in v3:
> - replace CSI with CPI.
> - detail change see each patch's change logs
> - Link to v2: https://lore.kernel.org/r/20250703-imx8qxp_pcam-v2-0-188be85f06f1@nxp.com
>
> Changes in v2:
> - remove patch media: nxp: isi: add support for UYVY8_2X8 and YUYV8_2X8 bus codes
> because pcif controller convert 2x8 to 1x16 to match isi's input
> - rename comaptible string to fsl,imx8qxp-pcif
> - See each patches's change log for detail
> - Link to v1: https://lore.kernel.org/r/20250630-imx8qxp_pcam-v1-0-eccd38d99201@nxp.com
>
> ---
> Alice Yuan (2):
> dt-bindings: media: add i.MX parallel CPI support
> media: nxp: add V4L2 subdev driver for camera parallel interface (CPI)
>
> Frank Li (7):
> media: mc-entity: Store parsed V4L2 fwnode endpoint in media_pad
> media: subdev: Add set_pad_by_ep() callback to internal ops
> media: subdev: Add media_async_register_subdev() helper
> media: synopsys: Use v4l2_subdev_get_frame_desc_passthrough()
> media: synopsys: Use media_async_register_subdev() to simplify code
> arm64: dts: imx8: add camera parallel interface (CPI) node
> arm64: dts: imx8qxp-mek: add parallel ov5640 camera support
>
> .../devicetree/bindings/media/fsl,imx93-pcif.yaml | 126 +++++
> MAINTAINERS | 2 +
> arch/arm64/boot/dts/freescale/Makefile | 3 +
> arch/arm64/boot/dts/freescale/imx8-ss-img.dtsi | 13 +
> .../boot/dts/freescale/imx8qxp-mek-ov5640-cpi.dtso | 83 +++
> arch/arm64/boot/dts/freescale/imx8qxp-ss-img.dtsi | 27 +
> drivers/media/platform/nxp/Kconfig | 12 +
> drivers/media/platform/nxp/Makefile | 1 +
> drivers/media/platform/nxp/imx-parallel-cpi.c | 629 +++++++++++++++++++++
> drivers/media/platform/synopsys/dw-mipi-csi2rx.c | 200 ++-----
> drivers/media/v4l2-core/v4l2-fwnode.c | 155 +++++
> include/media/media-entity.h | 5 +-
> include/media/v4l2-async.h | 39 ++
> include/media/v4l2-subdev.h | 5 +
> 14 files changed, 1140 insertions(+), 160 deletions(-)
> ---
> base-commit: c425f8be0326d40823cd93cbca633872d099df2a
> change-id: 20250626-imx8qxp_pcam-d851238343c3
>
> Best regards,
> --
> Frank Li <Frank.Li@nxp.com>
>
>
^ permalink raw reply
* Re: [RFC PATCH net-next v8 03/12] net: phylink: add phylink_release_pcs() to externally release a PCS
From: Maxime Chevallier @ 2026-06-25 14:13 UTC (permalink / raw)
To: Christian Marangi, Andrew Lunn, David S. Miller, Eric Dumazet,
Jakub Kicinski, Paolo Abeni, Rob Herring, Krzysztof Kozlowski,
Conor Dooley, Simon Horman, Jonathan Corbet, Shuah Khan,
Lorenzo Bianconi, Heiner Kallweit, Russell King, Saravana Kannan,
Philipp Zabel, Nathan Chancellor, Nick Desaulniers, Bill Wendling,
Justin Stitt, netdev, devicetree, linux-kernel, linux-doc,
linux-arm-kernel, linux-mediatek, llvm
In-Reply-To: <20260618125752.1223-4-ansuelsmth@gmail.com>
Hello Christian,
On 6/18/26 14:57, Christian Marangi wrote:
> Add phylink_release_pcs() to externally release a PCS from a phylink
> instance. This can be used to handle case when a single PCS needs to be
> removed and the phylink instance needs to be refreshed.
>
> On calling phylink_release_pcs(), the PCS will be removed from the
> phylink internal PCS list and the phylink supported_interfaces value is
> reparsed with the remaining PCS interfaces.
>
> Also a phylink resolve is triggered to handle the PCS removal.
>
> The flag force_major_config is set to make phylink resolve reconfigure
> the interface (even if it didn't change).
> This is needed to handle the special case when the current PCS used
> by phylink is removed and a major_config is needed to propagae the
> configuration change. With this option enabled we also force mac_config
> even if the PHY link is not up for the in-band case.
>
> Signed-off-by: Christian Marangi <ansuelsmth@gmail.com>
> ---
> drivers/net/phy/phylink.c | 56 +++++++++++++++++++++++++++++++++++++++
> include/linux/phylink.h | 2 ++
> 2 files changed, 58 insertions(+)
>
> diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c
> index c38bcd43b8c8..064d6f5a06da 100644
> --- a/drivers/net/phy/phylink.c
> +++ b/drivers/net/phy/phylink.c
> @@ -158,6 +158,8 @@ static const phy_interface_t phylink_sfp_interface_preference[] = {
> static DECLARE_PHY_INTERFACE_MASK(phylink_sfp_interfaces);
>
> static void phylink_run_resolve(struct phylink *pl);
> +static void phylink_link_down(struct phylink *pl);
> +static void phylink_pcs_disable(struct phylink_pcs *pcs);
>
> /**
> * phylink_set_port_modes() - set the port type modes in the ethtool mask
> @@ -918,6 +920,60 @@ static void phylink_resolve_an_pause(struct phylink_link_state *state)
> }
> }
>
> +/**
> + * phylink_release_pcs - Removes a PCS from the phylink PCS available list
> + * @pcs: a pointer to the phylink_pcs struct to be released
> + *
> + * This function release a PCS from the phylink PCS available list if
> + * actually in use. It also refreshes the supported interfaces of the
> + * phylink instance by copying the supported interfaces from the phylink
> + * conf and merging the supported interfaces of the remaining available PCS
> + * in the list and trigger a resolve.
> + */
> +void phylink_release_pcs(struct phylink_pcs *pcs)
> +{
> + struct phylink *pl;
> +
> + ASSERT_RTNL();
> +
> + pl = pcs->phylink;
> + if (!pl)
> + return;
> +
> + mutex_lock(&pl->state_mutex);
> +
> + list_del(&pcs->list);
> + pcs->phylink = NULL;
> +
> + /*
> + * Check if we are removing the PCS currently
> + * in use by phylink. If this is the case, tear down
> + * the link, force phylink resolve to reconfigure the
> + * interface mode, disable the current PCS and set the
> + * phylink PCS to NULL.
> + */
> + if (pl->pcs == pcs) {
> + phylink_link_down(pl);
> + phylink_pcs_disable(pl->pcs);
> +
> + pl->force_major_config = true;
> + pl->pcs = NULL;
> + }
> +
> + mutex_unlock(&pl->state_mutex);
> +
> + /* Refresh supported interfaces */
> + phy_interface_copy(pl->supported_interfaces,
> + pl->config->supported_interfaces);
> + list_for_each_entry(pcs, &pl->pcs_list, list)
> + phy_interface_or(pl->supported_interfaces,
> + pl->supported_interfaces,
> + pcs->supported_interfaces);
I've given more thought to that 'supported_interfaces' thing. This
patchset redefines the meaning of
pl->config->supported_interfaces
Currently, it's filled by the MAC driver and means "Every interface
we can support, including the ones provided by PCSs that we can use
with this MAC".
It now becomes "Every interface we support without needing a PCS", at
least the way I understand that.
It's not an error in your code, but I think this is worth documenting
somewhere as this changes one the things that's already fairly
error-prone in new drivers.
I don't know to what extent people use that, be we have a porting guide
that explains how to use phylink in a MAC driver, maybe an update in there
would be nice as well :
https://docs.kernel.org/networking/sfp-phylink.html#rough-guide-to-converting-a-network-driver-to-sfp-phylink
Maxime
^ permalink raw reply
* Re: [PATCH V2 1/8] PCI: imx6: Add skip_pwrctrl_off flag support
From: Manivannan Sadhasivam @ 2026-06-25 13:57 UTC (permalink / raw)
To: Sherry Sun
Cc: Frank Li (OSS), Sherry Sun (OSS), robh@kernel.org,
krzk+dt@kernel.org, conor+dt@kernel.org, Frank Li,
s.hauer@pengutronix.de, kernel@pengutronix.de, festevam@gmail.com,
Amitkumar Karwar, Neeraj Sanjay Kale, marcel@holtmann.org,
luiz.dentz@gmail.com, Hongxing Zhu, l.stach@pengutronix.de,
lpieralisi@kernel.org, kwilczynski@kernel.org,
bhelgaas@google.com, brgl@kernel.org, imx@lists.linux.dev,
linux-pci@vger.kernel.org, linux-arm-kernel@lists.infradead.org,
devicetree@vger.kernel.org, linux-kernel@vger.kernel.org,
linux-bluetooth@vger.kernel.org, linux-pm@vger.kernel.org
In-Reply-To: <VI0PR04MB12114BCBD405505888063284492EC2@VI0PR04MB12114.eurprd04.prod.outlook.com>
On Thu, Jun 25, 2026 at 07:25:46AM +0000, Sherry Sun wrote:
> > Subject: Re: [PATCH V2 1/8] PCI: imx6: Add skip_pwrctrl_off flag support
> >
> > On Wed, Jun 24, 2026 at 07:09:26AM +0000, Sherry Sun wrote:
> > > > Subject: Re: [PATCH V2 1/8] PCI: imx6: Add skip_pwrctrl_off flag
> > > > support
> > > >
> > > > On Tue, Jun 23, 2026 at 11:07:28AM +0800, Sherry Sun (OSS) wrote:
> > > > > From: Sherry Sun <sherry.sun@nxp.com>
> > > > >
> > > > > Use dw_pcie_rp::skip_pwrctrl_off to avoid powering off devices
> > > > > during suspend to preserve wakeup capability of the devices and
> > > > > also not to power on the devices in the init path.
> > > > > This allows controller power-off to be skipped when some devices(e.g.
> > > > > M.2 cards key E without auxiliary power) required to support PCIe
> > > > > L2 link state and wake-up mechanisms.
> > > > >
> > > > > Signed-off-by: Sherry Sun <sherry.sun@nxp.com>
> > > > > ---
> > > > > drivers/pci/controller/dwc/pci-imx6.c | 36
> > > > > +++++++++++++++++----------
> > > > > 1 file changed, 23 insertions(+), 13 deletions(-)
> > > > >
> > > > > diff --git a/drivers/pci/controller/dwc/pci-imx6.c
> > > > > b/drivers/pci/controller/dwc/pci-imx6.c
> > > > > index 0fa716d1ed75..ff5a9565dbbf 100644
> > > > > --- a/drivers/pci/controller/dwc/pci-imx6.c
> > > > > +++ b/drivers/pci/controller/dwc/pci-imx6.c
> > > > > @@ -1382,16 +1382,20 @@ static int imx_pcie_host_init(struct
> > > > > dw_pcie_rp
> > > > *pp)
> > > > > }
> > > > > }
> > > > >
> > > > > - ret = pci_pwrctrl_create_devices(dev);
> > > > > - if (ret) {
> > > > > - dev_err(dev, "failed to create pwrctrl devices\n");
> > > > > - goto err_reg_disable;
> > > > > + if (!pci->suspended) {
> > > > > + ret = pci_pwrctrl_create_devices(dev);
> > > >
> > > > Is possible move pci_pwrctrl_create_devices() of
> > > > pci_pwrctrl_create_devices
> > > >
> > > > and call it direct at probe() function, like other regulator_get function.
> > > >
> > >
> > > Hi Frank,
> > > That makes sense. However, if we move pci_pwrctrl_create_devices () to
> > > probe(), we may need to add the following goto err_pwrctrl_destroy
> > > path in imx_pcie_probe() to properly handle errors from
> > > pci_pwrctrl_power_on_devices(), is that acceptable?
> >
> > Can you add a API devm_pci_pwrctrl_create_devices() ?
> >
>
> Hi Frank, we cannot unconditionally destroy the pwrctrl devices
> when probing fails by using devm API.
> Since we need to check the return value of
> pci_pwrctrl_power_on_devices() for example EPROBE_DEFER to decide
> whether to destroy the pwrctrl devices to avoid the deferred probe loop.
>
> You can find more related discussion here.
> https://lore.kernel.org/all/tutxwjciedqoje5wxvtin4h637auni5zzpvb7rtfg4uticxoux@yfl6xg7oht7t/
>
Yes. Ideally we should try to do a blocking wait in the pwrctrl core until all
the pwrctrl drivers are probed, instead of deferring probe. Hopefully, I'll get
to it soon.
- Mani
--
மணிவண்ணன் சதாசிவம்
^ permalink raw reply
* Re: [PATCH v14 29/44] arm64: RMI: Runtime faulting of memory
From: Gavin Shan @ 2026-06-25 13:53 UTC (permalink / raw)
To: Lorenzo Pieralisi
Cc: Steven Price, kvm, kvmarm, Catalin Marinas, Marc Zyngier,
Will Deacon, James Morse, Oliver Upton, Suzuki K Poulose,
Zenghui Yu, linux-arm-kernel, linux-kernel, Joey Gouly,
Alexandru Elisei, Christoffer Dall, Fuad Tabba, linux-coco,
Ganapatrao Kulkarni, Shanker Donthineni, Alper Gun,
Aneesh Kumar K . V, Emi Kisanuki, Vishal Annapurve, WeiLin.Chang,
Lorenzo.Pieralisi2
In-Reply-To: <aiLes2ecZSr17UwZ@lpieralisi>
On 6/6/26 12:35 AM, Lorenzo Pieralisi wrote:
> On Fri, Jun 05, 2026 at 06:11:11PM +1000, Gavin Shan wrote:
>> On 6/5/26 5:28 PM, Lorenzo Pieralisi wrote:
>>> On Fri, Jun 05, 2026 at 04:23:15PM +1000, Gavin Shan wrote:
>>>
>>> [...]
>>>
>>>>> +static int realm_map_ipa(struct kvm *kvm, phys_addr_t ipa,
>>>>> + kvm_pfn_t pfn, unsigned long map_size,
>>>>> + enum kvm_pgtable_prot prot,
>>>>> + struct kvm_mmu_memory_cache *memcache)
>>>>> +{
>>>>> + struct realm *realm = &kvm->arch.realm;
>>>>> +
>>>>> + /*
>>>>> + * Write permission is required for now even though it's possible to
>>>>> + * map unprotected pages (granules) as read-only. It's impossible to
>>>>> + * map protected pages (granules) as read-only.
>>>>> + */
>>>>> + if (WARN_ON(!(prot & KVM_PGTABLE_PROT_W)))
>>>>> + return -EFAULT;
>>>>> +
>>>>
>>>> I'm a bit concerned with this. We don't have KVM_PGTABLE_PROT_W set in @prot
>>>> if the stage2 fault is raised due to memory read. With -EFAULT returned to VMM
>>>> (e.g. QEMU), the vCPU continuous execution is stopped and system won't be
>>>> working any more.
>>>>
>>>>> + ipa = ALIGN_DOWN(ipa, PAGE_SIZE);
>>>>> + if (!kvm_realm_is_private_address(realm, ipa))
>>>>> + return realm_map_non_secure(realm, ipa, pfn, map_size, prot,
>>>>> + memcache);
>>>>> +
>>>>> + return realm_map_protected(kvm, ipa, pfn, map_size, memcache);
>>>>> +}
>>>>> +
>>>>> static bool kvm_vma_is_cacheable(struct vm_area_struct *vma)
>>>>> {
>>>>> switch (FIELD_GET(PTE_ATTRINDX_MASK, pgprot_val(vma->vm_page_prot))) {
>>>>> @@ -1604,27 +1641,52 @@ static int gmem_abort(const struct kvm_s2_fault_desc *s2fd)
>>>>> bool write_fault, exec_fault;
>>>>> enum kvm_pgtable_walk_flags flags = KVM_PGTABLE_WALK_SHARED;
>>>>> enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_R;
>>>>> - struct kvm_pgtable *pgt = s2fd->vcpu->arch.hw_mmu->pgt;
>>>>> + struct kvm_vcpu *vcpu = s2fd->vcpu;
>>>>> + struct kvm_pgtable *pgt = vcpu->arch.hw_mmu->pgt;
>>>>> + gpa_t gpa = kvm_gpa_from_fault(vcpu->kvm, s2fd->fault_ipa);
>>>>> unsigned long mmu_seq;
>>>>> struct page *page;
>>>>> - struct kvm *kvm = s2fd->vcpu->kvm;
>>>>> + struct kvm *kvm = vcpu->kvm;
>>>>> void *memcache;
>>>>> kvm_pfn_t pfn;
>>>>> gfn_t gfn;
>>>>> int ret;
>>>>> - memcache = get_mmu_memcache(s2fd->vcpu);
>>>>> - ret = topup_mmu_memcache(s2fd->vcpu, memcache);
>>>>> + if (kvm_is_realm(vcpu->kvm)) {
>>>>> + /* check for memory attribute mismatch */
>>>>> + bool is_priv_gfn = kvm_mem_is_private(kvm, gpa >> PAGE_SHIFT);
>>>>> + /*
>>>>> + * For Realms, the shared address is an alias of the private
>>>>> + * PA with the top bit set. Thus if the fault address matches
>>>>> + * the GPA then it is the private alias.
>>>>> + */
>>>>> + bool is_priv_fault = (gpa == s2fd->fault_ipa);
>>>>> +
>>>>> + if (is_priv_gfn != is_priv_fault) {
>>>>> + kvm_prepare_memory_fault_exit(vcpu, gpa, PAGE_SIZE,
>>>>> + kvm_is_write_fault(vcpu),
>>>>> + false,
>>>>> + is_priv_fault);
>>>>> + /*
>>>>> + * KVM_EXIT_MEMORY_FAULT requires an return code of
>>>>> + * -EFAULT, see the API documentation
>>>>> + */
>>>>> + return -EFAULT;
>>>>> + }
>>>>> + }
>>>>> +
>>>>> + memcache = get_mmu_memcache(vcpu);
>>>>> + ret = topup_mmu_memcache(vcpu, memcache);
>>>>> if (ret)
>>>>> return ret;
>>>>> if (s2fd->nested)
>>>>> gfn = kvm_s2_trans_output(s2fd->nested) >> PAGE_SHIFT;
>>>>> else
>>>>> - gfn = s2fd->fault_ipa >> PAGE_SHIFT;
>>>>> + gfn = gpa >> PAGE_SHIFT;
>>>>> - write_fault = kvm_is_write_fault(s2fd->vcpu);
>>>>> - exec_fault = kvm_vcpu_trap_is_exec_fault(s2fd->vcpu);
>>>>> + write_fault = kvm_is_write_fault(vcpu);
>>>>> + exec_fault = kvm_vcpu_trap_is_exec_fault(vcpu);
>>>>> VM_WARN_ON_ONCE(write_fault && exec_fault);
>>>>> @@ -1634,7 +1696,7 @@ static int gmem_abort(const struct kvm_s2_fault_desc *s2fd)
>>>>> ret = kvm_gmem_get_pfn(kvm, s2fd->memslot, gfn, &pfn, &page, NULL);
>>>>> if (ret) {
>>>>> - kvm_prepare_memory_fault_exit(s2fd->vcpu, s2fd->fault_ipa, PAGE_SIZE,
>>>>> + kvm_prepare_memory_fault_exit(vcpu, gpa, PAGE_SIZE,
>>>>> write_fault, exec_fault, false);
>>>>> return ret;
>>>>> }
>>>>> @@ -1654,14 +1716,20 @@ static int gmem_abort(const struct kvm_s2_fault_desc *s2fd)
>>>>> kvm_fault_lock(kvm);
>>>>> if (mmu_invalidate_retry(kvm, mmu_seq)) {
>>>>> ret = -EAGAIN;
>>>>> - goto out_unlock;
>>>>> + goto out_release_page;
>>>>> + }
>>>>> +
>>>>> + if (kvm_is_realm(kvm)) {
>>>>> + ret = realm_map_ipa(kvm, s2fd->fault_ipa, pfn,
>>>>> + PAGE_SIZE, KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_W, memcache);
>>>>> + goto out_release_page;
>>>>> }
>>>>> ret = KVM_PGT_FN(kvm_pgtable_stage2_map)(pgt, s2fd->fault_ipa, PAGE_SIZE,
>>>>> __pfn_to_phys(pfn), prot,
>>>>> memcache, flags);
>>>>> -out_unlock:
>>>>> +out_release_page:
>>>>> kvm_release_faultin_page(kvm, page, !!ret, prot & KVM_PGTABLE_PROT_W);
>>>>> kvm_fault_unlock(kvm);
>>>>> @@ -1847,7 +1915,7 @@ static int kvm_s2_fault_get_vma_info(const struct kvm_s2_fault_desc *s2fd,
>>>>> * mapping size to ensure we find the right PFN and lay down the
>>>>> * mapping in the right place.
>>>>> */
>>>>> - s2vi->gfn = ALIGN_DOWN(s2fd->fault_ipa, s2vi->vma_pagesize) >> PAGE_SHIFT;
>>>>> + s2vi->gfn = kvm_gpa_from_fault(kvm, ALIGN_DOWN(s2fd->fault_ipa, s2vi->vma_pagesize)) >> PAGE_SHIFT;
>>>>> s2vi->mte_allowed = kvm_vma_mte_allowed(vma);
>>>>> @@ -2056,6 +2124,9 @@ static int kvm_s2_fault_map(const struct kvm_s2_fault_desc *s2fd,
>>>>> prot &= ~KVM_NV_GUEST_MAP_SZ;
>>>>> ret = KVM_PGT_FN(kvm_pgtable_stage2_relax_perms)(pgt, gfn_to_gpa(gfn),
>>>>> prot, flags);
>>>>> + } else if (kvm_is_realm(kvm)) {
>>>>> + ret = realm_map_ipa(kvm, s2fd->fault_ipa, pfn, mapping_size,
>>>>> + prot, memcache);
>>>>> } else {
>>>>> ret = KVM_PGT_FN(kvm_pgtable_stage2_map)(pgt, gfn_to_gpa(gfn), mapping_size,
>>>>> __pfn_to_phys(pfn), prot,
>>>>
>>>> For the case kvm_is_realm(), need we adjust 's2fd->fault_ipa' for the sake of
>>>> huge pages. In kvm_s2_fault_map(), @gfn and @pfn may have been adjusted by
>>>> transparent_hugepage_adjust() to be aligned with huge page size. If the
>>>> adjustment happened in transparent_hugepage_adjust(), we need to align
>>>> s2fd->fault_ipa down to the huge page size either.
>>>
>>> All of the above + some RMM changes are needed to get QEmu VMM going
>>> with anon pages guest memory backing - currently testing various
>>> configurations in the background.
>>>
>>
>> I tried to rebase Jean's latest QEMU series [1] to upstream QEMU, and found
>> that memory slots backed by THP are broken. With THP disabled on the host and
>> other fixes (mentioned in my prevous replies) applied on the top of this (v14)
>> series, I'm able to boot a realm guest with rebased QEMU series [2], plus more
>> fxies on the top.
>>
>> [1] https://git.codelinaro.org/linaro/dcap/qemu.git (branch: cca/latest)
>> [2] https://git.qemu.org/git/qemu.git (branch: cca/gavin)
>>
>> Lorenzo, You may be saying there is someone making QEMU to support ARM/CCA?
>
> Mathieu and I are working on that yes and with Steven/Suzuki to fix the THP
> issues you pointed out above.
>
>> If so, I'm not sure if there is a QEMU repository for me to try?
>
> We should be able to submit patches by end of June - we shall let you know
> whether we can make something available earlier.
>
Not sure if there are other known issues in this series. It seems the stage2
page fault handling on the shared space isn't working well. In my test, the
vring (struct vring_desc) of virtio-net-pci is updated by the guest, and the
data isn't seen by QEMU, I'm suspecting if the host-page-frame-number is properly
resolved in the s2 page fault handler for shared (unprotected) space.
- I rebased Jean's latest qemu branch to the upstream qemu;
- On the host, which is emulated by qemu/tcg, the THP (transparent huge page) is
disabled.
- On the guest, I can see the virtio vring (struct vring_desc) is updated. The
S1 page-table entry looks correct because the corresponding physical address
0x10046880000 is a sane shared (unprotected) space address.
[ 52.094143] software IO TLB: Memory encryption is active and system is using DMA bounce buffers
[ 52.289746] virtqueue_add_desc_split: desc[0]@0xffff000006880000, [00000100b983f000 00000640 0002 0001]
[ 52.432150] PTE 0x00e8010046880707 at address 0xffff000006880000
- On the host, the s2 page-table-entry is unmapped due to attribute transition (private -> shared).
A subsequent S2 page fault is raised against the adress and the s2 page-table-entry is built.
[ 109.259077] ====> realm_unmap_shared_range: tracked_unprot_addr=0x10046880000
[ 109.260249] realm_unmap_shared_range: unmapped shared range at 0x10046880000
[ 109.317786] realm_unmap_shared_range: unmapped shared range at 0x10046880000
[ 109.629939] ====> kvm_handle_guest_abort: fault_ipa=0x10046880000, esr=0x92000007
[ 109.630245] realm_map_non_secure: ipa=0x10046880000, pfn=0xb8b59, size=0x1000, prot=0xf
[ 109.630331] realm_map_non_secure: ipa=0x10046880000, ipa_top=0x10046881000, flags=0x1e0001, range_desc=0xb8b59004
- On QEMU, the updated vring (struct vring_desc) at GPA 0x46880000 isn't seen. All the
data in that adress are zeros.
====> virtqueue_split_pop: vdev=<virtio-net>, sz=0x38, queue_index=0x0, vq->vring.num=0x100
virtqueue_split_pop: last_avail_idx=0x0, head=0x0
address_space_read_cached_slow: cache@0xffff1c036440, addr=0x0, buf=0xffffeee34880, len=0x10
address_space_read_cached_slow: cache: ptr=0x0, xlat=0x10046880000, len=0x1000, mrs=<realm-dma-region>, is_write=no
address_space_read_cached_slow: translated to mr=<mach-virt.ram>, mr_addr=0x6880000, l=0x10
flatview_read_continue_step: mr=<mach-virt.ram>, host=0xffff23e00000, mr_addr=0x6880000, ram_ptr=0xffff2a680000
virtqueue_split_pop: desc: 0000000000000000 - 00000000 - 00000000 - 00000000
qemu-system-aarch64: virtio: zero sized buffers are not allowed
Thanks,
Gavin
^ permalink raw reply
* Re: [PATCH v4 1/3] perf: marvell: Add MPAM partid filtering to CN10K TAD PMU
From: Ben Horgan @ 2026-06-25 13:53 UTC (permalink / raw)
To: Geetha sowjanya, linux-perf-users, linux-kernel, linux-arm-kernel,
devicetree
Cc: mark.rutland, will, krzk+dt, james.morse
In-Reply-To: <20260618153610.13649-2-gakula@marvell.com>
Hi Geetha,
+CC James
On 6/18/26 16:36, Geetha sowjanya wrote:
> From: Tanmay Jagdale <tanmay@marvell.com>
>
> The TAD PMU exposes counters that can be filtered by MPAM partition id
> for a subset of allocation and hit events.
>
> Add a 9-bit partid format attribute (config1) and route counter programming
> through variant-specific ops so CN10K keeps MPAM-capable programming while
> Odyssey keeps the reduced event set without advertising partid in sysfs.
>
> Probe no longer mutates the platform_device MMIO resource (walk a local
> map_start), rejects tad-cnt / page sizes of zero, validates the memory
> window against tad-cnt, and registers the perf PMU before hotplug with
> correct unwind.
>
> Example:
> perf stat -e tad/tad_alloc_any,partid=0x12,partid_en=1/ -- <program>
Where is the user expected to get the PARTID from? The MPAM driver
considers the PARTID as an internal only value.
resctrl does support a 'debug' mount option which will show the CLOSID
associated with a control group. Whilst the CLOSID is often the PARTID,
it is really a set of PARTIDs. When the cdp mount option is used, CLOSID
maps to 2 PARTIDs and if we use PARTID narrowing to give us more
monitors, as in proposed in [1], then the set of PARTIDs may be bigger.
Furthermore, if the PARTID narrowing scheme is made dynamic the size of
the PARTID set may change when control or monitoring groups are created
or deleted.
It seems that a way to map from a resctrl control group to the set of
PARTIDs is required and a mechanism to tie this to lifetime of the
resctrl mount.
Perhaps some helpers along the lines of:
int resctrl_mount_generation(void)
int mpam_rdtgrp_to_partid_is_static(int mount_gen)
int resctrl_rdtgrp_generation(char *name)
int mpam_rdtgrp_to_partid_count(char *name, int rdt_gen)
int mpam_rdtgrp_to_partid_array(char *name, int rdt_gen, int* partids)
The rdtgrp generation is to an attempt to avoid having to use a debug
interface in anger and cope with renaming of control groups in resctrl.
This does seem a bit unwieldly so hopefully there is better way to do this.
Sorry to throw a spanner in the works.
Thanks,
Ben
>
> Signed-off-by: Tanmay Jagdale <tanmay@marvell.com>
> Signed-off-by: Geetha sowjanya <gakula@marvell.com>
> ---
>
> Changelog (since v3)
> --------------------
> - Restore cpuhp_state_add_instance_nocalls before perf_pmu_register in probe
> so users cannot attach events before the hotplug instance exists; unwind
> removes the hotplug instance if perf registration fails.
> - Add perf_ready: tad_pmu_offline_cpu skips perf_pmu_migrate_context until after
> successful perf_pmu_register, so a CPU offline between hotplug add and perf
> register does not touch perf core state for an unregistered PMU.
>
> Changelog (since v2)
> --------------------
> - Validate the eventId using an appropriate mask to ensure
> it is restricted to 8 bits.
>
> Changelog (since v1)
> --------------------
> - Fix config1 filter enable to use bit 9 consistently with the PMU format
> string (partid_en) and reject reserved bits with GENMASK(9, 0).
> - Register perf_pmu_register before cpuhp_state_add_instance_nocalls and
> unregister on hotplug failure.
>
> drivers/perf/marvell_cn10k_tad_pmu.c | 220 +++++++++++++++++++++------
> 1 file changed, 171 insertions(+), 49 deletions(-)
>
> diff --git a/drivers/perf/marvell_cn10k_tad_pmu.c b/drivers/perf/marvell_cn10k_tad_pmu.c
> index 51ccb0befa05..340be3776fe7 100644
> --- a/drivers/perf/marvell_cn10k_tad_pmu.c
> +++ b/drivers/perf/marvell_cn10k_tad_pmu.c
> @@ -7,6 +7,8 @@
> #define pr_fmt(fmt) "tad_pmu: " fmt
>
> #include <linux/io.h>
> +#include <linux/bits.h>
> +#include <linux/compiler.h>
> #include <linux/module.h>
> #include <linux/of.h>
> #include <linux/cpuhotplug.h>
> @@ -14,12 +16,20 @@
> #include <linux/platform_device.h>
> #include <linux/acpi.h>
>
> -#define TAD_PFC_OFFSET 0x800
> -#define TAD_PFC(counter) (TAD_PFC_OFFSET | (counter << 3))
> #define TAD_PRF_OFFSET 0x900
> -#define TAD_PRF(counter) (TAD_PRF_OFFSET | (counter << 3))
> +#define TAD_PFC_OFFSET 0x800
> +#define TAD_PFC(base, counter) ((base) | ((u64)(counter) << 3))
> +#define TAD_PRF(base, counter) ((base) | ((u64)(counter) << 3))
> #define TAD_PRF_CNTSEL_MASK 0xFF
> +#define TAD_PRF_MATCH_PARTID BIT(8)
> +#define TAD_PRF_PARTID_NS BIT(10)
> +/*
> + * config1: bits 0..8 MPAM partition id (including 0); bit 9 requests
> + * filtering for MPAM-capable events. All-zero config1 means no filter.
> + */
> +#define TAD_PARTID_FILTER_EN BIT(9)
> #define TAD_MAX_COUNTERS 8
> +#define TAD_EVENT_SEL_MASK GENMASK(7, 0)
>
> #define to_tad_pmu(p) (container_of(p, struct tad_pmu, pmu))
>
> @@ -27,30 +37,94 @@ struct tad_region {
> void __iomem *base;
> };
>
> +enum mrvl_tad_pmu_version {
> + TAD_PMU_V1 = 1,
> + TAD_PMU_V2,
> +};
> +
> +struct tad_pmu_data {
> + int id;
> + u64 tad_prf_offset;
> + u64 tad_pfc_offset;
> +};
> +
> struct tad_pmu {
> struct pmu pmu;
> struct tad_region *regions;
> u32 region_cnt;
> unsigned int cpu;
> + /* Set after successful perf_pmu_register(); gates offline migration. */
> + bool perf_ready;
> + const struct tad_pmu_ops *ops;
> + const struct tad_pmu_data *pdata;
> struct hlist_node node;
> struct perf_event *events[TAD_MAX_COUNTERS];
> DECLARE_BITMAP(counters_map, TAD_MAX_COUNTERS);
> };
>
> -enum mrvl_tad_pmu_version {
> - TAD_PMU_V1 = 1,
> - TAD_PMU_V2,
> -};
> -
> -struct tad_pmu_data {
> - int id;
> +struct tad_pmu_ops {
> + void (*start_counter)(struct tad_pmu *pmu, struct perf_event *event);
> };
>
> static int tad_pmu_cpuhp_state;
>
> +static void tad_pmu_start_counter(struct tad_pmu *pmu,
> + struct perf_event *event)
> +{
> + const struct tad_pmu_data *pdata = pmu->pdata;
> + struct hw_perf_event *hwc = &event->hw;
> + u32 event_idx = (u32)(event->attr.config & TAD_EVENT_SEL_MASK);
> + u32 counter_idx = hwc->idx;
> + u64 partid_filter = 0;
> + u64 reg_val;
> + u64 cfg1 = event->attr.config1;
> + bool use_mpam = cfg1 & TAD_PARTID_FILTER_EN;
> + u32 partid = (u32)(cfg1 & GENMASK(8, 0));
> + int i;
> +
> + for (i = 0; i < pmu->region_cnt; i++)
> + writeq_relaxed(0, pmu->regions[i].base +
> + TAD_PFC(pdata->tad_pfc_offset, counter_idx));
> +
> + if (use_mpam && event_idx > 0x19 && event_idx < 0x21) {
> + partid_filter = TAD_PRF_MATCH_PARTID | TAD_PRF_PARTID_NS |
> + ((u64)partid << 11);
> + }
> +
> +
> + for (i = 0; i < pmu->region_cnt; i++) {
> + reg_val = event_idx & 0xFF;
> + reg_val |= partid_filter;
> + writeq_relaxed(reg_val, pmu->regions[i].base +
> + TAD_PRF(pdata->tad_prf_offset, counter_idx));
> + }
> +}
> +
> +static void tad_pmu_v2_start_counter(struct tad_pmu *pmu,
> + struct perf_event *event)
> +{
> + const struct tad_pmu_data *pdata = pmu->pdata;
> + struct hw_perf_event *hwc = &event->hw;
> + u32 event_idx = (u32)(event->attr.config & TAD_EVENT_SEL_MASK);
> + u32 counter_idx = hwc->idx;
> + u64 reg_val;
> + int i;
> +
> + for (i = 0; i < pmu->region_cnt; i++)
> + writeq_relaxed(0, pmu->regions[i].base +
> + TAD_PFC(pdata->tad_pfc_offset, counter_idx));
> +
> + for (i = 0; i < pmu->region_cnt; i++) {
> + reg_val = event_idx & 0xFF;
> + writeq_relaxed(reg_val, pmu->regions[i].base +
> + TAD_PRF(pdata->tad_prf_offset, counter_idx));
> + }
> +}
> +
> static void tad_pmu_event_counter_read(struct perf_event *event)
> {
> struct tad_pmu *tad_pmu = to_tad_pmu(event->pmu);
> + const struct tad_pmu_data *pdata = tad_pmu->pdata;
> struct hw_perf_event *hwc = &event->hw;
> u32 counter_idx = hwc->idx;
> u64 prev, new;
> @@ -60,7 +134,7 @@ static void tad_pmu_event_counter_read(struct perf_event *event)
> prev = local64_read(&hwc->prev_count);
> for (i = 0, new = 0; i < tad_pmu->region_cnt; i++)
> new += readq(tad_pmu->regions[i].base +
> - TAD_PFC(counter_idx));
> + TAD_PFC(pdata->tad_pfc_offset, counter_idx));
> } while (local64_cmpxchg(&hwc->prev_count, prev, new) != prev);
>
> local64_add(new - prev, &event->count);
> @@ -69,16 +143,14 @@ static void tad_pmu_event_counter_read(struct perf_event *event)
> static void tad_pmu_event_counter_stop(struct perf_event *event, int flags)
> {
> struct tad_pmu *tad_pmu = to_tad_pmu(event->pmu);
> + const struct tad_pmu_data *pdata = tad_pmu->pdata;
> struct hw_perf_event *hwc = &event->hw;
> u32 counter_idx = hwc->idx;
> int i;
>
> - /* TAD()_PFC() stop counting on the write
> - * which sets TAD()_PRF()[CNTSEL] == 0
> - */
> for (i = 0; i < tad_pmu->region_cnt; i++) {
> writeq_relaxed(0, tad_pmu->regions[i].base +
> - TAD_PRF(counter_idx));
> + TAD_PRF(pdata->tad_prf_offset, counter_idx));
> }
>
> tad_pmu_event_counter_read(event);
> @@ -89,26 +161,10 @@ static void tad_pmu_event_counter_start(struct perf_event *event, int flags)
> {
> struct tad_pmu *tad_pmu = to_tad_pmu(event->pmu);
> struct hw_perf_event *hwc = &event->hw;
> - u32 event_idx = event->attr.config;
> - u32 counter_idx = hwc->idx;
> - u64 reg_val;
> - int i;
>
> hwc->state = 0;
>
> - /* Typically TAD_PFC() are zeroed to start counting */
> - for (i = 0; i < tad_pmu->region_cnt; i++)
> - writeq_relaxed(0, tad_pmu->regions[i].base +
> - TAD_PFC(counter_idx));
> -
> - /* TAD()_PFC() start counting on the write
> - * which sets TAD()_PRF()[CNTSEL] != 0
> - */
> - for (i = 0; i < tad_pmu->region_cnt; i++) {
> - reg_val = event_idx & 0xFF;
> - writeq_relaxed(reg_val, tad_pmu->regions[i].base +
> - TAD_PRF(counter_idx));
> - }
> + tad_pmu->ops->start_counter(tad_pmu, event);
> }
>
> static void tad_pmu_event_counter_del(struct perf_event *event, int flags)
> @@ -128,7 +184,6 @@ static int tad_pmu_event_counter_add(struct perf_event *event, int flags)
> struct hw_perf_event *hwc = &event->hw;
> int idx;
>
> - /* Get a free counter for this event */
> idx = find_first_zero_bit(tad_pmu->counters_map, TAD_MAX_COUNTERS);
> if (idx == TAD_MAX_COUNTERS)
> return -EAGAIN;
> @@ -148,6 +203,9 @@ static int tad_pmu_event_counter_add(struct perf_event *event, int flags)
> static int tad_pmu_event_init(struct perf_event *event)
> {
> struct tad_pmu *tad_pmu = to_tad_pmu(event->pmu);
> + const struct tad_pmu_data *pdata = tad_pmu->pdata;
> + u32 event_idx = (u32)(event->attr.config & TAD_EVENT_SEL_MASK);
> + u64 cfg1 = event->attr.config1;
>
> if (event->attr.type != event->pmu->type)
> return -ENOENT;
> @@ -158,6 +216,23 @@ static int tad_pmu_event_init(struct perf_event *event)
> if (event->state != PERF_EVENT_STATE_OFF)
> return -EINVAL;
>
> + if (event->attr.config & ~TAD_EVENT_SEL_MASK)
> + return -EINVAL;
> +
> + if (pdata->id == TAD_PMU_V2) {
> + if (cfg1)
> + return -EINVAL;
> + } else {
> + if ((cfg1 & GENMASK(8, 0)) && !(cfg1 & TAD_PARTID_FILTER_EN))
> + return -EINVAL;
> + if (cfg1 & TAD_PARTID_FILTER_EN) {
> + if (event_idx <= 0x19 || event_idx >= 0x21)
> + return -EINVAL;
> + }
> + if (cfg1 & ~GENMASK(9, 0))
> + return -EINVAL;
> + }
> +
> event->cpu = tad_pmu->cpu;
> event->hw.idx = -1;
> event->hw.config_base = event->attr.config;
> @@ -232,7 +307,7 @@ static struct attribute *ody_tad_pmu_event_attrs[] = {
> TAD_PMU_EVENT_ATTR(tad_hit_ltg, 0x1e),
> TAD_PMU_EVENT_ATTR(tad_hit_any, 0x1f),
> TAD_PMU_EVENT_ATTR(tad_tag_rd, 0x20),
> - TAD_PMU_EVENT_ATTR(tad_tot_cycle, 0xFF),
> + TAD_PMU_EVENT_ATTR(tad_tot_cycle, 0xff),
> NULL
> };
>
> @@ -242,9 +317,13 @@ static const struct attribute_group ody_tad_pmu_events_attr_group = {
> };
>
> PMU_FORMAT_ATTR(event, "config:0-7");
> +PMU_FORMAT_ATTR(partid, "config1:0-8");
> +PMU_FORMAT_ATTR(partid_en, "config1:9-9");
>
> static struct attribute *tad_pmu_format_attrs[] = {
> &format_attr_event.attr,
> + &format_attr_partid.attr,
> + &format_attr_partid_en.attr,
> NULL
> };
>
> @@ -253,6 +332,16 @@ static struct attribute_group tad_pmu_format_attr_group = {
> .attrs = tad_pmu_format_attrs,
> };
>
> +static struct attribute *ody_tad_pmu_format_attrs[] = {
> + &format_attr_event.attr,
> + NULL
> +};
> +
> +static struct attribute_group ody_tad_pmu_format_attr_group = {
> + .name = "format",
> + .attrs = ody_tad_pmu_format_attrs,
> +};
> +
> static ssize_t tad_pmu_cpumask_show(struct device *dev,
> struct device_attribute *attr, char *buf)
> {
> @@ -281,16 +370,25 @@ static const struct attribute_group *tad_pmu_attr_groups[] = {
>
> static const struct attribute_group *ody_tad_pmu_attr_groups[] = {
> &ody_tad_pmu_events_attr_group,
> - &tad_pmu_format_attr_group,
> + &ody_tad_pmu_format_attr_group,
> &tad_pmu_cpumask_attr_group,
> NULL
> };
>
> +static const struct tad_pmu_ops tad_pmu_ops = {
> + .start_counter = tad_pmu_start_counter,
> +};
> +
> +static const struct tad_pmu_ops tad_pmu_v2_ops = {
> + .start_counter = tad_pmu_v2_start_counter,
> +};
> +
> static int tad_pmu_probe(struct platform_device *pdev)
> {
> const struct tad_pmu_data *dev_data;
> struct device *dev = &pdev->dev;
> struct tad_region *regions;
> + resource_size_t map_start;
> struct tad_pmu *tad_pmu;
> struct resource *res;
> u32 tad_pmu_page_size;
> @@ -298,7 +396,6 @@ static int tad_pmu_probe(struct platform_device *pdev)
> u32 tad_cnt;
> int version;
> int i, ret;
> - char *name;
>
> tad_pmu = devm_kzalloc(&pdev->dev, sizeof(*tad_pmu), GFP_KERNEL);
> if (!tad_pmu)
> @@ -312,6 +409,7 @@ static int tad_pmu_probe(struct platform_device *pdev)
> return -ENODEV;
> }
> version = dev_data->id;
> + tad_pmu->pdata = dev_data;
>
> res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
> if (!res) {
> @@ -338,22 +436,31 @@ static int tad_pmu_probe(struct platform_device *pdev)
> dev_err(&pdev->dev, "Can't find tad-cnt property\n");
> return ret;
> }
> + if (!tad_cnt || !tad_page_size || !tad_pmu_page_size) {
> + dev_err(&pdev->dev, "Invalid tad-cnt or page size\n");
> + return -EINVAL;
> + }
>
> regions = devm_kcalloc(&pdev->dev, tad_cnt,
> sizeof(*regions), GFP_KERNEL);
> if (!regions)
> return -ENOMEM;
>
> - /* ioremap the distributed TAD pmu regions */
> - for (i = 0; i < tad_cnt && res->start < res->end; i++) {
> - regions[i].base = devm_ioremap(&pdev->dev,
> - res->start,
> + map_start = res->start;
> + for (i = 0; i < tad_cnt; i++) {
> + if (map_start > res->end ||
> + tad_pmu_page_size > (resource_size_t)(res->end - map_start + 1)) {
> + dev_err(&pdev->dev, "TAD PMU mem window too small for tad-cnt=%u\n",
> + tad_cnt);
> + return -EINVAL;
> + }
> + regions[i].base = devm_ioremap(&pdev->dev, map_start,
> tad_pmu_page_size);
> if (!regions[i].base) {
> dev_err(&pdev->dev, "TAD%d ioremap fail\n", i);
> return -ENOMEM;
> }
> - res->start += tad_page_size;
> + map_start += tad_page_size;
> }
>
> tad_pmu->regions = regions;
> @@ -374,14 +481,16 @@ static int tad_pmu_probe(struct platform_device *pdev)
> .read = tad_pmu_event_counter_read,
> };
>
> - if (version == TAD_PMU_V1)
> + if (version == TAD_PMU_V1) {
> tad_pmu->pmu.attr_groups = tad_pmu_attr_groups;
> - else
> + tad_pmu->ops = &tad_pmu_ops;
> + } else {
> tad_pmu->pmu.attr_groups = ody_tad_pmu_attr_groups;
> + tad_pmu->ops = &tad_pmu_v2_ops;
> + }
>
> tad_pmu->cpu = raw_smp_processor_id();
>
> - /* Register pmu instance for cpu hotplug */
> ret = cpuhp_state_add_instance_nocalls(tad_pmu_cpuhp_state,
> &tad_pmu->node);
> if (ret) {
> @@ -389,19 +498,24 @@ static int tad_pmu_probe(struct platform_device *pdev)
> return ret;
> }
>
> - name = "tad";
> - ret = perf_pmu_register(&tad_pmu->pmu, name, -1);
> - if (ret)
> + ret = perf_pmu_register(&tad_pmu->pmu, "tad", -1);
> + if (ret) {
> + dev_err(&pdev->dev, "Error %d registering perf PMU\n", ret);
> cpuhp_state_remove_instance_nocalls(tad_pmu_cpuhp_state,
> &tad_pmu->node);
> + return ret;
> + }
>
> - return ret;
> + WRITE_ONCE(tad_pmu->perf_ready, true);
> +
> + return 0;
> }
>
> static void tad_pmu_remove(struct platform_device *pdev)
> {
> struct tad_pmu *pmu = platform_get_drvdata(pdev);
>
> + WRITE_ONCE(pmu->perf_ready, false);
> cpuhp_state_remove_instance_nocalls(tad_pmu_cpuhp_state,
> &pmu->node);
> perf_pmu_unregister(&pmu->pmu);
> @@ -410,12 +524,17 @@ static void tad_pmu_remove(struct platform_device *pdev)
> #if defined(CONFIG_OF) || defined(CONFIG_ACPI)
> static const struct tad_pmu_data tad_pmu_data = {
> .id = TAD_PMU_V1,
> + .tad_prf_offset = TAD_PRF_OFFSET,
> + .tad_pfc_offset = TAD_PFC_OFFSET,
> };
> +
> #endif
>
> #ifdef CONFIG_ACPI
> static const struct tad_pmu_data tad_pmu_v2_data = {
> .id = TAD_PMU_V2,
> + .tad_prf_offset = TAD_PRF_OFFSET,
> + .tad_pfc_offset = TAD_PFC_OFFSET,
> };
> #endif
>
> @@ -451,6 +570,9 @@ static int tad_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
> struct tad_pmu *pmu = hlist_entry_safe(node, struct tad_pmu, node);
> unsigned int target;
>
> + if (!READ_ONCE(pmu->perf_ready))
> + return 0;
> +
> if (cpu != pmu->cpu)
> return 0;
>
> @@ -491,6 +613,6 @@ static void __exit tad_pmu_exit(void)
> module_init(tad_pmu_init);
> module_exit(tad_pmu_exit);
>
> -MODULE_DESCRIPTION("Marvell CN10K LLC-TAD Perf driver");
> +MODULE_DESCRIPTION("Marvell CN10K LLC-TAD perf driver");
> MODULE_AUTHOR("Bhaskara Budiredla <bbudiredla@marvell.com>");
> MODULE_LICENSE("GPL v2");
^ permalink raw reply
* Re: [PATCH] mtd: rawnand: lpc32xx_mlc: fail DMA transfers on timeout
From: Vladimir Zapolskiy @ 2026-06-25 13:42 UTC (permalink / raw)
To: Pengpeng Hou, Miquel Raynal
Cc: Richard Weinberger, Vignesh Raghavendra, Piotr Wojtaszczyk,
linux-mtd, linux-arm-kernel, linux-kernel
In-Reply-To: <20260625003327.11060-1-pengpeng@iscas.ac.cn>
On 6/25/26 03:33, Pengpeng Hou wrote:
> lpc32xx_xmit_dma() starts a DMA transfer and waits up to one second for
> its completion, but it ignores the wait result and returns success after
> unmapping the buffer.
>
> A timed out read can therefore return success with incomplete data, and a
> timed out write can continue the NAND operation without proof that the DMA
> payload reached the controller.
>
> Terminate the DMA channel on timeout, unmap the scatterlist through the
> existing cleanup path, and return -ETIMEDOUT to the NAND read/write
> callers.
>
> Signed-off-by: Pengpeng Hou <pengpeng@iscas.ac.cn>
> ---
> drivers/mtd/nand/raw/lpc32xx_mlc.c | 12 ++++++++++--
> 1 file changed, 10 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/mtd/nand/raw/lpc32xx_mlc.c b/drivers/mtd/nand/raw/lpc32xx_mlc.c
> index 19b13ae53..8f6a89d9b 100644
> --- a/drivers/mtd/nand/raw/lpc32xx_mlc.c
> +++ b/drivers/mtd/nand/raw/lpc32xx_mlc.c
> @@ -396,6 +396,7 @@ static int lpc32xx_xmit_dma(struct mtd_info *mtd, void *mem, int len,
> struct lpc32xx_nand_host *host = nand_get_controller_data(chip);
> struct dma_async_tx_descriptor *desc;
> int flags = DMA_CTRL_ACK | DMA_PREP_INTERRUPT;
> + unsigned long time_left;
> int res;
>
> sg_init_one(&host->sgl, mem, len);
> @@ -410,6 +411,7 @@ static int lpc32xx_xmit_dma(struct mtd_info *mtd, void *mem, int len,
> flags);
> if (!desc) {
> dev_err(mtd->dev.parent, "Failed to prepare slave sg\n");
> + res = -ENXIO;
> goto out1;
> }
>
> @@ -420,7 +422,13 @@ static int lpc32xx_xmit_dma(struct mtd_info *mtd, void *mem, int len,
> dmaengine_submit(desc);
> dma_async_issue_pending(host->dma_chan);
>
> - wait_for_completion_timeout(&host->comp_dma, msecs_to_jiffies(1000));
> + time_left = wait_for_completion_timeout(&host->comp_dma,
> + msecs_to_jiffies(1000));
> + if (!time_left) {
> + dmaengine_terminate_sync(host->dma_chan);
> + res = -ETIMEDOUT;
> + goto out1;
> + }
>
> dma_unmap_sg(host->dma_chan->device->dev, &host->sgl, 1,
> DMA_BIDIRECTIONAL);
> @@ -428,7 +436,7 @@ static int lpc32xx_xmit_dma(struct mtd_info *mtd, void *mem, int len,
> out1:
> dma_unmap_sg(host->dma_chan->device->dev, &host->sgl, 1,
> DMA_BIDIRECTIONAL);
> - return -ENXIO;
> + return res;
> }
>
> static int lpc32xx_read_page(struct nand_chip *chip, uint8_t *buf,
Thank you for the change.
Reviewed-by: Vladimir Zapolskiy <vz@kernel.org>
--
Best wishes,
Vladimir
^ permalink raw reply
* Re: [PATCH v5 1/7] KVM: arm64: Enforce strict SBZ checks in the FF-A proxy
From: Will Deacon @ 2026-06-25 13:16 UTC (permalink / raw)
To: Sebastian Ene
Cc: catalin.marinas, maz, oupton, joey.gouly, korneld, kvmarm,
linux-arm-kernel, linux-kernel, android-kvm, mrigendra.chaubey,
perlarsen, suzuki.poulose, vdonnefort, yuzenghui
In-Reply-To: <20260623115354.632361-2-sebastianene@google.com>
Hi all,
On Tue, Jun 23, 2026 at 11:53:48AM +0000, Sebastian Ene wrote:
> Introduce a helper method ffa_check_unused_args_sbz to enforce strict
> arguments checking when the hypervisor acts as a relayer between the
> host and Trustzone.
>
> Signed-off-by: Sebastian Ene <sebastianene@google.com>
> Reviewed-by: Vincent Donnefort <vdonnefort@google.com>
> ---
> arch/arm64/kvm/hyp/nvhe/ffa.c | 54 +++++++++++++++++++++++++++++++++++
> 1 file changed, 54 insertions(+)
>
> diff --git a/arch/arm64/kvm/hyp/nvhe/ffa.c b/arch/arm64/kvm/hyp/nvhe/ffa.c
> index 1af722771178..78bb043b33ee 100644
> --- a/arch/arm64/kvm/hyp/nvhe/ffa.c
> +++ b/arch/arm64/kvm/hyp/nvhe/ffa.c
> @@ -71,6 +71,20 @@ static u32 hyp_ffa_version;
> static bool has_version_negotiated;
> static hyp_spinlock_t version_lock;
>
> +static bool ffa_check_unused_args_sbz(struct kvm_cpu_context *ctxt, int first_reg)
> +{
> + DECLARE_REG(u32, func_id, ctxt, 0);
> + int reg, end_reg;
> +
> + end_reg = ARM_SMCCC_IS_64(func_id) ? 17 : 7;
> + for (reg = first_reg; reg <= end_reg; reg++) {
> + if (cpu_reg(ctxt, reg))
> + return true;
> + }
> +
> + return false;
> +}
Seb and I tried taking this for a spin on some Android devices and, sadly,
it leads to fireworks. The reason is that the FF-A spec quietly changed
the list of unused parameter registers for 64-bit SMCs from v1.1 to v1.2
of the spec so that pre-existing calls were affected.
For example, in v1.1 a 64-bit RXTX_MAP only has x4-x7 as MBZ, whereas in
v1.2 the same call has x4-x17 as SBZ.
We can follow the spec by predicating the additional check on the FF-A
version being >= 1.2, but I'm not hopeful that existing drivers are
compliant. I also suggest moving this patch to the end of the series in
case we need to revert it.
Cheers,
Will
^ permalink raw reply
* [PATCH v1] KVM: Ignore MMU notifiers for guest_memfd-only memslots
From: Alexandru Elisei @ 2026-06-25 13:09 UTC (permalink / raw)
To: pbonzini, kvm, linux-kernel, maz, oupton, suzuki.poulose, kvmarm,
linux-arm-kernel, seanjc, david.hildenbrand, mark.rutland,
ackerleytng
For guest_memfd-only memslots (kvm_memslot_is_gmem_only() is true), the
memory provider for the virtual machine is the guest_memfd file, not the
userspace mapping. Mappings in the secondary MMU are established by
obtaining folios from guest_memfd directly, not by looking the folios up
through the page tables through GUP. Consequently, there is no relationship
between the page tables and the secondary MMU: MMU notifiers do not apply.
Despite this, KVM's MMU notifiers still modify the secondary MMU page
tables, only for the same memory to be remapped the next time a guest
accesses it. Make the disconnect between the user mapping and the secondary
MMU page tables explicit by ignoring the MMU notifiers for guest_memfd-only
memslots.
Suggested-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Alexandru Elisei <alexandru.elisei@arm.com>
---
RFC can be found here [1].
The only theoretical instance where the MMU notifiers are invoked for the
userspace mapping of a guest_memfd-only memslot that I was able to find was
automatic NUMA balancing with a non-NULL NUMA policy for the guest_memfd
file. I wasn't able to test it in practice. Ackerley Tng also mentioned
that this change would fix double unmap on a shared to private in-place
conversion of the guest_memfd memory [2].
When and if it happens, having memory unmapped from the seconday MMU in the
case of a guest_memfd-only memslot is at most a performance issue (it
causes unnecessary guest faults), but having memory that stays mapped at
stage 2 (unless userspace explicitly unmaps it from the VM) is needed for a
Arm feature (called SPE, Statistical Profiling Extension) that I'm working
to upstream. This patch aims to provide the guarantee that memory won't be
unmapped from the secondary MMU without the VMM explicitely triggering it
(by punching a hole or closing the guest_memfd file).
Ran a basic test by hacking KVM_PRE_FAULT_MEMORY for arm64, and modifying
kvmtool to apply it on the entire VM memory, then munmap the same memory
from its page tables. Also hacked guest_memfd + GUEST_MEMFD_FLAG_MMAP
support in kvmtool. Put traces in the arm64 fault handling code and printfs
in kvm_mmu_unmap_gfn_range(). When running a guest, KVM doesn't unmap the
memory from the secondary MMU when kvmtool munmaps it; all the faults
triggered by the guest on the guest_memfd backed memslots are instruction
faults; and KVM unmaps the guest memory from the secondary MMU when the
guest_memfd file is closed by userspace. Looks correct to me.
Changes in RFC -> v1:
* Dropped the RFC tag.
* Fix unbalanced invalidation reported by sashiko by implementing Sean's
approach; I've expanded it to page ageing.
* Modified the commit message as per DavidH comment.
[1] https://lore.kernel.org/kvm/20260615155244.183044-1-alexandru.elisei@arm.com/
[2] https://lore.kernel.org/kvm/CAEvNRgE9cLfjDbXuR5wq3fEWZyHxYPxdExxNjXUFO1nT5m==1A@mail.gmail.com/
include/linux/kvm_host.h | 1 +
virt/kvm/kvm_main.c | 50 ++++++++++++++++++++++++++++++++++++----
2 files changed, 47 insertions(+), 4 deletions(-)
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 4c14aee1fb06..483ad9fe8fb7 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -260,6 +260,7 @@ union kvm_mmu_notifier_arg {
enum kvm_gfn_range_filter {
KVM_FILTER_SHARED = BIT(0),
KVM_FILTER_PRIVATE = BIT(1),
+ KVM_FILTER_USERSPACE_MAPPINGS = BIT(2),
};
struct kvm_gfn_range {
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 881f92d7a469..204e7faa325a 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -607,8 +607,13 @@ static __always_inline kvm_mn_ret_t kvm_handle_hva_range(struct kvm *kvm,
/*
* HVA-based notifications aren't relevant to private
* mappings as they don't have a userspace mapping.
+ *
+ * Memslots where guest_memfd is the only memory
+ * provider can also safely ignore changes to the
+ * userspace mapping.
*/
- gfn_range.attr_filter = KVM_FILTER_SHARED;
+ gfn_range.attr_filter = KVM_FILTER_SHARED |
+ KVM_FILTER_USERSPACE_MAPPINGS;
/*
* {gfn(page) | page intersects with [hva_start, hva_end)} =
@@ -715,6 +720,21 @@ void kvm_mmu_invalidate_range_add(struct kvm *kvm, gfn_t start, gfn_t end)
bool kvm_mmu_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)
{
kvm_mmu_invalidate_range_add(kvm, range->start, range->end);
+
+ /*
+ * When reacting to changes in userspace mappings, don't unmap memslots
+ * that are guest_memfd-only, in which case KVM's MMU mappings are
+ * pulled directly from guest_memfd, i.e. don't depend on the userspace
+ * mappings.
+ *
+ * TODO: Skip gmem-only memslots on mmu_notifier events entirely, once
+ * gfn_to_pfn_cache is also wired up to directly pull from guest_memfd.
+ */
+ if (range->attr_filter & KVM_FILTER_USERSPACE_MAPPINGS &&
+ kvm_slot_has_gmem(range->slot) &&
+ kvm_memslot_is_gmem_only(range->slot))
+ return false;
+
return kvm_unmap_gfn_range(kvm, range);
}
@@ -825,12 +845,23 @@ static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn,
rcuwait_wake_up(&kvm->mn_memslots_update_rcuwait);
}
+static bool kvm_mmu_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
+{
+ /* See comment in kvm_mmu_unmap_gfn_range() */
+ if (range->attr_filter & KVM_FILTER_USERSPACE_MAPPINGS &&
+ kvm_slot_has_gmem(range->slot) &&
+ kvm_memslot_is_gmem_only(range->slot))
+ return false;
+
+ return kvm_age_gfn(kvm, range);
+}
+
static bool kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn,
struct mm_struct *mm, unsigned long start, unsigned long end)
{
trace_kvm_age_hva(start, end);
- return kvm_age_hva_range(mn, start, end, kvm_age_gfn,
+ return kvm_age_hva_range(mn, start, end, kvm_mmu_age_gfn,
!IS_ENABLED(CONFIG_KVM_ELIDE_TLB_FLUSH_IF_YOUNG));
}
@@ -852,7 +883,18 @@ static bool kvm_mmu_notifier_clear_young(struct mmu_notifier *mn,
* cadence. If we find this inaccurate, we might come up with a
* more sophisticated heuristic later.
*/
- return kvm_age_hva_range_no_flush(mn, start, end, kvm_age_gfn);
+ return kvm_age_hva_range_no_flush(mn, start, end, kvm_mmu_age_gfn);
+}
+
+static bool kvm_mmu_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
+{
+ /* See comment in kvm_mmu_unmap_gfn_range() */
+ if (range->attr_filter & KVM_FILTER_USERSPACE_MAPPINGS &&
+ kvm_slot_has_gmem(range->slot) &&
+ kvm_memslot_is_gmem_only(range->slot))
+ return false;
+
+ return kvm_test_age_gfn(kvm, range);
}
static bool kvm_mmu_notifier_test_young(struct mmu_notifier *mn,
@@ -861,7 +903,7 @@ static bool kvm_mmu_notifier_test_young(struct mmu_notifier *mn,
trace_kvm_test_age_hva(address);
return kvm_age_hva_range_no_flush(mn, address, address + 1,
- kvm_test_age_gfn);
+ kvm_mmu_test_age_gfn);
}
static void kvm_mmu_notifier_release(struct mmu_notifier *mn,
base-commit: 8cd9520d35a6c38db6567e97dd93b1f11f185dc6
--
2.54.0
^ permalink raw reply related
* Re: [PATCH] ARM: enable interrupts when arm_notify_die() is handling user mode errors
From: Xie Yuanbin @ 2026-06-25 12:26 UTC (permalink / raw)
To: linux, bigeasy, rmk+kernel
Cc: clrkwllms, rostedt, linusw, arnd, linux-arm-kernel, linux-kernel,
linux-rt-devel, liaohua4, lilinjie8, Xie Yuanbin
In-Reply-To: <aj0BpqlaCh3cVw8Q@shell.armlinux.org.uk>
On Thu, 25 Jun 2026 11:23:34 +0100, Russell King wrote:
>> 3. enable interrupts in do_DataAbort()/do_PrefetchAbort() after
>> `inf->fn()`, this may be ok.
>>
>> From this perspective, arm_notify_die() also seems to be a good place?
>
> If one is happy with higher latency for preempt cases, then it may
> be, but if we want lower latency, then it ought to be earlier.
> My preference is (3).
```c
if (!inf->fn(addr, ifsr | FSR_LNX_PF, regs))
return;
if (likely(user_mode(regs)))
local_irq_enable();
pr_alert("8<--- cut here ---\n");
```
or
```c
if (!inf->fn(addr, ifsr | FSR_LNX_PF, regs))
return;
if (likely(interrupts_enabled(regs)))
local_irq_enable();
pr_alert("8<--- cut here ---\n");
```
Which one do you prefer? I prefer the first one, because for kernel
fault, kernel may have encountered a serious issue,
and enabling interrupts may be not appropriate.
^ permalink raw reply
* Re: mm: opaque hardware page-table entry handles
From: Muhammad Usama Anjum @ 2026-06-25 12:15 UTC (permalink / raw)
To: Pedro Falcato
Cc: usama.anjum, Andrew Morton, Lorenzo Stoakes, David Hildenbrand,
Liam R. Howlett, Mike Rapoport, Ryan Roberts, Anshuman Khandual,
Catalin Marinas, Will Deacon, Samuel Holland, linux-mm,
linux-arm-kernel, linux-kernel
In-Reply-To: <aj0JU-AzsEQBOiQ1@pedro-suse>
On 25/06/2026 12:08 pm, Pedro Falcato wrote:
> On Thu, Jun 25, 2026 at 11:50:28AM +0100, Muhammad Usama Anjum wrote:
>> On 24/06/2026 8:25 pm, Pedro Falcato wrote:
>>> On Wed, Jun 24, 2026 at 03:09:08PM +0100, Usama Anjum wrote:
>>>> Hi all,
>>>>
>>>> This is a direction-check with the wider community before spending time on the
>>>> development. This picks up the idea that was raised and broadly agreed in the
>>>> earlier thread (Ryan Roberts, Lorenzo Stoakes, David Hildenbrand) [1].
>>>>
>>>> The problem
>>>> -----------
>>>> Core MM code reaches page-table entries by raw pointer dereference (pte_t *,
>>>> pmd_t *, *pud, ...) in places, implicitly assuming a single, uniform
>>>> representation. Sprinkling getters wouldn't solve the problem entirely. The
>>>> problem is one level up: the *pointer type* itself is overloaded. At each level
>>>> there are really three distinct things:
>>>>
>>>> 1. a page-table entry value (pte_t, pmd_t, ...)
>>>> 2. a pointer to an entry value, e.g. a pXX_t on the stack
>>>> 3. a pointer to a live entry in the hardware page table
>>>>
>>>> Today (2) and (3) share the same type - pte_t *, pmd_t *, and so on. Nothing
>>>> distinguishes a pointer into a live table from a pointer to a stack copy.
>>>>
>>>> A pointer to an on-stack entry value and a pointer to a live hardware entry have
>>>> the same type, so the compiler cannot distinguish them. Passing the stack
>>>> pointer to an arch helper that expects a hardware-entry pointer compiles fine,
>>>> but is wrong - a bug class the type system makes invisible. It also blocks
>>>> evolution: an arch helper may need to read beyond the addressed entry (e.g.
>>>> adjacent or contiguous entries), which only makes sense for a real page-table
>>>> pointer, not a stack copy.
>>>>
>>>> The idea
>>>> --------
>>>> Give (3) its own opaque type that cannot be dereferenced:
>>>>
>>>> /* opaque handle to a HW page-table entry; not dereferenceable */
>>>> typedef struct {
>>>> pte_t *ptr;
>>>> } hw_ptep;
>>>
>>> I don't love typedefs that hide pointers.
>> Nobody likes them. This is the only way so that by mistake stack pointers
>> don't get reintroduced. Its also hard to catch such cases during review.
>
> That's not true, you could have:
>
> typedef struct { pteval_t pte; } sw_pte_t;
>
> and
>
> /* only usable by arch code and whoever wants to interpret these
> * types */
> static inline sw_to_ptep(sw_pte_t *swptep)
> {
> return (pte_t *) swptep;
> }
>
> and so on... Also, see Documentation/process/coding-style.rst 5) typedefs, it
> explicitly warns against pointer typedefs.
I hear your concern, but I think the sw_pte_t inversion solves the small problem
and gives up the big one. Let me make the case for keeping the opaque hardware type.
The narrow goal is "no stack pointer reaches a table-writing API". Both schemes catch
that. But the actual reasons for this idea is broader one:
* making core independent of how a real page table entry is represented and accessed.
It only works if hardware type is the abstract one.
* As you may have noted with pmdp_get(): on some arches the read is not a pure load
(folding, lockless ordering, kmap of a highmem table page). pte_t * lets callers
bypass all of that with *ptep. The handle makes the accessor the only door, so the
barriers/folds can't be skipped by accident.
>>
>>>
>>>>
>>>> With this:
>>>>
>>>> - a stack value can no longer masquerade as a hardware table entry,
>>>> - a hardware handle can no longer be raw-dereferenced,
>>>> - cases that genuinely operate on a value can be refactored to pass the value
>>>> and let the caller, which knows whether it holds a handle or a stack copy,
>>>> read it once.
>>>
>>> Just a small passing comment: how about doing it differently? like
>>>
>>> typedef struct {
>>> pte_t *ptep;
>>> } sw_ptep_t;
>>>
>>> or something like that. Were I to guess, referring to a pte_t on the stack
>>> is much rarer than all the pte_t references to actual page tables. But maybe
>>> reality doesn't match up with my guess :)
>> We want to fix the current usages and future usages as well. sw_ptep_t can work
>> for current usages, but it'll not force the new code to be written using correct
>> notations.
>
> I don't understand what you mean. pte_t is a perfectly correct notation,
> it's just currently maybe too ambiguously overloaded.
Yes, this overload is what need fixing.
>
>> Apart from different types, another benefit of hw_pXXp would be that
>> it'll become an opaque object which only architecture can manipulate. Hence
>> architecture can decide howeverever it wants to manage them in certain cases.
>
> That's already the case. pte_t is fully opaque apart from the little fact
> that you can declare one on your stack. Introducing a different sw_pte_t
> would further reinforce that. And if you want ways to find raw derefs on
> pointers, we can simply slap on __attribute__((noderef)) (available in
> sparse and clang) on those types after sw_pte_t is introduced and pte_t
> is unambiguously a "hardware" PTE.
The pte_t iterator loops in core code prove that it isn't opaque enough.
The pointer arithmetic (ptep++) is done at several places in the core.
The sw_pte_t + deref protection only catches misues under sparse. While the
hw_ptep type is enforced by every compiler for every build.
>
> I dunno, I'm not convinced that changing around ~450 files is worth it, and
> _if_ we want to do something like this I would strongly prefer the way that
> is less churny.
Probably you grepped these types to come up with 450 files? But we aren't going
to update all files. Only the generic code would be converted with one or two
architectures. Its architecture opt-in. It'll be transparent to non-converted
architectures. So if arch/ is excluded, the number of files would become a
quarter?
This type change is going to localize the future churn. It is one time cost;
after that, every future representation change lives behind the accessors.
If pte_t * stays the live type, each such change is another N files audit.
The struct buys us one choke point to evolve.
--
Thanks,
Usama
^ permalink raw reply
* [PATCH v2 3/3] scsi: ufs: core: Always run tx_eqtr POST_CHANGE notify
From: Can Guo @ 2026-06-25 12:13 UTC (permalink / raw)
To: bvanassche, beanhuo, peter.wang, martin.petersen, mani
Cc: linux-scsi, Can Guo, Alim Akhtar, Avri Altman,
James E.J. Bottomley, Matthias Brugger,
AngeloGioacchino Del Regno, open list,
moderated list:ARM/Mediatek SoC support:Keyword:mediatek,
moderated list:ARM/Mediatek SoC support:Keyword:mediatek
In-Reply-To: <20260625121306.1655467-1-can.guo@oss.qualcomm.com>
ufshcd_tx_eqtr() skips POST_CHANGE notify when __ufshcd_tx_eqtr()
fails. That can leave variant cleanup incomplete when PRE_CHANGE saved
temporary state that POST_CHANGE is expected to restore.
Always call POST_CHANGE once PRE_CHANGE has succeeded. Keep the TX EQTR
result as the primary return value, and only propagate POST_CHANGE
failure when TX EQTR itself succeeded.
Log PRE_CHANGE and POST_CHANGE notify failures to make variant callback
failures visible in TX EQTR error paths.
Reviewed-by: Manivannan Sadhasivam <mani@kernel.org>
Reviewed-by: Peter Wang <peter.wang@mediatek.com>
Signed-off-by: Can Guo <can.guo@oss.qualcomm.com>
---
drivers/ufs/core/ufs-txeq.c | 14 ++++++++++----
1 file changed, 10 insertions(+), 4 deletions(-)
diff --git a/drivers/ufs/core/ufs-txeq.c b/drivers/ufs/core/ufs-txeq.c
index e1302ea9f27e..7f908ea97ec3 100644
--- a/drivers/ufs/core/ufs-txeq.c
+++ b/drivers/ufs/core/ufs-txeq.c
@@ -1223,6 +1223,7 @@ static int ufshcd_tx_eqtr(struct ufs_hba *hba,
{
struct ufs_pa_layer_attr old_pwr_info;
unsigned int noio_flag;
+ int notify_ret;
int ret;
/*
@@ -1252,14 +1253,19 @@ static int ufshcd_tx_eqtr(struct ufs_hba *hba,
}
ret = ufshcd_vops_tx_eqtr_notify(hba, PRE_CHANGE, pwr_mode);
- if (ret)
+ if (ret) {
+ dev_err(hba->dev, "TX EQTR PRE_CHANGE notify failed: %d\n", ret);
goto out;
+ }
ret = __ufshcd_tx_eqtr(hba, params, pwr_mode);
- if (ret)
- goto out;
- ret = ufshcd_vops_tx_eqtr_notify(hba, POST_CHANGE, pwr_mode);
+ notify_ret = ufshcd_vops_tx_eqtr_notify(hba, POST_CHANGE, pwr_mode);
+ if (notify_ret)
+ dev_err(hba->dev, "TX EQTR POST_CHANGE notify failed: %d\n", notify_ret);
+
+ if (!ret)
+ ret = notify_ret;
out:
if (ret)
--
2.34.1
^ permalink raw reply related
* Re: [PATCH] ARM: enable interrupts when arm_notify_die() is handling user mode errors
From: Sebastian Andrzej Siewior @ 2026-06-25 12:08 UTC (permalink / raw)
To: Russell King
Cc: Xie Yuanbin, clrkwllms, rostedt, linusw, arnd, linux-arm-kernel,
linux-kernel, linux-rt-devel, liaohua4, lilinjie8
In-Reply-To: <aj0A3SjMhR24e8fP@shell.armlinux.org.uk>
On 2026-06-25 11:20:13 [+0100], Russell King wrote:
> > is not worth doing it? With this I can my little testcase working.
>
> No, it isn't, because if you enable PERF_EVENTS then BKPT breaks.
> hw_breakpoint.c claims this vector.
I see.
…
> BKPT is a total mess.
Understood.
> > it does cond_local_irq_enable() which enables the interrupts if they
> > were enabled by the "caller", sends the signal (SIGTRAP).
>
> I'm happy with that approach as far as interrupts go, but we can't
> change the behaviour for FSR=2 again, beyond fixing LinusW's
> commit (which has recently been reported as a regression.)
>
> Note that the change which makes this raise a SIGTRAP rather than
> SIGBUS when PERF_EVENTS=y could _also_ be reported as a regression
> that we would have to fix, and making FSR=2 raise a SIGTRAP now
> could very well invite that regression to be reported.
>
> Essentially, I don't think we can "fix" BKPT to always raise SIGTRAP.
> The BKPT instruction is something the kernel has never _officially_
> supported.
It looked like an easy fix. You explained that it is a bigger mess with
"other features" and so on. Given that and the fact that it was never
supported, I would appreciate just to enable interrupts before the
(SIGBUS) signal is sent.
Sebastian
^ permalink raw reply
* Re: [PATCH 0/2] gpio: fix sleeping-in-atomic in shared-proxy; restore meson non-sleeping
From: Viacheslav @ 2026-06-25 12:05 UTC (permalink / raw)
To: Bartosz Golaszewski
Cc: linux-gpio, linux-arm-kernel, linux-amlogic, linux-kernel,
linux-rockchip
In-Reply-To: <CAMRc=MdP8Wf6QRXGHpb3KJW2KMidSe-0LeyKKTYix=wYKZcPuA@mail.gmail.com>
Hi!
24.06.2026 10:25, Bartosz Golaszewski wrote:
> On Tue, 23 Jun 2026 17:16:44 +0200, Robin Murphy <robin.murphy@arm.com> said:
>> On 11/06/2026 9:26 am, Marek Szyprowski wrote:
>>> Hi Viachesla,
>>>
>>> On 10.06.2026 17:32, Viacheslav Bocharov wrote:
>>>> gpio-shared-proxy chooses its descriptor lock (mutex vs spinlock) from
>>>> the underlying chip's can_sleep, but under that lock it calls config and
>>>> direction ops that reach sleeping pinctrl paths. On a controller with
>>>> non-sleeping MMIO value ops the lock is a spinlock, so a sleeping call
>>>> runs from atomic context:
>>>>
>
> ...
>
>>>
>>> I've checked this patchset with these two reverted and no warning was reported.
>>
>> If it hadn't already been fixed (...)
>>
>
> About that - Viacheslav, do you still plan to submit v2 of this?
Thanks for review. I prepared and sent the second version of the patch today
>
> Bart
>
> _______________________________________________
> linux-amlogic mailing list
> linux-amlogic@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-amlogic
^ permalink raw reply
* [PATCH v2 0/2] gpio: fix sleeping-in-atomic in shared-proxy; restore meson non-sleeping
From: Viacheslav Bocharov @ 2026-06-25 11:57 UTC (permalink / raw)
To: Linus Walleij, Bartosz Golaszewski
Cc: Neil Armstrong, Kevin Hilman, Jerome Brunet, Martin Blumenstingl,
Marek Szyprowski, Robin Murphy, Diederik de Haas, linux-gpio,
linux-arm-kernel, linux-amlogic, linux-kernel
gpio-shared-proxy chooses its descriptor lock (mutex vs spinlock) from
the underlying chip's can_sleep, but under that lock it calls config and
direction ops that reach sleeping pinctrl paths. On a controller with
non-sleeping MMIO value ops the lock is a spinlock, so a sleeping call
runs from atomic context:
BUG: sleeping function called from invalid context
... pinctrl_gpio_set_config <- gpiochip_generic_config
<- gpio_shared_proxy_set_config (voting spinlock held)
<- ... <- mmc_pwrseq_simple_probe
This was reported on Khadas VIM3 and worked around for Amlogic by
commit 28f240683871 ("pinctrl: meson: mark the GPIO controller as
sleeping"), which marked the whole meson controller sleeping. That
workaround broke atomic value-path consumers: w1-gpio (1-Wire bitbang)
no longer detects devices, because its IRQ-disabled read slot calls the
non-cansleep gpiod_*_value() and now hits WARN_ON(can_sleep) per bit.
Patch 1 fixes the proxy locking generically (always a sleeping mutex).
Patch 2 then restores meson can_sleep=false, fixing 1-Wire.
Patch 1 has a trade-off: a proxied GPIO becomes sleeping, so consumers
gating on gpiod_cansleep() change behaviour. No current device needs
atomic (non-cansleep) value access on a shared GPIO -- every report
(Khadas VIM3, ODROID-M1, my test on JetHub D1+) is a shared reset line
(eMMC/SDIO pwrseq or PCIe reset) driven through the cansleep accessors,
which is what the proxy exists to vote on; bit-banging that needs atomic
access cannot work through voting anyway. An alternative that keeps
atomic value access (split locking) is possible but adds a second lock
and new race windows, so this series takes the simpler mutex-only
approach.
The two are a unit: patch 2 must not be applied without patch 1,
otherwise the original VIM3 splat returns on boards that share a meson
GPIO -- please keep the order. I have not Cc'd stable; I will request
stable backports separately once both patches have landed.
Changes since v1:
- gpio: shared-proxy: open-code the descriptor mutex; drop the
gpio_shared_desc_lock guard and the gpio_shared_lockdep_assert()
helper, move the mutex rationale to the can_sleep assignment. No
functional change.
v1: https://lore.kernel.org/linux-gpio/20260610153329.937833-1-v@baodeep.com/
Viacheslav Bocharov (2):
gpio: shared-proxy: always serialize with a sleeping mutex
pinctrl: meson: restore non-sleeping GPIO access
drivers/gpio/gpio-shared-proxy.c | 66 +++++++++++----------------
drivers/gpio/gpiolib-shared.c | 9 +---
drivers/gpio/gpiolib-shared.h | 28 +-----------
drivers/pinctrl/meson/pinctrl-meson.c | 2 +-
4 files changed, 30 insertions(+), 75 deletions(-)
base-commit: 840ef6c78e6a2f694b578ecb9063241c992aaa9e
--
2.54.0
^ permalink raw reply
* [PATCH v2 2/2] pinctrl: meson: restore non-sleeping GPIO access
From: Viacheslav Bocharov @ 2026-06-25 11:57 UTC (permalink / raw)
To: Linus Walleij, Bartosz Golaszewski
Cc: Neil Armstrong, Kevin Hilman, Jerome Brunet, Martin Blumenstingl,
Marek Szyprowski, Robin Murphy, Diederik de Haas, linux-gpio,
linux-arm-kernel, linux-amlogic, linux-kernel
In-Reply-To: <20260625115718.1678991-1-v@baodeep.com>
Commit 28f240683871 ("pinctrl: meson: mark the GPIO controller as
sleeping") set gpio_chip.can_sleep = true to work around
gpio-shared-proxy holding a spinlock across a sleeping pinctrl config
path. That locking bug is now fixed in the shared-proxy itself ("gpio:
shared-proxy: always serialize with a sleeping mutex"), so the
controller-wide workaround is no longer needed; the meson GPIO
controller does not sleep.
meson_gpio_get/set/direction_* access MMIO through regmap. The
regmap_mmio bus uses fast I/O (spinlock) locking, so these value
callbacks do not contain sleeping operations. Since gpio_chip.can_sleep
describes the get/set value path, restore can_sleep = false.
Marking the controller sleeping also broke atomic value consumers such
as w1-gpio (1-Wire bitbang): w1_io.c runs its read time slot under
local_irq_save() and uses the non-cansleep gpiod_set_value() /
gpiod_get_value(), which with can_sleep=true trigger WARN_ON(can_sleep)
in gpiolib on every transferred bit (from w1_gpio_write_bit() /
w1_gpio_read_bit() via w1_reset_bus() and w1_search()). The printk and
stack dump inside the IRQs-off, microsecond-scale time slot destroy the
bit timing, so reset/presence detection and ROM search fail: the bus
master registers but w1_master_slave_count stays at 0 and no devices
are found. Verified on an Amlogic A113X board (DS18B20 on GPIOA_14):
with can_sleep restored to false the warnings are gone and the sensor
is detected and read again.
This must not be applied or backported without the shared-proxy locking
fix above; otherwise the original Khadas VIM3 splat returns on boards
that genuinely share a meson GPIO.
Fixes: 28f240683871 ("pinctrl: meson: mark the GPIO controller as sleeping")
Link: https://lore.kernel.org/all/20260105150509.56537-1-bartosz.golaszewski@oss.qualcomm.com/
Signed-off-by: Viacheslav Bocharov <v@baodeep.com>
---
v1: https://lore.kernel.org/linux-gpio/20260610153329.937833-3-v@baodeep.com/
drivers/pinctrl/meson/pinctrl-meson.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/pinctrl/meson/pinctrl-meson.c b/drivers/pinctrl/meson/pinctrl-meson.c
index 4507dc8b5563..18295b15ecd9 100644
--- a/drivers/pinctrl/meson/pinctrl-meson.c
+++ b/drivers/pinctrl/meson/pinctrl-meson.c
@@ -619,7 +619,7 @@ static int meson_gpiolib_register(struct meson_pinctrl *pc)
pc->chip.set = meson_gpio_set;
pc->chip.base = -1;
pc->chip.ngpio = pc->data->num_pins;
- pc->chip.can_sleep = true;
+ pc->chip.can_sleep = false;
ret = gpiochip_add_data(&pc->chip, pc);
if (ret) {
--
2.54.0
^ permalink raw reply related
* [PATCH v2 1/2] gpio: shared-proxy: always serialize with a sleeping mutex
From: Viacheslav Bocharov @ 2026-06-25 11:57 UTC (permalink / raw)
To: Linus Walleij, Bartosz Golaszewski
Cc: Neil Armstrong, Kevin Hilman, Jerome Brunet, Martin Blumenstingl,
Marek Szyprowski, Robin Murphy, Diederik de Haas, linux-gpio,
linux-arm-kernel, linux-amlogic, linux-kernel
In-Reply-To: <20260625115718.1678991-1-v@baodeep.com>
The shared GPIO descriptor used either a mutex or a spinlock, chosen at
runtime from the underlying chip's can_sleep:
shared_desc->can_sleep = gpiod_cansleep(shared_desc->desc);
... if (can_sleep) mutex_lock(); else spin_lock_irqsave();
can_sleep describes only the value path (->get/->set). Under the same
lock, however, the proxy may call gpiod_set_config() and
gpiod_direction_*(), which can reach pinctrl paths that take a mutex
(e.g. gpiod_set_config() -> gpiochip_generic_config() ->
pinctrl_gpio_set_config()), independent of can_sleep. On a controller
with non-sleeping MMIO value ops the descriptor lock was a spinlock, so
the sleeping pinctrl call ran from atomic context. Reproduced on an
Amlogic A113X board with the workaround from commit 28f240683871
("pinctrl: meson: mark the GPIO controller as sleeping") reverted; the
original Khadas VIM3 report hit the same path:
BUG: sleeping function called from invalid context
__mutex_lock
pinctrl_get_device_gpio_range
pinctrl_gpio_set_config
gpiochip_generic_config
gpiod_set_config
gpio_shared_proxy_set_config <- voting spinlock held
...
mmc_pwrseq_simple_probe
The spinlock existed to take the value vote from atomic context, but the
vote and the (possibly sleeping) control operations share the same state
and lock, so this scheme cannot serialize config under a mutex and still
offer atomic value access. Always serialize the shared descriptor with a
mutex instead and mark the proxy a sleeping gpiochip, driving the
underlying GPIO through the cansleep value accessors: those are valid
for both sleeping and non-sleeping chips, so value access keeps working
on fast controllers, at the cost of no longer being atomic.
This is observable: consumers gating on gpiod_cansleep() take their
sleeping branch on a proxied GPIO (mmc-pwrseq-emmc skips its
emergency-restart reset handler; its normal reset is unaffected), and
consumers that reject sleeping GPIOs (pwm-gpio, ps2-gpio, ...) would
fail to probe. Such atomic users do not share a pin through the proxy,
whose purpose is voting on shared reset/enable lines. The same narrowing
already applies on Amlogic since that workaround, and rockchip
addressed the identical splat per-driver in commit 7ca497be0016 ("gpio:
rockchip: Stop calling pinctrl for set_direction"); fixing the proxy
addresses the locking error once, for every controller.
The lock type was added by commit a060b8c511ab ("gpiolib: implement
low-level, shared GPIO support"); the sleeping call under it arrived with
the proxy driver.
Fixes: e992d54c6f97 ("gpio: shared-proxy: implement the shared GPIO proxy driver")
Reported-by: Marek Szyprowski <m.szyprowski@samsung.com>
Closes: https://lore.kernel.org/all/00107523-7737-4b92-a785-14ce4e93b8cb@samsung.com/
Signed-off-by: Viacheslav Bocharov <v@baodeep.com>
---
v1 -> v2: open-code the descriptor mutex; drop the gpio_shared_desc_lock
guard and the gpio_shared_lockdep_assert() helper, use
guard(mutex) and lockdep_assert_held() directly; move the
mutex rationale from the header to the can_sleep assignment in
probe.
v1: https://lore.kernel.org/linux-gpio/20260610153329.937833-2-v@baodeep.com/
drivers/gpio/gpio-shared-proxy.c | 66 +++++++++++++-------------------
drivers/gpio/gpiolib-shared.c | 9 +----
drivers/gpio/gpiolib-shared.h | 28 +-------------
3 files changed, 29 insertions(+), 74 deletions(-)
diff --git a/drivers/gpio/gpio-shared-proxy.c b/drivers/gpio/gpio-shared-proxy.c
index 6941e4be6cf1..0cd52015b731 100644
--- a/drivers/gpio/gpio-shared-proxy.c
+++ b/drivers/gpio/gpio-shared-proxy.c
@@ -9,8 +9,10 @@
#include <linux/err.h>
#include <linux/gpio/consumer.h>
#include <linux/gpio/driver.h>
+#include <linux/lockdep.h>
#include <linux/mod_devicetable.h>
#include <linux/module.h>
+#include <linux/mutex.h>
#include <linux/string_choices.h>
#include <linux/types.h>
@@ -32,7 +34,7 @@ gpio_shared_proxy_set_unlocked(struct gpio_shared_proxy_data *proxy,
struct gpio_desc *desc = shared_desc->desc;
int ret = 0;
- gpio_shared_lockdep_assert(shared_desc);
+ lockdep_assert_held(&shared_desc->mutex);
if (value) {
/* User wants to set value to high. */
@@ -89,7 +91,7 @@ static int gpio_shared_proxy_request(struct gpio_chip *gc, unsigned int offset)
struct gpio_shared_proxy_data *proxy = gpiochip_get_data(gc);
struct gpio_shared_desc *shared_desc = proxy->shared_desc;
- guard(gpio_shared_desc_lock)(shared_desc);
+ guard(mutex)(&shared_desc->mutex);
proxy->shared_desc->usecnt++;
@@ -105,11 +107,11 @@ static void gpio_shared_proxy_free(struct gpio_chip *gc, unsigned int offset)
struct gpio_shared_desc *shared_desc = proxy->shared_desc;
int ret;
- guard(gpio_shared_desc_lock)(shared_desc);
+ guard(mutex)(&shared_desc->mutex);
if (proxy->voted_high) {
ret = gpio_shared_proxy_set_unlocked(proxy,
- shared_desc->can_sleep ? gpiod_set_value_cansleep : gpiod_set_value, 0);
+ gpiod_set_value_cansleep, 0);
if (ret)
dev_err(proxy->dev,
"Failed to unset the shared GPIO value on release: %d\n", ret);
@@ -129,7 +131,7 @@ static int gpio_shared_proxy_set_config(struct gpio_chip *gc,
struct gpio_desc *desc = shared_desc->desc;
int ret;
- guard(gpio_shared_desc_lock)(shared_desc);
+ guard(mutex)(&shared_desc->mutex);
if (shared_desc->usecnt > 1) {
if (shared_desc->cfg != cfg) {
@@ -157,7 +159,7 @@ static int gpio_shared_proxy_direction_input(struct gpio_chip *gc,
struct gpio_desc *desc = shared_desc->desc;
int dir;
- guard(gpio_shared_desc_lock)(shared_desc);
+ guard(mutex)(&shared_desc->mutex);
if (shared_desc->usecnt == 1) {
dev_dbg(proxy->dev,
@@ -187,7 +189,7 @@ static int gpio_shared_proxy_direction_output(struct gpio_chip *gc,
struct gpio_desc *desc = shared_desc->desc;
int ret, dir;
- guard(gpio_shared_desc_lock)(shared_desc);
+ guard(mutex)(&shared_desc->mutex);
if (shared_desc->usecnt == 1) {
dev_dbg(proxy->dev,
@@ -222,13 +224,6 @@ static int gpio_shared_proxy_direction_output(struct gpio_chip *gc,
return gpio_shared_proxy_set_unlocked(proxy, gpiod_direction_output, value);
}
-static int gpio_shared_proxy_get(struct gpio_chip *gc, unsigned int offset)
-{
- struct gpio_shared_proxy_data *proxy = gpiochip_get_data(gc);
-
- return gpiod_get_value(proxy->shared_desc->desc);
-}
-
static int gpio_shared_proxy_get_cansleep(struct gpio_chip *gc,
unsigned int offset)
{
@@ -237,29 +232,15 @@ static int gpio_shared_proxy_get_cansleep(struct gpio_chip *gc,
return gpiod_get_value_cansleep(proxy->shared_desc->desc);
}
-static int gpio_shared_proxy_do_set(struct gpio_shared_proxy_data *proxy,
- int (*set_func)(struct gpio_desc *desc, int value),
- int value)
-{
- guard(gpio_shared_desc_lock)(proxy->shared_desc);
-
- return gpio_shared_proxy_set_unlocked(proxy, set_func, value);
-}
-
-static int gpio_shared_proxy_set(struct gpio_chip *gc, unsigned int offset,
- int value)
-{
- struct gpio_shared_proxy_data *proxy = gpiochip_get_data(gc);
-
- return gpio_shared_proxy_do_set(proxy, gpiod_set_value, value);
-}
-
static int gpio_shared_proxy_set_cansleep(struct gpio_chip *gc,
unsigned int offset, int value)
{
struct gpio_shared_proxy_data *proxy = gpiochip_get_data(gc);
- return gpio_shared_proxy_do_set(proxy, gpiod_set_value_cansleep, value);
+ guard(mutex)(&proxy->shared_desc->mutex);
+
+ return gpio_shared_proxy_set_unlocked(proxy, gpiod_set_value_cansleep,
+ value);
}
static int gpio_shared_proxy_get_direction(struct gpio_chip *gc,
@@ -302,20 +283,25 @@ static int gpio_shared_proxy_probe(struct auxiliary_device *adev,
gc->label = dev_name(dev);
gc->parent = dev;
gc->owner = THIS_MODULE;
- gc->can_sleep = shared_desc->can_sleep;
+ /*
+ * Under the descriptor mutex the proxy may call
+ * gpiod_set_config()/gpiod_direction_*(), which can reach pinctrl
+ * paths that take a mutex (e.g. gpiod_set_config() ->
+ * gpiochip_generic_config() -> pinctrl_gpio_set_config()), independent
+ * of the underlying chip's can_sleep. So the descriptor lock must be a
+ * mutex and the proxy gpiochip is therefore always sleeping; drive the
+ * underlying GPIO through the cansleep value accessors, which are valid
+ * for both sleeping and non-sleeping chips.
+ */
+ gc->can_sleep = true;
gc->request = gpio_shared_proxy_request;
gc->free = gpio_shared_proxy_free;
gc->set_config = gpio_shared_proxy_set_config;
gc->direction_input = gpio_shared_proxy_direction_input;
gc->direction_output = gpio_shared_proxy_direction_output;
- if (gc->can_sleep) {
- gc->set = gpio_shared_proxy_set_cansleep;
- gc->get = gpio_shared_proxy_get_cansleep;
- } else {
- gc->set = gpio_shared_proxy_set;
- gc->get = gpio_shared_proxy_get;
- }
+ gc->set = gpio_shared_proxy_set_cansleep;
+ gc->get = gpio_shared_proxy_get_cansleep;
gc->get_direction = gpio_shared_proxy_get_direction;
gc->to_irq = gpio_shared_proxy_to_irq;
diff --git a/drivers/gpio/gpiolib-shared.c b/drivers/gpio/gpiolib-shared.c
index de72776fb154..495bd3d0ddf0 100644
--- a/drivers/gpio/gpiolib-shared.c
+++ b/drivers/gpio/gpiolib-shared.c
@@ -627,8 +627,7 @@ static void gpio_shared_release(struct kref *kref)
shared_desc = entry->shared_desc;
gpio_device_put(shared_desc->desc->gdev);
- if (shared_desc->can_sleep)
- mutex_destroy(&shared_desc->mutex);
+ mutex_destroy(&shared_desc->mutex);
kfree(shared_desc);
entry->shared_desc = NULL;
}
@@ -659,11 +658,7 @@ gpiod_shared_desc_create(struct gpio_shared_entry *entry)
}
shared_desc->desc = &gdev->descs[entry->offset];
- shared_desc->can_sleep = gpiod_cansleep(shared_desc->desc);
- if (shared_desc->can_sleep)
- mutex_init(&shared_desc->mutex);
- else
- spin_lock_init(&shared_desc->spinlock);
+ mutex_init(&shared_desc->mutex);
return shared_desc;
}
diff --git a/drivers/gpio/gpiolib-shared.h b/drivers/gpio/gpiolib-shared.h
index 15e72a8dcdb1..bbdc0ab7b647 100644
--- a/drivers/gpio/gpiolib-shared.h
+++ b/drivers/gpio/gpiolib-shared.h
@@ -3,10 +3,7 @@
#ifndef __LINUX_GPIO_SHARED_H
#define __LINUX_GPIO_SHARED_H
-#include <linux/cleanup.h>
-#include <linux/lockdep.h>
#include <linux/mutex.h>
-#include <linux/spinlock.h>
struct gpio_device;
struct gpio_desc;
@@ -42,35 +39,12 @@ static inline int gpio_shared_add_proxy_lookup(struct device *consumer,
struct gpio_shared_desc {
struct gpio_desc *desc;
- bool can_sleep;
unsigned long cfg;
unsigned int usecnt;
unsigned int highcnt;
- union {
- struct mutex mutex;
- spinlock_t spinlock;
- };
+ struct mutex mutex; /* serializes all proxy operations on this descriptor */
};
struct gpio_shared_desc *devm_gpiod_shared_get(struct device *dev);
-DEFINE_LOCK_GUARD_1(gpio_shared_desc_lock, struct gpio_shared_desc,
- if (_T->lock->can_sleep)
- mutex_lock(&_T->lock->mutex);
- else
- spin_lock_irqsave(&_T->lock->spinlock, _T->flags),
- if (_T->lock->can_sleep)
- mutex_unlock(&_T->lock->mutex);
- else
- spin_unlock_irqrestore(&_T->lock->spinlock, _T->flags),
- unsigned long flags)
-
-static inline void gpio_shared_lockdep_assert(struct gpio_shared_desc *shared_desc)
-{
- if (shared_desc->can_sleep)
- lockdep_assert_held(&shared_desc->mutex);
- else
- lockdep_assert_held(&shared_desc->spinlock);
-}
-
#endif /* __LINUX_GPIO_SHARED_H */
--
2.54.0
^ permalink raw reply related
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox