Linux-ARM-Kernel Archive on lore.kernel.org
 help / color / mirror / Atom feed
* Re: [PATCH v6 01/10] hyperv: Log hypercall status codes as strings
From: Nuno Das Neves @ 2025-03-21 19:12 UTC (permalink / raw)
  To: Michael Kelley, linux-hyperv@vger.kernel.org, x86@kernel.org,
	linux-arm-kernel@lists.infradead.org,
	linux-kernel@vger.kernel.org, linux-arch@vger.kernel.org,
	ltykernel@gmail.com, stanislav.kinsburskiy@gmail.com,
	linux-acpi@vger.kernel.org, eahariha@linux.microsoft.com,
	jeff.johnson@oss.qualcomm.com
  Cc: kys@microsoft.com, haiyangz@microsoft.com, wei.liu@kernel.org,
	decui@microsoft.com, catalin.marinas@arm.com, will@kernel.org,
	tglx@linutronix.de, mingo@redhat.com, bp@alien8.de,
	dave.hansen@linux.intel.com, hpa@zytor.com,
	daniel.lezcano@linaro.org, joro@8bytes.org, robin.murphy@arm.com,
	arnd@arndb.de, jinankjain@linux.microsoft.com,
	muminulrussell@gmail.com, skinsburskii@linux.microsoft.com,
	mrathor@linux.microsoft.com, ssengar@linux.microsoft.com,
	apais@linux.microsoft.com, gregkh@linuxfoundation.org,
	vkuznets@redhat.com, prapal@linux.microsoft.com,
	anrayabh@linux.microsoft.com, rafael@kernel.org, lenb@kernel.org,
	corbet@lwn.net
In-Reply-To: <SN6PR02MB4157E630EC520DC487139372D4DE2@SN6PR02MB4157.namprd02.prod.outlook.com>

On 3/18/2025 11:01 AM, Michael Kelley wrote:
> From: Nuno Das Neves <nunodasneves@linux.microsoft.com> Sent: Friday, March 14, 2025 12:29 PM
>>
>> Introduce hv_status_printk() macros as a convenience to log hypercall
>> errors, formatting them with the status code (HV_STATUS_*) as a raw hex
>> value and also as a string, which saves some time while debugging.
>>
>> Create a table of HV_STATUS_ codes with strings and mapped errnos, and
>> use it for hv_result_to_string() and hv_result_to_errno().
>>
>> Use the new hv_status_printk()s in hv_proc.c, hyperv-iommu.c, and
>> irqdomain.c hypercalls to aid debugging in the root partition.
>>
>> Signed-off-by: Nuno Das Neves <nunodasneves@linux.microsoft.com>
>> Reviewed-by: Stanislav Kinsburskii <skinsburskii@linux.microsoft.com>
>> ---
>>  arch/x86/hyperv/irqdomain.c    |   6 +-
>>  drivers/hv/hv_common.c         | 129 ++++++++++++++++++++++++---------
>>  drivers/hv/hv_proc.c           |  10 +--
>>  drivers/iommu/hyperv-iommu.c   |   4 +-
>>  include/asm-generic/mshyperv.h |  13 ++++
>>  5 files changed, 118 insertions(+), 44 deletions(-)
>>
> 
> [snip]
>  
>> +
>> +struct hv_status_info {
>> +	char *string;
>> +	int errno;
>> +	u16 code;
>> +};
>> +
>> +/*
>> + * Note on the errno mappings:
>> + * A failed hypercall is usually only recoverable (or loggable) near
>> + * the call site where the HV_STATUS_* code is known. So the errno
>> + * it gets converted to is not too useful further up the stack.
>> + * Provide a few mappings that could be useful, and revert to -EIO
>> + * as a fallback.
>> + */
>> +static const struct hv_status_info hv_status_infos[] = {
>> +#define _STATUS_INFO(status, errno) { #status, (errno), (status) }
>> +	_STATUS_INFO(HV_STATUS_SUCCESS,				0),
>> +	_STATUS_INFO(HV_STATUS_INVALID_HYPERCALL_CODE,		-EINVAL),
>> +	_STATUS_INFO(HV_STATUS_INVALID_HYPERCALL_INPUT,		-EINVAL),
>> +	_STATUS_INFO(HV_STATUS_INVALID_ALIGNMENT,		-EIO),
>> +	_STATUS_INFO(HV_STATUS_INVALID_PARAMETER,		-EINVAL),
>> +	_STATUS_INFO(HV_STATUS_ACCESS_DENIED,			-EIO),
>> +	_STATUS_INFO(HV_STATUS_INVALID_PARTITION_STATE,		-EIO),
>> +	_STATUS_INFO(HV_STATUS_OPERATION_DENIED,		-EIO),
>> +	_STATUS_INFO(HV_STATUS_UNKNOWN_PROPERTY,		-EIO),
>> +	_STATUS_INFO(HV_STATUS_PROPERTY_VALUE_OUT_OF_RANGE,	-EIO),
>> +	_STATUS_INFO(HV_STATUS_INSUFFICIENT_MEMORY,		-ENOMEM),
>> +	_STATUS_INFO(HV_STATUS_INVALID_PARTITION_ID,		-EINVAL),
>> +	_STATUS_INFO(HV_STATUS_INVALID_VP_INDEX,		-EINVAL),
>> +	_STATUS_INFO(HV_STATUS_NOT_FOUND,			-EIO),
>> +	_STATUS_INFO(HV_STATUS_INVALID_PORT_ID,			-EINVAL),
>> +	_STATUS_INFO(HV_STATUS_INVALID_CONNECTION_ID,		-EINVAL),
>> +	_STATUS_INFO(HV_STATUS_INSUFFICIENT_BUFFERS,		-EIO),
>> +	_STATUS_INFO(HV_STATUS_NOT_ACKNOWLEDGED,		-EIO),
>> +	_STATUS_INFO(HV_STATUS_INVALID_VP_STATE,		-EIO),
>> +	_STATUS_INFO(HV_STATUS_NO_RESOURCES,			-EIO),
>> +	_STATUS_INFO(HV_STATUS_PROCESSOR_FEATURE_NOT_SUPPORTED,	-EIO),
>> +	_STATUS_INFO(HV_STATUS_INVALID_LP_INDEX,		-EINVAL),
>> +	_STATUS_INFO(HV_STATUS_INVALID_REGISTER_VALUE,		-EINVAL),
>> +	_STATUS_INFO(HV_STATUS_INVALID_LP_INDEX,		-EIO),
>> +	_STATUS_INFO(HV_STATUS_INVALID_REGISTER_VALUE,		-EIO),
>> +	_STATUS_INFO(HV_STATUS_OPERATION_FAILED,		-EIO),
>> +	_STATUS_INFO(HV_STATUS_TIME_OUT,			-EIO),
>> +	_STATUS_INFO(HV_STATUS_CALL_PENDING,			-EIO),
>> +	_STATUS_INFO(HV_STATUS_VTL_ALREADY_ENABLED,		-EIO),
>> +#undef _STATUS_INFO
>> +};
>> +
>> +static inline const struct hv_status_info *find_hv_status_info(u64 hv_status)
>> +{
>> +	int i;
>> +	u16 code = hv_result(hv_status);
>> +
>> +	for (i = 0; i < ARRAY_SIZE(hv_status_infos); ++i) {
>> +		const struct hv_status_info *info = &hv_status_infos[i];
>> +
>> +		if (info->code == code)
>> +			return info;
>> +	}
>> +
>> +	return NULL;
>> +}
>> +
>> +/* Convert a hypercall result into a linux-friendly error code. */
>> +int hv_result_to_errno(u64 status)
>> +{
>> +	const struct hv_status_info *info;
>> +
>> +	/* hv_do_hypercall() may return U64_MAX, hypercalls aren't possible */
>> +	if (unlikely(status == U64_MAX))
>> +		return -EOPNOTSUPP;
>> +
>> +	info = find_hv_status_info(status);
>> +	if (info)
>> +		return info->errno;
>> +
>> +	return -EIO;
>> +}
>> +EXPORT_SYMBOL_GPL(hv_result_to_errno);
>> +
>> +const char *hv_result_to_string(u64 status)
>> +{
>> +	const struct hv_status_info *info;
>> +
>> +	if (unlikely(status == U64_MAX))
>> +		return "Hypercall page missing!";
>> +
>> +	info = find_hv_status_info(status);
>> +	if (info)
>> +		return info->string;
>> +
>> +	return "Unknown";
>> +}
>> +EXPORT_SYMBOL_GPL(hv_result_to_string);
> 
> I think the table-driven approach worked out pretty well. But here's a version that
> is even more compact, and avoids the duplicate testing for U64_MAX and having
> to special case both U64_MAX and not finding a match:
> 
> +
> +struct hv_status_info {
> +	char *string;
> +	int errno;
> +	int code;
> +};
> +
> +/*
> + * Note on the errno mappings:
> + * A failed hypercall is usually only recoverable (or loggable) near
> + * the call site where the HV_STATUS_* code is known. So the errno
> + * it gets converted to is not too useful further up the stack.
> + * Provide a few mappings that could be useful, and revert to -EIO
> + * as a fallback.
> + */
> +static const struct hv_status_info hv_status_infos[] = {
> +#define _STATUS_INFO(status, errno) { #status, (errno), (status) }
> +	_STATUS_INFO(HV_STATUS_SUCCESS,				0),
> +	_STATUS_INFO(HV_STATUS_INVALID_HYPERCALL_CODE,		-EINVAL),
> +	_STATUS_INFO(HV_STATUS_INVALID_HYPERCALL_INPUT,		-EINVAL),
> +	_STATUS_INFO(HV_STATUS_INVALID_ALIGNMENT,		-EIO),
> +	_STATUS_INFO(HV_STATUS_INVALID_PARAMETER,		-EINVAL),
> +	_STATUS_INFO(HV_STATUS_ACCESS_DENIED,			-EIO),
> +	_STATUS_INFO(HV_STATUS_INVALID_PARTITION_STATE,		-EIO),
> +	_STATUS_INFO(HV_STATUS_OPERATION_DENIED,		-EIO),
> +	_STATUS_INFO(HV_STATUS_UNKNOWN_PROPERTY,		-EIO),
> +	_STATUS_INFO(HV_STATUS_PROPERTY_VALUE_OUT_OF_RANGE,	-EIO),
> +	_STATUS_INFO(HV_STATUS_INSUFFICIENT_MEMORY,		-ENOMEM),
> +	_STATUS_INFO(HV_STATUS_INVALID_PARTITION_ID,		-EINVAL),
> +	_STATUS_INFO(HV_STATUS_INVALID_VP_INDEX,		-EINVAL),
> +	_STATUS_INFO(HV_STATUS_NOT_FOUND,			-EIO),
> +	_STATUS_INFO(HV_STATUS_INVALID_PORT_ID,			-EINVAL),
> +	_STATUS_INFO(HV_STATUS_INVALID_CONNECTION_ID,		-EINVAL),
> +	_STATUS_INFO(HV_STATUS_INSUFFICIENT_BUFFERS,		-EIO),
> +	_STATUS_INFO(HV_STATUS_NOT_ACKNOWLEDGED,		-EIO),
> +	_STATUS_INFO(HV_STATUS_INVALID_VP_STATE,		-EIO),
> +	_STATUS_INFO(HV_STATUS_NO_RESOURCES,			-EIO),
> +	_STATUS_INFO(HV_STATUS_PROCESSOR_FEATURE_NOT_SUPPORTED,	-EIO),
> +	_STATUS_INFO(HV_STATUS_INVALID_LP_INDEX,		-EINVAL),
> +	_STATUS_INFO(HV_STATUS_INVALID_REGISTER_VALUE,		-EINVAL),
> +	_STATUS_INFO(HV_STATUS_INVALID_LP_INDEX,		-EIO),
> +	_STATUS_INFO(HV_STATUS_INVALID_REGISTER_VALUE,		-EIO),
> +	_STATUS_INFO(HV_STATUS_OPERATION_FAILED,		-EIO),
> +	_STATUS_INFO(HV_STATUS_TIME_OUT,			-EIO),
> +	_STATUS_INFO(HV_STATUS_CALL_PENDING,			-EIO),
> +	_STATUS_INFO(HV_STATUS_VTL_ALREADY_ENABLED,		-EIO),
> +	{"Hypercall page missing!", -EOPNOTSUPP, -1}, /* code -1 is "no hypercall page" */
> +	{"Unknown", -EIO, -2},  /* code -2 is "Not found" entry; must be last */
> +#undef _STATUS_INFO
> +};
> +
> +static inline const struct hv_status_info *find_hv_status_info(u64 hv_status)
> +{
> +	int i, code;
> +	const struct hv_status_info *info;
> +
> +	/* hv_do_hypercall() may return U64_MAX, hypercalls aren't possible */
> +	if (unlikely(hv_status == U64_MAX))
> +		code = -1;
> +	else
> +		code = hv_result(hv_status);
> +
> +	for (i = 0; i < ARRAY_SIZE(hv_status_infos); ++i) {
> +		info = &hv_status_infos[i];
> +		if (info->code == code || info->code == -2)
> +			break;
> +	}
> +
> +	return info;
> +}
> +
> +/* Convert a hypercall result into a linux-friendly error code. */
> +int hv_result_to_errno(u64 status)
> +{
> +	return find_hv_status_info(status)->errno;
> +}
> +EXPORT_SYMBOL_GPL(hv_result_to_errno);
> +
> +const char *hv_result_to_string(u64 status)
> +{
> +	return find_hv_status_info(status)->string;
> +}
> +EXPORT_SYMBOL_GPL(hv_result_to_string);
> 
> It could be even more compact by exporting find_hv_status_info() and
> letting  hv_result_to_errno() and hv_result_to_string() be #defines to
> find_hv_status_info()->errno and find_hv_status_info()->string,
> respectively.
> 
> Note that in struct hv_status_info, the "code" field is defined as "int"
> instead of "u16" so that it can contain sentinel values -1 and -2 that
> won't overlap with HV_STATUS_* values.
> 
Played around with this some more.

I like your idea of making it more compact by dealing with U64_MAX and
unknown in find_hv_status_info(), however I'm not as keen on putting
these cases in the array and iterating over the whole array when they
could just be static constants or inline struct initializers. See below.

I also like the idea of making hv_result_to_*() functions into simple
macros and exporting find_hv_status_info(). However, if it gets used
elsewhere it makes more sense if the returned hv_status_info for the
"Unknown" case contains the actual status code instead of replacing
that information with -2, so then I'd want to return it by value
instead of pointer:

+static const struct hv_status_info hv_status_infos[] = {
+#define _STATUS_INFO(status, errno) { #status, (status), (errno) }
+       _STATUS_INFO(HV_STATUS_SUCCESS,                         0),
<snip>
+       _STATUS_INFO(HV_STATUS_VTL_ALREADY_ENABLED,             -EIO),
+#undef _STATUS_INFO
+};
+
+struct hv_status_info hv_get_status_info(u64 hv_status)
+{
+       int i;
+       const struct hv_status_info *info;
+       u16 code = hv_result(hv_status);
+       struct hv_status_info ret = {"Unknown", code, -EIO};
+
+       if (hv_status == U64_MAX)
+               ret = (struct hv_status_info){"Hypercall page missing!", -1,
+                                             -EOPNOTSUPP};
+       else
+               for (i = 0; i < ARRAY_SIZE(hv_status_infos); ++i) {
+                       info = &hv_status_infos[i];
+                       if (info->code == code) {
+                               ret = *info;
+                               break;
+                       }
+               }
+
+       return ret;
+}
+EXPORT_SYMBOL_GPL(hv_get_status_info);

and in mshyperv.h:

+#define hv_result_to_string(hv_status) hv_get_status_info(hv_status).string
+#define hv_result_to_errno(hv_status) hv_get_status_info(hv_status).errno
+
+struct hv_status_info {
+       char *string;
+       int code;
+       int errno;
+};
+
+struct hv_status_info hv_get_status_info(u64 hv_status);

Note also I switched the order of code and errno in hv_status_info,
mainly because I think the struct initializers for "Unknown" and
"Hypercall page missing!" are more readable with that order:
{string, code, errno}

Do you see any problems with the above?

> Anyway, just a suggestion. The current code works from what I can
> see.
Thanks, it's not a bad idea at all to make it as compact and readable
as possible on the first try, but not a big loss either way.

Thanks
Nuno

> 
> Michael



^ permalink raw reply

* Re: [PATCH net-next v2] net: airoha: Validate egress gdm port in airoha_ppe_foe_entry_prepare()
From: Lorenzo Bianconi @ 2025-03-21 18:30 UTC (permalink / raw)
  To: Paolo Abeni
  Cc: Lorenzo Bianconi, Andrew Lunn, Andrew Lunn, David S. Miller,
	Eric Dumazet, Jakub Kicinski, linux-arm-kernel, linux-mediatek,
	netdev
In-Reply-To: <2d39033d-0303-48d0-98d3-49d63fba5563@redhat.com>

[-- Attachment #1: Type: text/plain, Size: 2262 bytes --]

> On 3/15/25 3:59 PM, Lorenzo Bianconi wrote:
> >>> Fix the issue validating egress gdm port in airoha_ppe_foe_entry_prepare
> >>> routine.
> >>
> >> A more interesting question is, why do you see an invalid port? Is the
> >> hardware broken? Something not correctly configured? Are you just
> >> papering over the crack?
> >>
> >>> -static int airoha_ppe_foe_entry_prepare(struct airoha_foe_entry *hwe,
> >>> +static int airoha_ppe_foe_entry_prepare(struct airoha_eth *eth,
> >>> +					struct airoha_foe_entry *hwe,
> >>>  					struct net_device *dev, int type,
> >>>  					struct airoha_flow_data *data,
> >>>  					int l4proto)
> >>> @@ -224,6 +225,11 @@ static int airoha_ppe_foe_entry_prepare(struct airoha_foe_entry *hwe,
> >>>  	if (dev) {
> >>>  		struct airoha_gdm_port *port = netdev_priv(dev);
> >>
> >> If port is invalid, is dev also invalid? And if dev is invalid, could
> >> dereferencing it to get priv cause an opps?
> > 
> > I do not think this is a hw problem. Running bidirectional high load traffic,
> > I got the sporadic crash reported above. In particular, netfilter runs
> > airoha_ppe_flow_offload_replace() providing the egress net_device pointer used
> > in airoha_ppe_foe_entry_prepare(). Debugging with gdb, I discovered the system
> > crashes dereferencing port pointer in airoha_ppe_foe_entry_prepare() (even if
> > dev pointer is not NULL). Adding this sanity check makes the system stable.
> > Please note a similar check is available even in mtk driver [0].
> 
> I agree with Andrew, you need a better understanding of the root cause.
> This really looks like papering over some deeper issue.
> 
> AFAICS 'dev' is fetched from the airoha driver itself a few lines
> before. Possibly you should double check that code.

Are you referring to airoha_get_dsa_port() routine?
I think dev pointer in airoha_ppe_foe_entry_prepare() is not strictly
necessary a device from a driver itself since it is an egress device
and the flowtable can contain even a wlan or a vlan device. In this
case airoha_get_dsa_port() will just return the original device pointer
and we can't assume priv pointer points to a airoha_gdm_port struct.
Agree?

Regards,
Lorenzo

> 
> Thanks,
> 
> Paolo
> 
> 

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 228 bytes --]

^ permalink raw reply

* [soc:soc/drivers 24/26] drivers/platform/cznic/turris-signing-key.c:25:50: error: parameter name omitted
From: kernel test robot @ 2025-03-21 18:30 UTC (permalink / raw)
  To: Marek Behún; +Cc: oe-kbuild-all, linux-arm-kernel, arm, Arnd Bergmann

Hi Marek,

First bad commit (maybe != root cause):

tree:   https://git.kernel.org/pub/scm/linux/kernel/git/soc/soc.git soc/drivers
head:   cf2ae5e5b68b06b4c8bceb42d46fa311e6db9a60
commit: ba8755ab541fc629948233125db870d4dbf00a75 [24/26] firmware: turris-mox-rwtm: Add support for ECDSA signatures with HW private key
config: arm-randconfig-002-20250322 (https://download.01.org/0day-ci/archive/20250322/202503220206.LHWsolYc-lkp@intel.com/config)
compiler: arm-linux-gnueabi-gcc (GCC) 7.5.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20250322/202503220206.LHWsolYc-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202503220206.LHWsolYc-lkp@intel.com/

Note: the soc/soc/drivers HEAD cf2ae5e5b68b06b4c8bceb42d46fa311e6db9a60 builds fine.
      It only hurts bisectability.

All errors (new ones prefixed by >>):

   drivers/platform/cznic/turris-signing-key.c: In function 'turris_signing_key_instantiate':
>> drivers/platform/cznic/turris-signing-key.c:25:50: error: parameter name omitted
    static int turris_signing_key_instantiate(struct key *, struct key_preparsed_payload *)
                                                     ^~~
   drivers/platform/cznic/turris-signing-key.c:25:64: error: parameter name omitted
    static int turris_signing_key_instantiate(struct key *, struct key_preparsed_payload *)
                                                                   ^~~~~~~~~~~~~~~~~~~~~


vim +25 drivers/platform/cznic/turris-signing-key.c

0b28b7080ef5a32 Marek Behún 2025-02-04  24  
0b28b7080ef5a32 Marek Behún 2025-02-04 @25  static int turris_signing_key_instantiate(struct key *, struct key_preparsed_payload *)
0b28b7080ef5a32 Marek Behún 2025-02-04  26  {
0b28b7080ef5a32 Marek Behún 2025-02-04  27  	return 0;
0b28b7080ef5a32 Marek Behún 2025-02-04  28  }
0b28b7080ef5a32 Marek Behún 2025-02-04  29  

:::::: The code at line 25 was first introduced by commit
:::::: 0b28b7080ef5a323c3afa3860c3d45d627629830 platform: cznic: Add keyctl helpers for Turris platform

:::::: TO: Marek Behún <kabel@kernel.org>
:::::: CC: Arnd Bergmann <arnd@arndb.de>

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki


^ permalink raw reply

* Re: [PATCH 0/2] perf vendor events arm64: AmpereOne/AmpereOneX: Add a new errata and fix metrics calculations
From: Namhyung Kim @ 2025-03-21 18:30 UTC (permalink / raw)
  To: Arnaldo Carvalho de Melo, Ian Rogers, John Garry, Will Deacon,
	James Clark, Mike Leach, Leo Yan, Peter Zijlstra, Ingo Molnar,
	Alexander Shishkin, Jiri Olsa, Adrian Hunter, Liang, Kan,
	Mark Rutland, Ilkka Koskinen
  Cc: linux-arm-kernel, linux-perf-users, linux-kernel
In-Reply-To: <20250313201559.11332-1-ilkka@os.amperecomputing.com>

On Thu, 13 Mar 2025 20:15:57 +0000, Ilkka Koskinen wrote:
> Add an errata reference to a impacted event and fix metrics calculation
> caused by a scaling issue.
> 
> Ilkka Koskinen (2):
>   perf vendor events arm64: AmpereOne/AmpereOneX: Mark LD_RETIRED
>     impacted by errata
>   perf vendor events arm64 AmpereOneX: Fix frontend_bound calculation
> 
> [...]
Applied to perf-tools-next, thanks!

Best regards,
Namhyung




^ permalink raw reply

* Re: [PATCH v7 00/14] perf: Support multiple system call tables in the build
From: Namhyung Kim @ 2025-03-21 18:30 UTC (permalink / raw)
  To: Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
	Mark Rutland, Alexander Shishkin, Jiri Olsa, Adrian Hunter,
	Kan Liang, John Garry, Will Deacon, James Clark, Mike Leach,
	Leo Yan, guoren, Paul Walmsley, Palmer Dabbelt, Albert Ou,
	Charlie Jenkins, Bibo Mao, Huacai Chen, Catalin Marinas,
	Jiri Slaby, Björn Töpel, Howard Chu, linux-kernel,
	linux-perf-users, linux-arm-kernel, linux-csky, linux-riscv,
	Arnd Bergmann, Ian Rogers
In-Reply-To: <20250319050741.269828-1-irogers@google.com>

On Tue, 18 Mar 2025 22:07:27 -0700, Ian Rogers wrote:
> This work builds on the clean up of system call tables and removal of
> libaudit by Charlie Jenkins <charlie@rivosinc.com>.
> 
> The system call table in perf trace is used to map system call numbers
> to names and vice versa. Prior to these changes, a single table
> matching the perf binary's build was present. The table would be
> incorrect if tracing say a 32-bit binary from a 64-bit version of
> perf, the names and numbers wouldn't match.
> 
> [...]
Applied to perf-tools-next, thanks!

Best regards,
Namhyung




^ permalink raw reply

* Re: [PATCH v8 2/5] drm/bridge: add support for refcounting
From: Luca Ceresoli @ 2025-03-21 18:21 UTC (permalink / raw)
  To: Maxime Ripard
  Cc: dri-devel, imx, linux-arm-kernel, linux-kernel, Andrzej Hajda,
	Anusha Srivatsa, David Airlie, Dmitry Baryshkov, Fabio Estevam,
	Hervé Codina, Hui Pu, Inki Dae, Jagan Teki, Jernej Skrabec,
	Jonas Karlman, Laurent Pinchart, Maarten Lankhorst,
	Marek Szyprowski, Marek Vasut, Neil Armstrong, Paul Kocialkowski,
	Pengutronix Kernel Team, Robert Foss, Sascha Hauer, Shawn Guo,
	Simona Vetter, Stefan Agner, Thomas Petazzoni, Thomas Zimmermann
In-Reply-To: <400466cd3c229ea6c6cb25e2a58cee27@kernel.org>

Hello Maxime,

On Fri, 21 Mar 2025 09:54:55 +0000
"Maxime Ripard" <mripard@kernel.org> wrote:

> On Thu, 20 Mar 2025 16:42:11 +0100, Luca Ceresoli wrote:
> > DRM bridges are currently considered as a fixed element of a DRM card, and
> > thus their lifetime is assumed to extend for as long as the card
> > exists. New use cases, such as hot-pluggable hardware with video bridges,
> > require DRM bridges to be added to and removed from a DRM card without
> > tearing the card down. This is possible for connectors already (used by DP
> > 
> > [ ... ]  
> 
> Reviewed-by: Maxime Ripard <mripard@kernel.org>

Thanks a lot for reviewing!

I noticed you haven't replied on patch 3. Being a change you had
suggested, I was wondering whether haven't noticed that. If you are OK
with that patch, the entire series would have a R-by, which would be
great to unlock all the work depending on this series.

Luca

-- 
Luca Ceresoli, Bootlin
Embedded Linux and Kernel engineering
https://bootlin.com


^ permalink raw reply

* Re: [PATCH net-next v2] net: airoha: Validate egress gdm port in airoha_ppe_foe_entry_prepare()
From: Paolo Abeni @ 2025-03-21 18:18 UTC (permalink / raw)
  To: Lorenzo Bianconi, Andrew Lunn
  Cc: Andrew Lunn, David S. Miller, Eric Dumazet, Jakub Kicinski,
	linux-arm-kernel, linux-mediatek, netdev
In-Reply-To: <Z9WV4mNwG04JRbZg@lore-desk>

On 3/15/25 3:59 PM, Lorenzo Bianconi wrote:
>>> Fix the issue validating egress gdm port in airoha_ppe_foe_entry_prepare
>>> routine.
>>
>> A more interesting question is, why do you see an invalid port? Is the
>> hardware broken? Something not correctly configured? Are you just
>> papering over the crack?
>>
>>> -static int airoha_ppe_foe_entry_prepare(struct airoha_foe_entry *hwe,
>>> +static int airoha_ppe_foe_entry_prepare(struct airoha_eth *eth,
>>> +					struct airoha_foe_entry *hwe,
>>>  					struct net_device *dev, int type,
>>>  					struct airoha_flow_data *data,
>>>  					int l4proto)
>>> @@ -224,6 +225,11 @@ static int airoha_ppe_foe_entry_prepare(struct airoha_foe_entry *hwe,
>>>  	if (dev) {
>>>  		struct airoha_gdm_port *port = netdev_priv(dev);
>>
>> If port is invalid, is dev also invalid? And if dev is invalid, could
>> dereferencing it to get priv cause an opps?
> 
> I do not think this is a hw problem. Running bidirectional high load traffic,
> I got the sporadic crash reported above. In particular, netfilter runs
> airoha_ppe_flow_offload_replace() providing the egress net_device pointer used
> in airoha_ppe_foe_entry_prepare(). Debugging with gdb, I discovered the system
> crashes dereferencing port pointer in airoha_ppe_foe_entry_prepare() (even if
> dev pointer is not NULL). Adding this sanity check makes the system stable.
> Please note a similar check is available even in mtk driver [0].

I agree with Andrew, you need a better understanding of the root cause.
This really looks like papering over some deeper issue.

AFAICS 'dev' is fetched from the airoha driver itself a few lines
before. Possibly you should double check that code.

Thanks,

Paolo



^ permalink raw reply

* Re: [PATCH v3 1/4] clk: add a clk_hw helpers to get the clock device or device_node
From: Jerome Brunet @ 2025-03-21 17:53 UTC (permalink / raw)
  To: Stephen Boyd
  Cc: Kevin Hilman, Martin Blumenstingl, Michael Turquette,
	Neil Armstrong, linux-clk, linux-kernel, linux-amlogic,
	linux-arm-kernel
In-Reply-To: <508a5ee6c6b365e8d9cdefd5a9eec769.sboyd@kernel.org>

On Wed 26 Feb 2025 at 17:01, Stephen Boyd <sboyd@kernel.org> wrote:


>> +static void clk_hw_get_of_node_test(struct kunit *test)
>> +{
>> +       struct device_node *np;
>> +       struct clk_hw *hw;
>> +
>> +       hw = kunit_kzalloc(test, sizeof(*hw), GFP_KERNEL);
>> +       KUNIT_ASSERT_NOT_ERR_OR_NULL(test, hw);
>> +
>> +       np = of_find_compatible_node(NULL, NULL, "test,clk-dummy-device");
>> +       hw->init = CLK_HW_INIT_NO_PARENT("test_get_of_node",
>> +                                        &clk_dummy_rate_ops, 0);
>> +       of_node_put_kunit(test, np);
>> +
>> +       KUNIT_ASSERT_EQ(test, 0, of_clk_hw_register_kunit(test, np, hw));
>
> The stuff before the expectation should likely go to the init function.
> Or it can use the genparams stuff so we can set some struct members to
> indicate if the pointer should be NULL or not and then twist through the
> code a couple times.
>

I'm trying to address all your comments but I'm starting to wonder if
this isn't going a bit too far ? The functions tested are one line
returns. Is it really worth all this ?

I do understand the idea for things that actually do something, such as
reparenting, setting rates or what not ... But this ? It feels like a
lot of test code for very little added value, don't you think ?

-- 
Jerome


^ permalink raw reply

* [soc:for-next] BUILD SUCCESS f7dd075de68de8a2536e50c2de434d1c12366155
From: kernel test robot @ 2025-03-21 17:50 UTC (permalink / raw)
  To: Arnd Bergmann; +Cc: linux-arm-kernel, arm

tree/branch: https://git.kernel.org/pub/scm/linux/kernel/git/soc/soc.git for-next
branch HEAD: f7dd075de68de8a2536e50c2de434d1c12366155  Merge branch 'soc/drivers' into for-next

elapsed time: 1446m

configs tested: 92
configs skipped: 2

The following configs have been built successfully.
More configs may be tested in the coming days.

tested configs:
alpha                            allyesconfig    gcc-14.2.0
arc                              allmodconfig    gcc-14.2.0
arc                              allyesconfig    gcc-14.2.0
arc                   randconfig-001-20250321    gcc-13.3.0
arc                   randconfig-002-20250321    gcc-11.5.0
arm                              allmodconfig    gcc-14.2.0
arm                              allyesconfig    gcc-14.2.0
arm                   randconfig-001-20250321    clang-19
arm                   randconfig-002-20250321    gcc-9.3.0
arm                   randconfig-003-20250321    gcc-5.5.0
arm                   randconfig-004-20250321    clang-21
arm64                            allmodconfig    clang-19
arm64                 randconfig-001-20250321    gcc-5.5.0
arm64                 randconfig-002-20250321    gcc-5.5.0
arm64                 randconfig-003-20250321    clang-20
arm64                 randconfig-004-20250321    clang-21
csky                  randconfig-001-20250321    gcc-13.3.0
csky                  randconfig-002-20250321    gcc-13.3.0
hexagon                          allmodconfig    clang-17
hexagon                          allyesconfig    clang-21
hexagon               randconfig-001-20250321    clang-21
hexagon               randconfig-002-20250321    clang-16
i386                             allmodconfig    gcc-12
i386                              allnoconfig    gcc-12
i386                             allyesconfig    gcc-12
i386        buildonly-randconfig-001-20250321    clang-20
i386        buildonly-randconfig-002-20250321    clang-20
i386        buildonly-randconfig-003-20250321    clang-20
i386        buildonly-randconfig-004-20250321    clang-20
i386        buildonly-randconfig-005-20250321    clang-20
i386        buildonly-randconfig-006-20250321    clang-20
i386                                defconfig    clang-20
loongarch                        allmodconfig    gcc-14.2.0
loongarch             randconfig-001-20250321    gcc-14.2.0
loongarch             randconfig-002-20250321    gcc-14.2.0
m68k                             allmodconfig    gcc-14.2.0
m68k                              allnoconfig    gcc-14.2.0
m68k                             allyesconfig    gcc-14.2.0
microblaze                       allmodconfig    gcc-14.2.0
microblaze                        allnoconfig    gcc-14.2.0
microblaze                       allyesconfig    gcc-14.2.0
mips                              allnoconfig    gcc-14.2.0
nios2                             allnoconfig    gcc-14.2.0
nios2                 randconfig-001-20250321    gcc-13.3.0
nios2                 randconfig-002-20250321    gcc-7.5.0
openrisc                         allyesconfig    gcc-14.2.0
parisc                           allmodconfig    gcc-14.2.0
parisc                           allyesconfig    gcc-14.2.0
parisc                randconfig-001-20250321    gcc-8.5.0
parisc                randconfig-002-20250321    gcc-6.5.0
powerpc                          allmodconfig    gcc-14.2.0
powerpc                          allyesconfig    clang-21
powerpc               randconfig-001-20250321    clang-21
powerpc               randconfig-002-20250321    gcc-7.5.0
powerpc               randconfig-003-20250321    gcc-7.5.0
powerpc64             randconfig-001-20250321    gcc-5.5.0
powerpc64             randconfig-002-20250321    clang-16
powerpc64             randconfig-003-20250321    gcc-7.5.0
riscv                            allmodconfig    clang-21
riscv                            allyesconfig    clang-16
riscv                 randconfig-001-20250321    clang-21
riscv                 randconfig-002-20250321    clang-21
s390                             allyesconfig    gcc-14.2.0
s390                  randconfig-001-20250321    clang-16
s390                  randconfig-002-20250321    gcc-8.5.0
sh                               allmodconfig    gcc-14.2.0
sh                                allnoconfig    gcc-14.2.0
sh                               allyesconfig    gcc-14.2.0
sh                    randconfig-001-20250321    gcc-7.5.0
sh                    randconfig-002-20250321    gcc-7.5.0
sparc                            allmodconfig    gcc-14.2.0
sparc                             allnoconfig    gcc-14.2.0
sparc                 randconfig-001-20250321    gcc-12.4.0
sparc                 randconfig-002-20250321    gcc-6.5.0
sparc64               randconfig-001-20250321    gcc-10.5.0
sparc64               randconfig-002-20250321    gcc-6.5.0
um                               allmodconfig    clang-19
um                               allyesconfig    gcc-12
um                    randconfig-001-20250321    gcc-12
um                    randconfig-002-20250321    gcc-12
x86_64                            allnoconfig    clang-20
x86_64                           allyesconfig    clang-20
x86_64      buildonly-randconfig-001-20250321    clang-20
x86_64      buildonly-randconfig-002-20250321    clang-20
x86_64      buildonly-randconfig-003-20250321    gcc-12
x86_64      buildonly-randconfig-004-20250321    clang-20
x86_64      buildonly-randconfig-005-20250321    clang-20
x86_64      buildonly-randconfig-006-20250321    clang-20
x86_64                              defconfig    gcc-11
xtensa                            allnoconfig    gcc-14.2.0
xtensa                randconfig-001-20250321    gcc-6.5.0
xtensa                randconfig-002-20250321    gcc-10.5.0

--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki


^ permalink raw reply

* Re: [PATCH v5 0/9] Merge arm64/riscv hugetlbfs contpte support
From: Christophe Leroy @ 2025-03-21 17:24 UTC (permalink / raw)
  To: Alexandre Ghiti, Catalin Marinas, Will Deacon, Ryan Roberts,
	Mark Rutland, Matthew Wilcox, Paul Walmsley, Palmer Dabbelt,
	Alexandre Ghiti, Andrew Morton, linux-arm-kernel, linux-kernel,
	linux-riscv, linux-mm
In-Reply-To: <20250321130635.227011-1-alexghiti@rivosinc.com>



Le 21/03/2025 à 14:06, Alexandre Ghiti a écrit :
> This patchset intends to merge the contiguous ptes hugetlbfs implementation
> of arm64 and riscv.

Can we also add powerpc in the dance ?

powerpc also use contiguous PTEs allthough there is not (yet) a special 
name for it:
- b250c8c08c79 powerpc/8xx: Manage 512k huge pages as standard pages
- e47168f3d1b1 powerpc/8xx: Support 16k hugepages with 4k pages

powerpc also use configuous PMDs/PUDs for larger hugepages:
- 57fb15c32f4f ("powerpc/64s: use contiguous PMD/PUD instead of HUGEPD")
- 7c44202e3609 ("powerpc/e500: use contiguous PMD instead of hugepd")
- 0549e7666373 ("powerpc/8xx: rework support for 8M pages using 
contiguous PTE entries")

Christophe

> 
> Both arm64 and riscv support the use of contiguous ptes to map pages that
> are larger than the default page table size, respectively called contpte
> and svnapot.
> 
> The riscv implementation differs from the arm64's in that the LSBs of the
> pfn of a svnapot pte are used to store the size of the mapping, allowing
> for future sizes to be added (for now only 64KB is supported). That's an
> issue for the core mm code which expects to find the *real* pfn a pte points
> to. Patch 1 fixes that by always returning svnapot ptes with the real pfn
> and restores the size of the mapping when it is written to a page table.
> 
> The following patches are just merges of the 2 different implementations
> that currently exist in arm64 and riscv which are very similar. It paves
> the way to the reuse of the recent contpte THP work by Ryan [1] to avoid
> reimplementing the same in riscv.
> 
> This patchset was tested by running the libhugetlbfs testsuite with 64KB
> and 2MB pages on both architectures (on a 4KB base page size arm64 kernel).
> 
> [1] https://lore.kernel.org/linux-arm-kernel/20240215103205.2607016-1-ryan.roberts@arm.com/
> 
> v4: https://lore.kernel.org/linux-riscv/20250127093530.19548-1-alexghiti@rivosinc.com/
> v3: https://lore.kernel.org/all/20240802151430.99114-1-alexghiti@rivosinc.com/
> v2: https://lore.kernel.org/linux-riscv/20240508113419.18620-1-alexghiti@rivosinc.com/
> v1: https://lore.kernel.org/linux-riscv/20240301091455.246686-1-alexghiti@rivosinc.com/
> 
> Changes in v5:
>    - Fix "int i" unused variable in patch 2 (as reported by PW)
>    - Fix !svnapot build
>    - Fix arch_make_huge_pte() which returned a real napot pte
>    - Make __ptep_get(), ptep_get_and_clear() and __set_ptes() napot aware to
>      avoid leaking real napot pfns to core mm
>    - Fix arch_contpte_get_num_contig() that used to always try to get the
>      mapping size from the ptep, which does not work if the ptep comes the core mm
>    - Rebase on top of 6.14-rc7 + fix for
>      huge_ptep_get_and_clear()/huge_pte_clear()
>      https://lore.kernel.org/linux-riscv/20250317072551.572169-1-alexghiti@rivosinc.com/
> 
> Changes in v4:
>    - Rebase on top of 6.13
> 
> Changes in v3:
>    - Split set_ptes and ptep_get into internal and external API (Ryan)
>    - Rename ARCH_HAS_CONTPTE into ARCH_WANT_GENERAL_HUGETLB_CONTPTE so that
>      we split hugetlb functions from contpte functions (actually riscv contpte
>      functions to support THP will come into another series) (Ryan)
>    - Rebase on top of 6.11-rc1
> 
> Changes in v2:
>    - Rebase on top of 6.9-rc3
> 
> Alexandre Ghiti (9):
>    riscv: Safely remove huge_pte_offset() when manipulating NAPOT ptes
>    riscv: Restore the pfn in a NAPOT pte when manipulated by core mm code
>    mm: Use common huge_ptep_get() function for riscv/arm64
>    mm: Use common set_huge_pte_at() function for riscv/arm64
>    mm: Use common huge_pte_clear() function for riscv/arm64
>    mm: Use common huge_ptep_get_and_clear() function for riscv/arm64
>    mm: Use common huge_ptep_set_access_flags() function for riscv/arm64
>    mm: Use common huge_ptep_set_wrprotect() function for riscv/arm64
>    mm: Use common huge_ptep_clear_flush() function for riscv/arm64
> 
>   arch/arm64/Kconfig                  |   1 +
>   arch/arm64/include/asm/hugetlb.h    |  22 +--
>   arch/arm64/include/asm/pgtable.h    |  68 ++++++-
>   arch/arm64/mm/hugetlbpage.c         | 294 +---------------------------
>   arch/riscv/Kconfig                  |   1 +
>   arch/riscv/include/asm/hugetlb.h    |  36 +---
>   arch/riscv/include/asm/pgtable-64.h |  11 ++
>   arch/riscv/include/asm/pgtable.h    | 222 ++++++++++++++++++---
>   arch/riscv/mm/hugetlbpage.c         | 243 +----------------------
>   arch/riscv/mm/pgtable.c             |   6 +-
>   include/linux/hugetlb_contpte.h     |  39 ++++
>   mm/Kconfig                          |   3 +
>   mm/Makefile                         |   1 +
>   mm/hugetlb_contpte.c                | 258 ++++++++++++++++++++++++
>   14 files changed, 583 insertions(+), 622 deletions(-)
>   create mode 100644 include/linux/hugetlb_contpte.h
>   create mode 100644 mm/hugetlb_contpte.c
> 



^ permalink raw reply

* Re: [PATCH] net: airoha: fix CONFIG_DEBUG_FS check
From: patchwork-bot+netdevbpf @ 2025-03-21 17:50 UTC (permalink / raw)
  To: Arnd Bergmann
  Cc: lorenzo, andrew+netdev, davem, edumazet, kuba, pabeni, arnd,
	horms, ansuelsmth, linux-arm-kernel, linux-mediatek, netdev,
	linux-kernel
In-Reply-To: <20250314155009.4114308-1-arnd@kernel.org>

Hello:

This patch was applied to netdev/net-next.git (main)
by Paolo Abeni <pabeni@redhat.com>:

On Fri, 14 Mar 2025 16:49:59 +0100 you wrote:
> From: Arnd Bergmann <arnd@arndb.de>
> 
> The #if check causes a build failure when CONFIG_DEBUG_FS is turned
> off:
> 
> In file included from drivers/net/ethernet/airoha/airoha_eth.c:17:
> drivers/net/ethernet/airoha/airoha_eth.h:543:5: error: "CONFIG_DEBUG_FS" is not defined, evaluates to 0 [-Werror=undef]
>   543 | #if CONFIG_DEBUG_FS
>       |     ^~~~~~~~~~~~~~~
> 
> [...]

Here is the summary with links:
  - net: airoha: fix CONFIG_DEBUG_FS check
    https://git.kernel.org/netdev/net-next/c/08d0185e36ad

You are awesome, thank you!
-- 
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/patchwork/pwbot.html




^ permalink raw reply

* Re: [PATCH net-next v3 1/7] net: ethtool: netlink: Allow per-netdevice DUMP operations
From: Maxime Chevallier @ 2025-03-21 17:41 UTC (permalink / raw)
  To: Paolo Abeni
  Cc: davem, Andrew Lunn, Jakub Kicinski, Eric Dumazet, Heiner Kallweit,
	netdev, linux-kernel, thomas.petazzoni, linux-arm-kernel,
	Christophe Leroy, Herve Codina, Florian Fainelli, Russell King,
	Vladimir Oltean, Köry Maincent, Oleksij Rempel, Simon Horman,
	Romain Gantois, Piergiorgio Beruto
In-Reply-To: <c4e5bd2f-6216-4f74-b677-46c79343eb21@redhat.com>

On Fri, 21 Mar 2025 17:31:17 +0100
Paolo Abeni <pabeni@redhat.com> wrote:

> On 3/13/25 7:26 PM, Maxime Chevallier wrote:
> > We have a number of netlink commands in the ethnl family that may have
> > multiple objects to dump even for a single net_device, including :
> > 
> >  - PLCA, PSE-PD, phy: one message per PHY device
> >  - tsinfo: one message per timestamp source (netdev + phys)
> >  - rss: One per RSS context
> > 
> > To get this behaviour, these netlink commands need to roll a custom  
> > ->dumpit().  
> > 
> > To prepare making per-netdev DUMP more generic in ethnl, introduce a
> > member in the ethnl ops to indicate if a given command may allow
> > pernetdev DUMPs (also referred to as filtered DUMPs).
> > 
> > Signed-off-by: Maxime Chevallier <maxime.chevallier@bootlin.com>
> > ---
> >  net/ethtool/netlink.c | 45 +++++++++++++++++++++++++++++--------------
> >  net/ethtool/netlink.h |  2 ++
> >  2 files changed, 33 insertions(+), 14 deletions(-)
> > 
> > diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c
> > index a163d40c6431..7adede5e4ff1 100644
> > --- a/net/ethtool/netlink.c
> > +++ b/net/ethtool/netlink.c
> > @@ -587,21 +587,38 @@ static int ethnl_default_dumpit(struct sk_buff *skb,
> >  	int ret = 0;
> >  
> >  	rcu_read_lock();  
> 
> Maintain the RCU read lock here is IMHO confusing...
> 
> > -	for_each_netdev_dump(net, dev, ctx->pos_ifindex) {
> > -		dev_hold(dev);
> > +	if (ctx->req_info->dev) {
> > +		dev = ctx->req_info->dev;  
> 
> .. as this is refcounted.
> 
> I suggest to move the rcu_read_lock inside the if.

Indeed, maybe not the best place indeed. I'll address that, thanks for
pointing this out

> 
> >  		rcu_read_unlock();
> > +		/* Filtered DUMP request targeted to a single netdev. We already
> > +		 * hold a ref to the netdev from ->start()
> > +		 */
> > +		ret = ethnl_default_dump_one(skb, dev, ctx,
> > +					     genl_info_dump(cb));
> > +		rcu_read_lock();
> > +		netdev_put(ctx->req_info->dev, &ctx->req_info->dev_tracker);
> >  
> > -		ret = ethnl_default_dump_one(skb, dev, ctx, genl_info_dump(cb));
> > +		if (ret < 0 && ret != -EOPNOTSUPP && likely(skb->len))
> > +			ret = skb->len;
> >  
> > -		rcu_read_lock();
> > -		dev_put(dev);
> > +	} else {
> > +		for_each_netdev_dump(net, dev, ctx->pos_ifindex) {
> > +			dev_hold(dev);
> > +			rcu_read_unlock();
> > +
> > +			ret = ethnl_default_dump_one(skb, dev, ctx,
> > +						     genl_info_dump(cb));
> > +
> > +			rcu_read_lock();
> > +			dev_put(dev);
> >  
> > -		if (ret < 0 && ret != -EOPNOTSUPP) {
> > -			if (likely(skb->len))
> > -				ret = skb->len;
> > -			break;
> > +			if (ret < 0 && ret != -EOPNOTSUPP) {
> > +				if (likely(skb->len))
> > +					ret = skb->len;  
> 
> IMHO a bit too many levels of indentation. It's possibly better to move
> this code in a separate helper.

That's true, not the prettiest piece of that patch. I'll refactor this
better then.

Thanks for the review,

Maxime


^ permalink raw reply

* Re: [PATCH] wifi: mt76: mt7996: prevent uninit return in mt7996_mac_sta_add_links
From: Dan Carpenter @ 2025-03-21 17:35 UTC (permalink / raw)
  To: Lorenzo Bianconi
  Cc: Qasim Ijaz, nbd, ryder.lee, shayne.chen, sean.wang, matthias.bgg,
	angelogioacchino.delregno, chui-hao.chiu, Bo.Jiao, linux-wireless,
	linux-kernel, linux-arm-kernel, linux-mediatek
In-Reply-To: <Z92eiVIFIUOFKXj_@lore-desk>

On Fri, Mar 21, 2025 at 06:14:49PM +0100, Lorenzo Bianconi wrote:
> > On Thu, Mar 20, 2025 at 08:19:14PM +0000, Qasim Ijaz wrote:
> > > If link_conf_dereference_protected() or mt7996_vif_link() 
> > > or link_sta_dereference_protected() fail the code jumps to
> > > the error_unlink label and returns ret which is uninitialised.
> > > 
> > > Fix this by setting err before jumping to error_unlink.
> > > 
> > > Fixes: c7e4fc362443 ("wifi: mt76: mt7996: Update mt7996_mcu_add_sta to MLO support")
> > > Fixes: dd82a9e02c05 ("wifi: mt76: mt7996: Rely on mt7996_sta_link in sta_add/sta_remove callbacks")
> > > Signed-off-by: Qasim Ijaz <qasdev00@gmail.com>
> > > ---
> > >  drivers/net/wireless/mediatek/mt76/mt7996/main.c | 12 +++++++++---
> > >  1 file changed, 9 insertions(+), 3 deletions(-)
> > > 
> > > diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/main.c b/drivers/net/wireless/mediatek/mt76/mt7996/main.c
> > > index 91c64e3a0860..78f7f1fc867e 100644
> > > --- a/drivers/net/wireless/mediatek/mt76/mt7996/main.c
> > > +++ b/drivers/net/wireless/mediatek/mt76/mt7996/main.c
> > > @@ -998,16 +998,22 @@ mt7996_mac_sta_add_links(struct mt7996_dev *dev, struct ieee80211_vif *vif,
> > >  			continue;
> > 
> > What about if the list is empty or we hit this continue on every link?
> 
> we will return 0 and I guess that's fine, agree?
> 

Fine by me?

regards,
dan carpenter



^ permalink raw reply

* Re: [PATCH v4 10/14] s390: Add support for suppressing warning backtraces
From: Arnd Bergmann @ 2025-03-21 17:33 UTC (permalink / raw)
  To: Guenter Roeck, Alessandro Carminati, linux-kselftest
  Cc: Dave Airlie, Maíra Canal, Dan Carpenter, Kees Cook,
	Daniel Díaz, David Gow, Arthur Grillo, Brendan Higgins,
	Naresh Kamboju, Maarten Lankhorst, Andrew Morton, Maxime Ripard,
	Ville Syrjälä, Daniel Vetter, Thomas Zimmermann,
	Alessandro Carminati, Jani Nikula, dri-devel, kunit-dev,
	Linux-Arch, linux-arm-kernel, linux-doc, linux-kernel,
	linux-parisc, linuxppc-dev, linux-riscv, linux-s390, linux-sh,
	loongarch, x86, Linaro Kernel Functional Testing, Heiko Carstens,
	Vasily Gorbik, Alexander Gordeev
In-Reply-To: <b6bb68f0-7e93-4db2-9fe6-f615f06ddeb1@roeck-us.net>

On Fri, Mar 21, 2025, at 18:05, Guenter Roeck wrote:
> On 3/13/25 04:43, Alessandro Carminati wrote:
>
> gcc 10.3.0 and later do not have this problem. I also tried s390 builds 
> with gcc 9.4
> and 9.5 but they both crash for unrelated reasons.
>
> If this is a concern, the best idea I have is to make KUNIT_SUPPRESS_BACKTRACE
> depend on, say,
> 	depends on CC_IS_CLANG || (CC_IS_GCC && GCC_VERSION >= 100300)
>
> A more complex solution might be to define an architecture flag such
> as HAVE_SUPPRESS_BACKTRACE, make that conditional on the gcc version
> for s390 only, and make KUNIT_SUPPRESS_BACKTRACE depend on it.

That is probably fine, there are very few users on s390 that build
their own kernels, and they likely all have modern compilers anyway.

I should still send a patch to raise the minimum compiler version to
gcc-8.1, but unfortunately that is not enough here.

     Arnd


^ permalink raw reply

* Re: [PATCH v1 2/3] arm64: dts: qcom: x1e80100: add bus topology for PCIe domain 3
From: Dmitry Baryshkov @ 2025-03-21 17:19 UTC (permalink / raw)
  To: Wenbin Yao
  Cc: andersson, konradybcio, robh, krzk+dt, conor+dt, linux-arm-msm,
	devicetree, linux-kernel, catalin.marinas, will, quic_qianyu, sfr,
	linux-arm-kernel
In-Reply-To: <20250320055502.274849-3-quic_wenbyao@quicinc.com>

On Thu, Mar 20, 2025 at 01:55:01PM +0800, Wenbin Yao wrote:
> From: Qiang Yu <quic_qianyu@quicinc.com>
> 
> Add pcie3port node to represent the PCIe bridge of PCIe3 so that PCI slot
> voltage rails can be described under this node in the board's dts.
> 
> Signed-off-by: Qiang Yu <quic_qianyu@quicinc.com>
> Signed-off-by: Wenbin Yao <quic_wenbyao@quicinc.com>
> ---
>  arch/arm64/boot/dts/qcom/x1e80100.dtsi | 10 ++++++++++
>  1 file changed, 10 insertions(+)
> 
> diff --git a/arch/arm64/boot/dts/qcom/x1e80100.dtsi b/arch/arm64/boot/dts/qcom/x1e80100.dtsi
> index 46b79fce9..32e8d400a 100644
> --- a/arch/arm64/boot/dts/qcom/x1e80100.dtsi
> +++ b/arch/arm64/boot/dts/qcom/x1e80100.dtsi
> @@ -3287,6 +3287,16 @@ opp-128000000 {
>  					opp-peak-kBps = <15753000 1>;
>  				};
>  			};
> +			pcie3port: pcie@0 {

Empty line between nodes, please.

> +				device_type = "pci";
> +				compatible = "pciclass,0604";
> +				reg = <0x0 0x0 0x0 0x0 0x0>;
> +				bus-range = <0x01 0xff>;
> +
> +				#address-cells = <3>;
> +				#size-cells = <2>;
> +				ranges;
> +			};
>  		};
>  
>  		pcie3_phy: phy@1be0000 {
> -- 
> 2.34.1
> 

-- 
With best wishes
Dmitry


^ permalink raw reply

* Re: [PATCH] wifi: mt76: mt7996: prevent uninit return in mt7996_mac_sta_add_links
From: Lorenzo Bianconi @ 2025-03-21 17:14 UTC (permalink / raw)
  To: Dan Carpenter
  Cc: Qasim Ijaz, nbd, ryder.lee, shayne.chen, sean.wang, matthias.bgg,
	angelogioacchino.delregno, chui-hao.chiu, Bo.Jiao, linux-wireless,
	linux-kernel, linux-arm-kernel, linux-mediatek
In-Reply-To: <46a714fb-8a14-4d24-a0a6-a22cc9d45768@stanley.mountain>

[-- Attachment #1: Type: text/plain, Size: 1976 bytes --]

> On Thu, Mar 20, 2025 at 08:19:14PM +0000, Qasim Ijaz wrote:
> > If link_conf_dereference_protected() or mt7996_vif_link() 
> > or link_sta_dereference_protected() fail the code jumps to
> > the error_unlink label and returns ret which is uninitialised.
> > 
> > Fix this by setting err before jumping to error_unlink.
> > 
> > Fixes: c7e4fc362443 ("wifi: mt76: mt7996: Update mt7996_mcu_add_sta to MLO support")
> > Fixes: dd82a9e02c05 ("wifi: mt76: mt7996: Rely on mt7996_sta_link in sta_add/sta_remove callbacks")
> > Signed-off-by: Qasim Ijaz <qasdev00@gmail.com>
> > ---
> >  drivers/net/wireless/mediatek/mt76/mt7996/main.c | 12 +++++++++---
> >  1 file changed, 9 insertions(+), 3 deletions(-)
> > 
> > diff --git a/drivers/net/wireless/mediatek/mt76/mt7996/main.c b/drivers/net/wireless/mediatek/mt76/mt7996/main.c
> > index 91c64e3a0860..78f7f1fc867e 100644
> > --- a/drivers/net/wireless/mediatek/mt76/mt7996/main.c
> > +++ b/drivers/net/wireless/mediatek/mt76/mt7996/main.c
> > @@ -998,16 +998,22 @@ mt7996_mac_sta_add_links(struct mt7996_dev *dev, struct ieee80211_vif *vif,
> >  			continue;
> 
> What about if the list is empty or we hit this continue on every link?

we will return 0 and I guess that's fine, agree?

Regards,
Lorenzo

> 
> regards,
> dan carpenter
> 
> >  
> >  		link_conf = link_conf_dereference_protected(vif, link_id);
> > -		if (!link_conf)
> > +		if (!link_conf) {
> > +			err = -EINVAL;
> >  			goto error_unlink;
> > +		}
> >  
> >  		link = mt7996_vif_link(dev, vif, link_id);
> > -		if (!link)
> > +		if (!link) {
> > +			err = -EINVAL;
> >  			goto error_unlink;
> > +		}
> >  
> >  		link_sta = link_sta_dereference_protected(sta, link_id);
> > -		if (!link_sta)
> > +		if (!link_sta) {
> > +			err = -EINVAL
> >  			goto error_unlink;
> > +		}
> >  
> >  		err = mt7996_mac_sta_init_link(dev, link_conf, link_sta, link,
> >  					       link_id);
> > -- 
> > 2.39.5
> > 

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 228 bytes --]

^ permalink raw reply

* Re: [PATCH next] phy: rockchip-samsung-dcphy: Add missing assignment
From: Heiko Stuebner @ 2025-03-21 17:08 UTC (permalink / raw)
  To: Heiko Stuebner, Dan Carpenter
  Cc: Vinod Koul, Kishon Vijay Abraham I, Sebastian Reichel, linux-phy,
	linux-arm-kernel, linux-rockchip, linux-kernel, kernel-janitors
In-Reply-To: <e64265a4-9543-4728-a49f-ea910fccef7c@stanley.mountain>

Hey Dan,

Am Freitag, 21. März 2025, 15:36:14 MEZ schrieb Dan Carpenter:
> The "ret = " was accidentally dropped so the error handling doesn't work.
> 
> Fixes: b2a1a2ae7818 ("phy: rockchip: Add Samsung MIPI D-/C-PHY driver")
> Signed-off-by: Dan Carpenter <dan.carpenter@linaro.org>

oh darn ... thanks so much for catching that

Reviewed-by: Heiko Stuebner <heiko@sntech.de>

> ---
>  drivers/phy/rockchip/phy-rockchip-samsung-dcphy.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/drivers/phy/rockchip/phy-rockchip-samsung-dcphy.c b/drivers/phy/rockchip/phy-rockchip-samsung-dcphy.c
> index 08c78c1bafc9..28a052e17366 100644
> --- a/drivers/phy/rockchip/phy-rockchip-samsung-dcphy.c
> +++ b/drivers/phy/rockchip/phy-rockchip-samsung-dcphy.c
> @@ -1653,7 +1653,7 @@ static __maybe_unused int samsung_mipi_dcphy_runtime_resume(struct device *dev)
>  		return ret;
>  	}
>  
> -	clk_prepare_enable(samsung->ref_clk);
> +	ret = clk_prepare_enable(samsung->ref_clk);
>  	if (ret) {
>  		dev_err(samsung->dev, "Failed to enable reference clock, %d\n", ret);
>  		clk_disable_unprepare(samsung->pclk);
> 






^ permalink raw reply

* Re: [PATCH v4 10/14] s390: Add support for suppressing warning backtraces
From: Guenter Roeck @ 2025-03-21 17:05 UTC (permalink / raw)
  To: Alessandro Carminati, linux-kselftest
  Cc: David Airlie, Arnd Bergmann, Maíra Canal, Dan Carpenter,
	Kees Cook, Daniel Diaz, David Gow, Arthur Grillo, Brendan Higgins,
	Naresh Kamboju, Maarten Lankhorst, Andrew Morton, Maxime Ripard,
	Ville Syrjälä, Daniel Vetter, Thomas Zimmermann,
	Alessandro Carminati, Jani Nikula, dri-devel, kunit-dev,
	linux-arch, linux-arm-kernel, linux-doc, linux-kernel,
	linux-parisc, linuxppc-dev, linux-riscv, linux-s390, linux-sh,
	loongarch, x86, Linux Kernel Functional Testing, Heiko Carstens,
	Vasily Gorbik, Alexander Gordeev
In-Reply-To: <20250313114329.284104-11-acarmina@redhat.com>

On 3/13/25 04:43, Alessandro Carminati wrote:
> From: Guenter Roeck <linux@roeck-us.net>
> 
> Add name of functions triggering warning backtraces to the __bug_table
> object section to enable support for suppressing WARNING backtraces.
> 
> To limit image size impact, the pointer to the function name is only added
> to the __bug_table section if both CONFIG_KUNIT_SUPPRESS_BACKTRACE and
> CONFIG_DEBUG_BUGVERBOSE are enabled. Otherwise, the __func__ assembly
> parameter is replaced with a (dummy) NULL parameter to avoid an image size
> increase due to unused __func__ entries (this is necessary because
> __func__ is not a define but a virtual variable).
> 
> Tested-by: Linux Kernel Functional Testing <lkft@linaro.org>
> Acked-by: Dan Carpenter <dan.carpenter@linaro.org>
> Cc: Heiko Carstens <hca@linux.ibm.com>
> Cc: Vasily Gorbik <gor@linux.ibm.com>
> Cc: Alexander Gordeev <agordeev@linux.ibm.com>
> Signed-off-by: Guenter Roeck <linux@roeck-us.net>
> Signed-off-by: Alessandro Carminati <acarmina@redhat.com>
> ---
>   arch/s390/include/asm/bug.h | 17 ++++++++++++++---
>   1 file changed, 14 insertions(+), 3 deletions(-)
> 
> diff --git a/arch/s390/include/asm/bug.h b/arch/s390/include/asm/bug.h
> index c500d45fb465..44d4e9f24ae0 100644
> --- a/arch/s390/include/asm/bug.h
> +++ b/arch/s390/include/asm/bug.h
> @@ -8,6 +8,15 @@
>   
>   #ifdef CONFIG_DEBUG_BUGVERBOSE
>   
> +#ifdef CONFIG_KUNIT_SUPPRESS_BACKTRACE
> +# define HAVE_BUG_FUNCTION
> +# define __BUG_FUNC_PTR	"	.long	%0-.\n"
> +# define __BUG_FUNC	__func__

gcc 7.5.0 on s390 barfs; it doesn't like the use of "__func__" with "%0-."

drivers/gpu/drm/bridge/analogix/analogix-i2c-dptx.c: In function 'anx_dp_aux_transfer':
././include/linux/compiler_types.h:492:20: warning: asm operand 0 probably doesn't match constraints

I was unable to find an alternate constraint that the compiler would accept.

I don't know if the same problem is seen with older compilers on other architectures,
or if the problem is relevant in the first place.

gcc 10.3.0 and later do not have this problem. I also tried s390 builds with gcc 9.4
and 9.5 but they both crash for unrelated reasons.

If this is a concern, the best idea I have is to make KUNIT_SUPPRESS_BACKTRACE
depend on, say,
	depends on CC_IS_CLANG || (CC_IS_GCC && GCC_VERSION >= 100300)

A more complex solution might be to define an architecture flag such
as HAVE_SUPPRESS_BACKTRACE, make that conditional on the gcc version
for s390 only, and make KUNIT_SUPPRESS_BACKTRACE depend on it.

Guenter



^ permalink raw reply

* Re: [PATCH v4 3/8] bits: introduce fixed-type genmasks
From: Yury Norov @ 2025-03-21 17:05 UTC (permalink / raw)
  To: Anshuman Khandual
  Cc: linux-arm-kernel, Catalin Marinas, Lucas De Marchi,
	Vincent Mailhol, Rasmus Villemoes, Jani Nikula, Joonas Lahtinen,
	Rodrigo Vivi, Tvrtko Ursulin, David Airlie, Simona Vetter,
	Andrew Morton, linux-kernel, intel-gfx, dri-devel, Andi Shyti,
	David Laight, Dmitry Baryshkov, Andy Shevchenko, Jani Nikula
In-Reply-To: <1aba17f1-0cd2-429c-8338-28387ec16314@arm.com>

On Wed, Mar 19, 2025 at 09:43:06AM +0530, Anshuman Khandual wrote:
> 
> 
> On 3/19/25 09:04, Anshuman Khandual wrote:
> > On 3/19/25 07:16, Yury Norov wrote:
> >> + Catalin Marinas, ARM maillist
> >>
> >> Hi Catalin and everyone,
> > 
> > Hello Yury,
> > 
> >>
> >> Anshuman Khandual asked me to merge GENMASK_U128() saying it's
> >> important for ARM to stabilize API. While it's a dead code, I
> >> accepted his patch as he promised to add users shortly.
> >>
> >> Now it's more than half a year since that. There's no users,
> >> and no feedback from Anshuman.
> > 
> > My apologies to have missed your email earlier. Please find response
> > for the earlier email below as well.
> > 
> >>
> >> Can you please tell if you still need the macro? I don't want to
> >> undercut your development, but if you don't need 128-bit genmasks
> >> there's no reason to have a dead code in the uapi.
> > 
> > The code base specifically using GENMASK_U128() has not been posted
> > upstream (probably in next couple of months or so) till now, except
> > the following patch which has been not been merged and still under
> > review and development.
> > 
> > https://lore.kernel.org/lkml/20240801054436.612024-1-anshuman.khandual@arm.com/
> > 
> >>
> >> Thanks,
> >> Yury
> >>
> >> On Wed, Mar 05, 2025 at 10:22:47AM -0500, Yury Norov wrote:
> >>> + Anshuman Khandual <anshuman.khandual@arm.com>
> >>>
> >>> Anshuman,
> >>>
> >>> I merged your GENMASK_U128() because you said it's important for your
> >>> projects, and that it will get used in the kernel soon.
> >>>
> >>> Now it's in the kernel for more than 6 month, but no users were added.
> >>> Can you clarify if you still need it, and if so why it's not used?
> > 
> > We would need it but although the code using GENMASK_U128() has not been
> > posted upstream.
> > 
> >>>
> >>> As you see, people add another fixed-types GENMASK() macros, and their
> >>> implementation differ from GENMASK_U128().
> > 
> > I will take a look. Is GENMASK_U128() being problematic for the this new
> > scheme ?
> > 
> >>>
> >>> My second concern is that __GENMASK_U128() is declared in uapi, while
> >>> the general understanding for other fixed-type genmasks is that they
> >>> are not exported to users. Do you need this macro to be exported to
> >>> userspace? Can you show how and where it is used there?
> > 
> > No, not atleast right now.

Ok, thanks.

> > These were moved into uapi subsequently via the following commit.
> > 
> > 21a3a3d015aee ("tools headers: Synchronize {uapi/}linux/bits.h with the kernel sources")
> > 
> > But in general GENMASK_U128() is needed for generating 128 bit page table
> > entries, related flags and masks whether in kernel or in user space for
> > writing kernel test cases etc.
> 
> In the commit 947697c6f0f7 ("uapi: Define GENMASK_U128"), GENMASK_U128() gets defined
> using __GENMASK_U128() which in turn calls __BIT128() - both of which are defined in
> UAPI headers inside (include/uapi/linux/). 
> 
> Just wondering - are you suggesting to move these helpers from include/uapi/linux/ to
> include/linux/bits.h instead ?

Vincent is working on fixed-width GENMASK_Uxx() based on GENMASK_TYPE().

https://lore.kernel.org/lkml/20250308-fixed-type-genmasks-v6-0-f59315e73c29@wanadoo.fr/T/

The series adds a general GENMASK_TYPE() in the linux/bits.h. I'd like
all fixed-widh genmasks to be based on it. The implementation doesn't
allow to move GENMASK_TYPE() the to uapi easily.

There was a discussion regarding that, and for now the general understanding
is that userspace doesn't need GENMASK_Uxx().

Are your proposed tests based on the in-kernel tools/ ? If so, linux/bits.h
will be available for you.

Vincent,

Can you please experiment with moving GENMASK_U128() to linux/bits.h
and switching it to GENMASK_TYPE()-based implementation?

If it works, we can do it after merging of GENMASK_TYPE() and
ancestors.

Thanks,
Yury


^ permalink raw reply

* Re: [PATCH] KVM: selftests: Fix a couple "prio" signedness bugs
From: Marc Zyngier @ 2025-03-21 17:04 UTC (permalink / raw)
  To: Dan Carpenter
  Cc: Ricardo Koller, Oliver Upton, Joey Gouly, Suzuki K Poulose,
	Zenghui Yu, Paolo Bonzini, Shuah Khan, Andrew Jones,
	linux-arm-kernel, kvmarm, kvm, linux-kselftest, linux-kernel,
	kernel-janitors
In-Reply-To: <ca579322-dc9d-4300-bd74-7e9240e930c7@stanley.mountain>

Hey Dan,

On Fri, 21 Mar 2025 14:32:53 +0000,
Dan Carpenter <dan.carpenter@linaro.org> wrote:
> 
> There is an assert which relies on "prio" to be signed.
> 
> 	GUEST_ASSERT(prio >= 0);
> 
> Change the type from uint32_t to int.
> 
> Fixes: 728fcc46d2c2 ("KVM: selftests: aarch64: Add test for restoring active IRQs")
> Fixes: 0ad3ff4a6adc ("KVM: selftests: aarch64: Add preemption tests in vgic_irq")
> Signed-off-by: Dan Carpenter <dan.carpenter@linaro.org>
> ---
> ---
>  tools/testing/selftests/kvm/arm64/vgic_irq.c | 6 ++++--
>  1 file changed, 4 insertions(+), 2 deletions(-)
> 
> diff --git a/tools/testing/selftests/kvm/arm64/vgic_irq.c b/tools/testing/selftests/kvm/arm64/vgic_irq.c
> index f4ac28d53747..e89c0fc5eef3 100644
> --- a/tools/testing/selftests/kvm/arm64/vgic_irq.c
> +++ b/tools/testing/selftests/kvm/arm64/vgic_irq.c
> @@ -294,7 +294,8 @@ static void guest_restore_active(struct test_args *args,
>  		uint32_t first_intid, uint32_t num,
>  		kvm_inject_cmd cmd)
>  {
> -	uint32_t prio, intid, ap1r;
> +	uint32_t intid, ap1r;
> +	int prio;
>  	int i;
>  
>  	/*
> @@ -362,7 +363,8 @@ static void test_inject_preemption(struct test_args *args,
>  		uint32_t first_intid, int num,
>  		kvm_inject_cmd cmd)
>  {
> -	uint32_t intid, prio, step = KVM_PRIO_STEPS;
> +	uint32_t intid, step = KVM_PRIO_STEPS;
> +	int prio;
>  	int i;
>  
>  	/* Set the priorities of the first (KVM_NUM_PRIOS - 1) IRQs

I think this is going in the wrong direction. A GIC priority is an
unsigned 8bit value as per the architecture definition.

So the type used by the test the first place looks wrong (it is too
wide), and the assertion is pointless.

Thanks,

	M.

-- 
Without deviation from the norm, progress is not possible.


^ permalink raw reply

* Re: [PATCH v2 4/4] iommu: Get DT/ACPI parsing into the proper probe path
From: Robin Murphy @ 2025-03-21 16:48 UTC (permalink / raw)
  To: Marek Szyprowski, Lorenzo Pieralisi, Hanjun Guo, Sudeep Holla,
	Rafael J. Wysocki, Len Brown, Russell King, Greg Kroah-Hartman,
	Danilo Krummrich, Stuart Yoder, Nipun Gupta, Nikhil Agarwal,
	Joerg Roedel, Will Deacon, Rob Herring, Saravana Kannan,
	Bjorn Helgaas
  Cc: linux-acpi, linux-arm-kernel, linux-kernel, iommu, devicetree,
	linux-pci, Charan Teja Kalla
In-Reply-To: <ace3a01f-4700-4455-ada3-0f88fc8ea4cd@samsung.com>

On 21/03/2025 12:15 pm, Marek Szyprowski wrote:
> On 17.03.2025 19:22, Robin Murphy wrote:
>> On 17/03/2025 7:37 am, Marek Szyprowski wrote:
>>> On 13.03.2025 15:12, Robin Murphy wrote:
>>>> On 2025-03-13 1:06 pm, Robin Murphy wrote:
>>>>> On 2025-03-13 12:23 pm, Marek Szyprowski wrote:
>>>>>> On 13.03.2025 12:01, Robin Murphy wrote:
>>>>>>> On 2025-03-13 9:56 am, Marek Szyprowski wrote:
>>>>>>> [...]
>>>>>>>> This patch landed in yesterday's linux-next as commit bcb81ac6ae3c
>>>>>>>> ("iommu: Get DT/ACPI parsing into the proper probe path"). In my
>>>>>>>> tests I
>>>>>>>> found it breaks booting of ARM64 RK3568-based Odroid-M1 board
>>>>>>>> (arch/arm64/boot/dts/rockchip/rk3568-odroid-m1.dts). Here is the
>>>>>>>> relevant kernel log:
>>>>>>>
>>>>>>> ...and the bug-flushing-out begins!
>>>>>>>
>>>>>>>> Unable to handle kernel NULL pointer dereference at virtual address
>>>>>>>> 00000000000003e8
>>>>>>>> Mem abort info:
>>>>>>>>        ESR = 0x0000000096000004
>>>>>>>>        EC = 0x25: DABT (current EL), IL = 32 bits
>>>>>>>>        SET = 0, FnV = 0
>>>>>>>>        EA = 0, S1PTW = 0
>>>>>>>>        FSC = 0x04: level 0 translation fault
>>>>>>>> Data abort info:
>>>>>>>>        ISV = 0, ISS = 0x00000004, ISS2 = 0x00000000
>>>>>>>>        CM = 0, WnR = 0, TnD = 0, TagAccess = 0
>>>>>>>>        GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0
>>>>>>>> [00000000000003e8] user address but active_mm is swapper
>>>>>>>> Internal error: Oops: 0000000096000004 [#1] PREEMPT SMP
>>>>>>>> Modules linked in:
>>>>>>>> CPU: 3 UID: 0 PID: 1 Comm: swapper/0 Not tainted 6.14.0-rc3+ #15533
>>>>>>>> Hardware name: Hardkernel ODROID-M1 (DT)
>>>>>>>> pstate: 00400009 (nzcv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
>>>>>>>> pc : devm_kmalloc+0x2c/0x114
>>>>>>>> lr : rk_iommu_of_xlate+0x30/0x90
>>>>>>>> ...
>>>>>>>> Call trace:
>>>>>>>>       devm_kmalloc+0x2c/0x114 (P)
>>>>>>>>       rk_iommu_of_xlate+0x30/0x90
>>>>>>>
>>>>>>> Yeah, looks like this is doing something a bit questionable which
>>>>>>> can't
>>>>>>> work properly. TBH the whole dma_dev thing could probably be
>>>>>>> cleaned up
>>>>>>> now that we have proper instances, but for now does this work?
>>>>>>
>>>>>> Yes, this patch fixes the problem I've observed.
>>>>>>
>>>>>> Reported-by: Marek Szyprowski <m.szyprowski@samsung.com>
>>>>>> Tested-by: Marek Szyprowski <m.szyprowski@samsung.com>
>>>>>>
>>>>>> BTW, this dma_dev idea has been borrowed from my exynos_iommu driver
>>>>>> and
>>>>>> I doubt it can be cleaned up.
>>>>>
>>>>> On the contrary I suspect they both can - it all dates back to when
>>>>> we had the single global platform bus iommu_ops and the SoC drivers
>>>>> were forced to bodge their own notion of multiple instances, but with
>>>>> the modern core code, ops are always called via a valid IOMMU
>>>>> instance or domain, so in principle it should always be possible to
>>>>> get at an appropriate IOMMU device now. IIRC it was mostly about
>>>>> allocating and DMA-mapping the pagetables in domain_alloc, where the
>>>>> private notion of instances didn't have enough information, but
>>>>> domain_alloc_paging solves that.
>>>>
>>>> Bah, in fact I think I am going to have to do that now, since although
>>>> it doesn't crash, rk_domain_alloc_paging() will also be failing for
>>>> the same reason. Time to find a PSU for the RK3399 board, I guess...
>>>>
>>>> (Or maybe just move the dma_dev assignment earlier to match Exynos?)
>>>
>>> Well I just found that Exynos IOMMU is also broken on some on my test
>>> boards. It looks that the runtime pm links are somehow not correctly
>>> established. I will try to analyze this later in the afternoon.
>>
>> Hmm, I tried to get an Odroid-XU3 up and running, but it seems unable
>> to boot my original 6.14-rc3-based branch - even with the IOMMU driver
>> disabled, it's consistently dying somewhere near (or just after) init
>> with what looks like some catastrophic memory corruption issue - very
>> occasionally it's managed to print the first line of various different
>> panics.
>>
>> Before that point though, with the IOMMU driver enabled it does appear
>> to show signs of working OK:
>>
>> [    0.649703] exynos-sysmmu 14650000.sysmmu: hardware version: 3.3
>> [    0.654220] platform 14450000.mixer: Adding to iommu group 1
>> ...
>> [    2.680920] exynos-mixer 14450000.mixer:
>> exynos_iommu_attach_device: Attached IOMMU with pgtable 0x42924000
>> ...
>> [    5.196674] exynos-mixer 14450000.mixer:
>> exynos_iommu_identity_attach: Restored IOMMU to IDENTITY from pgtable
>> 0x42924000
>> [    5.207091] exynos-mixer 14450000.mixer:
>> exynos_iommu_attach_device: Attached IOMMU with pgtable 0x42884000
>>
>>
>> The multi-instance stuff in probe/release does look a bit suspect,
>> however - seems like the second instance probe would overwrite the
>> first instance's links, and then there would be a double-del() if the
>> device were ever actually released again? I may have made that much
>> more likely to happen, but I suspect it was already possible with
>> async driver probe...
> 
> That is really strange. My Odroid XU3 boots fine from commit
> bcb81ac6ae3c ("iommu: Get DT/ACPI parsing into the proper probe path"),
> although the IOMMU seems not to be working correctly. I've tested this
> with 14450000.mixer device (one need to attach HDMI cable to get it
> activated) and it looks that the video data are not being read from
> memory at all (the lack of VSYNC is reported, no IOMMU fault). However,
> from time to time, everything initializes and works properly.

Urgh, seems my mistake was assuming exynos_defconfig was the right thing 
to begin from - bcb81ac6ae3c with that still dies in the same way (this 
time I saw a hint of spin_bug() being hit...), however a 
multi_v7_defconfig build does get to userspace OK again with no obvious 
signs of distress:

[root@alarm ~]# grep -Hr . /sys/kernel/iommu_groups/*/type
/sys/kernel/iommu_groups/0/type:identity
/sys/kernel/iommu_groups/1/type:identity
/sys/kernel/iommu_groups/10/type:identity
/sys/kernel/iommu_groups/2/type:identity
/sys/kernel/iommu_groups/3/type:identity
/sys/kernel/iommu_groups/4/type:identity
/sys/kernel/iommu_groups/5/type:identity
/sys/kernel/iommu_groups/6/type:identity
/sys/kernel/iommu_groups/7/type:identity
/sys/kernel/iommu_groups/8/type:identity
/sys/kernel/iommu_groups/9/type:identity

Annoyingly I do have an adapter for the fiddly micro-HDMI, but it's at 
home :(

> It looks that this is somehow related to the different IOMMU/DMA-mapping
> glue code, as the other boards (ARM64 based) with exactly the same
> Exynos IOMMU driver always work fine. I've tried to figure out what
> actually happens, but so far I didn't get anything for sure. Disabling
> the call to dev->bus->dma_configure(dev) from iommu_init_device() seems
> to be fixing this, but this is almost equal to the revert of the
> $subject patch. I don't get why calling it in iommu_init_device() causes
> problems. It also doesn't look that this is anyhow related to the
> multi-instance stuff, as the same happens if I only leave a single
> exynos-sysmmu instance and its client (only 14450000.mixer device in the
> system).

On a hunch I stuck a print in exynos_iommu_probe_device(), and it looks 
like in fact device_link_add() isn't getting called at all, and indeed 
your symptoms do sound like they could be explained by the IOMMU not 
being reliably resumed... lemme stare at exynos_iommu_of_xlate() a bit 
longer...

Thanks,
Robin.


^ permalink raw reply

* [PATCH 2/2] firmware: exynos-acpm: allow use during system shutdown
From: André Draszik @ 2025-03-21 16:40 UTC (permalink / raw)
  To: Tudor Ambarus, Krzysztof Kozlowski, Alim Akhtar
  Cc: Peter Griffin, Will McVicker, kernel-team, linux-kernel,
	linux-samsung-soc, linux-arm-kernel, André Draszik
In-Reply-To: <20250321-acpm-atomic-v1-0-fb887bde7e61@linaro.org>

We need to access the PMIC during late system shutdown and at that time
we are not allowed to sleep anymore.

To make this case work, detect this condition and use busy waiting via
udelay() instead of usleep_range() in that situation.

The code isn't switched over to udelay() unconditionally so as to not
waste resources during normal operation. acpm_may_sleep() was heavily
inspired by the I2C subsystem's i2c_in_atomic_xfer_mode().

Signed-off-by: André Draszik <andre.draszik@linaro.org>

---
udelay(10) causes a checkpatch warning (it suggests to use
usleep_range() instead for usec >= 10), but that's exactly what we can
not do.
Reducing the udelay to be smaller will generally cause the loop to be
iterated more than once, which I wanted to avoid.
I could reflow the code to hide the actual value from checkpatch, e.g.
with the help of a local variable if that is preferred to ignoring the
checkpatch warning.
---
 drivers/firmware/samsung/exynos-acpm.c | 19 ++++++++++++++++++-
 1 file changed, 18 insertions(+), 1 deletion(-)

diff --git a/drivers/firmware/samsung/exynos-acpm.c b/drivers/firmware/samsung/exynos-acpm.c
index d7ed6b77a957af5db5beba7deecce13ac7b30fd2..33cde6e88e2c0773fdd36c80927c77d3bcb44135 100644
--- a/drivers/firmware/samsung/exynos-acpm.c
+++ b/drivers/firmware/samsung/exynos-acpm.c
@@ -15,6 +15,8 @@
 #include <linux/firmware/samsung/exynos-acpm-protocol.h>
 #include <linux/io.h>
 #include <linux/iopoll.h>
+#include <linux/irqflags.h>
+#include <linux/kernel.h>
 #include <linux/mailbox/exynos-message.h>
 #include <linux/mailbox_client.h>
 #include <linux/module.h>
@@ -24,6 +26,7 @@
 #include <linux/of_address.h>
 #include <linux/of_platform.h>
 #include <linux/platform_device.h>
+#include <linux/preempt.h>
 #include <linux/slab.h>
 #include <linux/types.h>
 
@@ -272,6 +275,17 @@ static int acpm_get_rx(struct acpm_chan *achan, const struct acpm_xfer *xfer)
 	return 0;
 }
 
+/*
+ * When ACPM transfers happen very late, e.g. to access a PMIC when powering
+ * down, we can not sleep. We do want to sleep in the normal case, though, to
+ * avoid wasting CPU cycles!
+ */
+static bool acpm_may_sleep(void)
+{
+	return system_state <= SYSTEM_RUNNING ||
+		(IS_ENABLED(CONFIG_PREEMPT_COUNT) ? preemptible() : !irqs_disabled());
+}
+
 /**
  * acpm_dequeue_by_polling() - RX dequeue by polling.
  * @achan:	ACPM channel info.
@@ -299,7 +313,10 @@ static int acpm_dequeue_by_polling(struct acpm_chan *achan,
 			return 0;
 
 		/* Determined experimentally. */
-		usleep_range(20, 30);
+		if (!acpm_may_sleep())
+			udelay(10);
+		else
+			usleep_range(20, 30);
 	} while (!ktime_after(ktime_get(), timeout));
 
 	dev_err(dev, "Timeout! ch:%u s:%u bitmap:%lx.\n",

-- 
2.49.0.395.g12beb8f557-goog



^ permalink raw reply related

* [PATCH 1/2] firmware: exynos-acpm: use ktime APIs for timeout detection
From: André Draszik @ 2025-03-21 16:40 UTC (permalink / raw)
  To: Tudor Ambarus, Krzysztof Kozlowski, Alim Akhtar
  Cc: Peter Griffin, Will McVicker, kernel-team, linux-kernel,
	linux-samsung-soc, linux-arm-kernel, André Draszik
In-Reply-To: <20250321-acpm-atomic-v1-0-fb887bde7e61@linaro.org>

acpm_dequeue_by_polling() uses a loop counter and assumes that each
iteration of the loop takes 20us. It may take longer, though, because
usleep_range() may sleep a different amount.

Switch to using ktime_get() / ktime_after() to detect the timeout
condition more reliably.

This change also makes the code easier to follow and it allows us to
adjust the sleep without having to adjust the loop counter exit
condition. This will come in useful in a follow-up patch that changes
the delays.

Signed-off-by: André Draszik <andre.draszik@linaro.org>
---
 drivers/firmware/samsung/exynos-acpm.c | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/drivers/firmware/samsung/exynos-acpm.c b/drivers/firmware/samsung/exynos-acpm.c
index a85b2dbdd9f0d7b1f327f54a0a283e4f32587a98..d7ed6b77a957af5db5beba7deecce13ac7b30fd2 100644
--- a/drivers/firmware/samsung/exynos-acpm.c
+++ b/drivers/firmware/samsung/exynos-acpm.c
@@ -32,8 +32,7 @@
 
 #define ACPM_PROTOCOL_SEQNUM		GENMASK(21, 16)
 
-/* The unit of counter is 20 us. 5000 * 20 = 100 ms */
-#define ACPM_POLL_TIMEOUT		5000
+#define ACPM_POLL_TIMEOUT_US		(100 * USEC_PER_MSEC)
 #define ACPM_TX_TIMEOUT_US		500000
 
 #define ACPM_GS101_INITDATA_BASE	0xa000
@@ -284,12 +283,13 @@ static int acpm_dequeue_by_polling(struct acpm_chan *achan,
 				   const struct acpm_xfer *xfer)
 {
 	struct device *dev = achan->acpm->dev;
-	unsigned int cnt_20us = 0;
+	ktime_t timeout;
 	u32 seqnum;
 	int ret;
 
 	seqnum = FIELD_GET(ACPM_PROTOCOL_SEQNUM, xfer->txd[0]);
 
+	timeout = ktime_add_us(ktime_get(), ACPM_POLL_TIMEOUT_US);
 	do {
 		ret = acpm_get_rx(achan, xfer);
 		if (ret)
@@ -300,11 +300,10 @@ static int acpm_dequeue_by_polling(struct acpm_chan *achan,
 
 		/* Determined experimentally. */
 		usleep_range(20, 30);
-		cnt_20us++;
-	} while (cnt_20us < ACPM_POLL_TIMEOUT);
+	} while (!ktime_after(ktime_get(), timeout));
 
-	dev_err(dev, "Timeout! ch:%u s:%u bitmap:%lx, cnt_20us = %d.\n",
-		achan->id, seqnum, achan->bitmap_seqnum[0], cnt_20us);
+	dev_err(dev, "Timeout! ch:%u s:%u bitmap:%lx.\n",
+		achan->id, seqnum, achan->bitmap_seqnum[0]);
 
 	return -ETIME;
 }

-- 
2.49.0.395.g12beb8f557-goog



^ permalink raw reply related

* [PATCH 0/2] firmware: exynos-acpm: allow use during system shutdown
From: André Draszik @ 2025-03-21 16:40 UTC (permalink / raw)
  To: Tudor Ambarus, Krzysztof Kozlowski, Alim Akhtar
  Cc: Peter Griffin, Will McVicker, kernel-team, linux-kernel,
	linux-samsung-soc, linux-arm-kernel, André Draszik

One user of this ACPM driver is a PMIC driver that needs to communicate
with the PMIC during late system shutdown and at that time we are not
allowed to sleep anymore.

This series address this by switching the code to using udelay() in the
specific case of system shutdown. This approach was inspired by I2C's
i2c_in_atomic_xfer_mode(), which has to deal with a similar corner
case.

Signed-off-by: André Draszik <andre.draszik@linaro.org>
---
André Draszik (2):
      firmware: exynos-acpm: use ktime APIs for timeout detection
      firmware: exynos-acpm: allow use during system shutdown

 drivers/firmware/samsung/exynos-acpm.c | 34 +++++++++++++++++++++++++---------
 1 file changed, 25 insertions(+), 9 deletions(-)
---
base-commit: c4d4884b67802c41fd67399747165d65c770621a
change-id: 20250321-acpm-atomic-033775b051ef

Best regards,
-- 
André Draszik <andre.draszik@linaro.org>



^ permalink raw reply

* Re: [PATCH v3 1/1] coresight: prevent deactivate active config while enabling the config
From: Mike Leach @ 2025-03-21 16:40 UTC (permalink / raw)
  To: Yeo Reum Yun
  Cc: Suzuki Poulose, james.clark@linaro.org,
	alexander.shishkin@linux.intel.com, coresight@lists.linaro.org,
	linux-arm-kernel@lists.infradead.org,
	linux-kernel@vger.kernel.org
In-Reply-To: <GV1PR08MB10521BB7C93822F5124F2D66EFBD22@GV1PR08MB10521.eurprd08.prod.outlook.com>

Hi

On Fri, 14 Mar 2025 at 15:25, Yeo Reum Yun <YeoReum.Yun@arm.com> wrote:
>
> Hi, Mike.
>
> > >  static void cscfg_remove_owned_csdev_features(struct coresight_device *csdev, void *load_owner)
> > > @@ -867,6 +870,28 @@ void cscfg_csdev_reset_feats(struct coresight_device *csdev)
> > >  }
> > >  EXPORT_SYMBOL_GPL(cscfg_csdev_reset_feats);
> > >
> > > +static bool cscfg_config_desc_get(struct cscfg_config_desc *config_desc, bool enable)
> > > +{
> > > +       if (enable)
> > > +               return atomic_inc_not_zero(&config_desc->active_cnt);
> > > +
> >
> > Not sure why we have an "enable" parameter here - it completely
> > changes the meaning of the function - with no comment at the start.
>
> Sorry. But what I intended is to distinguish
>     - activation of config
>     - enable of activated config.
> Because, current coresight doesn't grab the module reference on enable of activate config,
> But It grabs that reference only in activation.
> That's why I used to "enable" parameter to distinguish this
> while I integrate with module_owner count.
>
> > >         list_for_each_entry(config_desc, &cscfg_mgr->config_desc_list, item) {
> > >                 if ((unsigned long)config_desc->event_ea->var == cfg_hash) {
> > > -                       atomic_dec(&config_desc->active_cnt);
> > >                         atomic_dec(&cscfg_mgr->sys_active_cnt);
> > > -                       cscfg_owner_put(config_desc->load_owner);
> > > +                       cscfg_config_desc_put(config_desc);
> > >                         dev_dbg(cscfg_device(), "Deactivate config %s.\n", config_desc->name);
> > >                         break;
> > >                 }
> > > @@ -1047,7 +1066,7 @@ int cscfg_csdev_enable_active_config(struct coresight_device *csdev,
> > >                                      unsigned long cfg_hash, int preset)
> > >  {
> > >         struct cscfg_config_csdev *config_csdev_active = NULL, *config_csdev_item;
> > > -       const struct cscfg_config_desc *config_desc;
> > > +       struct cscfg_config_desc *config_desc;
> > >         unsigned long flags;
> > >         int err = 0;
> > >
> > > @@ -1062,8 +1081,8 @@ int cscfg_csdev_enable_active_config(struct coresight_device *csdev,
> > >         raw_spin_lock_irqsave(&csdev->cscfg_csdev_lock, flags);
> > >         list_for_each_entry(config_csdev_item, &csdev->config_csdev_list, node) {
> > >                 config_desc = config_csdev_item->config_desc;
> > > -               if ((atomic_read(&config_desc->active_cnt)) &&
> > > -                   ((unsigned long)config_desc->event_ea->var == cfg_hash)) {
> > > +               if (((unsigned long)config_desc->event_ea->var == cfg_hash) &&
> > > +                               cscfg_config_desc_get(config_desc, true)) {
> > >
> > This obfuscates the logic of the comparisons without good reason. With
> > the true parameter, the function does no "get" operation but just
> > replicates the logic being replaced - checking the active_cnt is
> > non-zero.
> >
> > Restore this to the original logic to make it readable again
>
> It's not a replicates of comparsion logic, but if true,

sorry - missed that point .

> It get the reference of active_cnt but not get module reference.
> The fundemental fault in the UAF becase of just "atomic_read()"
> so, it should hold reference in here.
>
> So, If you think the cscfg_config_desc_get()'s parameter makes obfuscation,
> I think there're two way to modfiy.
>
>     1. cscfg_config_desc_get()/put() always grab/drop the module count.
>     2. remove cscfg_config_desc_get()/put() but just use atomic_XXX(&active_cnt) only
>         with cscfg_owner_get()/put()
>
> Any thougt?
>
> Thanks!
>
>

The get and put functions are asymmetrical w.r.t. owner.

The put will put owner if active count decrements to 0,
The get if not on enable path will put owner unconditionally.

This means that the caller has to work out the correct input conditions.

Might be better if:-

get_desc()
{
    if (! desc->refcnt) {
       if (!get_owner())
           return false;
   }
   desc->refcnt++;
    return true;
}

put_desc()
{
   desc->refcnt--;
  if (! desc->refcnt)
    put_owner()
}

and then change the enable_active_cfg matching logic to

if ( (desc->refcnt) && (desc->hash == hash) && get_desc()) {
     < set active cfg>
}



-- 
Mike Leach
Principal Engineer, ARM Ltd.
Manchester Design Centre. UK


^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox