* [PATCH AUTOSEL 6.1 2/9] audit: Send netlink ACK before setting connection in auditd_set
2024-01-16 0:14 [PATCH AUTOSEL 6.1 1/9] regulator: core: Only increment use_count when enable_count changes Sasha Levin
@ 2024-01-16 0:14 ` Sasha Levin
2024-01-16 0:14 ` [PATCH AUTOSEL 6.1 3/9] platform/chrome: cros_ec_debugfs: Fix permissions for panicinfo Sasha Levin
` (6 subsequent siblings)
7 siblings, 0 replies; 9+ messages in thread
From: Sasha Levin @ 2024-01-16 0:14 UTC (permalink / raw)
To: linux-kernel, stable; +Cc: Chris Riches, Paul Moore, Sasha Levin, eparis, audit
From: Chris Riches <chris.riches@nutanix.com>
[ Upstream commit 022732e3d846e197539712e51ecada90ded0572a ]
When auditd_set sets the auditd_conn pointer, audit messages can
immediately be put on the socket by other kernel threads. If the backlog
is large or the rate is high, this can immediately fill the socket
buffer. If the audit daemon requested an ACK for this operation, a full
socket buffer causes the ACK to get dropped, also setting ENOBUFS on the
socket.
To avoid this race and ensure ACKs get through, fast-track the ACK in
this specific case to ensure it is sent before auditd_conn is set.
Signed-off-by: Chris Riches <chris.riches@nutanix.com>
[PM: fix some tab vs space damage]
Signed-off-by: Paul Moore <paul@paul-moore.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
kernel/audit.c | 31 ++++++++++++++++++++++++-------
1 file changed, 24 insertions(+), 7 deletions(-)
diff --git a/kernel/audit.c b/kernel/audit.c
index 9bc0b0301198..99127521cda8 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -488,15 +488,19 @@ static void auditd_conn_free(struct rcu_head *rcu)
* @pid: auditd PID
* @portid: auditd netlink portid
* @net: auditd network namespace pointer
+ * @skb: the netlink command from the audit daemon
+ * @ack: netlink ack flag, cleared if ack'd here
*
* Description:
* This function will obtain and drop network namespace references as
* necessary. Returns zero on success, negative values on failure.
*/
-static int auditd_set(struct pid *pid, u32 portid, struct net *net)
+static int auditd_set(struct pid *pid, u32 portid, struct net *net,
+ struct sk_buff *skb, bool *ack)
{
unsigned long flags;
struct auditd_connection *ac_old, *ac_new;
+ struct nlmsghdr *nlh;
if (!pid || !net)
return -EINVAL;
@@ -508,6 +512,13 @@ static int auditd_set(struct pid *pid, u32 portid, struct net *net)
ac_new->portid = portid;
ac_new->net = get_net(net);
+ /* send the ack now to avoid a race with the queue backlog */
+ if (*ack) {
+ nlh = nlmsg_hdr(skb);
+ netlink_ack(skb, nlh, 0, NULL);
+ *ack = false;
+ }
+
spin_lock_irqsave(&auditd_conn_lock, flags);
ac_old = rcu_dereference_protected(auditd_conn,
lockdep_is_held(&auditd_conn_lock));
@@ -1201,7 +1212,8 @@ static int audit_replace(struct pid *pid)
return auditd_send_unicast_skb(skb);
}
-static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
+static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
+ bool *ack)
{
u32 seq;
void *data;
@@ -1294,7 +1306,8 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
/* register a new auditd connection */
err = auditd_set(req_pid,
NETLINK_CB(skb).portid,
- sock_net(NETLINK_CB(skb).sk));
+ sock_net(NETLINK_CB(skb).sk),
+ skb, ack);
if (audit_enabled != AUDIT_OFF)
audit_log_config_change("audit_pid",
new_pid,
@@ -1539,9 +1552,10 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
* Parse the provided skb and deal with any messages that may be present,
* malformed skbs are discarded.
*/
-static void audit_receive(struct sk_buff *skb)
+static void audit_receive(struct sk_buff *skb)
{
struct nlmsghdr *nlh;
+ bool ack;
/*
* len MUST be signed for nlmsg_next to be able to dec it below 0
* if the nlmsg_len was not aligned
@@ -1554,9 +1568,12 @@ static void audit_receive(struct sk_buff *skb)
audit_ctl_lock();
while (nlmsg_ok(nlh, len)) {
- err = audit_receive_msg(skb, nlh);
- /* if err or if this message says it wants a response */
- if (err || (nlh->nlmsg_flags & NLM_F_ACK))
+ ack = nlh->nlmsg_flags & NLM_F_ACK;
+ err = audit_receive_msg(skb, nlh, &ack);
+
+ /* send an ack if the user asked for one and audit_receive_msg
+ * didn't already do it, or if there was an error. */
+ if (ack || err)
netlink_ack(skb, nlh, err, NULL);
nlh = nlmsg_next(nlh, &len);
--
2.43.0
^ permalink raw reply related [flat|nested] 9+ messages in thread* [PATCH AUTOSEL 6.1 3/9] platform/chrome: cros_ec_debugfs: Fix permissions for panicinfo
2024-01-16 0:14 [PATCH AUTOSEL 6.1 1/9] regulator: core: Only increment use_count when enable_count changes Sasha Levin
2024-01-16 0:14 ` [PATCH AUTOSEL 6.1 2/9] audit: Send netlink ACK before setting connection in auditd_set Sasha Levin
@ 2024-01-16 0:14 ` Sasha Levin
2024-01-16 0:14 ` [PATCH AUTOSEL 6.1 4/9] ACPI: video: Add quirk for the Colorful X15 AT 23 Laptop Sasha Levin
` (5 subsequent siblings)
7 siblings, 0 replies; 9+ messages in thread
From: Sasha Levin @ 2024-01-16 0:14 UTC (permalink / raw)
To: linux-kernel, stable
Cc: Avadhut Naik, Greg Kroah-Hartman, Tony Luck, Rafael J . Wysocki,
Sasha Levin, bleung, tzungbi, chrome-platform
From: Avadhut Naik <Avadhut.Naik@amd.com>
[ Upstream commit 0706526ec7704dcd046239078ac175d11a88a95e ]
The debugfs_create_blob() function has been used to create read-only binary
blobs in debugfs. The function filters out permissions, other than S_IRUSR,
S_IRGRP and S_IROTH, provided while creating the blobs.
This behavior, however, is being changed by a previous patch in the
series (fs: debugfs: Add write functionality to debugfs blobs) which makes
the binary blobs writable by owners. Thus, all permissions provided while
creating the blobs, except S_IRUSR, S_IWUSR, S_IRGRP and S_IROTH, will be
filtered by debugfs_create_blob().
As such, rectify the permissions of the panicinfo file, since the S_IFREG
flag was being filtered out by debugfs_create_blob() anyway. Moreover, that
flag will always be set for the panicinfo file through
__debugfs_create_file().
Signed-off-by: Avadhut Naik <Avadhut.Naik@amd.com>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Reviewed-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
drivers/platform/chrome/cros_ec_debugfs.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/platform/chrome/cros_ec_debugfs.c b/drivers/platform/chrome/cros_ec_debugfs.c
index 4e63adf083ea..d956eef6e577 100644
--- a/drivers/platform/chrome/cros_ec_debugfs.c
+++ b/drivers/platform/chrome/cros_ec_debugfs.c
@@ -427,7 +427,7 @@ static int cros_ec_create_panicinfo(struct cros_ec_debugfs *debug_info)
debug_info->panicinfo_blob.data = msg->data;
debug_info->panicinfo_blob.size = ret;
- debugfs_create_blob("panicinfo", S_IFREG | 0444, debug_info->dir,
+ debugfs_create_blob("panicinfo", 0444, debug_info->dir,
&debug_info->panicinfo_blob);
return 0;
--
2.43.0
^ permalink raw reply related [flat|nested] 9+ messages in thread* [PATCH AUTOSEL 6.1 4/9] ACPI: video: Add quirk for the Colorful X15 AT 23 Laptop
2024-01-16 0:14 [PATCH AUTOSEL 6.1 1/9] regulator: core: Only increment use_count when enable_count changes Sasha Levin
2024-01-16 0:14 ` [PATCH AUTOSEL 6.1 2/9] audit: Send netlink ACK before setting connection in auditd_set Sasha Levin
2024-01-16 0:14 ` [PATCH AUTOSEL 6.1 3/9] platform/chrome: cros_ec_debugfs: Fix permissions for panicinfo Sasha Levin
@ 2024-01-16 0:14 ` Sasha Levin
2024-01-16 0:14 ` [PATCH AUTOSEL 6.1 5/9] PNP: ACPI: fix fortify warning Sasha Levin
` (4 subsequent siblings)
7 siblings, 0 replies; 9+ messages in thread
From: Sasha Levin @ 2024-01-16 0:14 UTC (permalink / raw)
To: linux-kernel, stable
Cc: Yuluo Qiu, Celeste Liu, Rafael J . Wysocki, Sasha Levin, rafael,
linux-acpi
From: Yuluo Qiu <qyl27@outlook.com>
[ Upstream commit 143176a46bdd3bfbe9ba2462bf94458e80d65ebf ]
The Colorful X15 AT 23 ACPI video-bus device reports spurious
ACPI_VIDEO_NOTIFY_CYCLE events, resulting in spurious KEY_SWITCHVIDEOMODE
events being reported to userspace (and causing trouble there) when
an external screen is plugged in.
Add a quirk setting the report_key_events mask to
REPORT_BRIGHTNESS_KEY_EVENTS so that the ACPI_VIDEO_NOTIFY_CYCLE
events will be ignored, while still reporting brightness up/down
hotkey-presses to userspace normally.
Signed-off-by: Yuluo Qiu <qyl27@outlook.com>
Co-developed-by: Celeste Liu <CoelacanthusHex@gmail.com>
Signed-off-by: Celeste Liu <CoelacanthusHex@gmail.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
drivers/acpi/acpi_video.c | 9 +++++++++
1 file changed, 9 insertions(+)
diff --git a/drivers/acpi/acpi_video.c b/drivers/acpi/acpi_video.c
index ed318485eb19..4a35fa3c6c97 100644
--- a/drivers/acpi/acpi_video.c
+++ b/drivers/acpi/acpi_video.c
@@ -513,6 +513,15 @@ static const struct dmi_system_id video_dmi_table[] = {
DMI_MATCH(DMI_PRODUCT_NAME, "Vostro 3350"),
},
},
+ {
+ .callback = video_set_report_key_events,
+ .driver_data = (void *)((uintptr_t)REPORT_BRIGHTNESS_KEY_EVENTS),
+ .ident = "COLORFUL X15 AT 23",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "COLORFUL"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "X15 AT 23"),
+ },
+ },
/*
* Some machines change the brightness themselves when a brightness
* hotkey gets pressed, despite us telling them not to. In this case
--
2.43.0
^ permalink raw reply related [flat|nested] 9+ messages in thread* [PATCH AUTOSEL 6.1 5/9] PNP: ACPI: fix fortify warning
2024-01-16 0:14 [PATCH AUTOSEL 6.1 1/9] regulator: core: Only increment use_count when enable_count changes Sasha Levin
` (2 preceding siblings ...)
2024-01-16 0:14 ` [PATCH AUTOSEL 6.1 4/9] ACPI: video: Add quirk for the Colorful X15 AT 23 Laptop Sasha Levin
@ 2024-01-16 0:14 ` Sasha Levin
2024-01-16 0:14 ` [PATCH AUTOSEL 6.1 6/9] ACPI: extlog: fix NULL pointer dereference check Sasha Levin
` (3 subsequent siblings)
7 siblings, 0 replies; 9+ messages in thread
From: Sasha Levin @ 2024-01-16 0:14 UTC (permalink / raw)
To: linux-kernel, stable
Cc: Dmitry Antipov, Kees Cook, Rafael J . Wysocki, Sasha Levin,
rafael, linux-acpi, linux-hardening
From: Dmitry Antipov <dmantipov@yandex.ru>
[ Upstream commit ba3f5058db437d919f8468db50483dd9028ff688 ]
When compiling with gcc version 14.0.0 20231126 (experimental)
and CONFIG_FORTIFY_SOURCE=y, I've noticed the following:
In file included from ./include/linux/string.h:295,
from ./include/linux/bitmap.h:12,
from ./include/linux/cpumask.h:12,
from ./arch/x86/include/asm/paravirt.h:17,
from ./arch/x86/include/asm/cpuid.h:62,
from ./arch/x86/include/asm/processor.h:19,
from ./arch/x86/include/asm/cpufeature.h:5,
from ./arch/x86/include/asm/thread_info.h:53,
from ./include/linux/thread_info.h:60,
from ./arch/x86/include/asm/preempt.h:9,
from ./include/linux/preempt.h:79,
from ./include/linux/spinlock.h:56,
from ./include/linux/mmzone.h:8,
from ./include/linux/gfp.h:7,
from ./include/linux/slab.h:16,
from ./include/linux/resource_ext.h:11,
from ./include/linux/acpi.h:13,
from drivers/pnp/pnpacpi/rsparser.c:11:
In function 'fortify_memcpy_chk',
inlined from 'pnpacpi_parse_allocated_vendor' at drivers/pnp/pnpacpi/rsparser.c:158:3,
inlined from 'pnpacpi_allocated_resource' at drivers/pnp/pnpacpi/rsparser.c:249:3:
./include/linux/fortify-string.h:588:25: warning: call to '__read_overflow2_field'
declared with attribute warning: detected read beyond size of field (2nd parameter);
maybe use struct_group()? [-Wattribute-warning]
588 | __read_overflow2_field(q_size_field, size);
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
According to the comments in include/linux/fortify-string.h, 'memcpy()',
'memmove()' and 'memset()' must not be used beyond individual struct
members to ensure that the compiler can enforce protection against
buffer overflows, and, IIUC, this also applies to partial copies from
the particular member ('vendor->byte_data' in this case). So it should
be better (and safer) to do both copies at once (and 'byte_data' of
'struct acpi_resource_vendor_typed' seems to be a good candidate for
'__counted_by(byte_length)' as well).
Signed-off-by: Dmitry Antipov <dmantipov@yandex.ru>
Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
drivers/pnp/pnpacpi/rsparser.c | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/drivers/pnp/pnpacpi/rsparser.c b/drivers/pnp/pnpacpi/rsparser.c
index 4f05f610391b..c02ce0834c2c 100644
--- a/drivers/pnp/pnpacpi/rsparser.c
+++ b/drivers/pnp/pnpacpi/rsparser.c
@@ -151,13 +151,13 @@ static int vendor_resource_matches(struct pnp_dev *dev,
static void pnpacpi_parse_allocated_vendor(struct pnp_dev *dev,
struct acpi_resource_vendor_typed *vendor)
{
- if (vendor_resource_matches(dev, vendor, &hp_ccsr_uuid, 16)) {
- u64 start, length;
+ struct { u64 start, length; } range;
- memcpy(&start, vendor->byte_data, sizeof(start));
- memcpy(&length, vendor->byte_data + 8, sizeof(length));
-
- pnp_add_mem_resource(dev, start, start + length - 1, 0);
+ if (vendor_resource_matches(dev, vendor, &hp_ccsr_uuid,
+ sizeof(range))) {
+ memcpy(&range, vendor->byte_data, sizeof(range));
+ pnp_add_mem_resource(dev, range.start, range.start +
+ range.length - 1, 0);
}
}
--
2.43.0
^ permalink raw reply related [flat|nested] 9+ messages in thread* [PATCH AUTOSEL 6.1 6/9] ACPI: extlog: fix NULL pointer dereference check
2024-01-16 0:14 [PATCH AUTOSEL 6.1 1/9] regulator: core: Only increment use_count when enable_count changes Sasha Levin
` (3 preceding siblings ...)
2024-01-16 0:14 ` [PATCH AUTOSEL 6.1 5/9] PNP: ACPI: fix fortify warning Sasha Levin
@ 2024-01-16 0:14 ` Sasha Levin
2024-01-16 0:14 ` [PATCH AUTOSEL 6.1 7/9] ACPI: NUMA: Fix the logic of getting the fake_pxm value Sasha Levin
` (2 subsequent siblings)
7 siblings, 0 replies; 9+ messages in thread
From: Sasha Levin @ 2024-01-16 0:14 UTC (permalink / raw)
To: linux-kernel, stable
Cc: Prarit Bhargava, Rafael J . Wysocki, Sasha Levin, rafael,
linux-acpi
From: Prarit Bhargava <prarit@redhat.com>
[ Upstream commit 72d9b9747e78979510e9aafdd32eb99c7aa30dd1 ]
The gcc plugin -fanalyzer [1] tries to detect various
patterns of incorrect behaviour. The tool reports:
drivers/acpi/acpi_extlog.c: In function ‘extlog_exit’:
drivers/acpi/acpi_extlog.c:307:12: warning: check of ‘extlog_l1_addr’ for NULL after already dereferencing it [-Wanalyzer-deref-before-check]
|
| 306 | ((struct extlog_l1_head *)extlog_l1_addr)->flags &= ~FLAG_OS_OPTIN;
| | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^~~~~~~
| | |
| | (1) pointer ‘extlog_l1_addr’ is dereferenced here
| 307 | if (extlog_l1_addr)
| | ~
| | |
| | (2) pointer ‘extlog_l1_addr’ is checked for NULL here but it was already dereferenced at (1)
|
Fix the NULL pointer dereference check in extlog_exit().
Link: https://gcc.gnu.org/onlinedocs/gcc-10.1.0/gcc/Static-Analyzer-Options.html # [1]
Signed-off-by: Prarit Bhargava <prarit@redhat.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
drivers/acpi/acpi_extlog.c | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/drivers/acpi/acpi_extlog.c b/drivers/acpi/acpi_extlog.c
index e648158368a7..ac89bc85a9c9 100644
--- a/drivers/acpi/acpi_extlog.c
+++ b/drivers/acpi/acpi_extlog.c
@@ -303,9 +303,10 @@ static int __init extlog_init(void)
static void __exit extlog_exit(void)
{
mce_unregister_decode_chain(&extlog_mce_dec);
- ((struct extlog_l1_head *)extlog_l1_addr)->flags &= ~FLAG_OS_OPTIN;
- if (extlog_l1_addr)
+ if (extlog_l1_addr) {
+ ((struct extlog_l1_head *)extlog_l1_addr)->flags &= ~FLAG_OS_OPTIN;
acpi_os_unmap_iomem(extlog_l1_addr, l1_size);
+ }
if (elog_addr)
acpi_os_unmap_iomem(elog_addr, elog_size);
release_mem_region(elog_base, elog_size);
--
2.43.0
^ permalink raw reply related [flat|nested] 9+ messages in thread* [PATCH AUTOSEL 6.1 7/9] ACPI: NUMA: Fix the logic of getting the fake_pxm value
2024-01-16 0:14 [PATCH AUTOSEL 6.1 1/9] regulator: core: Only increment use_count when enable_count changes Sasha Levin
` (4 preceding siblings ...)
2024-01-16 0:14 ` [PATCH AUTOSEL 6.1 6/9] ACPI: extlog: fix NULL pointer dereference check Sasha Levin
@ 2024-01-16 0:14 ` Sasha Levin
2024-01-16 0:14 ` [PATCH AUTOSEL 6.1 8/9] PM / devfreq: Synchronize devfreq_monitor_[start/stop] Sasha Levin
2024-01-16 0:14 ` [PATCH AUTOSEL 6.1 9/9] ACPI: APEI: set memory failure flags as MF_ACTION_REQUIRED on synchronous events Sasha Levin
7 siblings, 0 replies; 9+ messages in thread
From: Sasha Levin @ 2024-01-16 0:14 UTC (permalink / raw)
To: linux-kernel, stable
Cc: Yuntao Wang, Rafael J . Wysocki, Sasha Levin, rafael, dave.hansen,
dan.j.williams, alison.schofield, linux-acpi
From: Yuntao Wang <ytcoode@gmail.com>
[ Upstream commit e3f577830ce216b0ca21d4750cbbd64cfc21efff ]
The for loop does not iterate over the last element of the node_to_pxm_map
array. This could lead to a conflict between the final fake_pxm value and
the existing pxm values. That is, the final fake_pxm value can not be
guaranteed to be an unused pxm value.
While at it, fix up white space in slit_valid().
Signed-off-by: Yuntao Wang <ytcoode@gmail.com>
[ rjw: Changelog edits ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
drivers/acpi/numa/srat.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/acpi/numa/srat.c b/drivers/acpi/numa/srat.c
index 12f330b0eac0..b57de78fbf14 100644
--- a/drivers/acpi/numa/srat.c
+++ b/drivers/acpi/numa/srat.c
@@ -183,7 +183,7 @@ static int __init slit_valid(struct acpi_table_slit *slit)
int i, j;
int d = slit->locality_count;
for (i = 0; i < d; i++) {
- for (j = 0; j < d; j++) {
+ for (j = 0; j < d; j++) {
u8 val = slit->entry[d*i + j];
if (i == j) {
if (val != LOCAL_DISTANCE)
@@ -532,7 +532,7 @@ int __init acpi_numa_init(void)
*/
/* fake_pxm is the next unused PXM value after SRAT parsing */
- for (i = 0, fake_pxm = -1; i < MAX_NUMNODES - 1; i++) {
+ for (i = 0, fake_pxm = -1; i < MAX_NUMNODES; i++) {
if (node_to_pxm_map[i] > fake_pxm)
fake_pxm = node_to_pxm_map[i];
}
--
2.43.0
^ permalink raw reply related [flat|nested] 9+ messages in thread* [PATCH AUTOSEL 6.1 8/9] PM / devfreq: Synchronize devfreq_monitor_[start/stop]
2024-01-16 0:14 [PATCH AUTOSEL 6.1 1/9] regulator: core: Only increment use_count when enable_count changes Sasha Levin
` (5 preceding siblings ...)
2024-01-16 0:14 ` [PATCH AUTOSEL 6.1 7/9] ACPI: NUMA: Fix the logic of getting the fake_pxm value Sasha Levin
@ 2024-01-16 0:14 ` Sasha Levin
2024-01-16 0:14 ` [PATCH AUTOSEL 6.1 9/9] ACPI: APEI: set memory failure flags as MF_ACTION_REQUIRED on synchronous events Sasha Levin
7 siblings, 0 replies; 9+ messages in thread
From: Sasha Levin @ 2024-01-16 0:14 UTC (permalink / raw)
To: linux-kernel, stable
Cc: Mukesh Ojha, Joyyoung Huang, MyungJoo Ham, Chanwoo Choi,
Sasha Levin, kyungmin.park, linux-pm
From: Mukesh Ojha <quic_mojha@quicinc.com>
[ Upstream commit aed5ed595960c6d301dcd4ed31aeaa7a8054c0c6 ]
If the governor is switched frequently in a loop, there is a chance of
timer list corruption, where the timer is cancelled from two places:
first from cancel_delayed_work_sync() and then from expire_timers(),
as can be seen from the traces[1].
while true
do
echo "simple_ondemand" > /sys/class/devfreq/1d84000.ufshc/governor
echo "performance" > /sys/class/devfreq/1d84000.ufshc/governor
done
It looks to be an issue with the devfreq driver, where
devfreq_monitor_[start/stop] need to be synchronized so that the
delayed work does not get corrupted while it is either
being queued, running or being cancelled.
Let's use the polling flag and the devfreq lock to synchronize them, to
avoid queueing the timer instance twice and the work data being
corrupted.
[1]
...
..
<idle>-0 [003] 9436.209662: timer_cancel timer=0xffffff80444f0428
<idle>-0 [003] 9436.209664: timer_expire_entry timer=0xffffff80444f0428 now=0x10022da1c function=__typeid__ZTSFvP10timer_listE_global_addr baseclk=0x10022da1c
<idle>-0 [003] 9436.209718: timer_expire_exit timer=0xffffff80444f0428
kworker/u16:6-14217 [003] 9436.209863: timer_start timer=0xffffff80444f0428 function=__typeid__ZTSFvP10timer_listE_global_addr expires=0x10022da2b now=0x10022da1c flags=182452227
vendor.xxxyyy.ha-1593 [004] 9436.209888: timer_cancel timer=0xffffff80444f0428
vendor.xxxyyy.ha-1593 [004] 9436.216390: timer_init timer=0xffffff80444f0428
vendor.xxxyyy.ha-1593 [004] 9436.216392: timer_start timer=0xffffff80444f0428 function=__typeid__ZTSFvP10timer_listE_global_addr expires=0x10022da2c now=0x10022da1d flags=186646532
vendor.xxxyyy.ha-1593 [005] 9436.220992: timer_cancel timer=0xffffff80444f0428
xxxyyyTraceManag-7795 [004] 9436.261641: timer_cancel timer=0xffffff80444f0428
[2]
9436.261653][ C4] Unable to handle kernel paging request at virtual address dead00000000012a
[ 9436.261664][ C4] Mem abort info:
[ 9436.261666][ C4] ESR = 0x96000044
[ 9436.261669][ C4] EC = 0x25: DABT (current EL), IL = 32 bits
[ 9436.261671][ C4] SET = 0, FnV = 0
[ 9436.261673][ C4] EA = 0, S1PTW = 0
[ 9436.261675][ C4] Data abort info:
[ 9436.261677][ C4] ISV = 0, ISS = 0x00000044
[ 9436.261680][ C4] CM = 0, WnR = 1
[ 9436.261682][ C4] [dead00000000012a] address between user and kernel address ranges
[ 9436.261685][ C4] Internal error: Oops: 96000044 [#1] PREEMPT SMP
[ 9436.261701][ C4] Skip md ftrace buffer dump for: 0x3a982d0
...
[ 9436.262138][ C4] CPU: 4 PID: 7795 Comm: TraceManag Tainted: G S W O 5.10.149-android12-9-o-g17f915d29d0c #1
[ 9436.262141][ C4] Hardware name: Qualcomm Technologies, Inc. (DT)
[ 9436.262144][ C4] pstate: 22400085 (nzCv daIf +PAN -UAO +TCO BTYPE=--)
[ 9436.262161][ C4] pc : expire_timers+0x9c/0x438
[ 9436.262164][ C4] lr : expire_timers+0x2a4/0x438
[ 9436.262168][ C4] sp : ffffffc010023dd0
[ 9436.262171][ C4] x29: ffffffc010023df0 x28: ffffffd0636fdc18
[ 9436.262178][ C4] x27: ffffffd063569dd0 x26: ffffffd063536008
[ 9436.262182][ C4] x25: 0000000000000001 x24: ffffff88f7c69280
[ 9436.262185][ C4] x23: 00000000000000e0 x22: dead000000000122
[ 9436.262188][ C4] x21: 000000010022da29 x20: ffffff8af72b4e80
[ 9436.262191][ C4] x19: ffffffc010023e50 x18: ffffffc010025038
[ 9436.262195][ C4] x17: 0000000000000240 x16: 0000000000000201
[ 9436.262199][ C4] x15: ffffffffffffffff x14: ffffff889f3c3100
[ 9436.262203][ C4] x13: ffffff889f3c3100 x12: 00000000049f56b8
[ 9436.262207][ C4] x11: 00000000049f56b8 x10: 00000000ffffffff
[ 9436.262212][ C4] x9 : ffffffc010023e50 x8 : dead000000000122
[ 9436.262216][ C4] x7 : ffffffffffffffff x6 : ffffffc0100239d8
[ 9436.262220][ C4] x5 : 0000000000000000 x4 : 0000000000000101
[ 9436.262223][ C4] x3 : 0000000000000080 x2 : ffffff889edc155c
[ 9436.262227][ C4] x1 : ffffff8001005200 x0 : ffffff80444f0428
[ 9436.262232][ C4] Call trace:
[ 9436.262236][ C4] expire_timers+0x9c/0x438
[ 9436.262240][ C4] __run_timers+0x1f0/0x330
[ 9436.262245][ C4] run_timer_softirq+0x28/0x58
[ 9436.262255][ C4] efi_header_end+0x168/0x5ec
[ 9436.262265][ C4] __irq_exit_rcu+0x108/0x124
[ 9436.262274][ C4] __handle_domain_irq+0x118/0x1e4
[ 9436.262282][ C4] gic_handle_irq.30369+0x6c/0x2bc
[ 9436.262286][ C4] el0_irq_naked+0x60/0x6c
Link: https://lore.kernel.org/all/1700860318-4025-1-git-send-email-quic_mojha@quicinc.com/
Reported-by: Joyyoung Huang <huangzaiyang@oppo.com>
Acked-by: MyungJoo Ham <myungjoo.ham@samsung.com>
Signed-off-by: Mukesh Ojha <quic_mojha@quicinc.com>
Signed-off-by: Chanwoo Choi <cw00.choi@samsung.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
drivers/devfreq/devfreq.c | 24 ++++++++++++++++++++++--
1 file changed, 22 insertions(+), 2 deletions(-)
diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c
index fe6644f99887..8e9ba701a643 100644
--- a/drivers/devfreq/devfreq.c
+++ b/drivers/devfreq/devfreq.c
@@ -461,10 +461,14 @@ static void devfreq_monitor(struct work_struct *work)
if (err)
dev_err(&devfreq->dev, "dvfs failed with (%d) error\n", err);
+ if (devfreq->stop_polling)
+ goto out;
+
queue_delayed_work(devfreq_wq, &devfreq->work,
msecs_to_jiffies(devfreq->profile->polling_ms));
- mutex_unlock(&devfreq->lock);
+out:
+ mutex_unlock(&devfreq->lock);
trace_devfreq_monitor(devfreq);
}
@@ -482,6 +486,10 @@ void devfreq_monitor_start(struct devfreq *devfreq)
if (IS_SUPPORTED_FLAG(devfreq->governor->flags, IRQ_DRIVEN))
return;
+ mutex_lock(&devfreq->lock);
+ if (delayed_work_pending(&devfreq->work))
+ goto out;
+
switch (devfreq->profile->timer) {
case DEVFREQ_TIMER_DEFERRABLE:
INIT_DEFERRABLE_WORK(&devfreq->work, devfreq_monitor);
@@ -490,12 +498,16 @@ void devfreq_monitor_start(struct devfreq *devfreq)
INIT_DELAYED_WORK(&devfreq->work, devfreq_monitor);
break;
default:
- return;
+ goto out;
}
if (devfreq->profile->polling_ms)
queue_delayed_work(devfreq_wq, &devfreq->work,
msecs_to_jiffies(devfreq->profile->polling_ms));
+
+out:
+ devfreq->stop_polling = false;
+ mutex_unlock(&devfreq->lock);
}
EXPORT_SYMBOL(devfreq_monitor_start);
@@ -512,6 +524,14 @@ void devfreq_monitor_stop(struct devfreq *devfreq)
if (IS_SUPPORTED_FLAG(devfreq->governor->flags, IRQ_DRIVEN))
return;
+ mutex_lock(&devfreq->lock);
+ if (devfreq->stop_polling) {
+ mutex_unlock(&devfreq->lock);
+ return;
+ }
+
+ devfreq->stop_polling = true;
+ mutex_unlock(&devfreq->lock);
cancel_delayed_work_sync(&devfreq->work);
}
EXPORT_SYMBOL(devfreq_monitor_stop);
--
2.43.0
^ permalink raw reply related [flat|nested] 9+ messages in thread* [PATCH AUTOSEL 6.1 9/9] ACPI: APEI: set memory failure flags as MF_ACTION_REQUIRED on synchronous events
2024-01-16 0:14 [PATCH AUTOSEL 6.1 1/9] regulator: core: Only increment use_count when enable_count changes Sasha Levin
` (6 preceding siblings ...)
2024-01-16 0:14 ` [PATCH AUTOSEL 6.1 8/9] PM / devfreq: Synchronize devfreq_monitor_[start/stop] Sasha Levin
@ 2024-01-16 0:14 ` Sasha Levin
7 siblings, 0 replies; 9+ messages in thread
From: Sasha Levin @ 2024-01-16 0:14 UTC (permalink / raw)
To: linux-kernel, stable
Cc: Shuai Xue, Ma Wupeng, Kefeng Wang, Xiaofei Tan, Baolin Wang,
James Morse, Rafael J . Wysocki, Sasha Levin, rafael, linmiaohe,
leoyang.li, shiju.jose, linux-acpi
From: Shuai Xue <xueshuai@linux.alibaba.com>
[ Upstream commit a70297d2213253853e95f5b49651f924990c6d3b ]
There are two major types of uncorrected recoverable (UCR) errors :
- Synchronous error: The error is detected and raised at the point of
the consumption in the execution flow, e.g. when a CPU tries to
access a poisoned cache line. The CPU will take a synchronous error
exception such as Synchronous External Abort (SEA) on Arm64 and
Machine Check Exception (MCE) on X86. The OS is required to take action
(for example, offline the failing page or kill the failing thread) to
recover from this uncorrectable error.
- Asynchronous error: The error is detected out of processor execution
context, e.g. when an error is detected by a background scrubber.
Some data in the memory are corrupted, but the data have not been
consumed. The OS may optionally take action to recover from this
uncorrectable error.
When APEI firmware first is enabled, a platform may describe one error
source for the handling of synchronous errors (e.g. MCE or SEA notification),
or for handling asynchronous errors (e.g. SCI or External Interrupt
notification). In other words, we can distinguish synchronous errors by
APEI notification. For synchronous errors, the kernel will kill the current
process which is accessing the poisoned page by sending SIGBUS with
BUS_MCEERR_AR. In addition, for asynchronous errors, the kernel will notify the
process that owns the poisoned page by sending SIGBUS with BUS_MCEERR_AO in
early kill mode. However, the GHES driver always sets mf_flags to 0 so that
all synchronous errors are handled as asynchronous errors in memory failure.
To this end, set memory failure flags as MF_ACTION_REQUIRED on synchronous
events.
Signed-off-by: Shuai Xue <xueshuai@linux.alibaba.com>
Tested-by: Ma Wupeng <mawupeng1@huawei.com>
Reviewed-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Reviewed-by: Xiaofei Tan <tanxiaofei@huawei.com>
Reviewed-by: Baolin Wang <baolin.wang@linux.alibaba.com>
Reviewed-by: James Morse <james.morse@arm.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
drivers/acpi/apei/ghes.c | 29 +++++++++++++++++++++++------
1 file changed, 23 insertions(+), 6 deletions(-)
diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index 9952f3a792ba..dd808cf65c84 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -99,6 +99,20 @@ static inline bool is_hest_type_generic_v2(struct ghes *ghes)
return ghes->generic->header.type == ACPI_HEST_TYPE_GENERIC_ERROR_V2;
}
+/*
+ * A platform may describe one error source for the handling of synchronous
+ * errors (e.g. MCE or SEA), or for handling asynchronous errors (e.g. SCI
+ * or External Interrupt). On x86, the HEST notifications are always
+ * asynchronous, so only SEA on ARM is delivered as a synchronous
+ * notification.
+ */
+static inline bool is_hest_sync_notify(struct ghes *ghes)
+{
+ u8 notify_type = ghes->generic->notify.type;
+
+ return notify_type == ACPI_HEST_NOTIFY_SEA;
+}
+
/*
* This driver isn't really modular, however for the time being,
* continuing to use module_param is the easiest way to remain
@@ -461,7 +475,7 @@ static bool ghes_do_memory_failure(u64 physical_addr, int flags)
}
static bool ghes_handle_memory_failure(struct acpi_hest_generic_data *gdata,
- int sev)
+ int sev, bool sync)
{
int flags = -1;
int sec_sev = ghes_severity(gdata->error_severity);
@@ -475,7 +489,7 @@ static bool ghes_handle_memory_failure(struct acpi_hest_generic_data *gdata,
(gdata->flags & CPER_SEC_ERROR_THRESHOLD_EXCEEDED))
flags = MF_SOFT_OFFLINE;
if (sev == GHES_SEV_RECOVERABLE && sec_sev == GHES_SEV_RECOVERABLE)
- flags = 0;
+ flags = sync ? MF_ACTION_REQUIRED : 0;
if (flags != -1)
return ghes_do_memory_failure(mem_err->physical_addr, flags);
@@ -483,9 +497,11 @@ static bool ghes_handle_memory_failure(struct acpi_hest_generic_data *gdata,
return false;
}
-static bool ghes_handle_arm_hw_error(struct acpi_hest_generic_data *gdata, int sev)
+static bool ghes_handle_arm_hw_error(struct acpi_hest_generic_data *gdata,
+ int sev, bool sync)
{
struct cper_sec_proc_arm *err = acpi_hest_get_payload(gdata);
+ int flags = sync ? MF_ACTION_REQUIRED : 0;
bool queued = false;
int sec_sev, i;
char *p;
@@ -510,7 +526,7 @@ static bool ghes_handle_arm_hw_error(struct acpi_hest_generic_data *gdata, int s
* and don't filter out 'corrected' error here.
*/
if (is_cache && has_pa) {
- queued = ghes_do_memory_failure(err_info->physical_fault_addr, 0);
+ queued = ghes_do_memory_failure(err_info->physical_fault_addr, flags);
p += err_info->length;
continue;
}
@@ -631,6 +647,7 @@ static bool ghes_do_proc(struct ghes *ghes,
const guid_t *fru_id = &guid_null;
char *fru_text = "";
bool queued = false;
+ bool sync = is_hest_sync_notify(ghes);
sev = ghes_severity(estatus->error_severity);
apei_estatus_for_each_section(estatus, gdata) {
@@ -648,13 +665,13 @@ static bool ghes_do_proc(struct ghes *ghes,
ghes_edac_report_mem_error(sev, mem_err);
arch_apei_report_mem_error(sev, mem_err);
- queued = ghes_handle_memory_failure(gdata, sev);
+ queued = ghes_handle_memory_failure(gdata, sev, sync);
}
else if (guid_equal(sec_type, &CPER_SEC_PCIE)) {
ghes_handle_aer(gdata);
}
else if (guid_equal(sec_type, &CPER_SEC_PROC_ARM)) {
- queued = ghes_handle_arm_hw_error(gdata, sev);
+ queued = ghes_handle_arm_hw_error(gdata, sev, sync);
} else {
void *err = acpi_hest_get_payload(gdata);
--
2.43.0
^ permalink raw reply related [flat|nested] 9+ messages in thread