* [PATCH AUTOSEL 6.11 064/244] proc: add config & param to block forcing mem writes
[not found] <20240925113641.1297102-1-sashal@kernel.org>
@ 2024-09-25 11:24 ` Sasha Levin
2024-09-25 11:25 ` [PATCH AUTOSEL 6.11 079/244] net: napi: Prevent overflow of napi_defer_hard_irqs Sasha Levin
1 sibling, 0 replies; 2+ messages in thread
From: Sasha Levin @ 2024-09-25 11:24 UTC (permalink / raw)
To: linux-kernel, stable
Cc: Adrian Ratiu, Doug Anderson, Jeff Xu, Jann Horn, Kees Cook,
Ard Biesheuvel, Christian Brauner, Linus Torvalds, Sasha Levin,
corbet, paul, jmorris, serge, thuth, bp, jpoimboe, tglx, paulmck,
tony, xiongwei.song, akpm, oleg, mic, jlayton, casey, viro,
adobriyan, linux-doc, linux-fsdevel, linux-security-module
From: Adrian Ratiu <adrian.ratiu@collabora.com>
[ Upstream commit 41e8149c8892ed1962bd15350b3c3e6e90cba7f4 ]
This adds a Kconfig option and boot param to allow removing
the FOLL_FORCE flag from /proc/pid/mem write calls because
it can be abused.
The traditional forcing behavior is kept as default because
it can break GDB and some other use cases.
Previously we tried a more sophisticated approach allowing
distributions to fine-tune /proc/pid/mem behavior, however
that got NAK-ed by Linus [1], who prefers this simpler
approach with semantics also easier to understand for users.
Link: https://lore.kernel.org/lkml/CAHk-=wiGWLChxYmUA5HrT5aopZrB7_2VTa0NLZcxORgkUe5tEQ@mail.gmail.com/ [1]
Cc: Doug Anderson <dianders@chromium.org>
Cc: Jeff Xu <jeffxu@google.com>
Cc: Jann Horn <jannh@google.com>
Cc: Kees Cook <kees@kernel.org>
Cc: Ard Biesheuvel <ardb@kernel.org>
Cc: Christian Brauner <brauner@kernel.org>
Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Adrian Ratiu <adrian.ratiu@collabora.com>
Link: https://lore.kernel.org/r/20240802080225.89408-1-adrian.ratiu@collabora.com
Signed-off-by: Christian Brauner <brauner@kernel.org>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
.../admin-guide/kernel-parameters.txt | 10 +++
fs/proc/base.c | 61 ++++++++++++++++++-
security/Kconfig | 32 ++++++++++
3 files changed, 102 insertions(+), 1 deletion(-)
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 09126bb8cc9ff..be010fec76541 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -4788,6 +4788,16 @@
printk.time= Show timing data prefixed to each printk message line
Format: <bool> (1/Y/y=enable, 0/N/n=disable)
+ proc_mem.force_override= [KNL]
+ Format: {always | ptrace | never}
+ Traditionally /proc/pid/mem allows memory permissions to be
+ overridden without restrictions. This option may be set to
+ restrict that. Can be one of:
+ - 'always': traditional behavior always allows mem overrides.
+ - 'ptrace': only allow mem overrides for active ptracers.
+ - 'never': never allow mem overrides.
+ If not specified, default is the CONFIG_PROC_MEM_* choice.
+
processor.max_cstate= [HW,ACPI]
Limit processor to maximum C-state
max_cstate=9 overrides any DMI blacklist limit.
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 72a1acd03675c..f389c69767fa5 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -85,6 +85,7 @@
#include <linux/elf.h>
#include <linux/pid_namespace.h>
#include <linux/user_namespace.h>
+#include <linux/fs_parser.h>
#include <linux/fs_struct.h>
#include <linux/slab.h>
#include <linux/sched/autogroup.h>
@@ -117,6 +118,40 @@
static u8 nlink_tid __ro_after_init;
static u8 nlink_tgid __ro_after_init;
+enum proc_mem_force {
+ PROC_MEM_FORCE_ALWAYS,
+ PROC_MEM_FORCE_PTRACE,
+ PROC_MEM_FORCE_NEVER
+};
+
+static enum proc_mem_force proc_mem_force_override __ro_after_init =
+ IS_ENABLED(CONFIG_PROC_MEM_NO_FORCE) ? PROC_MEM_FORCE_NEVER :
+ IS_ENABLED(CONFIG_PROC_MEM_FORCE_PTRACE) ? PROC_MEM_FORCE_PTRACE :
+ PROC_MEM_FORCE_ALWAYS;
+
+static const struct constant_table proc_mem_force_table[] __initconst = {
+ { "always", PROC_MEM_FORCE_ALWAYS },
+ { "ptrace", PROC_MEM_FORCE_PTRACE },
+ { "never", PROC_MEM_FORCE_NEVER },
+ { }
+};
+
+static int __init early_proc_mem_force_override(char *buf)
+{
+ if (!buf)
+ return -EINVAL;
+
+ /*
+ * lookup_constant() defaults to proc_mem_force_override to preseve
+ * the initial Kconfig choice in case an invalid param gets passed.
+ */
+ proc_mem_force_override = lookup_constant(proc_mem_force_table,
+ buf, proc_mem_force_override);
+
+ return 0;
+}
+early_param("proc_mem.force_override", early_proc_mem_force_override);
+
struct pid_entry {
const char *name;
unsigned int len;
@@ -835,6 +870,28 @@ static int mem_open(struct inode *inode, struct file *file)
return ret;
}
+static bool proc_mem_foll_force(struct file *file, struct mm_struct *mm)
+{
+ struct task_struct *task;
+ bool ptrace_active = false;
+
+ switch (proc_mem_force_override) {
+ case PROC_MEM_FORCE_NEVER:
+ return false;
+ case PROC_MEM_FORCE_PTRACE:
+ task = get_proc_task(file_inode(file));
+ if (task) {
+ ptrace_active = READ_ONCE(task->ptrace) &&
+ READ_ONCE(task->mm) == mm &&
+ READ_ONCE(task->parent) == current;
+ put_task_struct(task);
+ }
+ return ptrace_active;
+ default:
+ return true;
+ }
+}
+
static ssize_t mem_rw(struct file *file, char __user *buf,
size_t count, loff_t *ppos, int write)
{
@@ -855,7 +912,9 @@ static ssize_t mem_rw(struct file *file, char __user *buf,
if (!mmget_not_zero(mm))
goto free;
- flags = FOLL_FORCE | (write ? FOLL_WRITE : 0);
+ flags = write ? FOLL_WRITE : 0;
+ if (proc_mem_foll_force(file, mm))
+ flags |= FOLL_FORCE;
while (count > 0) {
size_t this_len = min_t(size_t, count, PAGE_SIZE);
diff --git a/security/Kconfig b/security/Kconfig
index 412e76f1575d0..a93c1a9b7c283 100644
--- a/security/Kconfig
+++ b/security/Kconfig
@@ -19,6 +19,38 @@ config SECURITY_DMESG_RESTRICT
If you are unsure how to answer this question, answer N.
+choice
+ prompt "Allow /proc/pid/mem access override"
+ default PROC_MEM_ALWAYS_FORCE
+ help
+ Traditionally /proc/pid/mem allows users to override memory
+ permissions for users like ptrace, assuming they have ptrace
+ capability.
+
+ This allows people to limit that - either never override, or
+ require actual active ptrace attachment.
+
+ Defaults to the traditional behavior (for now)
+
+config PROC_MEM_ALWAYS_FORCE
+ bool "Traditional /proc/pid/mem behavior"
+ help
+ This allows /proc/pid/mem accesses to override memory mapping
+ permissions if you have ptrace access rights.
+
+config PROC_MEM_FORCE_PTRACE
+ bool "Require active ptrace() use for access override"
+ help
+ This allows /proc/pid/mem accesses to override memory mapping
+ permissions for active ptracers like gdb.
+
+config PROC_MEM_NO_FORCE
+ bool "Never"
+ help
+ Never override memory mapping permissions
+
+endchoice
+
config SECURITY
bool "Enable different security models"
depends on SYSFS
--
2.43.0
^ permalink raw reply related [flat|nested] 2+ messages in thread
* [PATCH AUTOSEL 6.11 079/244] net: napi: Prevent overflow of napi_defer_hard_irqs
[not found] <20240925113641.1297102-1-sashal@kernel.org>
2024-09-25 11:24 ` [PATCH AUTOSEL 6.11 064/244] proc: add config & param to block forcing mem writes Sasha Levin
@ 2024-09-25 11:25 ` Sasha Levin
1 sibling, 0 replies; 2+ messages in thread
From: Sasha Levin @ 2024-09-25 11:25 UTC (permalink / raw)
To: linux-kernel, stable
Cc: Joe Damato, Jakub Kicinski, Eric Dumazet, Sasha Levin, davem,
pabeni, corbet, leitao, johannes.berg, jamie.bainbridge, netdev,
linux-doc
From: Joe Damato <jdamato@fastly.com>
[ Upstream commit 08062af0a52107a243f7608fd972edb54ca5b7f8 ]
In commit 6f8b12d661d0 ("net: napi: add hard irqs deferral feature")
napi_defer_irqs was added to net_device and napi_defer_irqs_count was
added to napi_struct, both as type int.
This value never goes below zero, so there is not reason for it to be a
signed int. Change the type for both from int to u32, and add an
overflow check to sysfs to limit the value to S32_MAX.
The limit of S32_MAX was chosen because the practical limit before this
patch was S32_MAX (anything larger was an overflow) and thus there are
no behavioral changes introduced. If the extra bit is needed in the
future, the limit can be raised.
Before this patch:
$ sudo bash -c 'echo 2147483649 > /sys/class/net/eth4/napi_defer_hard_irqs'
$ cat /sys/class/net/eth4/napi_defer_hard_irqs
-2147483647
After this patch:
$ sudo bash -c 'echo 2147483649 > /sys/class/net/eth4/napi_defer_hard_irqs'
bash: line 0: echo: write error: Numerical result out of range
Similarly, /sys/class/net/XXXXX/tx_queue_len is defined as unsigned:
include/linux/netdevice.h: unsigned int tx_queue_len;
And has an overflow check:
dev_change_tx_queue_len(..., unsigned long new_len):
if (new_len != (unsigned int)new_len)
return -ERANGE;
Suggested-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Joe Damato <jdamato@fastly.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Link: https://patch.msgid.link/20240904153431.307932-1-jdamato@fastly.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
Documentation/networking/net_cachelines/net_device.rst | 2 +-
include/linux/netdevice.h | 4 ++--
net/core/net-sysfs.c | 6 +++++-
3 files changed, 8 insertions(+), 4 deletions(-)
diff --git a/Documentation/networking/net_cachelines/net_device.rst b/Documentation/networking/net_cachelines/net_device.rst
index 70c4fb9d4e5ce..d68f37f5b1f82 100644
--- a/Documentation/networking/net_cachelines/net_device.rst
+++ b/Documentation/networking/net_cachelines/net_device.rst
@@ -98,7 +98,7 @@ unsigned_int num_rx_queues
unsigned_int real_num_rx_queues - read_mostly get_rps_cpu
struct_bpf_prog* xdp_prog - read_mostly netif_elide_gro()
unsigned_long gro_flush_timeout - read_mostly napi_complete_done
-int napi_defer_hard_irqs - read_mostly napi_complete_done
+u32 napi_defer_hard_irqs - read_mostly napi_complete_done
unsigned_int gro_max_size - read_mostly skb_gro_receive
unsigned_int gro_ipv4_max_size - read_mostly skb_gro_receive
rx_handler_func_t* rx_handler read_mostly - __netif_receive_skb_core
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 607009150b5fa..39eafd2e2368a 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -356,7 +356,7 @@ struct napi_struct {
unsigned long state;
int weight;
- int defer_hard_irqs_count;
+ u32 defer_hard_irqs_count;
unsigned long gro_bitmask;
int (*poll)(struct napi_struct *, int);
#ifdef CONFIG_NETPOLL
@@ -2091,7 +2091,7 @@ struct net_device {
unsigned int real_num_rx_queues;
struct netdev_rx_queue *_rx;
unsigned long gro_flush_timeout;
- int napi_defer_hard_irqs;
+ u32 napi_defer_hard_irqs;
unsigned int gro_max_size;
unsigned int gro_ipv4_max_size;
rx_handler_func_t __rcu *rx_handler;
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 291fdf4a328b3..93dd5d5436849 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -32,6 +32,7 @@
#ifdef CONFIG_SYSFS
static const char fmt_hex[] = "%#x\n";
static const char fmt_dec[] = "%d\n";
+static const char fmt_uint[] = "%u\n";
static const char fmt_ulong[] = "%lu\n";
static const char fmt_u64[] = "%llu\n";
@@ -425,6 +426,9 @@ NETDEVICE_SHOW_RW(gro_flush_timeout, fmt_ulong);
static int change_napi_defer_hard_irqs(struct net_device *dev, unsigned long val)
{
+ if (val > S32_MAX)
+ return -ERANGE;
+
WRITE_ONCE(dev->napi_defer_hard_irqs, val);
return 0;
}
@@ -438,7 +442,7 @@ static ssize_t napi_defer_hard_irqs_store(struct device *dev,
return netdev_store(dev, attr, buf, len, change_napi_defer_hard_irqs);
}
-NETDEVICE_SHOW_RW(napi_defer_hard_irqs, fmt_dec);
+NETDEVICE_SHOW_RW(napi_defer_hard_irqs, fmt_uint);
static ssize_t ifalias_store(struct device *dev, struct device_attribute *attr,
const char *buf, size_t len)
--
2.43.0
^ permalink raw reply related [flat|nested] 2+ messages in thread
end of thread, other threads:[~2024-09-25 11:39 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
[not found] <20240925113641.1297102-1-sashal@kernel.org>
2024-09-25 11:24 ` [PATCH AUTOSEL 6.11 064/244] proc: add config & param to block forcing mem writes Sasha Levin
2024-09-25 11:25 ` [PATCH AUTOSEL 6.11 079/244] net: napi: Prevent overflow of napi_defer_hard_irqs Sasha Levin
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).