* [PATCH 1/2] pid: delete RESERVED_PIDS
@ 2017-09-09 20:26 Alexey Dobriyan
2017-09-09 20:36 ` [PATCH 2/2] pid: delete struct pidmap::nr_free Alexey Dobriyan
0 siblings, 1 reply; 2+ messages in thread
From: Alexey Dobriyan @ 2017-09-09 20:26 UTC (permalink / raw)
To: akpm; +Cc: linux-kernel
RESERVED_PIDS had a noble goal: to protect root from PID exhaustion
since at least ~2.5.40, except it never did that because there were no
capability or uid checks.
Allow small pids to be allocated after rollover, there is nothing sacred
about them.
Resource exhaustion should be handled by rlimits and/or kernel memory
accounting.
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
---
kernel/pid.c | 10 ++++------
1 file changed, 4 insertions(+), 6 deletions(-)
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -48,9 +48,7 @@ struct pid init_struct_pid = INIT_STRUCT_PID;
int pid_max = PID_MAX_DEFAULT;
-#define RESERVED_PIDS 300
-
-int pid_max_min = RESERVED_PIDS + 1;
+int pid_max_min = 301;
int pid_max_max = PID_MAX_LIMIT;
static inline int mk_pid(struct pid_namespace *pid_ns,
@@ -157,13 +155,13 @@ static int alloc_pidmap(struct pid_namespace *pid_ns)
pid = last + 1;
if (pid >= pid_max)
- pid = RESERVED_PIDS;
+ pid = 1;
offset = pid & BITS_PER_PAGE_MASK;
map = &pid_ns->pidmap[pid/BITS_PER_PAGE];
/*
* If last_pid points into the middle of the map->page we
* want to scan this bitmap block twice, the second time
- * we start with offset == 0 (or RESERVED_PIDS).
+ * we start with offset == 0.
*/
max_scan = DIV_ROUND_UP(pid_max, BITS_PER_PAGE) - !offset;
for (i = 0; i <= max_scan; ++i) {
@@ -203,7 +201,7 @@ static int alloc_pidmap(struct pid_namespace *pid_ns)
offset = 0;
} else {
map = &pid_ns->pidmap[0];
- offset = RESERVED_PIDS;
+ offset = 1;
if (unlikely(last == offset))
break;
}
^ permalink raw reply [flat|nested] 2+ messages in thread
* [PATCH 2/2] pid: delete struct pidmap::nr_free
2017-09-09 20:26 [PATCH 1/2] pid: delete RESERVED_PIDS Alexey Dobriyan
@ 2017-09-09 20:36 ` Alexey Dobriyan
0 siblings, 0 replies; 2+ messages in thread
From: Alexey Dobriyan @ 2017-09-09 20:36 UTC (permalink / raw)
To: akpm; +Cc: linux-kernel
There is a check in pid allocation code to skip a full page:
if (likely(atomic_read(&map->nr_free))) {
...
In practice it doesn't do anything. To skip a pidmap page,
one would have to have 32K consecutive pids allocated at the same
time, which doesn't happen.
Currently the price is that _every_ fork/exit on every system is slower
than necessary.
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
---
include/linux/pid_namespace.h | 1 -
kernel/pid.c | 28 ++++++++++------------------
kernel/pid_namespace.c | 6 ------
3 files changed, 10 insertions(+), 25 deletions(-)
--- a/include/linux/pid_namespace.h
+++ b/include/linux/pid_namespace.h
@@ -11,7 +11,6 @@
#include <linux/ns_common.h>
struct pidmap {
- atomic_t nr_free;
void *page;
};
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -68,9 +68,6 @@ static inline int mk_pid(struct pid_namespace *pid_ns,
*/
struct pid_namespace init_pid_ns = {
.kref = KREF_INIT(2),
- .pidmap = {
- [ 0 ... PIDMAP_ENTRIES-1] = { ATOMIC_INIT(BITS_PER_PAGE), NULL }
- },
.last_pid = 0,
.nr_hashed = PIDNS_HASH_ADDING,
.level = 0,
@@ -106,7 +103,6 @@ static void free_pidmap(struct upid *upid)
int offset = nr & BITS_PER_PAGE_MASK;
clear_bit(offset, map->page);
- atomic_inc(&map->nr_free);
}
/*
@@ -181,20 +177,17 @@ static int alloc_pidmap(struct pid_namespace *pid_ns)
if (unlikely(!map->page))
return -ENOMEM;
}
- if (likely(atomic_read(&map->nr_free))) {
- for ( ; ; ) {
- if (!test_and_set_bit(offset, map->page)) {
- atomic_dec(&map->nr_free);
- set_last_pid(pid_ns, last, pid);
- return pid;
- }
- offset = find_next_offset(map, offset);
- if (offset >= BITS_PER_PAGE)
- break;
- pid = mk_pid(pid_ns, map, offset);
- if (pid >= pid_max)
- break;
+ for (;;) {
+ if (!test_and_set_bit(offset, map->page)) {
+ set_last_pid(pid_ns, last, pid);
+ return pid;
}
+ offset = find_next_offset(map, offset);
+ if (offset >= BITS_PER_PAGE)
+ break;
+ pid = mk_pid(pid_ns, map, offset);
+ if (pid >= pid_max)
+ break;
}
if (map < &pid_ns->pidmap[(pid_max-1)/BITS_PER_PAGE]) {
++map;
@@ -591,7 +584,6 @@ void __init pidmap_init(void)
init_pid_ns.pidmap[0].page = kzalloc(PAGE_SIZE, GFP_KERNEL);
/* Reserve PID 0. We never call free_pidmap(0) */
set_bit(0, init_pid_ns.pidmap[0].page);
- atomic_dec(&init_pid_ns.pidmap[0].nr_free);
init_pid_ns.pid_cachep = KMEM_CACHE(pid,
SLAB_HWCACHE_ALIGN | SLAB_PANIC | SLAB_ACCOUNT);
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -98,7 +98,6 @@ static struct pid_namespace *create_pid_namespace(struct user_namespace *user_ns
struct pid_namespace *ns;
unsigned int level = parent_pid_ns->level + 1;
struct ucounts *ucounts;
- int i;
int err;
err = -ENOSPC;
@@ -135,11 +134,6 @@ static struct pid_namespace *create_pid_namespace(struct user_namespace *user_ns
INIT_WORK(&ns->proc_work, proc_cleanup_work);
set_bit(0, ns->pidmap[0].page);
- atomic_set(&ns->pidmap[0].nr_free, BITS_PER_PAGE - 1);
-
- for (i = 1; i < PIDMAP_ENTRIES; i++)
- atomic_set(&ns->pidmap[i].nr_free, BITS_PER_PAGE);
-
return ns;
out_free_map:
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2017-09-09 20:36 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2017-09-09 20:26 [PATCH 1/2] pid: delete RESERVED_PIDS Alexey Dobriyan
2017-09-09 20:36 ` [PATCH 2/2] pid: delete struct pidmap::nr_free Alexey Dobriyan
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox