All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Pratik R. Sampat" <psampat@linux.ibm.com>
To: bristot@redhat.com, christian@brauner.io, ebiederm@xmission.com,
	lizefan.x@bytedance.com, tj@kernel.org, hannes@cmpxchg.org,
	mingo@kernel.org, juri.lelli@redhat.com,
	linux-kernel@vger.kernel.org, linux-fsdevel@vger.kernel.org,
	cgroups@vger.kernel.org, containers@lists.linux.dev,
	containers@lists.linux-foundation.org, psampat@linux.ibm.com,
	pratik.r.sampat@gmail.com
Subject: [RFC 1/5] ns: Introduce CPU Namespace
Date: Sat,  9 Oct 2021 20:42:39 +0530	[thread overview]
Message-ID: <20211009151243.8825-2-psampat@linux.ibm.com> (raw)
In-Reply-To: <20211009151243.8825-1-psampat@linux.ibm.com>

CPU namespace isolates CPU topology information

The CPU namespace isolates CPU information by virtualizing CPU IDs as
viewed by linux and maintaining a virtual map for each task.
The commit also adds functionality of plugging this interface into the
control and display interface via the sched_set/getaffinity syscalls.
These syscalls translate the namespace map and vice-versa to determine
the CPUset for the task to operate on.

As all the clone flags have been exhausted, therefore following suit
with the time namespace, the flag for a new CPU namespace similarly
now continues with the pattern of intersecting with CSIGNAL.
This means that this namespace can be triggered by only unshare()
and clone3() syscalls.

Signed-off-by: Pratik R. Sampat <psampat@linux.ibm.com>
---
 fs/proc/namespaces.c           |   4 +
 include/linux/cpu_namespace.h  | 159 +++++++++++++++++++++++++++
 include/linux/nsproxy.h        |   2 +
 include/linux/proc_ns.h        |   2 +
 include/linux/user_namespace.h |   1 +
 include/uapi/linux/sched.h     |   1 +
 init/Kconfig                   |   8 ++
 kernel/Makefile                |   1 +
 kernel/cpu_namespace.c         | 192 +++++++++++++++++++++++++++++++++
 kernel/fork.c                  |   2 +-
 kernel/nsproxy.c               |  30 +++++-
 kernel/sched/core.c            |  16 ++-
 kernel/ucount.c                |   1 +
 13 files changed, 414 insertions(+), 5 deletions(-)
 create mode 100644 include/linux/cpu_namespace.h
 create mode 100644 kernel/cpu_namespace.c

diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c
index 8e159fc78c0a..d65170a8a648 100644
--- a/fs/proc/namespaces.c
+++ b/fs/proc/namespaces.c
@@ -9,6 +9,7 @@
 #include <linux/ipc_namespace.h>
 #include <linux/pid_namespace.h>
 #include <linux/user_namespace.h>
+#include <linux/cpu_namespace.h>
 #include "internal.h"
 
 
@@ -37,6 +38,9 @@ static const struct proc_ns_operations *ns_entries[] = {
 	&timens_operations,
 	&timens_for_children_operations,
 #endif
+#ifdef CONFIG_CPU_NS
+	&cpuns_operations,
+#endif
 };
 
 static const char *proc_ns_get_link(struct dentry *dentry,
diff --git a/include/linux/cpu_namespace.h b/include/linux/cpu_namespace.h
new file mode 100644
index 000000000000..edad05919db7
--- /dev/null
+++ b/include/linux/cpu_namespace.h
@@ -0,0 +1,159 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _LINUX_CPU_NS_H
+#define _LINUX_CPU_NS_H
+
+#include <linux/sched.h>
+#include <linux/bug.h>
+#include <linux/nsproxy.h>
+#include <linux/ns_common.h>
+
+/*
+ * Virtual CPUs  => View of the CPUs in the CPU NS context
+ * Physical CPUs => CPU as viewed by host, also known as logical CPUs
+ */
+struct cpu_namespace {
+	/* Virtual map of cpus in the cpuset */
+	cpumask_t v_cpuset_cpus;
+	/* map for CPU translation -- Physical --> Virtual */
+	int p_v_trans_map[NR_CPUS];
+	/* map for CPU translation -- Virtual --> Physical */
+	int v_p_trans_map[NR_CPUS];
+	struct cpu_namespace *parent;
+	struct ucounts *ucounts;
+	struct user_namespace *user_ns;
+	struct ns_common ns;
+} __randomize_layout;
+
+extern struct cpu_namespace init_cpu_ns;
+
+#ifdef CONFIG_CPU_NS
+
+static inline struct cpu_namespace *get_cpu_ns(struct cpu_namespace *ns)
+{
+	if (ns != &init_cpu_ns)
+		refcount_inc(&ns->ns.count);
+	return ns;
+}
+
+/*
+ * Get the virtual CPU for the requested physical CPU in the ns context
+ */
+static inline int get_vcpu_cpuns(struct cpu_namespace *c, int pcpu)
+{
+	if (pcpu >= num_possible_cpus())
+		return -1;
+
+	return c->p_v_trans_map[pcpu];
+}
+
+/*
+ * Get the physical CPU for requested virtual CPU in the ns context
+ */
+static inline int get_pcpu_cpuns(struct cpu_namespace *c, int vcpu)
+{
+	if (vcpu >= num_possible_cpus())
+		return -1;
+
+	return c->v_p_trans_map[vcpu];
+}
+
+/*
+ * Given the physical CPU map get the virtual CPUs corresponding to that ns
+ */
+static inline cpumask_t get_vcpus_cpuns(struct cpu_namespace *c,
+					const cpumask_var_t mask)
+{
+	int cpu;
+	cpumask_t temp;
+
+	cpumask_clear(&temp);
+
+	for_each_cpu(cpu, mask)
+		cpumask_set_cpu(get_vcpu_cpuns(c, cpu), &temp);
+
+	return temp;
+}
+
+/*
+ * Given a virtual CPU map get the physical CPUs corresponding to that ns
+ */
+static inline cpumask_t get_pcpus_cpuns(struct cpu_namespace *c,
+					const cpumask_var_t mask)
+{
+	int cpu;
+	cpumask_t temp;
+
+	cpumask_clear(&temp);
+
+	for_each_cpu(cpu, mask)
+		cpumask_set_cpu(get_pcpu_cpuns(c, cpu), &temp);
+
+	return temp;
+}
+
+extern struct cpu_namespace *copy_cpu_ns(unsigned long flags,
+	struct user_namespace *user_ns, struct cpu_namespace *ns);
+extern void put_cpu_ns(struct cpu_namespace *ns);
+
+#else /* !CONFIG_CPU_NS */
+#include <linux/err.h>
+
+static inline struct cpu_namespace *get_cpu_ns(struct cpu_namespace *ns)
+{
+	return ns;
+}
+
+static inline struct cpu_namespace *copy_cpu_ns(unsigned long flags,
+	struct user_namespace *user_ns, struct cpu_namespace *ns)
+{
+	if (flags & CLONE_NEWCPU)
+		return ERR_PTR(-EINVAL);
+	return ns;
+}
+
+static inline void put_cpu_ns(struct cpu_namespace *ns)
+{
+}
+
+static inline int get_vcpu_cpuns(struct cpu_namespace *c, int pcpu)
+{
+	return pcpu;
+}
+
+static inline int get_pcpu_cpuns(struct cpu_namespace *c, int vcpu)
+{
+	return vcpu;
+}
+
+static inline cpumask_t get_vcpus_cpuns(struct cpu_namespace *c,
+					const cpumask_var_t mask)
+{
+	cpumask_t temp;
+	int cpu;
+
+	cpumask_clear(&temp);
+
+	for_each_cpu(cpu, mask)
+		cpumask_set_cpu(get_vcpu_cpuns(c, cpu), &temp);
+
+	return temp;
+}
+
+static inline cpumask_t get_pcpus_cpuns(struct cpu_namespace *c,
+					const cpumask_var_t mask)
+{
+	cpumask_t temp;
+	int cpu;
+
+	cpumask_clear(&temp);
+
+	for_each_cpu(cpu, mask)
+		cpumask_set_cpu(get_pcpu_cpuns(c, cpu), &temp);
+
+	return temp;
+}
+
+#endif /* CONFIG_CPU_NS */
+
+#endif /* _LINUX_CPU_NS_H */
diff --git a/include/linux/nsproxy.h b/include/linux/nsproxy.h
index cdb171efc7cb..40e0357fe0bb 100644
--- a/include/linux/nsproxy.h
+++ b/include/linux/nsproxy.h
@@ -10,6 +10,7 @@ struct uts_namespace;
 struct ipc_namespace;
 struct pid_namespace;
 struct cgroup_namespace;
+struct cpu_namespace;
 struct fs_struct;
 
 /*
@@ -38,6 +39,7 @@ struct nsproxy {
 	struct time_namespace *time_ns;
 	struct time_namespace *time_ns_for_children;
 	struct cgroup_namespace *cgroup_ns;
+	struct cpu_namespace *cpu_ns;
 };
 extern struct nsproxy init_nsproxy;
 
diff --git a/include/linux/proc_ns.h b/include/linux/proc_ns.h
index 75807ecef880..dd1db6782336 100644
--- a/include/linux/proc_ns.h
+++ b/include/linux/proc_ns.h
@@ -34,6 +34,7 @@ extern const struct proc_ns_operations mntns_operations;
 extern const struct proc_ns_operations cgroupns_operations;
 extern const struct proc_ns_operations timens_operations;
 extern const struct proc_ns_operations timens_for_children_operations;
+extern const struct proc_ns_operations cpuns_operations;
 
 /*
  * We always define these enumerators
@@ -46,6 +47,7 @@ enum {
 	PROC_PID_INIT_INO	= 0xEFFFFFFCU,
 	PROC_CGROUP_INIT_INO	= 0xEFFFFFFBU,
 	PROC_TIME_INIT_INO	= 0xEFFFFFFAU,
+	PROC_CPU_INIT_INO	= 0xEFFFFFFU,
 };
 
 #ifdef CONFIG_PROC_FS
diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index eb70cabe6e7f..9f0b121f97ac 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -46,6 +46,7 @@ enum ucount_type {
 	UCOUNT_MNT_NAMESPACES,
 	UCOUNT_CGROUP_NAMESPACES,
 	UCOUNT_TIME_NAMESPACES,
+	UCOUNT_CPU_NAMESPACES,
 #ifdef CONFIG_INOTIFY_USER
 	UCOUNT_INOTIFY_INSTANCES,
 	UCOUNT_INOTIFY_WATCHES,
diff --git a/include/uapi/linux/sched.h b/include/uapi/linux/sched.h
index 3bac0a8ceab2..f8bb6de68874 100644
--- a/include/uapi/linux/sched.h
+++ b/include/uapi/linux/sched.h
@@ -41,6 +41,7 @@
  * cloning flags intersect with CSIGNAL so can be used with unshare and clone3
  * syscalls only:
  */
+#define CLONE_NEWCPU	0x00000040	/* New cpu namespace */
 #define CLONE_NEWTIME	0x00000080	/* New time namespace */
 
 #ifndef __ASSEMBLY__
diff --git a/init/Kconfig b/init/Kconfig
index 55f9f7738ebb..c3e3abd35bb4 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1214,6 +1214,14 @@ config NET_NS
 	  Allow user space to create what appear to be multiple instances
 	  of the network stack.
 
+config CPU_NS
+	bool "CPU Namespaces"
+	default y
+	help
+	  Support CPU namespaces. This allows having a context-aware
+	  scrambled view of the CPU topology coherent to limits set
+	  in system control mechanism.
+
 endif # NAMESPACES
 
 config CHECKPOINT_RESTORE
diff --git a/kernel/Makefile b/kernel/Makefile
index 4df609be42d0..5a37e3e56f8f 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -82,6 +82,7 @@ obj-$(CONFIG_CGROUPS) += cgroup/
 obj-$(CONFIG_UTS_NS) += utsname.o
 obj-$(CONFIG_USER_NS) += user_namespace.o
 obj-$(CONFIG_PID_NS) += pid_namespace.o
+obj-$(CONFIG_CPU_NS) += cpu_namespace.o
 obj-$(CONFIG_IKCONFIG) += configs.o
 obj-$(CONFIG_IKHEADERS) += kheaders.o
 obj-$(CONFIG_SMP) += stop_machine.o
diff --git a/kernel/cpu_namespace.c b/kernel/cpu_namespace.c
new file mode 100644
index 000000000000..6c700522352a
--- /dev/null
+++ b/kernel/cpu_namespace.c
@@ -0,0 +1,192 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * CPU namespaces
+ * <TODO More description>
+ *
+ * Author: Pratik Rajesh Sampat <psampat@linux.ibm.com>
+ */
+
+#include <linux/cpu_namespace.h>
+#include <linux/syscalls.h>
+#include <linux/proc_ns.h>
+#include <linux/export.h>
+#include <linux/acct.h>
+#include <linux/err.h>
+#include <linux/random.h>
+
+static void dec_cpu_namespaces(struct ucounts *ucounts)
+{
+	dec_ucount(ucounts, UCOUNT_CPU_NAMESPACES);
+}
+
+static void destroy_cpu_namespace(struct cpu_namespace *ns)
+{
+	ns_free_inum(&ns->ns);
+
+	dec_cpu_namespaces(ns->ucounts);
+	put_user_ns(ns->user_ns);
+}
+
+static struct ucounts *inc_cpu_namespaces(struct user_namespace *ns)
+{
+	return inc_ucount(ns, current_euid(), UCOUNT_CPU_NAMESPACES);
+}
+
+static struct cpu_namespace *create_cpu_namespace(struct user_namespace *user_ns,
+	struct cpu_namespace *parent_cpu_ns)
+{
+	struct cpu_namespace *ns;
+	struct ucounts *ucounts;
+	int err, i, cpu;
+	cpumask_t temp;
+
+	err = -EINVAL;
+	if (!in_userns(parent_cpu_ns->user_ns, user_ns))
+		goto out;
+
+	ucounts = inc_cpu_namespaces(user_ns);
+	if (!ucounts)
+		goto out;
+
+	err = -ENOMEM;
+	ns = kmalloc(sizeof(*ns), GFP_KERNEL);
+	if (ns == NULL)
+		goto out_dec;
+
+	err = ns_alloc_inum(&ns->ns);
+	if (err)
+		goto out_free_ns;
+	ns->ns.ops = &cpuns_operations;
+
+	refcount_set(&ns->ns.count, 1);
+	ns->parent = get_cpu_ns(parent_cpu_ns);
+	ns->user_ns = get_user_ns(user_ns);
+
+	for_each_present_cpu(cpu) {
+		ns->p_v_trans_map[cpu] = ns->parent->p_v_trans_map[cpu];
+		ns->v_p_trans_map[cpu] = ns->parent->v_p_trans_map[cpu];
+	}
+	cpumask_clear(&temp);
+	cpumask_clear(&ns->v_cpuset_cpus);
+
+	for_each_cpu(i, &parent_cpu_ns->v_cpuset_cpus) {
+		int parent_pcpu = get_pcpu_cpuns(parent_cpu_ns, i);
+
+		cpumask_set_cpu(get_vcpu_cpuns(ns, parent_pcpu),
+				&ns->v_cpuset_cpus);
+	}
+	for_each_cpu(i, &ns->v_cpuset_cpus)
+		cpumask_set_cpu(get_pcpu_cpuns(ns, i), &temp);
+
+	set_cpus_allowed_ptr(current, &temp);
+
+	return ns;
+
+out_free_ns:
+	kfree(ns);
+out_dec:
+	dec_cpu_namespaces(ucounts);
+out:
+	return ERR_PTR(err);
+}
+
+struct cpu_namespace *copy_cpu_ns(unsigned long flags,
+	struct user_namespace *user_ns, struct cpu_namespace *old_ns)
+{
+	if (!(flags & CLONE_NEWCPU))
+		return get_cpu_ns(old_ns);
+	return create_cpu_namespace(user_ns, old_ns);
+}
+
+void put_cpu_ns(struct cpu_namespace *ns)
+{
+	struct cpu_namespace *parent;
+
+	while (ns != &init_cpu_ns) {
+		parent = ns->parent;
+		if (!refcount_dec_and_test(&ns->ns.count))
+			break;
+		destroy_cpu_namespace(ns);
+		ns = parent;
+	}
+}
+EXPORT_SYMBOL_GPL(put_cpu_ns);
+
+static inline struct cpu_namespace *to_cpu_ns(struct ns_common *ns)
+{
+	return container_of(ns, struct cpu_namespace, ns);
+}
+
+static struct ns_common *cpuns_get(struct task_struct *task)
+{
+	struct cpu_namespace *ns = NULL;
+	struct nsproxy *nsproxy;
+
+	task_lock(task);
+	nsproxy = task->nsproxy;
+	if (nsproxy) {
+		ns = nsproxy->cpu_ns;
+		get_cpu_ns(ns);
+	}
+	task_unlock(task);
+
+	return ns ? &ns->ns : NULL;
+}
+
+static void cpuns_put(struct ns_common *ns)
+{
+	put_cpu_ns(to_cpu_ns(ns));
+}
+
+static int cpuns_install(struct nsset *nsset, struct ns_common *new)
+{
+	struct nsproxy *nsproxy = nsset->nsproxy;
+	struct cpu_namespace *ns = to_cpu_ns(new);
+
+	if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN) ||
+	    !ns_capable(nsset->cred->user_ns, CAP_SYS_ADMIN))
+		return -EPERM;
+
+	get_cpu_ns(ns);
+	put_cpu_ns(nsproxy->cpu_ns);
+	nsproxy->cpu_ns = ns;
+	return 0;
+}
+
+static struct user_namespace *cpuns_owner(struct ns_common *ns)
+{
+	return to_cpu_ns(ns)->user_ns;
+}
+
+const struct proc_ns_operations cpuns_operations = {
+	.name		= "cpu",
+	.type		= CLONE_NEWCPU,
+	.get		= cpuns_get,
+	.put		= cpuns_put,
+	.install	= cpuns_install,
+	.owner		= cpuns_owner,
+};
+
+struct cpu_namespace init_cpu_ns = {
+	.ns.count	= REFCOUNT_INIT(2),
+	.user_ns	= &init_user_ns,
+	.ns.inum	= PROC_CPU_INIT_INO,
+	.ns.ops		= &cpuns_operations,
+};
+EXPORT_SYMBOL(init_cpu_ns);
+
+static __init int cpu_namespaces_init(void)
+{
+	int cpu;
+
+	cpumask_copy(&init_cpu_ns.v_cpuset_cpus, cpu_possible_mask);
+
+	/* Identity mapping for the cpu_namespace init */
+	for_each_present_cpu(cpu) {
+		init_cpu_ns.p_v_trans_map[cpu] = cpu;
+		init_cpu_ns.v_p_trans_map[cpu] = cpu;
+	}
+
+	return 0;
+}
+device_initcall(cpu_namespaces_init);
diff --git a/kernel/fork.c b/kernel/fork.c
index bc94b2cc5995..fac3317b1f57 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -2877,7 +2877,7 @@ static int check_unshare_flags(unsigned long unshare_flags)
 				CLONE_VM|CLONE_FILES|CLONE_SYSVSEM|
 				CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWNET|
 				CLONE_NEWUSER|CLONE_NEWPID|CLONE_NEWCGROUP|
-				CLONE_NEWTIME))
+				CLONE_NEWTIME|CLONE_NEWCPU))
 		return -EINVAL;
 	/*
 	 * Not implemented, but pretend it works if there is nothing
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c
index abc01fcad8c7..dab0f9799ce7 100644
--- a/kernel/nsproxy.c
+++ b/kernel/nsproxy.c
@@ -19,6 +19,7 @@
 #include <net/net_namespace.h>
 #include <linux/ipc_namespace.h>
 #include <linux/time_namespace.h>
+#include <linux/cpu_namespace.h>
 #include <linux/fs_struct.h>
 #include <linux/proc_fs.h>
 #include <linux/proc_ns.h>
@@ -47,6 +48,9 @@ struct nsproxy init_nsproxy = {
 	.time_ns		= &init_time_ns,
 	.time_ns_for_children	= &init_time_ns,
 #endif
+#ifdef CONFIG_CPU_NS
+	.cpu_ns			= &init_cpu_ns,
+#endif
 };
 
 static inline struct nsproxy *create_nsproxy(void)
@@ -121,8 +125,17 @@ static struct nsproxy *create_new_namespaces(unsigned long flags,
 	}
 	new_nsp->time_ns = get_time_ns(tsk->nsproxy->time_ns);
 
+	new_nsp->cpu_ns = copy_cpu_ns(flags, user_ns, tsk->nsproxy->cpu_ns);
+	if (IS_ERR(new_nsp->cpu_ns)) {
+		err = PTR_ERR(new_nsp->cpu_ns);
+		goto out_cpu;
+	}
+
 	return new_nsp;
 
+out_cpu:
+	if (new_nsp->cpu_ns)
+		put_cpu_ns(new_nsp->cpu_ns);
 out_time:
 	put_net(new_nsp->net_ns);
 out_net:
@@ -156,7 +169,8 @@ int copy_namespaces(unsigned long flags, struct task_struct *tsk)
 
 	if (likely(!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC |
 			      CLONE_NEWPID | CLONE_NEWNET |
-			      CLONE_NEWCGROUP | CLONE_NEWTIME)))) {
+			      CLONE_NEWCGROUP | CLONE_NEWTIME |
+			      CLONE_NEWCPU)))) {
 		if (likely(old_ns->time_ns_for_children == old_ns->time_ns)) {
 			get_nsproxy(old_ns);
 			return 0;
@@ -216,7 +230,7 @@ int unshare_nsproxy_namespaces(unsigned long unshare_flags,
 
 	if (!(unshare_flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC |
 			       CLONE_NEWNET | CLONE_NEWPID | CLONE_NEWCGROUP |
-			       CLONE_NEWTIME)))
+			       CLONE_NEWTIME | CLONE_NEWCPU)))
 		return 0;
 
 	user_ns = new_cred ? new_cred->user_ns : current_user_ns();
@@ -289,6 +303,10 @@ static int check_setns_flags(unsigned long flags)
 	if (flags & CLONE_NEWTIME)
 		return -EINVAL;
 #endif
+#ifndef CONFIG_CPU_NS
+	if (flags & CLONE_NEWCPU)
+		return -EINVAL;
+#endif
 
 	return 0;
 }
@@ -471,6 +489,14 @@ static int validate_nsset(struct nsset *nsset, struct pid *pid)
 	}
 #endif
 
+#ifdef CONFIG_CPU_NS
+	if (flags & CLONE_NEWCPU) {
+		ret = validate_ns(nsset, &nsp->cpu_ns->ns);
+		if (ret)
+			goto out;
+	}
+#endif
+
 out:
 	if (pid_ns)
 		put_pid_ns(pid_ns);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 2d9ff40f4661..0413175e6d73 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -27,6 +27,8 @@
 #include "pelt.h"
 #include "smp.h"
 
+#include <linux/cpu_namespace.h>
+
 /*
  * Export tracepoints that act as a bare tracehook (ie: have no trace event
  * associated with them) to allow external modules to probe them.
@@ -7559,6 +7561,7 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask)
 {
 	cpumask_var_t cpus_allowed, new_mask;
 	struct task_struct *p;
+	cpumask_t temp;
 	int retval;
 
 	rcu_read_lock();
@@ -7601,7 +7604,8 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask)
 
 
 	cpuset_cpus_allowed(p, cpus_allowed);
-	cpumask_and(new_mask, in_mask, cpus_allowed);
+	temp = get_pcpus_cpuns(current->nsproxy->cpu_ns, in_mask);
+	cpumask_and(new_mask, &temp, cpus_allowed);
 
 	/*
 	 * Since bandwidth control happens on root_domain basis,
@@ -7682,8 +7686,9 @@ SYSCALL_DEFINE3(sched_setaffinity, pid_t, pid, unsigned int, len,
 long sched_getaffinity(pid_t pid, struct cpumask *mask)
 {
 	struct task_struct *p;
+	cpumask_var_t temp;
 	unsigned long flags;
-	int retval;
+	int retval, cpu;
 
 	rcu_read_lock();
 
@@ -7698,6 +7703,13 @@ long sched_getaffinity(pid_t pid, struct cpumask *mask)
 
 	raw_spin_lock_irqsave(&p->pi_lock, flags);
 	cpumask_and(mask, &p->cpus_mask, cpu_active_mask);
+	cpumask_clear(temp);
+	for_each_cpu(cpu, mask) {
+		cpumask_set_cpu(get_vcpu_cpuns(current->nsproxy->cpu_ns, cpu),
+				temp);
+	}
+
+	cpumask_copy(mask, temp);
 	raw_spin_unlock_irqrestore(&p->pi_lock, flags);
 
 out_unlock:
diff --git a/kernel/ucount.c b/kernel/ucount.c
index 87799e2379bd..3adb168b4a2b 100644
--- a/kernel/ucount.c
+++ b/kernel/ucount.c
@@ -76,6 +76,7 @@ static struct ctl_table user_table[] = {
 	UCOUNT_ENTRY("max_mnt_namespaces"),
 	UCOUNT_ENTRY("max_cgroup_namespaces"),
 	UCOUNT_ENTRY("max_time_namespaces"),
+	UCOUNT_ENTRY("max_cpu_namespaces"),
 #ifdef CONFIG_INOTIFY_USER
 	UCOUNT_ENTRY("max_inotify_instances"),
 	UCOUNT_ENTRY("max_inotify_watches"),
-- 
2.31.1


  reply	other threads:[~2021-10-09 15:12 UTC|newest]

Thread overview: 32+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-10-09 15:12 [RFC 0/5] kernel: Introduce CPU Namespace Pratik R. Sampat
2021-10-09 15:12 ` Pratik R. Sampat
2021-10-09 15:12 ` Pratik R. Sampat [this message]
     [not found]   ` <20211009151243.8825-2-psampat-tEXmvtCZX7AybS5Ee8rs3A@public.gmane.org>
2021-10-09 22:37     ` [RFC 1/5] ns: " Peter Zijlstra
2021-10-09 22:37       ` Peter Zijlstra
2021-10-09 15:12 ` [RFC 3/5] cpuset/cpuns: Make cgroup CPUset CPU namespace aware Pratik R. Sampat
     [not found] ` <20211009151243.8825-1-psampat-tEXmvtCZX7AybS5Ee8rs3A@public.gmane.org>
2021-10-09 15:12   ` [RFC 2/5] ns: Add scrambling functionality to CPU namespace Pratik R. Sampat
2021-10-09 15:12     ` Pratik R. Sampat
2021-10-09 15:12   ` [RFC 4/5] cpu/cpuns: Make sysfs CPU namespace aware Pratik R. Sampat
2021-10-09 15:12     ` Pratik R. Sampat
2021-10-09 15:12   ` [RFC 5/5] proc/cpuns: Make procfs load stats " Pratik R. Sampat
2021-10-09 15:12     ` Pratik R. Sampat
2021-10-11 10:11   ` [RFC 0/5] kernel: Introduce CPU Namespace Christian Brauner
2021-10-11 10:11     ` Christian Brauner
2021-10-11 14:17     ` Michal Koutný
2021-10-11 14:17       ` Michal Koutný
2021-10-11 17:42       ` Tejun Heo
2021-10-12  8:42     ` Pratik Sampat
2021-10-14 22:14       ` Tejun Heo
2021-10-18 15:29         ` Pratik Sampat
2021-10-18 16:29           ` Tejun Heo
     [not found]             ` <YW2g73Lwmrhjg/sv-NiLfg/pYEd1N0TnZuCh8vA@public.gmane.org>
2021-10-20 10:44               ` Pratik Sampat
2021-10-20 10:44                 ` Pratik Sampat
     [not found]                 ` <77854748-081f-46c7-df51-357ca78b83b3-tEXmvtCZX7AybS5Ee8rs3A@public.gmane.org>
2021-10-20 16:35                   ` Tejun Heo
2021-10-20 16:35                     ` Tejun Heo
     [not found]                     ` <YXBFVCc61nCG5rto-NiLfg/pYEd1N0TnZuCh8vA@public.gmane.org>
2021-10-21  7:44                       ` Pratik Sampat
2021-10-21  7:44                         ` Pratik Sampat
     [not found]                         ` <bd1811cc-0e04-9e44-0b46-02689ff9a238-tEXmvtCZX7AybS5Ee8rs3A@public.gmane.org>
2021-10-21 17:06                           ` Tejun Heo
2021-10-21 17:06                             ` Tejun Heo
2021-10-21 17:15                   ` Eric W. Biederman
2021-10-21 17:15                     ` Eric W. Biederman
2021-10-09 22:41 ` Peter Zijlstra

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20211009151243.8825-2-psampat@linux.ibm.com \
    --to=psampat@linux.ibm.com \
    --cc=bristot@redhat.com \
    --cc=cgroups@vger.kernel.org \
    --cc=christian@brauner.io \
    --cc=containers@lists.linux-foundation.org \
    --cc=containers@lists.linux.dev \
    --cc=ebiederm@xmission.com \
    --cc=hannes@cmpxchg.org \
    --cc=juri.lelli@redhat.com \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=lizefan.x@bytedance.com \
    --cc=mingo@kernel.org \
    --cc=pratik.r.sampat@gmail.com \
    --cc=tj@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.