* [PATCH 1/2] numactl: new option --all/-a was added for policy settings
2013-09-06 14:33 [PATCH 0/2] New numactl option for overriding of policy settings cpuset awareness Petr Holasek
@ 2013-09-06 14:33 ` Petr Holasek
2013-09-06 14:33 ` [PATCH 2/2] libnuma: new function numa_run_on_node_mask_all was added to libnuma Petr Holasek
2013-09-06 19:50 ` [PATCH 0/2] New numactl option for overriding of policy settings cpuset awareness Cliff Wickman
2 siblings, 0 replies; 6+ messages in thread
From: Petr Holasek @ 2013-09-06 14:33 UTC (permalink / raw)
To: Cliff Wickman; +Cc: Andi Kleen, linux-numa, Petr Holasek
---
numactl.8 | 6 ++++++
numactl.c | 58 +++++++++++++++++++++++++++++++++++++++++++++++++---------
2 files changed, 55 insertions(+), 9 deletions(-)
diff --git a/numactl.8 b/numactl.8
index 576cdb6..7a001c0 100644
--- a/numactl.8
+++ b/numactl.8
@@ -23,6 +23,8 @@ numactl \- Control NUMA policy for processes or shared memory
.SH SYNOPSIS
.B numactl
[
+.B \-\-all
+] [
.B \-\-interleave nodes
] [
.B \-\-preferred node
@@ -107,6 +109,10 @@ for udev names in /dev use
.TP
Policy settings are:
.TP
+.B \-\-all, \-a
+Disable the default cpuset awareness, so that the user can use all possible
+CPUs/nodes in the policy settings that follow.
+.TP
.B \-\-interleave=nodes, \-i nodes
Set a memory interleave policy. Memory will be allocated using round robin
on
diff --git a/numactl.c b/numactl.c
index e1d5487..97955a4 100755
--- a/numactl.c
+++ b/numactl.c
@@ -29,9 +29,13 @@
#include "util.h"
#include "shm.h"
+#define CPUSET 0
+#define ALL 1
+
int exitcode;
struct option opts[] = {
+ {"all", 0, 0, 'a'},
{"interleave", 1, 0, 'i' },
{"preferred", 1, 0, 'p' },
{"cpubind", 1, 0, 'c' },
@@ -60,7 +64,7 @@ struct option opts[] = {
void usage(void)
{
fprintf(stderr,
- "usage: numactl [--interleave= | -i <nodes>] [--preferred= | -p <node>]\n"
+ "usage: numactl [--all | -a] [--interleave= | -i <nodes>] [--preferred= | -p <node>]\n"
" [--physcpubind= | -C <cpus>] [--cpunodebind= | -N <nodes>]\n"
" [--membind= | -m <nodes>] [--localalloc | -l] command args ...\n"
" numactl [--show | -s]\n"
@@ -328,6 +332,8 @@ int did_strict = 0;
int do_shm = 0;
int do_dump = 0;
int shmattached = 0;
+int did_node_cpu_parse = 0;
+int parse_all = 0;
char *shmoption;
void check_cpubind(int flag)
@@ -355,6 +361,12 @@ void needshm(char *opt)
usage_msg("%s must be after shared memory specification", opt);
}
+void check_all_parse(int flag)
+{
+ if (did_node_cpu_parse)
+ usage_msg("--all/-a option must be before all cpu/node specifications");
+}
+
void get_short_opts(struct option *o, char *s)
{
*s++ = '+';
@@ -379,7 +391,7 @@ void check_shmbeyond(char *msg)
}
}
-static struct bitmask *numactl_parse_nodestring(char *s)
+static struct bitmask *numactl_parse_nodestring(char *s, int flag)
{
static char *last;
@@ -390,7 +402,11 @@ static struct bitmask *numactl_parse_nodestring(char *s)
} else {
last = s;
}
- return numa_parse_nodestring(s);
+
+ if (flag == ALL)
+ return numa_parse_nodestring_all(s);
+ else
+ return numa_parse_nodestring(s);
}
int main(int ac, char **av)
@@ -413,13 +429,17 @@ int main(int ac, char **av)
exit(0);
case 'i': /* --interleave */
checknuma();
- mask = numactl_parse_nodestring(optarg);
+ if (parse_all)
+ mask = numactl_parse_nodestring(optarg, ALL);
+ else
+ mask = numactl_parse_nodestring(optarg, CPUSET);
if (!mask) {
printf ("<%s> is invalid\n", optarg);
usage();
}
errno = 0;
+ did_node_cpu_parse = 1;
setpolicy(MPOL_INTERLEAVE);
if (shmfd >= 0)
numa_interleave_memory(shmptr, shmlen, mask);
@@ -431,7 +451,10 @@ int main(int ac, char **av)
case 'c': /* --cpubind */
dontshm("-c/--cpubind/--cpunodebind");
checknuma();
- mask = numactl_parse_nodestring(optarg);
+ if (parse_all)
+ mask = numactl_parse_nodestring(optarg, ALL);
+ else
+ mask = numactl_parse_nodestring(optarg, CPUSET);
if (!mask) {
printf ("<%s> is invalid\n", optarg);
usage();
@@ -439,14 +462,18 @@ int main(int ac, char **av)
errno = 0;
check_cpubind(do_shm);
did_cpubind = 1;
- numa_run_on_node_mask(mask);
+ did_node_cpu_parse = 1;
+ numa_run_on_node_mask_all(mask);
checkerror("sched_setaffinity");
break;
case 'C': /* --physcpubind */
{
struct bitmask *cpubuf;
dontshm("-C/--physcpubind");
- cpubuf = numa_parse_cpustring(optarg);
+ if (parse_all)
+ cpubuf = numa_parse_cpustring_all(optarg);
+ else
+ cpubuf = numa_parse_cpustring(optarg);
if (!cpubuf) {
printf ("<%s> is invalid\n", optarg);
usage();
@@ -454,6 +481,7 @@ int main(int ac, char **av)
errno = 0;
check_cpubind(do_shm);
did_cpubind = 1;
+ did_node_cpu_parse = 1;
numa_sched_setaffinity(0, cpubuf);
checkerror("sched_setaffinity");
free(cpubuf);
@@ -462,12 +490,16 @@ int main(int ac, char **av)
case 'm': /* --membind */
checknuma();
setpolicy(MPOL_BIND);
- mask = numactl_parse_nodestring(optarg);
+ if (parse_all)
+ mask = numactl_parse_nodestring(optarg, ALL);
+ else
+ mask = numactl_parse_nodestring(optarg, CPUSET);
if (!mask) {
printf ("<%s> is invalid\n", optarg);
usage();
}
errno = 0;
+ did_node_cpu_parse = 1;
numa_set_bind_policy(1);
if (shmfd >= 0) {
numa_tonodemask_memory(shmptr, shmlen, mask);
@@ -480,7 +512,10 @@ int main(int ac, char **av)
case 'p': /* --preferred */
checknuma();
setpolicy(MPOL_PREFERRED);
- mask = numactl_parse_nodestring(optarg);
+ if (parse_all)
+ mask = numactl_parse_nodestring(optarg, ALL);
+ else
+ mask = numactl_parse_nodestring(optarg, CPUSET);
if (!mask) {
printf ("<%s> is invalid\n", optarg);
usage();
@@ -495,6 +530,7 @@ int main(int ac, char **av)
usage();
numa_bitmask_free(mask);
errno = 0;
+ did_node_cpu_parse = 1;
numa_set_bind_policy(0);
if (shmfd >= 0)
numa_tonode_memory(shmptr, shmlen, node);
@@ -586,6 +622,10 @@ int main(int ac, char **av)
verify_shm(set_policy, mask);
break;
+ case 'a': /* --all */
+ check_all_parse(did_node_cpu_parse);
+ parse_all = 1;
+ break;
default:
usage();
}
--
1.8.1.4
^ permalink raw reply related [flat|nested] 6+ messages in thread
* [PATCH 2/2] libnuma: new function numa_run_on_node_mask_all was added to libnuma
2013-09-06 14:33 [PATCH 0/2] New numactl option for overriding of policy settings cpuset awareness Petr Holasek
2013-09-06 14:33 ` [PATCH 1/2] numactl: new option --all/-a was added for policy settings Petr Holasek
@ 2013-09-06 14:33 ` Petr Holasek
2013-09-06 19:24 ` Andi Kleen
2013-09-06 19:50 ` [PATCH 0/2] New numactl option for overriding of policy settings cpuset awareness Cliff Wickman
2 siblings, 1 reply; 6+ messages in thread
From: Petr Holasek @ 2013-09-06 14:33 UTC (permalink / raw)
To: Cliff Wickman; +Cc: Andi Kleen, linux-numa, Petr Holasek
---
libnuma.c | 46 ++++++++++++++++++++++++++++++++++++++++++++++
numa.h | 2 ++
versions.ldscript | 8 ++++++++
3 files changed, 56 insertions(+)
diff --git a/libnuma.c b/libnuma.c
index a319bcc..091261c 100755
--- a/libnuma.c
+++ b/libnuma.c
@@ -1524,6 +1524,52 @@ __asm__(".symver numa_run_on_node_mask_v2,numa_run_on_node_mask@@libnuma_1.2");
make_internal_alias(numa_run_on_node_mask_v2);
+/*
+ * Given a node mask (size of a kernel nodemask_t) (probably populated by
+ * a user argument list) set up a map of cpus (map "cpus") on those nodes
+ * without any cpuset awareness. Then set affinity to those cpus.
+ */
+int
+numa_run_on_node_mask_all(struct bitmask *bmp)
+{
+ int ncpus, i, k, err;
+ struct bitmask *cpus, *nodecpus;
+
+ cpus = numa_allocate_cpumask();
+ ncpus = cpus->size;
+ nodecpus = numa_allocate_cpumask();
+
+ for (i = 0; i < bmp->size; i++) {
+ if (bmp->maskp[i / BITS_PER_LONG] == 0)
+ continue;
+ if (numa_bitmask_isbitset(bmp, i)) {
+ if (!numa_bitmask_isbitset(numa_possible_nodes_ptr, i)) {
+ numa_warn(W_noderunmask,
+ "node %d not allowed", i);
+ continue;
+ }
+ if (numa_node_to_cpus_v2_int(i, nodecpus) < 0) {
+ numa_warn(W_noderunmask,
+ "Cannot read node cpumask from sysfs");
+ continue;
+ }
+ for (k = 0; k < CPU_LONGS(ncpus); k++)
+ cpus->maskp[k] |= nodecpus->maskp[k];
+ }
+ }
+ err = numa_sched_setaffinity_v2_int(0, cpus);
+
+ numa_bitmask_free(cpus);
+ numa_bitmask_free(nodecpus);
+
+ /* With possible nodes freedom it can happen easily now */
+ if (err < 0) {
+ numa_error("numa_sched_setaffinity_v2_int() failed; abort\n");
+ }
+
+ return err;
+}
+
nodemask_t
numa_get_run_node_mask_v1(void)
{
diff --git a/numa.h b/numa.h
index 5982fdc..01b3165 100755
--- a/numa.h
+++ b/numa.h
@@ -238,6 +238,8 @@ void numa_police_memory(void *start, size_t size);
/* Run current task only on nodes in mask */
int numa_run_on_node_mask(struct bitmask *mask);
+/* Run current task on nodes in mask without any cpuset awareness */
+int numa_run_on_node_mask_all(struct bitmask *mask);
/* Run current task only on node */
int numa_run_on_node(int node);
/* Return current mask of nodes the task can run on */
diff --git a/versions.ldscript b/versions.ldscript
index 2f2d254..eaddc7e 100755
--- a/versions.ldscript
+++ b/versions.ldscript
@@ -165,3 +165,11 @@ libnuma_1.3 {
*;
} libnuma_1.2;
+# New interface with customizable cpuset awareness
+# was added into version 1.4
+libnuma_1.4 {
+ global:
+ numa_run_on_node_mask_all;
+ local:
+ *;
+} libnuma_1.3;
--
1.8.1.4
^ permalink raw reply related [flat|nested] 6+ messages in thread