* [PATCH v2 1/9] tracing/filters: Dynamically allocate filter_pred.regex
2023-07-07 17:21 [PATCH v2 0/9] tracing/filters: filtering event fields with a cpumask Valentin Schneider
@ 2023-07-07 17:21 ` Valentin Schneider
2023-07-07 17:21 ` [PATCH v2 2/9] tracing/filters: Enable filtering a cpumask field by another cpumask Valentin Schneider
` (7 subsequent siblings)
8 siblings, 0 replies; 10+ messages in thread
From: Valentin Schneider @ 2023-07-07 17:21 UTC (permalink / raw)
To: linux-kernel, linux-trace-kernel, linux-doc
Cc: Steven Rostedt, Masami Hiramatsu, Jonathan Corbet, Juri Lelli,
Daniel Bristot de Oliveira, Marcelo Tosatti, Leonardo Bras,
Frederic Weisbecker
Every predicate allocation includes a MAX_FILTER_STR_VAL (256) char array
in the regex field, even if the predicate function does not use the field.
A later commit will introduce a dynamically allocated cpumask to struct
filter_pred, which will require a dedicated freeing function. Bite the
bullet and make filter_pred.regex dynamically allocated.
While at it, reorder the fields of filter_pred to fill in the byte
holes. The struct now fits on a single cacheline.
No change in behaviour intended.
The kfree()'s were patched via Coccinelle:
@@
struct filter_pred *pred;
@@
-kfree(pred);
+free_predicate(pred);
Signed-off-by: Valentin Schneider <vschneid@redhat.com>
---
kernel/trace/trace_events_filter.c | 64 ++++++++++++++++++------------
1 file changed, 39 insertions(+), 25 deletions(-)
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index 1dad64267878c..91fc9990107f1 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -70,15 +70,15 @@ enum filter_pred_fn {
};
struct filter_pred {
- enum filter_pred_fn fn_num;
- u64 val;
- u64 val2;
- struct regex regex;
+ struct regex *regex;
unsigned short *ops;
struct ftrace_event_field *field;
- int offset;
+ u64 val;
+ u64 val2;
+ enum filter_pred_fn fn_num;
+ int offset;
int not;
- int op;
+ int op;
};
/*
@@ -186,6 +186,14 @@ enum {
PROCESS_OR = 4,
};
+static void free_predicate(struct filter_pred *pred)
+{
+ if (pred) {
+ kfree(pred->regex);
+ kfree(pred);
+ }
+}
+
/*
* Without going into a formal proof, this explains the method that is used in
* parsing the logical expressions.
@@ -623,7 +631,7 @@ predicate_parse(const char *str, int nr_parens, int nr_preds,
kfree(inverts);
if (prog_stack) {
for (i = 0; prog_stack[i].pred; i++)
- kfree(prog_stack[i].pred);
+ free_predicate(prog_stack[i].pred);
kfree(prog_stack);
}
return ERR_PTR(ret);
@@ -750,7 +758,7 @@ static int filter_pred_string(struct filter_pred *pred, void *event)
char *addr = (char *)(event + pred->offset);
int cmp, match;
- cmp = pred->regex.match(addr, &pred->regex, pred->regex.field_len);
+ cmp = pred->regex->match(addr, pred->regex, pred->regex->field_len);
match = cmp ^ pred->not;
@@ -763,7 +771,7 @@ static __always_inline int filter_pchar(struct filter_pred *pred, char *str)
int len;
len = strlen(str) + 1; /* including tailing '\0' */
- cmp = pred->regex.match(str, &pred->regex, len);
+ cmp = pred->regex->match(str, pred->regex, len);
match = cmp ^ pred->not;
@@ -813,7 +821,7 @@ static int filter_pred_strloc(struct filter_pred *pred, void *event)
char *addr = (char *)(event + str_loc);
int cmp, match;
- cmp = pred->regex.match(addr, &pred->regex, str_len);
+ cmp = pred->regex->match(addr, pred->regex, str_len);
match = cmp ^ pred->not;
@@ -836,7 +844,7 @@ static int filter_pred_strrelloc(struct filter_pred *pred, void *event)
char *addr = (char *)(&item[1]) + str_loc;
int cmp, match;
- cmp = pred->regex.match(addr, &pred->regex, str_len);
+ cmp = pred->regex->match(addr, pred->regex, str_len);
match = cmp ^ pred->not;
@@ -874,7 +882,7 @@ static int filter_pred_comm(struct filter_pred *pred, void *event)
{
int cmp;
- cmp = pred->regex.match(current->comm, &pred->regex,
+ cmp = pred->regex->match(current->comm, pred->regex,
TASK_COMM_LEN);
return cmp ^ pred->not;
}
@@ -1004,7 +1012,7 @@ enum regex_type filter_parse_regex(char *buff, int len, char **search, int *not)
static void filter_build_regex(struct filter_pred *pred)
{
- struct regex *r = &pred->regex;
+ struct regex *r = pred->regex;
char *search;
enum regex_type type = MATCH_FULL;
@@ -1169,7 +1177,7 @@ static void free_prog(struct event_filter *filter)
return;
for (i = 0; prog[i].pred; i++)
- kfree(prog[i].pred);
+ free_predicate(prog[i].pred);
kfree(prog);
}
@@ -1553,9 +1561,12 @@ static int parse_pred(const char *str, void *data,
goto err_free;
}
- pred->regex.len = len;
- strncpy(pred->regex.pattern, str + s, len);
- pred->regex.pattern[len] = 0;
+ pred->regex = kzalloc(sizeof(*pred->regex), GFP_KERNEL);
+ if (!pred->regex)
+ goto err_mem;
+ pred->regex->len = len;
+ strncpy(pred->regex->pattern, str + s, len);
+ pred->regex->pattern[len] = 0;
/* This is either a string, or an integer */
} else if (str[i] == '\'' || str[i] == '"') {
@@ -1597,9 +1608,12 @@ static int parse_pred(const char *str, void *data,
goto err_free;
}
- pred->regex.len = len;
- strncpy(pred->regex.pattern, str + s, len);
- pred->regex.pattern[len] = 0;
+ pred->regex = kzalloc(sizeof(*pred->regex), GFP_KERNEL);
+ if (!pred->regex)
+ goto err_mem;
+ pred->regex->len = len;
+ strncpy(pred->regex->pattern, str + s, len);
+ pred->regex->pattern[len] = 0;
filter_build_regex(pred);
@@ -1608,7 +1622,7 @@ static int parse_pred(const char *str, void *data,
} else if (field->filter_type == FILTER_STATIC_STRING) {
pred->fn_num = FILTER_PRED_FN_STRING;
- pred->regex.field_len = field->size;
+ pred->regex->field_len = field->size;
} else if (field->filter_type == FILTER_DYN_STRING) {
pred->fn_num = FILTER_PRED_FN_STRLOC;
@@ -1691,10 +1705,10 @@ static int parse_pred(const char *str, void *data,
return i;
err_free:
- kfree(pred);
+ free_predicate(pred);
return -EINVAL;
err_mem:
- kfree(pred);
+ free_predicate(pred);
return -ENOMEM;
}
@@ -2287,8 +2301,8 @@ static int ftrace_function_set_filter_pred(struct filter_pred *pred,
return ret;
return __ftrace_function_set_filter(pred->op == OP_EQ,
- pred->regex.pattern,
- pred->regex.len,
+ pred->regex->pattern,
+ pred->regex->len,
data);
}
--
2.31.1
^ permalink raw reply related [flat|nested] 10+ messages in thread
* [PATCH v2 2/9] tracing/filters: Enable filtering a cpumask field by another cpumask
2023-07-07 17:21 [PATCH v2 0/9] tracing/filters: filtering event fields with a cpumask Valentin Schneider
2023-07-07 17:21 ` [PATCH v2 1/9] tracing/filters: Dynamically allocate filter_pred.regex Valentin Schneider
@ 2023-07-07 17:21 ` Valentin Schneider
2023-07-07 17:21 ` [PATCH v2 3/9] tracing/filters: Enable filtering a scalar field by a cpumask Valentin Schneider
` (6 subsequent siblings)
8 siblings, 0 replies; 10+ messages in thread
From: Valentin Schneider @ 2023-07-07 17:21 UTC (permalink / raw)
To: linux-kernel, linux-trace-kernel, linux-doc
Cc: Steven Rostedt, Masami Hiramatsu, Jonathan Corbet, Juri Lelli,
Daniel Bristot de Oliveira, Marcelo Tosatti, Leonardo Bras,
Frederic Weisbecker
The recently introduced ipi_send_cpumask trace event contains a cpumask
field, but it currently cannot be used in filter expressions.
Make event filtering aware of cpumask fields, and allow these to be
filtered by a user-provided cpumask.
The user-provided cpumask is to be given in cpulist format and wrapped as:
"CPUS{$cpulist}". The use of curly braces instead of parentheses is to
prevent predicate_parse() from parsing the contents of CPUS{...} as a
full-fledged predicate subexpression.
This enables e.g.:
$ trace-cmd record -e 'ipi_send_cpumask' -f 'cpumask & CPUS{2,4,6,8-32}'
Signed-off-by: Valentin Schneider <vschneid@redhat.com>
---
include/linux/trace_events.h | 1 +
kernel/trace/trace_events_filter.c | 97 +++++++++++++++++++++++++++++-
2 files changed, 96 insertions(+), 2 deletions(-)
diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index 7c4a0b72334eb..974ef37a06c83 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -804,6 +804,7 @@ enum {
FILTER_RDYN_STRING,
FILTER_PTR_STRING,
FILTER_TRACE_FN,
+ FILTER_CPUMASK,
FILTER_COMM,
FILTER_CPU,
FILTER_STACKTRACE,
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index 91fc9990107f1..cb1863dfa280b 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -64,6 +64,7 @@ enum filter_pred_fn {
FILTER_PRED_FN_PCHAR_USER,
FILTER_PRED_FN_PCHAR,
FILTER_PRED_FN_CPU,
+ FILTER_PRED_FN_CPUMASK,
FILTER_PRED_FN_FUNCTION,
FILTER_PRED_FN_,
FILTER_PRED_TEST_VISITED,
@@ -71,6 +72,7 @@ enum filter_pred_fn {
struct filter_pred {
struct regex *regex;
+ struct cpumask *mask;
unsigned short *ops;
struct ftrace_event_field *field;
u64 val;
@@ -94,6 +96,8 @@ struct filter_pred {
C(TOO_MANY_OPEN, "Too many '('"), \
C(TOO_MANY_CLOSE, "Too few '('"), \
C(MISSING_QUOTE, "Missing matching quote"), \
+ C(MISSING_BRACE_OPEN, "Missing '{'"), \
+ C(MISSING_BRACE_CLOSE, "Missing '}'"), \
C(OPERAND_TOO_LONG, "Operand too long"), \
C(EXPECT_STRING, "Expecting string field"), \
C(EXPECT_DIGIT, "Expecting numeric field"), \
@@ -103,6 +107,7 @@ struct filter_pred {
C(BAD_SUBSYS_FILTER, "Couldn't find or set field in one of a subsystem's events"), \
C(TOO_MANY_PREDS, "Too many terms in predicate expression"), \
C(INVALID_FILTER, "Meaningless filter expression"), \
+ C(INVALID_CPULIST, "Invalid cpulist"), \
C(IP_FIELD_ONLY, "Only 'ip' field is supported for function trace"), \
C(INVALID_VALUE, "Invalid value (did you forget quotes)?"), \
C(NO_FUNCTION, "Function not found"), \
@@ -190,6 +195,7 @@ static void free_predicate(struct filter_pred *pred)
{
if (pred) {
kfree(pred->regex);
+ kfree(pred->mask);
kfree(pred);
}
}
@@ -877,6 +883,26 @@ static int filter_pred_cpu(struct filter_pred *pred, void *event)
}
}
+/* Filter predicate for cpumask field vs user-provided cpumask */
+static int filter_pred_cpumask(struct filter_pred *pred, void *event)
+{
+ u32 item = *(u32 *)(event + pred->offset);
+ int loc = item & 0xffff;
+ const struct cpumask *mask = (event + loc);
+ const struct cpumask *cmp = pred->mask;
+
+ switch (pred->op) {
+ case OP_EQ:
+ return cpumask_equal(mask, cmp);
+ case OP_NE:
+ return !cpumask_equal(mask, cmp);
+ case OP_BAND:
+ return cpumask_intersects(mask, cmp);
+ default:
+ return 0;
+ }
+}
+
/* Filter predicate for COMM. */
static int filter_pred_comm(struct filter_pred *pred, void *event)
{
@@ -1244,8 +1270,12 @@ static void filter_free_subsystem_filters(struct trace_subsystem_dir *dir,
int filter_assign_type(const char *type)
{
- if (strstr(type, "__data_loc") && strstr(type, "char"))
- return FILTER_DYN_STRING;
+ if (strstr(type, "__data_loc")) {
+ if (strstr(type, "char"))
+ return FILTER_DYN_STRING;
+ if (strstr(type, "cpumask_t"))
+ return FILTER_CPUMASK;
+ }
if (strstr(type, "__rel_loc") && strstr(type, "char"))
return FILTER_RDYN_STRING;
@@ -1357,6 +1387,8 @@ static int filter_pred_fn_call(struct filter_pred *pred, void *event)
return filter_pred_pchar(pred, event);
case FILTER_PRED_FN_CPU:
return filter_pred_cpu(pred, event);
+ case FILTER_PRED_FN_CPUMASK:
+ return filter_pred_cpumask(pred, event);
case FILTER_PRED_FN_FUNCTION:
return filter_pred_function(pred, event);
case FILTER_PRED_TEST_VISITED:
@@ -1568,6 +1600,67 @@ static int parse_pred(const char *str, void *data,
strncpy(pred->regex->pattern, str + s, len);
pred->regex->pattern[len] = 0;
+ } else if (!strncmp(str + i, "CPUS", 4)) {
+ unsigned int maskstart;
+ char *tmp;
+
+ switch (field->filter_type) {
+ case FILTER_CPUMASK:
+ break;
+ default:
+ parse_error(pe, FILT_ERR_ILLEGAL_FIELD_OP, pos + i);
+ goto err_free;
+ }
+
+ switch (op) {
+ case OP_EQ:
+ case OP_NE:
+ case OP_BAND:
+ break;
+ default:
+ parse_error(pe, FILT_ERR_ILLEGAL_FIELD_OP, pos + i);
+ goto err_free;
+ }
+
+ /* Skip CPUS */
+ i += 4;
+ if (str[i++] != '{') {
+ parse_error(pe, FILT_ERR_MISSING_BRACE_OPEN, pos + i);
+ goto err_free;
+ }
+ maskstart = i;
+
+ /* Walk the cpulist until closing } */
+ for (; str[i] && str[i] != '}'; i++);
+ if (str[i] != '}') {
+ parse_error(pe, FILT_ERR_MISSING_BRACE_CLOSE, pos + i);
+ goto err_free;
+ }
+
+ if (maskstart == i) {
+ parse_error(pe, FILT_ERR_INVALID_CPULIST, pos + i);
+ goto err_free;
+ }
+
+ /* Copy the cpulist between { and } */
+ tmp = kmalloc((i - maskstart) + 1, GFP_KERNEL);
+ strscpy(tmp, str + maskstart, (i - maskstart) + 1);
+
+ pred->mask = kzalloc(cpumask_size(), GFP_KERNEL);
+ if (!pred->mask)
+ goto err_mem;
+
+ /* Now parse it */
+ if (cpulist_parse(tmp, pred->mask)) {
+ parse_error(pe, FILT_ERR_INVALID_CPULIST, pos + i);
+ goto err_free;
+ }
+
+ /* Move along */
+ i++;
+ if (field->filter_type == FILTER_CPUMASK)
+ pred->fn_num = FILTER_PRED_FN_CPUMASK;
+
/* This is either a string, or an integer */
} else if (str[i] == '\'' || str[i] == '"') {
char q = str[i];
--
2.31.1
^ permalink raw reply related [flat|nested] 10+ messages in thread
* [PATCH v2 3/9] tracing/filters: Enable filtering a scalar field by a cpumask
2023-07-07 17:21 [PATCH v2 0/9] tracing/filters: filtering event fields with a cpumask Valentin Schneider
2023-07-07 17:21 ` [PATCH v2 1/9] tracing/filters: Dynamically allocate filter_pred.regex Valentin Schneider
2023-07-07 17:21 ` [PATCH v2 2/9] tracing/filters: Enable filtering a cpumask field by another cpumask Valentin Schneider
@ 2023-07-07 17:21 ` Valentin Schneider
2023-07-07 17:21 ` [PATCH v2 4/9] tracing/filters: Enable filtering the CPU common " Valentin Schneider
` (5 subsequent siblings)
8 siblings, 0 replies; 10+ messages in thread
From: Valentin Schneider @ 2023-07-07 17:21 UTC (permalink / raw)
To: linux-kernel, linux-trace-kernel, linux-doc
Cc: Steven Rostedt, Masami Hiramatsu, Jonathan Corbet, Juri Lelli,
Daniel Bristot de Oliveira, Marcelo Tosatti, Leonardo Bras,
Frederic Weisbecker
Several events use a scalar field to denote a CPU:
o sched_wakeup.target_cpu
o sched_migrate_task.orig_cpu,dest_cpu
o sched_move_numa.src_cpu,dst_cpu
o ipi_send_cpu.cpu
o ...
Filtering these currently requires using arithmetic comparison functions,
which can be tedious when dealing with interleaved SMT or NUMA CPU ids.
Allow these to be filtered by a user-provided cpumask, which enables e.g.:
$ trace-cmd record -e 'sched_wakeup' -f 'target_cpu & CPUS{2,4,6,8-32}'
Signed-off-by: Valentin Schneider <vschneid@redhat.com>
---
NOTE: I went with an implicit cpumask conversion of the event field, as
AFAICT predicate_parse() does not support parsing the application of a
function to a field (e.g. 'CPUS(target_cpu) & CPUS{2,4,6,8-32}')
---
kernel/trace/trace_events_filter.c | 92 ++++++++++++++++++++++++++----
1 file changed, 81 insertions(+), 11 deletions(-)
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index cb1863dfa280b..1e14f801685a8 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -46,15 +46,19 @@ static const char * ops[] = { OPS };
enum filter_pred_fn {
FILTER_PRED_FN_NOP,
FILTER_PRED_FN_64,
+ FILTER_PRED_FN_64_CPUMASK,
FILTER_PRED_FN_S64,
FILTER_PRED_FN_U64,
FILTER_PRED_FN_32,
+ FILTER_PRED_FN_32_CPUMASK,
FILTER_PRED_FN_S32,
FILTER_PRED_FN_U32,
FILTER_PRED_FN_16,
+ FILTER_PRED_FN_16_CPUMASK,
FILTER_PRED_FN_S16,
FILTER_PRED_FN_U16,
FILTER_PRED_FN_8,
+ FILTER_PRED_FN_8_CPUMASK,
FILTER_PRED_FN_S8,
FILTER_PRED_FN_U8,
FILTER_PRED_FN_COMM,
@@ -643,6 +647,39 @@ predicate_parse(const char *str, int nr_parens, int nr_preds,
return ERR_PTR(ret);
}
+static inline int
+do_filter_cpumask(int op, const struct cpumask *mask, const struct cpumask *cmp)
+{
+ switch (op) {
+ case OP_EQ:
+ return cpumask_equal(mask, cmp);
+ case OP_NE:
+ return !cpumask_equal(mask, cmp);
+ case OP_BAND:
+ return cpumask_intersects(mask, cmp);
+ default:
+ return 0;
+ }
+}
+
+/* Optimisation of do_filter_cpumask() for scalar fields */
+static inline int
+do_filter_scalar_cpumask(int op, unsigned int cpu, const struct cpumask *mask)
+{
+ switch (op) {
+ case OP_EQ:
+ return cpumask_test_cpu(cpu, mask) &&
+ cpumask_nth(1, mask) >= nr_cpu_ids;
+ case OP_NE:
+ return !cpumask_test_cpu(cpu, mask) ||
+ cpumask_nth(1, mask) < nr_cpu_ids;
+ case OP_BAND:
+ return cpumask_test_cpu(cpu, mask);
+ default:
+ return 0;
+ }
+}
+
enum pred_cmp_types {
PRED_CMP_TYPE_NOP,
PRED_CMP_TYPE_LT,
@@ -686,6 +723,18 @@ static int filter_pred_##type(struct filter_pred *pred, void *event) \
} \
}
+#define DEFINE_CPUMASK_COMPARISON_PRED(size) \
+static int filter_pred_##size##_cpumask(struct filter_pred *pred, void *event) \
+{ \
+ u##size *addr = (u##size *)(event + pred->offset); \
+ unsigned int cpu = *addr; \
+ \
+ if (cpu >= nr_cpu_ids) \
+ return 0; \
+ \
+ return do_filter_scalar_cpumask(pred->op, cpu, pred->mask); \
+}
+
#define DEFINE_EQUALITY_PRED(size) \
static int filter_pred_##size(struct filter_pred *pred, void *event) \
{ \
@@ -707,6 +756,11 @@ DEFINE_COMPARISON_PRED(u16);
DEFINE_COMPARISON_PRED(s8);
DEFINE_COMPARISON_PRED(u8);
+DEFINE_CPUMASK_COMPARISON_PRED(64);
+DEFINE_CPUMASK_COMPARISON_PRED(32);
+DEFINE_CPUMASK_COMPARISON_PRED(16);
+DEFINE_CPUMASK_COMPARISON_PRED(8);
+
DEFINE_EQUALITY_PRED(64);
DEFINE_EQUALITY_PRED(32);
DEFINE_EQUALITY_PRED(16);
@@ -891,16 +945,7 @@ static int filter_pred_cpumask(struct filter_pred *pred, void *event)
const struct cpumask *mask = (event + loc);
const struct cpumask *cmp = pred->mask;
- switch (pred->op) {
- case OP_EQ:
- return cpumask_equal(mask, cmp);
- case OP_NE:
- return !cpumask_equal(mask, cmp);
- case OP_BAND:
- return cpumask_intersects(mask, cmp);
- default:
- return 0;
- }
+ return do_filter_cpumask(pred->op, mask, cmp);
}
/* Filter predicate for COMM. */
@@ -1351,24 +1396,32 @@ static int filter_pred_fn_call(struct filter_pred *pred, void *event)
switch (pred->fn_num) {
case FILTER_PRED_FN_64:
return filter_pred_64(pred, event);
+ case FILTER_PRED_FN_64_CPUMASK:
+ return filter_pred_64_cpumask(pred, event);
case FILTER_PRED_FN_S64:
return filter_pred_s64(pred, event);
case FILTER_PRED_FN_U64:
return filter_pred_u64(pred, event);
case FILTER_PRED_FN_32:
return filter_pred_32(pred, event);
+ case FILTER_PRED_FN_32_CPUMASK:
+ return filter_pred_32_cpumask(pred, event);
case FILTER_PRED_FN_S32:
return filter_pred_s32(pred, event);
case FILTER_PRED_FN_U32:
return filter_pred_u32(pred, event);
case FILTER_PRED_FN_16:
return filter_pred_16(pred, event);
+ case FILTER_PRED_FN_16_CPUMASK:
+ return filter_pred_16_cpumask(pred, event);
case FILTER_PRED_FN_S16:
return filter_pred_s16(pred, event);
case FILTER_PRED_FN_U16:
return filter_pred_u16(pred, event);
case FILTER_PRED_FN_8:
return filter_pred_8(pred, event);
+ case FILTER_PRED_FN_8_CPUMASK:
+ return filter_pred_8_cpumask(pred, event);
case FILTER_PRED_FN_S8:
return filter_pred_s8(pred, event);
case FILTER_PRED_FN_U8:
@@ -1606,6 +1659,7 @@ static int parse_pred(const char *str, void *data,
switch (field->filter_type) {
case FILTER_CPUMASK:
+ case FILTER_OTHER:
break;
default:
parse_error(pe, FILT_ERR_ILLEGAL_FIELD_OP, pos + i);
@@ -1658,8 +1712,24 @@ static int parse_pred(const char *str, void *data,
/* Move along */
i++;
- if (field->filter_type == FILTER_CPUMASK)
+ if (field->filter_type == FILTER_CPUMASK) {
pred->fn_num = FILTER_PRED_FN_CPUMASK;
+ } else {
+ switch (field->size) {
+ case 8:
+ pred->fn_num = FILTER_PRED_FN_64_CPUMASK;
+ break;
+ case 4:
+ pred->fn_num = FILTER_PRED_FN_32_CPUMASK;
+ break;
+ case 2:
+ pred->fn_num = FILTER_PRED_FN_16_CPUMASK;
+ break;
+ case 1:
+ pred->fn_num = FILTER_PRED_FN_8_CPUMASK;
+ break;
+ }
+ }
/* This is either a string, or an integer */
} else if (str[i] == '\'' || str[i] == '"') {
--
2.31.1
^ permalink raw reply related [flat|nested] 10+ messages in thread
* [PATCH v2 4/9] tracing/filters: Enable filtering the CPU common field by a cpumask
2023-07-07 17:21 [PATCH v2 0/9] tracing/filters: filtering event fields with a cpumask Valentin Schneider
` (2 preceding siblings ...)
2023-07-07 17:21 ` [PATCH v2 3/9] tracing/filters: Enable filtering a scalar field by a cpumask Valentin Schneider
@ 2023-07-07 17:21 ` Valentin Schneider
2023-07-07 17:21 ` [PATCH v2 5/9] tracing/filters: Optimise cpumask vs cpumask filtering when user mask is a single CPU Valentin Schneider
` (4 subsequent siblings)
8 siblings, 0 replies; 10+ messages in thread
From: Valentin Schneider @ 2023-07-07 17:21 UTC (permalink / raw)
To: linux-kernel, linux-trace-kernel, linux-doc
Cc: Steven Rostedt, Masami Hiramatsu, Jonathan Corbet, Juri Lelli,
Daniel Bristot de Oliveira, Marcelo Tosatti, Leonardo Bras,
Frederic Weisbecker
The tracing_cpumask lets us specify which CPUs are traced in a buffer
instance, but doesn't let us do this on a per-event basis (unless one
creates an instance per event).
A previous commit added filtering scalar fields by a user-given cpumask;
make this work with the CPU common field as well.
This enables doing things like:
$ trace-cmd record -e 'sched_switch' -f 'CPU & CPUS{12-52}' \
-e 'sched_wakeup' -f 'target_cpu & CPUS{12-52}'
Signed-off-by: Valentin Schneider <vschneid@redhat.com>
---
kernel/trace/trace_events_filter.c | 14 ++++++++++++++
1 file changed, 14 insertions(+)
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index 1e14f801685a8..3009d0c61b532 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -68,6 +68,7 @@ enum filter_pred_fn {
FILTER_PRED_FN_PCHAR_USER,
FILTER_PRED_FN_PCHAR,
FILTER_PRED_FN_CPU,
+ FILTER_PRED_FN_CPU_CPUMASK,
FILTER_PRED_FN_CPUMASK,
FILTER_PRED_FN_FUNCTION,
FILTER_PRED_FN_,
@@ -937,6 +938,14 @@ static int filter_pred_cpu(struct filter_pred *pred, void *event)
}
}
+/* Filter predicate for current CPU vs user-provided cpumask */
+static int filter_pred_cpu_cpumask(struct filter_pred *pred, void *event)
+{
+ int cpu = raw_smp_processor_id();
+
+ return do_filter_scalar_cpumask(pred->op, cpu, pred->mask);
+}
+
/* Filter predicate for cpumask field vs user-provided cpumask */
static int filter_pred_cpumask(struct filter_pred *pred, void *event)
{
@@ -1440,6 +1449,8 @@ static int filter_pred_fn_call(struct filter_pred *pred, void *event)
return filter_pred_pchar(pred, event);
case FILTER_PRED_FN_CPU:
return filter_pred_cpu(pred, event);
+ case FILTER_PRED_FN_CPU_CPUMASK:
+ return filter_pred_cpu_cpumask(pred, event);
case FILTER_PRED_FN_CPUMASK:
return filter_pred_cpumask(pred, event);
case FILTER_PRED_FN_FUNCTION:
@@ -1659,6 +1670,7 @@ static int parse_pred(const char *str, void *data,
switch (field->filter_type) {
case FILTER_CPUMASK:
+ case FILTER_CPU:
case FILTER_OTHER:
break;
default:
@@ -1714,6 +1726,8 @@ static int parse_pred(const char *str, void *data,
i++;
if (field->filter_type == FILTER_CPUMASK) {
pred->fn_num = FILTER_PRED_FN_CPUMASK;
+ } else if (field->filter_type == FILTER_CPU) {
+ pred->fn_num = FILTER_PRED_FN_CPU_CPUMASK;
} else {
switch (field->size) {
case 8:
--
2.31.1
^ permalink raw reply related [flat|nested] 10+ messages in thread
* [PATCH v2 5/9] tracing/filters: Optimise cpumask vs cpumask filtering when user mask is a single CPU
2023-07-07 17:21 [PATCH v2 0/9] tracing/filters: filtering event fields with a cpumask Valentin Schneider
` (3 preceding siblings ...)
2023-07-07 17:21 ` [PATCH v2 4/9] tracing/filters: Enable filtering the CPU common " Valentin Schneider
@ 2023-07-07 17:21 ` Valentin Schneider
2023-07-07 17:21 ` [PATCH v2 6/9] tracing/filters: Optimise scalar vs cpumask filtering when the " Valentin Schneider
` (3 subsequent siblings)
8 siblings, 0 replies; 10+ messages in thread
From: Valentin Schneider @ 2023-07-07 17:21 UTC (permalink / raw)
To: linux-kernel, linux-trace-kernel, linux-doc
Cc: Steven Rostedt, Masami Hiramatsu, Jonathan Corbet, Juri Lelli,
Daniel Bristot de Oliveira, Marcelo Tosatti, Leonardo Bras,
Frederic Weisbecker
Steven noted that when the user-provided cpumask contains a single CPU,
then the filtering function can use a scalar as input instead of a
full-fledged cpumask.
Reuse do_filter_scalar_cpumask() when the input mask has a weight of one.
Suggested-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Valentin Schneider <vschneid@redhat.com>
---
kernel/trace/trace_events_filter.c | 35 +++++++++++++++++++++++++++++-
1 file changed, 34 insertions(+), 1 deletion(-)
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index 3009d0c61b532..2fe65ddeb34ef 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -70,6 +70,7 @@ enum filter_pred_fn {
FILTER_PRED_FN_CPU,
FILTER_PRED_FN_CPU_CPUMASK,
FILTER_PRED_FN_CPUMASK,
+ FILTER_PRED_FN_CPUMASK_CPU,
FILTER_PRED_FN_FUNCTION,
FILTER_PRED_FN_,
FILTER_PRED_TEST_VISITED,
@@ -957,6 +958,22 @@ static int filter_pred_cpumask(struct filter_pred *pred, void *event)
return do_filter_cpumask(pred->op, mask, cmp);
}
+/* Filter predicate for cpumask field vs user-provided scalar */
+static int filter_pred_cpumask_cpu(struct filter_pred *pred, void *event)
+{
+ u32 item = *(u32 *)(event + pred->offset);
+ int loc = item & 0xffff;
+ const struct cpumask *mask = (event + loc);
+ unsigned int cpu = pred->val;
+
+ /*
+ * This inverts the usual usage of the function (field is first element,
+ * user parameter is second), but that's fine because the (scalar, mask)
+ * operations used are symmetric.
+ */
+ return do_filter_scalar_cpumask(pred->op, cpu, mask);
+}
+
/* Filter predicate for COMM. */
static int filter_pred_comm(struct filter_pred *pred, void *event)
{
@@ -1453,6 +1470,8 @@ static int filter_pred_fn_call(struct filter_pred *pred, void *event)
return filter_pred_cpu_cpumask(pred, event);
case FILTER_PRED_FN_CPUMASK:
return filter_pred_cpumask(pred, event);
+ case FILTER_PRED_FN_CPUMASK_CPU:
+ return filter_pred_cpumask_cpu(pred, event);
case FILTER_PRED_FN_FUNCTION:
return filter_pred_function(pred, event);
case FILTER_PRED_TEST_VISITED:
@@ -1666,6 +1685,7 @@ static int parse_pred(const char *str, void *data,
} else if (!strncmp(str + i, "CPUS", 4)) {
unsigned int maskstart;
+ bool single;
char *tmp;
switch (field->filter_type) {
@@ -1724,8 +1744,21 @@ static int parse_pred(const char *str, void *data,
/* Move along */
i++;
+
+ /*
+ * Optimisation: if the user-provided mask has a weight of one
+ * then we can treat it as a scalar input.
+ */
+ single = cpumask_weight(pred->mask) == 1;
+ if (single && field->filter_type == FILTER_CPUMASK) {
+ pred->val = cpumask_first(pred->mask);
+ kfree(pred->mask);
+ }
+
if (field->filter_type == FILTER_CPUMASK) {
- pred->fn_num = FILTER_PRED_FN_CPUMASK;
+ pred->fn_num = single ?
+ FILTER_PRED_FN_CPUMASK_CPU :
+ FILTER_PRED_FN_CPUMASK;
} else if (field->filter_type == FILTER_CPU) {
pred->fn_num = FILTER_PRED_FN_CPU_CPUMASK;
} else {
--
2.31.1
^ permalink raw reply related [flat|nested] 10+ messages in thread
* [PATCH v2 6/9] tracing/filters: Optimise scalar vs cpumask filtering when the user mask is a single CPU
2023-07-07 17:21 [PATCH v2 0/9] tracing/filters: filtering event fields with a cpumask Valentin Schneider
` (4 preceding siblings ...)
2023-07-07 17:21 ` [PATCH v2 5/9] tracing/filters: Optimise cpumask vs cpumask filtering when user mask is a single CPU Valentin Schneider
@ 2023-07-07 17:21 ` Valentin Schneider
2023-07-07 17:21 ` [PATCH v2 7/9] tracing/filters: Optimise CPU " Valentin Schneider
` (2 subsequent siblings)
8 siblings, 0 replies; 10+ messages in thread
From: Valentin Schneider @ 2023-07-07 17:21 UTC (permalink / raw)
To: linux-kernel, linux-trace-kernel, linux-doc
Cc: Steven Rostedt, Masami Hiramatsu, Jonathan Corbet, Juri Lelli,
Daniel Bristot de Oliveira, Marcelo Tosatti, Leonardo Bras,
Frederic Weisbecker
Steven noted that when the user-provided cpumask contains a single CPU,
then the filtering function can use a scalar as input instead of a
full-fledged cpumask.
When the mask contains a single CPU, directly re-use the unsigned field
predicate functions. Transform '&' into '==' beforehand.
Suggested-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Valentin Schneider <vschneid@redhat.com>
---
kernel/trace/trace_events_filter.c | 7 ++++++-
1 file changed, 6 insertions(+), 1 deletion(-)
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index 2fe65ddeb34ef..54d642fabb7f1 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -1750,7 +1750,7 @@ static int parse_pred(const char *str, void *data,
* then we can treat it as a scalar input.
*/
single = cpumask_weight(pred->mask) == 1;
- if (single && field->filter_type == FILTER_CPUMASK) {
+ if (single && field->filter_type != FILTER_CPU) {
pred->val = cpumask_first(pred->mask);
kfree(pred->mask);
}
@@ -1761,6 +1761,11 @@ static int parse_pred(const char *str, void *data,
FILTER_PRED_FN_CPUMASK;
} else if (field->filter_type == FILTER_CPU) {
pred->fn_num = FILTER_PRED_FN_CPU_CPUMASK;
+ } else if (single) {
+ pred->op = pred->op == OP_BAND ? OP_EQ : pred->op;
+ pred->fn_num = select_comparison_fn(pred->op, field->size, false);
+ if (pred->op == OP_NE)
+ pred->not = 1;
} else {
switch (field->size) {
case 8:
--
2.31.1
^ permalink raw reply related [flat|nested] 10+ messages in thread
* [PATCH v2 7/9] tracing/filters: Optimise CPU vs cpumask filtering when the user mask is a single CPU
2023-07-07 17:21 [PATCH v2 0/9] tracing/filters: filtering event fields with a cpumask Valentin Schneider
` (5 preceding siblings ...)
2023-07-07 17:21 ` [PATCH v2 6/9] tracing/filters: Optimise scalar vs cpumask filtering when the " Valentin Schneider
@ 2023-07-07 17:21 ` Valentin Schneider
2023-07-07 17:21 ` [PATCH v2 8/9] tracing/filters: Further optimise scalar vs cpumask comparison Valentin Schneider
2023-07-07 17:21 ` [PATCH v2 9/9] tracing/filters: Document cpumask filtering Valentin Schneider
8 siblings, 0 replies; 10+ messages in thread
From: Valentin Schneider @ 2023-07-07 17:21 UTC (permalink / raw)
To: linux-kernel, linux-trace-kernel, linux-doc
Cc: Steven Rostedt, Masami Hiramatsu, Jonathan Corbet, Juri Lelli,
Daniel Bristot de Oliveira, Marcelo Tosatti, Leonardo Bras,
Frederic Weisbecker
Steven noted that when the user-provided cpumask contains a single CPU,
then the filtering function can use a scalar as input instead of a
full-fledged cpumask.
In this case we can directly re-use filter_pred_cpu(); we just need to
transform '&' into '==' before executing it.
Suggested-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Valentin Schneider <vschneid@redhat.com>
---
kernel/trace/trace_events_filter.c | 9 +++++++--
1 file changed, 7 insertions(+), 2 deletions(-)
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index 54d642fabb7f1..fd72dacc5d1b8 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -1750,7 +1750,7 @@ static int parse_pred(const char *str, void *data,
* then we can treat it as a scalar input.
*/
single = cpumask_weight(pred->mask) == 1;
- if (single && field->filter_type != FILTER_CPU) {
+ if (single) {
pred->val = cpumask_first(pred->mask);
kfree(pred->mask);
}
@@ -1760,7 +1760,12 @@ static int parse_pred(const char *str, void *data,
FILTER_PRED_FN_CPUMASK_CPU :
FILTER_PRED_FN_CPUMASK;
} else if (field->filter_type == FILTER_CPU) {
- pred->fn_num = FILTER_PRED_FN_CPU_CPUMASK;
+ if (single) {
+ pred->op = pred->op == OP_BAND ? OP_EQ : pred->op;
+ pred->fn_num = FILTER_PRED_FN_CPU;
+ } else {
+ pred->fn_num = FILTER_PRED_FN_CPU_CPUMASK;
+ }
} else if (single) {
pred->op = pred->op == OP_BAND ? OP_EQ : pred->op;
pred->fn_num = select_comparison_fn(pred->op, field->size, false);
--
2.31.1
^ permalink raw reply related [flat|nested] 10+ messages in thread
* [PATCH v2 8/9] tracing/filters: Further optimise scalar vs cpumask comparison
2023-07-07 17:21 [PATCH v2 0/9] tracing/filters: filtering event fields with a cpumask Valentin Schneider
` (6 preceding siblings ...)
2023-07-07 17:21 ` [PATCH v2 7/9] tracing/filters: Optimise CPU " Valentin Schneider
@ 2023-07-07 17:21 ` Valentin Schneider
2023-07-07 17:21 ` [PATCH v2 9/9] tracing/filters: Document cpumask filtering Valentin Schneider
8 siblings, 0 replies; 10+ messages in thread
From: Valentin Schneider @ 2023-07-07 17:21 UTC (permalink / raw)
To: linux-kernel, linux-trace-kernel, linux-doc
Cc: Steven Rostedt, Masami Hiramatsu, Jonathan Corbet, Juri Lelli,
Daniel Bristot de Oliveira, Marcelo Tosatti, Leonardo Bras,
Frederic Weisbecker
Per the previous commits, we now only enter do_filter_scalar_cpumask() with
a mask of weight greater than one. Optimise the equality checks.
Suggested-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Valentin Schneider <vschneid@redhat.com>
---
kernel/trace/trace_events_filter.c | 26 ++++++++++++++++++++------
1 file changed, 20 insertions(+), 6 deletions(-)
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index fd72dacc5d1b8..3a529214a21b7 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -667,6 +667,25 @@ do_filter_cpumask(int op, const struct cpumask *mask, const struct cpumask *cmp)
/* Optimisation of do_filter_cpumask() for scalar fields */
static inline int
do_filter_scalar_cpumask(int op, unsigned int cpu, const struct cpumask *mask)
+{
+ /*
+ * Per the weight-of-one cpumask optimisations, the mask passed in this
+ * function has a weight >= 2, so it is never equal to a single scalar.
+ */
+ switch (op) {
+ case OP_EQ:
+ return false;
+ case OP_NE:
+ return true;
+ case OP_BAND:
+ return cpumask_test_cpu(cpu, mask);
+ default:
+ return 0;
+ }
+}
+
+static inline int
+do_filter_cpumask_scalar(int op, const struct cpumask *mask, unsigned int cpu)
{
switch (op) {
case OP_EQ:
@@ -966,12 +985,7 @@ static int filter_pred_cpumask_cpu(struct filter_pred *pred, void *event)
const struct cpumask *mask = (event + loc);
unsigned int cpu = pred->val;
- /*
- * This inverts the usual usage of the function (field is first element,
- * user parameter is second), but that's fine because the (scalar, mask)
- * operations used are symmetric.
- */
- return do_filter_scalar_cpumask(pred->op, cpu, mask);
+ return do_filter_cpumask_scalar(pred->op, mask, cpu);
}
/* Filter predicate for COMM. */
--
2.31.1
^ permalink raw reply related [flat|nested] 10+ messages in thread
* [PATCH v2 9/9] tracing/filters: Document cpumask filtering
2023-07-07 17:21 [PATCH v2 0/9] tracing/filters: filtering event fields with a cpumask Valentin Schneider
` (7 preceding siblings ...)
2023-07-07 17:21 ` [PATCH v2 8/9] tracing/filters: Further optimise scalar vs cpumask comparison Valentin Schneider
@ 2023-07-07 17:21 ` Valentin Schneider
8 siblings, 0 replies; 10+ messages in thread
From: Valentin Schneider @ 2023-07-07 17:21 UTC (permalink / raw)
To: linux-kernel, linux-trace-kernel, linux-doc
Cc: Steven Rostedt, Masami Hiramatsu, Jonathan Corbet, Juri Lelli,
Daniel Bristot de Oliveira, Marcelo Tosatti, Leonardo Bras,
Frederic Weisbecker
Cpumask, scalar and CPU fields can now be filtered by a user-provided
cpumask; document the syntax.
Signed-off-by: Valentin Schneider <vschneid@redhat.com>
---
Documentation/trace/events.rst | 14 ++++++++++++++
1 file changed, 14 insertions(+)
diff --git a/Documentation/trace/events.rst b/Documentation/trace/events.rst
index f5fcb8e1218f6..34108d5a55b41 100644
--- a/Documentation/trace/events.rst
+++ b/Documentation/trace/events.rst
@@ -219,6 +219,20 @@ the function "security_prepare_creds" and less than the end of that function.
The ".function" postfix can only be attached to values of size long, and can only
be compared with "==" or "!=".
+Cpumask fields or scalar fields that encode a CPU number can be filtered using
+a user-provided cpumask in cpulist format. The format is as follows::
+
+ CPUS{$cpulist}
+
+Operators available to cpumask filtering are:
+
+& (intersection), ==, !=
+
+For example, this will filter events that have their .target_cpu field present
+in the given cpumask::
+
+ target_cpu & CPUS{17-42}
+
5.2 Setting filters
-------------------
--
2.31.1
^ permalink raw reply related [flat|nested] 10+ messages in thread