* [PATCH 1/5] x86/intel_rdt: Add framework for better RDT UI diagnostics
2017-09-18 22:18 [RFC 0/5] x86/intel_rdt: Better diagnostics Luck, Tony
@ 2017-09-18 22:18 ` Luck, Tony
2017-09-18 22:18 ` [PATCH 2/5] x86/intel_rdt: Add diagnostics when writing the schemata file Luck, Tony
` (6 subsequent siblings)
7 siblings, 0 replies; 14+ messages in thread
From: Luck, Tony @ 2017-09-18 22:18 UTC (permalink / raw)
To: Thomas Gleixner
Cc: Tony Luck, linux-kernel, Boris Petkov, Fenghua Yu,
Reinette Chatre, Steven Rostedt, Vikas Shivappa
From: Tony Luck <tony.luck@intel.com>
Commands are given to the resctrl file system by making/removing
directories, or by writing to files. When something goes wrong
the user is generally left wondering why they got:
bash: echo: write error: Invalid argument
Add a new file "last_cmd_status" to the "info" directory that
will give the user some better clues on what went wrong.
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
arch/x86/kernel/cpu/intel_rdt.h | 6 ++++++
arch/x86/kernel/cpu/intel_rdt_rdtgroup.c | 33 ++++++++++++++++++++++++++++++++
2 files changed, 39 insertions(+)
diff --git a/arch/x86/kernel/cpu/intel_rdt.h b/arch/x86/kernel/cpu/intel_rdt.h
index ebaddaeef023..afba509ac164 100644
--- a/arch/x86/kernel/cpu/intel_rdt.h
+++ b/arch/x86/kernel/cpu/intel_rdt.h
@@ -2,6 +2,7 @@
#define _ASM_X86_INTEL_RDT_H
#include <linux/sched.h>
+#include <linux/seq_buf.h>
#include <linux/kernfs.h>
#include <linux/jump_label.h>
@@ -30,6 +31,8 @@
DECLARE_STATIC_KEY_FALSE(rdt_enable_key);
+extern struct seq_buf last_cmd_status;
+
/**
* struct mon_evt - Entry in the event list of a resource
* @evtid: event id
@@ -126,12 +129,15 @@ struct rdtgroup {
#define RFTYPE_BASE BIT(1)
#define RF_CTRLSHIFT 4
#define RF_MONSHIFT 5
+#define RF_TOPSHIFT 6
#define RFTYPE_CTRL BIT(RF_CTRLSHIFT)
#define RFTYPE_MON BIT(RF_MONSHIFT)
+#define RFTYPE_TOP BIT(RF_TOPSHIFT)
#define RFTYPE_RES_CACHE BIT(8)
#define RFTYPE_RES_MB BIT(9)
#define RF_CTRL_INFO (RFTYPE_INFO | RFTYPE_CTRL)
#define RF_MON_INFO (RFTYPE_INFO | RFTYPE_MON)
+#define RF_TOP_INFO (RFTYPE_INFO | RFTYPE_TOP)
#define RF_CTRL_BASE (RFTYPE_BASE | RFTYPE_CTRL)
/* List of all resource groups */
diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
index a869d4a073c5..6a2cd89fa9de 100644
--- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
+++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
@@ -51,6 +51,9 @@ static struct kernfs_node *kn_mongrp;
/* Kernel fs node for "mon_data" directory under root */
static struct kernfs_node *kn_mondata;
+struct seq_buf last_cmd_status;
+static char last_cmd_status_buf[512];
+
/*
* Trivial allocator for CLOSIDs. Since h/w only supports a small number,
* we can keep a bitmap of free CLOSIDs in a single integer.
@@ -569,6 +572,21 @@ static int rdtgroup_tasks_show(struct kernfs_open_file *of,
return ret;
}
+static int rdt_last_cmd_status_show(struct kernfs_open_file *of,
+ struct seq_file *seq, void *v)
+{
+ int len;
+
+ mutex_lock(&rdtgroup_mutex);
+ len = seq_buf_used(&last_cmd_status);
+ if (len)
+ seq_printf(seq, "%.*s", len, last_cmd_status_buf);
+ else
+ seq_puts(seq, "ok\n");
+ mutex_unlock(&rdtgroup_mutex);
+ return 0;
+}
+
static int rdt_num_closids_show(struct kernfs_open_file *of,
struct seq_file *seq, void *v)
{
@@ -686,6 +704,13 @@ static ssize_t max_threshold_occ_write(struct kernfs_open_file *of,
/* rdtgroup information files for one cache resource. */
static struct rftype res_common_files[] = {
{
+ .name = "last_cmd_status",
+ .mode = 0444,
+ .kf_ops = &rdtgroup_kf_single_ops,
+ .seq_show = rdt_last_cmd_status_show,
+ .fflags = RF_TOP_INFO,
+ },
+ {
.name = "num_closids",
.mode = 0444,
.kf_ops = &rdtgroup_kf_single_ops,
@@ -855,6 +880,10 @@ static int rdtgroup_create_info_dir(struct kernfs_node *parent_kn)
return PTR_ERR(kn_info);
kernfs_get(kn_info);
+ ret = rdtgroup_add_files(kn_info, RF_TOP_INFO);
+ if (ret)
+ goto out_destroy;
+
for_each_alloc_enabled_rdt_resource(r) {
fflags = r->fflags | RF_CTRL_INFO;
ret = rdtgroup_mkdir_info_resdir(r, r->name, fflags);
@@ -1156,6 +1185,7 @@ static struct dentry *rdt_mount(struct file_system_type *fs_type,
out_cdp:
cdp_disable();
out:
+ seq_buf_clear(&last_cmd_status);
mutex_unlock(&rdtgroup_mutex);
return dentry;
@@ -1902,6 +1932,9 @@ int __init rdtgroup_init(void)
{
int ret = 0;
+ seq_buf_init(&last_cmd_status, last_cmd_status_buf,
+ sizeof(last_cmd_status_buf));
+
ret = rdtgroup_setup_root();
if (ret)
return ret;
--
2.11.0
^ permalink raw reply related [flat|nested] 14+ messages in thread* [PATCH 2/5] x86/intel_rdt: Add diagnostics when writing the schemata file
2017-09-18 22:18 [RFC 0/5] x86/intel_rdt: Better diagnostics Luck, Tony
2017-09-18 22:18 ` [PATCH 1/5] x86/intel_rdt: Add framework for better RDT UI diagnostics Luck, Tony
@ 2017-09-18 22:18 ` Luck, Tony
2017-09-25 14:04 ` Thomas Gleixner
2017-09-18 22:18 ` [PATCH 3/5] x86/intel_rdt: Add diagnostics when writing the tasks file Luck, Tony
` (5 subsequent siblings)
7 siblings, 1 reply; 14+ messages in thread
From: Luck, Tony @ 2017-09-18 22:18 UTC (permalink / raw)
To: Thomas Gleixner
Cc: Tony Luck, linux-kernel, Boris Petkov, Fenghua Yu,
Reinette Chatre, Steven Rostedt, Vikas Shivappa
From: Tony Luck <tony.luck@intel.com>
Save helpful descriptions of what went wrong when writing a
schemata file.
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c | 61 +++++++++++++++++++++++------
1 file changed, 50 insertions(+), 11 deletions(-)
diff --git a/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c b/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c
index f6ea94f8954a..d87a060ab0db 100644
--- a/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c
+++ b/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c
@@ -42,15 +42,25 @@ static bool bw_validate(char *buf, unsigned long *data, struct rdt_resource *r)
/*
* Only linear delay values is supported for current Intel SKUs.
*/
- if (!r->membw.delay_linear)
+ if (!r->membw.delay_linear) {
+ seq_buf_puts(&last_cmd_status,
+ "No support for non-linear MB domains\n");
return false;
+ }
ret = kstrtoul(buf, 10, &bw);
- if (ret)
+ if (ret) {
+ seq_buf_printf(&last_cmd_status,
+ "Non-decimal digit in MB value %s\n", buf);
return false;
+ }
- if (bw < r->membw.min_bw || bw > r->default_ctrl)
+ if (bw < r->membw.min_bw || bw > r->default_ctrl) {
+ seq_buf_printf(&last_cmd_status,
+ "MB value %ld out of range [%d,%d]\n", bw,
+ r->membw.min_bw, r->default_ctrl);
return false;
+ }
*data = roundup(bw, (unsigned long)r->membw.bw_gran);
return true;
@@ -60,8 +70,11 @@ int parse_bw(char *buf, struct rdt_resource *r, struct rdt_domain *d)
{
unsigned long data;
- if (d->have_new_ctrl)
+ if (d->have_new_ctrl) {
+ seq_buf_printf(&last_cmd_status, "duplicate domain %d\n",
+ d->id);
return -EINVAL;
+ }
if (!bw_validate(buf, &data, r))
return -EINVAL;
@@ -84,20 +97,32 @@ static bool cbm_validate(char *buf, unsigned long *data, struct rdt_resource *r)
int ret;
ret = kstrtoul(buf, 16, &val);
- if (ret)
+ if (ret) {
+ seq_buf_printf(&last_cmd_status,
+ "non-hex character in mask %s\n", buf);
return false;
+ }
- if (val == 0 || val > r->default_ctrl)
+ if (val == 0 || val > r->default_ctrl) {
+ seq_buf_puts(&last_cmd_status, "mask out of range\n");
return false;
+ }
first_bit = find_first_bit(&val, cbm_len);
zero_bit = find_next_zero_bit(&val, cbm_len, first_bit);
- if (find_next_bit(&val, cbm_len, zero_bit) < cbm_len)
+ if (find_next_bit(&val, cbm_len, zero_bit) < cbm_len) {
+ seq_buf_printf(&last_cmd_status,
+ "mask %lx has non-consecutive 1-bits\n", val);
return false;
+ }
- if ((zero_bit - first_bit) < r->cache.min_cbm_bits)
+ if ((zero_bit - first_bit) < r->cache.min_cbm_bits) {
+ seq_buf_printf(&last_cmd_status,
+ "Need at least %d bits in mask\n",
+ r->cache.min_cbm_bits);
return false;
+ }
*data = val;
return true;
@@ -111,8 +136,11 @@ int parse_cbm(char *buf, struct rdt_resource *r, struct rdt_domain *d)
{
unsigned long data;
- if (d->have_new_ctrl)
+ if (d->have_new_ctrl) {
+ seq_buf_printf(&last_cmd_status, "duplicate domain %d\n",
+ d->id);
return -EINVAL;
+ }
if(!cbm_validate(buf, &data, r))
return -EINVAL;
@@ -139,8 +167,11 @@ static int parse_line(char *line, struct rdt_resource *r)
return 0;
dom = strsep(&line, ";");
id = strsep(&dom, "=");
- if (!dom || kstrtoul(id, 10, &dom_id))
+ if (!dom || kstrtoul(id, 10, &dom_id)) {
+ seq_buf_puts(&last_cmd_status,
+ "Missing '=' or non-numeric domain\n");
return -EINVAL;
+ }
dom = strim(dom);
list_for_each_entry(d, &r->domains, list) {
if (d->id == dom_id) {
@@ -196,6 +227,8 @@ static int rdtgroup_parse_resource(char *resname, char *tok, int closid)
if (!strcmp(resname, r->name) && closid < r->num_closid)
return parse_line(tok, r);
}
+ seq_buf_printf(&last_cmd_status,
+ "unknown/unsupported resource name '%s'\n", resname);
return -EINVAL;
}
@@ -208,14 +241,19 @@ ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of,
char *tok, *resname;
int closid, ret = 0;
+ seq_buf_clear(&last_cmd_status);
+
/* Valid input requires a trailing newline */
- if (nbytes == 0 || buf[nbytes - 1] != '\n')
+ if (nbytes == 0 || buf[nbytes - 1] != '\n') {
+ seq_buf_puts(&last_cmd_status, "no trailing newline\n");
return -EINVAL;
+ }
buf[nbytes - 1] = '\0';
rdtgrp = rdtgroup_kn_lock_live(of->kn);
if (!rdtgrp) {
rdtgroup_kn_unlock(of->kn);
+ seq_buf_puts(&last_cmd_status, "directory was removed\n");
return -ENOENT;
}
@@ -229,6 +267,7 @@ ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of,
while ((tok = strsep(&buf, "\n")) != NULL) {
resname = strim(strsep(&tok, ":"));
if (!tok) {
+ seq_buf_puts(&last_cmd_status, "Missing ':'\n");
ret = -EINVAL;
goto out;
}
--
2.11.0
^ permalink raw reply related [flat|nested] 14+ messages in thread* Re: [PATCH 2/5] x86/intel_rdt: Add diagnostics when writing the schemata file
2017-09-18 22:18 ` [PATCH 2/5] x86/intel_rdt: Add diagnostics when writing the schemata file Luck, Tony
@ 2017-09-25 14:04 ` Thomas Gleixner
2017-09-25 21:38 ` Luck, Tony
0 siblings, 1 reply; 14+ messages in thread
From: Thomas Gleixner @ 2017-09-25 14:04 UTC (permalink / raw)
To: Luck, Tony
Cc: linux-kernel, Boris Petkov, Fenghua Yu, Reinette Chatre,
Steven Rostedt, Vikas Shivappa
On Mon, 18 Sep 2017, Luck, Tony wrote:
> @@ -208,14 +241,19 @@ ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of,
> char *tok, *resname;
> int closid, ret = 0;
>
> + seq_buf_clear(&last_cmd_status);
> +
> /* Valid input requires a trailing newline */
> - if (nbytes == 0 || buf[nbytes - 1] != '\n')
> + if (nbytes == 0 || buf[nbytes - 1] != '\n') {
> + seq_buf_puts(&last_cmd_status, "no trailing newline\n");
> return -EINVAL;
> + }
> buf[nbytes - 1] = '\0';
In all other instances you access last_cmd_status within the
rdtgroup_kn_lock_live() protected section, which also serializes the show()
function via rdtgroup_mutex. Here you do it outside for obvious reasons,
but that opens a can of evil worms ...
Can you please provide and use two helpers - last_cmd_buf_clear() and
last_cmd_buf_puts() - which both have a
lockdep_assert_held(&rdtgroup_mutex) inside to make sure that we don't end
up with unprotected access accidentally?
Thanks,
tglx
^ permalink raw reply [flat|nested] 14+ messages in thread* Re: [PATCH 2/5] x86/intel_rdt: Add diagnostics when writing the schemata file
2017-09-25 14:04 ` Thomas Gleixner
@ 2017-09-25 21:38 ` Luck, Tony
2017-09-25 22:12 ` Thomas Gleixner
0 siblings, 1 reply; 14+ messages in thread
From: Luck, Tony @ 2017-09-25 21:38 UTC (permalink / raw)
To: Thomas Gleixner
Cc: linux-kernel, Boris Petkov, Fenghua Yu, Reinette Chatre,
Steven Rostedt, Vikas Shivappa
On Mon, Sep 25, 2017 at 04:04:07PM +0200, Thomas Gleixner wrote:
> On Mon, 18 Sep 2017, Luck, Tony wrote:
> > @@ -208,14 +241,19 @@ ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of,
> > char *tok, *resname;
> > int closid, ret = 0;
> >
> > + seq_buf_clear(&last_cmd_status);
> > +
> > /* Valid input requires a trailing newline */
> > - if (nbytes == 0 || buf[nbytes - 1] != '\n')
> > + if (nbytes == 0 || buf[nbytes - 1] != '\n') {
> > + seq_buf_puts(&last_cmd_status, "no trailing newline\n");
> > return -EINVAL;
> > + }
> > buf[nbytes - 1] = '\0';
>
> In all other instances you access last_cmd_status within the
> rdtgroup_kn_lock_live() protected section, which also serializes the show()
> function via rdtgroup_mutex. Here you do it outside for obvious reasons,
> but that opens a can of evil worms ...
Indeed.
> Can you please provide and use two helpers - last_cmd_buf_clear() and
> last_cmd_buf_puts() - which both have a
> lockdep_assert_held(&rdtgroup_mutex) inside to make sure that we don't end
> up with unprotected access accidentally?
Sure. In progress. But I also need a last_cmd_printf(), which for some
reason is giving me grief. In the header file I put:
+static inline void last_cmd_printf(const char *fmt, ...)
+{
+ va_list ap;
+
+ va_start(ap, fmt);
+ lockdep_assert_held(&rdtgroup_mutex);
+ seq_buf_printf(&last_cmd_status, fmt, ap);
+ va_end(ap);
+}
and use it like this:
+ last_cmd_printf("unknown/unsupported resource name '%s'\n", resname);
but the argument gets lost/mangled. Instead of the string that
should have appeared for the %s, I just get a \b
Also with nummeric arguments:
+ last_cmd_printf("mask %lx has non-consecutive 1-bits\n", val);
I get some kernel pointer looking value instead of "5":
mask ffffa1ee62757c98 has non-consecutive 1-bits
Is there a limit on how many nested va_start()/va_end() can happen? Or is the
compiler confused because I made this "inline"? Or just a silly typo that I
can't see despite staring at it for a while?
-Tony
^ permalink raw reply [flat|nested] 14+ messages in thread* Re: [PATCH 2/5] x86/intel_rdt: Add diagnostics when writing the schemata file
2017-09-25 21:38 ` Luck, Tony
@ 2017-09-25 22:12 ` Thomas Gleixner
2017-09-25 22:18 ` Luck, Tony
0 siblings, 1 reply; 14+ messages in thread
From: Thomas Gleixner @ 2017-09-25 22:12 UTC (permalink / raw)
To: Luck, Tony
Cc: linux-kernel, Boris Petkov, Fenghua Yu, Reinette Chatre,
Steven Rostedt, Vikas Shivappa
On Mon, 25 Sep 2017, Luck, Tony wrote:
> On Mon, Sep 25, 2017 at 04:04:07PM +0200, Thomas Gleixner wrote:
> > On Mon, 18 Sep 2017, Luck, Tony wrote:
> > > @@ -208,14 +241,19 @@ ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of,
> > > char *tok, *resname;
> > > int closid, ret = 0;
> > >
> > > + seq_buf_clear(&last_cmd_status);
> > > +
> > > /* Valid input requires a trailing newline */
> > > - if (nbytes == 0 || buf[nbytes - 1] != '\n')
> > > + if (nbytes == 0 || buf[nbytes - 1] != '\n') {
> > > + seq_buf_puts(&last_cmd_status, "no trailing newline\n");
> > > return -EINVAL;
> > > + }
> > > buf[nbytes - 1] = '\0';
> >
> > In all other instances you access last_cmd_status within the
> > rdtgroup_kn_lock_live() protected section, which also serializes the show()
> > function via rdtgroup_mutex. Here you do it outside for obvious reasons,
> > but that opens a can of evil worms ...
>
> Indeed.
>
> > Can you please provide and use two helpers - last_cmd_buf_clear() and
> > last_cmd_buf_puts() - which both have a
> > lockdep_assert_held(&rdtgroup_mutex) inside to make sure that we don't end
> > up with unprotected access accidentally?
>
> Sure. In progress. But I also need a last_cmd_printf(), which for some
> reason is giving me grief. In the header file I put:
>
> +static inline void last_cmd_printf(const char *fmt, ...)
> +{
> + va_list ap;
> +
> + va_start(ap, fmt);
> + lockdep_assert_held(&rdtgroup_mutex);
> + seq_buf_printf(&last_cmd_status, fmt, ap);
seq_buf_vprintf() is your friend. It takes va_list as last argument.
While at it can you please make it a proper function? No point for inlining
that.
Thanks,
tglx
^ permalink raw reply [flat|nested] 14+ messages in thread* RE: [PATCH 2/5] x86/intel_rdt: Add diagnostics when writing the schemata file
2017-09-25 22:12 ` Thomas Gleixner
@ 2017-09-25 22:18 ` Luck, Tony
0 siblings, 0 replies; 14+ messages in thread
From: Luck, Tony @ 2017-09-25 22:18 UTC (permalink / raw)
To: Thomas Gleixner
Cc: linux-kernel@vger.kernel.org, Boris Petkov, Yu, Fenghua,
Chatre, Reinette, Steven Rostedt, Shivappa, Vikas
> seq_buf_vprintf() is your friend. It takes va_list as last argument.
Reinette spotted that a couple of minutes ahead of you.
/me looks for paper bag to put over my head.
> While at it can you please make it a proper function? No point for inlining
> that.
There was a small point ... I need the function in two files ... so the inline keeps
me from polluting global name space.
But I guess I can call it rdt_last_cmd_printf() to limit the damage.
-Tony
^ permalink raw reply [flat|nested] 14+ messages in thread
* [PATCH 3/5] x86/intel_rdt: Add diagnostics when writing the tasks file
2017-09-18 22:18 [RFC 0/5] x86/intel_rdt: Better diagnostics Luck, Tony
2017-09-18 22:18 ` [PATCH 1/5] x86/intel_rdt: Add framework for better RDT UI diagnostics Luck, Tony
2017-09-18 22:18 ` [PATCH 2/5] x86/intel_rdt: Add diagnostics when writing the schemata file Luck, Tony
@ 2017-09-18 22:18 ` Luck, Tony
2017-09-18 22:18 ` [PATCH 4/5] x86/intel_rdt: Add diagnostics when writing the cpus file Luck, Tony
` (4 subsequent siblings)
7 siblings, 0 replies; 14+ messages in thread
From: Luck, Tony @ 2017-09-18 22:18 UTC (permalink / raw)
To: Thomas Gleixner
Cc: Tony Luck, linux-kernel, Boris Petkov, Fenghua Yu,
Reinette Chatre, Steven Rostedt, Vikas Shivappa
From: Tony Luck <tony.luck@intel.com>
About the only tricky case is trying to move a task into a monitor
group that is a subdirectory of a different control group. But cover
the simple cases too.
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
arch/x86/kernel/cpu/intel_rdt_rdtgroup.c | 15 ++++++++++++---
1 file changed, 12 insertions(+), 3 deletions(-)
diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
index 6a2cd89fa9de..c54189a1b54e 100644
--- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
+++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
@@ -455,6 +455,7 @@ static int __rdtgroup_move_task(struct task_struct *tsk,
*/
atomic_dec(&rdtgrp->waitcount);
kfree(callback);
+ seq_buf_puts(&last_cmd_status, "task exited\n");
} else {
/*
* For ctrl_mon groups move both closid and rmid.
@@ -465,10 +466,13 @@ static int __rdtgroup_move_task(struct task_struct *tsk,
tsk->closid = rdtgrp->closid;
tsk->rmid = rdtgrp->mon.rmid;
} else if (rdtgrp->type == RDTMON_GROUP) {
- if (rdtgrp->mon.parent->closid == tsk->closid)
+ if (rdtgrp->mon.parent->closid == tsk->closid) {
tsk->rmid = rdtgrp->mon.rmid;
- else
+ } else {
+ seq_buf_puts(&last_cmd_status,
+ "Can't move task to different control group\n");
ret = -EINVAL;
+ }
}
}
return ret;
@@ -487,8 +491,11 @@ static int rdtgroup_task_write_permission(struct task_struct *task,
*/
if (!uid_eq(cred->euid, GLOBAL_ROOT_UID) &&
!uid_eq(cred->euid, tcred->uid) &&
- !uid_eq(cred->euid, tcred->suid))
+ !uid_eq(cred->euid, tcred->suid)) {
+ seq_buf_printf(&last_cmd_status,
+ "No permission to move task %d\n", task->pid);
ret = -EPERM;
+ }
put_cred(tcred);
return ret;
@@ -505,6 +512,7 @@ static int rdtgroup_move_task(pid_t pid, struct rdtgroup *rdtgrp,
tsk = find_task_by_vpid(pid);
if (!tsk) {
rcu_read_unlock();
+ seq_buf_printf(&last_cmd_status, "No task %d\n", pid);
return -ESRCH;
}
} else {
@@ -532,6 +540,7 @@ static ssize_t rdtgroup_tasks_write(struct kernfs_open_file *of,
if (kstrtoint(strstrip(buf), 0, &pid) || pid < 0)
return -EINVAL;
rdtgrp = rdtgroup_kn_lock_live(of->kn);
+ seq_buf_clear(&last_cmd_status);
if (rdtgrp)
ret = rdtgroup_move_task(pid, rdtgrp, of);
--
2.11.0
^ permalink raw reply related [flat|nested] 14+ messages in thread* [PATCH 4/5] x86/intel_rdt: Add diagnostics when writing the cpus file
2017-09-18 22:18 [RFC 0/5] x86/intel_rdt: Better diagnostics Luck, Tony
` (2 preceding siblings ...)
2017-09-18 22:18 ` [PATCH 3/5] x86/intel_rdt: Add diagnostics when writing the tasks file Luck, Tony
@ 2017-09-18 22:18 ` Luck, Tony
2017-09-18 22:18 ` [PATCH 5/5] x86/intel_rdt: Add diagnostics when making directories Luck, Tony
` (3 subsequent siblings)
7 siblings, 0 replies; 14+ messages in thread
From: Luck, Tony @ 2017-09-18 22:18 UTC (permalink / raw)
To: Thomas Gleixner
Cc: Tony Luck, linux-kernel, Boris Petkov, Fenghua Yu,
Reinette Chatre, Steven Rostedt, Vikas Shivappa
From: Tony Luck <tony.luck@intel.com>
Can't add a cpu to a monitor group unless it belongs to parent
group. Can't delete cpus from the default group.
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
arch/x86/kernel/cpu/intel_rdt_rdtgroup.c | 17 ++++++++++++++---
1 file changed, 14 insertions(+), 3 deletions(-)
diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
index c54189a1b54e..8b851ae601df 100644
--- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
+++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
@@ -241,8 +241,11 @@ static int cpus_mon_write(struct rdtgroup *rdtgrp, cpumask_var_t newmask,
/* Check whether cpus belong to parent ctrl group */
cpumask_andnot(tmpmask, newmask, &prgrp->cpu_mask);
- if (cpumask_weight(tmpmask))
+ if (cpumask_weight(tmpmask)) {
+ seq_buf_puts(&last_cmd_status,
+ "can only add CPUs to mongroup that belong to parent\n");
return -EINVAL;
+ }
/* Check whether cpus are dropped from this group */
cpumask_andnot(tmpmask, &rdtgrp->cpu_mask, newmask);
@@ -294,8 +297,11 @@ static int cpus_ctrl_write(struct rdtgroup *rdtgrp, cpumask_var_t newmask,
cpumask_andnot(tmpmask, &rdtgrp->cpu_mask, newmask);
if (cpumask_weight(tmpmask)) {
/* Can't drop from default group */
- if (rdtgrp == &rdtgroup_default)
+ if (rdtgrp == &rdtgroup_default) {
+ seq_buf_puts(&last_cmd_status,
+ "Can't drop CPUs from default group\n");
return -EINVAL;
+ }
/* Give any dropped cpus to rdtgroup_default */
cpumask_or(&rdtgroup_default.cpu_mask,
@@ -360,8 +366,10 @@ static ssize_t rdtgroup_cpus_write(struct kernfs_open_file *of,
}
rdtgrp = rdtgroup_kn_lock_live(of->kn);
+ seq_buf_clear(&last_cmd_status);
if (!rdtgrp) {
ret = -ENOENT;
+ seq_buf_puts(&last_cmd_status, "directory was removed\n");
goto unlock;
}
@@ -370,13 +378,16 @@ static ssize_t rdtgroup_cpus_write(struct kernfs_open_file *of,
else
ret = cpumask_parse(buf, newmask);
- if (ret)
+ if (ret) {
+ seq_buf_puts(&last_cmd_status, "bad cpu list/mask\n");
goto unlock;
+ }
/* check that user didn't specify any offline cpus */
cpumask_andnot(tmpmask, newmask, cpu_online_mask);
if (cpumask_weight(tmpmask)) {
ret = -EINVAL;
+ seq_buf_puts(&last_cmd_status, "can only assign online cpus\n");
goto unlock;
}
--
2.11.0
^ permalink raw reply related [flat|nested] 14+ messages in thread* [PATCH 5/5] x86/intel_rdt: Add diagnostics when making directories
2017-09-18 22:18 [RFC 0/5] x86/intel_rdt: Better diagnostics Luck, Tony
` (3 preceding siblings ...)
2017-09-18 22:18 ` [PATCH 4/5] x86/intel_rdt: Add diagnostics when writing the cpus file Luck, Tony
@ 2017-09-18 22:18 ` Luck, Tony
2017-09-19 1:10 ` [RFC 0/5] x86/intel_rdt: Better diagnostics Steven Rostedt
` (2 subsequent siblings)
7 siblings, 0 replies; 14+ messages in thread
From: Luck, Tony @ 2017-09-18 22:18 UTC (permalink / raw)
To: Thomas Gleixner
Cc: Tony Luck, linux-kernel, Boris Petkov, Fenghua Yu,
Reinette Chatre, Steven Rostedt, Vikas Shivappa
From: Tony Luck <tony.luck@intel.com>
Mostly this is about running out of RMIDs or CLOSIDs. Other
errors are various internal errors.
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
arch/x86/kernel/cpu/intel_rdt_rdtgroup.c | 28 ++++++++++++++++++++++------
1 file changed, 22 insertions(+), 6 deletions(-)
diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
index 8b851ae601df..dc8bc21cc6ee 100644
--- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
+++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
@@ -1574,8 +1574,10 @@ static int mkdir_rdt_prepare(struct kernfs_node *parent_kn,
int ret;
prdtgrp = rdtgroup_kn_lock_live(prgrp_kn);
+ seq_buf_clear(&last_cmd_status);
if (!prdtgrp) {
ret = -ENODEV;
+ seq_buf_puts(&last_cmd_status, "directory was removed\n");
goto out_unlock;
}
@@ -1583,6 +1585,7 @@ static int mkdir_rdt_prepare(struct kernfs_node *parent_kn,
rdtgrp = kzalloc(sizeof(*rdtgrp), GFP_KERNEL);
if (!rdtgrp) {
ret = -ENOSPC;
+ seq_buf_puts(&last_cmd_status, "kernel out of memory\n");
goto out_unlock;
}
*r = rdtgrp;
@@ -1594,6 +1597,7 @@ static int mkdir_rdt_prepare(struct kernfs_node *parent_kn,
kn = kernfs_create_dir(parent_kn, name, mode, rdtgrp);
if (IS_ERR(kn)) {
ret = PTR_ERR(kn);
+ seq_buf_puts(&last_cmd_status, "kernfs create error\n");
goto out_free_rgrp;
}
rdtgrp->kn = kn;
@@ -1607,24 +1611,32 @@ static int mkdir_rdt_prepare(struct kernfs_node *parent_kn,
kernfs_get(kn);
ret = rdtgroup_kn_set_ugid(kn);
- if (ret)
+ if (ret) {
+ seq_buf_puts(&last_cmd_status, "kernfs perm error\n");
goto out_destroy;
+ }
files = RFTYPE_BASE | RFTYPE_CTRL;
files = RFTYPE_BASE | BIT(RF_CTRLSHIFT + rtype);
ret = rdtgroup_add_files(kn, files);
- if (ret)
+ if (ret) {
+ seq_buf_puts(&last_cmd_status, "kernfs fill error\n");
goto out_destroy;
+ }
if (rdt_mon_capable) {
ret = alloc_rmid();
- if (ret < 0)
+ if (ret < 0) {
+ seq_buf_puts(&last_cmd_status, "out of RMIDs\n");
goto out_destroy;
+ }
rdtgrp->mon.rmid = ret;
ret = mkdir_mondata_all(kn, rdtgrp, &rdtgrp->mon.mon_data_kn);
- if (ret)
+ if (ret) {
+ seq_buf_puts(&last_cmd_status, "kernfs subdir error\n");
goto out_idfree;
+ }
}
kernfs_activate(kn);
@@ -1702,8 +1714,10 @@ static int rdtgroup_mkdir_ctrl_mon(struct kernfs_node *parent_kn,
kn = rdtgrp->kn;
ret = closid_alloc();
- if (ret < 0)
+ if (ret < 0) {
+ seq_buf_puts(&last_cmd_status, "out of CLOSIDs\n");
goto out_common_fail;
+ }
closid = ret;
rdtgrp->closid = closid;
@@ -1715,8 +1729,10 @@ static int rdtgroup_mkdir_ctrl_mon(struct kernfs_node *parent_kn,
* of tasks and cpus to monitor.
*/
ret = mongroup_create_dir(kn, NULL, "mon_groups", NULL);
- if (ret)
+ if (ret) {
+ seq_buf_puts(&last_cmd_status, "kernfs subdir error\n");
goto out_id_free;
+ }
}
goto out_unlock;
--
2.11.0
^ permalink raw reply related [flat|nested] 14+ messages in thread* Re: [RFC 0/5] x86/intel_rdt: Better diagnostics
2017-09-18 22:18 [RFC 0/5] x86/intel_rdt: Better diagnostics Luck, Tony
` (4 preceding siblings ...)
2017-09-18 22:18 ` [PATCH 5/5] x86/intel_rdt: Add diagnostics when making directories Luck, Tony
@ 2017-09-19 1:10 ` Steven Rostedt
2017-09-19 23:11 ` [PATCH 6/5] x86/intel_rdt: Add documentation for "info/last_cmd_status" Luck, Tony
2017-09-21 12:08 ` [RFC 0/5] x86/intel_rdt: Better diagnostics Borislav Petkov
7 siblings, 0 replies; 14+ messages in thread
From: Steven Rostedt @ 2017-09-19 1:10 UTC (permalink / raw)
To: Luck, Tony
Cc: Thomas Gleixner, linux-kernel, Boris Petkov, Fenghua Yu,
Reinette Chatre, Vikas Shivappa
On Mon, 18 Sep 2017 15:18:38 -0700
"Luck, Tony" <tony.luck@intel.com> wrote:
> From: Tony Luck <tony.luck@intel.com>
>
> Chatting online with Boris to diagnose why his test cases for RDT
> weren't working, we came up with either a good idea (in which case
> I credit Boris) or a dumb one (in which case this is all my fault).
>
> The basic problem is that there aren't many good error codes for
> a file system interface to pass back to the user. I'd resisted
> adding printk() calls because it is a pain to parse the console
> log, doubly so if you want to do it from a shell script that is
> actually issuing the commands to RDT.
>
> The answer is to add new file in the "info" directory that gives
> the status of the last "command" to RDT (either a mkdir, or a
> write to one of the control files).
>
> I used the seq_buf* framework because I initially thought a single
> command might result in multiple messages. But currently that isn't
> true and we could potentially just use "strcpy()/sprintf()" to a
> fixed buffer. I didn't switch to that because the seq_buf* seems
> very lightweight and allows for future extra messages while including
> checking for exceeding the length of the buffer.
>
> Tony Luck (5):
> x86/intel_rdt: Add framework for better RDT UI diagnostics
> x86/intel_rdt: Add diagnostics when writing the schemata file
> x86/intel_rdt: Add diagnostics when writing the tasks file
> x86/intel_rdt: Add diagnostics when writing the cpus file
> x86/intel_rdt: Add diagnostics when making directories
>
> arch/x86/kernel/cpu/intel_rdt.h | 6 ++
> arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c | 61 +++++++++++++++----
> arch/x86/kernel/cpu/intel_rdt_rdtgroup.c | 93 +++++++++++++++++++++++++----
> 3 files changed, 137 insertions(+), 23 deletions(-)
>
They all look fine to me.
Acked-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
-- Steve
^ permalink raw reply [flat|nested] 14+ messages in thread* [PATCH 6/5] x86/intel_rdt: Add documentation for "info/last_cmd_status"
2017-09-18 22:18 [RFC 0/5] x86/intel_rdt: Better diagnostics Luck, Tony
` (5 preceding siblings ...)
2017-09-19 1:10 ` [RFC 0/5] x86/intel_rdt: Better diagnostics Steven Rostedt
@ 2017-09-19 23:11 ` Luck, Tony
2017-09-21 12:08 ` [RFC 0/5] x86/intel_rdt: Better diagnostics Borislav Petkov
7 siblings, 0 replies; 14+ messages in thread
From: Luck, Tony @ 2017-09-19 23:11 UTC (permalink / raw)
To: Thomas Gleixner
Cc: Tony Luck, linux-kernel, Boris Petkov, Fenghua Yu,
Reinette Chatre, Steven Rostedt, Vikas Shivappa
From: Tony Luck <tony.luck@intel.com>
New file in the "info" directory helps diagnose what went wrong
when using the /sys/fs/resctrl file system
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
Oops ... forgot the Documentation ... here it is.
Documentation/x86/intel_rdt_ui.txt | 11 +++++++++++
1 file changed, 11 insertions(+)
diff --git a/Documentation/x86/intel_rdt_ui.txt b/Documentation/x86/intel_rdt_ui.txt
index 4d8848e4e224..6851854cf69d 100644
--- a/Documentation/x86/intel_rdt_ui.txt
+++ b/Documentation/x86/intel_rdt_ui.txt
@@ -87,6 +87,17 @@ with the following files:
bytes) at which a previously used LLC_occupancy
counter can be considered for re-use.
+Finally, in the top level of the "info" directory there is a file
+named "last_cmd_status". This is reset with every "command" issued
+via the file system (making new directories or writing to any of the
+control files). If the command was successful, it will read as "ok".
+If the command failed, it will provide more information that can be
+conveyed in the error returns from file operations. E.g.
+
+ # echo L3:0=f7 > schemata
+ bash: echo: write error: Invalid argument
+ # cat info/last_cmd_status
+ mask f7 has non-consecutive 1-bits
Resource alloc and monitor groups
---------------------------------
--
2.11.0
^ permalink raw reply related [flat|nested] 14+ messages in thread* Re: [RFC 0/5] x86/intel_rdt: Better diagnostics
2017-09-18 22:18 [RFC 0/5] x86/intel_rdt: Better diagnostics Luck, Tony
` (6 preceding siblings ...)
2017-09-19 23:11 ` [PATCH 6/5] x86/intel_rdt: Add documentation for "info/last_cmd_status" Luck, Tony
@ 2017-09-21 12:08 ` Borislav Petkov
2017-09-30 8:27 ` Borislav Petkov
7 siblings, 1 reply; 14+ messages in thread
From: Borislav Petkov @ 2017-09-21 12:08 UTC (permalink / raw)
To: Luck, Tony
Cc: Thomas Gleixner, linux-kernel, Fenghua Yu, Reinette Chatre,
Steven Rostedt, Vikas Shivappa
On Mon, Sep 18, 2017 at 03:18:38PM -0700, Luck, Tony wrote:
> From: Tony Luck <tony.luck@intel.com>
>
> Chatting online with Boris to diagnose why his test cases for RDT
> weren't working, we came up with either a good idea (in which case
> I credit Boris) or a dumb one (in which case this is all my fault).
Ha! I can share the fault, no worries :-)
I'll test them on my box when I get a chance.
Thanks.
--
Regards/Gruss,
Boris.
SUSE Linux GmbH, GF: Felix Imendörffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nürnberg)
--
^ permalink raw reply [flat|nested] 14+ messages in thread* Re: [RFC 0/5] x86/intel_rdt: Better diagnostics
2017-09-21 12:08 ` [RFC 0/5] x86/intel_rdt: Better diagnostics Borislav Petkov
@ 2017-09-30 8:27 ` Borislav Petkov
0 siblings, 0 replies; 14+ messages in thread
From: Borislav Petkov @ 2017-09-30 8:27 UTC (permalink / raw)
To: Luck, Tony
Cc: Thomas Gleixner, linux-kernel, Fenghua Yu, Reinette Chatre,
Steven Rostedt, Vikas Shivappa
On Thu, Sep 21, 2017 at 02:08:15PM +0200, Borislav Petkov wrote:
> On Mon, Sep 18, 2017 at 03:18:38PM -0700, Luck, Tony wrote:
> > From: Tony Luck <tony.luck@intel.com>
> >
> > Chatting online with Boris to diagnose why his test cases for RDT
> > weren't working, we came up with either a good idea (in which case
> > I credit Boris) or a dumb one (in which case this is all my fault).
>
> Ha! I can share the fault, no worries :-)
>
> I'll test them on my box when I get a chance.
Ok, I ran latest tip/master which has your patches:
# mkdir p0 p1
# echo "L3:0=00fff;1=ff000\nMB:0=50;1=50" > /sys/fs/resctrl/p0/schemata
bash: echo: write error: Invalid argument
# cat info/last_cmd_status
non-hex character in mask ff000\nMB:0=50
<--- I think this needs to be fixed in the doc examples to say:
echo -e "L3:0=00fff;1=ff000\nMB:0=50;1=50" > /sys/fs/resctrl/p0/schemata
i.e., you need to supply -e in order to interpret backslash chars.
# echo -e "L3:0=00fff;1=ff000\nMB:0=50;1=50" > /sys/fs/resctrl/p0/schemata
bash: echo: write error: Invalid argument
# cat info/last_cmd_status
unknown/unsupported resource name 'MB'
<--- Yap, much better.
# mkdir c1
# cat c1/schemata
L3:0=fffff;1=fffff
# echo "L3:0=3;1=3" > c1/schemata
# cat info/last_cmd_status
ok
# echo 1 > c1/tasks
# cat info/last_cmd_status
ok
#
Yap, thanks for doing this!
--
Regards/Gruss,
Boris.
SUSE Linux GmbH, GF: Felix Imendörffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nürnberg)
--
^ permalink raw reply [flat|nested] 14+ messages in thread