From: Tony Luck <tony.luck@intel.com>
To: Fenghua Yu <fenghua.yu@intel.com>,
Reinette Chatre <reinette.chatre@intel.com>,
Peter Newman <peternewman@google.com>,
Jonathan Corbet <corbet@lwn.net>,
Shuah Khan <skhan@linuxfoundation.org>,
x86@kernel.org
Cc: Shaopeng Tan <tan.shaopeng@fujitsu.com>,
James Morse <james.morse@arm.com>,
Jamie Iles <quic_jiles@quicinc.com>,
Babu Moger <babu.moger@amd.com>,
Randy Dunlap <rdunlap@infradead.org>,
linux-kernel@vger.kernel.org, linux-doc@vger.kernel.org,
patches@lists.linux.dev, Tony Luck <tony.luck@intel.com>
Subject: [PATCH v6 1/3] x86/resctrl: Add mount option "mba_MBps_event"
Date: Thu, 7 Dec 2023 11:56:11 -0800 [thread overview]
Message-ID: <20231207195613.153980-2-tony.luck@intel.com> (raw)
In-Reply-To: <20231207195613.153980-1-tony.luck@intel.com>
The MBA Software Controller(mba_sc) is a feedback loop that uses
measurements of local memory bandwidth to adjust MBA throttling levels
to keep workloads in a resctrl group within a target bandwidth set in
the schemata file.
Users may want to use total memory bandwidth instead of local to handle
workloads that have poor NUMA localization.
Add a new mount option "mba_MBps_event={event_name}" where event_name
is one of "mbm_Local_bytes" or "mbm_total_bytes" that allows a user to
specify which monitoring event to use.
Update the once-per-second polling code to use the chosen event (local
or total memory bandwidth).
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
include/linux/resctrl.h | 2 +
arch/x86/kernel/cpu/resctrl/internal.h | 3 +-
arch/x86/kernel/cpu/resctrl/monitor.c | 21 +++++----
arch/x86/kernel/cpu/resctrl/rdtgroup.c | 61 +++++++++++++++++++++-----
4 files changed, 63 insertions(+), 24 deletions(-)
diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h
index 66942d7fba7f..1feb3b2e64fa 100644
--- a/include/linux/resctrl.h
+++ b/include/linux/resctrl.h
@@ -129,6 +129,7 @@ enum membw_throttle_mode {
* @throttle_mode: Bandwidth throttling mode when threads request
* different memory bandwidths
* @mba_sc: True if MBA software controller(mba_sc) is enabled
+ * @mba_mbps_event: Event (local or total) for mba_sc
* @mb_map: Mapping of memory B/W percentage to memory B/W delay
*/
struct resctrl_membw {
@@ -138,6 +139,7 @@ struct resctrl_membw {
bool arch_needs_linear;
enum membw_throttle_mode throttle_mode;
bool mba_sc;
+ enum resctrl_event_id mba_mbps_event;
u32 *mb_map;
};
diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
index a4f1aa15f0a2..8b9b8f664324 100644
--- a/arch/x86/kernel/cpu/resctrl/internal.h
+++ b/arch/x86/kernel/cpu/resctrl/internal.h
@@ -58,7 +58,8 @@ struct rdt_fs_context {
struct kernfs_fs_context kfc;
bool enable_cdpl2;
bool enable_cdpl3;
- bool enable_mba_mbps;
+ bool enable_mba_mbps_local;
+ bool enable_mba_mbps_total;
bool enable_debug;
};
diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
index f136ac046851..d9e590f1cbc3 100644
--- a/arch/x86/kernel/cpu/resctrl/monitor.c
+++ b/arch/x86/kernel/cpu/resctrl/monitor.c
@@ -431,9 +431,10 @@ static int __mon_event_count(u32 rmid, struct rmid_read *rr)
*/
static void mbm_bw_count(u32 rmid, struct rmid_read *rr)
{
- struct mbm_state *m = &rr->d->mbm_local[rmid];
u64 cur_bw, bytes, cur_bytes;
+ struct mbm_state *m;
+ m = get_mbm_state(rr->d, rmid, rr->evtid);
cur_bytes = rr->val;
bytes = cur_bytes - m->prev_bw_bytes;
m->prev_bw_bytes = cur_bytes;
@@ -521,19 +522,21 @@ static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_domain *dom_mbm)
u32 closid, rmid, cur_msr_val, new_msr_val;
struct mbm_state *pmbm_data, *cmbm_data;
u32 cur_bw, delta_bw, user_bw;
+ enum resctrl_event_id evt_id;
struct rdt_resource *r_mba;
struct rdt_domain *dom_mba;
struct list_head *head;
struct rdtgroup *entry;
- if (!is_mbm_local_enabled())
+ if (!is_mbm_enabled())
return;
r_mba = &rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl;
+ evt_id = r_mba->membw.mba_mbps_event;
closid = rgrp->closid;
rmid = rgrp->mon.rmid;
- pmbm_data = &dom_mbm->mbm_local[rmid];
+ pmbm_data = get_mbm_state(dom_mbm, rmid, evt_id);
dom_mba = get_domain_from_cpu(smp_processor_id(), r_mba);
if (!dom_mba) {
@@ -553,7 +556,7 @@ static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_domain *dom_mbm)
*/
head = &rgrp->mon.crdtgrp_list;
list_for_each_entry(entry, head, mon.crdtgrp_list) {
- cmbm_data = &dom_mbm->mbm_local[entry->mon.rmid];
+ cmbm_data = get_mbm_state(dom_mbm, entry->mon.rmid, evt_id);
cur_bw += cmbm_data->prev_bw;
delta_bw += cmbm_data->delta_bw;
}
@@ -616,18 +619,14 @@ static void mbm_update(struct rdt_resource *r, struct rdt_domain *d, int rmid)
rr.evtid = QOS_L3_MBM_TOTAL_EVENT_ID;
rr.val = 0;
__mon_event_count(rmid, &rr);
+ if (is_mba_sc(NULL) && rr.evtid == r->membw.mba_mbps_event)
+ mbm_bw_count(rmid, &rr);
}
if (is_mbm_local_enabled()) {
rr.evtid = QOS_L3_MBM_LOCAL_EVENT_ID;
rr.val = 0;
__mon_event_count(rmid, &rr);
-
- /*
- * Call the MBA software controller only for the
- * control groups and when user has enabled
- * the software controller explicitly.
- */
- if (is_mba_sc(NULL))
+ if (is_mba_sc(NULL) && rr.evtid == r->membw.mba_mbps_event)
mbm_bw_count(rmid, &rr);
}
}
diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
index 69a1de92384a..5f64a0b2597c 100644
--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
@@ -2294,7 +2294,7 @@ static bool supports_mba_mbps(void)
{
struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl;
- return (is_mbm_local_enabled() &&
+ return (is_mbm_enabled() &&
r->alloc_capable && is_mba_linear());
}
@@ -2302,7 +2302,7 @@ static bool supports_mba_mbps(void)
* Enable or disable the MBA software controller
* which helps user specify bandwidth in MBps.
*/
-static int set_mba_sc(bool mba_sc)
+static int set_mba_sc(bool mba_sc, enum resctrl_event_id mba_mbps_event)
{
struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl;
u32 num_closid = resctrl_arch_get_num_closid(r);
@@ -2313,6 +2313,7 @@ static int set_mba_sc(bool mba_sc)
return -EINVAL;
r->membw.mba_sc = mba_sc;
+ r->membw.mba_mbps_event = mba_mbps_event;
list_for_each_entry(d, &r->domains, list) {
for (i = 0; i < num_closid; i++)
@@ -2445,13 +2446,14 @@ static void rdt_disable_ctx(void)
{
resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L3, false);
resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L2, false);
- set_mba_sc(false);
+ set_mba_sc(false, QOS_L3_MBM_LOCAL_EVENT_ID);
resctrl_debug = false;
}
static int rdt_enable_ctx(struct rdt_fs_context *ctx)
{
+ enum resctrl_event_id mba_mbps_event;
int ret = 0;
if (ctx->enable_cdpl2) {
@@ -2466,8 +2468,12 @@ static int rdt_enable_ctx(struct rdt_fs_context *ctx)
goto out_cdpl2;
}
- if (ctx->enable_mba_mbps) {
- ret = set_mba_sc(true);
+ if (ctx->enable_mba_mbps_local || ctx->enable_mba_mbps_total) {
+ if (ctx->enable_mba_mbps_total)
+ mba_mbps_event = QOS_L3_MBM_TOTAL_EVENT_ID;
+ else
+ mba_mbps_event = QOS_L3_MBM_LOCAL_EVENT_ID;
+ ret = set_mba_sc(true, mba_mbps_event);
if (ret)
goto out_cdpl3;
}
@@ -2683,15 +2689,17 @@ enum rdt_param {
Opt_cdp,
Opt_cdpl2,
Opt_mba_mbps,
+ Opt_mba_mbps_event,
Opt_debug,
nr__rdt_params
};
static const struct fs_parameter_spec rdt_fs_parameters[] = {
- fsparam_flag("cdp", Opt_cdp),
- fsparam_flag("cdpl2", Opt_cdpl2),
- fsparam_flag("mba_MBps", Opt_mba_mbps),
- fsparam_flag("debug", Opt_debug),
+ fsparam_flag("cdp", Opt_cdp),
+ fsparam_flag("cdpl2", Opt_cdpl2),
+ fsparam_flag("mba_MBps", Opt_mba_mbps),
+ fsparam_string("mba_MBps_event", Opt_mba_mbps_event),
+ fsparam_flag("debug", Opt_debug),
{}
};
@@ -2715,7 +2723,25 @@ static int rdt_parse_param(struct fs_context *fc, struct fs_parameter *param)
case Opt_mba_mbps:
if (!supports_mba_mbps())
return -EINVAL;
- ctx->enable_mba_mbps = true;
+ if (is_mbm_local_enabled())
+ ctx->enable_mba_mbps_local = true;
+ else
+ return -EINVAL;
+ return 0;
+ case Opt_mba_mbps_event:
+ if (!supports_mba_mbps())
+ return -EINVAL;
+ if (!strcmp("mbm_local_bytes", param->string)) {
+ if (!is_mbm_local_enabled())
+ return -EINVAL;
+ ctx->enable_mba_mbps_local = true;
+ } else if (!strcmp("mbm_total_bytes", param->string)) {
+ if (!is_mbm_total_enabled())
+ return -EINVAL;
+ ctx->enable_mba_mbps_total = true;
+ } else {
+ return -EINVAL;
+ }
return 0;
case Opt_debug:
ctx->enable_debug = true;
@@ -3780,16 +3806,27 @@ static int rdtgroup_rename(struct kernfs_node *kn,
return ret;
}
+static char *mba_sc_event_opt_name(struct rdt_resource *r)
+{
+ if (r->membw.mba_mbps_event == QOS_L3_MBM_LOCAL_EVENT_ID)
+ return ",mba_MBps_event=mbm_local_bytes";
+ else if (r->membw.mba_mbps_event == QOS_L3_MBM_TOTAL_EVENT_ID)
+ return ",mba_MBps_event=mbm_total_bytes";
+ return "";
+}
+
static int rdtgroup_show_options(struct seq_file *seq, struct kernfs_root *kf)
{
+ struct rdt_resource *r_mba = &rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl;
+
if (resctrl_arch_get_cdp_enabled(RDT_RESOURCE_L3))
seq_puts(seq, ",cdp");
if (resctrl_arch_get_cdp_enabled(RDT_RESOURCE_L2))
seq_puts(seq, ",cdpl2");
- if (is_mba_sc(&rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl))
- seq_puts(seq, ",mba_MBps");
+ if (is_mba_sc(r_mba))
+ seq_puts(seq, mba_sc_event_opt_name(r_mba));
if (resctrl_debug)
seq_puts(seq, ",debug");
--
2.41.0
next prev parent reply other threads:[~2023-12-07 19:56 UTC|newest]
Thread overview: 59+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-10-24 18:16 [PATCH] x86/resctrl: mba_MBps: Fall back to total b/w if local b/w unavailable Tony Luck
2023-10-24 18:24 ` Luck, Tony
2023-10-24 23:20 ` Moger, Babu
2023-10-24 23:43 ` Luck, Tony
2023-10-25 16:01 ` Moger, Babu
2023-10-25 12:46 ` Peter Newman
2023-10-25 19:38 ` Tony Luck
2023-10-25 20:39 ` Moger, Babu
2023-10-25 20:42 ` Moger, Babu
2023-10-25 20:52 ` Tony Luck
2023-10-25 23:41 ` Moger, Babu
2023-10-26 0:07 ` Luck, Tony
2023-10-25 21:06 ` Peter Newman
2023-10-26 13:55 ` Moger, Babu
2023-10-26 16:09 ` Luck, Tony
2023-10-26 17:19 ` Moger, Babu
2023-10-26 19:54 ` Tony Luck
2023-10-25 23:50 ` [PATCH v2] " Tony Luck
2023-10-26 20:02 ` [PATCH v3] " Tony Luck
2023-10-26 22:40 ` Moger, Babu
2023-10-26 22:59 ` Luck, Tony
2023-11-03 21:43 ` Reinette Chatre
2023-11-03 21:50 ` Reinette Chatre
2023-11-07 21:15 ` Tony Luck
2023-11-08 21:49 ` Reinette Chatre
2023-11-09 21:27 ` Luck, Tony
2023-11-15 16:09 ` Reinette Chatre
2023-11-15 21:54 ` Tony Luck
2023-11-16 19:48 ` Reinette Chatre
2023-11-28 23:14 ` [PATCH v4] x86/resctrl: Add mount option to pick total MBM event Tony Luck
2023-11-29 23:48 ` Reinette Chatre
2023-12-01 20:45 ` Tony Luck
2023-12-01 21:47 ` [PATCH v5] x86/resctrl: Add event choices for mba_MBps Tony Luck
2023-12-04 16:24 ` Moger, Babu
2023-12-04 18:16 ` Tony Luck
2023-12-04 19:04 ` Moger, Babu
2023-12-04 19:45 ` Luck, Tony
2023-12-04 20:03 ` Reinette Chatre
2023-12-04 21:08 ` Tony Luck
2023-12-04 22:15 ` Reinette Chatre
2023-12-04 22:51 ` Reinette Chatre
2023-12-07 19:56 ` [PATCH v6 0/3] x86/resctrl: mba_MBps enhancements Tony Luck
2023-12-07 19:56 ` Tony Luck [this message]
2023-12-08 18:17 ` [PATCH v6 1/3] x86/resctrl: Add mount option "mba_MBps_event" Peter Newman
2023-12-08 21:57 ` Tony Luck
2023-12-08 22:09 ` Peter Newman
2023-12-08 22:37 ` Luck, Tony
2023-12-12 17:54 ` Reinette Chatre
2023-12-12 20:02 ` Tony Luck
2023-12-12 21:42 ` Reinette Chatre
2023-12-13 1:07 ` Luck, Tony
2023-12-08 18:29 ` Moger, Babu
2023-12-08 21:50 ` Tony Luck
2023-12-12 18:59 ` Reinette Chatre
2023-12-07 19:56 ` [PATCH v6 2/3] x86/resctrl: Use total bandwidth for mba_MBps option when local isn't present Tony Luck
2023-12-08 18:26 ` Peter Newman
2023-12-07 19:56 ` [PATCH v6 3/3] x86/resctrl: Add new "mba_MBps_event" mount option to documentation Tony Luck
2023-12-08 19:22 ` Peter Newman
2023-12-12 18:59 ` Reinette Chatre
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20231207195613.153980-2-tony.luck@intel.com \
--to=tony.luck@intel.com \
--cc=babu.moger@amd.com \
--cc=corbet@lwn.net \
--cc=fenghua.yu@intel.com \
--cc=james.morse@arm.com \
--cc=linux-doc@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=patches@lists.linux.dev \
--cc=peternewman@google.com \
--cc=quic_jiles@quicinc.com \
--cc=rdunlap@infradead.org \
--cc=reinette.chatre@intel.com \
--cc=skhan@linuxfoundation.org \
--cc=tan.shaopeng@fujitsu.com \
--cc=x86@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).