From: Marcelo Tosatti <mtosatti@redhat.com>
To: kvm-devel <kvm@vger.kernel.org>, linux-kernel@vger.kernel.org
Cc: Thomas Gleixner <tglx@linutronix.de>,
Ingo Molnar <mingo@kernel.org>,
Andrea Arcangeli <aarcange@redhat.com>,
Bandan Das <bsd@redhat.com>, Paolo Bonzini <pbonzini@redhat.com>
Subject: [PATCH] sched: introduce configurable delay before entering idle
Date: Tue, 7 May 2019 15:56:49 -0300 [thread overview]
Message-ID: <20190507185647.GA29409@amt.cnet> (raw)
[-- Attachment #1: Type: text/plain, Size: 3259 bytes --]
Certain workloads perform poorly on KVM compared to baremetal
due to baremetal's ability to perform mwait on the NEED_RESCHED
bit of the task flags (therefore skipping the wakeup IPI).
This patch introduces a configurable busy-wait delay before entering the
architecture idle routine, allowing wakeup IPIs to be skipped
(if the wakeup happens within that window).
The real-life workload whose performance this patch improves
is SAP HANA (by 5-10%); for that case, setting idle_spin to 30
(microseconds) is sufficient.
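This patch improves the attached server.py and client.py example
as follows (elapsed time in seconds printed by client.py, lower is
better; percentages are host time relative to guest time):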
Host: 31.814230202231556
Guest: 38.17718765199993 (83 %)
Guest, idle_spin=50us: 33.317709898000004 (95 %)
Guest, idle_spin=220us: 32.27826551499999 (98 %)
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
kernel/sched/idle.c | 86 ++++++++++++++++++++++++++++++++++++++++++
1 file changed, 86 insertions(+)
diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index f5516bae0c1b..bca7656a7ea0 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -216,6 +216,29 @@ static void cpuidle_idle_call(void)
rcu_idle_exit();
}
+/*
+ * Busy-wait budget, in microseconds, spent before the idle call.
+ * do_idle() only spins when this is non-zero.
+ */
+static unsigned int spin_before_idle_us;
+
+/*
+ * Poll for up to spin_before_idle_us microseconds before do_idle() goes on
+ * to cpuidle_idle_call().
+ *
+ * The poll loop stops as soon as tif_need_resched() is true or the deadline
+ * derived from ktime_get() has passed. Interrupts are enabled while
+ * spinning and disabled again before returning.
+ */
+static void do_spin_before_idle(void)
+{
+	ktime_t deadline, cur;
+
+	deadline = ktime_add_ns(ktime_get(), spin_before_idle_us * 1000);
+
+	/* Entry sequence; the exit sequence below undoes it. */
+	rcu_idle_enter();
+	local_irq_enable();
+	stop_critical_timings();
+
+	for (;;) {
+		cpu_relax();
+		cur = ktime_get();
+		/* Leave on a pending reschedule or once the budget is used up. */
+		if (tif_need_resched() || !ktime_before(cur, deadline))
+			break;
+	}
+
+	start_critical_timings();
+	rcu_idle_exit();
+	local_irq_disable();
+}
+
/*
* Generic idle loop implementation
*
@@ -259,6 +282,8 @@ static void do_idle(void)
tick_nohz_idle_restart_tick();
cpu_idle_poll();
} else {
+ if (spin_before_idle_us)
+ do_spin_before_idle();
cpuidle_idle_call();
}
arch_cpu_idle_exit();
@@ -465,3 +490,64 @@ const struct sched_class idle_sched_class = {
.switched_to = switched_to_idle,
.update_curr = update_curr_idle,
};
+
+
+/*
+ * sysfs store handler for "idle_spin".
+ *
+ * Parses @buf as a base-10 unsigned integer number of microseconds and
+ * stores it in spin_before_idle_us. Input that does not parse, or a value
+ * above USEC_PER_SEC, is rejected.
+ *
+ * Returns @count on success, -EINVAL otherwise.
+ */
+static ssize_t store_idle_spin(struct kobject *kobj,
+			       struct kobj_attribute *attr,
+			       const char *buf, size_t count)
+{
+	unsigned int usecs;
+	int rc = kstrtouint(buf, 10, &usecs);
+
+	/* usecs is only inspected when parsing succeeded (short-circuit). */
+	if (rc < 0 || usecs > USEC_PER_SEC)
+		return -EINVAL;
+
+	spin_before_idle_us = usecs;
+	return count;
+}
+
+/*
+ * sysfs show handler for "idle_spin".
+ *
+ * Writes the current value of spin_before_idle_us (microseconds) to @buf
+ * as a decimal number followed by a newline.
+ *
+ * Returns the number of characters written to @buf.
+ */
+static ssize_t show_idle_spin(struct kobject *kobj,
+			      struct kobj_attribute *attr,
+			      char *buf)
+{
+	/* The variable is unsigned int, so the matching conversion is %u. */
+	return sprintf(buf, "%u\n", spin_before_idle_us);
+}
+
+/* "idle_spin" attribute: mode 0644, served by show_idle_spin()/store_idle_spin(). */
+static struct kobj_attribute idle_spin_attr = __ATTR(idle_spin, 0644,
+						     show_idle_spin,
+						     store_idle_spin);
+
+/* NULL-terminated list of the attributes that make up the group. */
+static struct attribute *sched_attrs[] = { &idle_spin_attr.attr, NULL };
+
+/* Attribute group passed to sysfs_create_group() by sched_sysfs_init(). */
+static const struct attribute_group sched_attr_group = { .attrs = sched_attrs };
+
+/* kobject named "sched", created under kernel_kobj at init time. */
+static struct kobject *sched_kobj;
+
+/*
+ * Create the "sched" kobject under kernel_kobj and attach sched_attr_group
+ * to it.
+ *
+ * Returns 0 on success, -ENOMEM if the kobject cannot be created, or the
+ * error from sysfs_create_group(); in the last case the reference on the
+ * kobject is dropped again.
+ */
+static int __init sched_sysfs_init(void)
+{
+	int rc;
+
+	sched_kobj = kobject_create_and_add("sched", kernel_kobj);
+	if (!sched_kobj)
+		return -ENOMEM;
+
+	rc = sysfs_create_group(sched_kobj, &sched_attr_group);
+	if (rc)
+		kobject_put(sched_kobj);
+
+	return rc;
+}
+postcore_initcall(sched_sysfs_init);
[-- Attachment #2: client.py --]
[-- Type: text/plain, Size: 619 bytes --]
#!/bin/python3
# Benchmark client: receives a message from the server, waits 20us, echoes
# it back, and repeats 90000 times; prints the elapsed time in seconds.
import socket
import sys
import struct
import fcntl
import os
import errno
import time

sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
# Must match the address server.py binds to.
server_address = ('127.0.0.1', 999)
print ("connecting to 127.0.0.1")
sock.connect(server_address)

nr_writes = 0
start_time = time.clock_gettime(time.CLOCK_MONOTONIC)
while nr_writes < 90000:
    data = sock.recv(4096)
    if len(data) == 0:
        # An empty read means the peer closed the connection.
        print("connection closed!\n")
        sys.exit(0)
    # sleep 20us
    time.sleep(20/1000000)
    # sendall() keeps writing until the whole buffer has been sent.
    sock.sendall(data)
    nr_writes = nr_writes+1
end_time = time.clock_gettime(time.CLOCK_MONOTONIC)
delta = end_time - start_time
print(delta)
[-- Attachment #3: server.py --]
[-- Type: text/plain, Size: 417 bytes --]
#!/bin/python3
# Benchmark server: accepts one connection, then repeatedly sends a fixed
# message, waits for the reply, and sleeps 200us, until the peer closes.
import socket
import sys
import struct
import fcntl
import os
import errno
import time

sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
# Must match the address client.py connects to.
sock.bind(('127.0.0.1', 999))
sock.listen(10)
conn, addr = sock.accept()

nr_written=0
while 1:
    conn.sendall(b"a response line of text")
    data = conn.recv(1024)
    if not data:
        # An empty read means the client closed the connection.
        break
    # sleep 200us
    time.sleep(200/1000000)
    nr_written = nr_written+1
next reply other threads:[~2019-05-07 18:57 UTC|newest]
Thread overview: 19+ messages / expand[flat|nested] mbox.gz Atom feed top
2019-05-07 18:56 Marcelo Tosatti [this message]
2019-05-07 22:15 ` [PATCH] sched: introduce configurable delay before entering idle Peter Zijlstra
2019-05-07 23:44 ` Marcelo Tosatti
2019-05-13 9:20 ` Wanpeng Li
2019-05-13 11:31 ` Konrad Rzeszutek Wilk
2019-05-13 11:51 ` Raslan, KarimAllah
2019-05-13 12:30 ` Boris Ostrovsky
2019-05-15 1:45 ` Wanpeng Li
2019-05-14 13:50 ` Marcelo Tosatti
2019-05-14 15:20 ` Konrad Rzeszutek Wilk
2019-05-14 17:42 ` Marcelo Tosatti
2019-05-15 1:42 ` Wanpeng Li
2019-05-15 20:26 ` Marcelo Tosatti
2019-05-15 18:42 ` Ankur Arora
2019-05-15 20:43 ` Marcelo Tosatti
2019-05-17 4:32 ` Ankur Arora
2019-05-17 17:49 ` Marcelo Tosatti
2019-05-16 1:07 ` Wanpeng Li
2019-05-17 2:06 ` Ankur Arora
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20190507185647.GA29409@amt.cnet \
--to=mtosatti@redhat.com \
--cc=aarcange@redhat.com \
--cc=bsd@redhat.com \
--cc=kvm@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=mingo@kernel.org \
--cc=pbonzini@redhat.com \
--cc=tglx@linutronix.de \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).