From: Peter Zijlstra <peterz@infradead.org>
To: mingo@redhat.com, tglx@linutronix.de, juri.lelli@redhat.com,
vincent.guittot@linaro.org, dietmar.eggemann@arm.com,
rostedt@goodmis.org, bsegall@google.com, mgorman@suse.de,
bristot@redhat.com
Cc: linux-kernel@vger.kernel.org, linux-mm@kvack.org,
linux-api@vger.kernel.org, x86@kernel.org, peterz@infradead.org,
pjt@google.com, posk@google.com, avagin@google.com,
jannh@google.com, tdelisle@uwaterloo.ca, mark.rutland@arm.com,
posk@posk.io
Subject: [RFC][PATCH v2 0/5] sched: User Managed Concurrency Groups
Date: Thu, 20 Jan 2022 16:55:17 +0100
Message-ID: <20220120155517.066795336@infradead.org>
Latest version; many changes since last time, still under heavy discussion.

It seems to work with the test-case I have (below), but that still has a
few gaps, coverage-wise.

I still haven't done the SMP wakeup thing; I finally get the idea behind
worker timeouts, but haven't implemented that yet.

The test-case implements a trivial FIFO scheduler on top of the new
syscalls: one server thread drives three workers (a CPU-bound spinner and
two sleepers), optionally with a timer-driven preemption tick.

Happy hacking..
----
#define _GNU_SOURCE
#include <unistd.h>
#include <sys/types.h>
#include <sys/syscall.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <stdint.h>
#include <time.h>
#include <errno.h>
#include <signal.h>

#ifndef __NR_umcg_ctl
#define __NR_umcg_ctl	450
#define __NR_umcg_wait	451
#define __NR_umcg_kick	452
#endif

#include <linux/list.h>
#include "include/uapi/linux/umcg.h"

/* shorthands used below; drop these if the include paths already provide them */
typedef uint32_t u32;
typedef uint64_t u64;
/* syscall wrappers */
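/*
 * As used below: umcg_ctl() (un)registers the calling thread as a UMCG
 * server or worker, umcg_wait() blocks the caller and/or switches to
 * the worker in ::next_tid, and umcg_kick() pokes a running worker into
 * the kernel so it can observe a preemption request.
 */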
static inline int
sys_umcg_ctl(u32 flags, struct umcg_task *self, clockid_t which_clock)
{
return syscall(__NR_umcg_ctl, flags, self, which_clock);
}
static inline int
sys_umcg_wait(u32 flags, u64 timo)
{
return syscall(__NR_umcg_wait, flags, timo);
}
static inline int
sys_umcg_kick(u32 flags, pid_t tid)
{
return syscall(__NR_umcg_kick, flags, tid);
}
/* the 'foo' scheduler */
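/*
 * A minimal FIFO scheduler: each worker is a foo_task; the server keeps
 * runnable workers on a local runqueue (foo_server::node) and runs one
 * at a time (foo_server::cur).
 */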
struct foo_task {
struct umcg_task task;
struct list_head node;
pid_t tid;
};
struct foo_server {
struct umcg_task task;
struct list_head node;
pid_t tid;
struct foo_task *cur;
int workers;
};
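
/*
 * Enqueue a worker the kernel reported runnable; ::runnable_workers_ptr
 * doubles as the link in the kernel's singly-linked notification list,
 * so clear it before reuse.
 */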
void foo_add(struct foo_server *server, struct umcg_task *t)
{
struct foo_task *foo = container_of(t, struct foo_task, task);
t->runnable_workers_ptr = 0ULL;
list_add_tail(&foo->node, &server->node);
}
struct foo_task *foo_pick_next(struct foo_server *server)
{
struct foo_task *first = NULL;
if (list_empty(&server->node))
return first;
first = list_first_entry(&server->node, struct foo_task, node);
list_del(&first->node);
return first;
}
#define NSEC_PER_SEC 1000000000ULL
u64 foo_time(void)
{
struct timespec ts;
clock_gettime(CLOCK_MONOTONIC, &ts);
return (unsigned long long)ts.tv_sec * NSEC_PER_SEC + ts.tv_nsec;
}
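
/*
 * Cooperative yield: set ourselves RUNNABLE and switch back to the
 * server; UMCG_TF_COND_WAIT makes the wait conditional on the state
 * still being RUNNABLE, so a concurrent wakeup is not lost.
 */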
void foo_yield(struct umcg_task *self)
{
self->state = UMCG_TASK_RUNNABLE | UMCG_TF_COND_WAIT;
sys_umcg_wait(0, 0);
}
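
/* preemption tick period; a whole second keeps the demo output readable */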
#define TICK_NSEC NSEC_PER_SEC
static volatile bool foo_preemptible = false;
/* our workers */
static volatile bool done = false;
static void umcg_signal(int signr)
{
done = true;
}
/* always running worker */
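/*
 * Worker A: a CPU-bound spinner; prints a '.' every ~1M iterations and,
 * in cooperative mode, yields to the server every ~10M iterations.
 * With preemption enabled it relies on the server's tick instead.
 */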
void *worker_fn0(void *arg)
{
struct foo_server *server = arg;
struct foo_task task = { };
unsigned long i = 0;
int ret;
task.tid = gettid();
task.task.server_tid = server->tid;
task.task.state = UMCG_TASK_BLOCKED;
printf("A == %d\n", gettid());
ret = sys_umcg_ctl(UMCG_CTL_REGISTER|UMCG_CTL_WORKER, &task.task, CLOCK_MONOTONIC);
if (ret) {
perror("umcg_ctl(A): ");
exit(-1);
}
__atomic_add_fetch(&server->workers, 1, __ATOMIC_RELAXED);
while (!done) {
unsigned long x = i++;
if (!(x % 1000000)) {
putchar('.');
fflush(stdout);
}
/* co-operative or preemptible */
if (!foo_preemptible && !(x % 10000000))
foo_yield(&task.task);
}
printf("A == done\n");
__atomic_add_fetch(&server->workers, -1, __ATOMIC_RELAXED);
ret = sys_umcg_ctl(UMCG_CTL_UNREGISTER|UMCG_CTL_WORKER, &task.task, 0);
if (ret) {
perror("umcg_ctl(~A): ");
exit(-1);
}
return NULL;
}
/* event driven worker */
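/*
 * Workers B and C spend most of their time asleep; each sleep() blocks
 * the worker in the kernel, and on wakeup it is queued back on the
 * server's runnable_workers_ptr list.
 */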
void *worker_fn1(void *arg)
{
struct foo_server *server = arg;
struct foo_task task = { };
int ret;
task.tid = gettid();
task.task.server_tid = server->tid;
task.task.state = UMCG_TASK_BLOCKED;
printf("B == %d\n", gettid());
ret = sys_umcg_ctl(UMCG_CTL_REGISTER|UMCG_CTL_WORKER, &task.task, CLOCK_MONOTONIC);
if (ret) {
perror("umcg_ctl(B): ");
exit(-1);
}
__atomic_add_fetch(&server->workers, 1, __ATOMIC_RELAXED);
while (!done) {
printf("B\n");
fflush(stdout);
sleep(1);
}
printf("B == done\n");
__atomic_add_fetch(&server->workers, -1, __ATOMIC_RELAXED);
ret = sys_umcg_ctl(UMCG_CTL_UNREGISTER|UMCG_CTL_WORKER, &task.task, 0);
if (ret) {
perror("umcg_ctl(~B): ");
exit(-1);
}
return NULL;
}
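
/* same as worker B, but on a 2s period */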
void *worker_fn2(void *arg)
{
struct foo_server *server = arg;
struct foo_task task = { };
int ret;
task.tid = gettid();
task.task.server_tid = server->tid;
task.task.state = UMCG_TASK_BLOCKED;
printf("C == %d\n", gettid());
ret = sys_umcg_ctl(UMCG_CTL_REGISTER|UMCG_CTL_WORKER, &task.task, CLOCK_MONOTONIC);
if (ret) {
perror("umcg_ctl(C): ");
exit(-1);
}
__atomic_add_fetch(&server->workers, 1, __ATOMIC_RELAXED);
while (!done) {
printf("C\n");
fflush(stdout);
sleep(2);
}
printf("C == done\n");
__atomic_add_fetch(&server->workers, -1, __ATOMIC_RELAXED);
ret = sys_umcg_ctl(UMCG_CTL_UNREGISTER|UMCG_CTL_WORKER, &task.task, 0);
if (ret) {
perror("umcg_ctl(~C): ");
exit(-1);
}
return NULL;
}
/* the server */
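/*
 * Server loop, per iteration:
 *  1. mark ourselves RUNNABLE|COND_WAIT,
 *  2. splice the kernel's runnable-worker list into the local runqueue,
 *  3. keep the current worker, or pick the next one via ::next_tid,
 *  4. umcg_wait(): EAGAIN means new runnable workers arrived,
 *     ETIMEDOUT drives the optional preemption tick.
 */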
int main(int argc, char **argv)
{
struct umcg_task *runnable_ptr, *next;
struct foo_server server = { };
pthread_t worker[3];
u64 timeout = 0;
u32 tid;
int ret;
struct sigaction sa = {
.sa_handler = umcg_signal,
};
sigaction(SIGINT, &sa, NULL);
printf("server == %d\n", gettid());
fflush(stdout);
server.tid = gettid();
INIT_LIST_HEAD(&server.node);
server.task.server_tid = gettid();
server.task.state = UMCG_TASK_RUNNING;
ret = sys_umcg_ctl(UMCG_CTL_REGISTER, &server.task, CLOCK_MONOTONIC);
if (ret) {
perror("umcg_ctl: ");
exit(-1);
}
pthread_create(&worker[0], NULL, worker_fn0, &server);
pthread_create(&worker[1], NULL, worker_fn1, &server);
pthread_create(&worker[2], NULL, worker_fn2, &server);
if (argc > 1) {
foo_preemptible = true;
/*
* setup preemption tick
*/
timeout = foo_time() + TICK_NSEC;
}
while (!done || __atomic_load_n(&server.workers, __ATOMIC_RELAXED)) {
/*
* Mark the server as runnable first, so we can detect
* additions to the runnable list after we read it.
*/
__atomic_store_n(&server.task.state,
UMCG_TASK_RUNNABLE | UMCG_TF_COND_WAIT,
__ATOMIC_RELAXED);
/*
 * Consume the runnable notification list and add
 * the tasks to our local runqueue.
 */
runnable_ptr = (void *)__atomic_exchange_n(&server.task.runnable_workers_ptr,
					   0, __ATOMIC_SEQ_CST);
while (runnable_ptr) {
next = (void *)runnable_ptr->runnable_workers_ptr;
foo_add(&server, runnable_ptr);
runnable_ptr = next;
}
if (server.cur && server.cur->task.state == UMCG_TASK_RUNNING) {
/*
* Assert ::next_tid still points there and has RUNNING bit on
*/
if (server.task.next_tid != (server.cur->tid | UMCG_TID_RUNNING)) {
printf("current not running: %d %x\n",
server.task.next_tid & UMCG_TID_MASK,
server.task.next_tid & ~UMCG_TID_MASK);
exit(-1);
}
putchar('x');
} else {
tid = 0;
server.cur = foo_pick_next(&server);
if (server.cur)
tid = server.cur->tid;
__atomic_store_n(&server.task.next_tid, tid, __ATOMIC_RELAXED);
printf("pick: %d\n", tid);
}
fflush(stdout);
ret = sys_umcg_wait(0, timeout);
/*
* If we set ::next_tid but it hasn't been consumed by the
* syscall due to failure, make sure to put the task back on
* the queue, lest we leak it.
*/
tid = __atomic_load_n(&server.task.next_tid, __ATOMIC_RELAXED);
if (tid && !(tid & UMCG_TID_RUNNING)) {
foo_add(&server, &server.cur->task);
server.cur = NULL;
putchar('*');
}
if (!ret)
continue;
switch (errno) {
case EAGAIN:
/*
* Got a wakeup, try again.
*/
continue;
case ETIMEDOUT:
/*
* timeout: drive preemption
*/
putchar('t');
fflush(stdout);
/*
* Next tick..
*/
timeout += TICK_NSEC;
/*
* If we have a current, cmpxchg set TF_PREEMPT and on success
* send it a signal to kick it into the kernel such that
* it might re-report itself runnable.
*/
if (server.cur) {
struct foo_task *t = server.cur;
u32 val = UMCG_TASK_RUNNING;
u32 new = UMCG_TASK_RUNNING | UMCG_TF_PREEMPT;
if (__atomic_compare_exchange_n(&t->task.state, &val, new,
false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) {
sys_umcg_kick(0, t->tid);
}
}
/*
* Either way around, if the cmpxchg
* failed the task will have blocked
* and we should re-start the loop.
*/
continue;
default:
printf("errno: %d\n", errno);
perror("wait:");
exit(-1);
}
}
pthread_join(worker[0], NULL);
pthread_join(worker[1], NULL);
pthread_join(worker[2], NULL);
return 0;
}
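----

For reference, a rough build/run recipe (paths and flags are
illustrative, not part of the posting): from the root of a kernel tree
with these patches applied, something like:

  gcc -Itools/include -o umcg-demo umcg-demo.c -lpthread

Run it without arguments for pure co-operative scheduling; any argument
enables the preemption tick. Note gettid() needs glibc 2.30+; older
glibc wants a syscall(SYS_gettid) wrapper.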