From: Peter Zijlstra <peterz@infradead.org>
To: mingo@redhat.com, tglx@linutronix.de, juri.lelli@redhat.com,
vincent.guittot@linaro.org, dietmar.eggemann@arm.com,
rostedt@goodmis.org, bsegall@google.com, mgorman@suse.de,
bristot@redhat.com
Cc: linux-kernel@vger.kernel.org, linux-mm@kvack.org,
linux-api@vger.kernel.org, x86@kernel.org, peterz@infradead.org,
pjt@google.com, posk@google.com, avagin@google.com,
jannh@google.com, tdelisle@uwaterloo.ca, mark.rutland@arm.com,
posk@posk.io
Subject: [RFC][PATCH v2 0/5] sched: User Managed Concurrency Groups
Date: Thu, 20 Jan 2022 16:55:17 +0100 [thread overview]
Message-ID: <20220120155517.066795336@infradead.org> (raw)
Latest version, many changes since last time, still under heavy discussion.
Seems to work with the test-case I have (below), but that still has a few gaps,
coverage wise.
Still haven't done the SMP wakeup thing; I finally got the idea behind
worker-timeouts, but haven't implemented that yet.
Happy hacking..
----
#define _GNU_SOURCE
#include <unistd.h>
#include <sys/types.h>
#include <sys/syscall.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <errno.h>
#include <signal.h>
#include <time.h>

#ifndef __NR_umcg_ctl
#define __NR_umcg_ctl 450
#define __NR_umcg_wait 451
#define __NR_umcg_kick 452
#endif

#include <linux/list.h>
#include "include/uapi/linux/umcg.h"
/* syscall wrappers */
/*
 * Raw wrapper for the umcg_ctl() syscall: (un)register @self as a UMCG
 * server or worker, per @flags.  @which_clock presumably selects the
 * clock used for wait timeouts -- confirm against the kernel side.
 * Returns 0 on success, -1 with errno set on failure.
 */
static inline int
sys_umcg_ctl(u32 flags, struct umcg_task *self, clockid_t which_clock)
{
	long ret = syscall(__NR_umcg_ctl, flags, self, which_clock);

	return ret;
}
/*
 * Raw wrapper for the umcg_wait() syscall: block the calling UMCG task,
 * optionally until absolute time @timo (0 means no timeout).
 * Returns 0 on success, -1 with errno set on failure.
 */
static inline int
sys_umcg_wait(u32 flags, u64 timo)
{
	long ret = syscall(__NR_umcg_wait, flags, timo);

	return ret;
}
/*
 * Raw wrapper for the umcg_kick() syscall: poke thread @tid into the
 * kernel (used here to drive preemption of a running worker).
 * Returns 0 on success, -1 with errno set on failure.
 */
static inline int
sys_umcg_kick(u32 flags, pid_t tid)
{
	long ret = syscall(__NR_umcg_kick, flags, tid);

	return ret;
}
/* the 'foo' scheduler */

/*
 * Per-worker state: the kernel-visible UMCG state plus the userspace
 * scheduler's bookkeeping.  container_of() maps &task back to foo_task.
 */
struct foo_task {
	struct umcg_task task;	/* kernel ABI state for this worker */
	struct list_head node;	/* link on foo_server::node runqueue */
	pid_t tid;		/* worker thread id */
};

/*
 * Per-server state: the server is itself a UMCG task and owns a local
 * FIFO runqueue of runnable workers.
 */
struct foo_server {
	struct umcg_task task;	/* kernel ABI state for the server thread */
	struct list_head node;	/* head of the local runqueue */
	pid_t tid;		/* server thread id */
	struct foo_task *cur;	/* worker currently picked to run, or NULL */
	int workers;		/* registered-worker count (atomic add/load) */
};
/*
 * Enqueue a runnable worker on the server's local runqueue (FIFO order).
 * @t is the kernel-visible part; recover the containing foo_task first.
 */
void foo_add(struct foo_server *server, struct umcg_task *t)
{
	struct foo_task *worker = container_of(t, struct foo_task, task);

	/* Clear the kernel's runnable-list link before reusing the node. */
	t->runnable_workers_ptr = 0ULL;
	list_add_tail(&worker->node, &server->node);
}
/*
 * Dequeue and return the oldest runnable worker from the server's local
 * runqueue, or NULL when the queue is empty.
 */
struct foo_task *foo_pick_next(struct foo_server *server)
{
	struct foo_task *picked = NULL;

	if (!list_empty(&server->node)) {
		picked = list_first_entry(&server->node, struct foo_task, node);
		list_del(&picked->node);
	}

	return picked;
}
#define NSEC_PER_SEC 1000000000ULL

/*
 * Current CLOCK_MONOTONIC time in nanoseconds.
 *
 * Needs <time.h> for clock_gettime()/struct timespec -- previously this
 * compiled only via transitive includes.  The tv_sec widening cast
 * happens before the multiply so the ns conversion cannot overflow in
 * 32-bit arithmetic.
 */
u64 foo_time(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return (unsigned long long)ts.tv_sec * NSEC_PER_SEC + ts.tv_nsec;
}
/*
 * Cooperatively yield the calling worker back to its server.
 *
 * Publish RUNNABLE together with COND_WAIT so that a concurrent state
 * change cancels the wait instead of losing a wakeup, then enter the
 * kernel with no timeout.
 */
void foo_yield(struct umcg_task *self)
{
	self->state = UMCG_TF_COND_WAIT | UMCG_TASK_RUNNABLE;
	sys_umcg_wait(0, 0);
}
/* Preemption tick period: one second. */
#define TICK_NSEC NSEC_PER_SEC

/* True when argc > 1: workers stop yielding and rely on the tick. */
static volatile bool foo_preemptible = false;

/* our workers */

/* Set from the SIGINT handler; every loop polls it to shut down. */
static volatile bool done = false;

/*
 * SIGINT handler: request a clean shutdown.
 * NOTE(review): volatile bool works in practice, but sig_atomic_t is
 * the portable type for flags written from a signal handler.
 */
static void umcg_signal(int signr)
{
	done = true;
}
/* always running worker */
void *worker_fn0(void *arg)
{
struct foo_server *server = arg;
struct foo_task task = { };
unsigned long i;
int ret;
task.tid = gettid();
task.task.server_tid = server->tid;
task.task.state = UMCG_TASK_BLOCKED;
printf("A == %d\n", gettid());
ret = sys_umcg_ctl(UMCG_CTL_REGISTER|UMCG_CTL_WORKER, &task.task, CLOCK_MONOTONIC);
if (ret) {
perror("umcg_ctl(A): ");
exit(-1);
}
__atomic_add_fetch(&server->workers, 1, __ATOMIC_RELAXED);
while (!done) {
int x = i++;
if (!(x % 1000000)) {
putchar('.');
fflush(stdout);
}
/* co-operative or preemptible */
if (!foo_preemptible && !(x % 10000000))
foo_yield(&task.task);
}
printf("A == done\n");
__atomic_add_fetch(&server->workers, -1, __ATOMIC_RELAXED);
ret = sys_umcg_ctl(UMCG_CTL_UNREGISTER|UMCG_CTL_WORKER, &task.task, 0);
if (ret) {
perror("umcg_ctl(~A): ");
exit(-1);
}
return NULL;
}
/* event driven worker */
void *worker_fn1(void *arg)
{
struct foo_server *server = arg;
struct foo_task task = { };
int ret;
task.tid = gettid();
task.task.server_tid = server->tid;
task.task.state = UMCG_TASK_BLOCKED;
printf("B == %d\n", gettid());
ret = sys_umcg_ctl(UMCG_CTL_REGISTER|UMCG_CTL_WORKER, &task.task, CLOCK_MONOTONIC);
if (ret) {
perror("umcg_ctl(B): ");
exit(-1);
}
__atomic_add_fetch(&server->workers, 1, __ATOMIC_RELAXED);
while (!done) {
printf("B\n");
fflush(stdout);
sleep(1);
}
printf("B == done\n");
__atomic_add_fetch(&server->workers, -1, __ATOMIC_RELAXED);
ret = sys_umcg_ctl(UMCG_CTL_UNREGISTER|UMCG_CTL_WORKER, &task.task, 0);
if (ret) {
perror("umcg_ctl(~B): ");
exit(-1);
}
return NULL;
}
void *worker_fn2(void *arg)
{
struct foo_server *server = arg;
struct foo_task task = { };
int ret;
task.tid = gettid();
task.task.server_tid = server->tid;
task.task.state = UMCG_TASK_BLOCKED;
printf("C == %d\n", gettid());
ret = sys_umcg_ctl(UMCG_CTL_REGISTER|UMCG_CTL_WORKER, &task.task, CLOCK_MONOTONIC);
if (ret) {
perror("umcg_ctl(C): ");
exit(-1);
}
__atomic_add_fetch(&server->workers, 1, __ATOMIC_RELAXED);
while (!done) {
printf("C\n");
fflush(stdout);
sleep(2);
}
printf("C == done\n");
__atomic_add_fetch(&server->workers, -1, __ATOMIC_RELAXED);
ret = sys_umcg_ctl(UMCG_CTL_UNREGISTER|UMCG_CTL_WORKER, &task.task, 0);
if (ret) {
perror("umcg_ctl(~C): ");
exit(-1);
}
return NULL;
}
/* the server */
/*
 * The server, i.e. the userspace scheduler.
 *
 * Registers itself as a UMCG server, spawns the three workers, then
 * loops: consume the kernel's runnable-worker notifications into a
 * local runqueue, pick a worker to run, and block in sys_umcg_wait()
 * until something changes.  Any command-line argument arms a one-second
 * preemption tick.  SIGINT initiates shutdown; the loop exits once all
 * workers have unregistered.
 */
int main(int argc, char **argv)
{
	struct umcg_task *runnable_ptr, *next;
	struct foo_server server = { };
	pthread_t worker[3];
	u64 timeout = 0;
	u32 tid;
	int ret;

	struct sigaction sa = {
		.sa_handler = umcg_signal,
	};
	sigaction(SIGINT, &sa, NULL);	/* Ctrl-C sets 'done' for a clean exit */

	printf("server == %d\n", gettid());
	fflush(stdout);

	server.tid = gettid();
	INIT_LIST_HEAD(&server.node);
	server.task.server_tid = gettid();	/* the server is its own server */
	server.task.state = UMCG_TASK_RUNNING;

	ret = sys_umcg_ctl(UMCG_CTL_REGISTER, &server.task, CLOCK_MONOTONIC);
	if (ret) {
		perror("umcg_ctl: ");
		exit(-1);
	}

	pthread_create(&worker[0], NULL, worker_fn0, &server);
	pthread_create(&worker[1], NULL, worker_fn1, &server);
	pthread_create(&worker[2], NULL, worker_fn2, &server);

	if (argc > 1) {
		foo_preemptible = true;
		/*
		 * setup preemption tick: first expiry one TICK_NSEC from now
		 */
		timeout = foo_time() + TICK_NSEC;
	}

	/* Run until SIGINT was seen AND every worker has unregistered. */
	while (!(done && !__atomic_load_n(&server.workers, __ATOMIC_RELAXED))) {
		/*
		 * Mark the server as runnable first, so we can detect
		 * additions to the runnable list after we read it.
		 */
		__atomic_store_n(&server.task.state,
				 UMCG_TASK_RUNNABLE | UMCG_TF_COND_WAIT,
				 __ATOMIC_RELAXED);

		/*
		 * Consume the runnable notification list (a singly linked
		 * list the kernel builds through ::runnable_workers_ptr)
		 * and add the tasks to our local runqueue.
		 */
		runnable_ptr = (void*)__atomic_exchange_n(&server.task.runnable_workers_ptr,
							  NULL, __ATOMIC_SEQ_CST);
		while (runnable_ptr) {
			next = (void *)runnable_ptr->runnable_workers_ptr;
			foo_add(&server, runnable_ptr);
			runnable_ptr = next;
		}

		if (server.cur && server.cur->task.state == UMCG_TASK_RUNNING) {
			/*
			 * Current is still running: assert ::next_tid still
			 * points there and has the RUNNING bit on.
			 */
			if (server.task.next_tid != (server.cur->tid | UMCG_TID_RUNNING)) {
				printf("current not running: %d %x\n",
				       server.task.next_tid & UMCG_TID_MASK,
				       server.task.next_tid & ~UMCG_TID_MASK);
				exit(-1);
			}
			putchar('x');
		} else {
			/* No current (or it blocked): pick the next worker. */
			tid = 0;
			server.cur = foo_pick_next(&server);
			if (server.cur)
				tid = server.cur->tid;
			/* tid == 0 means nothing to run; just wait. */
			__atomic_store_n(&server.task.next_tid, tid, __ATOMIC_RELAXED);
			printf("pick: %d\n", tid);
		}
		fflush(stdout);

		ret = sys_umcg_wait(0, timeout);

		/*
		 * If we set ::next_tid but it hasn't been consumed by the
		 * syscall due to failure, make sure to put the task back on
		 * the queue, lest we leak it.
		 */
		tid = __atomic_load_n(&server.task.next_tid, __ATOMIC_RELAXED);
		if (tid && !(tid & UMCG_TID_RUNNING)) {
			foo_add(&server, &server.cur->task);
			server.cur = NULL;
			putchar('*');
		}

		if (!ret)
			continue;

		switch (errno) {
		case EAGAIN:
			/*
			 * Got a wakeup, try again.
			 */
			continue;

		case ETIMEDOUT:
			/*
			 * timeout: drive preemption
			 */
			putchar('t');
			fflush(stdout);

			/*
			 * Next tick..
			 */
			timeout += TICK_NSEC;

			/*
			 * If we have a current, cmpxchg set TF_PREEMPT and on success
			 * send it a signal to kick it into the kernel such that
			 * it might re-report itself runnable.
			 */
			if (server.cur) {
				struct foo_task *t = server.cur;
				u32 val = UMCG_TASK_RUNNING;
				u32 new = UMCG_TASK_RUNNING | UMCG_TF_PREEMPT;

				if (__atomic_compare_exchange_n(&t->task.state, &val, new,
								false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) {
					sys_umcg_kick(0, t->tid);
				}
			}
			/*
			 * Either way around, if the cmpxchg
			 * failed the task will have blocked
			 * and we should re-start the loop.
			 */
			continue;

		default:
			printf("errno: %d\n", errno);
			perror("wait:");
			exit(-1);
		}
	}

	pthread_join(worker[0], NULL);
	pthread_join(worker[1], NULL);
	pthread_join(worker[2], NULL);

	return 0;
}
next reply other threads:[~2022-01-20 16:09 UTC|newest]
Thread overview: 47+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-01-20 15:55 Peter Zijlstra [this message]
2022-01-20 15:55 ` [RFC][PATCH v2 1/5] mm: Avoid unmapping pinned pages Peter Zijlstra
2022-01-20 18:03 ` Nadav Amit
2022-01-21 7:59 ` Peter Zijlstra
2022-01-20 18:25 ` David Hildenbrand
2022-01-21 7:51 ` Peter Zijlstra
2022-01-21 8:22 ` David Hildenbrand
2022-01-21 8:59 ` Peter Zijlstra
2022-01-21 9:04 ` David Hildenbrand
2022-01-21 11:40 ` Peter Zijlstra
2022-01-21 12:04 ` David Hildenbrand
2022-01-20 15:55 ` [RFC][PATCH v2 2/5] entry,x86: Create common IRQ operations for exceptions Peter Zijlstra
2022-01-21 16:34 ` Mark Rutland
2022-01-20 15:55 ` [RFC][PATCH v2 3/5] sched/umcg: add WF_CURRENT_CPU and externise ttwu Peter Zijlstra
2022-01-20 15:55 ` [RFC][PATCH v2 4/5] x86/uaccess: Implement unsafe_try_cmpxchg_user() Peter Zijlstra
2022-01-27 2:17 ` Sean Christopherson
2022-01-27 6:36 ` Sean Christopherson
2022-01-27 9:56 ` Peter Zijlstra
2022-01-27 23:33 ` Sean Christopherson
2022-01-28 0:17 ` Nick Desaulniers
2022-01-28 16:29 ` Sean Christopherson
2022-01-27 9:55 ` Peter Zijlstra
2022-01-20 15:55 ` [RFC][PATCH v2 5/5] sched: User Mode Concurency Groups Peter Zijlstra
2022-01-21 11:47 ` Peter Zijlstra
2022-01-21 15:18 ` Peter Zijlstra
2022-01-24 14:29 ` Peter Zijlstra
2022-01-24 16:44 ` Peter Zijlstra
2022-01-24 17:06 ` Peter Oskolkov
2022-01-25 14:59 ` Peter Zijlstra
2022-01-24 13:59 ` Peter Zijlstra
2022-01-21 12:26 ` Peter Zijlstra
2022-01-21 16:57 ` Mark Rutland
2022-01-24 9:48 ` Peter Zijlstra
2022-01-24 10:03 ` Peter Zijlstra
2022-01-24 10:07 ` Peter Zijlstra
2022-01-24 10:27 ` Mark Rutland
2022-01-24 14:46 ` Tao Zhou
2022-01-27 12:19 ` Peter Zijlstra
2022-01-27 18:33 ` Tao Zhou
2022-01-27 12:25 ` Peter Zijlstra
2022-01-27 18:47 ` Tao Zhou
2022-01-27 12:26 ` Peter Zijlstra
2022-01-27 18:31 ` Tao Zhou
2022-01-20 17:28 ` [RFC][PATCH v2 0/5] sched: User Managed Concurrency Groups Peter Oskolkov
2022-01-21 8:01 ` Peter Zijlstra
2022-01-21 18:01 ` Steven Rostedt
2022-01-24 8:20 ` Peter Zijlstra
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20220120155517.066795336@infradead.org \
--to=peterz@infradead.org \
--cc=avagin@google.com \
--cc=bristot@redhat.com \
--cc=bsegall@google.com \
--cc=dietmar.eggemann@arm.com \
--cc=jannh@google.com \
--cc=juri.lelli@redhat.com \
--cc=linux-api@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=mark.rutland@arm.com \
--cc=mgorman@suse.de \
--cc=mingo@redhat.com \
--cc=pjt@google.com \
--cc=posk@google.com \
--cc=posk@posk.io \
--cc=rostedt@goodmis.org \
--cc=tdelisle@uwaterloo.ca \
--cc=tglx@linutronix.de \
--cc=vincent.guittot@linaro.org \
--cc=x86@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.