All of lore.kernel.org
 help / color / mirror / Atom feed
From: Yao Zi <ziyao@disroot.org>
To: Jerome Forissier <jerome.forissier@linaro.org>, u-boot@lists.denx.de
Cc: Ilias Apalodimas <ilias.apalodimas@linaro.org>,
	Tom Rini <trini@konsulko.com>, Simon Glass <sjg@chromium.org>,
	Patrick Rudolph <patrick.rudolph@9elements.com>,
	Sughosh Ganu <sughosh.ganu@linaro.org>,
	Michal Simek <michal.simek@amd.com>,
	Raymond Mao <raymond.mao@linaro.org>,
	Heinrich Schuchardt <xypron.glpk@gmx.de>
Subject: Re: [RFC PATCH v2 1/3] Introduce coroutines framework
Date: Fri, 31 Jan 2025 16:10:34 +0000	[thread overview]
Message-ID: <Z5z1-nHenTmnjukU@pie.lan> (raw)
In-Reply-To: <912af13c205a31080cc61d40bec29d7402185d6e.1738059345.git.jerome.forissier@linaro.org>

On Tue, Jan 28, 2025 at 11:19:15AM +0100, Jerome Forissier wrote:
> Adds the COROUTINES Kconfig symbol which introduces a new internal API
> for coroutines support. As explained in the Kconfig file, this is meant
> to provide some kind of cooperative multi-tasking with the goal to
> improve performance by overlapping lengthy operations.
> 
> The API as well as the implementation is very much inspired from libaco
> [1]. The reference implementation is simplified to remove all things
> not needed in U-Boot, the coding style is updated, and the aco_ prefix
> is replaced by co_.
> 
> I believe the stack handling could be simplified: the stack of the main
> coroutine could probably probably be used by the secondary coroutines
> instead of allocating a new stack dynamically.
> 
> Only i386, x86_64 and aarch64 are supported at the moment. Other
> architectures need to provide a _co_switch() function in assembly.
> 
> Only aarch64 has been tested.
> 
> [1] https://github.com/hnes/libaco/
> 
> Signed-off-by: Jerome Forissier <jerome.forissier@linaro.org>
> ---
>  arch/arm/cpu/armv8/Makefile    |   1 +
>  arch/arm/cpu/armv8/co_switch.S |  36 +++++++
>  include/coroutines.h           | 130 ++++++++++++++++++++++++++
>  lib/Kconfig                    |  10 ++
>  lib/Makefile                   |   2 +
>  lib/coroutines.c               | 165 +++++++++++++++++++++++++++++++++
>  6 files changed, 344 insertions(+)
>  create mode 100644 arch/arm/cpu/armv8/co_switch.S
>  create mode 100644 include/coroutines.h
>  create mode 100644 lib/coroutines.c
> 
> diff --git a/arch/arm/cpu/armv8/Makefile b/arch/arm/cpu/armv8/Makefile
> index 2e71ff2dc97..6d07b6aa9f9 100644
> --- a/arch/arm/cpu/armv8/Makefile
> +++ b/arch/arm/cpu/armv8/Makefile
> @@ -46,3 +46,4 @@ obj-$(CONFIG_TARGET_BCMNS3) += bcmns3/
>  obj-$(CONFIG_XEN) += xen/
>  obj-$(CONFIG_ARMV8_CE_SHA1) += sha1_ce_glue.o sha1_ce_core.o
>  obj-$(CONFIG_ARMV8_CE_SHA256) += sha256_ce_glue.o sha256_ce_core.o
> +obj-$(CONFIG_COROUTINES) += co_switch.o
> diff --git a/arch/arm/cpu/armv8/co_switch.S b/arch/arm/cpu/armv8/co_switch.S
> new file mode 100644
> index 00000000000..4405e89ec56
> --- /dev/null
> +++ b/arch/arm/cpu/armv8/co_switch.S
> @@ -0,0 +1,36 @@
> +/* SPDX-License-Identifier: GPL-2.0+ */
> +/* void _co_switch(struct uco *from_co, struct uco *to_co); */
> +.text
> +.globl _co_switch
> +.type  _co_switch, @function
> +_co_switch:
> +    // x0: from_co
> +    // x1: to_co
> +    // from_co and to_co layout: { pc, sp, x19-x29 }
> +
> +    // Save context to from_co (x0)
> +    // AAPCS64 says "A subroutine invocation must preserve the contents of the
> +    // registers r19-r29 and SP"
> +    adr x2, 1f  // pc we should use to resume after this function
> +    mov x3, sp
> +    stp x2, x3, [x0, #0]  // pc, sp
> +    stp x19, x20, [x0, #16]
> +    stp x21, x22, [x0, #32]
> +    stp x23, x24, [x0, #48]
> +    stp x25, x26, [x0, #64]
> +    stp x27, x28, [x0, #80]
> +    stp x29, x30, [x0, #96]
> +
> +    // Load new context from to_co (x1)
> +    ldp x2, x3, [x1, #0]  // pc, sp
> +    ldp x19, x20, [x1, #16]
> +    ldp x21, x22, [x1, #32]
> +    ldp x23, x24, [x1, #48]
> +    ldp x25, x26, [x1, #64]
> +    ldp x27, x28, [x1, #80]
> +    ldp x29, x30, [x1, #96]
> +    mov sp, x3
> +    br x2
> +
> +1:  // Return to the caller
> +    ret

We've done similar context switching in setjmp/longjmp. Is it possible
to unify this part and get rid of the duplicated assembly for each
architecture?

The jmp_buf structure is actually non-opaque to the caller through
jmp_buf_data, thus I believe this logic could be rewritten in C with
setjmp/longjmp(),

	if (!setjmp(from_co))
		longjmp(to_co);
	else
		return;

and replace co.regs with jmp_buf_data.

btw, I guess the jmp_buf_data type is kept for historical usage in EFI
implementation, but looking through our EFI code it seems the details of
jmp_buf aren't used anymore now. So maybe it's the time to clean up and
make it a better context-switching API as well.

> diff --git a/include/coroutines.h b/include/coroutines.h
> new file mode 100644
> index 00000000000..b85b656127c
> --- /dev/null
> +++ b/include/coroutines.h
> @@ -0,0 +1,130 @@
> +/* SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later */
> +/*
> + * Copyright 2018 Sen Han <00hnes@gmail.com>
> + * Copyright 2025 Linaro Limited
> + */
> +
> +#ifndef _COROUTINES_H_
> +#define _COROUTINES_H_
> +
> +#ifndef CONFIG_COROUTINES
> +
> +static inline void co_yield(void) {}
> +static inline void co_exit(void) {}
> +
> +#else
> +
> +#ifdef __UBOOT__
> +#include <log.h>
> +#else
> +#include <assert.h>
> +#endif
> +#include <limits.h>
> +#include <stdbool.h>
> +#include <stdint.h>
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <string.h>
> +#include <time.h>
> +
> +#ifdef __aarch64__
> +#define CO_REG_IDX_RETADDR 0
> +#define CO_REG_IDX_SP 1
> +#else
> +#error Architecture no supported
> +#endif
> +
> +struct co_save_stack {
> +    void*  ptr;
> +    size_t sz;
> +    size_t valid_sz;
> +    size_t max_cpsz; /* max copy size in bytes */
> +};
> +
> +struct co_stack {
> +    void *ptr;
> +    size_t sz;
> +    void *align_highptr;
> +    void *align_retptr;
> +    size_t align_validsz;
> +    size_t align_limit;
> +    struct co *owner;
> +    void *real_ptr;
> +    size_t real_sz;
> +};
> +
> +struct co {
> +	/* CPU state: callee-saved registers plus SP and PC */
> +	void *reg[14];  // pc, sp, x19-x29, x30 (lr)
> +
> +	struct co *main_co;
> +	void *arg;
> +	bool done;
> +
> +	void (*fp)(void);
> +
> +	struct co_save_stack save_stack;
> +	struct co_stack *stack;
> +};
> +
> +extern struct co *current_co;
> +
> +static inline struct co *co_get_co(void)
> +{
> +	return current_co;
> +}
> +
> +static inline void *co_get_arg(void)
> +{
> +	return co_get_co()->arg;
> +}
> +
> +struct co_stack *co_stack_new(size_t sz);
> +
> +void co_stack_destroy(struct co_stack *s);
> +
> +struct co *co_create(struct co *main_co,
> +		     struct co_stack *stack,
> +		     size_t save_stack_sz, void (*fp)(void),
> +		     void *arg);
> +
> +void co_resume(struct co *resume_co);
> +
> +void co_destroy(struct co *co);
> +
> +void *_co_switch(struct co *from_co, struct co *to_co);
> +
> +static inline void _co_yield_to_main_co(struct co *yield_co)
> +{
> +    assert(yield_co);
> +    assert(yield_co->main_co);
> +    _co_switch(yield_co, yield_co->main_co);
> +}
> +
> +static inline void co_yield(void)
> +{
> +	if (current_co)
> +		_co_yield_to_main_co(current_co);
> +}
> +
> +static inline bool co_is_main_co(struct co *co)
> +{
> +	return !co->main_co;
> +}
> +
> +static inline void co_exit(void)
> +{
> +	struct co *co = co_get_co();
> +
> +	if (!co)
> +		return;
> +	co->done = true;
> +	assert(co->stack->owner == co);
> +	co->stack->owner = NULL;
> +	co->stack->align_validsz = 0;
> +	_co_yield_to_main_co(co);
> +	assert(false);
> +}
> +
> +#endif /* CONFIG_COROUTINES */
> +#endif /* _COROUTINES_H_ */
> diff --git a/lib/Kconfig b/lib/Kconfig
> index 8f1a96d98c4..b6c1380b927 100644
> --- a/lib/Kconfig
> +++ b/lib/Kconfig
> @@ -1226,6 +1226,16 @@ config PHANDLE_CHECK_SEQ
>  	  enable this config option to distinguish them using
>  	  phandles in fdtdec_get_alias_seq() function.
>  
> +config COROUTINES
> +	bool "Enable coroutine support"
> +	help
> +	  Coroutines allow to implement a simple form of cooperative
> +	  multi-tasking. The main thread of execution registers one or
> +	  more functions as coroutine entry points, then it schedules one
> +	  of them. At any point the scheduled coroutine may yield, that is,
> +	  suspend its execution and return back to the main thread. At this
> +	  point another coroutine may be scheduled and so on until all the
> +	  registered coroutines are done.
>  endmenu
>  
>  source "lib/fwu_updates/Kconfig"
> diff --git a/lib/Makefile b/lib/Makefile
> index 5cb3278d2ef..7b809151f5a 100644
> --- a/lib/Makefile
> +++ b/lib/Makefile
> @@ -159,6 +159,8 @@ obj-$(CONFIG_LIB_ELF) += elf.o
>  
>  obj-$(CONFIG_$(PHASE_)SEMIHOSTING) += semihosting.o
>  
> +obj-$(CONFIG_COROUTINES) += coroutines.o
> +
>  #
>  # Build a fast OID lookup registry from include/linux/oid_registry.h
>  #
> diff --git a/lib/coroutines.c b/lib/coroutines.c
> new file mode 100644
> index 00000000000..20c5aba5510
> --- /dev/null
> +++ b/lib/coroutines.c
> @@ -0,0 +1,165 @@
> +// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
> +
> +// Copyright 2018 Sen Han <00hnes@gmail.com>
> +// Copyright 2025 Linaro Limited
> +
> +#include <coroutines.h>
> +#include <stdio.h>
> +#include <stdint.h>
> +
> +
> +/* Current co-routine */
> +struct co *current_co;
> +
> +struct co_stack *co_stack_new(size_t sz)
> +{
> +	struct co_stack *p = calloc(1, sizeof(*p));
> +	uintptr_t u_p;
> +
> +	if (!p)
> +		return NULL;
> +
> +	if (sz < 4096)
> +		sz = 4096;
> +
> +	p->sz = sz;
> +	p->ptr = malloc(sz);
> +	if (!p->ptr) {
> +		free(p);
> +		return NULL;
> +	}
> +
> +	p->owner = NULL;
> +	u_p = (uintptr_t)(p->sz - (sizeof(void*) << 1) + (uintptr_t)p->ptr);
> +	u_p = (u_p >> 4) << 4;
> +	p->align_highptr = (void*)u_p;
> +	p->align_retptr  = (void*)(u_p - sizeof(void*));
> +	assert(p->sz > (16 + (sizeof(void*) << 1) + sizeof(void*)));
> +	p->align_limit = p->sz - 16 - (sizeof(void*) << 1);
> +
> +	return p;
> +}
> +
> +void co_stack_destroy(struct co_stack *s){
> +	if (!s)
> +		return;
> +	free(s->ptr);
> +	free(s);
> +}
> +
> +struct co *co_create(struct co *main_co,
> +		     struct co_stack *stack,
> +		     size_t save_stack_sz,
> +		     void (*fp)(void), void *arg)
> +{
> +	struct co *p = malloc(sizeof(*p));
> +	assert(p);
> +	memset(p, 0, sizeof(*p));
> +
> +	if (main_co) {
> +		assert(stack);
> +		p->stack = stack;
> +		p->reg[CO_REG_IDX_RETADDR] = (void *)fp;
> +		// FIXME original code uses align_retptr; causes a crash
> +		p->reg[CO_REG_IDX_SP] = p->stack->align_highptr;
> +		p->main_co = main_co;
> +		p->arg = arg;
> +		p->fp = fp;
> +		if (!save_stack_sz)
> +			save_stack_sz = 64;
> +		p->save_stack.ptr = malloc(save_stack_sz);
> +		assert(p->save_stack.ptr);
> +		p->save_stack.sz = save_stack_sz;
> +		p->save_stack.valid_sz = 0;
> +	} else {
> +		p->main_co = NULL;
> +		p->arg = arg;
> +		p->fp = fp;
> +		p->stack = NULL;
> +		p->save_stack.ptr = NULL;
> +	}
> +	return p;
> +}
> +
> +static void grab_stack(struct co *resume_co)
> +{
> +	struct co *owner_co = resume_co->stack->owner;
> +
> +	if (owner_co) {
> +		assert(owner_co->stack == resume_co->stack);
> +		assert((uintptr_t)(owner_co->stack->align_retptr) >=
> +		       (uintptr_t)(owner_co->reg[CO_REG_IDX_SP]));
> +		assert((uintptr_t)owner_co->stack->align_highptr -
> +				(uintptr_t)owner_co->stack->align_limit
> +			<= (uintptr_t)owner_co->reg[CO_REG_IDX_SP]);
> +		owner_co->save_stack.valid_sz =
> +			(uintptr_t)owner_co->stack->align_retptr -
> +			(uintptr_t)owner_co->reg[CO_REG_IDX_SP];
> +		if (owner_co->save_stack.sz < owner_co->save_stack.valid_sz) {
> +			free(owner_co->save_stack.ptr);
> +			owner_co->save_stack.ptr = NULL;
> +			do {
> +				owner_co->save_stack.sz <<= 1;
> +				assert(owner_co->save_stack.sz > 0);
> +			} while (owner_co->save_stack.sz <
> +				 owner_co->save_stack.valid_sz);
> +			owner_co->save_stack.ptr =
> +				malloc(owner_co->save_stack.sz);
> +			assert(owner_co->save_stack.ptr);
> +		}
> +		if (owner_co->save_stack.valid_sz > 0)
> +			memcpy(owner_co->save_stack.ptr,
> +			       owner_co->reg[CO_REG_IDX_SP],
> +			       owner_co->save_stack.valid_sz);
> +		if (owner_co->save_stack.valid_sz >
> +		    owner_co->save_stack.max_cpsz)
> +			owner_co->save_stack.max_cpsz =
> +				owner_co->save_stack.valid_sz;
> +		owner_co->stack->owner = NULL;
> +		owner_co->stack->align_validsz = 0;
> +	}
> +	assert(!resume_co->stack->owner);
> +	assert(resume_co->save_stack.valid_sz <=
> +	       resume_co->stack->align_limit - sizeof(void *));
> +	if (resume_co->save_stack.valid_sz > 0)
> +		memcpy((void*)
> +		       (uintptr_t)(resume_co->stack->align_retptr) -
> +				resume_co->save_stack.valid_sz,
> +		       resume_co->save_stack.ptr,
> +		       resume_co->save_stack.valid_sz);
> +	if (resume_co->save_stack.valid_sz > resume_co->save_stack.max_cpsz)
> +		resume_co->save_stack.max_cpsz = resume_co->save_stack.valid_sz;
> +	resume_co->stack->align_validsz =
> +		resume_co->save_stack.valid_sz + sizeof(void *);
> +	resume_co->stack->owner = resume_co;
> +}
> +
> +void co_resume(struct co *resume_co)
> +{
> +	assert(resume_co && resume_co->main_co && !resume_co->done);
> +
> +	if (resume_co->stack->owner != resume_co)
> +		grab_stack(resume_co);
> +
> +	current_co = resume_co;
> +	_co_switch(resume_co->main_co, resume_co);
> +	current_co = resume_co->main_co;
> +}
> +
> +void co_destroy(struct co *co){
> +	if (!co)
> +		return;
> +
> +	if(co_is_main_co(co)){
> +		free(co);
> +		current_co = NULL;
> +	} else {
> +		if(co->stack->owner == co){
> +			co->stack->owner = NULL;
> +			co->stack->align_validsz = 0;
> +		}
> +		free(co->save_stack.ptr);
> +		co->save_stack.ptr = NULL;
> +		free(co);
> +	}
> +}
> -- 
> 2.43.0
> 

  parent reply	other threads:[~2025-01-31 17:08 UTC|newest]

Thread overview: 12+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-01-28 10:19 [RFC PATCH v2 0/3] Coroutines Jerome Forissier
2025-01-28 10:19 ` [RFC PATCH v2 1/3] Introduce coroutines framework Jerome Forissier
2025-01-28 11:09   ` Michal Simek
2025-01-28 13:30     ` Jerome Forissier
2025-01-31 16:10   ` Yao Zi [this message]
2025-02-03 11:57     ` Ahmad Fatoum
2025-01-28 10:19 ` [RFC PATCH v2 2/3] efi_loader: optimize efi_init_obj_list() with coroutines Jerome Forissier
2025-01-28 10:19 ` [RFC PATCH v2 3/3] usb: scan multiple buses simultaneously " Jerome Forissier
2025-01-28 11:58   ` Michal Simek
2025-01-28 13:53     ` Jerome Forissier
2025-01-28 21:36   ` Marek Vasut
2025-01-28 14:04 ` [RFC PATCH v2 0/3] Coroutines Simon Glass

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=Z5z1-nHenTmnjukU@pie.lan \
    --to=ziyao@disroot.org \
    --cc=ilias.apalodimas@linaro.org \
    --cc=jerome.forissier@linaro.org \
    --cc=michal.simek@amd.com \
    --cc=patrick.rudolph@9elements.com \
    --cc=raymond.mao@linaro.org \
    --cc=sjg@chromium.org \
    --cc=sughosh.ganu@linaro.org \
    --cc=trini@konsulko.com \
    --cc=u-boot@lists.denx.de \
    --cc=xypron.glpk@gmx.de \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.