Re: [RFC][PATCH 3/7] printk: introduce per-cpu alt_print seq buffer

All of lore.kernel.org
 help / color / mirror / Atom feed

From: Petr Mladek <pmladek@suse.com>
To: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Cc: Jan Kara <jack@suse.cz>,
	Andrew Morton <akpm@linux-foundation.org>,
	Tejun Heo <tj@kernel.org>, Calvin Owens <calvinowens@fb.com>,
	linux-kernel@vger.kernel.org,
	Sergey Senozhatsky <sergey.senozhatsky.work@gmail.com>
Subject: Re: [RFC][PATCH 3/7] printk: introduce per-cpu alt_print seq buffer
Date: Thu, 29 Sep 2016 14:26:40 +0200	[thread overview]
Message-ID: <20160929122639.GD26796@pathway.suse.cz> (raw)
In-Reply-To: <20160927142237.5539-4-sergey.senozhatsky@gmail.com>

On Tue 2016-09-27 23:22:33, Sergey Senozhatsky wrote:
> This patch extends the idea of NMI per-cpu buffers to regions
> that may cause recursive printk() calls and possible deadlocks.
> Namely, printk() can't handle printk calls from schedule code
> or printk() calls from lock debugging code (spin_dump() for instance);
> because those may be called with `sem->lock' already taken or any
> of critical scheduler locks (p->pi_lock, etc.). An example of
> a deadlock can be
> 
>  vprintk_emit()
>   console_unlock()
>    up()                        << raw_spin_lock_irqsave(&sem->lock, flags);
>     wake_up_process()
>      try_to_wake_up()
>       ttwu_queue()
>        ttwu_do_activate()
>         ttwu_activate()
>          activate_task()
>           enqueue_task()
>            enqueue_task_fair()
>             cfs_rq_of()
>              task_of()
>               WARN_ON_ONCE(!entity_is_task(se))
>                vprintk_emit()
>                 console_trylock()
>                  down_trylock()
>                   raw_spin_lock_irqsave(&sem->lock, flags)
>                   ^^^^ deadlock
> 
> and some other cases.
> 
> Just like in NMI implementation, the solution uses a per-cpu
> `printk_func' pointer to 'redirect' printk() calls to a 'safe'
> callback, that store messages in a per-cpu buffer and flushes
> them back to logbuf buffer later.
> 
> Usage example:
> 
>  printk()
>   local_irq_save()
>   alt_printk_enter()

We need to make sure that exit() is called on the same CPU.
Therefore we need to disable preemption as well.

>   //
>   //  any printk() call from here will endup in vprintk_alt(),
>   //  that stores messages in a special per-CPU buffer.
>   //
>   alt_printk_exit()
>   local_irq_restore()
> 
> The patch only adds a alt_printk support, we don't use it yet.
> 
> Signed-off-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
> ---
>  kernel/printk/alt_printk.c | 160 +++++++++++++++++++++++++++++++++++++++++----
>  kernel/printk/internal.h   |  12 ++++
>  2 files changed, 158 insertions(+), 14 deletions(-)
> 
> diff --git a/kernel/printk/alt_printk.c b/kernel/printk/alt_printk.c
> index 7178661..8978390 100644
> --- a/kernel/printk/alt_printk.c
> +++ b/kernel/printk/alt_printk.c
> @@ -1,5 +1,5 @@
>  /*
> - * alt_printk.c - Safe printk in NMI context
> + * alt_printk.c - Safe printk for printk-deadlock-prone contexts
>   *
>   * This program is free software; you can redistribute it and/or
>   * modify it under the terms of the GNU General Public License
> @@ -53,24 +53,39 @@ struct alt_printk_seq_buf {
>  static DEFINE_PER_CPU(struct alt_printk_seq_buf, nmi_print_seq);
>  
>  /*
> - * Safe printk() for NMI context. It uses a per-CPU buffer to
> - * store the message. NMIs are not nested, so there is always only
> - * one writer running. But the buffer might get flushed from another
> - * CPU, so we need to be careful.
> + * There can be two alt_printk contexts at most - a `normal' alt_printk
> + * and NMI alt_printk context. Normal alt_printk context is the one that
> + * direct caller of printk() setups (either a process or IRQ) and it can
> + * be preempted only by NMI (if the platform supports NMI). NMI context
> + * can preempt normal alt_printk context, but cannot be preempted on its
> + * own.
>   */
> -static int vprintk_nmi(const char *fmt, va_list args)
> +#ifdef CONFIG_PRINTK_NMI
> +#define MAX_ALT_PRINTK_CTX	2
> +#else
> +#define MAX_ALT_PRINTK_CTX	1
> +#endif
> +
> +struct alt_printk_ctx {
> +	atomic_t	idx;
> +	unsigned int	entry_count;
> +	printk_func_t	saved_printk_func[MAX_ALT_PRINTK_CTX];
> +};
> +
> +static DEFINE_PER_CPU(struct alt_printk_seq_buf, alt_print_seq);
> +static DEFINE_PER_CPU(struct alt_printk_ctx, alt_printk_ctx);
> +
> +static int alt_printk_log_store(struct alt_printk_seq_buf *s,
> +		const char *fmt, va_list args)
>  {
> -	struct alt_printk_seq_buf *s = this_cpu_ptr(&nmi_print_seq);
> -	int add = 0;
> +	int add;
>  	size_t len;
>  
>  again:
>  	len = atomic_read(&s->len);
>  
> -	if (len >= sizeof(s->buffer)) {
> -		atomic_inc(&nmi_message_lost);
> +	if (len >= sizeof(s->buffer))
>  		return 0;
> -	}
>  
>  	/*
>  	 * Make sure that all old data have been read before the buffer was
> @@ -99,6 +114,96 @@ static int vprintk_nmi(const char *fmt, va_list args)
>  	return add;
>  }
>  
> +/*
> + * Lockless printk(), to avoid deadlocks should the printk() recurse
> + * into itself. It uses a per-CPU buffer to store the message, just like
> + * NMI.
> + */
> +static int vprintk_alt(const char *fmt, va_list args)
> +{
> +	struct alt_printk_seq_buf *s = this_cpu_ptr(&alt_print_seq);
> +
> +	return alt_printk_log_store(s, fmt, args);
> +}
> +
> +/*
> + * We must keep the track of `printk_func' because alt_printk
> + * context can be preempted by NMI alt_printk context.
> + *
> + * Consider the following example:
> + *
> + * vprintk_emit()
> + * 	alt_printk_enter()
> + * 		printk_func = vprintk_alt;
> + *
> + * -> NMI
> + *  	printk_nmi_enter()
> + *		printk_func = vprintk_nmi;
> + *	printk_nmi_exit()
> + *		printk_func = vprintk_default;
> + *		^^^^^^^^^^^
> + * <- NMI
> + *
> + * 	printk("foo") -> vprintk_default();
> + *
> + * Thus we must restore the orignal `printk_func' value, the one
> + * NMI saw at printk_nmi_enter() time.
> + */
> +static void __lockless_printk_enter(printk_func_t new_func)
> +{
> +	struct alt_printk_ctx *ctx = this_cpu_ptr(&alt_printk_ctx);
> +	int idx = atomic_inc_return(&ctx->idx) - 1;
> +
> +	ctx->saved_printk_func[idx] = this_cpu_read(printk_func);
> +	this_cpu_write(printk_func, new_func);
> +}
> +
> +static void __lockless_printk_exit(void)
> +{
> +	struct alt_printk_ctx *ctx = this_cpu_ptr(&alt_printk_ctx);
> +	int idx = atomic_read(&ctx->idx) - 1;
> +
> +	this_cpu_write(printk_func, ctx->saved_printk_func[idx]);
> +	atomic_dec(&ctx->idx);
> +}
> +
> +/* Local IRQs must be disabled; can be preempted by NMI. */
> +void alt_printk_enter(void)
> +{
> +	struct alt_printk_ctx *ctx = this_cpu_ptr(&alt_printk_ctx);
> +
> +	/*
> +	 * We can't switch `printk_func' while the CPU is flushing its
> +	 * alternative buffers. At the same time, we leave flushing
> +	 * `unprotected', because we always use vprintk_default() there.
> +	 *
> +	 * ->entry_count can detect printk() recursion from flushing context:
> +	 *  -- alt_printk_flush() sets ->entry_count to 1
> +	 *  -- every vprintk_default() call from alt_printk_flush() increments
> +	 *     ->entry_count to 2 when it enters the recursion un-safe region
> +	 *     and decrements it back to 1 when it leaves that region
> +	 *  -- thus, if printk() will recurs from recursion un-safe region, we
> +	 *     will see ->entry_count > 2.
> +	 */
> +	ctx->entry_count++;
> +	if (ctx->entry_count > 1)
> +		return;
> +
> +	/* @TODO: do something sensible in case of printk() recursion */
> +
> +	__lockless_printk_enter(vprintk_alt);
> +}
> +
> +/* Local IRQs must be disabled; can be preempted by NMI. */
> +void alt_printk_exit(void)
> +{
> +	struct alt_printk_ctx *ctx = this_cpu_ptr(&alt_printk_ctx);
> +
> +	if (ctx->entry_count == 1)
> +		__lockless_printk_exit();
> +	ctx->entry_count--;
> +}

Huh, this is complicated like hell. I think that it is like this
because of two reasons. First, it tries to change more values a
lockless way. Second, it tries to be kind of generic.

Note that it is not much generic. idx is increased or decreased only
when entry_count == 1 or when called from nmi_enter()/exit().
Therefore it could not be easily extended to yet another alt buffer.
Also it relies on the fact the nmi_enter()/exit() calls are not
nested.

What do you think about my approach with the printk_context per-CPU
value from the WARN_DEFERRED() patchset? The main idea is that
the entry()/exit() functions manipulate preempt_count-like per-CPU
variable. The printk() function selects the safe implementation
according to the current state.

>  static void alt_printk_flush_line(const char *text, int len)
>  {
>  	/*
> @@ -110,7 +215,6 @@ static void alt_printk_flush_line(const char *text, int len)
>  		printk_deferred("%.*s", len, text);
>  	else
>  		printk("%.*s", len, text);
> -
>  }
>  
>  /*
> @@ -135,6 +239,7 @@ static void __alt_printk_flush(struct irq_work *work)
>  		__RAW_SPIN_LOCK_INITIALIZER(read_lock);
>  	struct alt_printk_seq_buf *s = container_of(work,
>  			struct alt_printk_seq_buf, work);
> +	struct alt_printk_ctx *ctx = this_cpu_ptr(&alt_printk_ctx);
>  	unsigned long flags;
>  	size_t len, size;
>  	int i, last_i;
> @@ -147,6 +252,11 @@ static void __alt_printk_flush(struct irq_work *work)
>  	 * a backtrace.
>  	 */
>  	raw_spin_lock_irqsave(&read_lock, flags);
> +	/*
> +	 * Forbid the alt_printk on this CPU, we want to flush messages to
> +	 * logbuf, not to alt_printk buffer again.
> +	 */
> +	ctx->entry_count++;

This looks very strange. If entry_count is not zero, it means that we
are not in a safe context to flush the alternative buffer. Or do
I miss something, please?
 

My other concern about this approach was that it would spread printk()
messages to even more buffers. But I am not scared anymore. The
new buffer will be used only for printk-internal errors that would
normally cause a deadlock. The important thing is that we are able
to share the implementation handling the extra buffer.

So, I think that this approach have a chance if we clean it a bit.

Best Regards,
Petr

next prev parent reply	other threads:[~2016-09-29 12:26 UTC|newest]

Thread overview: 35+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-09-27 14:22 [RFC][PATCH 0/7] printk: use alt_printk to handle printk() recursive calls Sergey Senozhatsky
2016-09-27 14:22 ` [RFC][PATCH 1/7] printk: use vprintk_func in vprintk() Sergey Senozhatsky
2016-09-27 14:22 ` [RFC][PATCH 2/7] printk: rename nmi.c and exported api Sergey Senozhatsky
2016-09-27 14:22 ` [RFC][PATCH 3/7] printk: introduce per-cpu alt_print seq buffer Sergey Senozhatsky
2016-09-29 12:26   ` Petr Mladek [this message]
2016-09-30  1:05     ` Sergey Senozhatsky
2016-09-30 11:35       ` Petr Mladek
2016-09-27 14:22 ` [RFC][PATCH 4/7] printk: make alt_printk available when config printk set Sergey Senozhatsky
2016-09-27 14:22 ` [RFC][PATCH 5/7] printk: drop vprintk_func function Sergey Senozhatsky
2016-09-27 14:22 ` [RFC][PATCH 6/7] printk: use alternative printk buffers Sergey Senozhatsky
2016-09-29 13:00   ` Petr Mladek
2016-09-30  1:15     ` Sergey Senozhatsky
2016-09-30 11:15       ` Petr Mladek
2016-10-01  2:48         ` Sergey Senozhatsky
2016-10-04 12:22           ` Petr Mladek
2016-10-05  1:36             ` Sergey Senozhatsky
2016-10-05 10:18               ` Petr Mladek
2016-10-03  7:53         ` Sergey Senozhatsky
2016-10-04 14:52         ` Petr Mladek
2016-10-05  1:27           ` Sergey Senozhatsky
2016-10-05  9:50             ` Petr Mladek
2016-10-06  4:22               ` Sergey Senozhatsky
2016-10-06 11:32                 ` Petr Mladek
2016-10-10  4:09                   ` Sergey Senozhatsky
2016-10-10 11:17                     ` Petr Mladek
2016-10-11  7:35                       ` Sergey Senozhatsky
2016-10-11  9:30                         ` Petr Mladek
2016-09-27 14:22 ` [RFC][PATCH 7/7] printk: new printk() recursion detection Sergey Senozhatsky
2016-09-29 13:19   ` Petr Mladek
2016-09-30  2:00     ` Sergey Senozhatsky
2016-09-29 13:25 ` [RFC][PATCH 0/7] printk: use alt_printk to handle printk() recursive calls Petr Mladek
2016-09-30  2:43   ` Sergey Senozhatsky
2016-09-30 11:27     ` Petr Mladek
2016-10-01  3:02       ` Sergey Senozhatsky
2016-10-04 11:35         ` Petr Mladek

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20160929122639.GD26796@pathway.suse.cz \
    --to=pmladek@suse.com \
    --cc=akpm@linux-foundation.org \
    --cc=calvinowens@fb.com \
    --cc=jack@suse.cz \
    --cc=linux-kernel@vger.kernel.org \
    --cc=sergey.senozhatsky.work@gmail.com \
    --cc=sergey.senozhatsky@gmail.com \
    --cc=tj@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.