Linux Serial subsystem development
 help / color / mirror / Atom feed
* [RFC PATCH v1 15/25] printk: print history for new consoles
From: John Ogness @ 2019-02-12 14:29 UTC (permalink / raw)
  To: linux-kernel
  Cc: Peter Zijlstra, Petr Mladek, Sergey Senozhatsky, Steven Rostedt,
	Daniel Wang, Andrew Morton, Linus Torvalds, Greg Kroah-Hartman,
	Alan Cox, Jiri Slaby, Peter Feiner, linux-serial,
	Sergey Senozhatsky
In-Reply-To: <20190212143003.48446-1-john.ogness@linutronix.de>

When new consoles register, they currently print how many messages
they have missed. However, many (or all) of those messages may still
be in the ring buffer. Add functionality to print as much of the
history as available. This is a clean replacement of the old
exclusive console hack.

Signed-off-by: John Ogness <john.ogness@linutronix.de>
---
 include/linux/console.h |  1 +
 kernel/printk/printk.c  | 75 +++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 76 insertions(+)

diff --git a/include/linux/console.h b/include/linux/console.h
index 7fa06a058339..633fb741e871 100644
--- a/include/linux/console.h
+++ b/include/linux/console.h
@@ -154,6 +154,7 @@ struct console {
 	short	index;
 	int	cflag;
 	unsigned long printk_seq;
+	int	wrote_history;
 	void	*data;
 	struct	 console *next;
 };
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index 897219f34cab..6c875abd7b17 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -1506,6 +1506,77 @@ static void format_text(struct printk_log *msg, u64 seq,
 	}
 }
 
+static void printk_write_history(struct console *con, u64 master_seq)
+{
+	struct prb_iterator iter;
+	bool time = printk_time;
+	static char *ext_text;
+	static char *text;
+	static char *buf;
+	u64 seq;
+
+	ext_text = kmalloc(CONSOLE_EXT_LOG_MAX, GFP_KERNEL);
+	text = kmalloc(PRINTK_SPRINT_MAX, GFP_KERNEL);
+	buf = kmalloc(PRINTK_RECORD_MAX, GFP_KERNEL);
+	if (!ext_text || !text || !buf)
+		return;
+
+	if (!(con->flags & CON_ENABLED))
+		goto out;
+
+	if (!con->write)
+		goto out;
+
+	if (!cpu_online(raw_smp_processor_id()) &&
+	    !(con->flags & CON_ANYTIME))
+		goto out;
+
+	prb_iter_init(&iter, &printk_rb, NULL);
+
+	for (;;) {
+		struct printk_log *msg;
+		size_t ext_len;
+		size_t len;
+		int ret;
+
+		ret = prb_iter_next(&iter, buf, PRINTK_RECORD_MAX, &seq);
+		if (ret == 0) {
+			break;
+		} else if (ret < 0) {
+			prb_iter_init(&iter, &printk_rb, NULL);
+			continue;
+		}
+
+		if (seq > master_seq)
+			break;
+
+		con->printk_seq++;
+		if (con->printk_seq < seq) {
+			print_console_dropped(con, seq - con->printk_seq);
+			con->printk_seq = seq;
+		}
+
+		msg = (struct printk_log *)buf;
+		format_text(msg, master_seq, ext_text, &ext_len, text,
+			    &len, time);
+
+		if (len == 0 && ext_len == 0)
+			continue;
+
+		if (con->flags & CON_EXTENDED)
+			con->write(con, ext_text, ext_len);
+		else
+			con->write(con, text, len);
+
+		printk_delay(msg->level);
+	}
+out:
+	con->wrote_history = 1;
+	kfree(ext_text);
+	kfree(text);
+	kfree(buf);
+}
+
 /*
  * Call the console drivers, asking them to write out
  * log_buf[start] to log_buf[end - 1].
@@ -1524,6 +1595,10 @@ static void call_console_drivers(u64 seq, const char *ext_text, size_t ext_len,
 	for_each_console(con) {
 		if (!(con->flags & CON_ENABLED))
 			continue;
+		if (!con->wrote_history) {
+			printk_write_history(con, seq);
+			continue;
+		}
 		if (!con->write)
 			continue;
 		if (!cpu_online(raw_smp_processor_id()) &&
-- 
2.11.0

^ permalink raw reply related

* [RFC PATCH v1 14/25] printk: do boot_delay_msec inside printk_delay
From: John Ogness @ 2019-02-12 14:29 UTC (permalink / raw)
  To: linux-kernel
  Cc: Peter Zijlstra, Petr Mladek, Sergey Senozhatsky, Steven Rostedt,
	Daniel Wang, Andrew Morton, Linus Torvalds, Greg Kroah-Hartman,
	Alan Cox, Jiri Slaby, Peter Feiner, linux-serial,
	Sergey Senozhatsky
In-Reply-To: <20190212143003.48446-1-john.ogness@linutronix.de>

Both functions needed to be called one after the other, so just
integrate boot_delay_msec into printk_delay for simplification.

Signed-off-by: John Ogness <john.ogness@linutronix.de>
---
 kernel/printk/printk.c | 35 +++++++++++++++++------------------
 1 file changed, 17 insertions(+), 18 deletions(-)

diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index ebd9aac06323..897219f34cab 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -1453,6 +1453,21 @@ SYSCALL_DEFINE3(syslog, int, type, char __user *, buf, int, len)
 	return do_syslog(type, buf, len, SYSLOG_FROM_READER);
 }
 
+int printk_delay_msec __read_mostly;
+
+static inline void printk_delay(int level)
+{
+	boot_delay_msec(level);
+	if (unlikely(printk_delay_msec)) {
+		int m = printk_delay_msec;
+
+		while (m--) {
+			mdelay(1);
+			touch_nmi_watchdog();
+		}
+	}
+}
+
 static void print_console_dropped(struct console *con, u64 count)
 {
 	char text[64];
@@ -1534,20 +1549,6 @@ static void call_console_drivers(u64 seq, const char *ext_text, size_t ext_len,
 	}
 }
 
-int printk_delay_msec __read_mostly;
-
-static inline void printk_delay(void)
-{
-	if (unlikely(printk_delay_msec)) {
-		int m = printk_delay_msec;
-
-		while (m--) {
-			mdelay(1);
-			touch_nmi_watchdog();
-		}
-	}
-}
-
 /* FIXME: no support for LOG_CONT */
 #if 0
 /*
@@ -2506,10 +2507,8 @@ static int printk_kthread_func(void *data)
 		console_lock();
 		call_console_drivers(master_seq, ext_text,
 				     ext_len, text, len);
-		if (len > 0 || ext_len > 0) {
-			boot_delay_msec(msg->level);
-			printk_delay();
-		}
+		if (len > 0 || ext_len > 0)
+			printk_delay(msg->level);
 		console_unlock();
 	}
 
-- 
2.11.0

^ permalink raw reply related

* [RFC PATCH v1 13/25] printk: track seq per console
From: John Ogness @ 2019-02-12 14:29 UTC (permalink / raw)
  To: linux-kernel
  Cc: Peter Zijlstra, Petr Mladek, Sergey Senozhatsky, Steven Rostedt,
	Daniel Wang, Andrew Morton, Linus Torvalds, Greg Kroah-Hartman,
	Alan Cox, Jiri Slaby, Peter Feiner, linux-serial,
	Sergey Senozhatsky
In-Reply-To: <20190212143003.48446-1-john.ogness@linutronix.de>

Allow each console to track which seq record was last printed. This
simplifies identifying dropped records.

Signed-off-by: John Ogness <john.ogness@linutronix.de>
---
 include/linux/console.h |  1 +
 kernel/printk/printk.c  | 30 +++++++++++++++++++++++++++---
 2 files changed, 28 insertions(+), 3 deletions(-)

diff --git a/include/linux/console.h b/include/linux/console.h
index ec9bdb3d7bab..7fa06a058339 100644
--- a/include/linux/console.h
+++ b/include/linux/console.h
@@ -153,6 +153,7 @@ struct console {
 	short	flags;
 	short	index;
 	int	cflag;
+	unsigned long printk_seq;
 	void	*data;
 	struct	 console *next;
 };
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index ece54c24ea0d..ebd9aac06323 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -1453,6 +1453,16 @@ SYSCALL_DEFINE3(syslog, int, type, char __user *, buf, int, len)
 	return do_syslog(type, buf, len, SYSLOG_FROM_READER);
 }
 
+static void print_console_dropped(struct console *con, u64 count)
+{
+	char text[64];
+	int len;
+
+	len = sprintf(text, "** %llu printk message%s dropped **\n",
+		      count, count > 1 ? "s" : "");
+	con->write(con, text, len);
+}
+
 static void format_text(struct printk_log *msg, u64 seq,
 			char *ext_text, size_t *ext_len,
 			char *text, size_t *len, bool time)
@@ -1486,7 +1496,7 @@ static void format_text(struct printk_log *msg, u64 seq,
  * log_buf[start] to log_buf[end - 1].
  * The console_lock must be held.
  */
-static void call_console_drivers(const char *ext_text, size_t ext_len,
+static void call_console_drivers(u64 seq, const char *ext_text, size_t ext_len,
 				 const char *text, size_t len)
 {
 	struct console *con;
@@ -1504,6 +1514,19 @@ static void call_console_drivers(const char *ext_text, size_t ext_len,
 		if (!cpu_online(raw_smp_processor_id()) &&
 		    !(con->flags & CON_ANYTIME))
 			continue;
+		if (con->printk_seq >= seq)
+			continue;
+
+		con->printk_seq++;
+		if (con->printk_seq < seq) {
+			print_console_dropped(con, seq - con->printk_seq);
+			con->printk_seq = seq;
+		}
+
+		/* for supressed messages, only seq is updated */
+		if (len == 0 && ext_len == 0)
+			continue;
+
 		if (con->flags & CON_EXTENDED)
 			con->write(con, ext_text, ext_len);
 		else
@@ -1738,7 +1761,7 @@ static ssize_t msg_print_ext_header(char *buf, size_t size,
 static ssize_t msg_print_ext_body(char *buf, size_t size,
 				  char *dict, size_t dict_len,
 				  char *text, size_t text_len) { return 0; }
-static void call_console_drivers(const char *ext_text, size_t ext_len,
+static void call_console_drivers(u64 seq, const char *ext_text, size_t ext_len,
 				 const char *text, size_t len) {}
 static size_t msg_print_text(const struct printk_log *msg, bool syslog,
 			     bool time, char *buf, size_t size) { return 0; }
@@ -2481,8 +2504,9 @@ static int printk_kthread_func(void *data)
 			    &len, printk_time);
 
 		console_lock();
+		call_console_drivers(master_seq, ext_text,
+				     ext_len, text, len);
 		if (len > 0 || ext_len > 0) {
-			call_console_drivers(ext_text, ext_len, text, len);
 			boot_delay_msec(msg->level);
 			printk_delay();
 		}
-- 
2.11.0

^ permalink raw reply related

* [RFC PATCH v1 12/25] printk: minimize console locking implementation
From: John Ogness @ 2019-02-12 14:29 UTC (permalink / raw)
  To: linux-kernel
  Cc: Peter Zijlstra, Petr Mladek, Sergey Senozhatsky, Steven Rostedt,
	Daniel Wang, Andrew Morton, Linus Torvalds, Greg Kroah-Hartman,
	Alan Cox, Jiri Slaby, Peter Feiner, linux-serial,
	Sergey Senozhatsky
In-Reply-To: <20190212143003.48446-1-john.ogness@linutronix.de>

Since printing of the printk buffer is now handled by the printk
kthread, minimize the console locking functions to just handle
locking of the console.

NOTE: With this console_flush_on_panic will no longer flush.

Signed-off-by: John Ogness <john.ogness@linutronix.de>
---
 kernel/printk/printk.c | 255 +------------------------------------------------
 1 file changed, 1 insertion(+), 254 deletions(-)

diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index 073ff9fd6872..ece54c24ea0d 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -209,19 +209,7 @@ static int nr_ext_console_drivers;
 
 static int __down_trylock_console_sem(unsigned long ip)
 {
-	int lock_failed;
-	unsigned long flags;
-
-	/*
-	 * Here and in __up_console_sem() we need to be in safe mode,
-	 * because spindump/WARN/etc from under console ->lock will
-	 * deadlock in printk()->down_trylock_console_sem() otherwise.
-	 */
-	printk_safe_enter_irqsave(flags);
-	lock_failed = down_trylock(&console_sem);
-	printk_safe_exit_irqrestore(flags);
-
-	if (lock_failed)
+	if (down_trylock(&console_sem))
 		return 1;
 	mutex_acquire(&console_lock_dep_map, 0, 1, ip);
 	return 0;
@@ -230,13 +218,9 @@ static int __down_trylock_console_sem(unsigned long ip)
 
 static void __up_console_sem(unsigned long ip)
 {
-	unsigned long flags;
-
 	mutex_release(&console_lock_dep_map, 1, ip);
 
-	printk_safe_enter_irqsave(flags);
 	up(&console_sem);
-	printk_safe_exit_irqrestore(flags);
 }
 #define up_console_sem() __up_console_sem(_RET_IP_)
 
@@ -1498,82 +1482,6 @@ static void format_text(struct printk_log *msg, u64 seq,
 }
 
 /*
- * Special console_lock variants that help to reduce the risk of soft-lockups.
- * They allow to pass console_lock to another printk() call using a busy wait.
- */
-
-#ifdef CONFIG_LOCKDEP
-static struct lockdep_map console_owner_dep_map = {
-	.name = "console_owner"
-};
-#endif
-
-static DEFINE_RAW_SPINLOCK(console_owner_lock);
-static struct task_struct *console_owner;
-static bool console_waiter;
-
-/**
- * console_lock_spinning_enable - mark beginning of code where another
- *	thread might safely busy wait
- *
- * This basically converts console_lock into a spinlock. This marks
- * the section where the console_lock owner can not sleep, because
- * there may be a waiter spinning (like a spinlock). Also it must be
- * ready to hand over the lock at the end of the section.
- */
-static void console_lock_spinning_enable(void)
-{
-	raw_spin_lock(&console_owner_lock);
-	console_owner = current;
-	raw_spin_unlock(&console_owner_lock);
-
-	/* The waiter may spin on us after setting console_owner */
-	spin_acquire(&console_owner_dep_map, 0, 0, _THIS_IP_);
-}
-
-/**
- * console_lock_spinning_disable_and_check - mark end of code where another
- *	thread was able to busy wait and check if there is a waiter
- *
- * This is called at the end of the section where spinning is allowed.
- * It has two functions. First, it is a signal that it is no longer
- * safe to start busy waiting for the lock. Second, it checks if
- * there is a busy waiter and passes the lock rights to her.
- *
- * Important: Callers lose the lock if there was a busy waiter.
- *	They must not touch items synchronized by console_lock
- *	in this case.
- *
- * Return: 1 if the lock rights were passed, 0 otherwise.
- */
-static int console_lock_spinning_disable_and_check(void)
-{
-	int waiter;
-
-	raw_spin_lock(&console_owner_lock);
-	waiter = READ_ONCE(console_waiter);
-	console_owner = NULL;
-	raw_spin_unlock(&console_owner_lock);
-
-	if (!waiter) {
-		spin_release(&console_owner_dep_map, 1, _THIS_IP_);
-		return 0;
-	}
-
-	/* The waiter is now free to continue */
-	WRITE_ONCE(console_waiter, false);
-
-	spin_release(&console_owner_dep_map, 1, _THIS_IP_);
-
-	/*
-	 * Hand off console_lock to waiter. The waiter will perform
-	 * the up(). After this, the waiter is the console_lock owner.
-	 */
-	mutex_release(&console_lock_dep_map, 1, _THIS_IP_);
-	return 1;
-}
-
-/*
  * Call the console drivers, asking them to write out
  * log_buf[start] to log_buf[end - 1].
  * The console_lock must be held.
@@ -1830,8 +1738,6 @@ static ssize_t msg_print_ext_header(char *buf, size_t size,
 static ssize_t msg_print_ext_body(char *buf, size_t size,
 				  char *dict, size_t dict_len,
 				  char *text, size_t text_len) { return 0; }
-static void console_lock_spinning_enable(void) { }
-static int console_lock_spinning_disable_and_check(void) { return 0; }
 static void call_console_drivers(const char *ext_text, size_t ext_len,
 				 const char *text, size_t len) {}
 static size_t msg_print_text(const struct printk_log *msg, bool syslog,
@@ -2066,35 +1972,6 @@ int is_console_locked(void)
 {
 	return console_locked;
 }
-EXPORT_SYMBOL(is_console_locked);
-
-/*
- * Check if we have any console that is capable of printing while cpu is
- * booting or shutting down. Requires console_sem.
- */
-static int have_callable_console(void)
-{
-	struct console *con;
-
-	for_each_console(con)
-		if ((con->flags & CON_ENABLED) &&
-				(con->flags & CON_ANYTIME))
-			return 1;
-
-	return 0;
-}
-
-/*
- * Can we actually use the console at this time on this cpu?
- *
- * Console drivers may assume that per-cpu resources have been allocated. So
- * unless they're explicitly marked as being able to cope (CON_ANYTIME) don't
- * call them until this CPU is officially up.
- */
-static inline int can_use_console(void)
-{
-	return cpu_online(raw_smp_processor_id()) || have_callable_console();
-}
 
 /**
  * console_unlock - unlock the console system
@@ -2102,147 +1979,17 @@ static inline int can_use_console(void)
  * Releases the console_lock which the caller holds on the console system
  * and the console driver list.
  *
- * While the console_lock was held, console output may have been buffered
- * by printk().  If this is the case, console_unlock(); emits
- * the output prior to releasing the lock.
- *
- * If there is output waiting, we wake /dev/kmsg and syslog() users.
- *
  * console_unlock(); may be called from any context.
  */
 void console_unlock(void)
 {
-	static char ext_text[CONSOLE_EXT_LOG_MAX];
-	static char text[LOG_LINE_MAX + PREFIX_MAX];
-	unsigned long flags;
-	bool do_cond_resched, retry;
-
 	if (console_suspended) {
 		up_console_sem();
 		return;
 	}
 
-	/*
-	 * Console drivers are called with interrupts disabled, so
-	 * @console_may_schedule should be cleared before; however, we may
-	 * end up dumping a lot of lines, for example, if called from
-	 * console registration path, and should invoke cond_resched()
-	 * between lines if allowable.  Not doing so can cause a very long
-	 * scheduling stall on a slow console leading to RCU stall and
-	 * softlockup warnings which exacerbate the issue with more
-	 * messages practically incapacitating the system.
-	 *
-	 * console_trylock() is not able to detect the preemptive
-	 * context reliably. Therefore the value must be stored before
-	 * and cleared after the the "again" goto label.
-	 */
-	do_cond_resched = console_may_schedule;
-again:
-	console_may_schedule = 0;
-
-	/*
-	 * We released the console_sem lock, so we need to recheck if
-	 * cpu is online and (if not) is there at least one CON_ANYTIME
-	 * console.
-	 */
-	if (!can_use_console()) {
-		console_locked = 0;
-		up_console_sem();
-		return;
-	}
-
-	for (;;) {
-		struct printk_log *msg;
-		size_t ext_len = 0;
-		size_t len;
-
-		printk_safe_enter_irqsave(flags);
-		raw_spin_lock(&logbuf_lock);
-		if (console_seq < log_first_seq) {
-			len = sprintf(text,
-				      "** %llu printk messages dropped **\n",
-				      log_first_seq - console_seq);
-
-			/* messages are gone, move to first one */
-			console_seq = log_first_seq;
-			console_idx = log_first_idx;
-		} else {
-			len = 0;
-		}
-skip:
-		if (console_seq == log_next_seq)
-			break;
-
-		msg = log_from_idx(console_idx);
-		if (suppress_message_printing(msg->level)) {
-			/*
-			 * Skip record we have buffered and already printed
-			 * directly to the console when we received it, and
-			 * record that has level above the console loglevel.
-			 */
-			console_idx = log_next(console_idx);
-			console_seq++;
-			goto skip;
-		}
-
-		len += msg_print_text(msg,
-				console_msg_format & MSG_FORMAT_SYSLOG,
-				printk_time, text + len, sizeof(text) - len);
-		if (nr_ext_console_drivers) {
-			ext_len = msg_print_ext_header(ext_text,
-						sizeof(ext_text),
-						msg, console_seq);
-			ext_len += msg_print_ext_body(ext_text + ext_len,
-						sizeof(ext_text) - ext_len,
-						log_dict(msg), msg->dict_len,
-						log_text(msg), msg->text_len);
-		}
-		console_idx = log_next(console_idx);
-		console_seq++;
-		raw_spin_unlock(&logbuf_lock);
-
-		/*
-		 * While actively printing out messages, if another printk()
-		 * were to occur on another CPU, it may wait for this one to
-		 * finish. This task can not be preempted if there is a
-		 * waiter waiting to take over.
-		 */
-		console_lock_spinning_enable();
-
-		stop_critical_timings();	/* don't trace print latency */
-		//call_console_drivers(ext_text, ext_len, text, len);
-		start_critical_timings();
-
-		if (console_lock_spinning_disable_and_check()) {
-			printk_safe_exit_irqrestore(flags);
-			return;
-		}
-
-		printk_safe_exit_irqrestore(flags);
-
-		if (do_cond_resched)
-			cond_resched();
-	}
-
 	console_locked = 0;
-
-	raw_spin_unlock(&logbuf_lock);
-
 	up_console_sem();
-
-	/*
-	 * Someone could have filled up the buffer again, so re-check if there's
-	 * something to flush. In case we cannot trylock the console_sem again,
-	 * there's a new owner and the console_unlock() from them will do the
-	 * flush, no worries.
-	 */
-	raw_spin_lock(&logbuf_lock);
-	retry = console_seq != log_next_seq;
-	raw_spin_unlock(&logbuf_lock);
-	printk_safe_exit_irqrestore(flags);
-
-	if (retry && console_trylock())
-		goto again;
 }
 EXPORT_SYMBOL(console_unlock);
 
-- 
2.11.0

^ permalink raw reply related

* [RFC PATCH v1 11/25] printk_safe: remove printk safe code
From: John Ogness @ 2019-02-12 14:29 UTC (permalink / raw)
  To: linux-kernel
  Cc: Peter Zijlstra, Petr Mladek, Sergey Senozhatsky, Steven Rostedt,
	Daniel Wang, Andrew Morton, Linus Torvalds, Greg Kroah-Hartman,
	Alan Cox, Jiri Slaby, Peter Feiner, linux-serial,
	Sergey Senozhatsky
In-Reply-To: <20190212143003.48446-1-john.ogness@linutronix.de>

vprintk variants are now NMI-safe so there is no longer a need for
the "safe" calls.

NOTE: This also removes printk flushing functionality.

Signed-off-by: John Ogness <john.ogness@linutronix.de>
---
 include/linux/hardirq.h     |   2 -
 include/linux/printk.h      |  27 ---
 init/main.c                 |   1 -
 kernel/kexec_core.c         |   1 -
 kernel/panic.c              |   3 -
 kernel/printk/Makefile      |   1 -
 kernel/printk/internal.h    |  30 +---
 kernel/printk/printk.c      |  13 +-
 kernel/printk/printk_safe.c | 427 --------------------------------------------
 kernel/trace/trace.c        |   2 -
 lib/nmi_backtrace.c         |   6 -
 11 files changed, 7 insertions(+), 506 deletions(-)
 delete mode 100644 kernel/printk/printk_safe.c

diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h
index 0fbbcdf0c178..c1effa24a71d 100644
--- a/include/linux/hardirq.h
+++ b/include/linux/hardirq.h
@@ -62,7 +62,6 @@ extern void irq_exit(void);
 
 #define nmi_enter()						\
 	do {							\
-		printk_nmi_enter();				\
 		lockdep_off();					\
 		ftrace_nmi_enter();				\
 		BUG_ON(in_nmi());				\
@@ -79,7 +78,6 @@ extern void irq_exit(void);
 		preempt_count_sub(NMI_OFFSET + HARDIRQ_OFFSET);	\
 		ftrace_nmi_exit();				\
 		lockdep_on();					\
-		printk_nmi_exit();				\
 	} while (0)
 
 #endif /* LINUX_HARDIRQ_H */
diff --git a/include/linux/printk.h b/include/linux/printk.h
index 77740a506ebb..a79a736b54b6 100644
--- a/include/linux/printk.h
+++ b/include/linux/printk.h
@@ -145,18 +145,6 @@ static inline __printf(1, 2) __cold
 void early_printk(const char *s, ...) { }
 #endif
 
-#ifdef CONFIG_PRINTK_NMI
-extern void printk_nmi_enter(void);
-extern void printk_nmi_exit(void);
-extern void printk_nmi_direct_enter(void);
-extern void printk_nmi_direct_exit(void);
-#else
-static inline void printk_nmi_enter(void) { }
-static inline void printk_nmi_exit(void) { }
-static inline void printk_nmi_direct_enter(void) { }
-static inline void printk_nmi_direct_exit(void) { }
-#endif /* PRINTK_NMI */
-
 #ifdef CONFIG_PRINTK
 asmlinkage __printf(5, 0)
 int vprintk_emit(int facility, int level,
@@ -201,9 +189,6 @@ __printf(1, 2) void dump_stack_set_arch_desc(const char *fmt, ...);
 void dump_stack_print_info(const char *log_lvl);
 void show_regs_print_info(const char *log_lvl);
 extern asmlinkage void dump_stack(void) __cold;
-extern void printk_safe_init(void);
-extern void printk_safe_flush(void);
-extern void printk_safe_flush_on_panic(void);
 #else
 static inline __printf(1, 0)
 int vprintk(const char *s, va_list args)
@@ -267,18 +252,6 @@ static inline void show_regs_print_info(const char *log_lvl)
 static inline void dump_stack(void)
 {
 }
-
-static inline void printk_safe_init(void)
-{
-}
-
-static inline void printk_safe_flush(void)
-{
-}
-
-static inline void printk_safe_flush_on_panic(void)
-{
-}
 #endif
 
 extern int kptr_restrict;
diff --git a/init/main.c b/init/main.c
index e2e80ca3165a..aec02435f00b 100644
--- a/init/main.c
+++ b/init/main.c
@@ -648,7 +648,6 @@ asmlinkage __visible void __init start_kernel(void)
 	softirq_init();
 	timekeeping_init();
 	time_init();
-	printk_safe_init();
 	perf_event_init();
 	profile_init();
 	call_function_init();
diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c
index d7140447be75..bbe21da47e2e 100644
--- a/kernel/kexec_core.c
+++ b/kernel/kexec_core.c
@@ -972,7 +972,6 @@ void crash_kexec(struct pt_regs *regs)
 	old_cpu = atomic_cmpxchg(&panic_cpu, PANIC_CPU_INVALID, this_cpu);
 	if (old_cpu == PANIC_CPU_INVALID) {
 		/* This is the 1st CPU which comes here, so go ahead. */
-		printk_safe_flush_on_panic();
 		__crash_kexec(regs);
 
 		/*
diff --git a/kernel/panic.c b/kernel/panic.c
index f121e6ba7e11..09a836b3c687 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -223,7 +223,6 @@ void panic(const char *fmt, ...)
 	 * Bypass the panic_cpu check and call __crash_kexec directly.
 	 */
 	if (!_crash_kexec_post_notifiers) {
-		printk_safe_flush_on_panic();
 		__crash_kexec(NULL);
 
 		/*
@@ -247,8 +246,6 @@ void panic(const char *fmt, ...)
 	 */
 	atomic_notifier_call_chain(&panic_notifier_list, 0, buf);
 
-	/* Call flush even twice. It tries harder with a single online CPU */
-	printk_safe_flush_on_panic();
 	kmsg_dump(KMSG_DUMP_PANIC);
 
 	/*
diff --git a/kernel/printk/Makefile b/kernel/printk/Makefile
index 4a2ffc39eb95..85405bdcf2b3 100644
--- a/kernel/printk/Makefile
+++ b/kernel/printk/Makefile
@@ -1,3 +1,2 @@
 obj-y	= printk.o
-obj-$(CONFIG_PRINTK)	+= printk_safe.o
 obj-$(CONFIG_A11Y_BRAILLE_CONSOLE)	+= braille.o
diff --git a/kernel/printk/internal.h b/kernel/printk/internal.h
index 0f1898820cba..59ad43dba837 100644
--- a/kernel/printk/internal.h
+++ b/kernel/printk/internal.h
@@ -32,32 +32,6 @@ int vprintk_store(int facility, int level,
 __printf(1, 0) int vprintk_default(const char *fmt, va_list args);
 __printf(1, 0) int vprintk_deferred(const char *fmt, va_list args);
 __printf(1, 0) int vprintk_func(const char *fmt, va_list args);
-void __printk_safe_enter(void);
-void __printk_safe_exit(void);
-
-#define printk_safe_enter_irqsave(flags)	\
-	do {					\
-		local_irq_save(flags);		\
-		__printk_safe_enter();		\
-	} while (0)
-
-#define printk_safe_exit_irqrestore(flags)	\
-	do {					\
-		__printk_safe_exit();		\
-		local_irq_restore(flags);	\
-	} while (0)
-
-#define printk_safe_enter_irq()		\
-	do {					\
-		local_irq_disable();		\
-		__printk_safe_enter();		\
-	} while (0)
-
-#define printk_safe_exit_irq()			\
-	do {					\
-		__printk_safe_exit();		\
-		local_irq_enable();		\
-	} while (0)
 
 void defer_console_output(void);
 
@@ -70,10 +44,10 @@ __printf(1, 0) int vprintk_func(const char *fmt, va_list args) { return 0; }
  * semaphore and some of console functions (console_unlock()/etc.), so
  * printk-safe must preserve the existing local IRQ guarantees.
  */
+#endif /* CONFIG_PRINTK */
+
 #define printk_safe_enter_irqsave(flags) local_irq_save(flags)
 #define printk_safe_exit_irqrestore(flags) local_irq_restore(flags)
 
 #define printk_safe_enter_irq() local_irq_disable()
 #define printk_safe_exit_irq() local_irq_enable()
-
-#endif /* CONFIG_PRINTK */
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index b6a6f1002741..073ff9fd6872 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -1675,13 +1675,6 @@ static bool cont_add(int facility, int level, enum log_flags flags, const char *
 }
 #endif /* 0 */
 
-int vprintk_store(int facility, int level,
-		  const char *dict, size_t dictlen,
-		  const char *fmt, va_list args)
-{
-	return vprintk_emit(facility, level, dict, dictlen, fmt, args);
-}
-
 /* ring buffer used as memory allocator for temporary sprint buffers */
 DECLARE_STATIC_PRINTKRB(sprint_rb,
 			ilog2(PRINTK_RECORD_MAX + sizeof(struct prb_entry) +
@@ -1752,6 +1745,11 @@ asmlinkage int vprintk_emit(int facility, int level,
 }
 EXPORT_SYMBOL(vprintk_emit);
 
+__printf(1, 0) int vprintk_func(const char *fmt, va_list args)
+{
+	return vprintk_emit(0, LOGLEVEL_DEFAULT, NULL, 0, fmt, args);
+}
+
 asmlinkage int vprintk(const char *fmt, va_list args)
 {
 	return vprintk_func(fmt, args);
@@ -3142,5 +3140,4 @@ void kmsg_dump_rewind(struct kmsg_dumper *dumper)
 	logbuf_unlock_irqrestore(flags);
 }
 EXPORT_SYMBOL_GPL(kmsg_dump_rewind);
-
 #endif
diff --git a/kernel/printk/printk_safe.c b/kernel/printk/printk_safe.c
deleted file mode 100644
index 0913b4d385de..000000000000
--- a/kernel/printk/printk_safe.c
+++ /dev/null
@@ -1,427 +0,0 @@
-/*
- * printk_safe.c - Safe printk for printk-deadlock-prone contexts
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
- */
-
-#include <linux/preempt.h>
-#include <linux/spinlock.h>
-#include <linux/debug_locks.h>
-#include <linux/smp.h>
-#include <linux/cpumask.h>
-#include <linux/irq_work.h>
-#include <linux/printk.h>
-
-#include "internal.h"
-
-/*
- * printk() could not take logbuf_lock in NMI context. Instead,
- * it uses an alternative implementation that temporary stores
- * the strings into a per-CPU buffer. The content of the buffer
- * is later flushed into the main ring buffer via IRQ work.
- *
- * The alternative implementation is chosen transparently
- * by examinig current printk() context mask stored in @printk_context
- * per-CPU variable.
- *
- * The implementation allows to flush the strings also from another CPU.
- * There are situations when we want to make sure that all buffers
- * were handled or when IRQs are blocked.
- */
-static int printk_safe_irq_ready __read_mostly;
-
-#define SAFE_LOG_BUF_LEN ((1 << CONFIG_PRINTK_SAFE_LOG_BUF_SHIFT) -	\
-				sizeof(atomic_t) -			\
-				sizeof(atomic_t) -			\
-				sizeof(struct irq_work))
-
-struct printk_safe_seq_buf {
-	atomic_t		len;	/* length of written data */
-	atomic_t		message_lost;
-	struct irq_work		work;	/* IRQ work that flushes the buffer */
-	unsigned char		buffer[SAFE_LOG_BUF_LEN];
-};
-
-static DEFINE_PER_CPU(struct printk_safe_seq_buf, safe_print_seq);
-static DEFINE_PER_CPU(int, printk_context);
-
-#ifdef CONFIG_PRINTK_NMI
-static DEFINE_PER_CPU(struct printk_safe_seq_buf, nmi_print_seq);
-#endif
-
-/* Get flushed in a more safe context. */
-static void queue_flush_work(struct printk_safe_seq_buf *s)
-{
-	if (printk_safe_irq_ready)
-		irq_work_queue(&s->work);
-}
-
-/*
- * Add a message to per-CPU context-dependent buffer. NMI and printk-safe
- * have dedicated buffers, because otherwise printk-safe preempted by
- * NMI-printk would have overwritten the NMI messages.
- *
- * The messages are flushed from irq work (or from panic()), possibly,
- * from other CPU, concurrently with printk_safe_log_store(). Should this
- * happen, printk_safe_log_store() will notice the buffer->len mismatch
- * and repeat the write.
- */
-static __printf(2, 0) int printk_safe_log_store(struct printk_safe_seq_buf *s,
-						const char *fmt, va_list args)
-{
-	int add;
-	size_t len;
-	va_list ap;
-
-again:
-	len = atomic_read(&s->len);
-
-	/* The trailing '\0' is not counted into len. */
-	if (len >= sizeof(s->buffer) - 1) {
-		atomic_inc(&s->message_lost);
-		queue_flush_work(s);
-		return 0;
-	}
-
-	/*
-	 * Make sure that all old data have been read before the buffer
-	 * was reset. This is not needed when we just append data.
-	 */
-	if (!len)
-		smp_rmb();
-
-	va_copy(ap, args);
-	add = vscnprintf(s->buffer + len, sizeof(s->buffer) - len, fmt, ap);
-	va_end(ap);
-	if (!add)
-		return 0;
-
-	/*
-	 * Do it once again if the buffer has been flushed in the meantime.
-	 * Note that atomic_cmpxchg() is an implicit memory barrier that
-	 * makes sure that the data were written before updating s->len.
-	 */
-	if (atomic_cmpxchg(&s->len, len, len + add) != len)
-		goto again;
-
-	queue_flush_work(s);
-	return add;
-}
-
-static inline void printk_safe_flush_line(const char *text, int len)
-{
-	/*
-	 * Avoid any console drivers calls from here, because we may be
-	 * in NMI or printk_safe context (when in panic). The messages
-	 * must go only into the ring buffer at this stage.  Consoles will
-	 * get explicitly called later when a crashdump is not generated.
-	 */
-	printk_deferred("%.*s", len, text);
-}
-
-/* printk part of the temporary buffer line by line */
-static int printk_safe_flush_buffer(const char *start, size_t len)
-{
-	const char *c, *end;
-	bool header;
-
-	c = start;
-	end = start + len;
-	header = true;
-
-	/* Print line by line. */
-	while (c < end) {
-		if (*c == '\n') {
-			printk_safe_flush_line(start, c - start + 1);
-			start = ++c;
-			header = true;
-			continue;
-		}
-
-		/* Handle continuous lines or missing new line. */
-		if ((c + 1 < end) && printk_get_level(c)) {
-			if (header) {
-				c = printk_skip_level(c);
-				continue;
-			}
-
-			printk_safe_flush_line(start, c - start);
-			start = c++;
-			header = true;
-			continue;
-		}
-
-		header = false;
-		c++;
-	}
-
-	/* Check if there was a partial line. Ignore pure header. */
-	if (start < end && !header) {
-		static const char newline[] = KERN_CONT "\n";
-
-		printk_safe_flush_line(start, end - start);
-		printk_safe_flush_line(newline, strlen(newline));
-	}
-
-	return len;
-}
-
-static void report_message_lost(struct printk_safe_seq_buf *s)
-{
-	int lost = atomic_xchg(&s->message_lost, 0);
-
-	if (lost)
-		printk_deferred("Lost %d message(s)!\n", lost);
-}
-
-/*
- * Flush data from the associated per-CPU buffer. The function
- * can be called either via IRQ work or independently.
- */
-static void __printk_safe_flush(struct irq_work *work)
-{
-	static raw_spinlock_t read_lock =
-		__RAW_SPIN_LOCK_INITIALIZER(read_lock);
-	struct printk_safe_seq_buf *s =
-		container_of(work, struct printk_safe_seq_buf, work);
-	unsigned long flags;
-	size_t len;
-	int i;
-
-	/*
-	 * The lock has two functions. First, one reader has to flush all
-	 * available message to make the lockless synchronization with
-	 * writers easier. Second, we do not want to mix messages from
-	 * different CPUs. This is especially important when printing
-	 * a backtrace.
-	 */
-	raw_spin_lock_irqsave(&read_lock, flags);
-
-	i = 0;
-more:
-	len = atomic_read(&s->len);
-
-	/*
-	 * This is just a paranoid check that nobody has manipulated
-	 * the buffer an unexpected way. If we printed something then
-	 * @len must only increase. Also it should never overflow the
-	 * buffer size.
-	 */
-	if ((i && i >= len) || len > sizeof(s->buffer)) {
-		const char *msg = "printk_safe_flush: internal error\n";
-
-		printk_safe_flush_line(msg, strlen(msg));
-		len = 0;
-	}
-
-	if (!len)
-		goto out; /* Someone else has already flushed the buffer. */
-
-	/* Make sure that data has been written up to the @len */
-	smp_rmb();
-	i += printk_safe_flush_buffer(s->buffer + i, len - i);
-
-	/*
-	 * Check that nothing has got added in the meantime and truncate
-	 * the buffer. Note that atomic_cmpxchg() is an implicit memory
-	 * barrier that makes sure that the data were copied before
-	 * updating s->len.
-	 */
-	if (atomic_cmpxchg(&s->len, len, 0) != len)
-		goto more;
-
-out:
-	report_message_lost(s);
-	raw_spin_unlock_irqrestore(&read_lock, flags);
-}
-
-/**
- * printk_safe_flush - flush all per-cpu nmi buffers.
- *
- * The buffers are flushed automatically via IRQ work. This function
- * is useful only when someone wants to be sure that all buffers have
- * been flushed at some point.
- */
-void printk_safe_flush(void)
-{
-	int cpu;
-
-	for_each_possible_cpu(cpu) {
-#ifdef CONFIG_PRINTK_NMI
-		__printk_safe_flush(&per_cpu(nmi_print_seq, cpu).work);
-#endif
-		__printk_safe_flush(&per_cpu(safe_print_seq, cpu).work);
-	}
-}
-
-/**
- * printk_safe_flush_on_panic - flush all per-cpu nmi buffers when the system
- *	goes down.
- *
- * Similar to printk_safe_flush() but it can be called even in NMI context when
- * the system goes down. It does the best effort to get NMI messages into
- * the main ring buffer.
- *
- * Note that it could try harder when there is only one CPU online.
- */
-void printk_safe_flush_on_panic(void)
-{
-	/*
-	 * Make sure that we could access the main ring buffer.
-	 * Do not risk a double release when more CPUs are up.
-	 */
-	if (raw_spin_is_locked(&logbuf_lock)) {
-		if (num_online_cpus() > 1)
-			return;
-
-		debug_locks_off();
-		raw_spin_lock_init(&logbuf_lock);
-	}
-
-	printk_safe_flush();
-}
-
-#ifdef CONFIG_PRINTK_NMI
-/*
- * Safe printk() for NMI context. It uses a per-CPU buffer to
- * store the message. NMIs are not nested, so there is always only
- * one writer running. But the buffer might get flushed from another
- * CPU, so we need to be careful.
- */
-static __printf(1, 0) int vprintk_nmi(const char *fmt, va_list args)
-{
-	struct printk_safe_seq_buf *s = this_cpu_ptr(&nmi_print_seq);
-
-	return printk_safe_log_store(s, fmt, args);
-}
-
-void notrace printk_nmi_enter(void)
-{
-	this_cpu_or(printk_context, PRINTK_NMI_CONTEXT_MASK);
-}
-
-void notrace printk_nmi_exit(void)
-{
-	this_cpu_and(printk_context, ~PRINTK_NMI_CONTEXT_MASK);
-}
-
-/*
- * Marks a code that might produce many messages in NMI context
- * and the risk of losing them is more critical than eventual
- * reordering.
- *
- * It has effect only when called in NMI context. Then printk()
- * will try to store the messages into the main logbuf directly
- * and use the per-CPU buffers only as a fallback when the lock
- * is not available.
- */
-void printk_nmi_direct_enter(void)
-{
-	if (this_cpu_read(printk_context) & PRINTK_NMI_CONTEXT_MASK)
-		this_cpu_or(printk_context, PRINTK_NMI_DIRECT_CONTEXT_MASK);
-}
-
-void printk_nmi_direct_exit(void)
-{
-	this_cpu_and(printk_context, ~PRINTK_NMI_DIRECT_CONTEXT_MASK);
-}
-
-#else
-
-static __printf(1, 0) int vprintk_nmi(const char *fmt, va_list args)
-{
-	return 0;
-}
-
-#endif /* CONFIG_PRINTK_NMI */
-
-/*
- * Lock-less printk(), to avoid deadlocks should the printk() recurse
- * into itself. It uses a per-CPU buffer to store the message, just like
- * NMI.
- */
-static __printf(1, 0) int vprintk_safe(const char *fmt, va_list args)
-{
-	struct printk_safe_seq_buf *s = this_cpu_ptr(&safe_print_seq);
-
-	return printk_safe_log_store(s, fmt, args);
-}
-
-/* Can be preempted by NMI. */
-void __printk_safe_enter(void)
-{
-	this_cpu_inc(printk_context);
-}
-
-/* Can be preempted by NMI. */
-void __printk_safe_exit(void)
-{
-	this_cpu_dec(printk_context);
-}
-
-__printf(1, 0) int vprintk_func(const char *fmt, va_list args)
-{
-	/*
-	 * Try to use the main logbuf even in NMI. But avoid calling console
-	 * drivers that might have their own locks.
-	 */
-	if ((this_cpu_read(printk_context) & PRINTK_NMI_DIRECT_CONTEXT_MASK) &&
-	    raw_spin_trylock(&logbuf_lock)) {
-		int len;
-
-		len = vprintk_store(0, LOGLEVEL_DEFAULT, NULL, 0, fmt, args);
-		raw_spin_unlock(&logbuf_lock);
-		defer_console_output();
-		return len;
-	}
-
-	/* Use extra buffer in NMI when logbuf_lock is taken or in safe mode. */
-	if (this_cpu_read(printk_context) & PRINTK_NMI_CONTEXT_MASK)
-		return vprintk_nmi(fmt, args);
-
-	/* Use extra buffer to prevent a recursion deadlock in safe mode. */
-	if (this_cpu_read(printk_context) & PRINTK_SAFE_CONTEXT_MASK)
-		return vprintk_safe(fmt, args);
-
-	/* No obstacles. */
-	return vprintk_default(fmt, args);
-}
-
-void __init printk_safe_init(void)
-{
-	int cpu;
-
-	for_each_possible_cpu(cpu) {
-		struct printk_safe_seq_buf *s;
-
-		s = &per_cpu(safe_print_seq, cpu);
-		init_irq_work(&s->work, __printk_safe_flush);
-
-#ifdef CONFIG_PRINTK_NMI
-		s = &per_cpu(nmi_print_seq, cpu);
-		init_irq_work(&s->work, __printk_safe_flush);
-#endif
-	}
-
-	/*
-	 * In the highly unlikely event that a NMI were to trigger at
-	 * this moment. Make sure IRQ work is set up before this
-	 * variable is set.
-	 */
-	barrier();
-	printk_safe_irq_ready = 1;
-
-	/* Flush pending messages that did not have scheduled IRQ works. */
-	printk_safe_flush();
-}
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index c521b7347482..cfce391621c0 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -8363,7 +8363,6 @@ void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
 	tracing_off();
 
 	local_irq_save(flags);
-	printk_nmi_direct_enter();
 
 	/* Simulate the iterator */
 	trace_init_global_iter(&iter);
@@ -8444,7 +8443,6 @@ void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
 		atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
 	}
 	atomic_dec(&dump_running);
-	printk_nmi_direct_exit();
 	local_irq_restore(flags);
 }
 EXPORT_SYMBOL_GPL(ftrace_dump);
diff --git a/lib/nmi_backtrace.c b/lib/nmi_backtrace.c
index 15ca78e1c7d4..77bf84987cda 100644
--- a/lib/nmi_backtrace.c
+++ b/lib/nmi_backtrace.c
@@ -75,12 +75,6 @@ void nmi_trigger_cpumask_backtrace(const cpumask_t *mask,
 		touch_softlockup_watchdog();
 	}
 
-	/*
-	 * Force flush any remote buffers that might be stuck in IRQ context
-	 * and therefore could not run their irq_work.
-	 */
-	printk_safe_flush();
-
 	clear_bit_unlock(0, &backtrace_flag);
 	put_cpu();
 }
-- 
2.11.0

^ permalink raw reply related

* [RFC PATCH v1 10/25] printk: redirect emit/store to new ringbuffer
From: John Ogness @ 2019-02-12 14:29 UTC (permalink / raw)
  To: linux-kernel
  Cc: Peter Zijlstra, Petr Mladek, Sergey Senozhatsky, Steven Rostedt,
	Daniel Wang, Andrew Morton, Linus Torvalds, Greg Kroah-Hartman,
	Alan Cox, Jiri Slaby, Peter Feiner, linux-serial,
	Sergey Senozhatsky
In-Reply-To: <20190212143003.48446-1-john.ogness@linutronix.de>

vprintk_emit and vprintk_store are the main functions that all printk
variants eventually go through. Change these to store the message in
the new printk ring buffer that the printk kthread is reading.

Remove functions no longer in use because of the changes to
vprintk_emit and vprintk_store.

In order to handle interrupts and NMIs, a second per-cpu ring buffer
(sprint_rb) is added. This ring buffer is used for NMI-safe memory
allocation in order to format the printk messages.

NOTE: LOG_CONT is ignored for now and handled as individual messages.
      LOG_CONT functions are masked behind "#if 0" blocks until their
      functionality can be restored

Signed-off-by: John Ogness <john.ogness@linutronix.de>
---
 kernel/printk/printk.c | 319 ++++++++-----------------------------------------
 1 file changed, 51 insertions(+), 268 deletions(-)

diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index 5a5a685bb128..b6a6f1002741 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -493,90 +493,6 @@ static u32 log_next(u32 idx)
 	return idx + msg->len;
 }
 
-/*
- * Check whether there is enough free space for the given message.
- *
- * The same values of first_idx and next_idx mean that the buffer
- * is either empty or full.
- *
- * If the buffer is empty, we must respect the position of the indexes.
- * They cannot be reset to the beginning of the buffer.
- */
-static int logbuf_has_space(u32 msg_size, bool empty)
-{
-	u32 free;
-
-	if (log_next_idx > log_first_idx || empty)
-		free = max(log_buf_len - log_next_idx, log_first_idx);
-	else
-		free = log_first_idx - log_next_idx;
-
-	/*
-	 * We need space also for an empty header that signalizes wrapping
-	 * of the buffer.
-	 */
-	return free >= msg_size + sizeof(struct printk_log);
-}
-
-static int log_make_free_space(u32 msg_size)
-{
-	while (log_first_seq < log_next_seq &&
-	       !logbuf_has_space(msg_size, false)) {
-		/* drop old messages until we have enough contiguous space */
-		log_first_idx = log_next(log_first_idx);
-		log_first_seq++;
-	}
-
-	if (clear_seq < log_first_seq) {
-		clear_seq = log_first_seq;
-		clear_idx = log_first_idx;
-	}
-
-	/* sequence numbers are equal, so the log buffer is empty */
-	if (logbuf_has_space(msg_size, log_first_seq == log_next_seq))
-		return 0;
-
-	return -ENOMEM;
-}
-
-/* compute the message size including the padding bytes */
-static u32 msg_used_size(u16 text_len, u16 dict_len, u32 *pad_len)
-{
-	u32 size;
-
-	size = sizeof(struct printk_log) + text_len + dict_len;
-	*pad_len = (-size) & (LOG_ALIGN - 1);
-	size += *pad_len;
-
-	return size;
-}
-
-/*
- * Define how much of the log buffer we could take at maximum. The value
- * must be greater than two. Note that only half of the buffer is available
- * when the index points to the middle.
- */
-#define MAX_LOG_TAKE_PART 4
-static const char trunc_msg[] = "<truncated>";
-
-static u32 truncate_msg(u16 *text_len, u16 *trunc_msg_len,
-			u16 *dict_len, u32 *pad_len)
-{
-	/*
-	 * The message should not take the whole buffer. Otherwise, it might
-	 * get removed too soon.
-	 */
-	u32 max_text_len = log_buf_len / MAX_LOG_TAKE_PART;
-	if (*text_len > max_text_len)
-		*text_len = max_text_len;
-	/* enable the warning message */
-	*trunc_msg_len = strlen(trunc_msg);
-	/* disable the "dict" completely */
-	*dict_len = 0;
-	/* compute the size again, count also the warning message */
-	return msg_used_size(*text_len + *trunc_msg_len, 0, pad_len);
-}
-
 /* insert record into the buffer, discard old ones, update heads */
 static int log_store(int facility, int level,
 		     enum log_flags flags, u64 ts_nsec,
@@ -584,54 +500,36 @@ static int log_store(int facility, int level,
 		     const char *text, u16 text_len)
 {
 	struct printk_log *msg;
-	u32 size, pad_len;
-	u16 trunc_msg_len = 0;
-
-	/* number of '\0' padding bytes to next message */
-	size = msg_used_size(text_len, dict_len, &pad_len);
-
-	if (log_make_free_space(size)) {
-		/* truncate the message if it is too long for empty buffer */
-		size = truncate_msg(&text_len, &trunc_msg_len,
-				    &dict_len, &pad_len);
-		/* survive when the log buffer is too small for trunc_msg */
-		if (log_make_free_space(size))
-			return 0;
-	}
+	struct prb_handle h;
+	char *rbuf;
+	u32 size;
 
-	if (log_next_idx + size + sizeof(struct printk_log) > log_buf_len) {
+	size = sizeof(*msg) + text_len + dict_len;
+
+	rbuf = prb_reserve(&h, &printk_rb, size);
+	if (!rbuf) {
 		/*
-		 * This message + an additional empty header does not fit
-		 * at the end of the buffer. Add an empty header with len == 0
-		 * to signify a wrap around.
+		 * An emergency message would have been printed, but
+		 * it cannot be stored in the log.
 		 */
-		memset(log_buf + log_next_idx, 0, sizeof(struct printk_log));
-		log_next_idx = 0;
+		prb_inc_lost(&printk_rb);
+		return 0;
 	}
 
 	/* fill message */
-	msg = (struct printk_log *)(log_buf + log_next_idx);
+	msg = (struct printk_log *)rbuf;
 	memcpy(log_text(msg), text, text_len);
 	msg->text_len = text_len;
-	if (trunc_msg_len) {
-		memcpy(log_text(msg) + text_len, trunc_msg, trunc_msg_len);
-		msg->text_len += trunc_msg_len;
-	}
 	memcpy(log_dict(msg), dict, dict_len);
 	msg->dict_len = dict_len;
 	msg->facility = facility;
 	msg->level = level & 7;
 	msg->flags = flags & 0x1f;
-	if (ts_nsec > 0)
-		msg->ts_nsec = ts_nsec;
-	else
-		msg->ts_nsec = local_clock();
-	memset(log_dict(msg) + dict_len, 0, pad_len);
+	msg->ts_nsec = ts_nsec;
 	msg->len = size;
 
 	/* insert message */
-	log_next_idx += msg->len;
-	log_next_seq++;
+	prb_commit(&h);
 
 	return msg->text_len;
 }
@@ -1675,70 +1573,6 @@ static int console_lock_spinning_disable_and_check(void)
 	return 1;
 }
 
-/**
- * console_trylock_spinning - try to get console_lock by busy waiting
- *
- * This allows to busy wait for the console_lock when the current
- * owner is running in specially marked sections. It means that
- * the current owner is running and cannot reschedule until it
- * is ready to lose the lock.
- *
- * Return: 1 if we got the lock, 0 othrewise
- */
-static int console_trylock_spinning(void)
-{
-	struct task_struct *owner = NULL;
-	bool waiter;
-	bool spin = false;
-	unsigned long flags;
-
-	if (console_trylock())
-		return 1;
-
-	printk_safe_enter_irqsave(flags);
-
-	raw_spin_lock(&console_owner_lock);
-	owner = READ_ONCE(console_owner);
-	waiter = READ_ONCE(console_waiter);
-	if (!waiter && owner && owner != current) {
-		WRITE_ONCE(console_waiter, true);
-		spin = true;
-	}
-	raw_spin_unlock(&console_owner_lock);
-
-	/*
-	 * If there is an active printk() writing to the
-	 * consoles, instead of having it write our data too,
-	 * see if we can offload that load from the active
-	 * printer, and do some printing ourselves.
-	 * Go into a spin only if there isn't already a waiter
-	 * spinning, and there is an active printer, and
-	 * that active printer isn't us (recursive printk?).
-	 */
-	if (!spin) {
-		printk_safe_exit_irqrestore(flags);
-		return 0;
-	}
-
-	/* We spin waiting for the owner to release us */
-	spin_acquire(&console_owner_dep_map, 0, 0, _THIS_IP_);
-	/* Owner will clear console_waiter on hand off */
-	while (READ_ONCE(console_waiter))
-		cpu_relax();
-	spin_release(&console_owner_dep_map, 1, _THIS_IP_);
-
-	printk_safe_exit_irqrestore(flags);
-	/*
-	 * The owner passed the console lock to us.
-	 * Since we did not spin on console lock, annotate
-	 * this as a trylock. Otherwise lockdep will
-	 * complain.
-	 */
-	mutex_acquire(&console_lock_dep_map, 0, 1, _THIS_IP_);
-
-	return 1;
-}
-
 /*
  * Call the console drivers, asking them to write out
  * log_buf[start] to log_buf[end - 1].
@@ -1759,7 +1593,7 @@ static void call_console_drivers(const char *ext_text, size_t ext_len,
 			continue;
 		if (!con->write)
 			continue;
-		if (!cpu_online(smp_processor_id()) &&
+		if (!cpu_online(raw_smp_processor_id()) &&
 		    !(con->flags & CON_ANYTIME))
 			continue;
 		if (con->flags & CON_EXTENDED)
@@ -1783,6 +1617,8 @@ static inline void printk_delay(void)
 	}
 }
 
+/* FIXME: no support for LOG_CONT */
+#if 0
 /*
  * Continuation lines are buffered, and not committed to the record buffer
  * until the line is complete, or a race forces it. The line fragments
@@ -1837,53 +1673,44 @@ static bool cont_add(int facility, int level, enum log_flags flags, const char *
 
 	return true;
 }
+#endif /* 0 */
 
-static size_t log_output(int facility, int level, enum log_flags lflags, const char *dict, size_t dictlen, char *text, size_t text_len)
-{
-	/*
-	 * If an earlier line was buffered, and we're a continuation
-	 * write from the same process, try to add it to the buffer.
-	 */
-	if (cont.len) {
-		if (cont.owner == current && (lflags & LOG_CONT)) {
-			if (cont_add(facility, level, lflags, text, text_len))
-				return text_len;
-		}
-		/* Otherwise, make sure it's flushed */
-		cont_flush();
-	}
-
-	/* Skip empty continuation lines that couldn't be added - they just flush */
-	if (!text_len && (lflags & LOG_CONT))
-		return 0;
-
-	/* If it doesn't end in a newline, try to buffer the current line */
-	if (!(lflags & LOG_NEWLINE)) {
-		if (cont_add(facility, level, lflags, text, text_len))
-			return text_len;
-	}
-
-	/* Store it in the record log */
-	return log_store(facility, level, lflags, 0, dict, dictlen, text, text_len);
-}
-
-/* Must be called under logbuf_lock. */
 int vprintk_store(int facility, int level,
 		  const char *dict, size_t dictlen,
 		  const char *fmt, va_list args)
 {
-	static char textbuf[LOG_LINE_MAX];
-	char *text = textbuf;
-	size_t text_len;
+	return vprintk_emit(facility, level, dict, dictlen, fmt, args);
+}
+
+/* ring buffer used as memory allocator for temporary sprint buffers */
+DECLARE_STATIC_PRINTKRB(sprint_rb,
+			ilog2(PRINTK_RECORD_MAX + sizeof(struct prb_entry) +
+			      sizeof(long)) + 2, &printk_cpulock);
+
+asmlinkage int vprintk_emit(int facility, int level,
+			    const char *dict, size_t dictlen,
+			    const char *fmt, va_list args)
+{
 	enum log_flags lflags = 0;
+	int printed_len = 0;
+	struct prb_handle h;
+	size_t text_len;
+	u64 ts_nsec;
+	char *text;
+	char *rbuf;
 
-	/*
-	 * The printf needs to come first; we need the syslog
-	 * prefix which might be passed-in as a parameter.
-	 */
-	text_len = vscnprintf(text, sizeof(textbuf), fmt, args);
+	ts_nsec = local_clock();
 
-	/* mark and strip a trailing newline */
+	rbuf = prb_reserve(&h, &sprint_rb, PRINTK_SPRINT_MAX);
+	if (!rbuf) {
+		prb_inc_lost(&printk_rb);
+		return printed_len;
+	}
+
+	text = rbuf;
+	text_len = vscnprintf(text, PRINTK_SPRINT_MAX, fmt, args);
+
+	/* strip and flag a trailing newline */
 	if (text_len && text[text_len-1] == '\n') {
 		text_len--;
 		lflags |= LOG_NEWLINE;
@@ -1917,54 +1744,10 @@ int vprintk_store(int facility, int level,
 	if (dict)
 		lflags |= LOG_PREFIX|LOG_NEWLINE;
 
-	return log_output(facility, level, lflags,
-			  dict, dictlen, text, text_len);
-}
-
-asmlinkage int vprintk_emit(int facility, int level,
-			    const char *dict, size_t dictlen,
-			    const char *fmt, va_list args)
-{
-	int printed_len;
-	bool in_sched = false, pending_output;
-	unsigned long flags;
-	u64 curr_log_seq;
-
-	if (level == LOGLEVEL_SCHED) {
-		level = LOGLEVEL_DEFAULT;
-		in_sched = true;
-	}
-
-	boot_delay_msec(level);
-	printk_delay();
-
-	/* This stops the holder of console_sem just where we want him */
-	logbuf_lock_irqsave(flags);
-	curr_log_seq = log_next_seq;
-	printed_len = vprintk_store(facility, level, dict, dictlen, fmt, args);
-	pending_output = (curr_log_seq != log_next_seq);
-	logbuf_unlock_irqrestore(flags);
-
-	/* If called from the scheduler, we can not call up(). */
-	if (!in_sched && pending_output) {
-		/*
-		 * Disable preemption to avoid being preempted while holding
-		 * console_sem which would prevent anyone from printing to
-		 * console
-		 */
-		preempt_disable();
-		/*
-		 * Try to acquire and then immediately release the console
-		 * semaphore.  The release will print out buffers and wake up
-		 * /dev/kmsg and syslog() users.
-		 */
-		if (console_trylock_spinning())
-			console_unlock();
-		preempt_enable();
-	}
+	printed_len = log_store(facility, level, lflags, ts_nsec,
+				dict, dictlen, text, text_len);
 
-	if (pending_output)
-		wake_up_klogd();
+	prb_commit(&h);
 	return printed_len;
 }
 EXPORT_SYMBOL(vprintk_emit);
@@ -2429,7 +2212,7 @@ void console_unlock(void)
 		console_lock_spinning_enable();
 
 		stop_critical_timings();	/* don't trace print latency */
-		call_console_drivers(ext_text, ext_len, text, len);
+		//call_console_drivers(ext_text, ext_len, text, len);
 		start_critical_timings();
 
 		if (console_lock_spinning_disable_and_check()) {
-- 
2.11.0

^ permalink raw reply related

* [RFC PATCH v1 09/25] printk: remove exclusive console hack
From: John Ogness @ 2019-02-12 14:29 UTC (permalink / raw)
  To: linux-kernel
  Cc: Peter Zijlstra, Petr Mladek, Sergey Senozhatsky, Steven Rostedt,
	Daniel Wang, Andrew Morton, Linus Torvalds, Greg Kroah-Hartman,
	Alan Cox, Jiri Slaby, Peter Feiner, linux-serial,
	Sergey Senozhatsky
In-Reply-To: <20190212143003.48446-1-john.ogness@linutronix.de>

In order to support printing the printk log history when new
consoles are registered, a global exclusive_console variable is
temporarily set. This only works because printk runs with
preemption disabled.

When console printing is moved to a fully preemptible dedicated
kthread, this hack no longer works.

Remove exclusive_console usage.

Signed-off-by: John Ogness <john.ogness@linutronix.de>
---
 kernel/printk/printk.c | 30 ++++--------------------------
 1 file changed, 4 insertions(+), 26 deletions(-)

diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index 08e079b95652..5a5a685bb128 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -251,11 +251,6 @@ static void __up_console_sem(unsigned long ip)
 static int console_locked, console_suspended;
 
 /*
- * If exclusive_console is non-NULL then only this console is to be printed to.
- */
-static struct console *exclusive_console;
-
-/*
  *	Array of consoles built from command line options (console=)
  */
 
@@ -423,7 +418,6 @@ static u32 log_next_idx;
 /* the next printk record to write to the console */
 static u64 console_seq;
 static u32 console_idx;
-static u64 exclusive_console_stop_seq;
 
 /* the next printk record to read after the last 'clear' command */
 static u64 clear_seq;
@@ -1761,8 +1755,6 @@ static void call_console_drivers(const char *ext_text, size_t ext_len,
 		return;
 
 	for_each_console(con) {
-		if (exclusive_console && con != exclusive_console)
-			continue;
 		if (!(con->flags & CON_ENABLED))
 			continue;
 		if (!con->write)
@@ -2044,7 +2036,6 @@ static u64 syslog_seq;
 static u32 syslog_idx;
 static u64 console_seq;
 static u32 console_idx;
-static u64 exclusive_console_stop_seq;
 static u64 log_first_seq;
 static u32 log_first_idx;
 static u64 log_next_seq;
@@ -2413,12 +2404,6 @@ void console_unlock(void)
 			goto skip;
 		}
 
-		/* Output to all consoles once old messages replayed. */
-		if (unlikely(exclusive_console &&
-			     console_seq >= exclusive_console_stop_seq)) {
-			exclusive_console = NULL;
-		}
-
 		len += msg_print_text(msg,
 				console_msg_format & MSG_FORMAT_SYSLOG,
 				printk_time, text + len, sizeof(text) - len);
@@ -2736,17 +2721,6 @@ void register_console(struct console *newcon)
 		logbuf_lock_irqsave(flags);
 		console_seq = syslog_seq;
 		console_idx = syslog_idx;
-		/*
-		 * We're about to replay the log buffer.  Only do this to the
-		 * just-registered console to avoid excessive message spam to
-		 * the already-registered consoles.
-		 *
-		 * Set exclusive_console with disabled interrupts to reduce
-		 * race window with eventual console_flush_on_panic() that
-		 * ignores console_lock.
-		 */
-		exclusive_console = newcon;
-		exclusive_console_stop_seq = console_seq;
 		logbuf_unlock_irqrestore(flags);
 	}
 	console_unlock();
@@ -2758,6 +2732,10 @@ void register_console(struct console *newcon)
 	 * boot consoles, real consoles, etc - this is to ensure that end
 	 * users know there might be something in the kernel's log buffer that
 	 * went to the bootconsole (that they do not see on the real console)
+	 *
+	 * This message is also important because it will trigger the
+	 * printk kthread to begin dumping the log buffer to the newly
+	 * registered console.
 	 */
 	pr_info("%sconsole [%s%d] enabled\n",
 		(newcon->flags & CON_BOOT) ? "boot" : "" ,
-- 
2.11.0

^ permalink raw reply related

* [RFC PATCH v1 08/25] printk: add ring buffer and kthread
From: John Ogness @ 2019-02-12 14:29 UTC (permalink / raw)
  To: linux-kernel
  Cc: Peter Zijlstra, Petr Mladek, Sergey Senozhatsky, Steven Rostedt,
	Daniel Wang, Andrew Morton, Linus Torvalds, Greg Kroah-Hartman,
	Alan Cox, Jiri Slaby, Peter Feiner, linux-serial,
	Sergey Senozhatsky
In-Reply-To: <20190212143003.48446-1-john.ogness@linutronix.de>

The printk ring buffer provides an NMI-safe interface for writing
messages to a ring buffer. Using such a buffer for alleviates printk
callers from the current burdens of disabled preemption while calling
the console drivers (and possibly printing out many messages that
another task put into the log buffer).

Create a ring buffer to be used for storing messages to be
printed to the consoles.

Create a dedicated printk kthread to block on the ring buffer
and call the console drivers for the read messages.

NOTE: The printk_delay is relocated to _after_ the message is
      printed, where it makes more sense.

Signed-off-by: John Ogness <john.ogness@linutronix.de>
---
 kernel/printk/printk.c | 105 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 105 insertions(+)

diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index d3d170374ceb..08e079b95652 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -44,6 +44,8 @@
 #include <linux/irq_work.h>
 #include <linux/ctype.h>
 #include <linux/uio.h>
+#include <linux/kthread.h>
+#include <linux/printk_ringbuffer.h>
 #include <linux/sched/clock.h>
 #include <linux/sched/debug.h>
 #include <linux/sched/task_stack.h>
@@ -397,7 +399,12 @@ DEFINE_RAW_SPINLOCK(logbuf_lock);
 		printk_safe_exit_irqrestore(flags);	\
 	} while (0)
 
+DECLARE_STATIC_PRINTKRB_CPULOCK(printk_cpulock);
+
 #ifdef CONFIG_PRINTK
+/* record buffer */
+DECLARE_STATIC_PRINTKRB(printk_rb, CONFIG_LOG_BUF_SHIFT, &printk_cpulock);
+
 DECLARE_WAIT_QUEUE_HEAD(log_wait);
 /* the next printk record to read by syslog(READ) or /proc/kmsg */
 static u64 syslog_seq;
@@ -744,6 +751,10 @@ static ssize_t msg_print_ext_body(char *buf, size_t size,
 	return p - buf;
 }
 
+#define PRINTK_SPRINT_MAX (LOG_LINE_MAX + PREFIX_MAX)
+#define PRINTK_RECORD_MAX (sizeof(struct printk_log) + \
+				CONSOLE_EXT_LOG_MAX + PRINTK_SPRINT_MAX)
+
 /* /dev/kmsg - userspace message inject/listen interface */
 struct devkmsg_user {
 	u64 seq;
@@ -1566,6 +1577,34 @@ SYSCALL_DEFINE3(syslog, int, type, char __user *, buf, int, len)
 	return do_syslog(type, buf, len, SYSLOG_FROM_READER);
 }
 
+static void format_text(struct printk_log *msg, u64 seq,
+			char *ext_text, size_t *ext_len,
+			char *text, size_t *len, bool time)
+{
+	if (suppress_message_printing(msg->level)) {
+		/*
+		 * Skip record that has level above the console
+		 * loglevel and update each console's local seq.
+		 */
+		*len = 0;
+		*ext_len = 0;
+		return;
+	}
+
+	*len = msg_print_text(msg, console_msg_format & MSG_FORMAT_SYSLOG,
+			      time, text, PRINTK_SPRINT_MAX);
+	if (nr_ext_console_drivers) {
+		*ext_len = msg_print_ext_header(ext_text, CONSOLE_EXT_LOG_MAX,
+						msg, seq);
+		*ext_len += msg_print_ext_body(ext_text + *ext_len,
+					       CONSOLE_EXT_LOG_MAX - *ext_len,
+					       log_dict(msg), msg->dict_len,
+					       log_text(msg), msg->text_len);
+	} else {
+		*ext_len = 0;
+	}
+}
+
 /*
  * Special console_lock variants that help to reduce the risk of soft-lockups.
  * They allow to pass console_lock to another printk() call using a busy wait.
@@ -2899,6 +2938,72 @@ void wake_up_klogd(void)
 	preempt_enable();
 }
 
+static int printk_kthread_func(void *data)
+{
+	struct prb_iterator iter;
+	struct printk_log *msg;
+	size_t ext_len;
+	char *ext_text;
+	u64 master_seq;
+	size_t len;
+	char *text;
+	char *buf;
+	int ret;
+
+	ext_text = kmalloc(CONSOLE_EXT_LOG_MAX, GFP_KERNEL);
+	text = kmalloc(PRINTK_SPRINT_MAX, GFP_KERNEL);
+	buf = kmalloc(PRINTK_RECORD_MAX, GFP_KERNEL);
+	if (!ext_text || !text || !buf)
+		return -1;
+
+	prb_iter_init(&iter, &printk_rb, NULL);
+
+	/* the printk kthread never exits */
+	for (;;) {
+		ret = prb_iter_wait_next(&iter, buf,
+					 PRINTK_RECORD_MAX, &master_seq);
+		if (ret == -ERESTARTSYS) {
+			continue;
+		} else if (ret < 0) {
+			/* iterator invalid, start over */
+			prb_iter_init(&iter, &printk_rb, NULL);
+			continue;
+		}
+
+		msg = (struct printk_log *)buf;
+		format_text(msg, master_seq, ext_text, &ext_len, text,
+			    &len, printk_time);
+
+		console_lock();
+		if (len > 0 || ext_len > 0) {
+			call_console_drivers(ext_text, ext_len, text, len);
+			boot_delay_msec(msg->level);
+			printk_delay();
+		}
+		console_unlock();
+	}
+
+	kfree(ext_text);
+	kfree(text);
+	kfree(buf);
+
+	return 0;
+}
+
+static int __init init_printk_kthread(void)
+{
+	struct task_struct *thread;
+
+	thread = kthread_run(printk_kthread_func, NULL, "printk");
+	if (IS_ERR(thread)) {
+		pr_err("printk: unable to create printing thread\n");
+		return PTR_ERR(thread);
+	}
+
+	return 0;
+}
+late_initcall(init_printk_kthread);
+
 void defer_console_output(void)
 {
 	preempt_disable();
-- 
2.11.0

^ permalink raw reply related

* [RFC PATCH v1 07/25] printk-rb: add functionality required by printk
From: John Ogness @ 2019-02-12 14:29 UTC (permalink / raw)
  To: linux-kernel
  Cc: Peter Zijlstra, Petr Mladek, Sergey Senozhatsky, Steven Rostedt,
	Daniel Wang, Andrew Morton, Linus Torvalds, Greg Kroah-Hartman,
	Alan Cox, Jiri Slaby, Peter Feiner, linux-serial,
	Sergey Senozhatsky
In-Reply-To: <20190212143003.48446-1-john.ogness@linutronix.de>

The printk subsystem needs to be able to query the size of the ring
buffer, seek to specific entries within the ring buffer, and track
if records could not be stored in the ring buffer.

Signed-off-by: John Ogness <john.ogness@linutronix.de>
---
 include/linux/printk_ringbuffer.h |  5 +++
 lib/printk_ringbuffer.c           | 95 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 100 insertions(+)

diff --git a/include/linux/printk_ringbuffer.h b/include/linux/printk_ringbuffer.h
index 106f20ef8b4d..ec3d7ceec378 100644
--- a/include/linux/printk_ringbuffer.h
+++ b/include/linux/printk_ringbuffer.h
@@ -17,6 +17,7 @@ struct printk_ringbuffer {
 	unsigned int size_bits;
 
 	u64 seq;
+	atomic_long_t lost;
 
 	atomic_long_t tail;
 	atomic_long_t head;
@@ -78,6 +79,7 @@ static struct printk_ringbuffer name = {				\
 	.buffer = &_##name##_buffer[0],					\
 	.size_bits = szbits,						\
 	.seq = 0,							\
+	.lost = ATOMIC_LONG_INIT(0),					\
 	.tail = ATOMIC_LONG_INIT(-111 * sizeof(long)),			\
 	.head = ATOMIC_LONG_INIT(-111 * sizeof(long)),			\
 	.reserve = ATOMIC_LONG_INIT(-111 * sizeof(long)),		\
@@ -100,9 +102,12 @@ void prb_iter_copy(struct prb_iterator *dest, struct prb_iterator *src);
 int prb_iter_next(struct prb_iterator *iter, char *buf, int size, u64 *seq);
 int prb_iter_wait_next(struct prb_iterator *iter, char *buf, int size,
 		       u64 *seq);
+int prb_iter_seek(struct prb_iterator *iter, u64 seq);
 int prb_iter_data(struct prb_iterator *iter, char *buf, int size, u64 *seq);
 
 /* utility functions */
+int prb_buffer_size(struct printk_ringbuffer *rb);
+void prb_inc_lost(struct printk_ringbuffer *rb);
 void prb_lock(struct prb_cpulock *cpu_lock, unsigned int *cpu_store);
 void prb_unlock(struct prb_cpulock *cpu_lock, unsigned int cpu_store);
 
diff --git a/lib/printk_ringbuffer.c b/lib/printk_ringbuffer.c
index c2ddf4cb9f92..ce33b5add5a1 100644
--- a/lib/printk_ringbuffer.c
+++ b/lib/printk_ringbuffer.c
@@ -175,11 +175,16 @@ void prb_commit(struct prb_handle *h)
 				head = PRB_WRAP_LPOS(rb, head, 1);
 				continue;
 			}
+			while (atomic_long_read(&rb->lost)) {
+				atomic_long_dec(&rb->lost);
+				rb->seq++;
+			}
 			e->seq = ++rb->seq;
 			head += e->size;
 			changed = true;
 		}
 		atomic_long_set_release(&rb->head, res);
+
 		atomic_dec(&rb->ctx);
 
 		if (atomic_long_read(&rb->reserve) == res)
@@ -486,3 +491,93 @@ int prb_iter_wait_next(struct prb_iterator *iter, char *buf, int size, u64 *seq)
 
 	return ret;
 }
+
+/*
+ * prb_iter_seek: Seek forward to a specific record.
+ * @iter: Iterator to advance.
+ * @seq: Record number to advance to.
+ *
+ * Advance @iter such that a following call to prb_iter_data() will provide
+ * the contents of the specified record. If a record is specified that does
+ * not yet exist, advance @iter to the end of the record list.
+ *
+ * Note that iterators cannot be rewound. So if a record is requested that
+ * exists but is previous to @iter in position, @iter is considered invalid.
+ *
+ * It is safe to call this function from any context and state.
+ *
+ * Returns 1 on succces, 0 if specified record does not yet exist (@iter is
+ * now at the end of the list), or -EINVAL if @iter is now invalid.
+ */
+int prb_iter_seek(struct prb_iterator *iter, u64 seq)
+{
+	u64 cur_seq;
+	int ret;
+
+	/* first check if the iterator is already at the wanted seq */
+	if (seq == 0) {
+		if (iter->lpos == PRB_INIT)
+			return 1;
+		else
+			return -EINVAL;
+	}
+	if (iter->lpos != PRB_INIT) {
+		if (prb_iter_data(iter, NULL, 0, &cur_seq) >= 0) {
+			if (cur_seq == seq)
+				return 1;
+			if (cur_seq > seq)
+				return -EINVAL;
+		}
+	}
+
+	/* iterate to find the wanted seq */
+	for (;;) {
+		ret = prb_iter_next(iter, NULL, 0, &cur_seq);
+		if (ret <= 0)
+			break;
+
+		if (cur_seq == seq)
+			break;
+
+		if (cur_seq > seq) {
+			ret = -EINVAL;
+			break;
+		}
+	}
+
+	return ret;
+}
+
+/*
+ * prb_buffer_size: Get the size of the ring buffer.
+ * @rb: The ring buffer to get the size of.
+ *
+ * Return the number of bytes used for the ring buffer entry storage area.
+ * Note that this area stores both entry header and entry data. Therefore
+ * this represents an upper bound to the amount of data that can be stored
+ * in the ring buffer.
+ *
+ * It is safe to call this function from any context and state.
+ *
+ * Returns the size in bytes of the entry storage area.
+ */
+int prb_buffer_size(struct printk_ringbuffer *rb)
+{
+	return PRB_SIZE(rb);
+}
+
+/*
+ * prb_inc_lost: Increment the seq counter to signal a lost record.
+ * @rb: The ring buffer to increment the seq of.
+ *
+ * Increment the seq counter so that a seq number is intentially missing
+ * for the readers. This allows readers to identify that a record is
+ * missing. A writer will typically use this function if prb_reserve()
+ * fails.
+ *
+ * It is safe to call this function from any context and state.
+ */
+void prb_inc_lost(struct printk_ringbuffer *rb)
+{
+	atomic_long_inc(&rb->lost);
+}
-- 
2.11.0

^ permalink raw reply related

* [RFC PATCH v1 06/25] printk-rb: add blocking reader support
From: John Ogness @ 2019-02-12 14:29 UTC (permalink / raw)
  To: linux-kernel
  Cc: Peter Zijlstra, Petr Mladek, Sergey Senozhatsky, Steven Rostedt,
	Daniel Wang, Andrew Morton, Linus Torvalds, Greg Kroah-Hartman,
	Alan Cox, Jiri Slaby, Peter Feiner, linux-serial,
	Sergey Senozhatsky
In-Reply-To: <20190212143003.48446-1-john.ogness@linutronix.de>

Add a blocking read function for readers. An irq_work function is
used to signal the wait queue so that write notification can
be triggered from any context.

Signed-off-by: John Ogness <john.ogness@linutronix.de>
---
 include/linux/printk_ringbuffer.h | 20 ++++++++++++++++
 lib/printk_ringbuffer.c           | 49 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 69 insertions(+)

diff --git a/include/linux/printk_ringbuffer.h b/include/linux/printk_ringbuffer.h
index 5fdaf632c111..106f20ef8b4d 100644
--- a/include/linux/printk_ringbuffer.h
+++ b/include/linux/printk_ringbuffer.h
@@ -2,8 +2,10 @@
 #ifndef _LINUX_PRINTK_RINGBUFFER_H
 #define _LINUX_PRINTK_RINGBUFFER_H
 
+#include <linux/irq_work.h>
 #include <linux/atomic.h>
 #include <linux/percpu.h>
+#include <linux/wait.h>
 
 struct prb_cpulock {
 	atomic_t owner;
@@ -22,6 +24,10 @@ struct printk_ringbuffer {
 
 	struct prb_cpulock *cpulock;
 	atomic_t ctx;
+
+	struct wait_queue_head *wq;
+	atomic_long_t wq_counter;
+	struct irq_work *wq_work;
 };
 
 struct prb_entry {
@@ -59,6 +65,15 @@ struct prb_iterator {
 #define DECLARE_STATIC_PRINTKRB(name, szbits, cpulockptr)		\
 static char _##name##_buffer[1 << (szbits)]				\
 	__aligned(__alignof__(long));					\
+static DECLARE_WAIT_QUEUE_HEAD(_##name##_wait);				\
+static void _##name##_wake_work_func(struct irq_work *irq_work)		\
+{									\
+	wake_up_interruptible_all(&_##name##_wait);			\
+}									\
+static struct irq_work _##name##_wake_work = {				\
+	.func = _##name##_wake_work_func,				\
+	.flags = IRQ_WORK_LAZY,						\
+};									\
 static struct printk_ringbuffer name = {				\
 	.buffer = &_##name##_buffer[0],					\
 	.size_bits = szbits,						\
@@ -68,6 +83,9 @@ static struct printk_ringbuffer name = {				\
 	.reserve = ATOMIC_LONG_INIT(-111 * sizeof(long)),		\
 	.cpulock = cpulockptr,						\
 	.ctx = ATOMIC_INIT(0),						\
+	.wq = &_##name##_wait,						\
+	.wq_counter = ATOMIC_LONG_INIT(0),				\
+	.wq_work = &_##name##_wake_work,				\
 }
 
 /* writer interface */
@@ -80,6 +98,8 @@ void prb_iter_init(struct prb_iterator *iter, struct printk_ringbuffer *rb,
 		   u64 *seq);
 void prb_iter_copy(struct prb_iterator *dest, struct prb_iterator *src);
 int prb_iter_next(struct prb_iterator *iter, char *buf, int size, u64 *seq);
+int prb_iter_wait_next(struct prb_iterator *iter, char *buf, int size,
+		       u64 *seq);
 int prb_iter_data(struct prb_iterator *iter, char *buf, int size, u64 *seq);
 
 /* utility functions */
diff --git a/lib/printk_ringbuffer.c b/lib/printk_ringbuffer.c
index 1d1e886a0966..c2ddf4cb9f92 100644
--- a/lib/printk_ringbuffer.c
+++ b/lib/printk_ringbuffer.c
@@ -1,4 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
+#include <linux/sched.h>
 #include <linux/smp.h>
 #include <linux/string.h>
 #include <linux/errno.h>
@@ -154,6 +155,7 @@ static bool push_tail(struct printk_ringbuffer *rb, unsigned long tail)
 void prb_commit(struct prb_handle *h)
 {
 	struct printk_ringbuffer *rb = h->rb;
+	bool changed = false;
 	struct prb_entry *e;
 	unsigned long head;
 	unsigned long res;
@@ -175,6 +177,7 @@ void prb_commit(struct prb_handle *h)
 			}
 			e->seq = ++rb->seq;
 			head += e->size;
+			changed = true;
 		}
 		atomic_long_set_release(&rb->head, res);
 		atomic_dec(&rb->ctx);
@@ -185,6 +188,12 @@ void prb_commit(struct prb_handle *h)
 	}
 
 	prb_unlock(rb->cpulock, h->cpu);
+
+	if (changed) {
+		atomic_long_inc(&rb->wq_counter);
+		if (wq_has_sleeper(rb->wq))
+			irq_work_queue(rb->wq_work);
+	}
 }
 
 /*
@@ -437,3 +446,43 @@ int prb_iter_next(struct prb_iterator *iter, char *buf, int size, u64 *seq)
 
 	return 1;
 }
+
+/*
+ * prb_iter_wait_next: Advance to the next record, blocking if none available.
+ * @iter: Iterator tracking the current position.
+ * @buf: A buffer to store the data of the next record. May be NULL.
+ * @size: The size of @buf. (Ignored if @buf is NULL.)
+ * @seq: The sequence number of the next record. May be NULL.
+ *
+ * If a next record is already available, this function works like
+ * prb_iter_next(). Otherwise block interruptible until a next record is
+ * available.
+ *
+ * When a next record is available, @iter is advanced and (if specified)
+ * the data and/or sequence number of that record are provided.
+ *
+ * This function might sleep.
+ *
+ * Returns 1 if @iter was advanced, -EINVAL if @iter is now invalid, or
+ * -ERESTARTSYS if interrupted by a signal.
+ */
+int prb_iter_wait_next(struct prb_iterator *iter, char *buf, int size, u64 *seq)
+{
+	unsigned long last_seen;
+	int ret;
+
+	for (;;) {
+		last_seen = atomic_long_read(&iter->rb->wq_counter);
+
+		ret = prb_iter_next(iter, buf, size, seq);
+		if (ret != 0)
+			break;
+
+		ret = wait_event_interruptible(*iter->rb->wq,
+			last_seen != atomic_long_read(&iter->rb->wq_counter));
+		if (ret < 0)
+			break;
+	}
+
+	return ret;
+}
-- 
2.11.0

^ permalink raw reply related

* [RFC PATCH v1 05/25] printk-rb: add basic non-blocking reading interface
From: John Ogness @ 2019-02-12 14:29 UTC (permalink / raw)
  To: linux-kernel
  Cc: Peter Zijlstra, Petr Mladek, Sergey Senozhatsky, Steven Rostedt,
	Daniel Wang, Andrew Morton, Linus Torvalds, Greg Kroah-Hartman,
	Alan Cox, Jiri Slaby, Peter Feiner, linux-serial,
	Sergey Senozhatsky
In-Reply-To: <20190212143003.48446-1-john.ogness@linutronix.de>

Add reader iterator static declaration/initializer, dynamic
initializer, and functions to iterate and retrieve ring buffer data.

Signed-off-by: John Ogness <john.ogness@linutronix.de>
---
 include/linux/printk_ringbuffer.h |  20 ++++
 lib/printk_ringbuffer.c           | 190 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 210 insertions(+)

diff --git a/include/linux/printk_ringbuffer.h b/include/linux/printk_ringbuffer.h
index 1aec9d5666b1..5fdaf632c111 100644
--- a/include/linux/printk_ringbuffer.h
+++ b/include/linux/printk_ringbuffer.h
@@ -43,6 +43,19 @@ static struct prb_cpulock name = {					\
 	.irqflags = &_##name##_percpu_irqflags,				\
 }
 
+#define PRB_INIT ((unsigned long)-1)
+
+#define DECLARE_STATIC_PRINTKRB_ITER(name, rbaddr)			\
+static struct prb_iterator name = {					\
+	.rb = rbaddr,							\
+	.lpos = PRB_INIT,						\
+}
+
+struct prb_iterator {
+	struct printk_ringbuffer *rb;
+	unsigned long lpos;
+};
+
 #define DECLARE_STATIC_PRINTKRB(name, szbits, cpulockptr)		\
 static char _##name##_buffer[1 << (szbits)]				\
 	__aligned(__alignof__(long));					\
@@ -62,6 +75,13 @@ char *prb_reserve(struct prb_handle *h, struct printk_ringbuffer *rb,
 		  unsigned int size);
 void prb_commit(struct prb_handle *h);
 
+/* reader interface */
+void prb_iter_init(struct prb_iterator *iter, struct printk_ringbuffer *rb,
+		   u64 *seq);
+void prb_iter_copy(struct prb_iterator *dest, struct prb_iterator *src);
+int prb_iter_next(struct prb_iterator *iter, char *buf, int size, u64 *seq);
+int prb_iter_data(struct prb_iterator *iter, char *buf, int size, u64 *seq);
+
 /* utility functions */
 void prb_lock(struct prb_cpulock *cpu_lock, unsigned int *cpu_store);
 void prb_unlock(struct prb_cpulock *cpu_lock, unsigned int cpu_store);
diff --git a/lib/printk_ringbuffer.c b/lib/printk_ringbuffer.c
index 90c7f9a9f861..1d1e886a0966 100644
--- a/lib/printk_ringbuffer.c
+++ b/lib/printk_ringbuffer.c
@@ -1,5 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <linux/smp.h>
+#include <linux/string.h>
+#include <linux/errno.h>
 #include <linux/printk_ringbuffer.h>
 
 #define PRB_SIZE(rb) (1 << rb->size_bits)
@@ -8,6 +10,7 @@
 #define PRB_WRAPS(rb, lpos) (lpos >> rb->size_bits)
 #define PRB_WRAP_LPOS(rb, lpos, xtra) \
 	((PRB_WRAPS(rb, lpos) + xtra) << rb->size_bits)
+#define PRB_DATA_SIZE(e) (e->size - sizeof(struct prb_entry))
 #define PRB_DATA_ALIGN sizeof(long)
 
 static bool __prb_trylock(struct prb_cpulock *cpu_lock,
@@ -247,3 +250,190 @@ char *prb_reserve(struct prb_handle *h, struct printk_ringbuffer *rb,
 
 	return &h->entry->data[0];
 }
+
+/*
+ * prb_iter_copy: Copy an iterator.
+ * @dest: The iterator to copy to.
+ * @src: The iterator to copy from.
+ *
+ * Make a deep copy of an iterator. This is particularly useful for making
+ * backup copies of an iterator in case a form of rewinding it needed.
+ *
+ * It is safe to call this function from any context and state. But
+ * note that this function is not atomic. Callers should not make copies
+ * to/from iterators that can be accessed by other tasks/contexts.
+ */
+void prb_iter_copy(struct prb_iterator *dest, struct prb_iterator *src)
+{
+	memcpy(dest, src, sizeof(*dest));
+}
+
+/*
+ * prb_iter_init: Initialize an iterator for a ring buffer.
+ * @iter: The iterator to initialize.
+ * @rb: A ring buffer to that @iter should iterate.
+ * @seq: The sequence number of the position preceding the first record.
+ *       May be NULL.
+ *
+ * Initialize an iterator to be used with a specified ring buffer. If @seq
+ * is non-NULL, it will be set such that prb_iter_next() will provide a
+ * sequence value of "@seq + 1" if no records were missed.
+ *
+ * It is safe to call this function from any context and state.
+ */
+void prb_iter_init(struct prb_iterator *iter, struct printk_ringbuffer *rb,
+		   u64 *seq)
+{
+	memset(iter, 0, sizeof(*iter));
+	iter->rb = rb;
+	iter->lpos = PRB_INIT;
+
+	if (!seq)
+		return;
+
+	for (;;) {
+		struct prb_iterator tmp_iter;
+		int ret;
+
+		prb_iter_copy(&tmp_iter, iter);
+
+		ret = prb_iter_next(&tmp_iter, NULL, 0, seq);
+		if (ret < 0)
+			continue;
+
+		if (ret == 0)
+			*seq = 0;
+		else
+			(*seq)--;
+		break;
+	}
+}
+
+static bool is_valid(struct printk_ringbuffer *rb, unsigned long lpos)
+{
+	unsigned long head, tail;
+
+	tail = atomic_long_read(&rb->tail);
+	head = atomic_long_read(&rb->head);
+	head -= tail;
+	lpos -= tail;
+
+	if (lpos >= head)
+		return false;
+	return true;
+}
+
+/*
+ * prb_iter_data: Retrieve the record data at the current position.
+ * @iter: Iterator tracking the current position.
+ * @buf: A buffer to store the data of the record. May be NULL.
+ * @size: The size of @buf. (Ignored if @buf is NULL.)
+ * @seq: The sequence number of the record. May be NULL.
+ *
+ * If @iter is at a record, provide the data and/or sequence number of that
+ * record (if specified by the caller).
+ *
+ * It is safe to call this function from any context and state.
+ *
+ * Returns >=0 if the current record contains valid data (returns 0 if @buf
+ * is NULL or returns the size of the data block if @buf is non-NULL) or
+ * -EINVAL if @iter is now invalid.
+ */
+int prb_iter_data(struct prb_iterator *iter, char *buf, int size, u64 *seq)
+{
+	struct printk_ringbuffer *rb = iter->rb;
+	unsigned long lpos = iter->lpos;
+	unsigned int datsize = 0;
+	struct prb_entry *e;
+
+	if (buf || seq) {
+		e = to_entry(rb, lpos);
+		if (!is_valid(rb, lpos))
+			return -EINVAL;
+		/* memory barrier to ensure valid lpos */
+		smp_rmb();
+		if (buf) {
+			datsize = PRB_DATA_SIZE(e);
+			/* memory barrier to ensure load of datsize */
+			smp_rmb();
+			if (!is_valid(rb, lpos))
+				return -EINVAL;
+			if (PRB_INDEX(rb, lpos) + datsize >
+			    PRB_SIZE(rb) - PRB_DATA_ALIGN) {
+				return -EINVAL;
+			}
+			if (size > datsize)
+				size = datsize;
+			memcpy(buf, &e->data[0], size);
+		}
+		if (seq)
+			*seq = e->seq;
+		/* memory barrier to ensure loads of entry data */
+		smp_rmb();
+	}
+
+	if (!is_valid(rb, lpos))
+		return -EINVAL;
+
+	return datsize;
+}
+
+/*
+ * prb_iter_next: Advance to the next record.
+ * @iter: Iterator tracking the current position.
+ * @buf: A buffer to store the data of the next record. May be NULL.
+ * @size: The size of @buf. (Ignored if @buf is NULL.)
+ * @seq: The sequence number of the next record. May be NULL.
+ *
+ * If a next record is available, @iter is advanced and (if specified)
+ * the data and/or sequence number of that record are provided.
+ *
+ * It is safe to call this function from any context and state.
+ *
+ * Returns 1 if @iter was advanced, 0 if @iter is at the end of the list, or
+ * -EINVAL if @iter is now invalid.
+ */
+int prb_iter_next(struct prb_iterator *iter, char *buf, int size, u64 *seq)
+{
+	struct printk_ringbuffer *rb = iter->rb;
+	unsigned long next_lpos;
+	struct prb_entry *e;
+	unsigned int esize;
+
+	if (iter->lpos == PRB_INIT) {
+		next_lpos = atomic_long_read(&rb->tail);
+	} else {
+		if (!is_valid(rb, iter->lpos))
+			return -EINVAL;
+		/* memory barrier to ensure valid lpos */
+		smp_rmb();
+		e = to_entry(rb, iter->lpos);
+		esize = e->size;
+		/* memory barrier to ensure load of size */
+		smp_rmb();
+		if (!is_valid(rb, iter->lpos))
+			return -EINVAL;
+		next_lpos = iter->lpos + esize;
+	}
+	if (next_lpos == atomic_long_read(&rb->head))
+		return 0;
+	if (!is_valid(rb, next_lpos))
+		return -EINVAL;
+	/* memory barrier to ensure valid lpos */
+	smp_rmb();
+
+	iter->lpos = next_lpos;
+	e = to_entry(rb, iter->lpos);
+	esize = e->size;
+	/* memory barrier to ensure load of size */
+	smp_rmb();
+	if (!is_valid(rb, iter->lpos))
+		return -EINVAL;
+	if (esize == -1)
+		iter->lpos = PRB_WRAP_LPOS(rb, iter->lpos, 1);
+
+	if (prb_iter_data(iter, buf, size, seq) < 0)
+		return -EINVAL;
+
+	return 1;
+}
-- 
2.11.0

^ permalink raw reply related

* [RFC PATCH v1 04/25] printk-rb: add writer interface
From: John Ogness @ 2019-02-12 14:29 UTC (permalink / raw)
  To: linux-kernel
  Cc: Peter Zijlstra, Petr Mladek, Sergey Senozhatsky, Steven Rostedt,
	Daniel Wang, Andrew Morton, Linus Torvalds, Greg Kroah-Hartman,
	Alan Cox, Jiri Slaby, Peter Feiner, linux-serial,
	Sergey Senozhatsky
In-Reply-To: <20190212143003.48446-1-john.ogness@linutronix.de>

Add the writer functions prb_reserve() and prb_commit(). These make
use of processor-reentrant spin locks to limit the number of possible
interruption scenarios for the writers.

Signed-off-by: John Ogness <john.ogness@linutronix.de>
---
 include/linux/printk_ringbuffer.h |  17 ++++
 lib/printk_ringbuffer.c           | 172 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 189 insertions(+)

diff --git a/include/linux/printk_ringbuffer.h b/include/linux/printk_ringbuffer.h
index 0e6e8dd0d01e..1aec9d5666b1 100644
--- a/include/linux/printk_ringbuffer.h
+++ b/include/linux/printk_ringbuffer.h
@@ -24,6 +24,18 @@ struct printk_ringbuffer {
 	atomic_t ctx;
 };
 
+struct prb_entry {
+	unsigned int size;
+	u64 seq;
+	char data[0];
+};
+
+struct prb_handle {
+	struct printk_ringbuffer *rb;
+	unsigned int cpu;
+	struct prb_entry *entry;
+};
+
 #define DECLARE_STATIC_PRINTKRB_CPULOCK(name)				\
 static DEFINE_PER_CPU(unsigned long, _##name##_percpu_irqflags);	\
 static struct prb_cpulock name = {					\
@@ -45,6 +57,11 @@ static struct printk_ringbuffer name = {				\
 	.ctx = ATOMIC_INIT(0),						\
 }
 
+/* writer interface */
+char *prb_reserve(struct prb_handle *h, struct printk_ringbuffer *rb,
+		  unsigned int size);
+void prb_commit(struct prb_handle *h);
+
 /* utility functions */
 void prb_lock(struct prb_cpulock *cpu_lock, unsigned int *cpu_store);
 void prb_unlock(struct prb_cpulock *cpu_lock, unsigned int cpu_store);
diff --git a/lib/printk_ringbuffer.c b/lib/printk_ringbuffer.c
index 28958b0cf774..90c7f9a9f861 100644
--- a/lib/printk_ringbuffer.c
+++ b/lib/printk_ringbuffer.c
@@ -2,6 +2,14 @@
 #include <linux/smp.h>
 #include <linux/printk_ringbuffer.h>
 
+#define PRB_SIZE(rb) (1 << rb->size_bits)
+#define PRB_SIZE_BITMASK(rb) (PRB_SIZE(rb) - 1)
+#define PRB_INDEX(rb, lpos) (lpos & PRB_SIZE_BITMASK(rb))
+#define PRB_WRAPS(rb, lpos) (lpos >> rb->size_bits)
+#define PRB_WRAP_LPOS(rb, lpos, xtra) \
+	((PRB_WRAPS(rb, lpos) + xtra) << rb->size_bits)
+#define PRB_DATA_ALIGN sizeof(long)
+
 static bool __prb_trylock(struct prb_cpulock *cpu_lock,
 			  unsigned int *cpu_store)
 {
@@ -75,3 +83,167 @@ void prb_unlock(struct prb_cpulock *cpu_lock, unsigned int cpu_store)
 
 	put_cpu();
 }
+
+static struct prb_entry *to_entry(struct printk_ringbuffer *rb,
+				  unsigned long lpos)
+{
+	char *buffer = rb->buffer;
+	buffer += PRB_INDEX(rb, lpos);
+	return (struct prb_entry *)buffer;
+}
+
+static int calc_next(struct printk_ringbuffer *rb, unsigned long tail,
+		     unsigned long lpos, int size, unsigned long *calced_next)
+{
+	unsigned long next_lpos;
+	int ret = 0;
+again:
+	next_lpos = lpos + size;
+	if (next_lpos - tail > PRB_SIZE(rb))
+		return -1;
+
+	if (PRB_WRAPS(rb, lpos) != PRB_WRAPS(rb, next_lpos)) {
+		lpos = PRB_WRAP_LPOS(rb, next_lpos, 0);
+		ret |= 1;
+		goto again;
+	}
+
+	*calced_next = next_lpos;
+	return ret;
+}
+
+static bool push_tail(struct printk_ringbuffer *rb, unsigned long tail)
+{
+	unsigned long new_tail;
+	struct prb_entry *e;
+	unsigned long head;
+
+	if (tail != atomic_long_read(&rb->tail))
+		return true;
+
+	e = to_entry(rb, tail);
+	if (e->size != -1)
+		new_tail = tail + e->size;
+	else
+		new_tail = PRB_WRAP_LPOS(rb, tail, 1);
+
+	/* make sure the new tail does not overtake the head */
+	head = atomic_long_read(&rb->head);
+	if (head - new_tail > PRB_SIZE(rb))
+		return false;
+
+	atomic_long_cmpxchg(&rb->tail, tail, new_tail);
+	return true;
+}
+
+/*
+ * prb_commit: Commit a reserved entry to the ring buffer.
+ * @h: An entry handle referencing the data entry to commit.
+ *
+ * Commit data that has been reserved using prb_reserve(). Once the data
+ * block has been committed, it can be invalidated at any time. If a writer
+ * is interested in using the data after committing, the writer should make
+ * its own copy first or use the prb_iter_ reader functions to access the
+ * data in the ring buffer.
+ *
+ * It is safe to call this function from any context and state.
+ */
+void prb_commit(struct prb_handle *h)
+{
+	struct printk_ringbuffer *rb = h->rb;
+	struct prb_entry *e;
+	unsigned long head;
+	unsigned long res;
+
+	for (;;) {
+		if (atomic_read(&rb->ctx) != 1) {
+			/* the interrupted context will fixup head */
+			atomic_dec(&rb->ctx);
+			break;
+		}
+		/* assign sequence numbers before moving head */
+		head = atomic_long_read(&rb->head);
+		res = atomic_long_read(&rb->reserve);
+		while (head != res) {
+			e = to_entry(rb, head);
+			if (e->size == -1) {
+				head = PRB_WRAP_LPOS(rb, head, 1);
+				continue;
+			}
+			e->seq = ++rb->seq;
+			head += e->size;
+		}
+		atomic_long_set_release(&rb->head, res);
+		atomic_dec(&rb->ctx);
+
+		if (atomic_long_read(&rb->reserve) == res)
+			break;
+		atomic_inc(&rb->ctx);
+	}
+
+	prb_unlock(rb->cpulock, h->cpu);
+}
+
+/*
+ * prb_reserve: Reserve an entry within a ring buffer.
+ * @h: An entry handle to be setup and reference an entry.
+ * @rb: A ring buffer to reserve data within.
+ * @size: The number of bytes to reserve.
+ *
+ * Reserve an entry of at least @size bytes to be used by the caller. If
+ * successful, the data region of the entry belongs to the caller and cannot
+ * be invalidated by any other task/context. For this reason, the caller
+ * should call prb_commit() as quickly as possible in order to avoid preventing
+ * other tasks/contexts from reserving data in the case that the ring buffer
+ * has wrapped.
+ *
+ * It is safe to call this function from any context and state.
+ *
+ * Returns a pointer to the reserved entry (and @h is setup to reference that
+ * entry) or NULL if it was not possible to reserve data.
+ */
+char *prb_reserve(struct prb_handle *h, struct printk_ringbuffer *rb,
+		  unsigned int size)
+{
+	unsigned long tail, res1, res2;
+	int ret;
+
+	if (size == 0)
+		return NULL;
+	size += sizeof(struct prb_entry);
+	size += PRB_DATA_ALIGN - 1;
+	size &= ~(PRB_DATA_ALIGN - 1);
+	if (size >= PRB_SIZE(rb))
+		return NULL;
+
+	h->rb = rb;
+	prb_lock(rb->cpulock, &h->cpu);
+
+	atomic_inc(&rb->ctx);
+
+	do {
+		for (;;) {
+			tail = atomic_long_read(&rb->tail);
+			res1 = atomic_long_read(&rb->reserve);
+			ret = calc_next(rb, tail, res1, size, &res2);
+			if (ret >= 0)
+				break;
+			if (!push_tail(rb, tail)) {
+				prb_commit(h);
+				return NULL;
+			}
+		}
+	} while (!atomic_long_try_cmpxchg_acquire(&rb->reserve, &res1, res2));
+
+	h->entry = to_entry(rb, res1);
+
+	if (ret) {
+		/* handle wrap */
+		h->entry->size = -1;
+		h->entry = to_entry(rb, PRB_WRAP_LPOS(rb, res2, 0));
+	}
+
+	h->entry->size = size;
+
+	return &h->entry->data[0];
+}
-- 
2.11.0

^ permalink raw reply related

* [RFC PATCH v1 03/25] printk-rb: define ring buffer struct and initializer
From: John Ogness @ 2019-02-12 14:29 UTC (permalink / raw)
  To: linux-kernel
  Cc: Peter Zijlstra, Petr Mladek, Sergey Senozhatsky, Steven Rostedt,
	Daniel Wang, Andrew Morton, Linus Torvalds, Greg Kroah-Hartman,
	Alan Cox, Jiri Slaby, Peter Feiner, linux-serial,
	Sergey Senozhatsky
In-Reply-To: <20190212143003.48446-1-john.ogness@linutronix.de>

See Documentation/printk-ringbuffer.txt for details about the
initializer arguments.

Signed-off-by: John Ogness <john.ogness@linutronix.de>
---
 include/linux/printk_ringbuffer.h | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/include/linux/printk_ringbuffer.h b/include/linux/printk_ringbuffer.h
index 75f5708ea902..0e6e8dd0d01e 100644
--- a/include/linux/printk_ringbuffer.h
+++ b/include/linux/printk_ringbuffer.h
@@ -10,6 +10,20 @@ struct prb_cpulock {
 	unsigned long __percpu *irqflags;
 };
 
+struct printk_ringbuffer {
+	void *buffer;
+	unsigned int size_bits;
+
+	u64 seq;
+
+	atomic_long_t tail;
+	atomic_long_t head;
+	atomic_long_t reserve;
+
+	struct prb_cpulock *cpulock;
+	atomic_t ctx;
+};
+
 #define DECLARE_STATIC_PRINTKRB_CPULOCK(name)				\
 static DEFINE_PER_CPU(unsigned long, _##name##_percpu_irqflags);	\
 static struct prb_cpulock name = {					\
@@ -17,6 +31,20 @@ static struct prb_cpulock name = {					\
 	.irqflags = &_##name##_percpu_irqflags,				\
 }
 
+#define DECLARE_STATIC_PRINTKRB(name, szbits, cpulockptr)		\
+static char _##name##_buffer[1 << (szbits)]				\
+	__aligned(__alignof__(long));					\
+static struct printk_ringbuffer name = {				\
+	.buffer = &_##name##_buffer[0],					\
+	.size_bits = szbits,						\
+	.seq = 0,							\
+	.tail = ATOMIC_LONG_INIT(-111 * sizeof(long)),			\
+	.head = ATOMIC_LONG_INIT(-111 * sizeof(long)),			\
+	.reserve = ATOMIC_LONG_INIT(-111 * sizeof(long)),		\
+	.cpulock = cpulockptr,						\
+	.ctx = ATOMIC_INIT(0),						\
+}
+
 /* utility functions */
 void prb_lock(struct prb_cpulock *cpu_lock, unsigned int *cpu_store);
 void prb_unlock(struct prb_cpulock *cpu_lock, unsigned int cpu_store);
-- 
2.11.0

^ permalink raw reply related

* [RFC PATCH v1 02/25] printk-rb: add prb locking functions
From: John Ogness @ 2019-02-12 14:29 UTC (permalink / raw)
  To: linux-kernel
  Cc: Peter Zijlstra, Petr Mladek, Sergey Senozhatsky, Steven Rostedt,
	Daniel Wang, Andrew Morton, Linus Torvalds, Greg Kroah-Hartman,
	Alan Cox, Jiri Slaby, Peter Feiner, linux-serial,
	Sergey Senozhatsky
In-Reply-To: <20190212143003.48446-1-john.ogness@linutronix.de>

Add processor-reentrant spin locking functions. These allow
restricting the number of possible contexts to 2, which can simplify
implementing code that also supports NMI interruptions.

    prb_lock();

    /*
     * This code is synchronized with all contexts
     * except an NMI on the same processor.
     */

    prb_unlock();

In order to support printk's emergency messages, a
processor-reentrant spin lock will be used to control raw access to
the emergency console. However, it must be the same
processor-reentrant spin lock as the one used by the ring buffer,
otherwise a deadlock can occur:

    CPU1: printk lock -> emergency -> serial lock
    CPU2: serial lock -> printk lock

By making the processor-reentrant implemtation available externally,
printk can use the same atomic_t for the ring buffer as for the
emergency console and thus avoid the above deadlock.

Signed-off-by: John Ogness <john.ogness@linutronix.de>
---
 include/linux/printk_ringbuffer.h | 24 ++++++++++++
 lib/Makefile                      |  2 +-
 lib/printk_ringbuffer.c           | 77 +++++++++++++++++++++++++++++++++++++++
 3 files changed, 102 insertions(+), 1 deletion(-)
 create mode 100644 include/linux/printk_ringbuffer.h
 create mode 100644 lib/printk_ringbuffer.c

diff --git a/include/linux/printk_ringbuffer.h b/include/linux/printk_ringbuffer.h
new file mode 100644
index 000000000000..75f5708ea902
--- /dev/null
+++ b/include/linux/printk_ringbuffer.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_PRINTK_RINGBUFFER_H
+#define _LINUX_PRINTK_RINGBUFFER_H
+
+#include <linux/atomic.h>
+#include <linux/percpu.h>
+
+struct prb_cpulock {
+	atomic_t owner;
+	unsigned long __percpu *irqflags;
+};
+
+#define DECLARE_STATIC_PRINTKRB_CPULOCK(name)				\
+static DEFINE_PER_CPU(unsigned long, _##name##_percpu_irqflags);	\
+static struct prb_cpulock name = {					\
+	.owner = ATOMIC_INIT(-1),					\
+	.irqflags = &_##name##_percpu_irqflags,				\
+}
+
+/* utility functions */
+void prb_lock(struct prb_cpulock *cpu_lock, unsigned int *cpu_store);
+void prb_unlock(struct prb_cpulock *cpu_lock, unsigned int cpu_store);
+
+#endif /*_LINUX_PRINTK_RINGBUFFER_H */
diff --git a/lib/Makefile b/lib/Makefile
index e1b59da71418..77a20bfd232e 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -19,7 +19,7 @@ KCOV_INSTRUMENT_dynamic_debug.o := n
 
 lib-y := ctype.o string.o vsprintf.o cmdline.o \
 	 rbtree.o radix-tree.o timerqueue.o xarray.o \
-	 idr.o int_sqrt.o extable.o \
+	 idr.o int_sqrt.o extable.o printk_ringbuffer.o \
 	 sha1.o chacha.o irq_regs.o argv_split.o \
 	 flex_proportions.o ratelimit.o show_mem.o \
 	 is_single_threaded.o plist.o decompress.o kobject_uevent.o \
diff --git a/lib/printk_ringbuffer.c b/lib/printk_ringbuffer.c
new file mode 100644
index 000000000000..28958b0cf774
--- /dev/null
+++ b/lib/printk_ringbuffer.c
@@ -0,0 +1,77 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/smp.h>
+#include <linux/printk_ringbuffer.h>
+
+static bool __prb_trylock(struct prb_cpulock *cpu_lock,
+			  unsigned int *cpu_store)
+{
+	unsigned long *flags;
+	unsigned int cpu;
+
+	cpu = get_cpu();
+
+	*cpu_store = atomic_read(&cpu_lock->owner);
+	/* memory barrier to ensure the current lock owner is visible */
+	smp_rmb();
+	if (*cpu_store == -1) {
+		flags = per_cpu_ptr(cpu_lock->irqflags, cpu);
+		local_irq_save(*flags);
+		if (atomic_try_cmpxchg_acquire(&cpu_lock->owner,
+					       cpu_store, cpu)) {
+			return true;
+		}
+		local_irq_restore(*flags);
+	} else if (*cpu_store == cpu) {
+		return true;
+	}
+
+	put_cpu();
+	return false;
+}
+
+/*
+ * prb_lock: Perform a processor-reentrant spin lock.
+ * @cpu_lock: A pointer to the lock object.
+ * @cpu_store: A "flags" pointer to store lock status information.
+ *
+ * If no processor has the lock, the calling processor takes the lock and
+ * becomes the owner. If the calling processor is already the owner of the
+ * lock, this function succeeds immediately. If lock is locked by another
+ * processor, this function spins until the calling processor becomes the
+ * owner.
+ *
+ * It is safe to call this function from any context and state.
+ */
+void prb_lock(struct prb_cpulock *cpu_lock, unsigned int *cpu_store)
+{
+	for (;;) {
+		if (__prb_trylock(cpu_lock, cpu_store))
+			break;
+		cpu_relax();
+	}
+}
+
+/*
+ * prb_unlock: Perform a processor-reentrant spin unlock.
+ * @cpu_lock: A pointer to the lock object.
+ * @cpu_store: A "flags" object storing lock status information.
+ *
+ * Release the lock. The calling processor must be the owner of the lock.
+ *
+ * It is safe to call this function from any context and state.
+ */
+void prb_unlock(struct prb_cpulock *cpu_lock, unsigned int cpu_store)
+{
+	unsigned long *flags;
+	unsigned int cpu;
+
+	cpu = atomic_read(&cpu_lock->owner);
+	atomic_set_release(&cpu_lock->owner, cpu_store);
+
+	if (cpu_store == -1) {
+		flags = per_cpu_ptr(cpu_lock->irqflags, cpu);
+		local_irq_restore(*flags);
+	}
+
+	put_cpu();
+}
-- 
2.11.0

^ permalink raw reply related

* [RFC PATCH v1 01/25] printk-rb: add printk ring buffer documentation
From: John Ogness @ 2019-02-12 14:29 UTC (permalink / raw)
  To: linux-kernel
  Cc: Peter Zijlstra, Petr Mladek, Sergey Senozhatsky, Steven Rostedt,
	Daniel Wang, Andrew Morton, Linus Torvalds, Greg Kroah-Hartman,
	Alan Cox, Jiri Slaby, Peter Feiner, linux-serial,
	Sergey Senozhatsky
In-Reply-To: <20190212143003.48446-1-john.ogness@linutronix.de>

The full documentation file for the printk ring buffer.

Signed-off-by: John Ogness <john.ogness@linutronix.de>
---
 Documentation/printk-ringbuffer.txt | 377 ++++++++++++++++++++++++++++++++++++
 1 file changed, 377 insertions(+)
 create mode 100644 Documentation/printk-ringbuffer.txt

diff --git a/Documentation/printk-ringbuffer.txt b/Documentation/printk-ringbuffer.txt
new file mode 100644
index 000000000000..6bde5dbd8545
--- /dev/null
+++ b/Documentation/printk-ringbuffer.txt
@@ -0,0 +1,377 @@
+struct printk_ringbuffer
+------------------------
+John Ogness <john.ogness@linutronix.de>
+
+Overview
+~~~~~~~~
+As the name suggests, this ring buffer was implemented specifically to serve
+the needs of the printk() infrastructure. The ring buffer itself is not
+specific to printk and could be used for other purposes. _However_, the
+requirements and semantics of printk are rather unique. If you intend to use
+this ring buffer for anything other than printk, you need to be very clear on
+its features, behavior, and pitfalls.
+
+Features
+^^^^^^^^
+The printk ring buffer has the following features:
+
+- single global buffer
+- resides in initialized data section (available at early boot)
+- lockless readers
+- supports multiple writers
+- supports multiple non-consuming readers
+- safe from any context (including NMI)
+- groups bytes into variable length blocks (referenced by entries)
+- entries tagged with sequence numbers
+
+Behavior
+^^^^^^^^
+Since the printk ring buffer readers are lockless, there exists no
+synchronization between readers and writers. Basically writers are the tasks
+in control and may overwrite any and all committed data at any time and from
+any context. For this reason readers can miss entries if they are overwritten
+before the reader was able to access the data. The reader API implementation
+is such that reader access to entries is atomic, so there is no risk of
+readers having to deal with partial or corrupt data. Also, entries are
+tagged with sequence numbers so readers can recognize if entries were missed.
+
+Writing to the ring buffer consists of 2 steps. First a writer must reserve
+an entry of desired size. After this step the writer has exclusive access
+to the memory region. Once the data has been written to memory, it needs to
+be committed to the ring buffer. After this step the entry has been inserted
+into the ring buffer and assigned an appropriate sequence number.
+
+Once committed, a writer must no longer access the data directly. This is
+because the data may have been overwritten and no longer exists. If a
+writer must access the data, it should either keep a private copy before
+committing the entry or use the reader API to gain access to the data.
+
+Because of how the data backend is implemented, entries that have been
+reserved but not yet committed act as barriers, preventing future writers
+from filling the ring buffer beyond the location of the reserved but not
+yet committed entry region. For this reason it is *important* that writers
+perform both reserve and commit as quickly as possible. Also, be aware that
+preemption and local interrupts are disabled and writing to the ring buffer
+is processor-reentrant locked during the reserve/commit window. Writers in
+NMI contexts can still preempt any other writers, but as long as these
+writers do not write a large amount of data with respect to the ring buffer
+size, this should not become an issue.
+
+API
+~~~
+
+Declaration
+^^^^^^^^^^^
+The printk ring buffer can be instantiated as a static structure:
+
+ /* declare a static struct printk_ringbuffer */
+ #define DECLARE_STATIC_PRINTKRB(name, szbits, cpulockptr)
+
+The value of szbits specifies the size of the ring buffer in bits. The
+cpulockptr field is a pointer to a prb_cpulock struct that is used to
+perform processor-reentrant spin locking for the writers. It is specified
+externally because it may be used for multiple ring buffers (or other
+code) to synchronize writers without risk of deadlock.
+
+Here is an example of a declaration of a printk ring buffer specifying a
+32KB (2^15) ring buffer:
+
+....
+DECLARE_STATIC_PRINTKRB_CPULOCK(rb_cpulock);
+DECLARE_STATIC_PRINTKRB(rb, 15, &rb_cpulock);
+....
+
+If writers will be using multiple ring buffers and the ordering of that usage
+is not clear, the same prb_cpulock should be used for both ring buffers.
+
+Writer API
+^^^^^^^^^^
+The writer API consists of 2 functions. The first is to reserve an entry in
+the ring buffer, the second is to commit that data to the ring buffer. The
+reserved entry information is stored within a provided `struct prb_handle`.
+
+ /* reserve an entry */
+ char *prb_reserve(struct prb_handle *h, struct printk_ringbuffer *rb,
+                   unsigned int size);
+
+ /* commit a reserved entry to the ring buffer */
+ void prb_commit(struct prb_handle *h);
+
+Here is an example of a function to write data to a ring buffer:
+
+....
+int write_data(struct printk_ringbuffer *rb, char *data, int size)
+{
+    struct prb_handle h;
+    char *buf;
+
+    buf = prb_reserve(&h, rb, size);
+    if (!buf)
+        return -1;
+    memcpy(buf, data, size);
+    prb_commit(&h);
+
+    return 0;
+}
+....
+
+Pitfalls
+++++++++
+Be aware that prb_reserve() can fail. A retry might be successful, but it
+depends entirely on whether or not the next part of the ring buffer to
+overwrite belongs to reserved but not yet committed entries of other writers.
+Writers can use the prb_inc_lost() function to allow readers to notice that a
+message was lost.
+
+Reader API
+^^^^^^^^^^
+The reader API utilizes a `struct prb_iterator` to track the reader's
+position in the ring buffer.
+
+ /* declare a pre-initialized static iterator for a ring buffer */
+ #define DECLARE_STATIC_PRINTKRB_ITER(name, rbaddr)
+
+ /* initialize iterator for a ring buffer (if static macro NOT used) */
+ void prb_iter_init(struct prb_iterator *iter,
+                    struct printk_ringbuffer *rb, u64 *seq);
+
+ /* make a deep copy of an iterator */
+ void prb_iter_copy(struct prb_iterator *dest,
+                    struct prb_iterator *src);
+
+ /* non-blocking, advance to next entry (and read the data) */
+ int prb_iter_next(struct prb_iterator *iter, char *buf,
+                   int size, u64 *seq);
+
+ /* blocking, advance to next entry (and read the data) */
+ int prb_iter_wait_next(struct prb_iterator *iter, char *buf,
+                        int size, u64 *seq);
+
+ /* position iterator at the entry seq */
+ int prb_iter_seek(struct prb_iterator *iter, u64 seq);
+
+ /* read data at current position */
+ int prb_iter_data(struct prb_iterator *iter, char *buf,
+                   int size, u64 *seq);
+
+Typically prb_iter_data() is not needed because the data can be retrieved
+directly with prb_iter_next().
+
+Here is an example of a non-blocking function that will read all the data in
+a ring buffer:
+
+....
+void read_all_data(struct printk_ringbuffer *rb, char *buf, int size)
+{
+    struct prb_iterator iter;
+    u64 prev_seq = 0;
+    u64 seq;
+    int ret;
+
+    prb_iter_init(&iter, rb, NULL);
+
+    for (;;) {
+        ret = prb_iter_next(&iter, buf, size, &seq);
+        if (ret > 0) {
+            if (seq != ++prev_seq) {
+                /* "seq - prev_seq" entries missed */
+                prev_seq = seq;
+            }
+            /* process buf here */
+        } else if (ret == 0) {
+            /* hit the end, done */
+            break;
+        } else if (ret < 0) {
+            /*
+             * iterator is invalid, a writer overtook us, reset the
+             * iterator and keep going, entries were missed
+             */
+            prb_iter_init(&iter, rb, NULL);
+        }
+    }
+}
+....
+
+Pitfalls
+++++++++
+The reader's iterator can become invalid at any time because the reader was
+overtaken by a writer. Typically the reader should reset the iterator back
+to the current oldest entry (which will be newer than the entry the reader
+was at) and continue, noting the number of entries that were missed.
+
+Utility API
+^^^^^^^^^^^
+Several functions are available as convenience for external code.
+
+ /* query the size of the data buffer */
+ int prb_buffer_size(struct printk_ringbuffer *rb);
+
+ /* skip a seq number to signify a lost record */
+ void prb_inc_lost(struct printk_ringbuffer *rb);
+
+ /* processor-reentrant spin lock */
+ void prb_lock(struct prb_cpulock *cpu_lock, unsigned int *cpu_store);
+
+ /* processor-reentrant spin unlock */
+ void prb_lock(struct prb_cpulock *cpu_lock, unsigned int *cpu_store);
+
+Pitfalls
+++++++++
+Although the value returned by prb_buffer_size() does represent an absolute
+upper bound, the amount of data that can be stored within the ring buffer
+is actually less because of the additional storage space of a header for each
+entry.
+
+The prb_lock() and prb_unlock() functions can be used to synchronize between
+ring buffer writers and other external activities. The function of a
+processor-reentrant spin lock is to disable preemption and local interrupts
+and synchronize against other processors. It does *not* protect against
+multiple contexts of a single processor, i.e NMI.
+
+Implementation
+~~~~~~~~~~~~~~
+This section describes several of the implementation concepts and details to
+help developers better understand the code.
+
+Entries
+^^^^^^^
+All ring buffer data is stored within a single static byte array. The reason
+for this is to ensure that any pointers to the data (past and present) will
+always point to valid memory. This is important because the lockless readers
+may be preempted for long periods of time and when they resume may be working
+with expired pointers.
+
+Entries are identified by start index and size. (The start index plus size
+is the start index of the next entry.) The start index is not simply an
+offset into the byte array, but rather a logical position (lpos) that maps
+directly to byte array offsets.
+
+For example, for a byte array of 1000, an entry may have have a start index
+of 100. Another entry may have a start index of 1100. And yet another 2100.
+All of these entry are pointing to the same memory region, but only the most
+recent entry is valid. The other entries are pointing to valid memory, but
+represent entries that have been overwritten.
+
+Note that due to overflowing, the most recent entry is not necessarily the one
+with the highest lpos value. Indeed, the printk ring buffer initializes its
+data such that an overflow happens relatively quickly in order to validate the
+handling of this situation. The implementation assumes that an lpos (unsigned
+long) will never completely wrap while a reader is preempted. If this were to
+become an issue, the seq number (which never wraps) could be used to increase
+the robustness of handling this situation.
+
+Buffer Wrapping
+^^^^^^^^^^^^^^^
+If an entry starts near the end of the byte array but would extend beyond it,
+a special terminating entry (size = -1) is inserted into the byte array and
+the real entry is placed at the beginning of the byte array. This can waste
+space at the end of the byte array, but simplifies the implementation by
+allowing writers to always work with contiguous buffers.
+
+Note that the size field is the first 4 bytes of the entry header. Also note
+that calc_next() always ensures that there are at least 4 bytes left at the
+end of the byte array to allow room for a terminating entry.
+
+Ring Buffer Pointers
+^^^^^^^^^^^^^^^^^^^^
+Three pointers (lpos values) are used to manage the ring buffer:
+
+ - _tail_: points to the oldest entry
+ - _head_: points to where the next new committed entry will be
+ - _reserve_: points to where the next new reserved entry will be
+
+These pointers always maintain a logical ordering:
+
+ tail <= head <= reserve
+
+The reserve pointer moves forward when a writer reserves a new entry. The
+head pointer moves forward when a writer commits a new entry.
+
+The reserve pointer cannot overwrite the tail pointer in a wrap situation. In
+such a situation, the tail pointer must be "pushed forward", thus
+invalidating that oldest entry. Readers identify if they are accessing a
+valid entry by ensuring their entry pointer is `>= tail && < head`.
+
+If the tail pointer is equal to the head pointer, it cannot be pushed and any
+reserve operation will fail. The only resolution is for writers to commit
+their reserved entries.
+
+Processor-Reentrant Locking
+^^^^^^^^^^^^^^^^^^^^^^^^^^^
+The purpose of the processor-reentrant locking is to limit the interruption
+scenarios of writers to 2 contexts. This allows for a simplified
+implementation where:
+
+- The reserve/commit window only exists on 1 processor at a time. A reserve
+  can never fail due to uncommitted entries of other processors.
+
+- When committing entries, it is trivial to handle the situation when
+  subsequent entries have already been committed, i.e. managing the head
+  pointer.
+
+Performance
+~~~~~~~~~~~
+Some basic tests were performed on a quad Intel(R) Xeon(R) CPU E5-2697 v4 at
+2.30GHz (36 cores / 72 threads). All tests involved writing a total of
+32,000,000 records at an average of 33 bytes each. Each writer was pinned to
+its own CPU and would write as fast as it could until a total of 32,000,000
+records were written. All tests involved 2 readers that were both pinned
+together to another CPU. Each reader would read as fast as it could and track
+how many of the 32,000,000 records it could read. All tests used a ring buffer
+of 16KB in size, which holds around 350 records (header + data for each
+entry).
+
+The only difference between the tests is the number of writers (and thus also
+the number of records per writer). As more writers are added, the time to
+write a record increases. This is because data pointers, modified via cmpxchg,
+and global data access in general become more contended.
+
+1 writer
+^^^^^^^^
+ runtime: 0m 18s
+ reader1: 16219900/32000000 (50%) records
+ reader2: 16141582/32000000 (50%) records
+
+2 writers
+^^^^^^^^^
+ runtime: 0m 32s
+ reader1: 16327957/32000000 (51%) records
+ reader2: 16313988/32000000 (50%) records
+
+4 writers
+^^^^^^^^^
+ runtime: 0m 42s
+ reader1: 16421642/32000000 (51%) records
+ reader2: 16417224/32000000 (51%) records
+
+8 writers
+^^^^^^^^^
+ runtime: 0m 43s
+ reader1: 16418300/32000000 (51%) records
+ reader2: 16432222/32000000 (51%) records
+
+16 writers
+^^^^^^^^^^
+ runtime: 0m 54s
+ reader1: 16539189/32000000 (51%) records
+ reader2: 16542711/32000000 (51%) records
+
+32 writers
+^^^^^^^^^^
+ runtime: 1m 13s
+ reader1: 16731808/32000000 (52%) records
+ reader2: 16735119/32000000 (52%) records
+
+Comments
+^^^^^^^^
+It is particularly interesting to compare/contrast the 1-writer and 32-writer
+tests. Despite the writing of the 32,000,000 records taking over 4 times
+longer, the readers (which perform no cmpxchg) were still unable to keep up.
+This shows that the memory contention between the increasing number of CPUs
+also has a dramatic effect on readers.
+
+It should also be noted that in all cases each reader was able to read >=50%
+of the records. This means that a single reader would have been able to keep
+up with the writer(s) in all cases, becoming slightly easier as more writers
+are added. This was the purpose of pinning 2 readers to 1 CPU: to observe how
+maximum reader performance changes.
-- 
2.11.0

^ permalink raw reply related

* [RFC PATCH v1 00/25] printk: new implementation
From: John Ogness @ 2019-02-12 14:29 UTC (permalink / raw)
  To: linux-kernel
  Cc: Peter Zijlstra, Petr Mladek, Sergey Senozhatsky, Steven Rostedt,
	Daniel Wang, Andrew Morton, Linus Torvalds, Greg Kroah-Hartman,
	Alan Cox, Jiri Slaby, Peter Feiner, linux-serial,
	Sergey Senozhatsky

Hello,

As probably many of you are aware, the current printk implementation
has some issues. This series (against 5.0-rc6) makes some fundamental
changes in an attempt to address these issues. The particular issues I
am referring to:

1. The printk buffer is protected by a global raw spinlock for readers
   and writers. This restricts the contexts that are allowed to
   access the buffer.

2. Because of #1, NMI and recursive contexts are handled by deferring
   logging/printing to a spinlock-safe context. This means that
   messages will not be visible if (for example) the kernel dies in
   NMI context and the irq_work mechanism does not survive.

3. Because of #1, when *not* using features such as PREEMPT_RT, large
   latencies exist when printing to slow consoles.

4. Because of #1, when _using_ features such as PREEMPT_RT, printing
   to the consoles is further restricted to contexts that can sleep.
   This can cause large latencies in seeing the messages.

5. Printing to consoles is the responsibility of the printk caller
   and that caller may be required to print many messages that other
   printk callers inserted. Because of this there can be enormous
   variance in the runtime of a printk call.

6. The timestamps on the printk messages are generated when the
   message is inserted into the buffer. But the call to printk may
   have occurred long before that. This is especially true for
   messages in the printk_safe context. This means that printk timing
   information (although neatly "sorted") is neither accurate nor
   reliable.

7. Loglevel INFO is handled the same as ERR. There seems to be an
   endless effort to get printk to show _all_ messages as quickly as
   possible in case of a panic (i.e. printing from any context), but
   at the same time try not to have printk be too intrusive for the
   callers. These are conflicting requirements that lead to a printk
   implementation that does a sub-optimal job of satisfying both
   sides.

To address these issues this series makes the following changes:

- Implements a new printk ringbuffer that supports lockless multiple
  readers. Writers are synchronized per-cpu with support for all
  contexts (including NMI). (This implementation was inspired by a
  post[0] from Peter Zijlstra.)

- The new printk ringbuffer uses the initialized data segment of the
  kernel for its data buffer so that it is available on early boot.

- Timestamps are captured at the beginning of the printk call.

- A dedicated kernel thread is created for printing to all consoles in
  a fully preemptible context.

- A new (optional) console operation "write_atomic" is introduced that
  console drivers may implement. This function must be NMI-safe. An
  implementation for the 8250 UART driver is provided.

- The concept of "emergency messages" is introduced that allows
  important messages (based on a new emergency loglevel threshold) to
  be immediately written to any consoles supporting write_atomic,
  regardless of the context. This allows non-emergency printk calls
  (i.e. INFO) to run in nearly constant time, with their console
  printing taking place in a separate fully preemptible context. And
  emergency messages (i.e. ERR) are printed immediately for the user.

- Individual emergency messages are written contiguously and a CPU-ID
  field is added to all output to allow for sorting of messages being
  printed by multiple CPUs simultaneously.

Although the RFC series works, there are some open issues that I chose
to leave open until I received some initial feedback from the community.
These issues are:

- The behavior of LOG_CONT has been changed. Rather than using the
  current task as the "cont owner", the CPU ID is used. NMIs have
  their own cont buffer so NMI and non-NMI tasks can safely use
  LOG_CONT.

- The runtime resizing features of the printk buffer are not
  implemented.

- The exported vmcore symbols relating to the printk buffer no longer
  exist and no replacements have been defined. I do not know all the
  userspace consequences of making a change here.

- The introduction of the CPU-ID field not only changes the usual
  printk output, but also defines a new field in the extended console
  output. I do not know the userspace consequences of making a change
  here.

- console_flush_on_panic() currently is a NOP. It is pretty clear how
  this could be implemented if atomic_write was available. But if no
  such console is registered, it is not clear what should be done. Is
  this function really even needed?

- Right now the emergency messages are set apart from the
  non-emergency messages using '\n'. There have been requests that
  some special markers could be specifiable to make it easier for
  parsers. Possibly as CONFIG_ and boot options?

- Be aware that printk output is no longer time-sorted. Actually, it
  never was, but now you see the real timestamps. This seems strange
  at first.

- The ringbuffer API is not very pretty. It grew to be what it is
  due to the varying requirements of the different aspects of printk
  (syslog, kmsg_dump, /dev/kmsg, console) and the complexity of
  handling lockless reading, which can fall behind at any moment.

- Memory barriers are not my specialty. A critical eye on their
  usage (or lack thereof) in the ringbuffer code would be greatly
  appreciated.

The first 7 patches introduce the new printk ringbuffer. The
remaining 18 go through and replace the various components of the
printk implementation. All patches are against 5.0-rc6 and each
yield a buildable/testable system.

John Ogness

[0] http://lkml.kernel.org/r/20181017140044.GK3121@hirez.programming.kicks-ass.net

John Ogness (25):
  printk-rb: add printk ring buffer documentation
  printk-rb: add prb locking functions
  printk-rb: define ring buffer struct and initializer
  printk-rb: add writer interface
  printk-rb: add basic non-blocking reading interface
  printk-rb: add blocking reader support
  printk-rb: add functionality required by printk
  printk: add ring buffer and kthread
  printk: remove exclusive console hack
  printk: redirect emit/store to new ringbuffer
  printk_safe: remove printk safe code
  printk: minimize console locking implementation
  printk: track seq per console
  printk: do boot_delay_msec inside printk_delay
  printk: print history for new consoles
  printk: implement CON_PRINTBUFFER
  printk: add processor number to output
  console: add write_atomic interface
  printk: introduce emergency messages
  serial: 8250: implement write_atomic
  printk: implement KERN_CONT
  printk: implement /dev/kmsg
  printk: implement syslog
  printk: implement kmsg_dump
  printk: remove unused code

 Documentation/printk-ringbuffer.txt |  377 +++++++
 drivers/tty/serial/8250/8250.h      |    4 +
 drivers/tty/serial/8250/8250_core.c |   19 +-
 drivers/tty/serial/8250/8250_dma.c  |    5 +-
 drivers/tty/serial/8250/8250_port.c |  154 ++-
 fs/proc/kmsg.c                      |    4 +-
 include/linux/console.h             |    6 +
 include/linux/hardirq.h             |    2 -
 include/linux/kmsg_dump.h           |    6 +-
 include/linux/printk.h              |   30 +-
 include/linux/printk_ringbuffer.h   |  114 +++
 include/linux/serial_8250.h         |    5 +
 init/main.c                         |    1 -
 kernel/kexec_core.c                 |    1 -
 kernel/panic.c                      |    3 -
 kernel/printk/Makefile              |    1 -
 kernel/printk/internal.h            |   79 --
 kernel/printk/printk.c              | 1895 +++++++++++++++++------------------
 kernel/printk/printk_safe.c         |  427 --------
 kernel/trace/trace.c                |    2 -
 lib/Kconfig.debug                   |   17 +
 lib/Makefile                        |    2 +-
 lib/bust_spinlocks.c                |    3 +-
 lib/nmi_backtrace.c                 |    6 -
 lib/printk_ringbuffer.c             |  583 +++++++++++
 25 files changed, 2137 insertions(+), 1609 deletions(-)
 create mode 100644 Documentation/printk-ringbuffer.txt
 create mode 100644 include/linux/printk_ringbuffer.h
 delete mode 100644 kernel/printk/internal.h
 delete mode 100644 kernel/printk/printk_safe.c
 create mode 100644 lib/printk_ringbuffer.c

-- 
2.11.0

^ permalink raw reply

* Re: RFC: drop ISA support in the synlink tty driver?
From: Jiri Slaby @ 2019-02-12  8:40 UTC (permalink / raw)
  To: Greg Kroah-Hartman; +Cc: Christoph Hellwig, linux-kernel, linux-serial
In-Reply-To: <20190212082517.GA27889@kroah.com>

On 12. 02. 19, 9:25, Greg Kroah-Hartman wrote:
> It's just a #define, in a uapi file, so we should probably leave it as
> userspace programs _might_ depend on it.  I have no idea why, but oh
> well...

Uh, I didn't even think this could be in a uapi header. Ok, it makes
sense then.

thanks,
-- 
js
suse labs

^ permalink raw reply

* Re: RFC: drop ISA support in the synlink tty driver?
From: Greg Kroah-Hartman @ 2019-02-12  8:25 UTC (permalink / raw)
  To: Jiri Slaby; +Cc: Christoph Hellwig, linux-kernel, linux-serial
In-Reply-To: <7585c8b6-6dc4-2556-d14f-2fd60dc7df90@suse.cz>

On Tue, Feb 12, 2019 at 09:08:41AM +0100, Jiri Slaby wrote:
> On 12. 02. 19, 8:51, Greg Kroah-Hartman wrote:
> > On Mon, Feb 11, 2019 at 02:25:33PM +0100, Christoph Hellwig wrote:
> >> Hi Greg and Jiri,
> >>
> >> I've been working hard to get rid of the remaining callers the pass a
> >> NULL struct device to the DMA mapping functions and am almost done.
> >>
> >> The only non-trivial driver is the synclink driver, which has legacy
> >> early 90s style ISA support that doesn't use the device model at all.
> >>
> >> In theory we could convert it to an isa_driver, but without testing
> >> that seems rather dangerous.  So for now I would suggest that we
> >> remove the ISA support in this driver - if anyone cares enough we
> >> can resurrect it from the git history and convert it to use the driver
> >> model.
> > 
> > No objection from me at all, I'll go queue this up now, thanks.
> 
> Agreed, but I would kill also the MGSL_BUS_TYPE_ISA macro proper.

It's just a #define, in a uapi file, so we should probably leave it as
userspace programs _might_ depend on it.  I have no idea why, but oh
well...

thanks,

greg k-h

^ permalink raw reply

* Re: RFC: drop ISA support in the synlink tty driver?
From: Jiri Slaby @ 2019-02-12  8:08 UTC (permalink / raw)
  To: Greg Kroah-Hartman, Christoph Hellwig; +Cc: linux-kernel, linux-serial
In-Reply-To: <20190212075151.GA7588@kroah.com>

On 12. 02. 19, 8:51, Greg Kroah-Hartman wrote:
> On Mon, Feb 11, 2019 at 02:25:33PM +0100, Christoph Hellwig wrote:
>> Hi Greg and Jiri,
>>
>> I've been working hard to get rid of the remaining callers the pass a
>> NULL struct device to the DMA mapping functions and am almost done.
>>
>> The only non-trivial driver is the synclink driver, which has legacy
>> early 90s style ISA support that doesn't use the device model at all.
>>
>> In theory we could convert it to an isa_driver, but without testing
>> that seems rather dangerous.  So for now I would suggest that we
>> remove the ISA support in this driver - if anyone cares enough we
>> can resurrect it from the git history and convert it to use the driver
>> model.
> 
> No objection from me at all, I'll go queue this up now, thanks.

Agreed, but I would kill also the MGSL_BUS_TYPE_ISA macro proper.

thanks,
-- 
js
suse labs

^ permalink raw reply

* Re: RFC: drop ISA support in the synlink tty driver?
From: Greg Kroah-Hartman @ 2019-02-12  7:51 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: Jiri Slaby, linux-serial, linux-kernel
In-Reply-To: <20190211132534.29407-1-hch@lst.de>

On Mon, Feb 11, 2019 at 02:25:33PM +0100, Christoph Hellwig wrote:
> Hi Greg and Jiri,
> 
> I've been working hard to get rid of the remaining callers the pass a
> NULL struct device to the DMA mapping functions and am almost done.
> 
> The only non-trivial driver is the synclink driver, which has legacy
> early 90s style ISA support that doesn't use the device model at all.
> 
> In theory we could convert it to an isa_driver, but without testing
> that seems rather dangerous.  So for now I would suggest that we
> remove the ISA support in this driver - if anyone cares enough we
> can resurrect it from the git history and convert it to use the driver
> model.

No objection from me at all, I'll go queue this up now, thanks.

greg k-h

^ permalink raw reply

* Re: [PATCH] tty: serial: qcom_geni_serial: Allow mctrl when flow control is disabled
From: Matthias Kaehlcke @ 2019-02-12  0:03 UTC (permalink / raw)
  To: msavaliy
  Cc: Greg Kroah-Hartman, Jiri Slaby, linux-serial, linux-kernel,
	Johan Hovold, Balakrishna Godavarthi, linux-kernel-owner
In-Reply-To: <3d739661db11d669cf8a9cba3f0853fa@codeaurora.org>

Sorry for the late reply, your message got buried in my inbox :(

On Mon, Jan 21, 2019 at 08:05:10PM +0530, msavaliy@codeaurora.org wrote:
> On 2019-01-19 05:53, Matthias Kaehlcke wrote:
> > The geni set/get_mctrl() functions currently do nothing unless
> > hardware flow control is enabled. Remove this arbitrary limitation.
> > 
> > Suggested-by: Johan Hovold <johan@kernel.org>
> > Fixes: 8a8a66a1a18a ("tty: serial: qcom_geni_serial: Add support for
> > flow control")
> > Signed-off-by: Matthias Kaehlcke <mka@chromium.org>
> > ---
> >  drivers/tty/serial/qcom_geni_serial.c | 4 ++--
> >  1 file changed, 2 insertions(+), 2 deletions(-)
> > 
> > diff --git a/drivers/tty/serial/qcom_geni_serial.c
> > b/drivers/tty/serial/qcom_geni_serial.c
> > index a72d6d9fb9834..38016609c7fa9 100644
> > --- a/drivers/tty/serial/qcom_geni_serial.c
> > +++ b/drivers/tty/serial/qcom_geni_serial.c
> > @@ -225,7 +225,7 @@ static unsigned int
> > qcom_geni_serial_get_mctrl(struct uart_port *uport)
> >  	unsigned int mctrl = TIOCM_DSR | TIOCM_CAR;
> >  	u32 geni_ios;
> > 
> > -	if (uart_console(uport) || !uart_cts_enabled(uport)) {
> > +	if (uart_console(uport)) {
> >  		mctrl |= TIOCM_CTS;
> >  	} else {
> >  		geni_ios = readl_relaxed(uport->membase + SE_GENI_IOS);
> > @@ -241,7 +241,7 @@ static void qcom_geni_serial_set_mctrl(struct
> > uart_port *uport,
> >  {
> >  	u32 uart_manual_rfr = 0;
> > 
> > -	if (uart_console(uport) || !uart_cts_enabled(uport))
> > +	if (uart_console(uport))
> >  		return;
> > 
> >  	if (!(mctrl & TIOCM_RTS))
> 
> Though late but wanted to check on why flow control is disabled in a BT case
> ?

Flow control is generally enabled, but for the QCA WNC3990 (and
potentially others) it needs to be temporarily disabled during
baudrate switches:

  Bluetooth: hci_qca: Deassert RTS while baudrate change command
 
  This patch will help to stop frame reassembly errors while changing
  the baudrate. This is because host send a change baudrate request
  command to the chip with 115200 bps, Whereas chip will change their
  UART clocks to the enable for new baudrate and sends the response
  for the change request command with newer baudrate, On host side
  we are still operating in 115200 bps which results of reading garbage
  data. Here we are pulling RTS line, so that chip we will wait to send data
  to host until host change its baudrate.
 
  Signed-off-by: Balakrishna Godavarthi <bgodavar@codeaurora.org>

https://lore.kernel.org/patchwork/patch/1038502/

> If i understand, CRTSCTS at serial core is what makes flow as enabled with
> UPSTAT_CTS_ENABLE set and that in turn returned by uart_cts_enabled(uport).
> So is there any settings or configuration missing to enable flow
> control ?

To my knowledge there is no problem with enabling flow control (for
the non-console case). The problem was that RTS was not cleared when
flow control was disabled (done here: https://elixir.bootlin.com/linux/v4.20.7/source/drivers/bluetooth/hci_ldisc.c#L325)

> There could be a case to have 2 wire UART without flow control enablement,
> In that case we may need check for uart_cts_enabled() right ?
> Please add/correct if i missed something.

I'm no serial expert, but IIUC a UART driver doesn't know if the flow
control signals are wired up and is not supposed to do any specific
handling if flow control is enabled on a port without CTS/RTS
wires. Folks with more serial expertise please correct me if I'm
wrong.

Thanks

Matthias

^ permalink raw reply

* [PATCH] tty/synclink: remove ISA support
From: Christoph Hellwig @ 2019-02-11 13:25 UTC (permalink / raw)
  To: Greg Kroah-Hartman, Jiri Slaby; +Cc: linux-serial, linux-kernel
In-Reply-To: <20190211132534.29407-1-hch@lst.de>

The ISA support in this driver is horribly outdated and the last
driver that passes a NULL dev argument to the DMA mapping routines.

Drop the support, if someone wants to go through the work of converting
it to a proper isa_driver we can resurrect it.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/tty/synclink.c | 54 ------------------------------------------
 1 file changed, 54 deletions(-)

diff --git a/drivers/tty/synclink.c b/drivers/tty/synclink.c
index d55c858d6058..84f26e43b229 100644
--- a/drivers/tty/synclink.c
+++ b/drivers/tty/synclink.c
@@ -4325,41 +4325,6 @@ static int mgsl_init_tty(void)
 	return 0;
 }
 
-/* enumerate user specified ISA adapters
- */
-static void mgsl_enum_isa_devices(void)
-{
-	struct mgsl_struct *info;
-	int i;
-		
-	/* Check for user specified ISA devices */
-	
-	for (i=0 ;(i < MAX_ISA_DEVICES) && io[i] && irq[i]; i++){
-		if ( debug_level >= DEBUG_LEVEL_INFO )
-			printk("ISA device specified io=%04X,irq=%d,dma=%d\n",
-				io[i], irq[i], dma[i] );
-		
-		info = mgsl_allocate_device();
-		if ( !info ) {
-			/* error allocating device instance data */
-			if ( debug_level >= DEBUG_LEVEL_ERROR )
-				printk( "can't allocate device instance data.\n");
-			continue;
-		}
-		
-		/* Copy user configuration info to device instance data */
-		info->io_base = (unsigned int)io[i];
-		info->irq_level = (unsigned int)irq[i];
-		info->irq_level = irq_canonicalize(info->irq_level);
-		info->dma_level = (unsigned int)dma[i];
-		info->bus_type = MGSL_BUS_TYPE_ISA;
-		info->io_addr_size = 16;
-		info->irq_flags = 0;
-		
-		mgsl_add_device( info );
-	}
-}
-
 static void synclink_cleanup(void)
 {
 	int rc;
@@ -4403,7 +4368,6 @@ static int __init synclink_init(void)
 
  	printk("%s %s\n", driver_name, driver_version);
 
-	mgsl_enum_isa_devices();
 	if ((rc = pci_register_driver(&synclink_pci_driver)) < 0)
 		printk("%s:failed to register PCI driver, error=%d\n",__FILE__,rc);
 	else
@@ -5025,12 +4989,6 @@ static void usc_set_sdlc_mode( struct mgsl_struct *info )
 	info->mbre_bit = BIT8;
 	outw( BIT8, info->io_base );			/* set Master Bus Enable (DCAR) */
 
-	if (info->bus_type == MGSL_BUS_TYPE_ISA) {
-		/* Enable DMAEN (Port 7, Bit 14) */
-		/* This connects the DMA request signal to the ISA bus */
-		usc_OutReg(info, PCR, (u16)((usc_InReg(info, PCR) | BIT15) & ~BIT14));
-	}
-
 	/* DMA Control Register (DCR)
 	 *
 	 * <15..14>	10	Priority mode = Alternating Tx/Rx
@@ -6007,12 +5965,6 @@ static void usc_set_async_mode( struct mgsl_struct *info )
 
 	usc_EnableMasterIrqBit( info );
 
-	if (info->bus_type == MGSL_BUS_TYPE_ISA) {
-		/* Enable INTEN (Port 6, Bit12) */
-		/* This connects the IRQ request signal to the ISA bus */
-		usc_OutReg(info, PCR, (u16)((usc_InReg(info, PCR) | BIT13) & ~BIT12));
-	}
-
 	if (info->params.loopback) {
 		info->loopback_bits = 0x300;
 		outw(0x0300, info->io_base + CCAR);
@@ -6107,12 +6059,6 @@ static void usc_set_sync_mode( struct mgsl_struct *info )
 	usc_loopback_frame( info );
 	usc_set_sdlc_mode( info );
 
-	if (info->bus_type == MGSL_BUS_TYPE_ISA) {
-		/* Enable INTEN (Port 6, Bit12) */
-		/* This connects the IRQ request signal to the ISA bus */
-		usc_OutReg(info, PCR, (u16)((usc_InReg(info, PCR) | BIT13) & ~BIT12));
-	}
-
 	usc_enable_aux_clock(info, info->params.clock_speed);
 
 	if (info->params.loopback)
-- 
2.20.1

^ permalink raw reply related

* RFC: drop ISA support in the synlink tty driver?
From: Christoph Hellwig @ 2019-02-11 13:25 UTC (permalink / raw)
  To: Greg Kroah-Hartman, Jiri Slaby; +Cc: linux-serial, linux-kernel

Hi Greg and Jiri,

I've been working hard to get rid of the remaining callers the pass a
NULL struct device to the DMA mapping functions and am almost done.

The only non-trivial driver is the synclink driver, which has legacy
early 90s style ISA support that doesn't use the device model at all.

In theory we could convert it to an isa_driver, but without testing
that seems rather dangerous.  So for now I would suggest that we
remove the ISA support in this driver - if anyone cares enough we
can resurrect it from the git history and convert it to use the driver
model.

^ permalink raw reply

* Re: [PATCH v6 1/6] irqchip/mtk-sysirq: support 4 interrupt parameters for sysirq
From: Seiya Wang @ 2019-02-11 13:01 UTC (permalink / raw)
  To: Marc Zyngier
  Cc: Matthias Brugger, Erin Lo, Rob Herring, Mark Rutland,
	Thomas Gleixner, Jason Cooper, Greg Kroah-Hartman, Stephen Boyd,
	devicetree, srv_heupstream, linux-kernel, linux-serial,
	linux-mediatek, linux-arm-kernel, yingjoe.chen, mars.cheng,
	eddie.huang, linux-clk
In-Reply-To: <868symu2ss.wl-marc.zyngier@arm.com>

On Mon, 2019-02-11 at 08:50 +0000, Marc Zyngier wrote:
> On Mon, 11 Feb 2019 06:35:29 +0000,
> Seiya Wang <seiya.wang@mediatek.com> wrote:
> > 
> > On Thu, 2019-02-07 at 15:52 +0000, Marc Zyngier wrote:
> > > On 07/02/2019 15:47, Marc Zyngier wrote:
> > > > On 07/02/2019 15:20, Matthias Brugger wrote:
> > > >>
> > > >>
> > > >> On 24/01/2019 09:07, Erin Lo wrote:
> > > >>> From: Seiya Wang <seiya.wang@mediatek.com>
> > > >>>
> > > >>> To support partitioned PPIs, 4 interrupt parameters should be valid
> > > >>> for sysirq.
> > > >>>
> > > >>> Signed-off-by: Seiya Wang <seiya.wang@mediatek.com>
> > > >>> Signed-off-by: Erin Lo <erin.lo@mediatek.com>
> > > >>> ---
> > > >>>  drivers/irqchip/irq-mtk-sysirq.c | 4 ++--
> > > >>>  1 file changed, 2 insertions(+), 2 deletions(-)
> > > >>>
> > > >>> diff --git a/drivers/irqchip/irq-mtk-sysirq.c b/drivers/irqchip/irq-mtk-sysirq.c
> > > >>> index 90aaf19..282736a 100644
> > > >>> --- a/drivers/irqchip/irq-mtk-sysirq.c
> > > >>> +++ b/drivers/irqchip/irq-mtk-sysirq.c
> > > >>> @@ -81,7 +81,7 @@ static int mtk_sysirq_domain_translate(struct irq_domain *d,
> > > >>>  				       unsigned int *type)
> > > >>>  {
> > > >>>  	if (is_of_node(fwspec->fwnode)) {
> > > >>> -		if (fwspec->param_count != 3)
> > > >>> +		if (fwspec->param_count != 3 && fwspec->param_count != 4)
> > > >>
> > > >> Where is this 4th parameter used?
> > > > 
> > > > https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/Documentation/devicetree/bindings/interrupt-controller/arm,gic-v3.txt#n14
> > > Sorry, I fired Send way too early.
> > > 
> > > What I wanted to add is that it is not clear to me why this change would
> > > be required here, as this driver only supports SPIs. It could be fixed
> > > by just relaxing the binding itself.
> > > 
> > > Thanks,
> > > 
> > > 	M.
> > 
> > Do you mean that we should change #interrupt-cells back to 3 for sysirq
> > and remove the 4th parameters of every spi interrupts in mt8183.dtsi
> > (i.e. 3 parameters for spi, 4 for ppi) such that we can discard this
> > patch?
> 
> It is more subtle than that:
> 
> - PPIs must have the affinity parameter in their int-spec (since you
>   need that for the PMU)
> 
> - SPIs that are directly routed to the GIC must also have the affinity
>   parameter (although set to zero).
> 
> - SPIs that are routed via the sysirq block (or any other) can use the
>   3 parameter variant, as they are not resolved in the context of the
>   GIC, but in that of the sysirq.
> 
> But in short, yes. You should be able to drop this patch altogether.
> 
> > If yes, we may need some time to verify the change before resending the
> > patch.
> 
> That's absolutely fine.
> 
> Thanks,
> 
> 	M.
> 
Thanks for the detailed descriptions.
We will remove this patch and resend again.

^ permalink raw reply

* Re: [PATCH v6 1/6] irqchip/mtk-sysirq: support 4 interrupt parameters for sysirq
From: Marc Zyngier @ 2019-02-11  8:50 UTC (permalink / raw)
  To: Seiya Wang
  Cc: Matthias Brugger, Erin Lo, Rob Herring, Mark Rutland,
	Thomas Gleixner, Jason Cooper, Greg Kroah-Hartman, Stephen Boyd,
	devicetree, srv_heupstream, linux-kernel, linux-serial,
	linux-mediatek, linux-arm-kernel, yingjoe.chen, mars.cheng,
	eddie.huang, linux-clk
In-Reply-To: <1549866929.22817.20.camel@mtksdccf07>

On Mon, 11 Feb 2019 06:35:29 +0000,
Seiya Wang <seiya.wang@mediatek.com> wrote:
> 
> On Thu, 2019-02-07 at 15:52 +0000, Marc Zyngier wrote:
> > On 07/02/2019 15:47, Marc Zyngier wrote:
> > > On 07/02/2019 15:20, Matthias Brugger wrote:
> > >>
> > >>
> > >> On 24/01/2019 09:07, Erin Lo wrote:
> > >>> From: Seiya Wang <seiya.wang@mediatek.com>
> > >>>
> > >>> To support partitioned PPIs, 4 interrupt parameters should be valid
> > >>> for sysirq.
> > >>>
> > >>> Signed-off-by: Seiya Wang <seiya.wang@mediatek.com>
> > >>> Signed-off-by: Erin Lo <erin.lo@mediatek.com>
> > >>> ---
> > >>>  drivers/irqchip/irq-mtk-sysirq.c | 4 ++--
> > >>>  1 file changed, 2 insertions(+), 2 deletions(-)
> > >>>
> > >>> diff --git a/drivers/irqchip/irq-mtk-sysirq.c b/drivers/irqchip/irq-mtk-sysirq.c
> > >>> index 90aaf19..282736a 100644
> > >>> --- a/drivers/irqchip/irq-mtk-sysirq.c
> > >>> +++ b/drivers/irqchip/irq-mtk-sysirq.c
> > >>> @@ -81,7 +81,7 @@ static int mtk_sysirq_domain_translate(struct irq_domain *d,
> > >>>  				       unsigned int *type)
> > >>>  {
> > >>>  	if (is_of_node(fwspec->fwnode)) {
> > >>> -		if (fwspec->param_count != 3)
> > >>> +		if (fwspec->param_count != 3 && fwspec->param_count != 4)
> > >>
> > >> Where is this 4th parameter used?
> > > 
> > > https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/Documentation/devicetree/bindings/interrupt-controller/arm,gic-v3.txt#n14
> > Sorry, I fired Send way too early.
> > 
> > What I wanted to add is that it is not clear to me why this change would
> > be required here, as this driver only supports SPIs. It could be fixed
> > by just relaxing the binding itself.
> > 
> > Thanks,
> > 
> > 	M.
> 
> Do you mean that we should change #interrupt-cells back to 3 for sysirq
> and remove the 4th parameters of every spi interrupts in mt8183.dtsi
> (i.e. 3 parameters for spi, 4 for ppi) such that we can discard this
> patch?

It is more subtle than that:

- PPIs must have the affinity parameter in their int-spec (since you
  need that for the PMU)

- SPIs that are directly routed to the GIC must also have the affinity
  parameter (although set to zero).

- SPIs that are routed via the sysirq block (or any other) can use the
  3 parameter variant, as they are not resolved in the context of the
  GIC, but in that of the sysirq.

But in short, yes. You should be able to drop this patch altogether.

> If yes, we may need some time to verify the change before resending the
> patch.

That's absolutely fine.

Thanks,

	M.

-- 
Jazz is not dead, it just smell funny.

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox