linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: "Theodore Ts'o" <tytso@mit.edu>
To: linux-kernel@vger.kernel.org
Cc: smueller@chronox.de, herbert@gondor.apana.org.au,
	andi@firstfloor.org, sandyinchina@gmail.com,
	cryptography@lakedaemon.net, jsd@av8n.com, hpa@zytor.com,
	linux-crypto@vger.kernel.org, "Theodore Ts'o" <tytso@mit.edu>
Subject: [PATCH 2/3] random: make /dev/urandom scalable for silly userspace programs
Date: Mon,  2 May 2016 02:26:52 -0400	[thread overview]
Message-ID: <1462170413-7164-3-git-send-email-tytso@mit.edu> (raw)
In-Reply-To: <1462170413-7164-1-git-send-email-tytso@mit.edu>

On a 4 socket (NUMA) system where a large number of
application processes were all trying to read from /dev/urandom, this
can result in the system spending 80% of its time contending on the
global urandom spinlock.  The application should have used its own PRNG, but
let's try to help it from running, lemming-like, straight over the
locking cliff.

Reported-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 drivers/char/random.c | 67 +++++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 62 insertions(+), 5 deletions(-)

diff --git a/drivers/char/random.c b/drivers/char/random.c
index 95f4451..d5bb3b3 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -746,6 +746,17 @@ struct crng_state primary_crng = {
 };
 static DECLARE_WAIT_QUEUE_HEAD(crng_init_wait);
 
+#ifdef CONFIG_NUMA
+/*
+ * Hack to deal with crazy userspace programs when they are all trying
+ * to access /dev/urandom in parallel.  The programs are almost
+ * certainly doing something terribly wrong, but we'll work around
+ * their brain damage.
+ */
+static struct crng_state **crng_node_pool __read_mostly;
+#endif
+
+
 static void _initialize_crng(struct crng_state *crng)
 {
 	int		i;
@@ -761,11 +772,13 @@ static void _initialize_crng(struct crng_state *crng)
 	crng->init_time = jiffies - CRNG_RESEED_INTERVAL;
 }
 
+#ifdef CONFIG_NUMA
 static void initialize_crng(struct crng_state *crng)
 {
 	_initialize_crng(crng);
 	spin_lock_init(&crng->lock);
 }
+#endif
 
 static int crng_fast_load(__u32 pool[4])
 {
@@ -822,19 +835,23 @@ out:
 	return ret;
 }
 
+static inline void maybe_reseed_primary_crng(void)
+{
+	if (crng_init > 2 &&
+	    time_after(jiffies, primary_crng.init_time + CRNG_RESEED_INTERVAL))
+		crng_reseed(&input_pool);
+}
+
 static inline void crng_wait_ready(void)
 {
 	wait_event_interruptible(crng_init_wait, crng_ready());
 }
 
-static void extract_crng(__u8 out[CHACHA20_BLOCK_SIZE])
+static void _extract_crng(struct crng_state *crng,
+			  __u8 out[CHACHA20_BLOCK_SIZE])
 {
 	unsigned long v, flags;
-	struct crng_state *crng = &primary_crng;
 
-	if (crng_init > 2 &&
-	    time_after(jiffies, crng->init_time + CRNG_RESEED_INTERVAL))
-		crng_reseed(&input_pool);
 	spin_lock_irqsave(&crng->lock, flags);
 	if (arch_get_random_long(&v))
 		crng->state[14] ^= v;
@@ -844,6 +861,30 @@ static void extract_crng(__u8 out[CHACHA20_BLOCK_SIZE])
 	spin_unlock_irqrestore(&crng->lock, flags);
 }
 
+static void extract_crng(__u8 out[CHACHA20_BLOCK_SIZE])
+{
+#ifndef CONFIG_NUMA
+	maybe_reseed_primary_crng();
+	_extract_crng(&primary_crng, out);
+#else
+	int node_id = numa_node_id();
+	struct crng_state *crng = crng_node_pool[node_id];
+
+	if (time_after(jiffies, crng->init_time + CRNG_RESEED_INTERVAL)) {
+		unsigned long flags;
+
+		maybe_reseed_primary_crng();
+		_extract_crng(&primary_crng, out);
+		spin_lock_irqsave(&crng->lock, flags);
+		memcpy(&crng->state[4], out, CHACHA20_KEY_SIZE);
+		crng->state[15] = numa_node_id();
+		crng->init_time = jiffies;
+		spin_unlock_irqrestore(&crng->lock, flags);
+	}
+	_extract_crng(crng, out);
+#endif
+}
+
 static ssize_t extract_crng_user(void __user *buf, size_t nbytes)
 {
 	ssize_t ret = 0, i;
@@ -1548,6 +1589,22 @@ static void init_std_data(struct entropy_store *r)
  */
 static int rand_initialize(void)
 {
+#ifdef CONFIG_NUMA
+	int i;
+	int num_nodes = num_possible_nodes();
+	struct crng_state *crng;
+
+	crng_node_pool = kmalloc(num_nodes * sizeof(void *),
+				 GFP_KERNEL|__GFP_NOFAIL);
+
+	for (i=0; i < num_nodes; i++) {
+		crng = kmalloc(sizeof(struct crng_state),
+			       GFP_KERNEL | __GFP_NOFAIL);
+		initialize_crng(crng);
+		crng_node_pool[i] = crng;
+
+	}
+#endif
 	init_std_data(&input_pool);
 	init_std_data(&blocking_pool);
 	_initialize_crng(&primary_crng);
-- 
2.5.0

  parent reply	other threads:[~2016-05-02  6:27 UTC|newest]

Thread overview: 53+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-05-02  6:26 [RFC PATCH 0/3] random: replace urandom pool with a CRNG Theodore Ts'o
2016-05-02  6:26 ` [PATCH 1/3] random: replace non-blocking pool with a Chacha20-based CRNG Theodore Ts'o
2016-05-03  8:50   ` Stephan Mueller
2016-05-04 16:54     ` Jeffrey Walton
2016-05-04 17:30     ` tytso
2016-05-04 17:52       ` H. Peter Anvin
2016-05-03  9:36   ` Stephan Mueller
2016-05-04  6:24     ` Stephan Mueller
2016-05-04 14:40   ` Jeffrey Walton
2016-05-04 17:49     ` tytso
2016-05-04 18:22       ` Jeffrey Walton
2016-05-04 18:29         ` H. Peter Anvin
2016-05-04 19:07           ` tytso
2016-05-04 20:53             ` H. Peter Anvin
2016-05-04 21:42             ` John Denker
2016-05-04 21:52               ` better patch for linux/bitops.h John Denker
2016-05-05  1:35                 ` Jeffrey Walton
2016-05-05  2:41                   ` H. Peter Anvin
2016-05-05  2:54                     ` Jeffrey Walton
2016-05-05  3:08                       ` H. Peter Anvin
2016-05-05  3:30                         ` Jeffrey Walton
2016-05-05  3:50                           ` Theodore Ts'o
2016-05-05  4:03                             ` Jeffrey Walton
2016-05-05  6:35                               ` H. Peter Anvin
2016-05-05 16:15                                 ` UB in general ... and linux/bitops.h in particular John Denker
2016-05-05 17:32                                   ` Andi Kleen
2016-05-06  2:25                                   ` Jeffrey Walton
2016-05-05 21:34                             ` better patch for linux/bitops.h Sandy Harris
2016-05-05 22:18                               ` tytso
2016-05-05 22:22                                 ` H. Peter Anvin
2016-05-05 22:38                                 ` H. Peter Anvin
2016-05-06  0:13                                 ` H. Peter Anvin
2016-05-04 21:56               ` [PATCH 1/3] random: replace non-blocking pool with a Chacha20-based CRNG H. Peter Anvin
2016-05-04 22:06                 ` linux/bitops.h John Denker
2016-05-04 23:06                   ` linux/bitops.h Andi Kleen
2016-05-05  0:13                     ` linux/bitops.h John Denker
2016-05-05  1:20                     ` linux/bitops.h Jeffrey Walton
2016-05-05  1:27                       ` linux/bitops.h H. Peter Anvin
2016-05-05  0:30                   ` linux/bitops.h H. Peter Anvin
2016-05-05  0:48                     ` linux/bitops.h Linus Torvalds
2016-05-06 20:08                       ` linux/bitops.h Sasha Levin
2016-05-06 20:07                     ` linux/bitops.h Sasha Levin
2016-05-06 20:25                       ` linux/bitops.h H. Peter Anvin
2016-05-06 20:30                       ` linux/bitops.h H. Peter Anvin
2016-05-02  6:26 ` Theodore Ts'o [this message]
2016-05-02  7:00   ` [PATCH 2/3] random: make /dev/urandom scalable for silly userspace programs Stephan Mueller
2016-05-02 12:50     ` Theodore Ts'o
2016-05-02 13:48       ` Theodore Ts'o
2016-05-02 13:53         ` Stephan Mueller
2016-05-02  6:26 ` [PATCH 3/3] random: add interrupt callback to VMBus IRQ handler Theodore Ts'o
2016-05-02  9:00   ` Jeffrey Walton
2016-05-02  9:14     ` Stephan Mueller
2016-05-02 12:56       ` Theodore Ts'o

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1462170413-7164-3-git-send-email-tytso@mit.edu \
    --to=tytso@mit.edu \
    --cc=andi@firstfloor.org \
    --cc=cryptography@lakedaemon.net \
    --cc=herbert@gondor.apana.org.au \
    --cc=hpa@zytor.com \
    --cc=jsd@av8n.com \
    --cc=linux-crypto@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=sandyinchina@gmail.com \
    --cc=smueller@chronox.de \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).