linux-kernel.vger.kernel.org archive mirror
* [PATCH v2 1/2] tmpfs: Quick token library to allow scalable retrieval of tokens from token jar
@ 2010-05-26 19:32 Tim Chen
  2010-06-01 21:51 ` Andrew Morton
  0 siblings, 1 reply; 15+ messages in thread
From: Tim Chen @ 2010-05-26 19:32 UTC (permalink / raw)
  To: Andrew Morton; +Cc: linux-kernel, Andi Kleen, Hugh Dickins

This patch creates a quick token (qtoken) library to maintain local
caches of tokens. This avoids lock contention when a token is
retrieved from or returned to the token jar, improving scalability
on systems with many cpus.
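For reference, a minimal (hypothetical) caller of the interfaces declared
in qtoken.h below might look like this; the sizes are arbitrary:

	struct qtoken jar;

	/* a jar of 1024 tokens, with per cpu caches sized at 32 tokens */
	if (qtoken_init(&jar, 1024, 32))
		return -ENOMEM;

	/* take 4 tokens, but only if 8 tokens would still be left in the jar */
	if (qtoken_get(&jar, 4, 8) == 0) {
		/* ... consume the resource the tokens stand for ... */
		qtoken_return(&jar, 4);
	}

	qtoken_free(&jar);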

 include/linux/qtoken.h |   41 +++++
 lib/Makefile           |    2 +-
 lib/qtoken.c           |  447 ++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 489 insertions(+), 1 deletions(-)

diff --git a/include/linux/qtoken.h b/include/linux/qtoken.h
new file mode 100644
index 0000000..a9e0f97
--- /dev/null
+++ b/include/linux/qtoken.h
@@ -0,0 +1,41 @@
+/*
+ * qtoken.h: Structure and function prototypes to implement quick token
+ * retrieval from a jar of tokens with per cpu cache.
+ *
+ * Copyright (C) 2010 Intel Corporation
+ * Author: Tim Chen <tim.c.chen@linux.intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ *
+ */
+
+#ifndef _QTOKEN_H
+#define _QTOKEN_H
+
+#define QTOKEN_CACHE_HIGH 2
+#define QTOKEN_POOL_HIGH (2 * QTOKEN_CACHE_HIGH)
+
+struct qtoken {
+	unsigned long pool;   /* pool of free tokens */
+	unsigned long total;  /* total num of tokens */
+#ifdef CONFIG_SMP
+	unsigned long init_cache_sz; /* initial cache size */
+	spinlock_t    lock;	/* lock on token jar */
+	atomic_long_t __percpu *cache; /* per cpu cache of tokens */
+#endif
+};
+
+extern void qtoken_return(struct qtoken *token_jar, unsigned long tokens);
+extern int qtoken_get(struct qtoken *token_jar, unsigned long tokens,
+			unsigned long reserve);
+extern int qtoken_init(struct qtoken *token_jar, unsigned long total_tokens,
+			unsigned long init_cache_sz);
+extern void qtoken_free(struct qtoken *token_jar);
+extern unsigned long qtoken_avail(struct qtoken *token_jar);
+extern int qtoken_resize(struct qtoken *token_jar, unsigned long new_size);
+extern unsigned long qtoken_total(struct qtoken *token_jar);
+
+#endif /* _QTOKEN_H */
diff --git a/lib/Makefile b/lib/Makefile
index 0d40152..1fa8945 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -12,7 +12,7 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \
 	 idr.o int_sqrt.o extable.o prio_tree.o \
 	 sha1.o irq_regs.o reciprocal_div.o argv_split.o \
 	 proportions.o prio_heap.o ratelimit.o show_mem.o \
-	 is_single_threaded.o plist.o decompress.o flex_array.o
+	 is_single_threaded.o plist.o decompress.o flex_array.o qtoken.o
 
 lib-$(CONFIG_MMU) += ioremap.o
 lib-$(CONFIG_SMP) += cpumask.o
diff --git a/lib/qtoken.c b/lib/qtoken.c
new file mode 100644
index 0000000..4fcd144
--- /dev/null
+++ b/lib/qtoken.c
@@ -0,0 +1,447 @@
+/*
+ * qtoken.c: Library of functions to implement quick token
+ * retrieval from a jar of tokens with per cpu cache.
+ *
+ * Copyright (C) 2010 Intel Corporation
+ * Author: Tim Chen <tim.c.chen@linux.intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/cpumask.h>
+#include <linux/qtoken.h>
+
+/*
+ * This library is useful when we have a large number of threads
+ * running concurrently on many cpus trying to access tokens in a token jar
+ * at the same time.
+ *
+ * The token jar is implemented with per cpu cache of tokens.
+ * This library allows tokens to be obtained from or returned quickly to
+ * the local cpu cache without any lock acquisition most of the time.
+ * We only need to lock and modify tokens in the common pool for the
+ * following cases:
+ *
+ * 1) Not enough tokens are in our local cache and we need to get tokens
+ * from the common pool
+ * 2) We have too many tokens in local cache and need to return
+ * some tokens to the common pool
+ * 3) We want to resize the token jar and need to freeze the free tokens
+ *
+ * The local token cache is disabled by setting it to -1 and its tokens
+ * are reaped into the common pool when we find the common pool low in tokens.
+ * The token jar should be initialized with a cache size large enough
+ * to avoid touching the common pool's tokens frequently.
+ *
+ * It is possible to implement the token jar with just a single
+ * atomic variable for all free tokens.  However, this approach is
+ * not used here, to prevent the excessive cache line bouncing which causes
+ * poor performance when the token jar is accessed by a large number of
+ * simultaneous threads.
+ */
+
+#ifdef CONFIG_SMP
+/**
+ * qtoken_return - return tokens to token jar
+ *
+ * @token_jar: pointer to token jar
+ * @tokens: the number of tokens to return to token jar
+ *
+ * @tokens are returned to the cache in current cpu, unless
+ * the cache is disabled (i.e. value -1).  For this case,
+ * @tokens are returned to common pool.
+ * If the number of tokens in current cpu's cache exceed
+ * a a highmark, we will return the extra tokens into the
+ * common pool.
+ */
+void qtoken_return(struct qtoken *token_jar, unsigned long tokens)
+{
+	long cnt;
+	unsigned long flags;
+	int	 cpu;
+	atomic_long_t *cache;
+
+	cpu = get_cpu();
+	cache = per_cpu_ptr(token_jar->cache, cpu);
+	cnt = atomic_long_read(cache);
+	if (cnt >= 0) {
+		if ((cnt + tokens) <= QTOKEN_CACHE_HIGH * token_jar->init_cache_sz) {
+			if (!atomic_long_add_unless(cache, tokens, -1)) {
+				spin_lock_irqsave(&token_jar->lock, flags);
+				token_jar->pool += tokens;
+				spin_unlock_irqrestore(&token_jar->lock, flags);
+			}
+		} else {
+			spin_lock_irqsave(&token_jar->lock, flags);
+			if (atomic_long_add_unless(cache, token_jar->init_cache_sz - cnt, -1)) {
+				int extra;
+
+				extra = cnt + tokens - token_jar->init_cache_sz;
+
+				token_jar->pool += extra;
+			} else
+				token_jar->pool += tokens;
+			spin_unlock_irqrestore(&token_jar->lock, flags);
+		}
+	} else {
+		spin_lock_irqsave(&token_jar->lock, flags);
+		token_jar->pool += tokens;
+		spin_unlock_irqrestore(&token_jar->lock, flags);
+	}
+	put_cpu();
+}
+EXPORT_SYMBOL(qtoken_return);
+
+/**
+ * qtoken_reap cache: Move tokens from cache into common pool in token jar
+ *
+ * @token_jar: pointer to token jar
+ *
+ * The tokens in each cpu's cache are reaped and placed in
+ * common pool.  The cache of each cpu is disabled (set to -1).
+ */
+static void qtoken_reap_cache(struct qtoken *token_jar)
+{
+	long cnt;
+	int i;
+
+	for_each_possible_cpu(i) {
+		atomic_long_t	*cache;
+
+		cache = per_cpu_ptr(token_jar->cache, i);
+		cnt = atomic_long_xchg(cache, -1);
+		if (cnt > 0)
+			token_jar->pool += cnt;
+	}
+}
+
+/**
+ * qtoken_from pool: Get tokens from common pool in token jar
+ *
+ * @token_jar: pointer to struct qtoken
+ * @tokens: the number of tokens to acquire from token jar
+ * @reserve: number of tokens to reserve in token jar after getting tokens
+ *
+ * Get @tokens from the token jar's common pool but keep @reserve tokens.
+ * We will fail the operation if we cannot keep @reserve tokens in token jar.
+ *
+ * Return 0 if operations succeeds and -ENOSPC if operation fails.
+ */
+static int qtoken_from_pool(struct qtoken *token_jar, unsigned long tokens,
+				unsigned long reserve)
+{
+	int	allocated = -ENOSPC;
+
+	/* We should have acquired lock of token pool before coming here */
+	if (token_jar->pool < (reserve + tokens))
+		qtoken_reap_cache(token_jar);
+	if (token_jar->pool >= (reserve + tokens)) {
+		token_jar->pool -= tokens;
+		allocated = 0;
+	}
+	return allocated;
+}
+
+/**
+ * qtoken_get: Get tokens from token jar
+ *
+ * @token_jar: pointer to struct qtoken
+ * @tokens: the number of tokens to acquire from token jar
+ * @reserve: number of tokens to reserve in token jar after getting tokens
+ *
+ * Get @tokens from the token jar but leave @reserve tokens in jar.
+ * Some applications may come back quickly for the reserved tokens and do
+ * not want this get to succeed if a later get for the reserve would fail.
+ * We fail the operation if we cannot keep @reserve tokens in the token jar.
+ *
+ * Return 0 if operation succeeds, non zero error code if operation fails
+ */
+int qtoken_get(struct qtoken *token_jar, unsigned long token, unsigned long reserve)
+{
+	int	allocated = -ENOSPC;
+	int	from_cache = 0;
+	int	cpu;
+	long	cnt;
+	unsigned long flags;
+	atomic_long_t *cache;
+
+	cpu = get_cpu();
+	cache = per_cpu_ptr(token_jar->cache, cpu);
+	cnt = atomic_long_read(cache);
+	if ((cnt > 0) && (cnt > token)) {
+		/* check cache's reserve first to avoid reading pool var */
+		if (cnt >= (token + reserve))
+			from_cache = 1;
+		else if ((cnt + token_jar->pool) >= (token + reserve))
+			from_cache = 1;
+	}
+	if (from_cache) {
+		if (atomic_long_add_unless(cache, -(long)token, -1))
+			allocated = 0;
+		else {
+			/* cache disabled, get token from pool */
+			spin_lock_irqsave(&token_jar->lock, flags);
+			allocated = qtoken_from_pool(token_jar, token, reserve);
+			spin_unlock_irqrestore(&token_jar->lock, flags);
+		}
+	} else {
+		unsigned long pool_high_mark;
+
+		spin_lock_irqsave(&token_jar->lock, flags);
+		pool_high_mark = QTOKEN_POOL_HIGH * token_jar->init_cache_sz
+							* num_online_cpus();
+		if (token_jar->pool > pool_high_mark + token) {
+			/* plenty of tokens, replenish cache */
+			token_jar->pool -= token + token_jar->init_cache_sz;
+			allocated = 0;
+			cnt = atomic_long_read(cache);
+			if (cnt < 0)
+				cnt = token_jar->init_cache_sz;
+			else
+				cnt += token_jar->init_cache_sz;
+			atomic_long_set(cache, cnt);
+		} else
+			allocated = qtoken_from_pool(token_jar, token, reserve);
+		spin_unlock_irqrestore(&token_jar->lock, flags);
+	}
+	put_cpu();
+	return allocated;
+}
+EXPORT_SYMBOL(qtoken_get);
+
+/**
+ * qtoken_init: Init token jar
+ *
+ * @token_jar: pointer to token jar
+ * @total_tokens: the total number of tokens that the token jar holds
+ * @cache_size: the initial size of per cpu cache of tokens
+ *
+ * Initialize the token jar structure, and allocate per cpu cache of
+ * tokens in the token jar.
+ *
+ * Returns 0 if initialization successful and non-zero error code otherwise.
+ */
+int qtoken_init(struct qtoken *token_jar, unsigned long total_tokens, unsigned long cache_size)
+{
+	int	i;
+
+	token_jar->cache = alloc_percpu(atomic_long_t);
+	if (!token_jar->cache)
+		return -ENOMEM;
+	spin_lock_init(&token_jar->lock);
+	for_each_possible_cpu(i) {
+		atomic_long_t	*cache;
+
+		cache = per_cpu_ptr(token_jar->cache, i);
+		atomic_long_set(cache, -1);
+	}
+	token_jar->init_cache_sz = cache_size;
+	token_jar->total = token_jar->pool = total_tokens;
+	return 0;
+}
+EXPORT_SYMBOL(qtoken_init);
+
+/**
+ * qtoken_free: Free token jar
+ *
+ * @token_jar: pointer to token jar
+ *
+ * Free memory for per cpu cache used in token jar and
+ * clear the token counts.
+ */
+void qtoken_free(struct qtoken *token_jar)
+{
+	if (token_jar->cache)
+		free_percpu(token_jar->cache);
+	token_jar->pool = 0;
+	token_jar->total = 0;
+}
+EXPORT_SYMBOL(qtoken_free);
+
+/**
+ * qtoken_avail: Calculates token available in token jar
+ *
+ * @token_jar: pointer to token jar
+ *
+ * Get a count of all free tokens in the token jar.
+ */
+unsigned long qtoken_avail(struct qtoken *token_jar)
+{
+	int	i;
+	long	cnt;
+	unsigned long cnt_total;
+	unsigned long flags;
+
+	spin_lock_irqsave(&token_jar->lock, flags);
+	cnt_total = token_jar->pool;
+	for_each_possible_cpu(i) {
+		atomic_long_t	*cache;
+
+		cache = per_cpu_ptr(token_jar->cache, i);
+		cnt = atomic_long_read(cache);
+		if (cnt > 0)
+			cnt_total += cnt;
+	}
+	spin_unlock_irqrestore(&token_jar->lock, flags);
+	return cnt_total;
+}
+EXPORT_SYMBOL(qtoken_avail);
+
+/**
+ * qtoken_resize: Resize the total number of tokens in token jar
+ *
+ * @token_jar: pointer to token jar
+ * @new_size: new total number of tokens in token jar
+ * Returns 0 if token jar resize successful, non zero error code otherwise
+ *
+ * We will resize the token jar and return 0 if the new total number of tokens
+ * is greater than the existing tokens in use.  Otherwise, we will fail the
+ * operation and return an error code.
+ */
+int qtoken_resize(struct qtoken *token_jar, unsigned long new_size)
+{
+	unsigned long in_use;
+	unsigned long flags;
+
+	spin_lock_irqsave(&token_jar->lock, flags);
+	qtoken_reap_cache(token_jar);
+	in_use = token_jar->total - token_jar->pool;
+	if (in_use > new_size) {
+		spin_unlock_irqrestore(&token_jar->lock, flags);
+		return -EBUSY;
+	}
+	token_jar->pool = new_size - in_use;
+	token_jar->total = new_size;
+	spin_unlock_irqrestore(&token_jar->lock, flags);
+	return 0;
+}
+EXPORT_SYMBOL(qtoken_resize);
+
+#else /* !CONFIG_SMP */
+
+/**
+ * qtoken_return - return tokens to token jar
+ *
+ * @token_jar: pointer to token jar
+ * @tokens: the number of tokens to return to token jar
+ */
+void qtoken_return(struct qtoken *token_jar, unsigned long tokens)
+{
+	token_jar->pool += tokens;
+}
+EXPORT_SYMBOL(qtoken_return);
+
+/**
+ * qtoken_get: Get tokens from token jar
+ *
+ * @token_jar: pointer to struct qtoken
+ * @tokens: the number of tokens to acquire from token jar
+ * @reserve: number of tokens to reserve in token jar after getting tokens
+ *
+ * Get @tokens from the token jar but leave @reserve tokens in jar.
+ * Some applications may come back quickly for the reserved tokens and do
+ * not want this get to succeed if a later get for the reserve would fail.
+ * We fail the operation if we cannot keep @reserve tokens in the token jar.
+ *
+ * Return 0 if operation succeeds, non zero error code if operation fails
+ */
+int qtoken_get(struct qtoken *token_jar, unsigned long token, unsigned long reserve)
+{
+	int	allocated = -ENOSPC;
+
+	if (token_jar->pool >= (reserve + token)) {
+		token_jar->pool -= token;
+		allocated = 0;
+	}
+
+	return allocated;
+}
+EXPORT_SYMBOL(qtoken_get);
+
+/**
+ * qtoken_init: Init token jar
+ *
+ * @token_jar: pointer to token jar
+ * @total_tokens: the total number of tokens that the token jar holds
+ * @cache_size: the initial size of per cpu cache of tokens
+ *
+ * Initialize the token jar structure, and allocate per cpu cache of
+ * tokens for the token jar.
+ *
+ * Returns 0 if initialization is successful and non-zero error code otherwise.
+ */
+int qtoken_init(struct qtoken *token_jar, unsigned long total_tokens, unsigned long cache_size)
+{
+	token_jar->total = token_jar->pool = total_tokens;
+	return 0;
+}
+EXPORT_SYMBOL(qtoken_init);
+
+/**
+ * qtoken_free: Free token jar
+ *
+ * @token_jar: pointer to token jar
+ *
+ * Clear the token counts.
+ */
+void qtoken_free(struct qtoken *token_jar)
+{
+	token_jar->pool = 0;
+	token_jar->total = 0;
+}
+EXPORT_SYMBOL(qtoken_free);
+
+/**
+ * qtoken_avail: Calculate the tokens available in token jar
+ *
+ * @token_jar: pointer to token jar
+ *
+ */
+unsigned long qtoken_avail(struct qtoken *token_jar)
+{
+	return token_jar->pool;
+}
+EXPORT_SYMBOL(qtoken_avail);
+
+/**
+ * qtoken_resize: Resize the total number of tokens in token jar
+ *
+ * @token_jar: pointer to token jar
+ * @new_size: new total number of tokens in token jar
+ * Returns 0 if token jar resize successful, non zero error code otherwise
+ *
+ * We will resize the token jar if the new total number of tokens is greater
+ * than the existing tokens in use.  Otherwise, we will fail the operation.
+ */
+int qtoken_resize(struct qtoken *token_jar, unsigned long new_size)
+{
+	unsigned long in_use;
+
+	in_use = token_jar->total - token_jar->pool;
+	if (in_use > new_size)
+		return -EBUSY;
+	token_jar->pool = new_size - in_use;
+	token_jar->total = new_size;
+	return 0;
+}
+EXPORT_SYMBOL(qtoken_resize);
+
+#endif /* CONFIG_SMP */
+
+/**
+ * qtoken_total: Return the total number of tokens configured for token jar
+ *
+ * @token_jar: pointer to token jar
+ *
+ * Returns total number of tokens configured for token jar
+ */
+unsigned long qtoken_total(struct qtoken *token_jar)
+{
+	return token_jar->total;
+}
+EXPORT_SYMBOL(qtoken_total);




* Re: [PATCH v2 1/2] tmpfs: Quick token library to allow scalable retrieval of tokens from token jar
  2010-05-26 19:32 [PATCH v2 1/2] tmpfs: Quick token library to allow scalable retrieval of tokens from token jar Tim Chen
@ 2010-06-01 21:51 ` Andrew Morton
  2010-06-02  8:58   ` Andi Kleen
  2010-06-02 17:32   ` Tim Chen
  0 siblings, 2 replies; 15+ messages in thread
From: Andrew Morton @ 2010-06-01 21:51 UTC (permalink / raw)
  To: Tim Chen; +Cc: linux-kernel, Andi Kleen, Hugh Dickins

On Wed, 26 May 2010 12:32:51 -0700
Tim Chen <tim.c.chen@linux.intel.com> wrote:

> This patch creates a quick token (qtoken) library to maintain local
> caches of tokens. This avoids lock contention when a token is
> retrieved from or returned to the token jar, improving scalability
> on systems with many cpus.
>
> ...
>
> +/*
> + * This library is useful when we have a large number of threads
> + * running concurrently on many cpus trying to access tokens in a token jar
> + * at the same time.
> + *
> + * The token jar is implemented with per cpu cache of tokens.
> + * This library allows tokens to be obtained from or returned quickly to
> + * the local cpu cache without any lock acquisition most of the time.
> + * We only need to lock and modify tokens in the common pool for the
> + * following cases:
> + *
> + * 1) Not enough tokens are in our local cache and we need to get tokens
> + * from the common pool
> + * 2) We have too many tokens in local cache and need to return
> + * some tokens to the common pool
> + * 3) We want to resize the token jar and need to freeze the free tokens
> + *
> + * The local token cache is disabled by setting it to -1 and its tokens
> + * are reaped into the common pool when we find the common pool low in tokens.
> + * The token jar should be initialized with a cache size large enough
> + * to avoid touching the common pool's tokens frequently.
> + *
> + * It is possible to implement the token jar with just a single
> + * atomic variable for all free tokens.  However, this approach is
> + * not used here, to prevent the excessive cache line bouncing which causes
> + * poor performance when the token jar is accessed by a large number of
> + * simultaneous threads.
> + */

OK, thanks.  But I'm still struggling a bit in understanding the
applicability.  Where else might one use this?  What particular
problem is it good at solving?

I think the problem here is that you're using the term "token jar" as
if others already know what a token jar _is_.  I guess I was asleep
during that compsci lecture, but google doesn't appear to know about
the term either.

And from looking at the tmpfs caller, it appears that the token jar is
being used to speed up the access to a single `unsigned long
free_blocks'.  Could we have used say a percpu_counter instead?

>
> ...
>
> +static void qtoken_reap_cache(struct qtoken *token_jar)
> +{
> +	long cnt;
> +	int i;
> +
> +	for_each_possible_cpu(i) {
> +		atomic_long_t	*cache;
> +
> +		cache = per_cpu_ptr(token_jar->cache, i);
> +		cnt = atomic_long_xchg(cache, -1);
> +		if (cnt > 0)
> +			token_jar->pool += cnt;
> +	}
> +}

Not cpu-hotplug aware.  Also, inefficient if num_online_cpus is much
less than num_possible_cpus.

It's really not hard to do!  lib/percpu_counter.c is per-cpu aware and
it does this by sneakily connecting all counters together system-wide.
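Roughly (paraphrasing the idea rather than quoting lib/percpu_counter.c):
every counter is put on a global list when it is initialised, and a cpu
hotplug callback walks that list when a cpu goes away, folding the dead
cpu's delta back into the central count, something like

	struct percpu_counter *fbc;

	list_for_each_entry(fbc, &percpu_counters, list) {
		s32 *pcount;

		spin_lock(&fbc->lock);
		pcount = per_cpu_ptr(fbc->counters, cpu);	/* departing cpu */
		fbc->count += *pcount;
		*pcount = 0;
		spin_unlock(&fbc->lock);
	}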

> +/**
> + * qtoken_from pool: Get tokens from common pool in token jar
> + *
> + * @token_jar: pointer to struct qtoken
> + * @tokens: the number of tokens to acquire from token jar
> + * @reserve: number of tokens to reserve in token jar after getting tokens
> + *
> + * Get @tokens from the token jar's common pool but keep @reserve tokens.
> + * We will fail the operation if we cannot keep @reserve tokens in token jar.
> + *
> + * Return 0 if operations succeeds and -ENOSPC if operation fails.
> + */
> +static int qtoken_from_pool(struct qtoken *token_jar, unsigned long tokens,
> +				unsigned long reserve)
> +{
> +	int	allocated = -ENOSPC;
> +
> +	/* We should have acquired lock of token pool before coming here */
> +	if (token_jar->pool < (reserve + tokens))
> +		qtoken_reap_cache(token_jar);
> +	if (token_jar->pool >= (reserve + tokens)) {
> +		token_jar->pool -= tokens;
> +		allocated = 0;
> +	}
> +	return allocated;
> +}

ENOSPC means "your disk is full".  But my disk isn't full.  If this
inappropriate (indeed, incorrect) error code is ever propagated back to
userspace then users will be justifiably confused.

>
> ...
>


Have some comment fixes.  I don't know if the " - " is compulsory in
the kerneldoc, but it is conventional.


From: Andrew Morton <akpm@linux-foundation.org>

fix comment typo
repair kerneldoc

Cc: Andi Kleen <ak@linux.intel.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Tim Chen <tim.c.chen@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---

 lib/qtoken.c |   26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff -puN lib/qtoken.c~tmpfs-quick-token-library-to-allow-scalable-retrieval-of-tokens-from-token-jar-fix lib/qtoken.c
--- a/lib/qtoken.c~tmpfs-quick-token-library-to-allow-scalable-retrieval-of-tokens-from-token-jar-fix
+++ a/lib/qtoken.c
@@ -58,7 +58,7 @@
  * the cache is disabled (i.e. value -1).  For this case,
  * @tokens are returned to common pool.
  * If the number of tokens in current cpu's cache exceed
- * a a highmark, we will return the extra tokens into the
+ * a highmark, we will return the extra tokens into the
  * common pool.
  */
 void qtoken_return(struct qtoken *token_jar, unsigned long tokens)
@@ -100,7 +100,7 @@ void qtoken_return(struct qtoken *token_
 EXPORT_SYMBOL(qtoken_return);
 
 /**
- * qtoken_reap cache: Move tokens from cache into common pool in token jar
+ * qtoken_reap_cache - move tokens from cache into common pool in token jar
  *
  * @token_jar: pointer to token jar
  *
@@ -123,7 +123,7 @@ static void qtoken_reap_cache(struct qto
 }
 
 /**
- * qtoken_from pool: Get tokens from common pool in token jar
+ * qtoken_from_pool - get tokens from common pool in token jar
  *
  * @token_jar: pointer to struct qtoken
  * @tokens: the number of tokens to acquire from token jar
@@ -150,7 +150,7 @@ static int qtoken_from_pool(struct qtoke
 }
 
 /**
- * qtoken_get: Get tokens from token jar
+ * qtoken_get - get tokens from token jar
  *
  * @token_jar: pointer to struct qtoken
  * @tokens: the number of tokens to acquire from token jar
@@ -217,7 +217,7 @@ int qtoken_get(struct qtoken *token_jar,
 EXPORT_SYMBOL(qtoken_get);
 
 /**
- * qtoken_init: Init token jar
+ * qtoken_init - init token jar
  *
  * @token_jar: pointer to token jar
  * @total_tokens: the total number of tokens that the token jar holds
@@ -266,7 +266,7 @@ void qtoken_free(struct qtoken *token_ja
 EXPORT_SYMBOL(qtoken_free);
 
 /**
- * qtoken_avail: Calculates token available in token jar
+ * qtoken_avail - calculates token available in token jar
  *
  * @token_jar: pointer to token jar
  *
@@ -295,7 +295,7 @@ unsigned long qtoken_avail(struct qtoken
 EXPORT_SYMBOL(qtoken_avail);
 
 /**
- * qtoken_resize: Resize the total number of tokens in token jar
+ * qtoken_resize - resize the total number of tokens in token jar
  *
  * @token_jar: pointer to token jar
  * @new_size: new total number of tokens in token jar
@@ -337,7 +337,7 @@ void qtoken_return(struct qtoken *token_
 EXPORT_SYMBOL(qtoken_return);
 
 /**
- * qtoken_get: Get tokens from token jar
+ * qtoken_get - get tokens from token jar
  *
  * @token_jar: pointer to struct qtoken
  * @tokens: the number of tokens to acquire from token jar
@@ -364,7 +364,7 @@ int qtoken_get(struct qtoken *token_jar,
 EXPORT_SYMBOL(qtoken_get);
 
 /**
- * qtoken_init: Init token jar
+ * qtoken_init - init token jar
  *
  * @token_jar: pointer to token jar
  * @total_tokens: the total number of tokens that the token jar holds
@@ -383,7 +383,7 @@ int qtoken_init(struct qtoken *token_jar
 EXPORT_SYMBOL(qtoken_init);
 
 /**
- * qtoken_free: Free token jar
+ * qtoken_free - free token jar
  *
  * @token_jar: pointer to token jar
  *
@@ -397,7 +397,7 @@ void qtoken_free(struct qtoken *token_ja
 EXPORT_SYMBOL(qtoken_free);
 
 /**
- * qtoken_avail: Calculate the tokens available in token jar
+ * qtoken_avail - calculate the tokens available in token jar
  *
  * @token_jar: pointer to token jar
  *
@@ -409,7 +409,7 @@ unsigned long qtoken_avail(struct qtoken
 EXPORT_SYMBOL(qtoken_avail);
 
 /**
- * qtoken_resize: Resize the total number of tokens in token jar
+ * qtoken_resize - resize the total number of tokens in token jar
  *
  * @token_jar: pointer to token jar
  * @new_size: new total number of tokens in token jar
@@ -434,7 +434,7 @@ EXPORT_SYMBOL(qtoken_resize);
 #endif /* CONFIG_SMP */
 
 /**
- * qtoken_total: Return the total number of tokens configured for token jar
+ * qtoken_total - return the total number of tokens configured for token jar
  *
  * @token_jar: pointer to token jar
  *
_



* Re: [PATCH v2 1/2] tmpfs: Quick token library to allow scalable retrieval of tokens from token jar
  2010-06-01 21:51 ` Andrew Morton
@ 2010-06-02  8:58   ` Andi Kleen
  2010-06-09 22:36     ` Andrew Morton
  2010-06-02 17:32   ` Tim Chen
  1 sibling, 1 reply; 15+ messages in thread
From: Andi Kleen @ 2010-06-02  8:58 UTC (permalink / raw)
  To: Andrew Morton; +Cc: Tim Chen, linux-kernel, Andi Kleen, Hugh Dickins

Andrew Morton <akpm@linux-foundation.org> writes:

I am to blame for the "token jar" name.

> OK, thanks.  But I'm still struggling a bit in understanding the
> applicability.  Where else might one use this?  What particular
> problem is it good at solving?

I wrote a similar scheme a long time ago for IP protocol ID
allocation (but that code never ended up in mainline).
Back then it was called "cookie jar" and you can actually
still google for it :)

It can be used for pretty much anything where you have
a global resource and want to hand it out to different CPUs,
but still have a global limit that is enforced.

For example a file system could use it for accounting 
free space too.

In principle you could even use it for pids for example
or other IDs.

>
> I think the problem here is that you're using the term "token jar" as
> if others already know what a token jar _is_.  I guess I was asleep
> during that compsci lecture, but google doesn't appear to know about
> the term either.
>
> And from looking at the tmpfs caller, it appears that the token jar is
> being used to speed up the access to a single `unsigned long
> free_blocks'.  Could we have used say a percpu_counter instead?

You need some synchronization, otherwise the accounting
would not be exact and you could overflow. Yes you could
open code it, but having it in a library is nicer.

That's what the token jar provides.

You grab some tokens out of the jar and to be fast take multiple in
advance. And there's a watchman who forces you to give them back if you
don't really need them all and they run low.
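A self-contained toy version of that idea, with made-up names (this is not
the qtoken code, just the shape of the scheme; needs <linux/spinlock.h>
and <linux/percpu.h>):

	#define TOY_BATCH	32	/* tokens taken in advance per cpu */

	struct toy_jar {
		spinlock_t lock;
		long pool;		/* tokens not handed to any cpu */
		long __percpu *stash;	/* tokens parked at each cpu */
	};

	static int toy_get_one(struct toy_jar *jar)
	{
		long *stash = get_cpu_ptr(jar->stash);
		int ret = 0;

		if (*stash > 0) {
			(*stash)--;		/* fast path, no shared lock */
		} else {
			spin_lock(&jar->lock);	/* slow path: hit the pool */
			if (jar->pool >= TOY_BATCH) {
				jar->pool -= TOY_BATCH;
				*stash += TOY_BATCH - 1;
			} else if (jar->pool > 0) {
				jar->pool--;	/* running low, no batching */
			} else {
				ret = -1;	/* global limit enforced */
			}
			spin_unlock(&jar->lock);
		}
		put_cpu_ptr(jar->stash);
		return ret;
	}

The watchman is the part this sketch leaves out: something has to pull
tokens back out of idle per-cpu stashes when the pool runs dry, which is
what qtoken_reap_cache() does in Tim's patch.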

-Andi

-- 
ak@linux.intel.com -- Speaking for myself only.


* Re: [PATCH v2 1/2] tmpfs: Quick token library to allow scalable retrieval of tokens from token jar
  2010-06-01 21:51 ` Andrew Morton
  2010-06-02  8:58   ` Andi Kleen
@ 2010-06-02 17:32   ` Tim Chen
  2010-06-09 22:41     ` Andrew Morton
  1 sibling, 1 reply; 15+ messages in thread
From: Tim Chen @ 2010-06-02 17:32 UTC (permalink / raw)
  To: Andrew Morton; +Cc: linux-kernel, Andi Kleen, Hugh Dickins

On Tue, 2010-06-01 at 14:51 -0700, Andrew Morton wrote:

> >
> > +static void qtoken_reap_cache(struct qtoken *token_jar)
> > +{
> > +	long cnt;
> > +	int i;
> > +
> > +	for_each_possible_cpu(i) {
> > +		atomic_long_t	*cache;
> > +
> > +		cache = per_cpu_ptr(token_jar->cache, i);
> > +		cnt = atomic_long_xchg(cache, -1);
> > +		if (cnt > 0)
> > +			token_jar->pool += cnt;
> > +	}
> > +}
> 
> Not cpu-hotplug aware.  Also, inefficient if num_online_cpus is much
> less than num_possible_cpus.
> 
> It's really not hard to do!  lib/percpu_counter.c is per-cpu aware and
> it does this by sneakily connecting all counters together system-wide.
> 

I've thought about using "for_each_online_cpu" in the above loop.
However, if someone takes a cpu offline, then we will lose the tokens
in that cpu's cache when we do reap cache.  So I thought it is safer
to do for_each_possible_cpu. We should not be calling reap cache
very often (only when we are low on tokens). The impact to
performance should be small.

> > +/**
> > + * qtoken_from pool: Get tokens from common pool in token jar
> > + *
> > + * @token_jar: pointer to struct qtoken
> > + * @tokens: the number of tokens to acquire from token jar
> > + * @reserve: number of tokens to reserve in token jar after getting tokens
> > + *
> > + * Get @tokens from the token jar's common pool but keep @reserve tokens.
> > + * We will fail the operation if we cannot keep @reserve tokens in token jar.
> > + *
> > + * Return 0 if operations succeeds and -ENOSPC if operation fails.
> > + */
> > +static int qtoken_from_pool(struct qtoken *token_jar, unsigned long tokens,
> > +				unsigned long reserve)
> > +{
> > +	int	allocated = -ENOSPC;
> > +
> > +	/* We should have acquired lock of token pool before coming here */
> > +	if (token_jar->pool < (reserve + tokens))
> > +		qtoken_reap_cache(token_jar);
> > +	if (token_jar->pool >= (reserve + tokens)) {
> > +		token_jar->pool -= tokens;
> > +		allocated = 0;
> > +	}
> > +	return allocated;
> > +}
> 
> ENOSPC means "your disk is full".  But my disk isn't full.  If this
> inappropriate (indeed, incorrect) error code is ever propagated back to
> userspace then users will be justifiably confused.
> 

Originally, I was using -ENOSPC to mean tmpfs being full and out of
space.  Will -EBUSY to denote all resources are used be more
appropriate?  

Thanks.

Tim Chen

Acked-by: Tim Chen <tim.c.chen@linux.intel.com>

> Have some comment fixes.  I don't know if the " - " is compulsory in
> the kerneldoc, but it is conventional.
> 
> 
> From: Andrew Morton <akpm@linux-foundation.org>
> 
> fix comment typo
> repair kerneldoc
> 
> Cc: Andi Kleen <ak@linux.intel.com>
> Cc: Hugh Dickins <hughd@google.com>
> Cc: Tim Chen <tim.c.chen@linux.intel.com>
> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
> ---
> 
>  lib/qtoken.c |   26 +++++++++++++-------------
>  1 file changed, 13 insertions(+), 13 deletions(-)
> 
> diff -puN lib/qtoken.c~tmpfs-quick-token-library-to-allow-scalable-retrieval-of-tokens-from-token-jar-fix lib/qtoken.c
> --- a/lib/qtoken.c~tmpfs-quick-token-library-to-allow-scalable-retrieval-of-tokens-from-token-jar-fix
> +++ a/lib/qtoken.c
> @@ -58,7 +58,7 @@
>   * the cache is disabled (i.e. value -1).  For this case,
>   * @tokens are returned to common pool.
>   * If the number of tokens in current cpu's cache exceed
> - * a a highmark, we will return the extra tokens into the
> + * a highmark, we will return the extra tokens into the
>   * common pool.
>   */
>  void qtoken_return(struct qtoken *token_jar, unsigned long tokens)
> @@ -100,7 +100,7 @@ void qtoken_return(struct qtoken *token_
>  EXPORT_SYMBOL(qtoken_return);
>  
>  /**
> - * qtoken_reap cache: Move tokens from cache into common pool in token jar
> + * qtoken_reap_cache - move tokens from cache into common pool in token jar
>   *
>   * @token_jar: pointer to token jar
>   *
> @@ -123,7 +123,7 @@ static void qtoken_reap_cache(struct qto
>  }
>  
>  /**
> - * qtoken_from pool: Get tokens from common pool in token jar
> + * qtoken_from_pool - get tokens from common pool in token jar
>   *
>   * @token_jar: pointer to struct qtoken
>   * @tokens: the number of tokens to acquire from token jar
> @@ -150,7 +150,7 @@ static int qtoken_from_pool(struct qtoke
>  }
>  
>  /**
> - * qtoken_get: Get tokens from token jar
> + * qtoken_get - get tokens from token jar
>   *
>   * @token_jar: pointer to struct qtoken
>   * @tokens: the number of tokens to acquire from token jar
> @@ -217,7 +217,7 @@ int qtoken_get(struct qtoken *token_jar,
>  EXPORT_SYMBOL(qtoken_get);
>  
>  /**
> - * qtoken_init: Init token jar
> + * qtoken_init - init token jar
>   *
>   * @token_jar: pointer to token jar
>   * @total_tokens: the total number of tokens that the token jar holds
> @@ -266,7 +266,7 @@ void qtoken_free(struct qtoken *token_ja
>  EXPORT_SYMBOL(qtoken_free);
>  
>  /**
> - * qtoken_avail: Calculates token available in token jar
> + * qtoken_avail - calculates token available in token jar
>   *
>   * @token_jar: pointer to token jar
>   *
> @@ -295,7 +295,7 @@ unsigned long qtoken_avail(struct qtoken
>  EXPORT_SYMBOL(qtoken_avail);
>  
>  /**
> - * qtoken_resize: Resize the total number of tokens in token jar
> + * qtoken_resize - resize the total number of tokens in token jar
>   *
>   * @token_jar: pointer to token jar
>   * @new_size: new total number of tokens in token jar
> @@ -337,7 +337,7 @@ void qtoken_return(struct qtoken *token_
>  EXPORT_SYMBOL(qtoken_return);
>  
>  /**
> - * qtoken_get: Get tokens from token jar
> + * qtoken_get - get tokens from token jar
>   *
>   * @token_jar: pointer to struct qtoken
>   * @tokens: the number of tokens to acquire from token jar
> @@ -364,7 +364,7 @@ int qtoken_get(struct qtoken *token_jar,
>  EXPORT_SYMBOL(qtoken_get);
>  
>  /**
> - * qtoken_init: Init token jar
> + * qtoken_init - init token jar
>   *
>   * @token_jar: pointer to token jar
>   * @total_tokens: the total number of tokens that the token jar holds
> @@ -383,7 +383,7 @@ int qtoken_init(struct qtoken *token_jar
>  EXPORT_SYMBOL(qtoken_init);
>  
>  /**
> - * qtoken_free: Free token jar
> + * qtoken_free - free token jar
>   *
>   * @token_jar: pointer to token jar
>   *
> @@ -397,7 +397,7 @@ void qtoken_free(struct qtoken *token_ja
>  EXPORT_SYMBOL(qtoken_free);
>  
>  /**
> - * qtoken_avail: Calculate the tokens available in token jar
> + * qtoken_avail - calculate the tokens available in token jar
>   *
>   * @token_jar: pointer to token jar
>   *
> @@ -409,7 +409,7 @@ unsigned long qtoken_avail(struct qtoken
>  EXPORT_SYMBOL(qtoken_avail);
>  
>  /**
> - * qtoken_resize: Resize the total number of tokens in token jar
> + * qtoken_resize - resize the total number of tokens in token jar
>   *
>   * @token_jar: pointer to token jar
>   * @new_size: new total number of tokens in token jar
> @@ -434,7 +434,7 @@ EXPORT_SYMBOL(qtoken_resize);
>  #endif /* CONFIG_SMP */
>  
>  /**
> - * qtoken_total: Return the total number of tokens configured for token jar
> + * qtoken_total - return the total number of tokens configured for token jar
>   *
>   * @token_jar: pointer to token jar
>   *
> _
> 




* Re: [PATCH v2 1/2] tmpfs: Quick token library to allow scalable retrieval of tokens from token jar
  2010-06-02  8:58   ` Andi Kleen
@ 2010-06-09 22:36     ` Andrew Morton
  2010-06-10 17:06       ` Tim Chen
  0 siblings, 1 reply; 15+ messages in thread
From: Andrew Morton @ 2010-06-09 22:36 UTC (permalink / raw)
  To: Andi Kleen; +Cc: Tim Chen, linux-kernel, Andi Kleen, Hugh Dickins

On Wed, 02 Jun 2010 10:58:42 +0200
Andi Kleen <andi@firstfloor.org> wrote:

> Andrew Morton <akpm@linux-foundation.org> writes:
> 
> I am to blame for the "token jar" name.
> 
> > OK, thanks.  But I'm still struggling a bit in understanding the
> > applicability.  Where else might one use this?  What particular
> > problem is it good at solving?
> 
> I wrote a similar scheme a long time ago for IP protocol ID
> allocation (but that code never ended up in mainline).
> Back then it was called "cookie jar" and you can actually
> still google for it :)
> 
> It can be used for pretty much anything where you have
> a global resource and want to hand it out to different CPUs,
> but still have a global limit that is enforced.
> 
> For example a file system could use it for accounting 
> free space too.
> 
> In principle you could even use it for pids for example
> or other IDs.

You could, except the code's basically identical to percpu_counters,
only worse.

> >
> > I think the problem here is that you're using the term "token jar" as
> > if others already know what a token jar _is_.  I guess I was asleep
> > during that compsci lecture, but google doesn't appear to know about
> > the term either.
> >
> > And from looking at the tmpfs caller, it appears that the token jar is
> > being used to speed up the access to a single `unsigned long
> > free_blocks'.  Could we have used say a percpu_counter instead?
> 
> You need some synchronization, otherwise the accounting
> would not be exact and you could overflow. Yes you could
> open code it, but having it in a library is nicer.

The code doesn't have synchronisation!  qtoken_return() can modify the
per-cpu "cache" in parallel with qtoken_avail()'s walk across the
per-cpu "caches", yielding an inaccurate result.

This is all the same as percpu_add() executing in parallel with
percpu_counter_sum() or percpu_counter_sum_positive().

If we cannot tolerate that inaccuracy then these patches are no good
and we need a rethink.

If we _can_ tolerate that inaccuracy then percpu_counters can be used
here.  And doing that is preferable to reinventing percpu_counters
badly.

I'm just not seeing it.


* Re: [PATCH v2 1/2] tmpfs: Quick token library to allow scalable retrieval of tokens from token jar
  2010-06-02 17:32   ` Tim Chen
@ 2010-06-09 22:41     ` Andrew Morton
  0 siblings, 0 replies; 15+ messages in thread
From: Andrew Morton @ 2010-06-09 22:41 UTC (permalink / raw)
  To: Tim Chen; +Cc: linux-kernel, Andi Kleen, Hugh Dickins

On Wed, 02 Jun 2010 10:32:49 -0700
Tim Chen <tim.c.chen@linux.intel.com> wrote:

> On Tue, 2010-06-01 at 14:51 -0700, Andrew Morton wrote:
> 
> > >
> > > +static void qtoken_reap_cache(struct qtoken *token_jar)
> > > +{
> > > +	long cnt;
> > > +	int i;
> > > +
> > > +	for_each_possible_cpu(i) {
> > > +		atomic_long_t	*cache;
> > > +
> > > +		cache = per_cpu_ptr(token_jar->cache, i);
> > > +		cnt = atomic_long_xchg(cache, -1);
> > > +		if (cnt > 0)
> > > +			token_jar->pool += cnt;
> > > +	}
> > > +}
> > 
> > Not cpu-hotplug aware.  Also, inefficient if num_online_cpus is much
> > less than num_possible_cpus.
> > 
> > It's really not hard to do!  lib/percpu_counter.c is per-cpu aware and
> > it does this by sneakily connecting all counters together system-wide.
> > 
> 
> I've thought about using "for_each_online_cpu" in the above loop.
> However, if someone takes a cpu offline, then we will lose the tokens
> in that cpu's cache when we do reap cache.

No, that's what cpu hotplug notifiers are for.  percpu_counters does all
this.
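For the token jar, a notifier along these lines (sketch only, assuming the
jars are kept on some global list the way percpu_counters are) would let
you loop over online cpus and still not lose tokens:

	static int qtoken_cpu_callback(struct notifier_block *nb,
				       unsigned long action, void *hcpu)
	{
		unsigned int cpu = (unsigned long)hcpu;
		struct qtoken *jar;
		unsigned long flags;
		long cnt;

		if (action != CPU_DEAD)
			return NOTIFY_OK;

		list_for_each_entry(jar, &qtoken_jars, list) {	/* hypothetical list */
			cnt = atomic_long_xchg(per_cpu_ptr(jar->cache, cpu), -1);
			if (cnt > 0) {
				spin_lock_irqsave(&jar->lock, flags);
				jar->pool += cnt;
				spin_unlock_irqrestore(&jar->lock, flags);
			}
		}
		return NOTIFY_OK;
	}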

> > > +	/* We should have acquired lock of token pool before coming here */
> > > +	if (token_jar->pool < (reserve + tokens))
> > > +		qtoken_reap_cache(token_jar);
> > > +	if (token_jar->pool >= (reserve + tokens)) {
> > > +		token_jar->pool -= tokens;
> > > +		allocated = 0;
> > > +	}
> > > +	return allocated;
> > > +}
> > 
> > ENOSPC means "your disk is full".  But my disk isn't full.  If this
> > inappropriate (indeed, incorrect) error code is ever propagated back to
> > userspace then users will be justifiably confused.
> > 
> 
> Originally, I was using -ENOSPC to mean tmpfs being full and out of
> space.  Will -EBUSY to denote all resources are used be more
> appropriate?  
> 

Return the number of tokens which were allocated?


* Re: [PATCH v2 1/2] tmpfs: Quick token library to allow scalable retrieval of tokens from token jar
  2010-06-09 22:36     ` Andrew Morton
@ 2010-06-10 17:06       ` Tim Chen
  2010-06-11 21:52         ` Andrew Morton
  0 siblings, 1 reply; 15+ messages in thread
From: Tim Chen @ 2010-06-10 17:06 UTC (permalink / raw)
  To: Andrew Morton; +Cc: Andi Kleen, linux-kernel, Andi Kleen, Hugh Dickins

On Wed, 2010-06-09 at 15:36 -0700, Andrew Morton wrote:

> > 
> > You need some synchronization, otherwise the accounting
> > would not be exact and you could overflow. Yes you could
> > open code it, but having it in a library is nicer.
> 
> The code doesn't have synchronisation!  qtoken_return() can modify the
> per-cpu "cache" in parallel with qtoken_avail()'s walk across the
> per-cpu "caches", yielding an inaccurate result.
> 
> This is all the same as percpu_add() executing in parallel with
> percpu_counter_sum() or percpu_counter_sum_positive().
> 
> If we cannot tolerate that inaccuracy then these patches are no good
> and we need a rethink.
> 
> If we _can_ tolerate that inaccuracy then percpu_counters can be used
> here.  And doing that is preferable to reinventing percpu_counters
> badly.
> 
> I'm just not seeing it.


The first version of the patch does a qtoken_reap_cache to reap the
tokens into pool before doing an accounting of the tokens and the token
count will be precise.  It was not done in the second version of the
patch due to objection that it may be  costly, and also the tokens count
will be fluctuating anyway.  However, qtoken_avail is not called very
often (usually caller will use qtoken_get to access the tokens and it
will not need a total accounting of the tokens). We can do it the
previous way and there will be no inaccuracies. 

Tim





* Re: [PATCH v2 1/2] tmpfs: Quick token library to allow scalable retrieval of tokens from token jar
  2010-06-10 17:06       ` Tim Chen
@ 2010-06-11 21:52         ` Andrew Morton
  2010-06-11 22:06           ` Tim Chen
  0 siblings, 1 reply; 15+ messages in thread
From: Andrew Morton @ 2010-06-11 21:52 UTC (permalink / raw)
  To: Tim Chen; +Cc: Andi Kleen, linux-kernel, Andi Kleen, Hugh Dickins

On Thu, 10 Jun 2010 10:06:14 -0700
Tim Chen <tim.c.chen@linux.intel.com> wrote:

> On Wed, 2010-06-09 at 15:36 -0700, Andrew Morton wrote:
> 
> > > 
> > > You need some synchronization, otherwise the accounting
> > > would not be exact and you could overflow. Yes you could
> > > open code it, but having it in a library is nicer.
> > 
> > The code doesn't have synchronisation!  qtoken_return() can modify the
> > per-cpu "cache" in parallel with qtoken_avail()'s walk across the
> > per-cpu "caches", yielding an inaccurate result.
> > 
> > This is all the same as percpu_add() executing in parallel with
> > percpu_counter_sum() or percpu_counter_sum_positive().
> > 
> > If we cannot tolerate that inaccuracy then these patches are no good
> > and we need a rethink.
> > 
> > If we _can_ tolerate that inaccuracy then percpu_counters can be used
> > here.  And doing that is preferable to reinventing percpu_counters
> > badly.
> > 
> > I'm just not seeing it.
> 
> 
> The first version of the patch does a qtoken_reap_cache to reap the
> tokens into pool before doing an accounting of the tokens and the token
> count will be precise.  It was not done in the second version of the
> patch due to objection that it may be  costly, and also the tokens count
> will be fluctuating anyway.  However, qtoken_avail is not called very
> often (usually caller will use qtoken_get to access the tokens and it
> will not need a total accounting of the tokens). We can do it the
> previous way and there will be no inaccuracies. 
> 

afaict, your proposed implementation could have used percpu_counters. 
If so, that would be by far the best way of doing it, because that
doesn't require the addition of new infrastructure.


* Re: [PATCH v2 1/2] tmpfs: Quick token library to allow scalable retrieval of tokens from token jar
  2010-06-11 21:52         ` Andrew Morton
@ 2010-06-11 22:06           ` Tim Chen
  2010-06-11 22:26             ` Andrew Morton
  0 siblings, 1 reply; 15+ messages in thread
From: Tim Chen @ 2010-06-11 22:06 UTC (permalink / raw)
  To: Andrew Morton; +Cc: Andi Kleen, linux-kernel, Andi Kleen, Hugh Dickins

On Fri, 2010-06-11 at 14:52 -0700, Andrew Morton wrote:
> On Thu, 10 Jun 2010 10:06:14 -0700
> Tim Chen <tim.c.chen@linux.intel.com> wrote:
> 
> > On Wed, 2010-06-09 at 15:36 -0700, Andrew Morton wrote:
> > 
> > > > 
> > > > You need some synchronization, otherwise the accounting
> > > > would not be exact and you could overflow. Yes you could
> > > > open code it, but having it in a library is nicer.
> > > 
> > > The code doesn't have synchronisation!  qtoken_return() can modify the
> > > per-cpu "cache" in parallel with qtoken_avail()'s walk across the
> > > per-cpu "caches", yielding an inaccurate result.
> > > 
> > > This is all the same as percpu_add() executing in parallel with
> > > percpu_counter_sum() or percpu_counter_sum_positive().
> > > 
> > > If we cannot tolerate that inaccuracy then these patches are no good
> > > and we need a rethink.
> > > 
> > > If we _can_ tolerate that inaccuracy then percpu_counters can be used
> > > here.  And doing that is preferable to reinventing percpu_counters
> > > badly.
> > > 
> > > I'm just not seeing it.
> > 
> > 
> > The first version of the patch does a qtoken_reap_cache to reap the
> > tokens into pool before doing an accounting of the tokens and the token
> > count will be precise.  It was not done in the second version of the
> > patch due to objection that it may be  costly, and also the tokens count
> > will be fluctuating anyway.  However, qtoken_avail is not called very
> > often (usually caller will use qtoken_get to access the tokens and it
> > will not need a total accounting of the tokens). We can do it the
> > previous way and there will be no inaccuracies. 
> > 
> 
> afaict, your proposed implementation could have used percpu_counters. 
> If so, that would be by far the best way of doing it, because that
> doesn't require the addition of new infrastructure.

Just having percpu counters is not enough.
There is additional logic required to manage the per cpu counters: 
(1) set limit on the total number of tokens (2) distribute tokens into
per cpu counter (3) logic to coordinate where to get additional tokens
when we run out of tokens in the per cpu counter. (4) rebalance the
tokens when we have too many of the tokens returned to a per cpu
counter.  I'm trying to encapsulate this logic into the library.
Otherwise, this logic will still need to be duplicated in the code
using the per cpu counters.  

Tim



* Re: [PATCH v2 1/2] tmpfs: Quick token library to allow scalable retrieval of tokens from token jar
  2010-06-11 22:06           ` Tim Chen
@ 2010-06-11 22:26             ` Andrew Morton
  2010-06-11 23:29               ` Tim Chen
  0 siblings, 1 reply; 15+ messages in thread
From: Andrew Morton @ 2010-06-11 22:26 UTC (permalink / raw)
  To: Tim Chen; +Cc: Andi Kleen, linux-kernel, Andi Kleen, Hugh Dickins

On Fri, 11 Jun 2010 15:06:43 -0700
Tim Chen <tim.c.chen@linux.intel.com> wrote:

> > afaict, your proposed implementation could have used percpu_counters. 
> > If so, that would be by far the best way of doing it, because that
> > doesn't require the addition of new infrastructure.
> 
> Just having percpu counters is not enough.
> There is additional logic required to manage the per cpu counters: 
> (1) set limit on the total number of tokens (2) distribute tokens into
> per cpu counter (3) logic to coordinate where to get additional tokens
> when we run out of tokens in the per cpu counter. (4) rebalance the
> tokens when we have too many of the tokens returned to a per cpu
> counter.  I'm trying to encapsulate this logic into the library.
> Otherwise, this logic will still need to be duplicated in the code
> using the per cpu counters.  

There's no need for any of that.

free_blocks is just a counter.  You add things to it, you subtract
things from it and you compare it with a threshold.

Something like the below.

It's a bit buggy - using percpu_counter_init() against an
already-initialised percpu_counter() is leaky.  I suspect that's
happening in remount_fs.

A better approach would be to remove free_blocks altogether and add a
new `percpu_counter used_blocks;' which simply counts how many blocks
are presently in use.  Such a thing would then never need to be
reinitialised.
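With used_blocks, the check in the shmem_getpage() path could then look
something like this (sketch only, not part of the patch below; the
locking and shmem_acct_block() handling of the real path are omitted):

	if (sbinfo->max_blocks) {
		if (percpu_counter_read(&sbinfo->used_blocks) >=
						sbinfo->max_blocks) {
			error = -ENOSPC;
			goto failed;
		}
		percpu_counter_inc(&sbinfo->used_blocks);
		inode->i_blocks += BLOCKS_PER_PAGE;
	}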

 include/linux/shmem_fs.h |    3 ++-
 mm/shmem.c               |   20 +++++++++++---------
 2 files changed, 13 insertions(+), 10 deletions(-)

diff -puN mm/shmem.c~a mm/shmem.c
--- a/mm/shmem.c~a
+++ a/mm/shmem.c
@@ -28,6 +28,7 @@
 #include <linux/file.h>
 #include <linux/mm.h>
 #include <linux/module.h>
+#include <linux/percpu_counter.h>
 #include <linux/swap.h>
 
 static struct vfsmount *shm_mnt;
@@ -233,8 +234,8 @@ static void shmem_free_blocks(struct ino
 {
 	struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
 	if (sbinfo->max_blocks) {
+		percpu_counter_add(&sbinfo->free_blocks, pages);
 		spin_lock(&sbinfo->stat_lock);
-		sbinfo->free_blocks += pages;
 		inode->i_blocks -= pages*BLOCKS_PER_PAGE;
 		spin_unlock(&sbinfo->stat_lock);
 	}
@@ -422,11 +423,11 @@ static swp_entry_t *shmem_swp_alloc(stru
 		 */
 		if (sbinfo->max_blocks) {
 			spin_lock(&sbinfo->stat_lock);
-			if (sbinfo->free_blocks <= 1) {
+			if (percpu_counter_read(&sbinfo->free_blocks) <= 1) {
 				spin_unlock(&sbinfo->stat_lock);
 				return ERR_PTR(-ENOSPC);
 			}
-			sbinfo->free_blocks--;
+			percpu_counter_dec(&sbinfo->free_blocks);
 			inode->i_blocks += BLOCKS_PER_PAGE;
 			spin_unlock(&sbinfo->stat_lock);
 		}
@@ -1389,14 +1390,14 @@ repeat:
 		sbinfo = SHMEM_SB(inode->i_sb);
 		if (sbinfo->max_blocks) {
 			spin_lock(&sbinfo->stat_lock);
-			if (sbinfo->free_blocks == 0 ||
+			if (percpu_counter_read(&sbinfo->free_blocks) <= 0 ||
 			    shmem_acct_block(info->flags)) {
 				spin_unlock(&sbinfo->stat_lock);
 				spin_unlock(&info->lock);
 				error = -ENOSPC;
 				goto failed;
 			}
-			sbinfo->free_blocks--;
+			percpu_counter_dec(&sbinfo->free_blocks);
 			inode->i_blocks += BLOCKS_PER_PAGE;
 			spin_unlock(&sbinfo->stat_lock);
 		} else if (shmem_acct_block(info->flags)) {
@@ -1795,7 +1796,8 @@ static int shmem_statfs(struct dentry *d
 	spin_lock(&sbinfo->stat_lock);
 	if (sbinfo->max_blocks) {
 		buf->f_blocks = sbinfo->max_blocks;
-		buf->f_bavail = buf->f_bfree = sbinfo->free_blocks;
+		buf->f_bavail = buf->f_bfree =
+				percpu_counter_read(&sbinfo->free_blocks);
 	}
 	if (sbinfo->max_inodes) {
 		buf->f_files = sbinfo->max_inodes;
@@ -2251,7 +2253,7 @@ static int shmem_remount_fs(struct super
 		return error;
 
 	spin_lock(&sbinfo->stat_lock);
-	blocks = sbinfo->max_blocks - sbinfo->free_blocks;
+	blocks = sbinfo->max_blocks - percpu_counter_read(&sbinfo->free_blocks);
 	inodes = sbinfo->max_inodes - sbinfo->free_inodes;
 	if (config.max_blocks < blocks)
 		goto out;
@@ -2270,7 +2272,7 @@ static int shmem_remount_fs(struct super
 
 	error = 0;
 	sbinfo->max_blocks  = config.max_blocks;
-	sbinfo->free_blocks = config.max_blocks - blocks;
+	percpu_counter_init(&sbinfo->free_blocks, config.max_blocks - blocks);
 	sbinfo->max_inodes  = config.max_inodes;
 	sbinfo->free_inodes = config.max_inodes - inodes;
 
@@ -2345,7 +2347,7 @@ int shmem_fill_super(struct super_block 
 #endif
 
 	spin_lock_init(&sbinfo->stat_lock);
-	sbinfo->free_blocks = sbinfo->max_blocks;
+	percpu_counter_init(&sbinfo->free_blocks, sbinfo->max_blocks);
 	sbinfo->free_inodes = sbinfo->max_inodes;
 
 	sb->s_maxbytes = SHMEM_MAX_BYTES;
diff -puN include/linux/shmem_fs.h~a include/linux/shmem_fs.h
--- a/include/linux/shmem_fs.h~a
+++ a/include/linux/shmem_fs.h
@@ -3,6 +3,7 @@
 
 #include <linux/swap.h>
 #include <linux/mempolicy.h>
+#include <linux/percpu_counter.h>
 
 /* inode in-kernel data */
 
@@ -23,7 +24,7 @@ struct shmem_inode_info {
 
 struct shmem_sb_info {
 	unsigned long max_blocks;   /* How many blocks are allowed */
-	unsigned long free_blocks;  /* How many are left for allocation */
+	struct percpu_counter free_blocks;  /* How many are left for allocation */
 	unsigned long max_inodes;   /* How many inodes are allowed */
 	unsigned long free_inodes;  /* How many are left for allocation */
 	spinlock_t stat_lock;	    /* Serialize shmem_sb_info changes */
_



* Re: [PATCH v2 1/2] tmpfs: Quick token library to allow scalable retrieval of tokens from token jar
  2010-06-11 22:26             ` Andrew Morton
@ 2010-06-11 23:29               ` Tim Chen
  2010-06-11 23:54                 ` Andrew Morton
  0 siblings, 1 reply; 15+ messages in thread
From: Tim Chen @ 2010-06-11 23:29 UTC (permalink / raw)
  To: Andrew Morton; +Cc: Andi Kleen, linux-kernel, Andi Kleen, Hugh Dickins

On Fri, 2010-06-11 at 15:26 -0700, Andrew Morton wrote:

>  	}
> @@ -422,11 +423,11 @@ static swp_entry_t *shmem_swp_alloc(stru
>  		 */
>  		if (sbinfo->max_blocks) {
>  			spin_lock(&sbinfo->stat_lock);
> -			if (sbinfo->free_blocks <= 1) {
> +			if (percpu_counter_read(&sbinfo->free_blocks) <= 1) {
>  				spin_unlock(&sbinfo->stat_lock);

Thanks for pointing me to look at this alternative implementation.

However, looking at the percpu counter code, it appears that the
percpu_counter_read is imprecise.  The counters in the per cpu counters
are not accounted and the value read may be much less than the true
amount of free blocks left when used in the patch above.  We could fail
the above test and not allocate pages when we actually have additional
pages available. Using percpu_counter_sum will give the precise count
but will cause the acquisition of the spin lock in the percpu_counter
and slow things down in this performance critical path.  If we feel
that we could tolerate fuzziness on the size we configured for tmpfs,
then this could be the way to go.

However, qtoken library implementation will impose a precise limit and
has the per cpu counter's speed advantage.

Tim



* Re: [PATCH v2 1/2] tmpfs: Quick token library to allow scalable retrieval of tokens from token jar
  2010-06-11 23:29               ` Tim Chen
@ 2010-06-11 23:54                 ` Andrew Morton
  2010-06-12  7:36                   ` Andi Kleen
  0 siblings, 1 reply; 15+ messages in thread
From: Andrew Morton @ 2010-06-11 23:54 UTC (permalink / raw)
  To: Tim Chen; +Cc: Andi Kleen, linux-kernel, Andi Kleen, Hugh Dickins

On Fri, 11 Jun 2010 16:29:59 -0700
Tim Chen <tim.c.chen@linux.intel.com> wrote:

> On Fri, 2010-06-11 at 15:26 -0700, Andrew Morton wrote:
> 
> >  	}
> > @@ -422,11 +423,11 @@ static swp_entry_t *shmem_swp_alloc(stru
> >  		 */
> >  		if (sbinfo->max_blocks) {
> >  			spin_lock(&sbinfo->stat_lock);
> > -			if (sbinfo->free_blocks <= 1) {
> > +			if (percpu_counter_read(&sbinfo->free_blocks) <= 1) {
> >  				spin_unlock(&sbinfo->stat_lock);
> 
> Thanks for pointing me to look at this alternative implementation.
> 
> However, looking at the percpu counter code, it appears that the
> percpu_counter_read is imprecise.

Sure, that's inevitable if we want to avoid one-atomic-op-per-operation.

>  The counters in the per cpu counters
> are not accounted and the value read may be much less than the true
> amount of free blocks left when used in the patch above.

The comparisons with 0 and 1 are ugly (although not necessarily wrong).
 The code would be nicer if we replace free_blocks with used_blocks and
perform comparisons against max_blocks.

>  We could fail
> the above test and not allocate pages when we actually have additional
> pages available.

Yup.  We're assuming here that we can tolerate overshooting max_blocks a bit.

> Using percpu_counter_sum will give the precise count
> but will cause the acquisition of the spin lock in the percpu_counter
> and slow things down in this performance critical path.  If we feel
> that we could tolerate fuzziness on the size we configured for tmpfs,
> then this could be the way to go.
> 
> However, qtoken library implementation will impose a precise limit and
> has the per cpu counter's speed advantage.

percpu_counters have a precise limit too!  It's
percpu_counter_batch*num_online_cpus.  You can implement your own
tolerance by not using percpu_counter_batch: pass your own batch into
__percpu_counter_add().
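For example, the decrement in the tmpfs patch above could become
(the batch of 4 here is arbitrary):

	/* fold into the global count once a cpu's delta reaches 4, giving a
	 * worst-case error of roughly 4 * num_online_cpus() blocks
	 */
	__percpu_counter_add(&sbinfo->free_blocks, -1, 4);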



There's a trick that can be done to improve accuracy.  When checking to
see if the fs is full, use percpu_counter_read().  If the number that
percpu_counter_read() returns is "close" to max_blocks, then start
using the more expensive percpu_counter_sum().  So the kernel will be
fast, until the disk gets to within (batch*num_online_cpus) blocks of
being full.
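
In shmem terms the check could look something like this (a sketch only,
not a tested patch):

	s64 free = percpu_counter_read(&sbinfo->free_blocks);

	/* only pay for the exact sum when the cheap read says we may
	 * be within the worst-case per cpu error of being full */
	if (free <= percpu_counter_batch * num_online_cpus())
		free = percpu_counter_sum(&sbinfo->free_blocks);

	if (free <= 1)
		return ERR_PTR(-ENOSPC);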

This is not the first time I've seen that requirement, and it would be
a good idea to implement the concept as an addition to the
percpu_counter library.  Say, percpu_counter_compare().

percpu_counter_compare(struct percpu_counter *fbc, s64 rhs) would
compare percpu_counter_read() with `rhs' and if they're within
num_online_cpus*percpu_counter_batch, call percpu_counter_sum().

__percpu_counter_compare() would take the additional `batch' argument.
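
i.e. something like (just a sketch of the proposed signature):

	int __percpu_counter_compare(struct percpu_counter *fbc, s64 rhs,
				     s32 batch);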

I think.  Needs a bit of head-scratching, because callers don't really
care about num_online_cpus.  The caller only really cares about the
absolute error.

(Where the heck did the "fbc" name come from?  I forget...)

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v2 1/2] tmpfs: Quick token library to allow scalable retrieval of tokens from token jar
  2010-06-11 23:54                 ` Andrew Morton
@ 2010-06-12  7:36                   ` Andi Kleen
  2010-06-12 15:27                     ` Andrew Morton
  0 siblings, 1 reply; 15+ messages in thread
From: Andi Kleen @ 2010-06-12  7:36 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Tim Chen, Andi Kleen, linux-kernel, Andi Kleen, Hugh Dickins

On Fri, Jun 11, 2010 at 04:54:25PM -0700, Andrew Morton wrote:
> > >  			spin_lock(&sbinfo->stat_lock);
> > > -			if (sbinfo->free_blocks <= 1) {
> > > +			if (percpu_counter_read(&sbinfo->free_blocks) <= 1) {
> > >  				spin_unlock(&sbinfo->stat_lock);
> > 
> > Thanks for pointing me to look at this alternative implementation.
> > 
> > However, looking at the percpu counter code, it appears that the
> > percpu_counter_read is imprecise.
> 
> Sure, that's inevitable if we want to avoid one-atomic-op-per-operation.

Only if you use the wrong primitive.
It's not inevitable as qtoken has proven.

> percpu_counters have a precise limit too!  It's
> percpu_counter_batch*num_online_cpus.  You can implement your own
> tolerance by not using percpu_counter_batch: pass your own batch into
> __percpu_counter_add().

Such a batch could be rather large on a large system.

e.g. on a 32 CPU system with batch 16 it's already 2MB (32 * 16 blocks
of 4kB).

> There's a trick that can be done to improve accuracy.  When checking to
> see if the fs is full, use percpu_counter_read().  If the number that
> percpu_counter_read() returns is "close" to max_blocks, then start
> using the more expensive percpu_counter_sum().  So the kernel will be
> fast, until the disk gets to within (batch*num_online_cpus) blocks of
> being full.

Ok it would work, but you would get a big dip in performance
once you're near the limit.

And the more CPUs you have the larger this window of slowness becomes.

> 
> This is not the first time I've seen that requirement, and it would be
> a good idea to implement the concept within an addition to the
> percpu_counter library.  Say, percpu_counter_compare().

But why not just use qtoken which solves this without any hacks?

I still think qtoken is a better proposal. Even if it's a bit more
code, at least it solves this all cleanly without hacks and
arbitrary limits.

Granted, perhaps it needs other users to really pay off its code weight.
I'll see if I can find any others.

-Andi
-- 
ak@linux.intel.com -- Speaking for myself only.

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v2 1/2] tmpfs: Quick token library to allow scalable retrieval of tokens from token jar
  2010-06-12  7:36                   ` Andi Kleen
@ 2010-06-12 15:27                     ` Andrew Morton
  2010-06-15  1:24                       ` Tim Chen
  0 siblings, 1 reply; 15+ messages in thread
From: Andrew Morton @ 2010-06-12 15:27 UTC (permalink / raw)
  To: Andi Kleen; +Cc: Tim Chen, linux-kernel, Andi Kleen, Hugh Dickins

On Sat, 12 Jun 2010 09:36:39 +0200 Andi Kleen <andi@firstfloor.org> wrote:

> On Fri, Jun 11, 2010 at 04:54:25PM -0700, Andrew Morton wrote:
> > > >  			spin_lock(&sbinfo->stat_lock);
> > > > -			if (sbinfo->free_blocks <= 1) {
> > > > +			if (percpu_counter_read(&sbinfo->free_blocks) <= 1) {
> > > >  				spin_unlock(&sbinfo->stat_lock);
> > > 
> > > Thanks for pointing me to look at this alternative implementation.
> > > 
> > > However, looking at the percpu counter code, it appears that the
> > > percpu_counter_read is imprecise.
> > 
> > Sure, that's inevitable if we want to avoid one-atomic-op-per-operation.
> 
> Only if you use the wrong primitive.
> It's not inevitable as qtoken has proven.

No it has not.  qtoken is inaccurate.

> > percpu_counters have a precise limit too!  It's
> > percpu_counter_batch*num_online_cpus.  You can implement your own
> > tolerance by not using percpu_counter_batch: pass your own batch into
> > __percpu_counter_add().
> 
> Such a batch could be rather large on a large system.
> 
> e.g. on a 32 CPU system with batch 16 it's already 2MB.

That's not much.

> > There's a trick that can be done to improve accuracy.  When checking to
> > see if the fs is full, use percpu_counter_read().  If the number that
> > percpu_counter_read() returns is "close" to max_blocks, then start
> > using the more expensive percpu_counter_sum().  So the kernel will be
> > fast, until the disk gets to within (batch*num_online_cpus) blocks of
> > being full.
> 
> Ok it would work, but you would get a big dip in performance
> once you're near the limit.
> 
> And the more CPUs you have the larger this window of slowness becomes.
> 
> > 
> > This is not the first time I've seen that requirement, and it would be
> > a good idea to implement the concept within an addition to the
> > percpu_counter library.  Say, percpu_counter_compare().
> 
> But why not just use qtoken which solves this without any hacks?

a) because qtoken needs "hacks" to solve it

b) because the qtoken code is fairly revolting

c) because the facilities which qtoken provides are so darn close to
   those which percpu_counters provide that it's quite unlikely that
   there will be another user of qtoken which couldn't have used
   percpu-counters

d) because the down-counter-like behaviour of qtoken is weird.

> I still think qtoken is a better proposal.

why?

> Even if it's a bit more
> code, at least it solves this all cleanly without hacks and
> arbitrary limits.

Well maybe there are still undiscovered secrets in there.  For its size
this is one of the worst-explained pieces of code I've seen to date.  I
still haven't really worked out the significance of setting the per-cpu
cache to -1.


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH v2 1/2] tmpfs: Quick token library to allow scalable retrieval of tokens from token jar
  2010-06-12 15:27                     ` Andrew Morton
@ 2010-06-15  1:24                       ` Tim Chen
  0 siblings, 0 replies; 15+ messages in thread
From: Tim Chen @ 2010-06-15  1:24 UTC (permalink / raw)
  To: Andrew Morton; +Cc: Andi Kleen, linux-kernel, Andi Kleen, Hugh Dickins

Andrew,

I have tweaked your patch a bit and put in your suggestion of
implementing a percpu_counter_compare (see below), which
allows for an accurate but fast comparison.  This
is just meant for discussion, so I have not broken
it into two patches (the percpu_counter part and the shmem part).

One thing still bothers me with this approach:
when we are doing a remount of tmpfs, we cannot lock the
percpu_counter, so we cannot guarantee that it won't get updated
after we read it.  We could therefore overshoot
the new quota after a remount, or miss accounting for the
pages being returned while we remount.  Is this tolerable?

My previous qtoken implementation uses a special value (-1)
to denote that the per cpu cache is disabled, with access synchronized
by the lock on the whole counter.  So I didn't have to worry
that my count was inaccurate.  This facility to lock access and freeze
counter updates is not available in the current percpu_counter
implementation.
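
Roughly, the convention looked like this (a sketch of the idea only,
not the posted qtoken code):

	atomic_long_t *cache;

	/* a per cpu cache value of -1 means "caches disabled": the cpu
	 * must go to the central pool under the jar lock, which a
	 * resize/remount can hold to freeze updates and get an exact
	 * count */
	cache = per_cpu_ptr(token_jar->cache, get_cpu());
	if (atomic_long_read(cache) == -1) {
		spin_lock(&token_jar->lock);
		/* take from or return to token_jar->pool directly */
		spin_unlock(&token_jar->lock);
	}
	put_cpu();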

Tim


diff --git a/include/linux/percpu_counter.h b/include/linux/percpu_counter.h
index c88d67b..8a7d510 100644
--- a/include/linux/percpu_counter.h
+++ b/include/linux/percpu_counter.h
@@ -40,6 +40,7 @@ void percpu_counter_destroy(struct percpu_counter *fbc);
 void percpu_counter_set(struct percpu_counter *fbc, s64 amount);
 void __percpu_counter_add(struct percpu_counter *fbc, s64 amount, s32 batch);
 s64 __percpu_counter_sum(struct percpu_counter *fbc);
+int percpu_counter_compare(struct percpu_counter *fbc, s64 rhs);
 
 static inline void percpu_counter_add(struct percpu_counter *fbc, s64 amount)
 {
@@ -98,6 +99,16 @@ static inline void percpu_counter_set(struct percpu_counter *fbc, s64 amount)
 	fbc->count = amount;
 }
 
+static inline int percpu_counter_compare(struct percpu_counter *fbc, s64 rhs)
+{
+	if (fbc->count > rhs)
+		return 1;
+	else if (fbc->count < rhs)
+		return -1;
+	else
+		return 0;
+}
+
 static inline void
 percpu_counter_add(struct percpu_counter *fbc, s64 amount)
 {
diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
index e164291..d01d69e 100644
--- a/include/linux/shmem_fs.h
+++ b/include/linux/shmem_fs.h
@@ -3,6 +3,7 @@
 
 #include <linux/swap.h>
 #include <linux/mempolicy.h>
+#include <linux/percpu_counter.h>
 
 /* inode in-kernel data */
 
@@ -23,7 +24,7 @@ struct shmem_inode_info {
 
 struct shmem_sb_info {
 	unsigned long max_blocks;   /* How many blocks are allowed */
-	unsigned long free_blocks;  /* How many are left for allocation */
+	struct percpu_counter free_blocks;  /* How many are left for allocation */
 	unsigned long max_inodes;   /* How many inodes are allowed */
 	unsigned long free_inodes;  /* How many are left for allocation */
 	spinlock_t stat_lock;	    /* Serialize shmem_sb_info changes */
diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c
index aeaa6d7..583841d 100644
--- a/lib/percpu_counter.c
+++ b/lib/percpu_counter.c
@@ -137,6 +137,33 @@ static int __cpuinit percpu_counter_hotcpu_callback(struct notifier_block *nb,
 	return NOTIFY_OK;
 }
 
+/* 
+ * Compare counter against given value. 
+ * Return 1 if greater, 0 if equal and -1 if less
+ */ 
+int percpu_counter_compare(struct percpu_counter *fbc, s64 rhs)
+{
+	s64	count;
+
+	count = percpu_counter_read(fbc);
+	/* Check to see if approx count will be sufficient for comparison */
+	if (abs(count - rhs) > (percpu_counter_batch * num_online_cpus())) {
+		if (count > rhs)
+			return 1;
+		else
+			return -1;
+	}
+	/* Need to use more precise count */
+	count = percpu_counter_sum(fbc);
+	if (count > rhs)
+		return 1;
+	else if (count < rhs)
+		return -1;
+	else
+		return 0;
+}
+EXPORT_SYMBOL(percpu_counter_compare);
+
 static int __init percpu_counter_startup(void)
 {
 	compute_batch_value();
diff --git a/mm/shmem.c b/mm/shmem.c
index eef4ebe..7cd4f24 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -28,6 +28,7 @@
 #include <linux/file.h>
 #include <linux/mm.h>
 #include <linux/module.h>
+#include <linux/percpu_counter.h>
 #include <linux/swap.h>
 
 static struct vfsmount *shm_mnt;
@@ -233,8 +234,8 @@ static void shmem_free_blocks(struct inode *inode, long pages)
 {
 	struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
 	if (sbinfo->max_blocks) {
+		percpu_counter_add(&sbinfo->free_blocks, pages);
 		spin_lock(&sbinfo->stat_lock);
-		sbinfo->free_blocks += pages;
 		inode->i_blocks -= pages*BLOCKS_PER_PAGE;
 		spin_unlock(&sbinfo->stat_lock);
 	}
@@ -422,11 +423,11 @@ static swp_entry_t *shmem_swp_alloc(struct shmem_inode_info *info, unsigned long
 		 */
 		if (sbinfo->max_blocks) {
 			spin_lock(&sbinfo->stat_lock);
-			if (sbinfo->free_blocks <= 1) {
+			if (percpu_counter_compare(&sbinfo->free_blocks, 1) <= 0) {
 				spin_unlock(&sbinfo->stat_lock);
 				return ERR_PTR(-ENOSPC);
 			}
-			sbinfo->free_blocks--;
+			percpu_counter_dec(&sbinfo->free_blocks);
 			inode->i_blocks += BLOCKS_PER_PAGE;
 			spin_unlock(&sbinfo->stat_lock);
 		}
@@ -1386,14 +1387,14 @@ repeat:
 		sbinfo = SHMEM_SB(inode->i_sb);
 		if (sbinfo->max_blocks) {
 			spin_lock(&sbinfo->stat_lock);
-			if (sbinfo->free_blocks == 0 ||
+			if ((percpu_counter_compare(&sbinfo->free_blocks, 0) <= 0) ||
 			    shmem_acct_block(info->flags)) {
 				spin_unlock(&sbinfo->stat_lock);
 				spin_unlock(&info->lock);
 				error = -ENOSPC;
 				goto failed;
 			}
-			sbinfo->free_blocks--;
+			percpu_counter_dec(&sbinfo->free_blocks);
 			inode->i_blocks += BLOCKS_PER_PAGE;
 			spin_unlock(&sbinfo->stat_lock);
 		} else if (shmem_acct_block(info->flags)) {
@@ -1794,7 +1795,8 @@ static int shmem_statfs(struct dentry *dentry, struct kstatfs *buf)
 	spin_lock(&sbinfo->stat_lock);
 	if (sbinfo->max_blocks) {
 		buf->f_blocks = sbinfo->max_blocks;
-		buf->f_bavail = buf->f_bfree = sbinfo->free_blocks;
+		buf->f_bavail = buf->f_bfree =
+				percpu_counter_sum(&sbinfo->free_blocks);
 	}
 	if (sbinfo->max_inodes) {
 		buf->f_files = sbinfo->max_inodes;
@@ -2258,7 +2260,7 @@ static int shmem_remount_fs(struct super_block *sb, int *flags, char *data)
 		return error;
 
 	spin_lock(&sbinfo->stat_lock);
-	blocks = sbinfo->max_blocks - sbinfo->free_blocks;
+	blocks = sbinfo->max_blocks - percpu_counter_sum(&sbinfo->free_blocks);
 	inodes = sbinfo->max_inodes - sbinfo->free_inodes;
 	if (config.max_blocks < blocks)
 		goto out;
@@ -2277,7 +2279,7 @@ static int shmem_remount_fs(struct super_block *sb, int *flags, char *data)
 
 	error = 0;
 	sbinfo->max_blocks  = config.max_blocks;
-	sbinfo->free_blocks = config.max_blocks - blocks;
+	percpu_counter_init(&sbinfo->free_blocks, config.max_blocks - blocks);
 	sbinfo->max_inodes  = config.max_inodes;
 	sbinfo->free_inodes = config.max_inodes - inodes;
 
@@ -2352,7 +2354,7 @@ int shmem_fill_super(struct super_block *sb, void *data, int silent)
 #endif
 
 	spin_lock_init(&sbinfo->stat_lock);
-	sbinfo->free_blocks = sbinfo->max_blocks;
+	percpu_counter_init(&sbinfo->free_blocks, sbinfo->max_blocks);
 	sbinfo->free_inodes = sbinfo->max_inodes;
 
 	sb->s_maxbytes = SHMEM_MAX_BYTES;



^ permalink raw reply related	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2010-06-15  1:28 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz / follow: Atom feed
-- links below jump to the message on this page --)
2010-05-26 19:32 [PATCH v2 1/2] tmpfs: Quick token library to allow scalable retrieval of tokens from token jar Tim Chen
2010-06-01 21:51 ` Andrew Morton
2010-06-02  8:58   ` Andi Kleen
2010-06-09 22:36     ` Andrew Morton
2010-06-10 17:06       ` Tim Chen
2010-06-11 21:52         ` Andrew Morton
2010-06-11 22:06           ` Tim Chen
2010-06-11 22:26             ` Andrew Morton
2010-06-11 23:29               ` Tim Chen
2010-06-11 23:54                 ` Andrew Morton
2010-06-12  7:36                   ` Andi Kleen
2010-06-12 15:27                     ` Andrew Morton
2010-06-15  1:24                       ` Tim Chen
2010-06-02 17:32   ` Tim Chen
2010-06-09 22:41     ` Andrew Morton
