* [PATCH 010 of 11] knfsd: make pools numa aware
@ 2006-07-25 5:16 Greg Banks
2006-07-25 12:43 ` Trond Myklebust
0 siblings, 1 reply; 3+ messages in thread
From: Greg Banks @ 2006-07-25 5:16 UTC (permalink / raw)
To: Neil Brown; +Cc: Linux NFS Mailing List
knfsd: Actually implement multiple pools. On NUMA machines, allocate
a svc_pool per NUMA node; on SMP a svc_pool per CPU; otherwise a single
global pool. Enqueue sockets on the svc_pool corresponding to the CPU
on which the socket bh is run (i.e. the NIC interrupt CPU). Threads
have their cpu mask set to limit them to the CPUs in the svc_pool that
owns them.
This is the patch that allows an Altix to scale NFS traffic linearly
beyond 4 CPUs and 4 NICs.
Signed-off-by: Greg Banks <gnb@melbourne.sgi.com>
---
include/linux/sunrpc/svc.h | 62 +++++++++++
net/sunrpc/svc.c | 184 +++++++++++++++++++++++++++++++++-
net/sunrpc/svcsock.c | 7 +
3 files changed, 251 insertions(+), 2 deletions(-)
Index: linus-git/net/sunrpc/svc.c
===================================================================
--- linus-git.orig/net/sunrpc/svc.c 2006-07-24 22:16:36.157203063 +1000
+++ linus-git/net/sunrpc/svc.c 2006-07-24 22:54:13.557820093 +1000
@@ -4,6 +4,10 @@
* High-level RPC service routines
*
* Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
+ *
+ * Multiple threads pools and NUMAisation
+ * Copyright (c) 2006 Silicon Graphics, Inc.
+ * by Greg Banks <gnb@melbourne.sgi.com>
*/
#include <linux/linkage.h>
@@ -24,6 +28,161 @@
#define RPCDBG_FACILITY RPCDBG_SVCDSP
#define RPC_PARANOIA 1
+
+#if SVC_HAVE_MULTIPLE_POOLS
+
+struct svc_pool_map svc_pool_map = { .mode = -1, .init = 0 };
+
+/*
+ * Build the global map of cpus to pools and vice versa.
+ */
+static unsigned int
+svc_pool_map_init(void)
+{
+ struct svc_pool_map *m = &svc_pool_map;
+ unsigned int node;
+ unsigned int cpu;
+ unsigned int pidx = 0;
+ unsigned int maxpools;
+
+ if (m->init)
+ return m->npools;
+ m->init = 1;
+
+ if (m->mode < 0) {
+ /*
+ * Detect best pool mapping mode heuristically.
+ */
+ m->mode = 0; /* default: one global pool */
+#ifdef CONFIG_NUMA
+ if (num_online_nodes() > 1) {
+ /*
+ * Actually have multiple NUMA nodes,
+ * so split pools on NUMA node boundaries
+ */
+ m->mode = 2;
+ } else {
+ node = any_online_node(node_online_map);
+ if (nr_cpus_node(node) > 2) {
+ /*
+ * Apparently we're running with CONFIG_NUMA
+ * on non-NUMA hardware, e.g. with a generic
+ * x86_64 kernel on Xeons. In this case we
+ * want to divide the pools on cpu boundaries.
+ */
+ m->mode = 1;
+ }
+ }
+#else
+ if (num_online_cpus() > 1) {
+ /*
+ * Plain SMP with multiple CPUs online.
+ */
+ m->mode = 1;
+ }
+#endif
+ }
+
+ switch (m->mode) {
+ case 0:
+fallback:
+ m->mode = 0;
+ m->npools = 1;
+ printk("nfsd: initialising 1 global pool\n");
+ break;
+
+ case 1:
+ maxpools = num_possible_cpus();
+ m->cpu_to_pool = kcalloc(maxpools, sizeof(unsigned int),
+ GFP_KERNEL);
+ if (!m->cpu_to_pool)
+ goto fallback;
+ m->pool_to_cpu = kcalloc(maxpools, sizeof(unsigned int),
+ GFP_KERNEL);
+ if (!m->pool_to_cpu) {
+ kfree(m->cpu_to_pool);
+ goto fallback;
+ }
+ for_each_online_cpu(cpu) {
+ BUG_ON(pidx > maxpools);
+ m->cpu_to_pool[cpu] = pidx;
+ m->pool_to_cpu[pidx] = cpu;
+ pidx++;
+ }
+ /* cpus brought online later all get mapped to pool0, sorry */
+ m->npools = pidx;
+
+ printk("nfsd: initialising %u pools, one per cpu\n", m->npools);
+ break;
+
+#ifdef CONFIG_NUMA
+ case 2:
+ maxpools = num_possible_nodes();
+ m->node_to_pool = kcalloc(maxpools, sizeof(unsigned int),
+ GFP_KERNEL);
+ if (!m->node_to_pool)
+ goto fallback;
+ m->pool_to_node = kcalloc(maxpools, sizeof(unsigned int),
+ GFP_KERNEL);
+ if (!m->pool_to_node) {
+ kfree(m->node_to_pool);
+ goto fallback;
+ }
+ for_each_node_with_cpus(node) {
+ /* some architectures (e.g. SN2) have cpuless nodes */
+ BUG_ON(pidx > maxpools);
+ m->node_to_pool[node] = pidx;
+ m->pool_to_node[pidx] = node;
+ pidx++;
+ }
+ /* nodes brought online later all get mapped to pool0, sorry */
+ m->npools = pidx;
+
+ printk("nfsd: initialising %u pools, one per numa node\n", m->npools);
+ break;
+#endif /* CONFIG_NUMA */
+ }
+
+ return m->npools;
+}
+
+/*
+ * Set the current thread's cpus_allowed mask so that it
+ * will only run on cpus in the given pool.
+ *
+ * Returns 1 and fills in oldmask iff a cpumask was applied.
+ */
+static int
+svc_pool_map_set_cpumask(unsigned int pidx, cpumask_t *oldmask)
+{
+ struct svc_pool_map *m = &svc_pool_map;
+ unsigned int node;
+ unsigned int cpu;
+
+ BUG_ON(!m->init);
+
+ switch (m->mode)
+ {
+ default:
+ case 0:
+ return 0;
+ case 1:
+ cpu = m->pool_to_cpu[pidx];
+ *oldmask = current->cpus_allowed;
+ set_cpus_allowed(current, cpumask_of_cpu(cpu));
+ return 1;
+#ifdef CONFIG_NUMA
+ case 2:
+ node = m->pool_to_node[pidx];
+ *oldmask = current->cpus_allowed;
+ set_cpus_allowed(current, node_to_cpumask(node));
+ return 1;
+#endif /* CONFIG_NUMA */
+ }
+}
+
+#endif /* SVC_HAVE_MULTIPLE_POOLS */
+
/*
* Create an RPC service
*/
@@ -101,8 +260,13 @@ svc_create_pooled(struct svc_program *pr
svc_thread_fn func, int sig, struct module *mod)
{
struct svc_serv *serv;
+ unsigned int npools = 1;
- serv = __svc_create(prog, bufsize, /*npools*/1);
+#if SVC_HAVE_MULTIPLE_POOLS
+ npools = svc_pool_map_init();
+#endif
+
+ serv = __svc_create(prog, bufsize, npools);
if (serv != NULL) {
serv->sv_function = func;
@@ -202,12 +366,18 @@ svc_release_buffer(struct svc_rqst *rqst
/*
* Create a thread in the given pool. Caller must hold BKL.
+ * On a NUMA or SMP machine, with a multi-pool serv, the thread
+ * will be restricted to run on the cpus belonging to the pool.
*/
static int
__svc_create_thread(svc_thread_fn func, struct svc_serv *serv, struct svc_pool *pool)
{
struct svc_rqst *rqstp;
int error = -ENOMEM;
+#if SVC_HAVE_MULTIPLE_POOLS
+ int have_oldmask = 0;
+ cpumask_t oldmask;
+#endif
rqstp = kzalloc(sizeof(*rqstp), GFP_KERNEL);
if (!rqstp)
@@ -227,7 +397,19 @@ __svc_create_thread(svc_thread_fn func,
spin_unlock_bh(&pool->sp_lock);
rqstp->rq_server = serv;
rqstp->rq_pool = pool;
+
+#if SVC_HAVE_MULTIPLE_POOLS
+ if (serv->sv_nrpools > 1)
+ have_oldmask = svc_pool_map_set_cpumask(pool->sp_id, &oldmask);
+#endif
+
error = kernel_thread((int (*)(void *)) func, rqstp, 0);
+
+#if SVC_HAVE_MULTIPLE_POOLS
+ if (have_oldmask)
+ set_cpus_allowed(current, oldmask);
+#endif
+
if (error < 0)
goto out_thread;
svc_sock_update_bufs(serv);
Index: linus-git/net/sunrpc/svcsock.c
===================================================================
--- linus-git.orig/net/sunrpc/svcsock.c 2006-07-24 20:44:46.911435470 +1000
+++ linus-git/net/sunrpc/svcsock.c 2006-07-24 22:45:23.263878219 +1000
@@ -150,8 +150,9 @@ static void
svc_sock_enqueue(struct svc_sock *svsk)
{
struct svc_serv *serv = svsk->sk_server;
- struct svc_pool *pool = &serv->sv_pools[0];
+ struct svc_pool *pool;
struct svc_rqst *rqstp;
+ int cpu;
if (!(svsk->sk_flags &
( (1<<SK_CONN)|(1<<SK_DATA)|(1<<SK_CLOSE)|(1<<SK_DEFERRED)) ))
@@ -159,6 +160,10 @@ svc_sock_enqueue(struct svc_sock *svsk)
if (test_bit(SK_DEAD, &svsk->sk_flags))
return;
+ cpu = get_cpu();
+ pool = svc_pool_for_cpu(svsk->sk_server, cpu);
+ put_cpu();
+
spin_lock_bh(&pool->sp_lock);
if (!list_empty(&pool->sp_threads) &&
Index: linus-git/include/linux/sunrpc/svc.h
===================================================================
--- linus-git.orig/include/linux/sunrpc/svc.h 2006-07-24 22:16:36.041218126 +1000
+++ linus-git/include/linux/sunrpc/svc.h 2006-07-24 22:45:23.347867112 +1000
@@ -41,6 +41,39 @@ struct svc_pool {
struct list_head sp_all_threads; /* all server threads */
} ____cacheline_aligned_in_smp;
+#if defined(CONFIG_NUMA) || defined(CONFIG_SMP)
+#define SVC_HAVE_MULTIPLE_POOLS 1
+#else
+#define SVC_HAVE_MULTIPLE_POOLS 0
+#endif
+
+#if SVC_HAVE_MULTIPLE_POOLS
+/*
+ * Global structure for mapping cpus to pools and vice versa.
+ * Setup once during sunrpc initialisation.
+ */
+struct svc_pool_map {
+ /*
+ * Mode for mapping cpus to pools.
+ *
+ * -1 = automatic, choose one of the other modes at boot
+ * 0 = no mapping, just a single global pool (legacy & UP mode)
+ * 1 = one pool per cpu
+ * 2 = one pool per numa node
+ */
+ int mode;
+ int init;
+ unsigned int npools;
+ unsigned int *pool_to_cpu;
+ unsigned int *cpu_to_pool;
+#ifdef CONFIG_NUMA
+ unsigned int *node_to_pool;
+ unsigned int *pool_to_node;
+#endif /* CONFIG_NUMA */
+};
+#endif /* SVC_HAVE_MULTIPLE_POOLS */
+
+
/*
* RPC service.
*
@@ -360,5 +393,34 @@ int svc_process(struct svc_serv *, s
int svc_register(struct svc_serv *, int, unsigned short);
void svc_wake_up(struct svc_serv *);
void svc_reserve(struct svc_rqst *rqstp, int space);
+extern struct svc_pool_map svc_pool_map;
+
+
+static inline struct svc_pool *svc_pool_for_cpu(struct svc_serv *serv, int cpu)
+{
+#if SVC_HAVE_MULTIPLE_POOLS
+ struct svc_pool_map *m = &svc_pool_map;
+ unsigned int pidx;
+
+ switch (m->mode) {
+ default:
+ case 0:
+ pidx = 0;
+ break;
+ case 1:
+ pidx = m->cpu_to_pool[cpu];
+ break;
+#ifdef CONFIG_NUMA
+ case 2:
+ pidx = m->node_to_pool[cpu_to_node(cpu)];
+ break;
+#endif /* CONFIG_NUMA */
+ }
+ return &serv->sv_pools[pidx % serv->sv_nrpools];
+#else
+ return &serv->sv_pools[0];
+#endif
+}
+
#endif /* SUNRPC_SVC_H */
--
Greg Banks, R&D Software Engineer, SGI Australian Software Group.
I don't speak for SGI.
-------------------------------------------------------------------------
Take Surveys. Earn Cash. Influence the Future of IT
Join SourceForge.net's Techsay panel and you'll get the chance to share your
opinions on IT & business topics through brief surveys -- and earn cash
http://www.techsay.com/default.php?page=join.php&p=sourceforge&CID=DEVDEV
_______________________________________________
NFS maillist - NFS@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/nfs
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [PATCH 010 of 11] knfsd: make pools numa aware
2006-07-25 5:16 [PATCH 010 of 11] knfsd: make pools numa aware Greg Banks
@ 2006-07-25 12:43 ` Trond Myklebust
2006-07-26 2:20 ` Greg Banks
0 siblings, 1 reply; 3+ messages in thread
From: Trond Myklebust @ 2006-07-25 12:43 UTC (permalink / raw)
To: Greg Banks; +Cc: Neil Brown, Linux NFS Mailing List
On Tue, 2006-07-25 at 15:16 +1000, Greg Banks wrote:
> knfsd: Actually implement multiple pools. On NUMA machines, allocate
> a svc_pool per NUMA node; on SMP a svc_pool per CPU; otherwise a single
> global pool. Enqueue sockets on the svc_pool corresponding to the CPU
> on which the socket bh is run (i.e. the NIC interrupt CPU). Threads
> have their cpu mask set to limit them to the CPUs in the svc_pool that
> owns them.
>
> This is the patch that allows an Altix to scale NFS traffic linearly
> beyond 4 CPUs and 4 NICs.
>
> Signed-off-by: Greg Banks <gnb@melbourne.sgi.com>
> ---
>
> include/linux/sunrpc/svc.h | 62 +++++++++++
> net/sunrpc/svc.c | 184 +++++++++++++++++++++++++++++++++-
> net/sunrpc/svcsock.c | 7 +
> 3 files changed, 251 insertions(+), 2 deletions(-)
>
> Index: linus-git/net/sunrpc/svc.c
> ===================================================================
> --- linus-git.orig/net/sunrpc/svc.c 2006-07-24 22:16:36.157203063 +1000
> +++ linus-git/net/sunrpc/svc.c 2006-07-24 22:54:13.557820093 +1000
> @@ -4,6 +4,10 @@
> * High-level RPC service routines
> *
> * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
> + *
> + * Multiple threads pools and NUMAisation
> + * Copyright (c) 2006 Silicon Graphics, Inc.
> + * by Greg Banks <gnb@melbourne.sgi.com>
> */
>
> #include <linux/linkage.h>
> @@ -24,6 +28,161 @@
> #define RPCDBG_FACILITY RPCDBG_SVCDSP
> #define RPC_PARANOIA 1
>
> +
> +#if SVC_HAVE_MULTIPLE_POOLS
> +
> +struct svc_pool_map svc_pool_map = { .mode = -1, .init = 0 };
> +
> +/*
> + * Build the global map of cpus to pools and vice versa.
> + */
> +static unsigned int
> +svc_pool_map_init(void)
> +{
> + struct svc_pool_map *m = &svc_pool_map;
> + unsigned int node;
> + unsigned int cpu;
> + unsigned int pidx = 0;
> + unsigned int maxpools;
> +
> + if (m->init)
> + return m->npools;
> + m->init = 1;
> +
> + if (m->mode < 0) {
> + /*
> + * Detect best pool mapping mode heuristically.
> + */
> + m->mode = 0; /* default: one global pool */
> +#ifdef CONFIG_NUMA
^^^^^^^^^^^^^^^^^^ Growl...
Perhaps a helper function to hide the ifdef.
> + if (num_online_nodes() > 1) {
> + /*
> + * Actually have multiple NUMA nodes,
> + * so split pools on NUMA node boundaries
> + */
> + m->mode = 2;
> + } else {
> + node = any_online_node(node_online_map);
> + if (nr_cpus_node(node) > 2) {
> + /*
> + * Apparently we're running with CONFIG_NUMA
> + * on non-NUMA hardware, e.g. with a generic
> + * x86_64 kernel on Xeons. In this case we
> + * want to divide the pools on cpu boundaries.
> + */
> + m->mode = 1;
> + }
> + }
> +#else
> + if (num_online_cpus() > 1) {
> + /*
> + * Plain SMP with multiple CPUs online.
> + */
> + m->mode = 1;
> + }
> +#endif
> + }
> +
> + switch (m->mode) {
> + case 0:
> +fallback:
> + m->mode = 0;
> + m->npools = 1;
> + printk("nfsd: initialising 1 global pool\n");
^^^^ ho hum....
Please keep sunrpc and nfsd separate. Also, this should probably be a
dprintk() in order to avoid spamming the syslogs.
> + break;
> +
> + case 1:
> + maxpools = num_possible_cpus();
> + m->cpu_to_pool = kcalloc(maxpools, sizeof(unsigned int),
> + GFP_KERNEL);
> + if (!m->cpu_to_pool)
> + goto fallback;
> + m->pool_to_cpu = kcalloc(maxpools, sizeof(unsigned int),
> + GFP_KERNEL);
> + if (!m->pool_to_cpu) {
> + kfree(m->cpu_to_pool);
> + goto fallback;
> + }
> + for_each_online_cpu(cpu) {
> + BUG_ON(pidx > maxpools);
> + m->cpu_to_pool[cpu] = pidx;
> + m->pool_to_cpu[pidx] = cpu;
> + pidx++;
> + }
> + /* cpus brought online later all get mapped to pool0, sorry */
> + m->npools = pidx;
> +
> + printk("nfsd: initialising %u pools, one per cpu\n", m->npools);
^^^^
> + break;
> +
> +#ifdef CONFIG_NUMA
^^^^^^^^^^^^^^^^^^^ See above
> + case 2:
> + maxpools = num_possible_nodes();
> + m->node_to_pool = kcalloc(maxpools, sizeof(unsigned int),
> + GFP_KERNEL);
> + if (!m->node_to_pool)
> + goto fallback;
> + m->pool_to_node = kcalloc(maxpools, sizeof(unsigned int),
> + GFP_KERNEL);
> + if (!m->pool_to_node) {
> + kfree(m->node_to_pool);
> + goto fallback;
> + }
> + for_each_node_with_cpus(node) {
> + /* some architectures (e.g. SN2) have cpuless nodes */
> + BUG_ON(pidx > maxpools);
> + m->node_to_pool[node] = pidx;
> + m->pool_to_node[pidx] = node;
> + pidx++;
> + }
> + /* nodes brought online later all get mapped to pool0, sorry */
> + m->npools = pidx;
> +
> + printk("nfsd: initialising %u pools, one per numa node\n", m->npools);
^^^^
> + break;
> +#endif /* CONFIG_NUMA */
> + }
> +
> + return m->npools;
> +}
> +
> +/*
> + * Set the current thread's cpus_allowed mask so that it
> + * will only run on cpus in the given pool.
> + *
> + * Returns 1 and fills in oldmask iff a cpumask was applied.
> + */
> +static int
> +svc_pool_map_set_cpumask(unsigned int pidx, cpumask_t *oldmask)
> +{
> + struct svc_pool_map *m = &svc_pool_map;
> + unsigned int node;
> + unsigned int cpu;
> +
> + BUG_ON(!m->init);
> +
> + switch (m->mode)
> + {
> + default:
> + case 0:
> + return 0;
> + case 1:
> + cpu = m->pool_to_cpu[pidx];
> + *oldmask = current->cpus_allowed;
> + set_cpus_allowed(current, cpumask_of_cpu(cpu));
> + return 1;
> +#ifdef CONFIG_NUMA
^^^^^^^^^^^^^^^^^ See above
> + case 2:
> + node = m->pool_to_node[pidx];
> + *oldmask = current->cpus_allowed;
> + set_cpus_allowed(current, node_to_cpumask(node));
> + return 1;
> +#endif /* CONFIG_NUMA */
> + }
> +}
> +
> +#endif /* SVC_HAVE_MULTIPLE_POOLS */
> +
> /*
> * Create an RPC service
> */
> @@ -101,8 +260,13 @@ svc_create_pooled(struct svc_program *pr
> svc_thread_fn func, int sig, struct module *mod)
> {
> struct svc_serv *serv;
> + unsigned int npools = 1;
>
> - serv = __svc_create(prog, bufsize, /*npools*/1);
> +#if SVC_HAVE_MULTIPLE_POOLS
No...
#ifndef SVC_HAVE_MULTIPLE_POOLS
static inline svc_pool_map_init(void)
{
return 0;
}
#else
.....
#endif
> + npools = svc_pool_map_init();
> +#endif
> +
> + serv = __svc_create(prog, bufsize, npools);
>
> if (serv != NULL) {
> serv->sv_function = func;
> @@ -202,12 +366,18 @@ svc_release_buffer(struct svc_rqst *rqst
>
> /*
> * Create a thread in the given pool. Caller must hold BKL.
> + * On a NUMA or SMP machine, with a multi-pool serv, the thread
> + * will be restricted to run on the cpus belonging to the pool.
> */
> static int
> __svc_create_thread(svc_thread_fn func, struct svc_serv *serv, struct svc_pool *pool)
> {
> struct svc_rqst *rqstp;
> int error = -ENOMEM;
> +#if SVC_HAVE_MULTIPLE_POOLS
> + int have_oldmask = 0;
> + cpumask_t oldmask;
> +#endif
>
> rqstp = kzalloc(sizeof(*rqstp), GFP_KERNEL);
> if (!rqstp)
> @@ -227,7 +397,19 @@ __svc_create_thread(svc_thread_fn func,
> spin_unlock_bh(&pool->sp_lock);
> rqstp->rq_server = serv;
> rqstp->rq_pool = pool;
> +
> +#if SVC_HAVE_MULTIPLE_POOLS
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
See above. Setting have_oldmask to zero in the case where
SVC_HAVE_MULTIPLE_POOLS is not set should work fine, and will be
optimised away by the compiler.
> + if (serv->sv_nrpools > 1)
> + have_oldmask = svc_pool_map_set_cpumask(pool->sp_id, &oldmask);
> +#endif
> +
> error = kernel_thread((int (*)(void *)) func, rqstp, 0);
> +
> +#if SVC_HAVE_MULTIPLE_POOLS
> + if (have_oldmask)
> + set_cpus_allowed(current, oldmask);
> +#endif
> +
> if (error < 0)
> goto out_thread;
> svc_sock_update_bufs(serv);
> Index: linus-git/net/sunrpc/svcsock.c
> ===================================================================
> --- linus-git.orig/net/sunrpc/svcsock.c 2006-07-24 20:44:46.911435470 +1000
> +++ linus-git/net/sunrpc/svcsock.c 2006-07-24 22:45:23.263878219 +1000
> @@ -150,8 +150,9 @@ static void
> svc_sock_enqueue(struct svc_sock *svsk)
> {
> struct svc_serv *serv = svsk->sk_server;
> - struct svc_pool *pool = &serv->sv_pools[0];
> + struct svc_pool *pool;
> struct svc_rqst *rqstp;
> + int cpu;
>
> if (!(svsk->sk_flags &
> ( (1<<SK_CONN)|(1<<SK_DATA)|(1<<SK_CLOSE)|(1<<SK_DEFERRED)) ))
> @@ -159,6 +160,10 @@ svc_sock_enqueue(struct svc_sock *svsk)
> if (test_bit(SK_DEAD, &svsk->sk_flags))
> return;
>
> + cpu = get_cpu();
> + pool = svc_pool_for_cpu(svsk->sk_server, cpu);
> + put_cpu();
> +
> spin_lock_bh(&pool->sp_lock);
>
> if (!list_empty(&pool->sp_threads) &&
> Index: linus-git/include/linux/sunrpc/svc.h
> ===================================================================
> --- linus-git.orig/include/linux/sunrpc/svc.h 2006-07-24 22:16:36.041218126 +1000
> +++ linus-git/include/linux/sunrpc/svc.h 2006-07-24 22:45:23.347867112 +1000
> @@ -41,6 +41,39 @@ struct svc_pool {
> struct list_head sp_all_threads; /* all server threads */
> } ____cacheline_aligned_in_smp;
>
> +#if defined(CONFIG_NUMA) || defined(CONFIG_SMP)
> +#define SVC_HAVE_MULTIPLE_POOLS 1
> +#else
> +#define SVC_HAVE_MULTIPLE_POOLS 0
> +#endif
> +
> +#if SVC_HAVE_MULTIPLE_POOLS
^^^^^^^^^^^^ Any reason why you've done this? A definition shouldn't be
that worrying to us...
> +/*
> + * Global structure for mapping cpus to pools and vice versa.
> + * Setup once during sunrpc initialisation.
> + */
> +struct svc_pool_map {
> + /*
> + * Mode for mapping cpus to pools.
> + *
> + * -1 = automatic, choose one of the other modes at boot
> + * 0 = no mapping, just a single global pool (legacy & UP mode)
> + * 1 = one pool per cpu
> + * 2 = one pool per numa node
> + */
> + int mode;
> + int init;
> + unsigned int npools;
> + unsigned int *pool_to_cpu;
> + unsigned int *cpu_to_pool;
> +#ifdef CONFIG_NUMA
> + unsigned int *node_to_pool;
> + unsigned int *pool_to_node;
> +#endif /* CONFIG_NUMA */
> +};
> +#endif /* SVC_HAVE_MULTIPLE_POOLS */
> +
> +
> /*
> * RPC service.
> *
> @@ -360,5 +393,34 @@ int svc_process(struct svc_serv *, s
> int svc_register(struct svc_serv *, int, unsigned short);
> void svc_wake_up(struct svc_serv *);
> void svc_reserve(struct svc_rqst *rqstp, int space);
> +extern struct svc_pool_map svc_pool_map;
> +
> +
> +static inline struct svc_pool *svc_pool_for_cpu(struct svc_serv *serv, int cpu)
> +{
> +#if SVC_HAVE_MULTIPLE_POOLS
> + struct svc_pool_map *m = &svc_pool_map;
> + unsigned int pidx;
> +
> + switch (m->mode) {
> + default:
> + case 0:
> + pidx = 0;
> + break;
> + case 1:
> + pidx = m->cpu_to_pool[cpu];
> + break;
> +#ifdef CONFIG_NUMA
> + case 2:
> + pidx = m->node_to_pool[cpu_to_node(cpu)];
> + break;
> +#endif /* CONFIG_NUMA */
> + }
> + return &serv->sv_pools[pidx % serv->sv_nrpools];
> +#else
> + return &serv->sv_pools[0];
> +#endif
> +}
> +
>
> #endif /* SUNRPC_SVC_H */
>
Cheers,
Trond
-------------------------------------------------------------------------
Take Surveys. Earn Cash. Influence the Future of IT
Join SourceForge.net's Techsay panel and you'll get the chance to share your
opinions on IT & business topics through brief surveys -- and earn cash
http://www.techsay.com/default.php?page=join.php&p=sourceforge&CID=DEVDEV
_______________________________________________
NFS maillist - NFS@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/nfs
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [PATCH 010 of 11] knfsd: make pools numa aware
2006-07-25 12:43 ` Trond Myklebust
@ 2006-07-26 2:20 ` Greg Banks
0 siblings, 0 replies; 3+ messages in thread
From: Greg Banks @ 2006-07-26 2:20 UTC (permalink / raw)
To: Trond Myklebust; +Cc: Neil Brown, Linux NFS Mailing List
On Tue, 2006-07-25 at 22:43, Trond Myklebust wrote:
> On Tue, 2006-07-25 at 15:16 +1000, Greg Banks wrote:
> > + m->mode = 0; /* default: one global pool */
> > +#ifdef CONFIG_NUMA
> ^^^^^^^^^^^^^^^^^^ Growl...
>
> Perhaps a helper function to hide the ifdef.
I believe Neil has since cured the worst of my #ifdef disease.
> > + m->mode = 0;
> > + m->npools = 1;
> > + printk("nfsd: initialising 1 global pool\n");
> ^^^^ ho hum....
>
> Please keep sunrpc and nfsd separate.
Sorry, my bad.
> Also, this should probably be a
> dprintk() in order to avoid spamming the syslogs.
I don't see how that would be helpful. This message happens
when the first nfsd thread is started. On a system where sunrpc
and nfsd are modular, there's a very small time window between
loading the sunrpc module and starting the first nfsd thread.
So there would be little opportunity to set the debug flag to
enable the dprintk() to appear. Would you be happy with just
removing the printk() entirely?
> > +
> > +#if SVC_HAVE_MULTIPLE_POOLS
>
> ^^^^^^^^^^^^ Any reason why you've done this? A definition shouldn't be
> that worrying to us...
When you put it like that, no good reason at all.
Greg.
--
Greg Banks, R&D Software Engineer, SGI Australian Software Group.
I don't speak for SGI.
-------------------------------------------------------------------------
Take Surveys. Earn Cash. Influence the Future of IT
Join SourceForge.net's Techsay panel and you'll get the chance to share your
opinions on IT & business topics through brief surveys -- and earn cash
http://www.techsay.com/default.php?page=join.php&p=sourceforge&CID=DEVDEV
_______________________________________________
NFS maillist - NFS@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/nfs
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2006-07-26 2:25 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2006-07-25 5:16 [PATCH 010 of 11] knfsd: make pools numa aware Greg Banks
2006-07-25 12:43 ` Trond Myklebust
2006-07-26 2:20 ` Greg Banks
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox