* [PATCH] SGI XPC fails to load when cpu 0 is out of IRQ resources. @ 2012-08-03 19:46 Robin Holt 2012-08-15 22:56 ` Andrew Morton 0 siblings, 1 reply; 3+ messages in thread From: Robin Holt @ 2012-08-03 19:46 UTC (permalink / raw) To: Andrew Morton; +Cc: linux-kernel, Robin Holt On many of our larger systems, CPU 0 has had all of its IRQ resources consumed before XPC loads. Worst cases on machines with multiple 10 GigE cards and multiple IB cards have depleted the entire first socket of IRQs. This patch makes selecting the node upon which IRQs are allocated (as well as all the other GRU Message Queue structures) specifiable as a module load param and has a default behavior of searching all nodes/cpus for an available resource. Signed-off-by: Robin Holt <holt@sgi.com> --- drivers/misc/sgi-xp/xpc_uv.c | 66 ++++++++++++++++++++++++++++++++--------- 1 files changed, 51 insertions(+), 15 deletions(-) diff --git a/drivers/misc/sgi-xp/xpc_uv.c b/drivers/misc/sgi-xp/xpc_uv.c index 87b251a..f4398bb 100644 --- a/drivers/misc/sgi-xp/xpc_uv.c +++ b/drivers/misc/sgi-xp/xpc_uv.c @@ -59,6 +59,8 @@ static struct xpc_heartbeat_uv *xpc_heartbeat_uv; XPC_NOTIFY_MSG_SIZE_UV) #define XPC_NOTIFY_IRQ_NAME "xpc_notify" +static int xpc_mq_node = -1; + static struct xpc_gru_mq_uv *xpc_activate_mq_uv; static struct xpc_gru_mq_uv *xpc_notify_mq_uv; @@ -110,8 +112,6 @@ xpc_get_gru_mq_irq_uv(struct xpc_gru_mq_uv *mq, int cpu, char *irq_name) mq->irq = uv_setup_irq(irq_name, cpu, mq->mmr_blade, mq->mmr_offset, UV_AFFINITY_CPU); if (mq->irq < 0) { - dev_err(xpc_part, "uv_setup_irq() returned error=%d\n", - -mq->irq); return mq->irq; } @@ -1731,9 +1731,42 @@ static struct xpc_arch_operations xpc_arch_ops_uv = { .notify_senders_of_disconnect = xpc_notify_senders_of_disconnect_uv, }; +static int +xpc_init_mq_node(int nid) +{ + int cpu; + + for_each_cpu(cpu, cpumask_of_node(nid)) { + xpc_activate_mq_uv = xpc_create_gru_mq_uv(XPC_ACTIVATE_MQ_SIZE_UV, nid, + XPC_ACTIVATE_IRQ_NAME, + 
xpc_handle_activate_IRQ_uv); + if (!IS_ERR(xpc_activate_mq_uv)) + break; + } + if (IS_ERR(xpc_activate_mq_uv)) + return PTR_ERR(xpc_activate_mq_uv); + + for_each_cpu(cpu, cpumask_of_node(nid)) { + xpc_notify_mq_uv = xpc_create_gru_mq_uv(XPC_NOTIFY_MQ_SIZE_UV, nid, + XPC_NOTIFY_IRQ_NAME, + xpc_handle_notify_IRQ_uv); + if (!IS_ERR(xpc_notify_mq_uv)) + break; + } + if (IS_ERR(xpc_notify_mq_uv)) { + xpc_destroy_gru_mq_uv(xpc_activate_mq_uv); + return PTR_ERR(xpc_notify_mq_uv); + } + + return 0; +} + int xpc_init_uv(void) { + int nid; + int ret = 0; + xpc_arch_ops = xpc_arch_ops_uv; if (sizeof(struct xpc_notify_mq_msghdr_uv) > XPC_MSG_HDR_MAX_SIZE) { @@ -1742,21 +1775,21 @@ xpc_init_uv(void) return -E2BIG; } - xpc_activate_mq_uv = xpc_create_gru_mq_uv(XPC_ACTIVATE_MQ_SIZE_UV, 0, - XPC_ACTIVATE_IRQ_NAME, - xpc_handle_activate_IRQ_uv); - if (IS_ERR(xpc_activate_mq_uv)) - return PTR_ERR(xpc_activate_mq_uv); + if (xpc_mq_node < 0) + for_each_online_node(nid) { + ret = xpc_init_mq_node(nid); - xpc_notify_mq_uv = xpc_create_gru_mq_uv(XPC_NOTIFY_MQ_SIZE_UV, 0, - XPC_NOTIFY_IRQ_NAME, - xpc_handle_notify_IRQ_uv); - if (IS_ERR(xpc_notify_mq_uv)) { - xpc_destroy_gru_mq_uv(xpc_activate_mq_uv); - return PTR_ERR(xpc_notify_mq_uv); - } + if (!ret) + break; + } + else + ret = xpc_init_mq_node(xpc_mq_node); - return 0; + if (ret < 0) + dev_err(xpc_part, "xpc_init_mq_node() returned error=%d\n", + -ret); + + return ret; } void @@ -1765,3 +1798,6 @@ xpc_exit_uv(void) xpc_destroy_gru_mq_uv(xpc_notify_mq_uv); xpc_destroy_gru_mq_uv(xpc_activate_mq_uv); } + +module_param(xpc_mq_node, int, 0); +MODULE_PARM_DESC(xpc_mq_node, "Node number on which to allocate message queues."); -- 1.7.6.1 ^ permalink raw reply related [flat|nested] 3+ messages in thread
* Re: [PATCH] SGI XPC fails to load when cpu 0 is out of IRQ resources. 2012-08-03 19:46 [PATCH] SGI XPC fails to load when cpu 0 is out of IRQ resources Robin Holt @ 2012-08-15 22:56 ` Andrew Morton 2012-08-16 3:58 ` [PATCH] SGI XPC fails to load when cpu 0 is out of IRQ resources. -v2 Robin Holt 0 siblings, 1 reply; 3+ messages in thread From: Andrew Morton @ 2012-08-15 22:56 UTC (permalink / raw) To: Robin Holt; +Cc: linux-kernel On Fri, 3 Aug 2012 14:46:29 -0500 Robin Holt <holt@sgi.com> wrote: > On many of our larger systems, CPU 0 has had all of its IRQ resources > consumed before XPC loads. Worse cases on machines with multiple > 10 GigE cards and multiple IB cards have depleted the entire first > socket of IRQs. That patch makes selecting the node upon which > IRQs are allocated (as well as all the other GRU Message Queue > structures) specifiable as a module load param and has a default > behavior of searching all nodes/cpus for an available resource. > Is this problem serious enough to warrant a -stable backport? If you want it to appear in vendor kernels then I guess "yes". > +static int > +xpc_init_mq_node(int nid) > +{ > + int cpu; > + > + for_each_cpu(cpu, cpumask_of_node(nid)) { > + xpc_activate_mq_uv = xpc_create_gru_mq_uv(XPC_ACTIVATE_MQ_SIZE_UV, nid, > + XPC_ACTIVATE_IRQ_NAME, > + xpc_handle_activate_IRQ_uv); > + if (!IS_ERR(xpc_activate_mq_uv)) > + break; > + } > + if (IS_ERR(xpc_activate_mq_uv)) > + return PTR_ERR(xpc_activate_mq_uv); > + > + for_each_cpu(cpu, cpumask_of_node(nid)) { > + xpc_notify_mq_uv = xpc_create_gru_mq_uv(XPC_NOTIFY_MQ_SIZE_UV, nid, > + XPC_NOTIFY_IRQ_NAME, > + xpc_handle_notify_IRQ_uv); > + if (!IS_ERR(xpc_notify_mq_uv)) > + break; > + } > + if (IS_ERR(xpc_notify_mq_uv)) { > + xpc_destroy_gru_mq_uv(xpc_activate_mq_uv); > + return PTR_ERR(xpc_notify_mq_uv); > + } > + > + return 0; > +} This seems to take the optimistic approach to CPU hotplug ;) get_online_cpus(), perhaps? 
^ permalink raw reply [flat|nested] 3+ messages in thread
* [PATCH] SGI XPC fails to load when cpu 0 is out of IRQ resources. -v2 2012-08-15 22:56 ` Andrew Morton @ 2012-08-16 3:58 ` Robin Holt 0 siblings, 0 replies; 3+ messages in thread From: Robin Holt @ 2012-08-16 3:58 UTC (permalink / raw) To: Andrew Morton; +Cc: Robin Holt, linux-kernel On many of our larger systems, CPU 0 has had all of its IRQ resources consumed before XPC loads. Worst cases on machines with multiple 10 GigE cards and multiple IB cards have depleted the entire first socket of IRQs. This patch makes selecting the node upon which IRQs are allocated (as well as all the other GRU Message Queue structures) specifiable as a module load param and has a default behavior of searching all nodes/cpus for an available resource. Signed-off-by: Robin Holt <holt@sgi.com> Cc: stable@vger.kernel.org --- -v2 - incorporate review comments from Andrew Morton. drivers/misc/sgi-xp/xpc_uv.c | 82 ++++++++++++++++++++++++++++++++---------- 1 files changed, 63 insertions(+), 19 deletions(-) diff --git a/drivers/misc/sgi-xp/xpc_uv.c b/drivers/misc/sgi-xp/xpc_uv.c index 87b251a..57a53c1 100644 --- a/drivers/misc/sgi-xp/xpc_uv.c +++ b/drivers/misc/sgi-xp/xpc_uv.c @@ -59,6 +59,8 @@ static struct xpc_heartbeat_uv *xpc_heartbeat_uv; XPC_NOTIFY_MSG_SIZE_UV) #define XPC_NOTIFY_IRQ_NAME "xpc_notify" +static int xpc_mq_node = -1; + static struct xpc_gru_mq_uv *xpc_activate_mq_uv; static struct xpc_gru_mq_uv *xpc_notify_mq_uv; @@ -109,11 +111,8 @@ xpc_get_gru_mq_irq_uv(struct xpc_gru_mq_uv *mq, int cpu, char *irq_name) #if defined CONFIG_X86_64 mq->irq = uv_setup_irq(irq_name, cpu, mq->mmr_blade, mq->mmr_offset, UV_AFFINITY_CPU); - if (mq->irq < 0) { - dev_err(xpc_part, "uv_setup_irq() returned error=%d\n", - -mq->irq); + if (mq->irq < 0) return mq->irq; - } mq->mmr_value = uv_read_global_mmr64(mmr_pnode, mq->mmr_offset); @@ -238,8 +237,9 @@ xpc_create_gru_mq_uv(unsigned int mq_size, int cpu, char *irq_name, mq->mmr_blade = uv_cpu_to_blade_id(cpu); nid = cpu_to_node(cpu); - page = 
alloc_pages_exact_node(nid, GFP_KERNEL | __GFP_ZERO | GFP_THISNODE, - pg_order); + page = alloc_pages_exact_node(nid, + GFP_KERNEL | __GFP_ZERO | GFP_THISNODE, + pg_order); if (page == NULL) { dev_err(xpc_part, "xpc_create_gru_mq_uv() failed to alloc %d " "bytes of memory on nid=%d for GRU mq\n", mq_size, nid); @@ -1731,9 +1731,50 @@ static struct xpc_arch_operations xpc_arch_ops_uv = { .notify_senders_of_disconnect = xpc_notify_senders_of_disconnect_uv, }; +static int +xpc_init_mq_node(int nid) +{ + int cpu; + + get_online_cpus(); + + for_each_cpu(cpu, cpumask_of_node(nid)) { + xpc_activate_mq_uv = + xpc_create_gru_mq_uv(XPC_ACTIVATE_MQ_SIZE_UV, nid, + XPC_ACTIVATE_IRQ_NAME, + xpc_handle_activate_IRQ_uv); + if (!IS_ERR(xpc_activate_mq_uv)) + break; + } + if (IS_ERR(xpc_activate_mq_uv)) { + put_online_cpus(); + return PTR_ERR(xpc_activate_mq_uv); + } + + for_each_cpu(cpu, cpumask_of_node(nid)) { + xpc_notify_mq_uv = + xpc_create_gru_mq_uv(XPC_NOTIFY_MQ_SIZE_UV, nid, + XPC_NOTIFY_IRQ_NAME, + xpc_handle_notify_IRQ_uv); + if (!IS_ERR(xpc_notify_mq_uv)) + break; + } + if (IS_ERR(xpc_notify_mq_uv)) { + xpc_destroy_gru_mq_uv(xpc_activate_mq_uv); + put_online_cpus(); + return PTR_ERR(xpc_notify_mq_uv); + } + + put_online_cpus(); + return 0; +} + int xpc_init_uv(void) { + int nid; + int ret = 0; + xpc_arch_ops = xpc_arch_ops_uv; if (sizeof(struct xpc_notify_mq_msghdr_uv) > XPC_MSG_HDR_MAX_SIZE) { @@ -1742,21 +1783,21 @@ xpc_init_uv(void) return -E2BIG; } - xpc_activate_mq_uv = xpc_create_gru_mq_uv(XPC_ACTIVATE_MQ_SIZE_UV, 0, - XPC_ACTIVATE_IRQ_NAME, - xpc_handle_activate_IRQ_uv); - if (IS_ERR(xpc_activate_mq_uv)) - return PTR_ERR(xpc_activate_mq_uv); + if (xpc_mq_node < 0) + for_each_online_node(nid) { + ret = xpc_init_mq_node(nid); - xpc_notify_mq_uv = xpc_create_gru_mq_uv(XPC_NOTIFY_MQ_SIZE_UV, 0, - XPC_NOTIFY_IRQ_NAME, - xpc_handle_notify_IRQ_uv); - if (IS_ERR(xpc_notify_mq_uv)) { - xpc_destroy_gru_mq_uv(xpc_activate_mq_uv); - return PTR_ERR(xpc_notify_mq_uv); - } + if 
(!ret) + break; + } + else + ret = xpc_init_mq_node(xpc_mq_node); - return 0; + if (ret < 0) + dev_err(xpc_part, "xpc_init_mq_node() returned error=%d\n", + -ret); + + return ret; } void @@ -1765,3 +1806,6 @@ xpc_exit_uv(void) xpc_destroy_gru_mq_uv(xpc_notify_mq_uv); xpc_destroy_gru_mq_uv(xpc_activate_mq_uv); } + +module_param(xpc_mq_node, int, 0); +MODULE_PARM_DESC(xpc_mq_node, "Node number on which to allocate message queues."); -- 1.7.6.1 ^ permalink raw reply related [flat|nested] 3+ messages in thread
end of thread, other threads:[~2012-08-16 3:58 UTC | newest] Thread overview: 3+ messages (download: mbox.gz follow: Atom feed -- links below jump to the message on this page -- 2012-08-03 19:46 [PATCH] SGI XPC fails to load when cpu 0 is out of IRQ resources Robin Holt 2012-08-15 22:56 ` Andrew Morton 2012-08-16 3:58 ` [PATCH] SGI XPC fails to load when cpu 0 is out of IRQ resources. -v2 Robin Holt
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox