From: NeilBrown <neilb@suse.com>
To: lustre-devel@lists.lustre.org
Subject: [lustre-devel] [PATCH v3 03/26] staging: lustre: libcfs: properly handle failure cases in SMP code
Date: Mon, 25 Jun 2018 10:20:27 +1000 [thread overview]
Message-ID: <87d0wfaf90.fsf@notabene.neil.brown.name> (raw)
In-Reply-To: <1529875250-11531-4-git-send-email-jsimmons@infradead.org>
On Sun, Jun 24 2018, James Simmons wrote:
> While pushing the SMP work some bugs were pointed out by Dan
> Carpenter in the code. Due to a single err label in cfs_cpu_init()
> and cfs_cpt_table_alloc() a few items were being cleaned up that
> were never initialized. This can lead to crashes and other problems.
> In those initialization functions, introduce individual labels to
> jump to, so that only the things initialized get freed on failure.
>
> Signed-off-by: James Simmons <uja.ornl@yahoo.com>
> WC-bug-id: https://jira.whamcloud.com/browse/LU-10932
> Reviewed-on: https://review.whamcloud.com/32085
> Reviewed-by: Dmitry Eremin <dmitry.eremin@intel.com>
> Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
> Signed-off-by: James Simmons <jsimmons@infradead.org>
> ---
> drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c | 72 ++++++++++++++++++-------
> 1 file changed, 52 insertions(+), 20 deletions(-)
>
> diff --git a/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c b/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c
> index 46d3530..bdd71a3 100644
> --- a/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c
> +++ b/drivers/staging/lustre/lnet/libcfs/libcfs_cpu.c
> @@ -85,17 +85,19 @@ struct cfs_cpt_table *
>
> cptab->ctb_nparts = ncpt;
>
> + if (!zalloc_cpumask_var(&cptab->ctb_cpumask, GFP_NOFS))
> + goto failed_alloc_cpumask;
> +
> cptab->ctb_nodemask = kzalloc(sizeof(*cptab->ctb_nodemask),
> GFP_NOFS);
> - if (!zalloc_cpumask_var(&cptab->ctb_cpumask, GFP_NOFS) ||
> - !cptab->ctb_nodemask)
> - goto failed;
> + if (!cptab->ctb_nodemask)
> + goto failed_alloc_nodemask;
>
> cptab->ctb_cpu2cpt = kvmalloc_array(num_possible_cpus(),
> sizeof(cptab->ctb_cpu2cpt[0]),
> GFP_KERNEL);
> if (!cptab->ctb_cpu2cpt)
> - goto failed;
> + goto failed_alloc_cpu2cpt;
>
> memset(cptab->ctb_cpu2cpt, -1,
> num_possible_cpus() * sizeof(cptab->ctb_cpu2cpt[0]));
> @@ -103,22 +105,41 @@ struct cfs_cpt_table *
> cptab->ctb_parts = kvmalloc_array(ncpt, sizeof(cptab->ctb_parts[0]),
> GFP_KERNEL);
> if (!cptab->ctb_parts)
> - goto failed;
> + goto failed_alloc_ctb_parts;
> +
> + memset(cptab->ctb_parts, -1, ncpt * sizeof(cptab->ctb_parts[0]));
>
> for (i = 0; i < ncpt; i++) {
> struct cfs_cpu_partition *part = &cptab->ctb_parts[i];
>
> + if (!zalloc_cpumask_var(&part->cpt_cpumask, GFP_NOFS))
> + goto failed_setting_ctb_parts;
> +
> part->cpt_nodemask = kzalloc(sizeof(*part->cpt_nodemask),
> GFP_NOFS);
> - if (!zalloc_cpumask_var(&part->cpt_cpumask, GFP_NOFS) ||
> - !part->cpt_nodemask)
> - goto failed;
> + if (!part->cpt_nodemask)
> + goto failed_setting_ctb_parts;
If zalloc_cpumask_var() succeeds but kzalloc() fails (which is almost
impossible, but still), we go to failed_setting_ctb_parts with
cptab->ctb_parts[i].cpt_cpumask still needing to be freed.
> }
>
> return cptab;
>
> - failed:
> - cfs_cpt_table_free(cptab);
> +failed_setting_ctb_parts:
> + while (i-- >= 0) {
but we don't free anything in cptab->ctb_parts[i].
I've fixed this by calling free_cpumask_var() before the goto,
and will propagate the change through future patches in this series.
> + struct cfs_cpu_partition *part = &cptab->ctb_parts[i];
> +
> + kfree(part->cpt_nodemask);
> + free_cpumask_var(part->cpt_cpumask);
> + }
> +
> + kvfree(cptab->ctb_parts);
> +failed_alloc_ctb_parts:
> + kvfree(cptab->ctb_cpu2cpt);
> +failed_alloc_cpu2cpt:
> + kfree(cptab->ctb_nodemask);
> +failed_alloc_nodemask:
> + free_cpumask_var(cptab->ctb_cpumask);
> +failed_alloc_cpumask:
> + kfree(cptab);
> return NULL;
> }
> EXPORT_SYMBOL(cfs_cpt_table_alloc);
> @@ -944,7 +965,7 @@ static int cfs_cpu_dead(unsigned int cpu)
> int
> cfs_cpu_init(void)
> {
> - int ret = 0;
> + int ret;
>
> LASSERT(!cfs_cpt_tab);
>
> @@ -953,23 +974,23 @@ static int cfs_cpu_dead(unsigned int cpu)
> "staging/lustre/cfe:dead", NULL,
> cfs_cpu_dead);
> if (ret < 0)
> - goto failed;
> + goto failed_cpu_dead;
> +
> ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
> "staging/lustre/cfe:online",
> cfs_cpu_online, NULL);
> if (ret < 0)
> - goto failed;
> + goto failed_cpu_online;
> +
> lustre_cpu_online = ret;
> #endif
> - ret = -EINVAL;
> -
> get_online_cpus();
> if (*cpu_pattern) {
> char *cpu_pattern_dup = kstrdup(cpu_pattern, GFP_KERNEL);
>
> if (!cpu_pattern_dup) {
> CERROR("Failed to duplicate cpu_pattern\n");
> - goto failed;
> + goto failed_alloc_table;
> }
>
> cfs_cpt_tab = cfs_cpt_table_create_pattern(cpu_pattern_dup);
> @@ -977,7 +998,7 @@ static int cfs_cpu_dead(unsigned int cpu)
> if (!cfs_cpt_tab) {
> CERROR("Failed to create cptab from pattern %s\n",
> cpu_pattern);
> - goto failed;
> + goto failed_alloc_table;
> }
>
> } else {
> @@ -985,7 +1006,7 @@ static int cfs_cpu_dead(unsigned int cpu)
> if (!cfs_cpt_tab) {
> CERROR("Failed to create ptable with npartitions %d\n",
> cpu_npartitions);
> - goto failed;
> + goto failed_alloc_table;
> }
> }
>
> @@ -996,8 +1017,19 @@ static int cfs_cpu_dead(unsigned int cpu)
> cfs_cpt_number(cfs_cpt_tab));
> return 0;
>
> - failed:
> +failed_alloc_table:
> put_online_cpus();
> - cfs_cpu_fini();
> +
> + if (cfs_cpt_tab)
> + cfs_cpt_table_free(cfs_cpt_tab);
> +
> + ret = -EINVAL;
> +#ifdef CONFIG_HOTPLUG_CPU
> + if (lustre_cpu_online > 0)
> + cpuhp_remove_state_nocalls(lustre_cpu_online);
> +failed_cpu_online:
> + cpuhp_remove_state_nocalls(CPUHP_LUSTRE_CFS_DEAD);
> +failed_cpu_dead:
> +#endif
> return ret;
> }
> --
> 1.8.3.1
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 832 bytes
Desc: not available
URL: <http://lists.lustre.org/pipermail/lustre-devel-lustre.org/attachments/20180625/120e6fbe/attachment.sig>
next prev parent reply other threads:[~2018-06-25 0:20 UTC|newest]
Thread overview: 66+ messages / expand[flat|nested] mbox.gz Atom feed top
2018-06-24 21:20 [lustre-devel] [PATCH v3 00/26] staging: lustre: libcfs: SMP rework James Simmons
2018-06-24 21:20 ` [lustre-devel] [PATCH v3 01/26] staging: lustre: libcfs: remove useless CPU partition code James Simmons
2018-06-24 21:20 ` [lustre-devel] [PATCH v3 02/26] staging: lustre: libcfs: rename variable i to cpu James Simmons
2018-06-24 21:20 ` [lustre-devel] [PATCH v3 03/26] staging: lustre: libcfs: properly handle failure cases in SMP code James Simmons
2018-06-25 0:20 ` NeilBrown [this message]
2018-06-26 0:33 ` James Simmons
2018-06-24 21:20 ` [lustre-devel] [PATCH v3 04/26] staging: lustre: libcfs: replace MAX_NUMNODES with nr_node_ids James Simmons
2018-06-24 21:20 ` [lustre-devel] [PATCH v3 05/26] staging: lustre: libcfs: remove excess space James Simmons
2018-06-25 0:35 ` NeilBrown
2018-06-26 0:55 ` James Simmons
2018-06-24 21:20 ` [lustre-devel] [PATCH v3 06/26] staging: lustre: libcfs: replace num_possible_cpus() with nr_cpu_ids James Simmons
2018-06-24 21:20 ` [lustre-devel] [PATCH v3 07/26] staging: lustre: libcfs: NUMA support James Simmons
2018-06-25 0:39 ` NeilBrown
2018-06-25 18:22 ` Doug Oucharek
2018-06-27 2:44 ` NeilBrown
2018-06-27 12:42 ` Patrick Farrell
2018-06-28 1:17 ` NeilBrown
2018-06-29 17:19 ` Doug Oucharek
2018-06-29 17:27 ` Amir Shehata
2018-06-29 17:47 ` Weber, Olaf
2018-07-04 5:22 ` NeilBrown
2018-07-04 8:40 ` Weber, Olaf
2018-07-05 1:57 ` NeilBrown
2018-07-06 0:20 ` James Simmons
2018-07-06 0:40 ` Patrick Farrell
2018-07-06 3:11 ` NeilBrown
2018-07-06 5:36 ` Doug Oucharek
2018-07-06 6:13 ` NeilBrown
2018-07-06 15:57 ` James Simmons
2018-07-06 16:04 ` Patrick Farrell
2018-06-26 0:39 ` James Simmons
2018-06-24 21:20 ` [lustre-devel] [PATCH v3 08/26] staging: lustre: libcfs: add cpu distance handling James Simmons
2018-06-25 0:48 ` NeilBrown
2018-06-26 1:15 ` James Simmons
2018-06-27 2:50 ` NeilBrown
2018-06-24 21:20 ` [lustre-devel] [PATCH v3 09/26] staging: lustre: libcfs: use distance in cpu and node handling James Simmons
2018-06-24 21:20 ` [lustre-devel] [PATCH v3 10/26] staging: lustre: libcfs: provide debugfs files for distance handling James Simmons
2018-06-24 21:20 ` [lustre-devel] [PATCH v3 11/26] staging: lustre: libcfs: invert error handling for cfs_cpt_table_print James Simmons
2018-06-24 21:20 ` [lustre-devel] [PATCH v3 12/26] staging: lustre: libcfs: fix libcfs_cpu coding style James Simmons
2018-06-24 21:20 ` [lustre-devel] [PATCH v3 13/26] staging: lustre: libcfs: use int type for CPT identification James Simmons
2018-06-25 0:57 ` NeilBrown
2018-06-26 0:42 ` James Simmons
2018-06-24 21:20 ` [lustre-devel] [PATCH v3 14/26] staging: lustre: libcfs: rename i to node for cfs_cpt_set_nodemask James Simmons
2018-06-24 21:20 ` [lustre-devel] [PATCH v3 15/26] staging: lustre: libcfs: rename i to cpu for cfs_cpt_bind James Simmons
2018-06-24 21:20 ` [lustre-devel] [PATCH v3 16/26] staging: lustre: libcfs: rename cpumask_var_t variables to *_mask James Simmons
2018-06-24 21:20 ` [lustre-devel] [PATCH v3 17/26] staging: lustre: libcfs: update debug messages James Simmons
2018-06-24 21:20 ` [lustre-devel] [PATCH v3 18/26] staging: lustre: libcfs: make tolerant to offline CPUs and empty NUMA nodes James Simmons
2018-06-24 21:20 ` [lustre-devel] [PATCH v3 19/26] staging: lustre: libcfs: report NUMA node instead of just node James Simmons
2018-06-25 1:09 ` NeilBrown
2018-06-25 1:11 ` NeilBrown
2018-06-25 22:57 ` James Simmons
2018-06-26 0:54 ` James Simmons
2018-06-27 2:49 ` NeilBrown
2018-06-24 21:20 ` [lustre-devel] [PATCH v3 20/26] staging: lustre: libcfs: update debug messages in CPT code James Simmons
2018-06-24 21:20 ` [lustre-devel] [PATCH v3 21/26] staging: lustre: libcfs: rework CPU pattern parsing code James Simmons
2018-06-24 21:20 ` [lustre-devel] [PATCH v3 22/26] staging: lustre: libcfs: change CPT estimate algorithm James Simmons
2018-06-24 21:20 ` [lustre-devel] [PATCH v3 23/26] staging: lustre: ptlrpc: use current CPU instead of hardcoded 0 James Simmons
2018-06-25 2:38 ` NeilBrown
2018-06-25 22:51 ` James Simmons
2018-06-26 0:34 ` NeilBrown
2018-06-24 21:20 ` [lustre-devel] [PATCH v3 24/26] staging: lustre: libcfs: restore debugfs table reporting for UMP James Simmons
2018-06-25 1:27 ` NeilBrown
2018-06-24 21:20 ` [lustre-devel] [PATCH v3 25/26] staging: lustre: libcfs: make cfs_cpt_tab a static structure James Simmons
2018-06-25 1:32 ` NeilBrown
2018-06-24 21:20 ` [lustre-devel] [PATCH v3 26/26] staging: lustre: libcfs: restore UMP support James Simmons
2018-06-25 1:33 ` [lustre-devel] [PATCH v3 00/26] staging: lustre: libcfs: SMP rework NeilBrown
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=87d0wfaf90.fsf@notabene.neil.brown.name \
--to=neilb@suse.com \
--cc=lustre-devel@lists.lustre.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.