All of lore.kernel.org
 help / color / mirror / Atom feed
From: Dan Carpenter <dan.carpenter@oracle.com>
To: Schspa Shi <schspa@gmail.com>
Cc: linux-pm@vger.kernel.org
Subject: Re: [bug report] cpufreq: Fix possible race in cpufreq online error path
Date: Fri, 6 May 2022 10:21:46 +0300	[thread overview]
Message-ID: <20220506072146.GD4031@kadam> (raw)
In-Reply-To: <CAMA88TpC_7-S7HDnjE5VLS-h_y0pQw1Qb_Q-2DYsSDoZJLdUgQ@mail.gmail.com>

On Fri, May 06, 2022 at 02:43:59PM +0800, Schspa Shi wrote:
> > drivers/cpufreq/cpufreq.c
> >     1318 static int cpufreq_online(unsigned int cpu)
> >     1319 {
> >     1320         struct cpufreq_policy *policy;
> >     1321         bool new_policy;
> >     1322         unsigned long flags;
> >     1323         unsigned int j;
> >     1324         int ret;
> >     1325
> >     1326         pr_debug("%s: bringing CPU%u online\n", __func__, cpu);
> >     1327
> >     1328         /* Check if this CPU already has a policy to manage it */
> >     1329         policy = per_cpu(cpufreq_cpu_data, cpu);
> >     1330         if (policy) {
> >     1331                 WARN_ON(!cpumask_test_cpu(cpu, policy->related_cpus));
> >     1332                 if (!policy_is_inactive(policy))
> >     1333                         return cpufreq_add_policy_cpu(policy, cpu);
> >     1334
> >     1335                 /* This is the only online CPU for the policy.  Start over. */
> >     1336                 new_policy = false;
> >     1337                 down_write(&policy->rwsem);
> >     1338                 policy->cpu = cpu;
> >     1339                 policy->governor = NULL;
> >     1340                 up_write(&policy->rwsem);
> >
> > unlocked here
> >
> >     1341         } else {
> >     1342                 new_policy = true;
> >     1343                 policy = cpufreq_policy_alloc(cpu);
> >     1344                 if (!policy)
> >     1345                         return -ENOMEM;
> >     1346         }
> >     1347
> >     1348         if (!new_policy && cpufreq_driver->online) {
> >     1349                 ret = cpufreq_driver->online(policy);
> >     1350                 if (ret) {
> >     1351                         pr_debug("%s: %d: initialization failed\n", __func__,
> >     1352                                  __LINE__);
> >     1353                         goto out_exit_policy;
> >
> > goto
> >
> >     1354                 }
> >     1355
> >     1356                 /* Recover policy->cpus using related_cpus */
> >     1357                 cpumask_copy(policy->cpus, policy->related_cpus);
> >     1358         } else {
> >     1359                 cpumask_copy(policy->cpus, cpumask_of(cpu));
> >     1360
> >     1361                 /*
> >     1362                  * Call driver. From then on the cpufreq must be able
> >     1363                  * to accept all calls to ->verify and ->setpolicy for this CPU.
> >     1364                  */
> >     1365                 ret = cpufreq_driver->init(policy);
> >     1366                 if (ret) {
> >     1367                         pr_debug("%s: %d: initialization failed\n", __func__,
> >     1368                                  __LINE__);
> >     1369                         goto out_free_policy;
> >     1370                 }
> >     1371
> >     1372                 /*
> >     1373                  * The initialization has succeeded and the policy is online.
> >     1374                  * If there is a problem with its frequency table, take it
> >     1375                  * offline and drop it.
> >     1376                  */
> >     1377                 ret = cpufreq_table_validate_and_sort(policy);
> >     1378                 if (ret)
> >     1379                         goto out_offline_policy;
> >     1380
> >     1381                 /* related_cpus should at least include policy->cpus. */
> >     1382                 cpumask_copy(policy->related_cpus, policy->cpus);
> >     1383         }
> >     1384
> >     1385         down_write(&policy->rwsem);
> >     1386         /*
> >     1387          * affected cpus must always be the one, which are online. We aren't
> >     1388          * managing offline cpus here.
> >     1389          */
> >     1390         cpumask_and(policy->cpus, policy->cpus, cpu_online_mask);
> >     1391
> >     1392         if (new_policy) {
> >     1393                 for_each_cpu(j, policy->related_cpus) {
> >     1394                         per_cpu(cpufreq_cpu_data, j) = policy;
> >     1395                         add_cpu_dev_symlink(policy, j, get_cpu_device(j));
> >     1396                 }
> >     1397
> >     1398                 policy->min_freq_req = kzalloc(2 * sizeof(*policy->min_freq_req),
> >     1399                                                GFP_KERNEL);
> >     1400                 if (!policy->min_freq_req) {
> >     1401                         ret = -ENOMEM;
> >     1402                         goto out_destroy_policy;
> >     1403                 }
> >     1404
> >     1405                 ret = freq_qos_add_request(&policy->constraints,
> >     1406                                            policy->min_freq_req, FREQ_QOS_MIN,
> >     1407                                            FREQ_QOS_MIN_DEFAULT_VALUE);
> >     1408                 if (ret < 0) {
> >     1409                         /*
> >     1410                          * So we don't call freq_qos_remove_request() for an
> >     1411                          * uninitialized request.
> >     1412                          */
> >     1413                         kfree(policy->min_freq_req);
> >     1414                         policy->min_freq_req = NULL;
> >     1415                         goto out_destroy_policy;
> >     1416                 }
> >     1417
> >     1418                 /*
> >     1419                  * This must be initialized right here to avoid calling
> >     1420                  * freq_qos_remove_request() on uninitialized request in case
> >     1421                  * of errors.
> >     1422                  */
> >     1423                 policy->max_freq_req = policy->min_freq_req + 1;
> >     1424
> >     1425                 ret = freq_qos_add_request(&policy->constraints,
> >     1426                                            policy->max_freq_req, FREQ_QOS_MAX,
> >     1427                                            FREQ_QOS_MAX_DEFAULT_VALUE);
> >     1428                 if (ret < 0) {
> >     1429                         policy->max_freq_req = NULL;
> >     1430                         goto out_destroy_policy;
> >     1431                 }
> >     1432
> >     1433                 blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
> >     1434                                 CPUFREQ_CREATE_POLICY, policy);
> >     1435         }
> >     1436
> >     1437         if (cpufreq_driver->get && has_target()) {
> >     1438                 policy->cur = cpufreq_driver->get(policy->cpu);
> >     1439                 if (!policy->cur) {
> >     1440                         ret = -EIO;
> >     1441                         pr_err("%s: ->get() failed\n", __func__);
> >     1442                         goto out_destroy_policy;
> >     1443                 }
> >     1444         }
> >     1445
> >     1446         /*
> >     1447          * Sometimes boot loaders set CPU frequency to a value outside of
> >     1448          * frequency table present with cpufreq core. In such cases CPU might be
> >     1449          * unstable if it has to run on that frequency for long duration of time
> >     1450          * and so its better to set it to a frequency which is specified in
> >     1451          * freq-table. This also makes cpufreq stats inconsistent as
> >     1452          * cpufreq-stats would fail to register because current frequency of CPU
> >     1453          * isn't found in freq-table.
> >     1454          *
> >     1455          * Because we don't want this change to effect boot process badly, we go
> >     1456          * for the next freq which is >= policy->cur ('cur' must be set by now,
> >     1457          * otherwise we will end up setting freq to lowest of the table as 'cur'
> >     1458          * is initialized to zero).
> >     1459          *
> >     1460          * We are passing target-freq as "policy->cur - 1" otherwise
> >     1461          * __cpufreq_driver_target() would simply fail, as policy->cur will be
> >     1462          * equal to target-freq.
> >     1463          */
> >     1464         if ((cpufreq_driver->flags & CPUFREQ_NEED_INITIAL_FREQ_CHECK)
> >     1465             && has_target()) {
> >     1466                 unsigned int old_freq = policy->cur;
> >     1467
> >     1468                 /* Are we running at unknown frequency ? */
> >     1469                 ret = cpufreq_frequency_table_get_index(policy, old_freq);
> >     1470                 if (ret == -EINVAL) {
> >     1471                         ret = __cpufreq_driver_target(policy, old_freq - 1,
> >     1472                                                       CPUFREQ_RELATION_L);
> >     1473
> >     1474                         /*
> >     1475                          * Reaching here after boot in a few seconds may not
> >     1476                          * mean that system will remain stable at "unknown"
> >     1477                          * frequency for longer duration. Hence, a BUG_ON().
> >     1478                          */
> >     1479                         BUG_ON(ret);
> >     1480                         pr_info("%s: CPU%d: Running at unlisted initial frequency: %u KHz, changing to: %u KHz\n",
> >     1481                                 __func__, policy->cpu, old_freq, policy->cur);
> >     1482                 }
> >     1483         }
> >     1484
> >     1485         if (new_policy) {
> >     1486                 ret = cpufreq_add_dev_interface(policy);
> >     1487                 if (ret)
> >     1488                         goto out_destroy_policy;
> >     1489
> >     1490                 cpufreq_stats_create_table(policy);
> >     1491
> >     1492                 write_lock_irqsave(&cpufreq_driver_lock, flags);
> >     1493                 list_add(&policy->policy_list, &cpufreq_policy_list);
> >     1494                 write_unlock_irqrestore(&cpufreq_driver_lock, flags);
> >     1495
> >     1496                 /*
> >     1497                  * Register with the energy model before
> >     1498                  * sched_cpufreq_governor_change() is called, which will result
> >     1499                  * in rebuilding of the sched domains, which should only be done
> >     1500                  * once the energy model is properly initialized for the policy
> >     1501                  * first.
> >     1502                  *
> >     1503                  * Also, this should be called before the policy is registered
> >     1504                  * with cooling framework.
> >     1505                  */
> >     1506                 if (cpufreq_driver->register_em)
> >     1507                         cpufreq_driver->register_em(policy);
> >     1508         }
> >     1509
> >     1510         ret = cpufreq_init_policy(policy);
> >     1511         if (ret) {
> >     1512                 pr_err("%s: Failed to initialize policy for cpu: %d (%d)\n",
> >     1513                        __func__, cpu, ret);
> >     1514                 goto out_destroy_policy;
> >     1515         }
> >     1516
> >     1517         up_write(&policy->rwsem);
> >     1518
> >     1519         kobject_uevent(&policy->kobj, KOBJ_ADD);
> >     1520
> >     1521         /* Callback for handling stuff after policy is ready */
> >     1522         if (cpufreq_driver->ready)
> >     1523                 cpufreq_driver->ready(policy);
> >     1524
> >     1525         if (cpufreq_thermal_control_enabled(cpufreq_driver))
> >     1526                 policy->cdev = of_cpufreq_cooling_register(policy);
> >     1527
> >     1528         pr_debug("initialization complete\n");
> >     1529
> >     1530         return 0;
> >     1531
> >     1532 out_destroy_policy:
> >     1533         for_each_cpu(j, policy->real_cpus)
> >     1534                 remove_cpu_dev_symlink(policy, get_cpu_device(j));
> >     1535
> >     1536 out_offline_policy:
> >     1537         if (cpufreq_driver->offline)
> >     1538                 cpufreq_driver->offline(policy);
> >     1539
> >     1540 out_exit_policy:
> >     1541         if (cpufreq_driver->exit)
> >     1542                 cpufreq_driver->exit(policy);
> >     1543
> >     1544         cpumask_clear(policy->cpus);
> > --> 1545         up_write(&policy->rwsem);
> >
> > Double unlock
> 
> Thanks for pointing out this double unlock, do you want to fix it by yourself?
> Or I will fix it.
> 

Could you fix it?

regards,
dan carpenter


  reply	other threads:[~2022-05-06  7:22 UTC|newest]

Thread overview: 5+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-05-04 15:17 [bug report] cpufreq: Fix possible race in cpufreq online error path Dan Carpenter
2022-05-06  6:43 ` Schspa Shi
2022-05-06  7:21   ` Dan Carpenter [this message]
2022-05-06 17:00     ` [PATCH] cpufreq: fix double unlock when cpufreq online Schspa Shi
2022-05-09  4:00       ` Viresh Kumar

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220506072146.GD4031@kadam \
    --to=dan.carpenter@oracle.com \
    --cc=linux-pm@vger.kernel.org \
    --cc=schspa@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.