From: Dan Carpenter <dan.carpenter@oracle.com>
To: Schspa Shi <schspa@gmail.com>
Cc: linux-pm@vger.kernel.org
Subject: Re: [bug report] cpufreq: Fix possible race in cpufreq online error path
Date: Fri, 6 May 2022 10:21:46 +0300 [thread overview]
Message-ID: <20220506072146.GD4031@kadam> (raw)
In-Reply-To: <CAMA88TpC_7-S7HDnjE5VLS-h_y0pQw1Qb_Q-2DYsSDoZJLdUgQ@mail.gmail.com>
On Fri, May 06, 2022 at 02:43:59PM +0800, Schspa Shi wrote:
> > drivers/cpufreq/cpufreq.c
> > 1318 static int cpufreq_online(unsigned int cpu)
> > 1319 {
> > 1320 struct cpufreq_policy *policy;
> > 1321 bool new_policy;
> > 1322 unsigned long flags;
> > 1323 unsigned int j;
> > 1324 int ret;
> > 1325
> > 1326 pr_debug("%s: bringing CPU%u online\n", __func__, cpu);
> > 1327
> > 1328 /* Check if this CPU already has a policy to manage it */
> > 1329 policy = per_cpu(cpufreq_cpu_data, cpu);
> > 1330 if (policy) {
> > 1331 WARN_ON(!cpumask_test_cpu(cpu, policy->related_cpus));
> > 1332 if (!policy_is_inactive(policy))
> > 1333 return cpufreq_add_policy_cpu(policy, cpu);
> > 1334
> > 1335 /* This is the only online CPU for the policy. Start over. */
> > 1336 new_policy = false;
> > 1337 down_write(&policy->rwsem);
> > 1338 policy->cpu = cpu;
> > 1339 policy->governor = NULL;
> > 1340 up_write(&policy->rwsem);
> >
> > unlocked here
> >
> > 1341 } else {
> > 1342 new_policy = true;
> > 1343 policy = cpufreq_policy_alloc(cpu);
> > 1344 if (!policy)
> > 1345 return -ENOMEM;
> > 1346 }
> > 1347
> > 1348 if (!new_policy && cpufreq_driver->online) {
> > 1349 ret = cpufreq_driver->online(policy);
> > 1350 if (ret) {
> > 1351 pr_debug("%s: %d: initialization failed\n", __func__,
> > 1352 __LINE__);
> > 1353 goto out_exit_policy;
> >
> > goto
> >
> > 1354 }
> > 1355
> > 1356 /* Recover policy->cpus using related_cpus */
> > 1357 cpumask_copy(policy->cpus, policy->related_cpus);
> > 1358 } else {
> > 1359 cpumask_copy(policy->cpus, cpumask_of(cpu));
> > 1360
> > 1361 /*
> > 1362 * Call driver. From then on the cpufreq must be able
> > 1363 * to accept all calls to ->verify and ->setpolicy for this CPU.
> > 1364 */
> > 1365 ret = cpufreq_driver->init(policy);
> > 1366 if (ret) {
> > 1367 pr_debug("%s: %d: initialization failed\n", __func__,
> > 1368 __LINE__);
> > 1369 goto out_free_policy;
> > 1370 }
> > 1371
> > 1372 /*
> > 1373 * The initialization has succeeded and the policy is online.
> > 1374 * If there is a problem with its frequency table, take it
> > 1375 * offline and drop it.
> > 1376 */
> > 1377 ret = cpufreq_table_validate_and_sort(policy);
> > 1378 if (ret)
> > 1379 goto out_offline_policy;
> > 1380
> > 1381 /* related_cpus should at least include policy->cpus. */
> > 1382 cpumask_copy(policy->related_cpus, policy->cpus);
> > 1383 }
> > 1384
> > 1385 down_write(&policy->rwsem);
> > 1386 /*
> > 1387 * affected cpus must always be the one, which are online. We aren't
> > 1388 * managing offline cpus here.
> > 1389 */
> > 1390 cpumask_and(policy->cpus, policy->cpus, cpu_online_mask);
> > 1391
> > 1392 if (new_policy) {
> > 1393 for_each_cpu(j, policy->related_cpus) {
> > 1394 per_cpu(cpufreq_cpu_data, j) = policy;
> > 1395 add_cpu_dev_symlink(policy, j, get_cpu_device(j));
> > 1396 }
> > 1397
> > 1398 policy->min_freq_req = kzalloc(2 * sizeof(*policy->min_freq_req),
> > 1399 GFP_KERNEL);
> > 1400 if (!policy->min_freq_req) {
> > 1401 ret = -ENOMEM;
> > 1402 goto out_destroy_policy;
> > 1403 }
> > 1404
> > 1405 ret = freq_qos_add_request(&policy->constraints,
> > 1406 policy->min_freq_req, FREQ_QOS_MIN,
> > 1407 FREQ_QOS_MIN_DEFAULT_VALUE);
> > 1408 if (ret < 0) {
> > 1409 /*
> > 1410 * So we don't call freq_qos_remove_request() for an
> > 1411 * uninitialized request.
> > 1412 */
> > 1413 kfree(policy->min_freq_req);
> > 1414 policy->min_freq_req = NULL;
> > 1415 goto out_destroy_policy;
> > 1416 }
> > 1417
> > 1418 /*
> > 1419 * This must be initialized right here to avoid calling
> > 1420 * freq_qos_remove_request() on uninitialized request in case
> > 1421 * of errors.
> > 1422 */
> > 1423 policy->max_freq_req = policy->min_freq_req + 1;
> > 1424
> > 1425 ret = freq_qos_add_request(&policy->constraints,
> > 1426 policy->max_freq_req, FREQ_QOS_MAX,
> > 1427 FREQ_QOS_MAX_DEFAULT_VALUE);
> > 1428 if (ret < 0) {
> > 1429 policy->max_freq_req = NULL;
> > 1430 goto out_destroy_policy;
> > 1431 }
> > 1432
> > 1433 blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
> > 1434 CPUFREQ_CREATE_POLICY, policy);
> > 1435 }
> > 1436
> > 1437 if (cpufreq_driver->get && has_target()) {
> > 1438 policy->cur = cpufreq_driver->get(policy->cpu);
> > 1439 if (!policy->cur) {
> > 1440 ret = -EIO;
> > 1441 pr_err("%s: ->get() failed\n", __func__);
> > 1442 goto out_destroy_policy;
> > 1443 }
> > 1444 }
> > 1445
> > 1446 /*
> > 1447 * Sometimes boot loaders set CPU frequency to a value outside of
> > 1448 * frequency table present with cpufreq core. In such cases CPU might be
> > 1449 * unstable if it has to run on that frequency for long duration of time
> > 1450 * and so its better to set it to a frequency which is specified in
> > 1451 * freq-table. This also makes cpufreq stats inconsistent as
> > 1452 * cpufreq-stats would fail to register because current frequency of CPU
> > 1453 * isn't found in freq-table.
> > 1454 *
> > 1455 * Because we don't want this change to effect boot process badly, we go
> > 1456 * for the next freq which is >= policy->cur ('cur' must be set by now,
> > 1457 * otherwise we will end up setting freq to lowest of the table as 'cur'
> > 1458 * is initialized to zero).
> > 1459 *
> > 1460 * We are passing target-freq as "policy->cur - 1" otherwise
> > 1461 * __cpufreq_driver_target() would simply fail, as policy->cur will be
> > 1462 * equal to target-freq.
> > 1463 */
> > 1464 if ((cpufreq_driver->flags & CPUFREQ_NEED_INITIAL_FREQ_CHECK)
> > 1465 && has_target()) {
> > 1466 unsigned int old_freq = policy->cur;
> > 1467
> > 1468 /* Are we running at unknown frequency ? */
> > 1469 ret = cpufreq_frequency_table_get_index(policy, old_freq);
> > 1470 if (ret == -EINVAL) {
> > 1471 ret = __cpufreq_driver_target(policy, old_freq - 1,
> > 1472 CPUFREQ_RELATION_L);
> > 1473
> > 1474 /*
> > 1475 * Reaching here after boot in a few seconds may not
> > 1476 * mean that system will remain stable at "unknown"
> > 1477 * frequency for longer duration. Hence, a BUG_ON().
> > 1478 */
> > 1479 BUG_ON(ret);
> > 1480 pr_info("%s: CPU%d: Running at unlisted initial frequency: %u KHz, changing to: %u KHz\n",
> > 1481 __func__, policy->cpu, old_freq, policy->cur);
> > 1482 }
> > 1483 }
> > 1484
> > 1485 if (new_policy) {
> > 1486 ret = cpufreq_add_dev_interface(policy);
> > 1487 if (ret)
> > 1488 goto out_destroy_policy;
> > 1489
> > 1490 cpufreq_stats_create_table(policy);
> > 1491
> > 1492 write_lock_irqsave(&cpufreq_driver_lock, flags);
> > 1493 list_add(&policy->policy_list, &cpufreq_policy_list);
> > 1494 write_unlock_irqrestore(&cpufreq_driver_lock, flags);
> > 1495
> > 1496 /*
> > 1497 * Register with the energy model before
> > 1498 * sched_cpufreq_governor_change() is called, which will result
> > 1499 * in rebuilding of the sched domains, which should only be done
> > 1500 * once the energy model is properly initialized for the policy
> > 1501 * first.
> > 1502 *
> > 1503 * Also, this should be called before the policy is registered
> > 1504 * with cooling framework.
> > 1505 */
> > 1506 if (cpufreq_driver->register_em)
> > 1507 cpufreq_driver->register_em(policy);
> > 1508 }
> > 1509
> > 1510 ret = cpufreq_init_policy(policy);
> > 1511 if (ret) {
> > 1512 pr_err("%s: Failed to initialize policy for cpu: %d (%d)\n",
> > 1513 __func__, cpu, ret);
> > 1514 goto out_destroy_policy;
> > 1515 }
> > 1516
> > 1517 up_write(&policy->rwsem);
> > 1518
> > 1519 kobject_uevent(&policy->kobj, KOBJ_ADD);
> > 1520
> > 1521 /* Callback for handling stuff after policy is ready */
> > 1522 if (cpufreq_driver->ready)
> > 1523 cpufreq_driver->ready(policy);
> > 1524
> > 1525 if (cpufreq_thermal_control_enabled(cpufreq_driver))
> > 1526 policy->cdev = of_cpufreq_cooling_register(policy);
> > 1527
> > 1528 pr_debug("initialization complete\n");
> > 1529
> > 1530 return 0;
> > 1531
> > 1532 out_destroy_policy:
> > 1533 for_each_cpu(j, policy->real_cpus)
> > 1534 remove_cpu_dev_symlink(policy, get_cpu_device(j));
> > 1535
> > 1536 out_offline_policy:
> > 1537 if (cpufreq_driver->offline)
> > 1538 cpufreq_driver->offline(policy);
> > 1539
> > 1540 out_exit_policy:
> > 1541 if (cpufreq_driver->exit)
> > 1542 cpufreq_driver->exit(policy);
> > 1543
> > 1544 cpumask_clear(policy->cpus);
> > --> 1545 up_write(&policy->rwsem);
> >
> > Double unlock
>
> Thanks for pointing out this double unlock, do you want to fix it by yourself?
> Or I will fix it.
>
Could you fix it?
regards,
dan carpenter
next prev parent reply other threads:[~2022-05-06 7:22 UTC|newest]
Thread overview: 5+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-05-04 15:17 [bug report] cpufreq: Fix possible race in cpufreq online error path Dan Carpenter
2022-05-06 6:43 ` Schspa Shi
2022-05-06 7:21 ` Dan Carpenter [this message]
2022-05-06 17:00 ` [PATCH] cpufreq: fix double unlock when cpufreq online Schspa Shi
2022-05-09 4:00 ` Viresh Kumar
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20220506072146.GD4031@kadam \
--to=dan.carpenter@oracle.com \
--cc=linux-pm@vger.kernel.org \
--cc=schspa@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).