From: Dan Carpenter <dan.carpenter@oracle.com>
To: Schspa Shi <schspa@gmail.com>
Cc: linux-pm@vger.kernel.org
Subject: Re: [bug report] cpufreq: Fix possible race in cpufreq online error path
Date: Fri, 6 May 2022 10:21:46 +0300 [thread overview]
Message-ID: <20220506072146.GD4031@kadam> (raw)
In-Reply-To: <CAMA88TpC_7-S7HDnjE5VLS-h_y0pQw1Qb_Q-2DYsSDoZJLdUgQ@mail.gmail.com>
On Fri, May 06, 2022 at 02:43:59PM +0800, Schspa Shi wrote:
> > drivers/cpufreq/cpufreq.c
> > 1318 static int cpufreq_online(unsigned int cpu)
> > 1319 {
> > 1320 struct cpufreq_policy *policy;
> > 1321 bool new_policy;
> > 1322 unsigned long flags;
> > 1323 unsigned int j;
> > 1324 int ret;
> > 1325
> > 1326 pr_debug("%s: bringing CPU%u online\n", __func__, cpu);
> > 1327
> > 1328 /* Check if this CPU already has a policy to manage it */
> > 1329 policy = per_cpu(cpufreq_cpu_data, cpu);
> > 1330 if (policy) {
> > 1331 WARN_ON(!cpumask_test_cpu(cpu, policy->related_cpus));
> > 1332 if (!policy_is_inactive(policy))
> > 1333 return cpufreq_add_policy_cpu(policy, cpu);
> > 1334
> > 1335 /* This is the only online CPU for the policy. Start over. */
> > 1336 new_policy = false;
> > 1337 down_write(&policy->rwsem);
> > 1338 policy->cpu = cpu;
> > 1339 policy->governor = NULL;
> > 1340 up_write(&policy->rwsem);
> >
> > unlocked here
> >
> > 1341 } else {
> > 1342 new_policy = true;
> > 1343 policy = cpufreq_policy_alloc(cpu);
> > 1344 if (!policy)
> > 1345 return -ENOMEM;
> > 1346 }
> > 1347
> > 1348 if (!new_policy && cpufreq_driver->online) {
> > 1349 ret = cpufreq_driver->online(policy);
> > 1350 if (ret) {
> > 1351 pr_debug("%s: %d: initialization failed\n", __func__,
> > 1352 __LINE__);
> > 1353 goto out_exit_policy;
> >
> > goto
> >
> > 1354 }
> > 1355
> > 1356 /* Recover policy->cpus using related_cpus */
> > 1357 cpumask_copy(policy->cpus, policy->related_cpus);
> > 1358 } else {
> > 1359 cpumask_copy(policy->cpus, cpumask_of(cpu));
> > 1360
> > 1361 /*
> > 1362 * Call driver. From then on the cpufreq must be able
> > 1363 * to accept all calls to ->verify and ->setpolicy for this CPU.
> > 1364 */
> > 1365 ret = cpufreq_driver->init(policy);
> > 1366 if (ret) {
> > 1367 pr_debug("%s: %d: initialization failed\n", __func__,
> > 1368 __LINE__);
> > 1369 goto out_free_policy;
> > 1370 }
> > 1371
> > 1372 /*
> > 1373 * The initialization has succeeded and the policy is online.
> > 1374 * If there is a problem with its frequency table, take it
> > 1375 * offline and drop it.
> > 1376 */
> > 1377 ret = cpufreq_table_validate_and_sort(policy);
> > 1378 if (ret)
> > 1379 goto out_offline_policy;
> > 1380
> > 1381 /* related_cpus should at least include policy->cpus. */
> > 1382 cpumask_copy(policy->related_cpus, policy->cpus);
> > 1383 }
> > 1384
> > 1385 down_write(&policy->rwsem);
> > 1386 /*
> > 1387 * affected cpus must always be the one, which are online. We aren't
> > 1388 * managing offline cpus here.
> > 1389 */
> > 1390 cpumask_and(policy->cpus, policy->cpus, cpu_online_mask);
> > 1391
> > 1392 if (new_policy) {
> > 1393 for_each_cpu(j, policy->related_cpus) {
> > 1394 per_cpu(cpufreq_cpu_data, j) = policy;
> > 1395 add_cpu_dev_symlink(policy, j, get_cpu_device(j));
> > 1396 }
> > 1397
> > 1398 policy->min_freq_req = kzalloc(2 * sizeof(*policy->min_freq_req),
> > 1399 GFP_KERNEL);
> > 1400 if (!policy->min_freq_req) {
> > 1401 ret = -ENOMEM;
> > 1402 goto out_destroy_policy;
> > 1403 }
> > 1404
> > 1405 ret = freq_qos_add_request(&policy->constraints,
> > 1406 policy->min_freq_req, FREQ_QOS_MIN,
> > 1407 FREQ_QOS_MIN_DEFAULT_VALUE);
> > 1408 if (ret < 0) {
> > 1409 /*
> > 1410 * So we don't call freq_qos_remove_request() for an
> > 1411 * uninitialized request.
> > 1412 */
> > 1413 kfree(policy->min_freq_req);
> > 1414 policy->min_freq_req = NULL;
> > 1415 goto out_destroy_policy;
> > 1416 }
> > 1417
> > 1418 /*
> > 1419 * This must be initialized right here to avoid calling
> > 1420 * freq_qos_remove_request() on uninitialized request in case
> > 1421 * of errors.
> > 1422 */
> > 1423 policy->max_freq_req = policy->min_freq_req + 1;
> > 1424
> > 1425 ret = freq_qos_add_request(&policy->constraints,
> > 1426 policy->max_freq_req, FREQ_QOS_MAX,
> > 1427 FREQ_QOS_MAX_DEFAULT_VALUE);
> > 1428 if (ret < 0) {
> > 1429 policy->max_freq_req = NULL;
> > 1430 goto out_destroy_policy;
> > 1431 }
> > 1432
> > 1433 blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
> > 1434 CPUFREQ_CREATE_POLICY, policy);
> > 1435 }
> > 1436
> > 1437 if (cpufreq_driver->get && has_target()) {
> > 1438 policy->cur = cpufreq_driver->get(policy->cpu);
> > 1439 if (!policy->cur) {
> > 1440 ret = -EIO;
> > 1441 pr_err("%s: ->get() failed\n", __func__);
> > 1442 goto out_destroy_policy;
> > 1443 }
> > 1444 }
> > 1445
> > 1446 /*
> > 1447 * Sometimes boot loaders set CPU frequency to a value outside of
> > 1448 * frequency table present with cpufreq core. In such cases CPU might be
> > 1449 * unstable if it has to run on that frequency for long duration of time
> > 1450 * and so its better to set it to a frequency which is specified in
> > 1451 * freq-table. This also makes cpufreq stats inconsistent as
> > 1452 * cpufreq-stats would fail to register because current frequency of CPU
> > 1453 * isn't found in freq-table.
> > 1454 *
> > 1455 * Because we don't want this change to effect boot process badly, we go
> > 1456 * for the next freq which is >= policy->cur ('cur' must be set by now,
> > 1457 * otherwise we will end up setting freq to lowest of the table as 'cur'
> > 1458 * is initialized to zero).
> > 1459 *
> > 1460 * We are passing target-freq as "policy->cur - 1" otherwise
> > 1461 * __cpufreq_driver_target() would simply fail, as policy->cur will be
> > 1462 * equal to target-freq.
> > 1463 */
> > 1464 if ((cpufreq_driver->flags & CPUFREQ_NEED_INITIAL_FREQ_CHECK)
> > 1465 && has_target()) {
> > 1466 unsigned int old_freq = policy->cur;
> > 1467
> > 1468 /* Are we running at unknown frequency ? */
> > 1469 ret = cpufreq_frequency_table_get_index(policy, old_freq);
> > 1470 if (ret == -EINVAL) {
> > 1471 ret = __cpufreq_driver_target(policy, old_freq - 1,
> > 1472 CPUFREQ_RELATION_L);
> > 1473
> > 1474 /*
> > 1475 * Reaching here after boot in a few seconds may not
> > 1476 * mean that system will remain stable at "unknown"
> > 1477 * frequency for longer duration. Hence, a BUG_ON().
> > 1478 */
> > 1479 BUG_ON(ret);
> > 1480 pr_info("%s: CPU%d: Running at unlisted initial frequency: %u KHz, changing to: %u KHz\n",
> > 1481 __func__, policy->cpu, old_freq, policy->cur);
> > 1482 }
> > 1483 }
> > 1484
> > 1485 if (new_policy) {
> > 1486 ret = cpufreq_add_dev_interface(policy);
> > 1487 if (ret)
> > 1488 goto out_destroy_policy;
> > 1489
> > 1490 cpufreq_stats_create_table(policy);
> > 1491
> > 1492 write_lock_irqsave(&cpufreq_driver_lock, flags);
> > 1493 list_add(&policy->policy_list, &cpufreq_policy_list);
> > 1494 write_unlock_irqrestore(&cpufreq_driver_lock, flags);
> > 1495
> > 1496 /*
> > 1497 * Register with the energy model before
> > 1498 * sched_cpufreq_governor_change() is called, which will result
> > 1499 * in rebuilding of the sched domains, which should only be done
> > 1500 * once the energy model is properly initialized for the policy
> > 1501 * first.
> > 1502 *
> > 1503 * Also, this should be called before the policy is registered
> > 1504 * with cooling framework.
> > 1505 */
> > 1506 if (cpufreq_driver->register_em)
> > 1507 cpufreq_driver->register_em(policy);
> > 1508 }
> > 1509
> > 1510 ret = cpufreq_init_policy(policy);
> > 1511 if (ret) {
> > 1512 pr_err("%s: Failed to initialize policy for cpu: %d (%d)\n",
> > 1513 __func__, cpu, ret);
> > 1514 goto out_destroy_policy;
> > 1515 }
> > 1516
> > 1517 up_write(&policy->rwsem);
> > 1518
> > 1519 kobject_uevent(&policy->kobj, KOBJ_ADD);
> > 1520
> > 1521 /* Callback for handling stuff after policy is ready */
> > 1522 if (cpufreq_driver->ready)
> > 1523 cpufreq_driver->ready(policy);
> > 1524
> > 1525 if (cpufreq_thermal_control_enabled(cpufreq_driver))
> > 1526 policy->cdev = of_cpufreq_cooling_register(policy);
> > 1527
> > 1528 pr_debug("initialization complete\n");
> > 1529
> > 1530 return 0;
> > 1531
> > 1532 out_destroy_policy:
> > 1533 for_each_cpu(j, policy->real_cpus)
> > 1534 remove_cpu_dev_symlink(policy, get_cpu_device(j));
> > 1535
> > 1536 out_offline_policy:
> > 1537 if (cpufreq_driver->offline)
> > 1538 cpufreq_driver->offline(policy);
> > 1539
> > 1540 out_exit_policy:
> > 1541 if (cpufreq_driver->exit)
> > 1542 cpufreq_driver->exit(policy);
> > 1543
> > 1544 cpumask_clear(policy->cpus);
> > --> 1545 up_write(&policy->rwsem);
> >
> > Double unlock
>
> Thanks for pointing out this double unlock, do you want to fix it by yourself?
> Or I will fix it.
>
Could you fix it?
regards,
dan carpenter
next prev parent reply other threads:[~2022-05-06 7:22 UTC|newest]
Thread overview: 5+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-05-04 15:17 [bug report] cpufreq: Fix possible race in cpufreq online error path Dan Carpenter
2022-05-06 6:43 ` Schspa Shi
2022-05-06 7:21 ` Dan Carpenter [this message]
2022-05-06 17:00 ` [PATCH] cpufreq: fix double unlock when cpufreq online Schspa Shi
2022-05-09 4:00 ` Viresh Kumar
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20220506072146.GD4031@kadam \
--to=dan.carpenter@oracle.com \
--cc=linux-pm@vger.kernel.org \
--cc=schspa@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.