* [PATCH 06/18] powerpc/numa: remove unreachable topology timer code
From: Nathan Lynch @ 2020-06-12 5:12 UTC (permalink / raw)
To: linuxppc-dev; +Cc: tyreld, srikar, ego, npiggin, svaidy
In-Reply-To: <20200612051238.1007764-1-nathanl@linux.ibm.com>
Since vphn_enabled is always 0, we can stub out
timed_topology_update() and remove the code which becomes unreachable.
Signed-off-by: Nathan Lynch <nathanl@linux.ibm.com>
---
arch/powerpc/mm/numa.c | 21 ---------------------
1 file changed, 21 deletions(-)
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 1b89bacb8975..6207297490a8 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -1129,13 +1129,9 @@ struct topology_update_data {
int new_nid;
};
-#define TOPOLOGY_DEF_TIMER_SECS 60
-
static cpumask_t cpu_associativity_changes_mask;
static const int vphn_enabled;
static const int prrn_enabled;
-static void reset_topology_timer(void);
-static int topology_timer_secs = 1;
static int topology_inited;
/*
@@ -1143,15 +1139,6 @@ static int topology_inited;
*/
int timed_topology_update(int nsecs)
{
- if (vphn_enabled) {
- if (nsecs > 0)
- topology_timer_secs = nsecs;
- else
- topology_timer_secs = TOPOLOGY_DEF_TIMER_SECS;
-
- reset_topology_timer();
- }
-
return 0;
}
@@ -1438,14 +1425,6 @@ static void topology_schedule_update(void)
schedule_work(&topology_work);
}
-static struct timer_list topology_timer;
-
-static void reset_topology_timer(void)
-{
- if (vphn_enabled)
- mod_timer(&topology_timer, jiffies + topology_timer_secs * HZ);
-}
-
/*
* Start polling for associativity changes.
*/
--
2.25.4
^ permalink raw reply related
* [PATCH 05/18] powerpc/numa: make vphn_enabled, prrn_enabled flags const
From: Nathan Lynch @ 2020-06-12 5:12 UTC (permalink / raw)
To: linuxppc-dev; +Cc: tyreld, srikar, ego, npiggin, svaidy
In-Reply-To: <20200612051238.1007764-1-nathanl@linux.ibm.com>
Previous changes have made it so these flags are never changed;
enforce this by making them const.
Signed-off-by: Nathan Lynch <nathanl@linux.ibm.com>
---
arch/powerpc/mm/numa.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 9e20f12e6caf..1b89bacb8975 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -1132,8 +1132,8 @@ struct topology_update_data {
#define TOPOLOGY_DEF_TIMER_SECS 60
static cpumask_t cpu_associativity_changes_mask;
-static int vphn_enabled;
-static int prrn_enabled;
+static const int vphn_enabled;
+static const int prrn_enabled;
static void reset_topology_timer(void);
static int topology_timer_secs = 1;
static int topology_inited;
--
2.25.4
^ permalink raw reply related
* [PATCH 04/18] powerpc/numa: remove unreachable topology update code
From: Nathan Lynch @ 2020-06-12 5:12 UTC (permalink / raw)
To: linuxppc-dev; +Cc: tyreld, srikar, ego, npiggin, svaidy
In-Reply-To: <20200612051238.1007764-1-nathanl@linux.ibm.com>
Since the topology_updates_enabled flag is now always false, remove it
and the code which has become unreachable. This is the minimum change
that prevents 'defined but unused' warnings emitted by the compiler
after stubbing out the start/stop_topology_updates() functions.
Signed-off-by: Nathan Lynch <nathanl@linux.ibm.com>
---
arch/powerpc/mm/numa.c | 149 +----------------------------------------
1 file changed, 2 insertions(+), 147 deletions(-)
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 34d95de77bdd..9e20f12e6caf 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -984,8 +984,6 @@ static int __init early_numa(char *p)
}
early_param("numa", early_numa);
-static const bool topology_updates_enabled;
-
#ifdef CONFIG_MEMORY_HOTPLUG
/*
* Find the node associated with a hot added memory section for
@@ -1133,7 +1131,6 @@ struct topology_update_data {
#define TOPOLOGY_DEF_TIMER_SECS 60
-static u8 vphn_cpu_change_counts[NR_CPUS][MAX_DISTANCE_REF_POINTS];
static cpumask_t cpu_associativity_changes_mask;
static int vphn_enabled;
static int prrn_enabled;
@@ -1158,63 +1155,6 @@ int timed_topology_update(int nsecs)
return 0;
}
-/*
- * Store the current values of the associativity change counters in the
- * hypervisor.
- */
-static void setup_cpu_associativity_change_counters(void)
-{
- int cpu;
-
- /* The VPHN feature supports a maximum of 8 reference points */
- BUILD_BUG_ON(MAX_DISTANCE_REF_POINTS > 8);
-
- for_each_possible_cpu(cpu) {
- int i;
- u8 *counts = vphn_cpu_change_counts[cpu];
- volatile u8 *hypervisor_counts = lppaca_of(cpu).vphn_assoc_counts;
-
- for (i = 0; i < distance_ref_points_depth; i++)
- counts[i] = hypervisor_counts[i];
- }
-}
-
-/*
- * The hypervisor maintains a set of 8 associativity change counters in
- * the VPA of each cpu that correspond to the associativity levels in the
- * ibm,associativity-reference-points property. When an associativity
- * level changes, the corresponding counter is incremented.
- *
- * Set a bit in cpu_associativity_changes_mask for each cpu whose home
- * node associativity levels have changed.
- *
- * Returns the number of cpus with unhandled associativity changes.
- */
-static int update_cpu_associativity_changes_mask(void)
-{
- int cpu;
- cpumask_t *changes = &cpu_associativity_changes_mask;
-
- for_each_possible_cpu(cpu) {
- int i, changed = 0;
- u8 *counts = vphn_cpu_change_counts[cpu];
- volatile u8 *hypervisor_counts = lppaca_of(cpu).vphn_assoc_counts;
-
- for (i = 0; i < distance_ref_points_depth; i++) {
- if (hypervisor_counts[i] != counts[i]) {
- counts[i] = hypervisor_counts[i];
- changed = 1;
- }
- }
- if (changed) {
- cpumask_or(changes, changes, cpu_sibling_mask(cpu));
- cpu = cpu_last_thread_sibling(cpu);
- }
- }
-
- return cpumask_weight(changes);
-}
-
/*
* Retrieve the new associativity information for a virtual processor's
* home node.
@@ -1498,16 +1438,6 @@ static void topology_schedule_update(void)
schedule_work(&topology_work);
}
-static void topology_timer_fn(struct timer_list *unused)
-{
- if (prrn_enabled && cpumask_weight(&cpu_associativity_changes_mask))
- topology_schedule_update();
- else if (vphn_enabled) {
- if (update_cpu_associativity_changes_mask() > 0)
- topology_schedule_update();
- reset_topology_timer();
- }
-}
static struct timer_list topology_timer;
static void reset_topology_timer(void)
@@ -1516,69 +1446,12 @@ static void reset_topology_timer(void)
mod_timer(&topology_timer, jiffies + topology_timer_secs * HZ);
}
-#ifdef CONFIG_SMP
-
-static int dt_update_callback(struct notifier_block *nb,
- unsigned long action, void *data)
-{
- struct of_reconfig_data *update = data;
- int rc = NOTIFY_DONE;
-
- switch (action) {
- case OF_RECONFIG_UPDATE_PROPERTY:
- if (of_node_is_type(update->dn, "cpu") &&
- !of_prop_cmp(update->prop->name, "ibm,associativity")) {
- u32 core_id;
- of_property_read_u32(update->dn, "reg", &core_id);
- rc = dlpar_cpu_readd(core_id);
- rc = NOTIFY_OK;
- }
- break;
- }
-
- return rc;
-}
-
-static struct notifier_block dt_update_nb = {
- .notifier_call = dt_update_callback,
-};
-
-#endif
-
/*
* Start polling for associativity changes.
*/
int start_topology_update(void)
{
- int rc = 0;
-
- if (!topology_updates_enabled)
- return 0;
-
- if (firmware_has_feature(FW_FEATURE_PRRN)) {
- if (!prrn_enabled) {
- prrn_enabled = 1;
-#ifdef CONFIG_SMP
- rc = of_reconfig_notifier_register(&dt_update_nb);
-#endif
- }
- }
- if (firmware_has_feature(FW_FEATURE_VPHN) &&
- lppaca_shared_proc(get_lppaca())) {
- if (!vphn_enabled) {
- vphn_enabled = 1;
- setup_cpu_associativity_change_counters();
- timer_setup(&topology_timer, topology_timer_fn,
- TIMER_DEFERRABLE);
- reset_topology_timer();
- }
- }
-
- pr_info("Starting topology update%s%s\n",
- (prrn_enabled ? " prrn_enabled" : ""),
- (vphn_enabled ? " vphn_enabled" : ""));
-
- return rc;
+ return 0;
}
/*
@@ -1586,25 +1459,7 @@ int start_topology_update(void)
*/
int stop_topology_update(void)
{
- int rc = 0;
-
- if (!topology_updates_enabled)
- return 0;
-
- if (prrn_enabled) {
- prrn_enabled = 0;
-#ifdef CONFIG_SMP
- rc = of_reconfig_notifier_unregister(&dt_update_nb);
-#endif
- }
- if (vphn_enabled) {
- vphn_enabled = 0;
- rc = del_timer_sync(&topology_timer);
- }
-
- pr_info("Stopping topology update\n");
-
- return rc;
+ return 0;
}
int prrn_is_enabled(void)
--
2.25.4
^ permalink raw reply related
* [PATCH 02/18] powerpc/rtas: don't online CPUs for partition suspend
From: Nathan Lynch @ 2020-06-12 5:12 UTC (permalink / raw)
To: linuxppc-dev; +Cc: tyreld, srikar, ego, npiggin, svaidy
In-Reply-To: <20200612051238.1007764-1-nathanl@linux.ibm.com>
Partition suspension, used for hibernation and migration, requires
that the OS place all but one of the LPAR's processor threads into one
of two states prior to calling the ibm,suspend-me RTAS function:
* the architected offline state (via RTAS stop-self); or
* the H_JOIN hcall, which does not return until the partition
resumes execution
Using H_CEDE as the offline mode, introduced by
commit 3aa565f53c39 ("powerpc/pseries: Add hooks to put the CPU into
an appropriate offline state"), means that any threads which are
offline from Linux's point of view must be moved to one of those two
states before a partition suspension can proceed.
This was eventually addressed in commit 120496ac2d2d ("powerpc: Bring
all threads online prior to migration/hibernation"), which added code
to temporarily bring up any offline processor threads so they can call
H_JOIN. Conceptually this is fine, but the implementation has had
multiple races with cpu hotplug operations initiated from user
space[1][2][3], the error handling is fragile, and it generates
user-visible cpu hotplug events, which is a lot of noise for a platform
feature that's supposed to minimize disruption to workloads.
With commit 3aa565f53c39 ("powerpc/pseries: Add hooks to put the CPU
into an appropriate offline state") reverted, this code becomes
unnecessary, so remove it. Since any offline CPUs now are truly
offline from the platform's point of view, it is no longer necessary
to bring up CPUs only to have them call H_JOIN and then go offline
again upon resuming. Only active threads are required to call H_JOIN;
stopped threads can be left alone.
[1] commit a6717c01ddc2 ("powerpc/rtas: use device model APIs and
serialization during LPM")
[2] commit 9fb603050ffd ("powerpc/rtas: retry when cpu offline races
with suspend/migration")
[3] commit dfd718a2ed1f ("powerpc/rtas: Fix a potential race between
CPU-Offline & Migration")
Fixes: 120496ac2d2d ("powerpc: Bring all threads online prior to migration/hibernation")
Signed-off-by: Nathan Lynch <nathanl@linux.ibm.com>
---
arch/powerpc/include/asm/rtas.h | 2 -
arch/powerpc/kernel/rtas.c | 122 +----------------------
arch/powerpc/platforms/pseries/suspend.c | 22 +---
3 files changed, 3 insertions(+), 143 deletions(-)
diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h
index 014968f25f7e..0107d724e9da 100644
--- a/arch/powerpc/include/asm/rtas.h
+++ b/arch/powerpc/include/asm/rtas.h
@@ -253,8 +253,6 @@ extern int rtas_set_indicator_fast(int indicator, int index, int new_value);
extern void rtas_progress(char *s, unsigned short hex);
extern int rtas_suspend_cpu(struct rtas_suspend_me_data *data);
extern int rtas_suspend_last_cpu(struct rtas_suspend_me_data *data);
-extern int rtas_online_cpus_mask(cpumask_var_t cpus);
-extern int rtas_offline_cpus_mask(cpumask_var_t cpus);
extern int rtas_ibm_suspend_me(u64 handle);
struct rtc_time;
diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index a09eba03f180..806d554ce357 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -843,96 +843,6 @@ static void rtas_percpu_suspend_me(void *info)
__rtas_suspend_cpu((struct rtas_suspend_me_data *)info, 1);
}
-enum rtas_cpu_state {
- DOWN,
- UP,
-};
-
-#ifndef CONFIG_SMP
-static int rtas_cpu_state_change_mask(enum rtas_cpu_state state,
- cpumask_var_t cpus)
-{
- if (!cpumask_empty(cpus)) {
- cpumask_clear(cpus);
- return -EINVAL;
- } else
- return 0;
-}
-#else
-/* On return cpumask will be altered to indicate CPUs changed.
- * CPUs with states changed will be set in the mask,
- * CPUs with status unchanged will be unset in the mask. */
-static int rtas_cpu_state_change_mask(enum rtas_cpu_state state,
- cpumask_var_t cpus)
-{
- int cpu;
- int cpuret = 0;
- int ret = 0;
-
- if (cpumask_empty(cpus))
- return 0;
-
- for_each_cpu(cpu, cpus) {
- struct device *dev = get_cpu_device(cpu);
-
- switch (state) {
- case DOWN:
- cpuret = device_offline(dev);
- break;
- case UP:
- cpuret = device_online(dev);
- break;
- }
- if (cpuret < 0) {
- pr_debug("%s: cpu_%s for cpu#%d returned %d.\n",
- __func__,
- ((state == UP) ? "up" : "down"),
- cpu, cpuret);
- if (!ret)
- ret = cpuret;
- if (state == UP) {
- /* clear bits for unchanged cpus, return */
- cpumask_shift_right(cpus, cpus, cpu);
- cpumask_shift_left(cpus, cpus, cpu);
- break;
- } else {
- /* clear bit for unchanged cpu, continue */
- cpumask_clear_cpu(cpu, cpus);
- }
- }
- cond_resched();
- }
-
- return ret;
-}
-#endif
-
-int rtas_online_cpus_mask(cpumask_var_t cpus)
-{
- int ret;
-
- ret = rtas_cpu_state_change_mask(UP, cpus);
-
- if (ret) {
- cpumask_var_t tmp_mask;
-
- if (!alloc_cpumask_var(&tmp_mask, GFP_KERNEL))
- return ret;
-
- /* Use tmp_mask to preserve cpus mask from first failure */
- cpumask_copy(tmp_mask, cpus);
- rtas_offline_cpus_mask(tmp_mask);
- free_cpumask_var(tmp_mask);
- }
-
- return ret;
-}
-
-int rtas_offline_cpus_mask(cpumask_var_t cpus)
-{
- return rtas_cpu_state_change_mask(DOWN, cpus);
-}
-
int rtas_ibm_suspend_me(u64 handle)
{
long state;
@@ -940,8 +850,6 @@ int rtas_ibm_suspend_me(u64 handle)
unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
struct rtas_suspend_me_data data;
DECLARE_COMPLETION_ONSTACK(done);
- cpumask_var_t offline_mask;
- int cpuret;
if (!rtas_service_present("ibm,suspend-me"))
return -ENOSYS;
@@ -962,9 +870,6 @@ int rtas_ibm_suspend_me(u64 handle)
return -EIO;
}
- if (!alloc_cpumask_var(&offline_mask, GFP_KERNEL))
- return -ENOMEM;
-
atomic_set(&data.working, 0);
atomic_set(&data.done, 0);
atomic_set(&data.error, 0);
@@ -973,24 +878,8 @@ int rtas_ibm_suspend_me(u64 handle)
lock_device_hotplug();
- /* All present CPUs must be online */
- cpumask_andnot(offline_mask, cpu_present_mask, cpu_online_mask);
- cpuret = rtas_online_cpus_mask(offline_mask);
- if (cpuret) {
- pr_err("%s: Could not bring present CPUs online.\n", __func__);
- atomic_set(&data.error, cpuret);
- goto out;
- }
-
cpu_hotplug_disable();
- /* Check if we raced with a CPU-Offline Operation */
- if (!cpumask_equal(cpu_present_mask, cpu_online_mask)) {
- pr_info("%s: Raced against a concurrent CPU-Offline\n", __func__);
- atomic_set(&data.error, -EAGAIN);
- goto out_hotplug_enable;
- }
-
/* Call function on all CPUs. One of us will make the
* rtas call
*/
@@ -1001,18 +890,11 @@ int rtas_ibm_suspend_me(u64 handle)
if (atomic_read(&data.error) != 0)
printk(KERN_ERR "Error doing global join\n");
-out_hotplug_enable:
- cpu_hotplug_enable();
- /* Take down CPUs not online prior to suspend */
- cpuret = rtas_offline_cpus_mask(offline_mask);
- if (cpuret)
- pr_warn("%s: Could not restore CPUs to offline state.\n",
- __func__);
+ cpu_hotplug_enable();
-out:
unlock_device_hotplug();
- free_cpumask_var(offline_mask);
+
return atomic_read(&data.error);
}
diff --git a/arch/powerpc/platforms/pseries/suspend.c b/arch/powerpc/platforms/pseries/suspend.c
index 0a24a5a185f0..f789693f61f4 100644
--- a/arch/powerpc/platforms/pseries/suspend.c
+++ b/arch/powerpc/platforms/pseries/suspend.c
@@ -132,15 +132,11 @@ static ssize_t store_hibernate(struct device *dev,
struct device_attribute *attr,
const char *buf, size_t count)
{
- cpumask_var_t offline_mask;
int rc;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
- if (!alloc_cpumask_var(&offline_mask, GFP_KERNEL))
- return -ENOMEM;
-
stream_id = simple_strtoul(buf, NULL, 16);
do {
@@ -150,32 +146,16 @@ static ssize_t store_hibernate(struct device *dev,
} while (rc == -EAGAIN);
if (!rc) {
- /* All present CPUs must be online */
- cpumask_andnot(offline_mask, cpu_present_mask,
- cpu_online_mask);
- rc = rtas_online_cpus_mask(offline_mask);
- if (rc) {
- pr_err("%s: Could not bring present CPUs online.\n",
- __func__);
- goto out;
- }
-
stop_topology_update();
rc = pm_suspend(PM_SUSPEND_MEM);
start_topology_update();
-
- /* Take down CPUs not online prior to suspend */
- if (!rtas_offline_cpus_mask(offline_mask))
- pr_warn("%s: Could not restore CPUs to offline "
- "state.\n", __func__);
}
stream_id = 0;
if (!rc)
rc = count;
-out:
- free_cpumask_var(offline_mask);
+
return rc;
}
--
2.25.4
^ permalink raw reply related
* [PATCH 03/18] powerpc/numa: remove ability to enable topology updates
From: Nathan Lynch @ 2020-06-12 5:12 UTC (permalink / raw)
To: linuxppc-dev; +Cc: tyreld, srikar, ego, npiggin, svaidy
In-Reply-To: <20200612051238.1007764-1-nathanl@linux.ibm.com>
Remove the /proc/powerpc/topology_updates interface and the
topology_updates=on/off command line argument. The internal
topology_updates_enabled flag remains for now, but is always false.
Signed-off-by: Nathan Lynch <nathanl@linux.ibm.com>
---
arch/powerpc/mm/numa.c | 71 +-----------------------------------------
1 file changed, 1 insertion(+), 70 deletions(-)
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 9fcf2d195830..34d95de77bdd 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -984,27 +984,7 @@ static int __init early_numa(char *p)
}
early_param("numa", early_numa);
-/*
- * The platform can inform us through one of several mechanisms
- * (post-migration device tree updates, PRRN or VPHN) that the NUMA
- * assignment of a resource has changed. This controls whether we act
- * on that. Disabled by default.
- */
-static bool topology_updates_enabled;
-
-static int __init early_topology_updates(char *p)
-{
- if (!p)
- return 0;
-
- if (!strcmp(p, "on")) {
- pr_warn("Caution: enabling topology updates\n");
- topology_updates_enabled = true;
- }
-
- return 0;
-}
-early_param("topology_updates", early_topology_updates);
+static const bool topology_updates_enabled;
#ifdef CONFIG_MEMORY_HOTPLUG
/*
@@ -1632,52 +1612,6 @@ int prrn_is_enabled(void)
return prrn_enabled;
}
-static int topology_read(struct seq_file *file, void *v)
-{
- if (vphn_enabled || prrn_enabled)
- seq_puts(file, "on\n");
- else
- seq_puts(file, "off\n");
-
- return 0;
-}
-
-static int topology_open(struct inode *inode, struct file *file)
-{
- return single_open(file, topology_read, NULL);
-}
-
-static ssize_t topology_write(struct file *file, const char __user *buf,
- size_t count, loff_t *off)
-{
- char kbuf[4]; /* "on" or "off" plus null. */
- int read_len;
-
- read_len = count < 3 ? count : 3;
- if (copy_from_user(kbuf, buf, read_len))
- return -EINVAL;
-
- kbuf[read_len] = '\0';
-
- if (!strncmp(kbuf, "on", 2)) {
- topology_updates_enabled = true;
- start_topology_update();
- } else if (!strncmp(kbuf, "off", 3)) {
- stop_topology_update();
- topology_updates_enabled = false;
- } else
- return -EINVAL;
-
- return count;
-}
-
-static const struct proc_ops topology_proc_ops = {
- .proc_read = seq_read,
- .proc_write = topology_write,
- .proc_open = topology_open,
- .proc_release = single_release,
-};
-
static int topology_update_init(void)
{
start_topology_update();
@@ -1685,9 +1619,6 @@ static int topology_update_init(void)
if (vphn_enabled)
topology_schedule_update();
- if (!proc_create("powerpc/topology_updates", 0644, NULL, &topology_proc_ops))
- return -ENOMEM;
-
topology_inited = 1;
return 0;
}
--
2.25.4
^ permalink raw reply related
* [PATCH 00/18] remove extended cede offline mode and bogus topology update code
From: Nathan Lynch @ 2020-06-12 5:12 UTC (permalink / raw)
To: linuxppc-dev; +Cc: tyreld, srikar, ego, npiggin, svaidy
Two major parts to this series:
1. Removal of the extended cede offline mode for CPUs as well as the
partition suspend code which accommodates it by temporarily
onlining all CPUs prior to suspending the LPAR. This solves some
accounting problems, simplifies the pseries CPU hotplug code, and
greatly simplifies the existing partition suspend code, easing
a much-needed transition to the Linux suspend framework. The two
patches which make up this part have been posted before:
https://patchwork.ozlabs.org/project/linuxppc-dev/list/?series=180718
and they are simply incorporated unchanged into the larger series
here, with Gautham's Reviewed-by added to patch #1.
2. Removal of the long-disabled "topology update" code, most of which
resides in mm/numa.c, but there are pieces in pseries and rtasd to
excise as well. This code was an attempt to honor changes in a
partition's NUMA properties arising from resource reassignments
which occur as part of a migration, VPHN change, or a Dynamic
Platform Optimizer operation. Its main technique is to remove and
re-add affected processors and LMBs and hope in vain that the
changes in cpu-node and physaddr-node relationships aren't
disruptive. We want to provide user space with some indication that
Linux's logical NUMA representation has become out of sync with the
platform's assignments, but we need to get this unusable stuff out
of the way before this code can sustain new features.
Nathan Lynch (18):
powerpc/pseries: remove cede offline state for CPUs
powerpc/rtas: don't online CPUs for partition suspend
powerpc/numa: remove ability to enable topology updates
powerpc/numa: remove unreachable topology update code
powerpc/numa: make vphn_enabled, prrn_enabled flags const
powerpc/numa: remove unreachable topology timer code
powerpc/numa: remove unreachable topology workqueue code
powerpc/numa: remove vphn_enabled and prrn_enabled internal flags
powerpc/numa: stub out numa_update_cpu_topology()
powerpc/numa: remove timed_topology_update()
powerpc/numa: remove start/stop_topology_update()
powerpc/rtasd: simplify handle_rtas_event(), emit message on events
powerpc/numa: remove prrn_is_enabled()
powerpc/numa: remove arch_update_cpu_topology
powerpc/pseries: remove prrn special case from DT update path
powerpc/pseries: remove memory "re-add" implementation
powerpc/pseries: remove dlpar_cpu_readd()
powerpc/pseries: remove obsolete memory hotplug DT notifier code
Documentation/core-api/cpu_hotplug.rst | 7 -
arch/powerpc/include/asm/rtas.h | 3 -
arch/powerpc/include/asm/topology.h | 27 -
arch/powerpc/kernel/rtas.c | 122 +----
arch/powerpc/kernel/rtasd.c | 28 +-
arch/powerpc/mm/numa.c | 486 ------------------
arch/powerpc/platforms/pseries/hotplug-cpu.c | 189 +------
.../platforms/pseries/hotplug-memory.c | 107 +---
arch/powerpc/platforms/pseries/mobility.c | 31 --
.../platforms/pseries/offline_states.h | 38 --
arch/powerpc/platforms/pseries/pmem.c | 1 -
arch/powerpc/platforms/pseries/smp.c | 28 +-
arch/powerpc/platforms/pseries/suspend.c | 27 +-
13 files changed, 22 insertions(+), 1072 deletions(-)
delete mode 100644 arch/powerpc/platforms/pseries/offline_states.h
--
2.25.4
^ permalink raw reply
* [PATCH 01/18] powerpc/pseries: remove cede offline state for CPUs
From: Nathan Lynch @ 2020-06-12 5:12 UTC (permalink / raw)
To: linuxppc-dev; +Cc: tyreld, srikar, ego, npiggin, svaidy
In-Reply-To: <20200612051238.1007764-1-nathanl@linux.ibm.com>
This effectively reverts commit 3aa565f53c39 ("powerpc/pseries: Add
hooks to put the CPU into an appropriate offline state"), which added
an offline mode for CPUs which uses the H_CEDE hcall instead of the
architected stop-self RTAS function in order to facilitate "folding"
of dedicated mode processors on PowerVM platforms to achieve energy
savings. This has been the default offline mode since its
introduction.
There's nothing about stop-self that would prevent the hypervisor from
achieving the energy savings available via H_CEDE, so the original
premise of that commit appears to be flawed.
I also have encountered the claim that the transition to and from
ceded state is much faster than stop-self/start-cpu. Certainly we
would not want to use stop-self as an *idle* mode. That is what H_CEDE
is for. However, this difference is insignificant in the context of
Linux CPU hotplug, where the latency of an offline or online operation
on current systems is on the order of 100ms, mainly attributable to
all the various subsystems' cpuhp callbacks.
The cede offline mode also prevents accurate accounting, as discussed
before:
https://lore.kernel.org/linuxppc-dev/1571740391-3251-1-git-send-email-ego@linux.vnet.ibm.com/
Unconditionally use stop-self to offline processor threads. This is
the architected method for offlining CPUs on PAPR systems.
The "cede_offline" boot parameter is rendered obsolete.
Removing this code enables the removal of the partition suspend code
which temporarily onlines all present CPUs.
Fixes: 3aa565f53c39 ("powerpc/pseries: Add hooks to put the CPU into an appropriate offline state")
Signed-off-by: Nathan Lynch <nathanl@linux.ibm.com>
Reviewed-by: Gautham R. Shenoy <ego@linux.vnet.ibm.com>
---
Documentation/core-api/cpu_hotplug.rst | 7 -
arch/powerpc/platforms/pseries/hotplug-cpu.c | 170 ++----------------
.../platforms/pseries/offline_states.h | 38 ----
arch/powerpc/platforms/pseries/pmem.c | 1 -
arch/powerpc/platforms/pseries/smp.c | 28 +--
5 files changed, 15 insertions(+), 229 deletions(-)
delete mode 100644 arch/powerpc/platforms/pseries/offline_states.h
diff --git a/Documentation/core-api/cpu_hotplug.rst b/Documentation/core-api/cpu_hotplug.rst
index 4a50ab7817f7..b1ae1ac159cf 100644
--- a/Documentation/core-api/cpu_hotplug.rst
+++ b/Documentation/core-api/cpu_hotplug.rst
@@ -50,13 +50,6 @@ Command Line Switches
This option is limited to the X86 and S390 architecture.
-``cede_offline={"off","on"}``
- Use this option to disable/enable putting offlined processors to an extended
- ``H_CEDE`` state on supported pseries platforms. If nothing is specified,
- ``cede_offline`` is set to "on".
-
- This option is limited to the PowerPC architecture.
-
``cpu0_hotplug``
Allow to shutdown CPU0.
diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c
index 3e8cbfe7a80f..d4b346355bb9 100644
--- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
+++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
@@ -35,54 +35,10 @@
#include <asm/topology.h>
#include "pseries.h"
-#include "offline_states.h"
/* This version can't take the spinlock, because it never returns */
static int rtas_stop_self_token = RTAS_UNKNOWN_SERVICE;
-static DEFINE_PER_CPU(enum cpu_state_vals, preferred_offline_state) =
- CPU_STATE_OFFLINE;
-static DEFINE_PER_CPU(enum cpu_state_vals, current_state) = CPU_STATE_OFFLINE;
-
-static enum cpu_state_vals default_offline_state = CPU_STATE_OFFLINE;
-
-static bool cede_offline_enabled __read_mostly = true;
-
-/*
- * Enable/disable cede_offline when available.
- */
-static int __init setup_cede_offline(char *str)
-{
- return (kstrtobool(str, &cede_offline_enabled) == 0);
-}
-
-__setup("cede_offline=", setup_cede_offline);
-
-enum cpu_state_vals get_cpu_current_state(int cpu)
-{
- return per_cpu(current_state, cpu);
-}
-
-void set_cpu_current_state(int cpu, enum cpu_state_vals state)
-{
- per_cpu(current_state, cpu) = state;
-}
-
-enum cpu_state_vals get_preferred_offline_state(int cpu)
-{
- return per_cpu(preferred_offline_state, cpu);
-}
-
-void set_preferred_offline_state(int cpu, enum cpu_state_vals state)
-{
- per_cpu(preferred_offline_state, cpu) = state;
-}
-
-void set_default_offline_state(int cpu)
-{
- per_cpu(preferred_offline_state, cpu) = default_offline_state;
-}
-
static void rtas_stop_self(void)
{
static struct rtas_args args;
@@ -101,9 +57,7 @@ static void rtas_stop_self(void)
static void pseries_mach_cpu_die(void)
{
- unsigned int cpu = smp_processor_id();
unsigned int hwcpu = hard_smp_processor_id();
- u8 cede_latency_hint = 0;
local_irq_disable();
idle_task_exit();
@@ -112,49 +66,6 @@ static void pseries_mach_cpu_die(void)
else
xics_teardown_cpu();
- if (get_preferred_offline_state(cpu) == CPU_STATE_INACTIVE) {
- set_cpu_current_state(cpu, CPU_STATE_INACTIVE);
- if (ppc_md.suspend_disable_cpu)
- ppc_md.suspend_disable_cpu();
-
- cede_latency_hint = 2;
-
- get_lppaca()->idle = 1;
- if (!lppaca_shared_proc(get_lppaca()))
- get_lppaca()->donate_dedicated_cpu = 1;
-
- while (get_preferred_offline_state(cpu) == CPU_STATE_INACTIVE) {
- while (!prep_irq_for_idle()) {
- local_irq_enable();
- local_irq_disable();
- }
-
- extended_cede_processor(cede_latency_hint);
- }
-
- local_irq_disable();
-
- if (!lppaca_shared_proc(get_lppaca()))
- get_lppaca()->donate_dedicated_cpu = 0;
- get_lppaca()->idle = 0;
-
- if (get_preferred_offline_state(cpu) == CPU_STATE_ONLINE) {
- unregister_slb_shadow(hwcpu);
-
- hard_irq_disable();
- /*
- * Call to start_secondary_resume() will not return.
- * Kernel stack will be reset and start_secondary()
- * will be called to continue the online operation.
- */
- start_secondary_resume();
- }
- }
-
- /* Requested state is CPU_STATE_OFFLINE at this point */
- WARN_ON(get_preferred_offline_state(cpu) != CPU_STATE_OFFLINE);
-
- set_cpu_current_state(cpu, CPU_STATE_OFFLINE);
unregister_slb_shadow(hwcpu);
rtas_stop_self();
@@ -200,24 +111,13 @@ static void pseries_cpu_die(unsigned int cpu)
int cpu_status = 1;
unsigned int pcpu = get_hard_smp_processor_id(cpu);
- if (get_preferred_offline_state(cpu) == CPU_STATE_INACTIVE) {
- cpu_status = 1;
- for (tries = 0; tries < 5000; tries++) {
- if (get_cpu_current_state(cpu) == CPU_STATE_INACTIVE) {
- cpu_status = 0;
- break;
- }
- msleep(1);
- }
- } else if (get_preferred_offline_state(cpu) == CPU_STATE_OFFLINE) {
+ for (tries = 0; tries < 25; tries++) {
+ cpu_status = smp_query_cpu_stopped(pcpu);
+ if (cpu_status == QCSS_STOPPED ||
+ cpu_status == QCSS_HARDWARE_ERROR)
+ break;
+ cpu_relax();
- for (tries = 0; tries < 25; tries++) {
- cpu_status = smp_query_cpu_stopped(pcpu);
- if (cpu_status == QCSS_STOPPED ||
- cpu_status == QCSS_HARDWARE_ERROR)
- break;
- cpu_relax();
- }
}
if (cpu_status != 0) {
@@ -359,28 +259,15 @@ static int dlpar_offline_cpu(struct device_node *dn)
if (get_hard_smp_processor_id(cpu) != thread)
continue;
- if (get_cpu_current_state(cpu) == CPU_STATE_OFFLINE)
+ if (!cpu_online(cpu))
break;
- if (get_cpu_current_state(cpu) == CPU_STATE_ONLINE) {
- set_preferred_offline_state(cpu,
- CPU_STATE_OFFLINE);
- cpu_maps_update_done();
- timed_topology_update(1);
- rc = device_offline(get_cpu_device(cpu));
- if (rc)
- goto out;
- cpu_maps_update_begin();
- break;
- }
-
- /*
- * The cpu is in CPU_STATE_INACTIVE.
- * Upgrade it's state to CPU_STATE_OFFLINE.
- */
- set_preferred_offline_state(cpu, CPU_STATE_OFFLINE);
- WARN_ON(plpar_hcall_norets(H_PROD, thread) != H_SUCCESS);
- __cpu_die(cpu);
+ cpu_maps_update_done();
+ timed_topology_update(1);
+ rc = device_offline(get_cpu_device(cpu));
+ if (rc)
+ goto out;
+ cpu_maps_update_begin();
break;
}
if (cpu == num_possible_cpus()) {
@@ -414,8 +301,6 @@ static int dlpar_online_cpu(struct device_node *dn)
for_each_present_cpu(cpu) {
if (get_hard_smp_processor_id(cpu) != thread)
continue;
- BUG_ON(get_cpu_current_state(cpu)
- != CPU_STATE_OFFLINE);
cpu_maps_update_done();
timed_topology_update(1);
find_and_online_cpu_nid(cpu);
@@ -1013,27 +898,8 @@ static struct notifier_block pseries_smp_nb = {
.notifier_call = pseries_smp_notifier,
};
-#define MAX_CEDE_LATENCY_LEVELS 4
-#define CEDE_LATENCY_PARAM_LENGTH 10
-#define CEDE_LATENCY_PARAM_MAX_LENGTH \
- (MAX_CEDE_LATENCY_LEVELS * CEDE_LATENCY_PARAM_LENGTH * sizeof(char))
-#define CEDE_LATENCY_TOKEN 45
-
-static char cede_parameters[CEDE_LATENCY_PARAM_MAX_LENGTH];
-
-static int parse_cede_parameters(void)
-{
- memset(cede_parameters, 0, CEDE_LATENCY_PARAM_MAX_LENGTH);
- return rtas_call(rtas_token("ibm,get-system-parameter"), 3, 1,
- NULL,
- CEDE_LATENCY_TOKEN,
- __pa(cede_parameters),
- CEDE_LATENCY_PARAM_MAX_LENGTH);
-}
-
static int __init pseries_cpu_hotplug_init(void)
{
- int cpu;
int qcss_tok;
#ifdef CONFIG_ARCH_CPU_PROBE_RELEASE
@@ -1056,16 +922,8 @@ static int __init pseries_cpu_hotplug_init(void)
smp_ops->cpu_die = pseries_cpu_die;
/* Processors can be added/removed only on LPAR */
- if (firmware_has_feature(FW_FEATURE_LPAR)) {
+ if (firmware_has_feature(FW_FEATURE_LPAR))
of_reconfig_notifier_register(&pseries_smp_nb);
- cpu_maps_update_begin();
- if (cede_offline_enabled && parse_cede_parameters() == 0) {
- default_offline_state = CPU_STATE_INACTIVE;
- for_each_online_cpu(cpu)
- set_default_offline_state(cpu);
- }
- cpu_maps_update_done();
- }
return 0;
}
diff --git a/arch/powerpc/platforms/pseries/offline_states.h b/arch/powerpc/platforms/pseries/offline_states.h
deleted file mode 100644
index 51414aee2862..000000000000
--- a/arch/powerpc/platforms/pseries/offline_states.h
+++ /dev/null
@@ -1,38 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _OFFLINE_STATES_H_
-#define _OFFLINE_STATES_H_
-
-/* Cpu offline states go here */
-enum cpu_state_vals {
- CPU_STATE_OFFLINE,
- CPU_STATE_INACTIVE,
- CPU_STATE_ONLINE,
- CPU_MAX_OFFLINE_STATES
-};
-
-#ifdef CONFIG_HOTPLUG_CPU
-extern enum cpu_state_vals get_cpu_current_state(int cpu);
-extern void set_cpu_current_state(int cpu, enum cpu_state_vals state);
-extern void set_preferred_offline_state(int cpu, enum cpu_state_vals state);
-extern void set_default_offline_state(int cpu);
-#else
-static inline enum cpu_state_vals get_cpu_current_state(int cpu)
-{
- return CPU_STATE_ONLINE;
-}
-
-static inline void set_cpu_current_state(int cpu, enum cpu_state_vals state)
-{
-}
-
-static inline void set_preferred_offline_state(int cpu, enum cpu_state_vals state)
-{
-}
-
-static inline void set_default_offline_state(int cpu)
-{
-}
-#endif
-
-extern enum cpu_state_vals get_preferred_offline_state(int cpu);
-#endif
diff --git a/arch/powerpc/platforms/pseries/pmem.c b/arch/powerpc/platforms/pseries/pmem.c
index f860a897a9e0..f827de7087e9 100644
--- a/arch/powerpc/platforms/pseries/pmem.c
+++ b/arch/powerpc/platforms/pseries/pmem.c
@@ -24,7 +24,6 @@
#include <asm/topology.h>
#include "pseries.h"
-#include "offline_states.h"
static struct device_node *pmem_node;
diff --git a/arch/powerpc/platforms/pseries/smp.c b/arch/powerpc/platforms/pseries/smp.c
index 6891710833be..7ebacac03dc3 100644
--- a/arch/powerpc/platforms/pseries/smp.c
+++ b/arch/powerpc/platforms/pseries/smp.c
@@ -44,8 +44,6 @@
#include <asm/svm.h>
#include "pseries.h"
-#include "offline_states.h"
-
/*
* The Primary thread of each non-boot processor was started from the OF client
@@ -108,10 +106,7 @@ static inline int smp_startup_cpu(unsigned int lcpu)
/* Fixup atomic count: it exited inside IRQ handler. */
task_thread_info(paca_ptrs[lcpu]->__current)->preempt_count = 0;
-#ifdef CONFIG_HOTPLUG_CPU
- if (get_cpu_current_state(lcpu) == CPU_STATE_INACTIVE)
- goto out;
-#endif
+
/*
* If the RTAS start-cpu token does not exist then presume the
* cpu is already spinning.
@@ -126,9 +121,6 @@ static inline int smp_startup_cpu(unsigned int lcpu)
return 0;
}
-#ifdef CONFIG_HOTPLUG_CPU
-out:
-#endif
return 1;
}
@@ -143,10 +135,6 @@ static void smp_setup_cpu(int cpu)
vpa_init(cpu);
cpumask_clear_cpu(cpu, of_spin_mask);
-#ifdef CONFIG_HOTPLUG_CPU
- set_cpu_current_state(cpu, CPU_STATE_ONLINE);
- set_default_offline_state(cpu);
-#endif
}
static int smp_pSeries_kick_cpu(int nr)
@@ -163,20 +151,6 @@ static int smp_pSeries_kick_cpu(int nr)
* the processor will continue on to secondary_start
*/
paca_ptrs[nr]->cpu_start = 1;
-#ifdef CONFIG_HOTPLUG_CPU
- set_preferred_offline_state(nr, CPU_STATE_ONLINE);
-
- if (get_cpu_current_state(nr) == CPU_STATE_INACTIVE) {
- long rc;
- unsigned long hcpuid;
-
- hcpuid = get_hard_smp_processor_id(nr);
- rc = plpar_hcall_norets(H_PROD, hcpuid);
- if (rc != H_SUCCESS)
- printk(KERN_ERR "Error: Prod to wake up processor %d "
- "Ret= %ld\n", nr, rc);
- }
-#endif
return 0;
}
--
2.25.4
^ permalink raw reply related
* Re: [RFC PATCH v2 3/3] ASoC: fsl_asrc_dma: Reuse the dma channel if available in Back-End
From: Nicolin Chen @ 2020-06-12 5:02 UTC (permalink / raw)
To: Shengjiu Wang
Cc: Linux-ALSA, lars, Timur Tabi, Xiubo Li, Fabio Estevam,
Shengjiu Wang, Takashi Iwai, linux-kernel, Liam Girdwood,
Mark Brown, linuxppc-dev
In-Reply-To: <CAA+D8ANbr-nAzY436-AFPOzwGb2LBaZSb40VwoEQrYScKr=0NA@mail.gmail.com>
On Fri, Jun 12, 2020 at 10:17:08AM +0800, Shengjiu Wang wrote:
> > > diff --git a/sound/soc/fsl/fsl_asrc_common.h b/sound/soc/fsl/fsl_asrc_common.h
> > > + * @req_dma_chan_dev_to_dev: flag for release dev_to_dev chan
> >
> > Since we only have dma_request call for back-end only:
> > + * @req_dma_chan: flag to release back-end dma chan
>
> I prefer to use the description "flag to release dev_to_dev chan"
> because we won't release the dma chan of the back-end. if the chan
> is from the back-end, it is owned by the back-end component.
TBH, it just looks too long. But I wouldn't have a problem with it
if you insist.
> > > @@ -273,19 +299,21 @@ static int fsl_asrc_dma_hw_params(struct snd_soc_component *component,
> > > static int fsl_asrc_dma_hw_free(struct snd_soc_component *component,
> > > struct snd_pcm_substream *substream)
> > > {
> > > + bool tx = substream->stream == SNDRV_PCM_STREAM_PLAYBACK;
> > > struct snd_pcm_runtime *runtime = substream->runtime;
> > > struct fsl_asrc_pair *pair = runtime->private_data;
> > > + u8 dir = tx ? OUT : IN;
> > >
> > > snd_pcm_set_runtime_buffer(substream, NULL);
> > >
> > > - if (pair->dma_chan[IN])
> > > - dma_release_channel(pair->dma_chan[IN]);
> > > + if (pair->dma_chan[!dir])
> > > + dma_release_channel(pair->dma_chan[!dir]);
> > >
> > > - if (pair->dma_chan[OUT])
> > > - dma_release_channel(pair->dma_chan[OUT]);
> > > + if (pair->dma_chan[dir] && pair->req_dma_chan_dev_to_dev)
> > > + dma_release_channel(pair->dma_chan[dir]);
> >
> > Why we only apply this to one direction?
>
> if the chan is from the back-end, it is owned by the back-end
> component, so it should be released by the back-end component,
> not here. That's why I added the flag "req_dma_chan".
Ah... I forgot that IN and OUT are for the front-end and back-end. The
naming isn't very good indeed. We should probably add a comment
somewhere as a reminder.
Thanks
^ permalink raw reply
* [PATCH kernel] powerpc/xive: Ignore kmemleak false positives
From: Alexey Kardashevskiy @ 2020-06-12 4:33 UTC (permalink / raw)
To: linuxppc-dev; +Cc: Alexey Kardashevskiy, Paul Mackerras
xive_native_provision_pages() allocates memory and passes the pointer to
OPAL, so kmemleak cannot find the pointer usage in kernel memory and
produces a false positive report (below). Even if the kernel did scan
OPAL memory, it would be unable to deal with __pa() addresses anyway.
Mark the allocation with kmemleak_ignore() to silence the warning.
unreferenced object 0xc000200350c40000 (size 65536):
comm "qemu-system-ppc", pid 2725, jiffies 4294946414 (age 70776.530s)
hex dump (first 32 bytes):
02 00 00 00 50 00 00 00 00 00 00 00 00 00 00 00 ....P...........
01 00 08 07 00 00 00 00 00 00 00 00 00 00 00 00 ................
backtrace:
[<0000000081ff046c>] xive_native_alloc_vp_block+0x120/0x250
[<00000000d555d524>] kvmppc_xive_compute_vp_id+0x248/0x350 [kvm]
[<00000000d69b9c9f>] kvmppc_xive_connect_vcpu+0xc0/0x520 [kvm]
[<000000006acbc81c>] kvm_arch_vcpu_ioctl+0x308/0x580 [kvm]
[<0000000089c69580>] kvm_vcpu_ioctl+0x19c/0xae0 [kvm]
[<00000000902ae91e>] ksys_ioctl+0x184/0x1b0
[<00000000f3e68bd7>] sys_ioctl+0x48/0xb0
[<0000000001b2c127>] system_call_exception+0x124/0x1f0
[<00000000d2b2ee40>] system_call_common+0xe8/0x214
Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
---
Does kmemleak actually check the OPAL memory? Because if it did, we
would still have a warning as kmemleak does not trace __pa() addresses
anyway.
---
arch/powerpc/sysdev/xive/native.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/arch/powerpc/sysdev/xive/native.c b/arch/powerpc/sysdev/xive/native.c
index 71b881e554fc..cb58ec7ce77a 100644
--- a/arch/powerpc/sysdev/xive/native.c
+++ b/arch/powerpc/sysdev/xive/native.c
@@ -18,6 +18,7 @@
#include <linux/delay.h>
#include <linux/cpumask.h>
#include <linux/mm.h>
+#include <linux/kmemleak.h>
#include <asm/machdep.h>
#include <asm/prom.h>
@@ -647,6 +648,7 @@ static bool xive_native_provision_pages(void)
pr_err("Failed to allocate provisioning page\n");
return false;
}
+ kmemleak_ignore(p);
opal_xive_donate_page(chip, __pa(p));
}
return true;
--
2.17.1
^ permalink raw reply related
* Re: [RFC PATCH v2 3/3] ASoC: fsl_asrc_dma: Reuse the dma channel if available in Back-End
From: Shengjiu Wang @ 2020-06-12 2:17 UTC (permalink / raw)
To: Nicolin Chen
Cc: Linux-ALSA, lars, Timur Tabi, Xiubo Li, Fabio Estevam,
Shengjiu Wang, Takashi Iwai, linux-kernel, Liam Girdwood,
Mark Brown, linuxppc-dev
In-Reply-To: <20200612003103.GA28228@Asurada-Nvidia>
On Fri, Jun 12, 2020 at 8:33 AM Nicolin Chen <nicoleotsuka@gmail.com> wrote:
>
> On Wed, Jun 10, 2020 at 06:05:49PM +0800, Shengjiu Wang wrote:
> > The dma channel has been requested by Back-End cpu dai driver already.
> > If fsl_asrc_dma requests dma chan with same dma:tx symlink, then
> > there will be below warning with SDMA.
> >
> > [ 48.174236] fsl-esai-dai 2024000.esai: Cannot create DMA dma:tx symlink
> >
> > or with EDMA the request operation will fail for EDMA channel
> > can only be requested once.
> >
> > So If we can reuse the dma channel of Back-End, then the issue can be
> > fixed.
> >
> > In order to get the dma channel which is already requested in Back-End.
> > we use the exported two functions (snd_soc_lookup_component_nolocked
> > and soc_component_to_pcm). If we can get the dma channel, then reuse it,
> > if can't, then request a new one.
> >
> > Signed-off-by: Shengjiu Wang <shengjiu.wang@nxp.com>
> > ---
> > sound/soc/fsl/fsl_asrc_common.h | 2 ++
> > sound/soc/fsl/fsl_asrc_dma.c | 52 +++++++++++++++++++++++++--------
> > 2 files changed, 42 insertions(+), 12 deletions(-)
>
> > diff --git a/sound/soc/fsl/fsl_asrc_common.h b/sound/soc/fsl/fsl_asrc_common.h
> > index 77665b15c8db..09512bc79b80 100644
> > --- a/sound/soc/fsl/fsl_asrc_common.h
> > +++ b/sound/soc/fsl/fsl_asrc_common.h
> > @@ -32,6 +32,7 @@ enum asrc_pair_index {
> > * @dma_chan: inputer and output DMA channels
> > * @dma_data: private dma data
> > * @pos: hardware pointer position
> > + * @req_dma_chan_dev_to_dev: flag for release dev_to_dev chan
>
> Since we only have dma_request call for back-end only:
> + * @req_dma_chan: flag to release back-end dma chan
I prefer to use the description "flag to release dev_to_dev chan"
because we won't release the dma chan of the back-end. If the chan
is from the back-end, it is owned by the back-end component.
>
> > diff --git a/sound/soc/fsl/fsl_asrc_dma.c b/sound/soc/fsl/fsl_asrc_dma.c
> > index d6a3fc5f87e5..5ecb77d466d3 100644
> > --- a/sound/soc/fsl/fsl_asrc_dma.c
> > +++ b/sound/soc/fsl/fsl_asrc_dma.c
> > @@ -160,6 +161,9 @@ static int fsl_asrc_dma_hw_params(struct snd_soc_component *component,
> > substream_be = snd_soc_dpcm_get_substream(be, stream);
> > dma_params_be = snd_soc_dai_get_dma_data(dai, substream_be);
> > dev_be = dai->dev;
> > + component_be = snd_soc_lookup_component_nolocked(dev_be, SND_DMAENGINE_PCM_DRV_NAME);
> > + if (component_be)
> > + tmp_chan = soc_component_to_pcm(component_be)->chan[substream->stream];
>
> Should we use substream_be->stream or just substream->stream?
substream_be->stream should be better.
>
> And would be better to add these lines right before we really use
> tmp_chan because there's still some distance till it reaches that
> point. And would be better to have a line of comments too.
ok.
>
> > @@ -205,10 +209,14 @@ static int fsl_asrc_dma_hw_params(struct snd_soc_component *component,
> > */
> > if (!asrc->use_edma) {
> > /* Get DMA request of Back-End */
> > - tmp_chan = dma_request_slave_channel(dev_be, tx ? "tx" : "rx");
> > + if (!tmp_chan) {
> > + tmp_chan_new = dma_request_slave_channel(dev_be, tx ? "tx" : "rx");
> > + tmp_chan = tmp_chan_new;
>
> This is a bit confusing...though I finally got it :)
> So probably better to have a line of comments.
ok.
>
> > @@ -220,9 +228,26 @@ static int fsl_asrc_dma_hw_params(struct snd_soc_component *component,
> >
> > pair->dma_chan[dir] =
> > dma_request_channel(mask, filter, &pair->dma_data);
> > + pair->req_dma_chan_dev_to_dev = true;
> > } else {
> > - pair->dma_chan[dir] =
> > - asrc->get_dma_channel(pair, dir);
> > + /*
> > + * With EDMA, there is two dma channels can be used for p2p,
> > + * one is from ASRC, one is from another peripheral
> > + * (ESAI or SAI). Previously we select the dma channel of ASRC,
> > + * but find an issue for ideal ratio case, there is no control
> > + * for data copy speed, the speed is faster than sample
> > + * frequency.
> > + *
> > + * So we switch to use dma channel of peripheral (ESAI or SAI),
> > + * that copy speed of DMA is controlled by data consumption
> > + * speed in the peripheral FIFO.
> > + */
>
> This sounds like a different issue and should be fixed separately?
> If you prefer not to, better to move this one to commit log, other
> than having a changelog here, in my opinion.
ok, will move it in commit log.
>
> Since it no longer uses get_dma_channel() for EDMA case, we should
> update the comments at the top as well.
>
> > + pair->req_dma_chan_dev_to_dev = false;
> > + pair->dma_chan[dir] = tmp_chan;
> > + if (!pair->dma_chan[dir]) {
> > + pair->dma_chan[dir] = dma_request_slave_channel(dev_be, tx ? "tx" : "rx");
> > + pair->req_dma_chan_dev_to_dev = true;
> > + }
> > }
>
> Now there are some duplicated lines between these if-else routines, so
> combining my previous comments, we can do (sample change, not tested):
ok, will try yours.
>
> @@ -197,18 +199,29 @@ static int fsl_asrc_dma_hw_params(struct snd_soc_component *component,
> dma_cap_set(DMA_SLAVE, mask);
> dma_cap_set(DMA_CYCLIC, mask);
>
> + /*
> + * The Back-End device might have already requested a DMA channel,
> + * so try to reuse it first, and then request a new one upon NULL.
> + */
> + component_be = snd_soc_lookup_component_nolocked(dev_be, SND_DMAENGINE_PCM_DRV_NAME);
> + if (component_be) // should probably error out if !component_be?
> + tmp_chan = be_chan = soc_component_to_pcm(component_be)->chan[substream->stream];
> + if (!tmp_chan)
> + tmp_chan = dma_request_slave_channel(dev_be, tx ? "tx" : "rx");
> +
> /*
> * An EDMA DEV_TO_DEV channel is fixed and bound with DMA event of each
> * peripheral, unlike SDMA channel that is allocated dynamically. So no
> - * need to configure dma_request and dma_request2, but get dma_chan via
> - * dma_request_slave_channel directly with dma name of Front-End device
> + * need to configure dma_request and dma_request2, but get dma_chan of
> + * Back-End device directly via dma_request_slave_channel.
> */
> if (!asrc->use_edma) {
> /* Get DMA request of Back-End */
> - tmp_chan = dma_request_slave_channel(dev_be, tx ? "tx" : "rx");
> tmp_data = tmp_chan->private;
> pair->dma_data.dma_request = tmp_data->dma_request;
> - dma_release_channel(tmp_chan);
> + /* Do not release tmp_chan if we are reusing the Back-End one */
> + if (!be_chan)
> + dma_release_channel(tmp_chan);
>
> /* Get DMA request of Front-End */
> tmp_chan = asrc->get_dma_channel(pair, dir);
> @@ -220,9 +233,11 @@ static int fsl_asrc_dma_hw_params(struct snd_soc_component *component,
>
> pair->dma_chan[dir] =
> dma_request_channel(mask, filter, &pair->dma_data);
> + pair->req_dma_chan = true;
> } else {
> - pair->dma_chan[dir] =
> - asrc->get_dma_channel(pair, dir);
> + pair->dma_chan[dir] = tmp_chan;
> + /* Do not flag to release if we are reusing the Back-End one */
> + pair->req_dma_chan = !be_chan;
> }
>
> if (!pair->dma_chan[dir]) {
>
> > @@ -273,19 +299,21 @@ static int fsl_asrc_dma_hw_params(struct snd_soc_component *component,
> > static int fsl_asrc_dma_hw_free(struct snd_soc_component *component,
> > struct snd_pcm_substream *substream)
> > {
> > + bool tx = substream->stream == SNDRV_PCM_STREAM_PLAYBACK;
> > struct snd_pcm_runtime *runtime = substream->runtime;
> > struct fsl_asrc_pair *pair = runtime->private_data;
> > + u8 dir = tx ? OUT : IN;
> >
> > snd_pcm_set_runtime_buffer(substream, NULL);
> >
> > - if (pair->dma_chan[IN])
> > - dma_release_channel(pair->dma_chan[IN]);
> > + if (pair->dma_chan[!dir])
> > + dma_release_channel(pair->dma_chan[!dir]);
> >
> > - if (pair->dma_chan[OUT])
> > - dma_release_channel(pair->dma_chan[OUT]);
> > + if (pair->dma_chan[dir] && pair->req_dma_chan_dev_to_dev)
> > + dma_release_channel(pair->dma_chan[dir]);
>
> Why we only apply this to one direction?
If the chan is from the back-end, it is owned by the back-end
component, so it should be released by the back-end component,
not here. That's why I added the flag "req_dma_chan".
^ permalink raw reply
* Re: [RFC PATCH v2 3/3] ASoC: fsl_asrc_dma: Reuse the dma channel if available in Back-End
From: Nicolin Chen @ 2020-06-12 0:31 UTC (permalink / raw)
To: Shengjiu Wang
Cc: alsa-devel, lars, timur, Xiubo.Lee, linux-kernel, linuxppc-dev,
lgirdwood, tiwai, broonie, perex, festevam
In-Reply-To: <0473d4191ae04ab711d63c5c875e47f45f598137.1591783089.git.shengjiu.wang@nxp.com>
On Wed, Jun 10, 2020 at 06:05:49PM +0800, Shengjiu Wang wrote:
> The dma channel has been requested by Back-End cpu dai driver already.
> If fsl_asrc_dma requests dma chan with same dma:tx symlink, then
> there will be below warning with SDMA.
>
> [ 48.174236] fsl-esai-dai 2024000.esai: Cannot create DMA dma:tx symlink
>
> or with EDMA the request operation will fail for EDMA channel
> can only be requested once.
>
> So If we can reuse the dma channel of Back-End, then the issue can be
> fixed.
>
> In order to get the dma channel which is already requested in Back-End.
> we use the exported two functions (snd_soc_lookup_component_nolocked
> and soc_component_to_pcm). If we can get the dma channel, then reuse it,
> if can't, then request a new one.
>
> Signed-off-by: Shengjiu Wang <shengjiu.wang@nxp.com>
> ---
> sound/soc/fsl/fsl_asrc_common.h | 2 ++
> sound/soc/fsl/fsl_asrc_dma.c | 52 +++++++++++++++++++++++++--------
> 2 files changed, 42 insertions(+), 12 deletions(-)
> diff --git a/sound/soc/fsl/fsl_asrc_common.h b/sound/soc/fsl/fsl_asrc_common.h
> index 77665b15c8db..09512bc79b80 100644
> --- a/sound/soc/fsl/fsl_asrc_common.h
> +++ b/sound/soc/fsl/fsl_asrc_common.h
> @@ -32,6 +32,7 @@ enum asrc_pair_index {
> * @dma_chan: inputer and output DMA channels
> * @dma_data: private dma data
> * @pos: hardware pointer position
> + * @req_dma_chan_dev_to_dev: flag for release dev_to_dev chan
Since we have the dma_request call for the back-end only:
+ * @req_dma_chan: flag to release back-end dma chan
> diff --git a/sound/soc/fsl/fsl_asrc_dma.c b/sound/soc/fsl/fsl_asrc_dma.c
> index d6a3fc5f87e5..5ecb77d466d3 100644
> --- a/sound/soc/fsl/fsl_asrc_dma.c
> +++ b/sound/soc/fsl/fsl_asrc_dma.c
> @@ -160,6 +161,9 @@ static int fsl_asrc_dma_hw_params(struct snd_soc_component *component,
> substream_be = snd_soc_dpcm_get_substream(be, stream);
> dma_params_be = snd_soc_dai_get_dma_data(dai, substream_be);
> dev_be = dai->dev;
> + component_be = snd_soc_lookup_component_nolocked(dev_be, SND_DMAENGINE_PCM_DRV_NAME);
> + if (component_be)
> + tmp_chan = soc_component_to_pcm(component_be)->chan[substream->stream];
Should we use substream_be->stream or just substream->stream?
And would be better to add these lines right before we really use
tmp_chan because there's still some distance till it reaches that
point. And would be better to have a line of comments too.
> @@ -205,10 +209,14 @@ static int fsl_asrc_dma_hw_params(struct snd_soc_component *component,
> */
> if (!asrc->use_edma) {
> /* Get DMA request of Back-End */
> - tmp_chan = dma_request_slave_channel(dev_be, tx ? "tx" : "rx");
> + if (!tmp_chan) {
> + tmp_chan_new = dma_request_slave_channel(dev_be, tx ? "tx" : "rx");
> + tmp_chan = tmp_chan_new;
This is a bit confusing...though I finally got it :)
So probably better to have a line of comments.
> @@ -220,9 +228,26 @@ static int fsl_asrc_dma_hw_params(struct snd_soc_component *component,
>
> pair->dma_chan[dir] =
> dma_request_channel(mask, filter, &pair->dma_data);
> + pair->req_dma_chan_dev_to_dev = true;
> } else {
> - pair->dma_chan[dir] =
> - asrc->get_dma_channel(pair, dir);
> + /*
> + * With EDMA, there is two dma channels can be used for p2p,
> + * one is from ASRC, one is from another peripheral
> + * (ESAI or SAI). Previously we select the dma channel of ASRC,
> + * but find an issue for ideal ratio case, there is no control
> + * for data copy speed, the speed is faster than sample
> + * frequency.
> + *
> + * So we switch to use dma channel of peripheral (ESAI or SAI),
> + * that copy speed of DMA is controlled by data consumption
> + * speed in the peripheral FIFO.
> + */
This sounds like a different issue and should be fixed separately?
If you prefer not to, better to move this one to the commit log, rather
than having a changelog here, in my opinion.
Since it no longer uses get_dma_channel() for EDMA case, we should
update the comments at the top as well.
> + pair->req_dma_chan_dev_to_dev = false;
> + pair->dma_chan[dir] = tmp_chan;
> + if (!pair->dma_chan[dir]) {
> + pair->dma_chan[dir] = dma_request_slave_channel(dev_be, tx ? "tx" : "rx");
> + pair->req_dma_chan_dev_to_dev = true;
> + }
> }
Now there are some duplicated lines between these if-else routines, so
combining my previous comments, we can do (sample change, not tested):
@@ -197,18 +199,29 @@ static int fsl_asrc_dma_hw_params(struct snd_soc_component *component,
dma_cap_set(DMA_SLAVE, mask);
dma_cap_set(DMA_CYCLIC, mask);
+ /*
+ * The Back-End device might have already requested a DMA channel,
+ * so try to reuse it first, and then request a new one upon NULL.
+ */
+ component_be = snd_soc_lookup_component_nolocked(dev_be, SND_DMAENGINE_PCM_DRV_NAME);
+ if (component_be) // should probably error out if !component_be?
+ tmp_chan = be_chan = soc_component_to_pcm(component_be)->chan[substream->stream];
+ if (!tmp_chan)
+ tmp_chan = dma_request_slave_channel(dev_be, tx ? "tx" : "rx");
+
/*
* An EDMA DEV_TO_DEV channel is fixed and bound with DMA event of each
* peripheral, unlike SDMA channel that is allocated dynamically. So no
- * need to configure dma_request and dma_request2, but get dma_chan via
- * dma_request_slave_channel directly with dma name of Front-End device
+ * need to configure dma_request and dma_request2, but get dma_chan of
+ * Back-End device directly via dma_request_slave_channel.
*/
if (!asrc->use_edma) {
/* Get DMA request of Back-End */
- tmp_chan = dma_request_slave_channel(dev_be, tx ? "tx" : "rx");
tmp_data = tmp_chan->private;
pair->dma_data.dma_request = tmp_data->dma_request;
- dma_release_channel(tmp_chan);
+ /* Do not release tmp_chan if we are reusing the Back-End one */
+ if (!be_chan)
+ dma_release_channel(tmp_chan);
/* Get DMA request of Front-End */
tmp_chan = asrc->get_dma_channel(pair, dir);
@@ -220,9 +233,11 @@ static int fsl_asrc_dma_hw_params(struct snd_soc_component *component,
pair->dma_chan[dir] =
dma_request_channel(mask, filter, &pair->dma_data);
+ pair->req_dma_chan = true;
} else {
- pair->dma_chan[dir] =
- asrc->get_dma_channel(pair, dir);
+ pair->dma_chan[dir] = tmp_chan;
+ /* Do not flag to release if we are reusing the Back-End one */
+ pair->req_dma_chan = !be_chan;
}
if (!pair->dma_chan[dir]) {
> @@ -273,19 +299,21 @@ static int fsl_asrc_dma_hw_params(struct snd_soc_component *component,
> static int fsl_asrc_dma_hw_free(struct snd_soc_component *component,
> struct snd_pcm_substream *substream)
> {
> + bool tx = substream->stream == SNDRV_PCM_STREAM_PLAYBACK;
> struct snd_pcm_runtime *runtime = substream->runtime;
> struct fsl_asrc_pair *pair = runtime->private_data;
> + u8 dir = tx ? OUT : IN;
>
> snd_pcm_set_runtime_buffer(substream, NULL);
>
> - if (pair->dma_chan[IN])
> - dma_release_channel(pair->dma_chan[IN]);
> + if (pair->dma_chan[!dir])
> + dma_release_channel(pair->dma_chan[!dir]);
>
> - if (pair->dma_chan[OUT])
> - dma_release_channel(pair->dma_chan[OUT]);
> + if (pair->dma_chan[dir] && pair->req_dma_chan_dev_to_dev)
> + dma_release_channel(pair->dma_chan[dir]);
Why we only apply this to one direction?
^ permalink raw reply
* Re: [PATCH v4 1/2] powerpc/uaccess: Implement unsafe_put_user() using 'asm goto'
From: Segher Boessenkool @ 2020-06-11 23:52 UTC (permalink / raw)
To: Nick Desaulniers
Cc: christophe.leroy, patch-notifications, linux-kernel, npiggin,
clang-built-linux, paulus, linuxppc-dev
In-Reply-To: <20200611224355.71174-1-ndesaulniers@google.com>
On Thu, Jun 11, 2020 at 03:43:55PM -0700, Nick Desaulniers wrote:
> Segher, Cristophe, I suspect Clang is missing support for the %L and %U
> output templates [1].
The arch/powerpc kernel first used the %U output modifier in 0c176fa80fdf
(from 2016), and %L in b8b572e1015f (2008). include/asm-ppc (and ppc64)
have had %U since 2005 (1da177e4c3f4), and %L as well (0c541b4406a6).
> I've implemented support for some of these before
> in Clang via the documentation at [2], but these seem to be machine
> specific?
Yes, almost all output modifiers are. Only %l, %a, %n, and part of %c
are generic (and %% and %= and on some targets, %{, %|, %}).
> Can you please point me to documentation/unit tests/source for
> these so that I can figure out what they should be doing, and look into
> implementing them in Clang?
The PowerPC part of
https://gcc.gnu.org/onlinedocs/gcc/Machine-Constraints.html#Machine-Constraints
(sorry, no anchor) documents %U.
Traditionally the source code is the documentation for this. The code
here starts with the comment
/* Write second word of DImode or DFmode reference. Works on register
or non-indexed memory only. */
(which is very out-of-date itself, it works fine for e.g. TImode as well,
but alas).
Unit tests are completely unsuitable for most compiler things like this.
The source code is gcc/config/rs6000/rs6000.c, easiest is to search for
'L' (with those quotes). Function print_operand.
HtH,
Segher
^ permalink raw reply
* Re: [PATCH v4 1/2] powerpc/uaccess: Implement unsafe_put_user() using 'asm goto'
From: Nick Desaulniers @ 2020-06-11 22:43 UTC (permalink / raw)
To: patch-notifications, christophe.leroy, segher
Cc: linux-kernel, npiggin, clang-built-linux, paulus, linuxppc-dev
In-Reply-To: <49YBKY13Szz9sT4@ozlabs.org>
Hello! It seems this patch broke our ppc32 builds, and we had to
disable them [0]. :(
From what I can tell, though Michael mentioned this was merged on May
29, our CI of -next was green for ppc32 until June 4, then mainline
went red on June 6. So this patch only got 2 days of soak time before
the merge window opened.
A general issue with the -next workflow seems to be that patches get
different amounts of soak time. For higher risk patches like this one,
can I please ask that they be held back a release if close to the merge
window?
Segher, Christophe, I suspect Clang is missing support for the %L and %U
output templates [1]. I've implemented support for some of these before
in Clang via the documentation at [2], but these seem to be machine
specific? Can you please point me to documentation/unit tests/source for
these so that I can figure out what they should be doing, and look into
implementing them in Clang?
[0] https://github.com/ClangBuiltLinux/continuous-integration/pull/279
[1] https://bugs.llvm.org/show_bug.cgi?id=46186
[2]
https://gcc.gnu.org/onlinedocs/gccint/Output-Template.html#Output-Template
^ permalink raw reply
* [PATCH v3 2/2] powerpc: configs: remove CMDLINE_BOOL
From: Chris Packham @ 2020-06-11 22:42 UTC (permalink / raw)
To: mpe, benh, paulus, christophe.leroy
Cc: Chris Packham, linuxppc-dev, linux-kernel
In-Reply-To: <20200611224220.25066-1-chris.packham@alliedtelesis.co.nz>
Regenerate defconfigs to remove CONFIG_CMDLINE_BOOL and the default
CONFIG_CMDLINE where applicable.
Signed-off-by: Chris Packham <chris.packham@alliedtelesis.co.nz>
---
Changes in v3:
- new
arch/powerpc/configs/44x/akebono_defconfig | 2 --
arch/powerpc/configs/44x/arches_defconfig | 2 --
arch/powerpc/configs/44x/bamboo_defconfig | 2 --
arch/powerpc/configs/44x/bluestone_defconfig | 2 --
arch/powerpc/configs/44x/canyonlands_defconfig | 2 --
arch/powerpc/configs/44x/currituck_defconfig | 2 --
arch/powerpc/configs/44x/eiger_defconfig | 2 --
arch/powerpc/configs/44x/fsp2_defconfig | 1 -
arch/powerpc/configs/44x/icon_defconfig | 2 --
arch/powerpc/configs/44x/iss476-smp_defconfig | 1 -
arch/powerpc/configs/44x/katmai_defconfig | 2 --
arch/powerpc/configs/44x/rainier_defconfig | 2 --
arch/powerpc/configs/44x/redwood_defconfig | 2 --
arch/powerpc/configs/44x/sam440ep_defconfig | 2 --
arch/powerpc/configs/44x/sequoia_defconfig | 2 --
arch/powerpc/configs/44x/taishan_defconfig | 2 --
arch/powerpc/configs/44x/warp_defconfig | 1 -
arch/powerpc/configs/holly_defconfig | 1 -
arch/powerpc/configs/mvme5100_defconfig | 3 +--
arch/powerpc/configs/ps3_defconfig | 2 --
arch/powerpc/configs/skiroot_defconfig | 1 -
arch/powerpc/configs/storcenter_defconfig | 1 -
22 files changed, 1 insertion(+), 38 deletions(-)
diff --git a/arch/powerpc/configs/44x/akebono_defconfig b/arch/powerpc/configs/44x/akebono_defconfig
index 7705a5c3f4ea..60d5fa2c3b93 100644
--- a/arch/powerpc/configs/44x/akebono_defconfig
+++ b/arch/powerpc/configs/44x/akebono_defconfig
@@ -19,8 +19,6 @@ CONFIG_HIGHMEM=y
CONFIG_HZ_100=y
CONFIG_IRQ_ALL_CPUS=y
# CONFIG_COMPACTION is not set
-CONFIG_CMDLINE_BOOL=y
-CONFIG_CMDLINE=""
# CONFIG_SUSPEND is not set
CONFIG_NET=y
CONFIG_PACKET=y
diff --git a/arch/powerpc/configs/44x/arches_defconfig b/arch/powerpc/configs/44x/arches_defconfig
index 82c6f49b8dcb..41d04e70d4fb 100644
--- a/arch/powerpc/configs/44x/arches_defconfig
+++ b/arch/powerpc/configs/44x/arches_defconfig
@@ -11,8 +11,6 @@ CONFIG_MODULE_UNLOAD=y
# CONFIG_BLK_DEV_BSG is not set
# CONFIG_EBONY is not set
CONFIG_ARCHES=y
-CONFIG_CMDLINE_BOOL=y
-CONFIG_CMDLINE=""
CONFIG_NET=y
CONFIG_PACKET=y
CONFIG_UNIX=y
diff --git a/arch/powerpc/configs/44x/bamboo_defconfig b/arch/powerpc/configs/44x/bamboo_defconfig
index 679213214a75..acbce718eaa8 100644
--- a/arch/powerpc/configs/44x/bamboo_defconfig
+++ b/arch/powerpc/configs/44x/bamboo_defconfig
@@ -9,8 +9,6 @@ CONFIG_MODULE_UNLOAD=y
# CONFIG_BLK_DEV_BSG is not set
CONFIG_BAMBOO=y
# CONFIG_EBONY is not set
-CONFIG_CMDLINE_BOOL=y
-CONFIG_CMDLINE=""
CONFIG_NET=y
CONFIG_PACKET=y
CONFIG_UNIX=y
diff --git a/arch/powerpc/configs/44x/bluestone_defconfig b/arch/powerpc/configs/44x/bluestone_defconfig
index 8006a5728afd..37088f250c9e 100644
--- a/arch/powerpc/configs/44x/bluestone_defconfig
+++ b/arch/powerpc/configs/44x/bluestone_defconfig
@@ -11,8 +11,6 @@ CONFIG_EXPERT=y
# CONFIG_COMPAT_BRK is not set
CONFIG_BLUESTONE=y
# CONFIG_EBONY is not set
-CONFIG_CMDLINE_BOOL=y
-CONFIG_CMDLINE=""
CONFIG_NET=y
CONFIG_PACKET=y
CONFIG_UNIX=y
diff --git a/arch/powerpc/configs/44x/canyonlands_defconfig b/arch/powerpc/configs/44x/canyonlands_defconfig
index ccc14eb7a2f1..61776ade572b 100644
--- a/arch/powerpc/configs/44x/canyonlands_defconfig
+++ b/arch/powerpc/configs/44x/canyonlands_defconfig
@@ -11,8 +11,6 @@ CONFIG_MODULE_UNLOAD=y
# CONFIG_BLK_DEV_BSG is not set
# CONFIG_EBONY is not set
CONFIG_CANYONLANDS=y
-CONFIG_CMDLINE_BOOL=y
-CONFIG_CMDLINE=""
CONFIG_NET=y
CONFIG_PACKET=y
CONFIG_UNIX=y
diff --git a/arch/powerpc/configs/44x/currituck_defconfig b/arch/powerpc/configs/44x/currituck_defconfig
index be76e066df01..34c86b3abecb 100644
--- a/arch/powerpc/configs/44x/currituck_defconfig
+++ b/arch/powerpc/configs/44x/currituck_defconfig
@@ -17,8 +17,6 @@ CONFIG_HIGHMEM=y
CONFIG_HZ_100=y
CONFIG_MATH_EMULATION=y
CONFIG_IRQ_ALL_CPUS=y
-CONFIG_CMDLINE_BOOL=y
-CONFIG_CMDLINE=""
# CONFIG_SUSPEND is not set
CONFIG_NET=y
CONFIG_PACKET=y
diff --git a/arch/powerpc/configs/44x/eiger_defconfig b/arch/powerpc/configs/44x/eiger_defconfig
index 1abaa63e067f..509300f400e2 100644
--- a/arch/powerpc/configs/44x/eiger_defconfig
+++ b/arch/powerpc/configs/44x/eiger_defconfig
@@ -10,8 +10,6 @@ CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
# CONFIG_EBONY is not set
CONFIG_EIGER=y
-CONFIG_CMDLINE_BOOL=y
-CONFIG_CMDLINE=""
CONFIG_PCIEPORTBUS=y
# CONFIG_PCIEASPM is not set
CONFIG_NET=y
diff --git a/arch/powerpc/configs/44x/fsp2_defconfig b/arch/powerpc/configs/44x/fsp2_defconfig
index e67fc041ca3e..30845ce0885a 100644
--- a/arch/powerpc/configs/44x/fsp2_defconfig
+++ b/arch/powerpc/configs/44x/fsp2_defconfig
@@ -28,7 +28,6 @@ CONFIG_476FPE_ERR46=y
CONFIG_SWIOTLB=y
CONFIG_KEXEC=y
CONFIG_CRASH_DUMP=y
-CONFIG_CMDLINE_BOOL=y
CONFIG_CMDLINE="ip=on rw"
# CONFIG_SUSPEND is not set
# CONFIG_PCI is not set
diff --git a/arch/powerpc/configs/44x/icon_defconfig b/arch/powerpc/configs/44x/icon_defconfig
index 7d7ff84c8200..930948a1da76 100644
--- a/arch/powerpc/configs/44x/icon_defconfig
+++ b/arch/powerpc/configs/44x/icon_defconfig
@@ -9,8 +9,6 @@ CONFIG_MODULE_UNLOAD=y
# CONFIG_EBONY is not set
CONFIG_ICON=y
CONFIG_HIGHMEM=y
-CONFIG_CMDLINE_BOOL=y
-CONFIG_CMDLINE=""
CONFIG_PCIEPORTBUS=y
# CONFIG_PCIEASPM is not set
CONFIG_NET=y
diff --git a/arch/powerpc/configs/44x/iss476-smp_defconfig b/arch/powerpc/configs/44x/iss476-smp_defconfig
index fb5c73a29bf4..2c3834eebca3 100644
--- a/arch/powerpc/configs/44x/iss476-smp_defconfig
+++ b/arch/powerpc/configs/44x/iss476-smp_defconfig
@@ -17,7 +17,6 @@ CONFIG_ISS4xx=y
CONFIG_HZ_100=y
CONFIG_MATH_EMULATION=y
CONFIG_IRQ_ALL_CPUS=y
-CONFIG_CMDLINE_BOOL=y
CONFIG_CMDLINE="root=/dev/issblk0"
# CONFIG_PCI is not set
CONFIG_ADVANCED_OPTIONS=y
diff --git a/arch/powerpc/configs/44x/katmai_defconfig b/arch/powerpc/configs/44x/katmai_defconfig
index c6dc1445fc04..1a0f1c3e0ee9 100644
--- a/arch/powerpc/configs/44x/katmai_defconfig
+++ b/arch/powerpc/configs/44x/katmai_defconfig
@@ -9,8 +9,6 @@ CONFIG_MODULE_UNLOAD=y
# CONFIG_BLK_DEV_BSG is not set
# CONFIG_EBONY is not set
CONFIG_KATMAI=y
-CONFIG_CMDLINE_BOOL=y
-CONFIG_CMDLINE=""
CONFIG_NET=y
CONFIG_PACKET=y
CONFIG_UNIX=y
diff --git a/arch/powerpc/configs/44x/rainier_defconfig b/arch/powerpc/configs/44x/rainier_defconfig
index c83ad03182df..6dd67de06a0b 100644
--- a/arch/powerpc/configs/44x/rainier_defconfig
+++ b/arch/powerpc/configs/44x/rainier_defconfig
@@ -10,8 +10,6 @@ CONFIG_MODULE_UNLOAD=y
# CONFIG_EBONY is not set
CONFIG_RAINIER=y
CONFIG_MATH_EMULATION=y
-CONFIG_CMDLINE_BOOL=y
-CONFIG_CMDLINE=""
CONFIG_NET=y
CONFIG_PACKET=y
CONFIG_UNIX=y
diff --git a/arch/powerpc/configs/44x/redwood_defconfig b/arch/powerpc/configs/44x/redwood_defconfig
index 640fe1d5af28..e28d76416537 100644
--- a/arch/powerpc/configs/44x/redwood_defconfig
+++ b/arch/powerpc/configs/44x/redwood_defconfig
@@ -10,8 +10,6 @@ CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
# CONFIG_EBONY is not set
CONFIG_REDWOOD=y
-CONFIG_CMDLINE_BOOL=y
-CONFIG_CMDLINE=""
CONFIG_PCIEPORTBUS=y
# CONFIG_PCIEASPM is not set
CONFIG_NET=y
diff --git a/arch/powerpc/configs/44x/sam440ep_defconfig b/arch/powerpc/configs/44x/sam440ep_defconfig
index 22dc0dadf576..ef09786d49b9 100644
--- a/arch/powerpc/configs/44x/sam440ep_defconfig
+++ b/arch/powerpc/configs/44x/sam440ep_defconfig
@@ -12,8 +12,6 @@ CONFIG_PARTITION_ADVANCED=y
CONFIG_AMIGA_PARTITION=y
# CONFIG_EBONY is not set
CONFIG_SAM440EP=y
-CONFIG_CMDLINE_BOOL=y
-CONFIG_CMDLINE=""
CONFIG_NET=y
CONFIG_PACKET=y
CONFIG_UNIX=y
diff --git a/arch/powerpc/configs/44x/sequoia_defconfig b/arch/powerpc/configs/44x/sequoia_defconfig
index 2c0973db8837..b4984eab43eb 100644
--- a/arch/powerpc/configs/44x/sequoia_defconfig
+++ b/arch/powerpc/configs/44x/sequoia_defconfig
@@ -11,8 +11,6 @@ CONFIG_MODULE_UNLOAD=y
# CONFIG_BLK_DEV_BSG is not set
# CONFIG_EBONY is not set
CONFIG_SEQUOIA=y
-CONFIG_CMDLINE_BOOL=y
-CONFIG_CMDLINE=""
CONFIG_NET=y
CONFIG_PACKET=y
CONFIG_UNIX=y
diff --git a/arch/powerpc/configs/44x/taishan_defconfig b/arch/powerpc/configs/44x/taishan_defconfig
index a2d355ca62b2..3ea5932ab852 100644
--- a/arch/powerpc/configs/44x/taishan_defconfig
+++ b/arch/powerpc/configs/44x/taishan_defconfig
@@ -9,8 +9,6 @@ CONFIG_MODULE_UNLOAD=y
# CONFIG_BLK_DEV_BSG is not set
# CONFIG_EBONY is not set
CONFIG_TAISHAN=y
-CONFIG_CMDLINE_BOOL=y
-CONFIG_CMDLINE=""
CONFIG_NET=y
CONFIG_PACKET=y
CONFIG_UNIX=y
diff --git a/arch/powerpc/configs/44x/warp_defconfig b/arch/powerpc/configs/44x/warp_defconfig
index af66c69c49fe..47252c2d7669 100644
--- a/arch/powerpc/configs/44x/warp_defconfig
+++ b/arch/powerpc/configs/44x/warp_defconfig
@@ -14,7 +14,6 @@ CONFIG_MODULE_UNLOAD=y
CONFIG_WARP=y
CONFIG_PPC4xx_GPIO=y
CONFIG_HZ_1000=y
-CONFIG_CMDLINE_BOOL=y
CONFIG_CMDLINE="ip=on"
# CONFIG_PCI is not set
CONFIG_NET=y
diff --git a/arch/powerpc/configs/holly_defconfig b/arch/powerpc/configs/holly_defconfig
index 067f433c8f5e..271daff47d1d 100644
--- a/arch/powerpc/configs/holly_defconfig
+++ b/arch/powerpc/configs/holly_defconfig
@@ -13,7 +13,6 @@ CONFIG_EMBEDDED6xx=y
CONFIG_PPC_HOLLY=y
CONFIG_GEN_RTC=y
CONFIG_BINFMT_MISC=y
-CONFIG_CMDLINE_BOOL=y
CONFIG_CMDLINE="console=ttyS0,115200"
# CONFIG_SECCOMP is not set
CONFIG_NET=y
diff --git a/arch/powerpc/configs/mvme5100_defconfig b/arch/powerpc/configs/mvme5100_defconfig
index 0a0d046fc445..3d53d69ed36c 100644
--- a/arch/powerpc/configs/mvme5100_defconfig
+++ b/arch/powerpc/configs/mvme5100_defconfig
@@ -20,10 +20,9 @@ CONFIG_EMBEDDED6xx=y
CONFIG_MVME5100=y
CONFIG_KVM_GUEST=y
CONFIG_HZ_100=y
+CONFIG_CMDLINE="console=ttyS0,9600 ip=dhcp root=/dev/nfs"
# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
# CONFIG_COMPACTION is not set
-CONFIG_CMDLINE_BOOL=y
-CONFIG_CMDLINE="console=ttyS0,9600 ip=dhcp root=/dev/nfs"
CONFIG_NET=y
CONFIG_PACKET=y
CONFIG_UNIX=y
diff --git a/arch/powerpc/configs/ps3_defconfig b/arch/powerpc/configs/ps3_defconfig
index 81b55c880fc3..142f1321fa58 100644
--- a/arch/powerpc/configs/ps3_defconfig
+++ b/arch/powerpc/configs/ps3_defconfig
@@ -34,8 +34,6 @@ CONFIG_KEXEC=y
# CONFIG_SPARSEMEM_VMEMMAP is not set
# CONFIG_COMPACTION is not set
CONFIG_SCHED_SMT=y
-CONFIG_CMDLINE_BOOL=y
-CONFIG_CMDLINE=""
CONFIG_PM=y
CONFIG_PM_DEBUG=y
# CONFIG_SECCOMP is not set
diff --git a/arch/powerpc/configs/skiroot_defconfig b/arch/powerpc/configs/skiroot_defconfig
index ad6739ac63dc..b806a5d3a695 100644
--- a/arch/powerpc/configs/skiroot_defconfig
+++ b/arch/powerpc/configs/skiroot_defconfig
@@ -45,7 +45,6 @@ CONFIG_IRQ_ALL_CPUS=y
CONFIG_NUMA=y
CONFIG_PPC_64K_PAGES=y
CONFIG_SCHED_SMT=y
-CONFIG_CMDLINE_BOOL=y
CONFIG_CMDLINE="console=tty0 console=hvc0 ipr.fast_reboot=1 quiet"
# CONFIG_SECCOMP is not set
# CONFIG_PPC_MEM_KEYS is not set
diff --git a/arch/powerpc/configs/storcenter_defconfig b/arch/powerpc/configs/storcenter_defconfig
index b964084e4056..47dcfaddc1ac 100644
--- a/arch/powerpc/configs/storcenter_defconfig
+++ b/arch/powerpc/configs/storcenter_defconfig
@@ -12,7 +12,6 @@ CONFIG_EMBEDDED6xx=y
CONFIG_STORCENTER=y
CONFIG_HZ_100=y
CONFIG_BINFMT_MISC=y
-CONFIG_CMDLINE_BOOL=y
CONFIG_CMDLINE="console=ttyS0,115200"
# CONFIG_SECCOMP is not set
CONFIG_NET=y
--
2.27.0
^ permalink raw reply related
* [PATCH v3 1/2] powerpc: Remove inaccessible CMDLINE default
From: Chris Packham @ 2020-06-11 22:42 UTC (permalink / raw)
To: mpe, benh, paulus, christophe.leroy
Cc: Chris Packham, linuxppc-dev, linux-kernel
In-Reply-To: <20200611224220.25066-1-chris.packham@alliedtelesis.co.nz>
Since commit cbe46bd4f510 ("powerpc: remove CONFIG_CMDLINE #ifdef mess")
CONFIG_CMDLINE has always had a value regardless of CONFIG_CMDLINE_BOOL.
For example:
$ make ARCH=powerpc defconfig
$ cat .config
# CONFIG_CMDLINE_BOOL is not set
CONFIG_CMDLINE=""
When enabling CONFIG_CMDLINE_BOOL this value is kept making the 'default
"..." if CONFIG_CMDLINE_BOOL' ineffective.
$ ./scripts/config --enable CONFIG_CMDLINE_BOOL
$ cat .config
CONFIG_CMDLINE_BOOL=y
CONFIG_CMDLINE=""
Remove CONFIG_CMDLINE_BOOL and the inaccessible default.
Signed-off-by: Chris Packham <chris.packham@alliedtelesis.co.nz>
Reviewed-by: Christophe Leroy <christophe.leroy@c-s.fr>
---
Changes in v3:
- none
Changes in v2:
- Rebase on top of Linus's tree
- Fix some typos in commit message
- Add review from Christophe
- Remove CONFIG_CMDLINE_BOOL
arch/powerpc/Kconfig | 6 +-----
1 file changed, 1 insertion(+), 5 deletions(-)
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 9fa23eb320ff..51abc59c3334 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -859,12 +859,8 @@ config PPC_DENORMALISATION
Add support for handling denormalisation of single precision
values. Useful for bare metal only. If unsure say Y here.
-config CMDLINE_BOOL
- bool "Default bootloader kernel arguments"
-
config CMDLINE
- string "Initial kernel command string" if CMDLINE_BOOL
- default "console=ttyS0,9600 console=tty0 root=/dev/sda2" if CMDLINE_BOOL
+ string "Initial kernel command string"
default ""
help
On some platforms, there is currently no way for the boot loader to
--
2.27.0
^ permalink raw reply related
* [PATCH v3 0/2] powerpc: CMDLINE config cleanup
From: Chris Packham @ 2020-06-11 22:42 UTC (permalink / raw)
To: mpe, benh, paulus, christophe.leroy
Cc: Chris Packham, linuxppc-dev, linux-kernel
This series cleans up the config options related to the boot command line.
Chris Packham (2):
powerpc: Remove inaccessible CMDLINE default
powerpc: configs: remove CMDLINE_BOOL
arch/powerpc/Kconfig | 6 +-----
arch/powerpc/configs/44x/akebono_defconfig | 2 --
arch/powerpc/configs/44x/arches_defconfig | 2 --
arch/powerpc/configs/44x/bamboo_defconfig | 2 --
arch/powerpc/configs/44x/bluestone_defconfig | 2 --
arch/powerpc/configs/44x/canyonlands_defconfig | 2 --
arch/powerpc/configs/44x/currituck_defconfig | 2 --
arch/powerpc/configs/44x/eiger_defconfig | 2 --
arch/powerpc/configs/44x/fsp2_defconfig | 1 -
arch/powerpc/configs/44x/icon_defconfig | 2 --
arch/powerpc/configs/44x/iss476-smp_defconfig | 1 -
arch/powerpc/configs/44x/katmai_defconfig | 2 --
arch/powerpc/configs/44x/rainier_defconfig | 2 --
arch/powerpc/configs/44x/redwood_defconfig | 2 --
arch/powerpc/configs/44x/sam440ep_defconfig | 2 --
arch/powerpc/configs/44x/sequoia_defconfig | 2 --
arch/powerpc/configs/44x/taishan_defconfig | 2 --
arch/powerpc/configs/44x/warp_defconfig | 1 -
arch/powerpc/configs/holly_defconfig | 1 -
arch/powerpc/configs/mvme5100_defconfig | 3 +--
arch/powerpc/configs/ps3_defconfig | 2 --
arch/powerpc/configs/skiroot_defconfig | 1 -
arch/powerpc/configs/storcenter_defconfig | 1 -
23 files changed, 2 insertions(+), 43 deletions(-)
--
2.27.0
^ permalink raw reply
* (no subject)
From: ndesaulniers @ 2020-06-11 22:38 UTC (permalink / raw)
To: patch-notifications, christophe.leroy, segher, benh, paulus,
npiggin, linuxppc-dev, linux-kernel, clang-built-linux
Date: Thu, 11 Jun 2020 15:38:38 -0700
From: Nick Desaulniers <ndesaulniers@google.com>
To: Michael Ellerman <patch-notifications@ellerman.id.au>,
christophe.leroy@c-s.fr, segher@kernel.crashing.org
Cc: Christophe Leroy <christophe.leroy@c-s.fr>,
Benjamin Herrenschmidt <benh@kernel.crashing.org>,
Paul Mackerras <paulus@samba.org>, npiggin@gmail.com,
segher@kernel.crashing.org, linuxppc-dev@lists.ozlabs.org,
linux-kernel@vger.kernel.org, clang-built-linux@googlegroups.com
Subject: Re: [PATCH v4 1/2] powerpc/uaccess: Implement unsafe_put_user()
using 'asm goto'
Message-ID: <20200611223838.GA60089@google.com>
References:
<23e680624680a9a5405f4b88740d2596d4b17c26.1587143308.git.christophe.leroy@c-s.fr>
<49YBKY13Szz9sT4@ozlabs.org>
MIME-Version: 1.0
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline
In-Reply-To: <49YBKY13Szz9sT4@ozlabs.org>
On Fri, May 29, 2020 at 02:24:16PM +1000, Michael Ellerman wrote:
> On Fri, 2020-04-17 at 17:08:51 UTC, Christophe Leroy wrote:
> > unsafe_put_user() is designed to take benefit of 'asm goto'.
> >
> > Instead of using the standard __put_user() approach and branch
> > based on the returned error, use 'asm goto' and make the
> > exception code branch directly to the error label. There is
> > no code anymore in the fixup section.
> >
> > This change significantly simplifies functions using
> > unsafe_put_user()
> ...
> >
> > Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
> > Reviewed-by: Segher Boessenkool <segher@kernel.crashing.org>
> Applied to powerpc topic/uaccess-ppc, thanks.
> https://git.kernel.org/powerpc/c/334710b1496af8a0960e70121f850e209c20958f
> cheers
Hello! It seems this patch broke our ppc32 builds, and we had to
disable them [0]. :(
From what I can tell, though Michael mentioned this was merged on May
29, our CI of -next was green for ppc32 until June 4, then mainline
went red June 6. So this patch only got 2 days of soak time before the
merge window opened.
A general issue with the -next workflow seems to be that patches get
different amounts of soak time. For higher risk patches like this one,
can I please ask that they be held back a release if close to the merge
window?
Segher, Christophe, I suspect Clang is missing support for the %L and %U
output templates [1]. I've implemented support for some of these before
in Clang via the documentation at [2], but these seem to be machine
specific? Can you please point me to documentation/unit tests/source for
these so that I can figure out what they should be doing, and look into
implementing them in Clang?
(Apologies for the tone of this email; I had typed up a nice fuller
report with links, but it seemed that mutt wrote out an empty postponed
file, and I kind of just want to put my laptop in the garbage right now.
I suspect our internal SMTP tool will also mess up some headers, but
let's see (Also, too lazy+angry right now to solve).)
[0] https://github.com/ClangBuiltLinux/continuous-integration/pull/279
[1] https://bugs.llvm.org/show_bug.cgi?id=46186
[2]
https://gcc.gnu.org/onlinedocs/gccint/Output-Template.html#Output-Template
^ permalink raw reply
* Re: [PATCH v5 1/4] riscv: Move kernel mapping to vmalloc zone
From: Atish Patra @ 2020-06-11 21:34 UTC (permalink / raw)
To: Alexandre Ghiti
Cc: Albert Ou, Anup Patel, linux-kernel@vger.kernel.org List,
Atish Patra, Paul Mackerras, Zong Li, Paul Walmsley,
Palmer Dabbelt, linux-riscv, linuxppc-dev
In-Reply-To: <20200607075949.665-2-alex@ghiti.fr>
On Sun, Jun 7, 2020 at 1:01 AM Alexandre Ghiti <alex@ghiti.fr> wrote:
>
> This is a preparatory patch for relocatable kernel.
>
> The kernel used to be linked at PAGE_OFFSET address and used to be loaded
> physically at the beginning of the main memory. Therefore, we could use
> the linear mapping for the kernel mapping.
>
> But the relocated kernel base address will be different from PAGE_OFFSET
> and since in the linear mapping, two different virtual addresses cannot
> point to the same physical address, the kernel mapping needs to lie outside
> the linear mapping.
>
> In addition, because modules and BPF must be close to the kernel (inside
> +-2GB window), the kernel is placed at the end of the vmalloc zone minus
> 2GB, which leaves room for modules and BPF. The kernel could not be
> placed at the beginning of the vmalloc zone since other vmalloc
> allocations from the kernel could get all the +-2GB window around the
> kernel which would prevent new modules and BPF programs from being loaded.
>
> Signed-off-by: Alexandre Ghiti <alex@ghiti.fr>
> Reviewed-by: Zong Li <zong.li@sifive.com>
> ---
> arch/riscv/boot/loader.lds.S | 3 +-
> arch/riscv/include/asm/page.h | 10 +++++-
> arch/riscv/include/asm/pgtable.h | 38 ++++++++++++++-------
> arch/riscv/kernel/head.S | 3 +-
> arch/riscv/kernel/module.c | 4 +--
> arch/riscv/kernel/vmlinux.lds.S | 3 +-
> arch/riscv/mm/init.c | 58 +++++++++++++++++++++++++-------
> arch/riscv/mm/physaddr.c | 2 +-
> 8 files changed, 88 insertions(+), 33 deletions(-)
>
> diff --git a/arch/riscv/boot/loader.lds.S b/arch/riscv/boot/loader.lds.S
> index 47a5003c2e28..62d94696a19c 100644
> --- a/arch/riscv/boot/loader.lds.S
> +++ b/arch/riscv/boot/loader.lds.S
> @@ -1,13 +1,14 @@
> /* SPDX-License-Identifier: GPL-2.0 */
>
> #include <asm/page.h>
> +#include <asm/pgtable.h>
>
> OUTPUT_ARCH(riscv)
> ENTRY(_start)
>
> SECTIONS
> {
> - . = PAGE_OFFSET;
> + . = KERNEL_LINK_ADDR;
>
> .payload : {
> *(.payload)
> diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h
> index 2d50f76efe48..48bb09b6a9b7 100644
> --- a/arch/riscv/include/asm/page.h
> +++ b/arch/riscv/include/asm/page.h
> @@ -90,18 +90,26 @@ typedef struct page *pgtable_t;
>
> #ifdef CONFIG_MMU
> extern unsigned long va_pa_offset;
> +extern unsigned long va_kernel_pa_offset;
> extern unsigned long pfn_base;
> #define ARCH_PFN_OFFSET (pfn_base)
> #else
> #define va_pa_offset 0
> +#define va_kernel_pa_offset 0
> #define ARCH_PFN_OFFSET (PAGE_OFFSET >> PAGE_SHIFT)
> #endif /* CONFIG_MMU */
>
> extern unsigned long max_low_pfn;
> extern unsigned long min_low_pfn;
> +extern unsigned long kernel_virt_addr;
>
> #define __pa_to_va_nodebug(x) ((void *)((unsigned long) (x) + va_pa_offset))
> -#define __va_to_pa_nodebug(x) ((unsigned long)(x) - va_pa_offset)
> +#define linear_mapping_va_to_pa(x) ((unsigned long)(x) - va_pa_offset)
> +#define kernel_mapping_va_to_pa(x) \
> + ((unsigned long)(x) - va_kernel_pa_offset)
> +#define __va_to_pa_nodebug(x) \
> + (((x) >= PAGE_OFFSET) ? \
> + linear_mapping_va_to_pa(x) : kernel_mapping_va_to_pa(x))
>
> #ifdef CONFIG_DEBUG_VIRTUAL
> extern phys_addr_t __virt_to_phys(unsigned long x);
> diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
> index 35b60035b6b0..94ef3b49dfb6 100644
> --- a/arch/riscv/include/asm/pgtable.h
> +++ b/arch/riscv/include/asm/pgtable.h
> @@ -11,23 +11,29 @@
>
> #include <asm/pgtable-bits.h>
>
> -#ifndef __ASSEMBLY__
> -
> -/* Page Upper Directory not used in RISC-V */
> -#include <asm-generic/pgtable-nopud.h>
> -#include <asm/page.h>
> -#include <asm/tlbflush.h>
> -#include <linux/mm_types.h>
> -
> -#ifdef CONFIG_MMU
> +#ifndef CONFIG_MMU
> +#define KERNEL_VIRT_ADDR PAGE_OFFSET
> +#define KERNEL_LINK_ADDR PAGE_OFFSET
> +#else
> +/*
> + * Leave 2GB for modules and BPF that must lie within a 2GB range around
> + * the kernel.
> + */
> +#define KERNEL_VIRT_ADDR (VMALLOC_END - SZ_2G + 1)
> +#define KERNEL_LINK_ADDR KERNEL_VIRT_ADDR
>
> #define VMALLOC_SIZE (KERN_VIRT_SIZE >> 1)
> #define VMALLOC_END (PAGE_OFFSET - 1)
> #define VMALLOC_START (PAGE_OFFSET - VMALLOC_SIZE)
>
> #define BPF_JIT_REGION_SIZE (SZ_128M)
> -#define BPF_JIT_REGION_START (PAGE_OFFSET - BPF_JIT_REGION_SIZE)
> -#define BPF_JIT_REGION_END (VMALLOC_END)
> +#define BPF_JIT_REGION_START PFN_ALIGN((unsigned long)&_end)
> +#define BPF_JIT_REGION_END (BPF_JIT_REGION_START + BPF_JIT_REGION_SIZE)
> +
As these mappings have changed a few times in recent months including
this one, I think it would be
better to have virtual memory layout documentation in RISC-V similar
to other architectures.
If you can include the page table layout for 3/4 level page tables in
the same document, that would be really helpful.
> +#ifdef CONFIG_64BIT
> +#define VMALLOC_MODULE_START BPF_JIT_REGION_END
> +#define VMALLOC_MODULE_END (((unsigned long)&_start & PAGE_MASK) + SZ_2G)
> +#endif
>
> /*
> * Roughly size the vmemmap space to be large enough to fit enough
> @@ -57,9 +63,16 @@
> #define FIXADDR_SIZE PGDIR_SIZE
> #endif
> #define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE)
> -
> #endif
>
> +#ifndef __ASSEMBLY__
> +
> +/* Page Upper Directory not used in RISC-V */
> +#include <asm-generic/pgtable-nopud.h>
> +#include <asm/page.h>
> +#include <asm/tlbflush.h>
> +#include <linux/mm_types.h>
> +
> #ifdef CONFIG_64BIT
> #include <asm/pgtable-64.h>
> #else
> @@ -483,6 +496,7 @@ static inline void __kernel_map_pages(struct page *page, int numpages, int enabl
>
> #define kern_addr_valid(addr) (1) /* FIXME */
>
> +extern char _start[];
> extern void *dtb_early_va;
> void setup_bootmem(void);
> void paging_init(void);
> diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S
> index 98a406474e7d..8f5bb7731327 100644
> --- a/arch/riscv/kernel/head.S
> +++ b/arch/riscv/kernel/head.S
> @@ -49,7 +49,8 @@ ENTRY(_start)
> #ifdef CONFIG_MMU
> relocate:
> /* Relocate return address */
> - li a1, PAGE_OFFSET
> + la a1, kernel_virt_addr
> + REG_L a1, 0(a1)
> la a2, _start
> sub a1, a1, a2
> add ra, ra, a1
> diff --git a/arch/riscv/kernel/module.c b/arch/riscv/kernel/module.c
> index 8bbe5dbe1341..1a8fbe05accf 100644
> --- a/arch/riscv/kernel/module.c
> +++ b/arch/riscv/kernel/module.c
> @@ -392,12 +392,10 @@ int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab,
> }
>
> #if defined(CONFIG_MMU) && defined(CONFIG_64BIT)
> -#define VMALLOC_MODULE_START \
> - max(PFN_ALIGN((unsigned long)&_end - SZ_2G), VMALLOC_START)
> void *module_alloc(unsigned long size)
> {
> return __vmalloc_node_range(size, 1, VMALLOC_MODULE_START,
> - VMALLOC_END, GFP_KERNEL,
> + VMALLOC_MODULE_END, GFP_KERNEL,
> PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE,
> __builtin_return_address(0));
> }
> diff --git a/arch/riscv/kernel/vmlinux.lds.S b/arch/riscv/kernel/vmlinux.lds.S
> index 0339b6bbe11a..a9abde62909f 100644
> --- a/arch/riscv/kernel/vmlinux.lds.S
> +++ b/arch/riscv/kernel/vmlinux.lds.S
> @@ -4,7 +4,8 @@
> * Copyright (C) 2017 SiFive
> */
>
> -#define LOAD_OFFSET PAGE_OFFSET
> +#include <asm/pgtable.h>
> +#define LOAD_OFFSET KERNEL_LINK_ADDR
> #include <asm/vmlinux.lds.h>
> #include <asm/page.h>
> #include <asm/cache.h>
> diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
> index 736de6c8739f..71da78914645 100644
> --- a/arch/riscv/mm/init.c
> +++ b/arch/riscv/mm/init.c
> @@ -22,6 +22,9 @@
>
> #include "../kernel/head.h"
>
> +unsigned long kernel_virt_addr = KERNEL_VIRT_ADDR;
> +EXPORT_SYMBOL(kernel_virt_addr);
> +
> unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]
> __page_aligned_bss;
> EXPORT_SYMBOL(empty_zero_page);
> @@ -178,8 +181,12 @@ void __init setup_bootmem(void)
> }
>
> #ifdef CONFIG_MMU
> +/* Offset between linear mapping virtual address and kernel load address */
> unsigned long va_pa_offset;
> EXPORT_SYMBOL(va_pa_offset);
> +/* Offset between kernel mapping virtual address and kernel load address */
> +unsigned long va_kernel_pa_offset;
> +EXPORT_SYMBOL(va_kernel_pa_offset);
> unsigned long pfn_base;
> EXPORT_SYMBOL(pfn_base);
>
> @@ -271,7 +278,7 @@ static phys_addr_t __init alloc_pmd(uintptr_t va)
> if (mmu_enabled)
> return memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE);
>
> - pmd_num = (va - PAGE_OFFSET) >> PGDIR_SHIFT;
> + pmd_num = (va - kernel_virt_addr) >> PGDIR_SHIFT;
> BUG_ON(pmd_num >= NUM_EARLY_PMDS);
> return (uintptr_t)&early_pmd[pmd_num * PTRS_PER_PMD];
> }
> @@ -372,14 +379,30 @@ static uintptr_t __init best_map_size(phys_addr_t base, phys_addr_t size)
> #error "setup_vm() is called from head.S before relocate so it should not use absolute addressing."
> #endif
>
> +static uintptr_t load_pa, load_sz;
> +
> +static void __init create_kernel_page_table(pgd_t *pgdir, uintptr_t map_size)
> +{
> + uintptr_t va, end_va;
> +
> + end_va = kernel_virt_addr + load_sz;
> + for (va = kernel_virt_addr; va < end_va; va += map_size)
> + create_pgd_mapping(pgdir, va,
> + load_pa + (va - kernel_virt_addr),
> + map_size, PAGE_KERNEL_EXEC);
> +}
> +
> asmlinkage void __init setup_vm(uintptr_t dtb_pa)
> {
> uintptr_t va, end_va;
> - uintptr_t load_pa = (uintptr_t)(&_start);
> - uintptr_t load_sz = (uintptr_t)(&_end) - load_pa;
> uintptr_t map_size = best_map_size(load_pa, MAX_EARLY_MAPPING_SIZE);
>
> + load_pa = (uintptr_t)(&_start);
> + load_sz = (uintptr_t)(&_end) - load_pa;
> +
> va_pa_offset = PAGE_OFFSET - load_pa;
> + va_kernel_pa_offset = kernel_virt_addr - load_pa;
> +
> pfn_base = PFN_DOWN(load_pa);
>
> /*
> @@ -402,26 +425,22 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
> create_pmd_mapping(fixmap_pmd, FIXADDR_START,
> (uintptr_t)fixmap_pte, PMD_SIZE, PAGE_TABLE);
> /* Setup trampoline PGD and PMD */
> - create_pgd_mapping(trampoline_pg_dir, PAGE_OFFSET,
> + create_pgd_mapping(trampoline_pg_dir, kernel_virt_addr,
> (uintptr_t)trampoline_pmd, PGDIR_SIZE, PAGE_TABLE);
> - create_pmd_mapping(trampoline_pmd, PAGE_OFFSET,
> + create_pmd_mapping(trampoline_pmd, kernel_virt_addr,
> load_pa, PMD_SIZE, PAGE_KERNEL_EXEC);
> #else
> /* Setup trampoline PGD */
> - create_pgd_mapping(trampoline_pg_dir, PAGE_OFFSET,
> + create_pgd_mapping(trampoline_pg_dir, kernel_virt_addr,
> load_pa, PGDIR_SIZE, PAGE_KERNEL_EXEC);
> #endif
>
> /*
> - * Setup early PGD covering entire kernel which will allows
> + * Setup early PGD covering entire kernel which will allow
> * us to reach paging_init(). We map all memory banks later
> * in setup_vm_final() below.
> */
> - end_va = PAGE_OFFSET + load_sz;
> - for (va = PAGE_OFFSET; va < end_va; va += map_size)
> - create_pgd_mapping(early_pg_dir, va,
> - load_pa + (va - PAGE_OFFSET),
> - map_size, PAGE_KERNEL_EXEC);
> + create_kernel_page_table(early_pg_dir, map_size);
>
> /* Create fixed mapping for early FDT parsing */
> end_va = __fix_to_virt(FIX_FDT) + FIX_FDT_SIZE;
> @@ -441,6 +460,7 @@ static void __init setup_vm_final(void)
> uintptr_t va, map_size;
> phys_addr_t pa, start, end;
> struct memblock_region *reg;
> + static struct vm_struct vm_kernel = { 0 };
>
> /* Set mmu_enabled flag */
> mmu_enabled = true;
> @@ -467,10 +487,22 @@ static void __init setup_vm_final(void)
> for (pa = start; pa < end; pa += map_size) {
> va = (uintptr_t)__va(pa);
> create_pgd_mapping(swapper_pg_dir, va, pa,
> - map_size, PAGE_KERNEL_EXEC);
> + map_size, PAGE_KERNEL);
> }
> }
>
> + /* Map the kernel */
> + create_kernel_page_table(swapper_pg_dir, PMD_SIZE);
> +
> + /* Reserve the vmalloc area occupied by the kernel */
> + vm_kernel.addr = (void *)kernel_virt_addr;
> + vm_kernel.phys_addr = load_pa;
> + vm_kernel.size = (load_sz + PMD_SIZE - 1) & ~(PMD_SIZE - 1);
> + vm_kernel.flags = VM_MAP | VM_NO_GUARD;
> + vm_kernel.caller = __builtin_return_address(0);
> +
> + vm_area_add_early(&vm_kernel);
> +
> /* Clear fixmap PTE and PMD mappings */
> clear_fixmap(FIX_PTE);
> clear_fixmap(FIX_PMD);
> diff --git a/arch/riscv/mm/physaddr.c b/arch/riscv/mm/physaddr.c
> index e8e4dcd39fed..35703d5ef5fd 100644
> --- a/arch/riscv/mm/physaddr.c
> +++ b/arch/riscv/mm/physaddr.c
> @@ -23,7 +23,7 @@ EXPORT_SYMBOL(__virt_to_phys);
>
> phys_addr_t __phys_addr_symbol(unsigned long x)
> {
> - unsigned long kernel_start = (unsigned long)PAGE_OFFSET;
> + unsigned long kernel_start = (unsigned long)kernel_virt_addr;
> unsigned long kernel_end = (unsigned long)_end;
>
> /*
> --
> 2.20.1
>
>
--
Regards,
Atish
^ permalink raw reply
* Re: [PATCH v2] powerpc: Remove inaccessible CMDLINE default
From: Chris Packham @ 2020-06-11 21:14 UTC (permalink / raw)
To: Christophe Leroy, mpe@ellerman.id.au, benh@kernel.crashing.org,
paulus@samba.org, christophe.leroy@c-s.fr
Cc: linuxppc-dev@lists.ozlabs.org, linux-kernel@vger.kernel.org
In-Reply-To: <34bb20ad-8522-6071-7a36-9f615204561f@csgroup.eu>
On 11/06/20 5:46 pm, Christophe Leroy wrote:
>
>
> Le 11/06/2020 à 05:41, Chris Packham a écrit :
>> Since commit cbe46bd4f510 ("powerpc: remove CONFIG_CMDLINE #ifdef mess")
>> CONFIG_CMDLINE has always had a value regardless of CONFIG_CMDLINE_BOOL.
>>
>> For example:
>>
>> $ make ARCH=powerpc defconfig
>> $ cat .config
>> # CONFIG_CMDLINE_BOOL is not set
>> CONFIG_CMDLINE=""
>>
>> When enabling CONFIG_CMDLINE_BOOL this value is kept making the 'default
>> "..." if CONFIG_CMDLINE_BOOL' ineffective.
>>
>> $ ./scripts/config --enable CONFIG_CMDLINE_BOOL
>> $ cat .config
>> CONFIG_CMDLINE_BOOL=y
>> CONFIG_CMDLINE=""
>>
>> Remove CONFIG_CMDLINE_BOOL and the inaccessible default.
>
> You also have to remove all CONFIG_CMDLINE_BOOL from the defconfigs
OK. I'll do so as a follow-up patch and send a v3.
>
> Christophe
>
>>
>> Signed-off-by: Chris Packham <chris.packham@alliedtelesis.co.nz>
>> Reviewed-by: Christophe Leroy <christophe.leroy@c-s.fr>
>> ---
>> It took me a while to get round to sending a v2, for a refresher v1
>> can be found here:
>>
>> http://patchwork.ozlabs.org/project/linuxppc-dev/patch/20190802050232.22978-1-chris.packham@alliedtelesis.co.nz/
>>
>>
>> Changes in v2:
>> - Rebase on top of Linus's tree
>> - Fix some typos in commit message
>> - Add review from Christophe
>> - Remove CONFIG_CMDLINE_BOOL
>>
>> arch/powerpc/Kconfig | 6 +-----
>> 1 file changed, 1 insertion(+), 5 deletions(-)
>>
>> diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
>> index 9fa23eb320ff..51abc59c3334 100644
>> --- a/arch/powerpc/Kconfig
>> +++ b/arch/powerpc/Kconfig
>> @@ -859,12 +859,8 @@ config PPC_DENORMALISATION
>> Add support for handling denormalisation of single precision
>> values. Useful for bare metal only. If unsure say Y here.
>> -config CMDLINE_BOOL
>> - bool "Default bootloader kernel arguments"
>> -
>> config CMDLINE
>> - string "Initial kernel command string" if CMDLINE_BOOL
>> - default "console=ttyS0,9600 console=tty0 root=/dev/sda2" if
>> CMDLINE_BOOL
>> + string "Initial kernel command string"
>> default ""
>> help
>> On some platforms, there is currently no way for the boot
>> loader to
>>
^ permalink raw reply
* Re: [PATCH v2] All arch: remove system call sys_sysctl
From: Eric W. Biederman @ 2020-06-11 18:23 UTC (permalink / raw)
To: Rich Felker
Cc: linux-sh, catalin.marinas, paulus, ak, paulburton, geert,
mattst88, brgerst, acme, cyphar, viro, luto, tglx, surenb, rth,
young.liuyang, linux-parisc, rdunlap, linux-kernel, mcgrof,
linux-fsdevel, akpm, mark.rutland, linux-ia64, linux-xtensa,
jongk, linux, James.Bottomley, jcmvbkbc, linux-s390, ysato,
deller, yzaikin, Xiaoming Ni, mszeredi, gor, linux-alpha,
linux-m68k, linux-arm-kernel, chris, tony.luck, linux-api,
zhouyanjie, minchan, sargun, alexander.shishkin, heiko.carstens,
alex.huangjianhui, will, krzk, borntraeger, vbabka, samitolvanen,
flameeyes, ravi.bangoria, elver, keescook, arnd, bp, christian,
tsbogend, jiri, martin.petersen, yamada.masahiro, oleg,
sudeep.holla, olof, shawnguo, davem, bauerman, fenghua.yu, peterz,
dhowells, hpa, sparclinux, jolsa, svens, x86, linux, mingo,
naveen.n.rao, paulmck, sfr, npiggin, namhyung, dvyukov, axboe,
monstr, haolee.swjtu, linux-mips, ink, linuxppc-dev
In-Reply-To: <20200611172028.GO1079@brightrain.aerifal.cx>
Rich Felker <dalias@libc.org> writes:
> On Thu, Jun 11, 2020 at 12:01:11PM -0500, Eric W. Biederman wrote:
>> Rich Felker <dalias@libc.org> writes:
>>
>> > On Thu, Jun 11, 2020 at 06:43:00AM -0500, Eric W. Biederman wrote:
>> >> Xiaoming Ni <nixiaoming@huawei.com> writes:
>> >>
>> >> > Since the commit 61a47c1ad3a4dc ("sysctl: Remove the sysctl system call"),
>> >> > sys_sysctl is actually unavailable: any input can only return an error.
>> >> >
>> >> > We have been warning about people using the sysctl system call for years
>> >> > and believe there are no more users. Even if there are users of this
>> >> > interface if they have not complained or fixed their code by now they
>> >> > probably are not going to, so there is no point in warning them any
>> >> > longer.
>> >> >
>> >> > So completely remove sys_sysctl on all architectures.
>> >>
>> >>
>> >>
>> >> >
>> >> > Signed-off-by: Xiaoming Ni <nixiaoming@huawei.com>
>> >> >
>> >> > changes in v2:
>> >> > According to Kees Cook's suggestion, completely remove sys_sysctl on all arch
>> >> > According to Eric W. Biederman's suggestion, update the commit log
>> >> >
>> >> > V1: https://lore.kernel.org/lkml/1591683605-8585-1-git-send-email-nixiaoming@huawei.com/
>> >> > Delete the code of sys_sysctl and return -ENOSYS directly at the function entry
>> >> > ---
>> >> > include/uapi/linux/sysctl.h | 15 --
>> >> [snip]
>> >>
>> >> > diff --git a/include/uapi/linux/sysctl.h b/include/uapi/linux/sysctl.h
>> >> > index 27c1ed2..84b44c3 100644
>> >> > --- a/include/uapi/linux/sysctl.h
>> >> > +++ b/include/uapi/linux/sysctl.h
>> >> > @@ -27,21 +27,6 @@
>> >> > #include <linux/types.h>
>> >> > #include <linux/compiler.h>
>> >> >
>> >> > -#define CTL_MAXNAME 10 /* how many path components do we allow in a
>> >> > - call to sysctl? In other words, what is
>> >> > - the largest acceptable value for the nlen
>> >> > - member of a struct __sysctl_args to have? */
>> >> > -
>> >> > -struct __sysctl_args {
>> >> > - int __user *name;
>> >> > - int nlen;
>> >> > - void __user *oldval;
>> >> > - size_t __user *oldlenp;
>> >> > - void __user *newval;
>> >> > - size_t newlen;
>> >> > - unsigned long __unused[4];
>> >> > -};
>> >> > -
>> >> > /* Define sysctl names first */
>> >> >
>> >> > /* Top-level names: */
>> >> [snip]
>> >>
>> >> The uapi header change does not make sense. The entire point of the
>> >> header is to allow userspace programs to be able to call sys_sysctl.
>> >> It either needs to all stay or all go.
>> >>
>> >> As the concern with the uapi header is about userspace programs being
>> >> able to compile please leave the header for now.
>> >>
>> >> We should leave auditing userspace and seeing if userspace code will
>> >> still compile if we remove this header for a separate patch. The
>> >> concerns and justifications for the uapi header are completely different
>> >> then for the removing the sys_sysctl implementation.
>> >>
>> >> Otherwise
>> >> Acked-by: "Eric W. Biederman" <ebiederm@xmission.com>
>> >
>> > The UAPI header should be kept because it's defining an API not just
>> > for the kernel the headers are supplied with, but for all past
>> > kernels. In particular programs needing a failsafe CSPRNG source that
>> > works on old kernels may (do) use this as a fallback only if modern
>> > syscalls are missing. Removing the syscall is no problem since it
>> > won't be used, but if you remove the types/macros from the UAPI
>> > headers, they'll have to copy that into their own sources.
>>
>> May we assume you know of a least one piece of userspace that will fail
>> to compile if this header file is removed?
>
> I know at least one piece of software is using SYS_sysctl for a
> fallback CSPRNG source. I'm not 100% sure that they're using the
> kernel headers; they might have copied it already. I'm also not sure
> how many there are.
>
> Regardless, I think the principle stands. There's no need to remove
> definitions that are essentially maintenance-free now that the
> interface is no longer available in new kernels, and doing so
> contributes to the myth that you're supposed to use kernel headers
> matching runtime kernel rather than it always being safe to use latest
> headers.
If no one is using the definitions, removing them saves people from
having to remember what they are there for.
The big rule is don't break userspace. The goal is to allow people to
upgrade their kernel without needing to worry about userspace breaking,
and to be able to downgrade to the extent possible to help in tracking
bugs.
Not being able to compile userspace seems like a pretty clear-cut case,
although there are some fuzzy edges given the history of the kernel
headers, such as your libc requiring kernel headers to be processed
before they can be used. I think there are still some kernel headers
that have that restriction when used with glibc, as glibc uses different
sizes for types like dev_t.
The bottom line is that we can't do it casually, so any work in the
direction of removing things from or deleting uapi headers needs to be
its own separate patch.
Given how much effort it can be to show that userspace is not using
something, I don't expect us to be mucking with the uapi headers any time
soon.
Eric
^ permalink raw reply
* Re: [PATCH v2] All arch: remove system call sys_sysctl
From: Eric W. Biederman @ 2020-06-11 17:01 UTC (permalink / raw)
To: Rich Felker
Cc: linux-sh, catalin.marinas, paulus, ak, paulburton, geert,
mattst88, brgerst, acme, cyphar, viro, luto, tglx, surenb, rth,
young.liuyang, linux-parisc, rdunlap, linux-kernel, mcgrof,
linux-fsdevel, akpm, mark.rutland, linux-ia64, linux-xtensa,
jongk, linux, James.Bottomley, jcmvbkbc, linux-s390, ysato,
deller, yzaikin, Xiaoming Ni, mszeredi, gor, linux-alpha,
linux-m68k, linux-arm-kernel, chris, tony.luck, linux-api,
zhouyanjie, minchan, sargun, alexander.shishkin, heiko.carstens,
alex.huangjianhui, will, krzk, borntraeger, vbabka, samitolvanen,
flameeyes, ravi.bangoria, elver, keescook, arnd, bp, christian,
tsbogend, jiri, martin.petersen, yamada.masahiro, oleg,
sudeep.holla, olof, shawnguo, davem, bauerman, fenghua.yu, peterz,
dhowells, hpa, sparclinux, jolsa, svens, x86, linux, mingo,
naveen.n.rao, paulmck, sfr, npiggin, namhyung, dvyukov, axboe,
monstr, haolee.swjtu, linux-mips, ink, linuxppc-dev
In-Reply-To: <20200611163902.GN1079@brightrain.aerifal.cx>
Rich Felker <dalias@libc.org> writes:
> On Thu, Jun 11, 2020 at 06:43:00AM -0500, Eric W. Biederman wrote:
>> Xiaoming Ni <nixiaoming@huawei.com> writes:
>>
>> > Since the commit 61a47c1ad3a4dc ("sysctl: Remove the sysctl system call"),
>> > sys_sysctl is actually unavailable: any input can only return an error.
>> >
>> > We have been warning about people using the sysctl system call for years
>> > and believe there are no more users. Even if there are users of this
>> > interface if they have not complained or fixed their code by now they
>> > probably are not going to, so there is no point in warning them any
>> > longer.
>> >
>> > So completely remove sys_sysctl on all architectures.
>>
>>
>>
>> >
>> > Signed-off-by: Xiaoming Ni <nixiaoming@huawei.com>
>> >
>> > changes in v2:
>> > According to Kees Cook's suggestion, completely remove sys_sysctl on all arch
>> > According to Eric W. Biederman's suggestion, update the commit log
>> >
>> > V1: https://lore.kernel.org/lkml/1591683605-8585-1-git-send-email-nixiaoming@huawei.com/
>> > Delete the code of sys_sysctl and return -ENOSYS directly at the function entry
>> > ---
>> > include/uapi/linux/sysctl.h | 15 --
>> [snip]
>>
>> > diff --git a/include/uapi/linux/sysctl.h b/include/uapi/linux/sysctl.h
>> > index 27c1ed2..84b44c3 100644
>> > --- a/include/uapi/linux/sysctl.h
>> > +++ b/include/uapi/linux/sysctl.h
>> > @@ -27,21 +27,6 @@
>> > #include <linux/types.h>
>> > #include <linux/compiler.h>
>> >
>> > -#define CTL_MAXNAME 10 /* how many path components do we allow in a
>> > - call to sysctl? In other words, what is
>> > - the largest acceptable value for the nlen
>> > - member of a struct __sysctl_args to have? */
>> > -
>> > -struct __sysctl_args {
>> > - int __user *name;
>> > - int nlen;
>> > - void __user *oldval;
>> > - size_t __user *oldlenp;
>> > - void __user *newval;
>> > - size_t newlen;
>> > - unsigned long __unused[4];
>> > -};
>> > -
>> > /* Define sysctl names first */
>> >
>> > /* Top-level names: */
>> [snip]
>>
>> The uapi header change does not make sense. The entire point of the
>> header is to allow userspace programs to be able to call sys_sysctl.
>> It either needs to all stay or all go.
>>
>> As the concern with the uapi header is about userspace programs being
>> able to compile please leave the header for now.
>>
>> We should leave auditing userspace and seeing if userspace code will
>> still compile if we remove this header for a separate patch. The
>> concerns and justifications for the uapi header are completely different
>> then for the removing the sys_sysctl implementation.
>>
>> Otherwise
>> Acked-by: "Eric W. Biederman" <ebiederm@xmission.com>
>
> The UAPI header should be kept because it's defining an API not just
> for the kernel the headers are supplied with, but for all past
> kernels. In particular programs needing a failsafe CSPRNG source that
> works on old kernels may (do) use this as a fallback only if modern
> syscalls are missing. Removing the syscall is no problem since it
> won't be used, but if you remove the types/macros from the UAPI
> headers, they'll have to copy that into their own sources.
May we assume you know of at least one piece of userspace that will fail
to compile if this header file is removed?
Eric
^ permalink raw reply
* Re: [PATCH v2] All arch: remove system call sys_sysctl
From: Rich Felker @ 2020-06-11 17:20 UTC (permalink / raw)
To: Eric W. Biederman
Cc: linux-sh, catalin.marinas, paulus, ak, paulburton, geert,
mattst88, brgerst, acme, cyphar, viro, luto, tglx, surenb, rth,
young.liuyang, linux-parisc, rdunlap, linux-kernel, mcgrof,
linux-fsdevel, akpm, mark.rutland, linux-ia64, linux-xtensa,
jongk, linux, James.Bottomley, jcmvbkbc, linux-s390, ysato,
deller, yzaikin, Xiaoming Ni, mszeredi, gor, linux-alpha,
linux-m68k, linux-arm-kernel, chris, tony.luck, linux-api,
zhouyanjie, minchan, sargun, alexander.shishkin, heiko.carstens,
alex.huangjianhui, will, krzk, borntraeger, vbabka, samitolvanen,
flameeyes, ravi.bangoria, elver, keescook, arnd, bp, christian,
tsbogend, jiri, martin.petersen, yamada.masahiro, oleg,
sudeep.holla, olof, shawnguo, davem, bauerman, fenghua.yu, peterz,
dhowells, hpa, sparclinux, jolsa, svens, x86, linux, mingo,
naveen.n.rao, paulmck, sfr, npiggin, namhyung, dvyukov, axboe,
monstr, haolee.swjtu, linux-mips, ink, linuxppc-dev
In-Reply-To: <87ftb1sfjc.fsf@x220.int.ebiederm.org>
On Thu, Jun 11, 2020 at 12:01:11PM -0500, Eric W. Biederman wrote:
> Rich Felker <dalias@libc.org> writes:
>
> > On Thu, Jun 11, 2020 at 06:43:00AM -0500, Eric W. Biederman wrote:
> >> Xiaoming Ni <nixiaoming@huawei.com> writes:
> >>
> >> > Since the commit 61a47c1ad3a4dc ("sysctl: Remove the sysctl system call"),
> >> > sys_sysctl is actually unavailable: any input can only return an error.
> >> >
> >> > We have been warning about people using the sysctl system call for years
> >> > and believe there are no more users. Even if there are users of this
> >> > interface if they have not complained or fixed their code by now they
> >> > probably are not going to, so there is no point in warning them any
> >> > longer.
> >> >
> >> > So completely remove sys_sysctl on all architectures.
> >>
> >>
> >>
> >> >
> >> > Signed-off-by: Xiaoming Ni <nixiaoming@huawei.com>
> >> >
> >> > changes in v2:
> >> > According to Kees Cook's suggestion, completely remove sys_sysctl on all arch
> >> > According to Eric W. Biederman's suggestion, update the commit log
> >> >
> >> > V1: https://lore.kernel.org/lkml/1591683605-8585-1-git-send-email-nixiaoming@huawei.com/
> >> > Delete the code of sys_sysctl and return -ENOSYS directly at the function entry
> >> > ---
> >> > include/uapi/linux/sysctl.h | 15 --
> >> [snip]
> >>
> >> > diff --git a/include/uapi/linux/sysctl.h b/include/uapi/linux/sysctl.h
> >> > index 27c1ed2..84b44c3 100644
> >> > --- a/include/uapi/linux/sysctl.h
> >> > +++ b/include/uapi/linux/sysctl.h
> >> > @@ -27,21 +27,6 @@
> >> > #include <linux/types.h>
> >> > #include <linux/compiler.h>
> >> >
> >> > -#define CTL_MAXNAME 10 /* how many path components do we allow in a
> >> > - call to sysctl? In other words, what is
> >> > - the largest acceptable value for the nlen
> >> > - member of a struct __sysctl_args to have? */
> >> > -
> >> > -struct __sysctl_args {
> >> > - int __user *name;
> >> > - int nlen;
> >> > - void __user *oldval;
> >> > - size_t __user *oldlenp;
> >> > - void __user *newval;
> >> > - size_t newlen;
> >> > - unsigned long __unused[4];
> >> > -};
> >> > -
> >> > /* Define sysctl names first */
> >> >
> >> > /* Top-level names: */
> >> [snip]
> >>
> >> The uapi header change does not make sense. The entire point of the
> >> header is to allow userspace programs to be able to call sys_sysctl.
> >> It either needs to all stay or all go.
> >>
> >> As the concern with the uapi header is about userspace programs being
> >> able to compile please leave the header for now.
> >>
> >> We should leave auditing userspace and seeing if userspace code will
> >> still compile if we remove this header for a separate patch. The
> >> concerns and justifications for the uapi header are completely different
> >> then for the removing the sys_sysctl implementation.
> >>
> >> Otherwise
> >> Acked-by: "Eric W. Biederman" <ebiederm@xmission.com>
> >
> > The UAPI header should be kept because it's defining an API not just
> > for the kernel the headers are supplied with, but for all past
> > kernels. In particular programs needing a failsafe CSPRNG source that
> > works on old kernels may (do) use this as a fallback only if modern
> > syscalls are missing. Removing the syscall is no problem since it
> > won't be used, but if you remove the types/macros from the UAPI
> > headers, they'll have to copy that into their own sources.
>
> May we assume you know of a least one piece of userspace that will fail
> to compile if this header file is removed?
I know at least one piece of software is using SYS_sysctl for a
fallback CSPRNG source. I'm not 100% sure that they're using the
kernel headers; they might have copied it already. I'm also not sure
how many there are.
Regardless, I think the principle stands. There's no need to remove
definitions that are essentially maintenance-free now that the
interface is no longer available in new kernels, and doing so
contributes to the myth that you're supposed to use kernel headers
matching the runtime kernel, when in fact it is always safe to use the
latest headers.
Rich
^ permalink raw reply
* Re: [PATCH v2] All arch: remove system call sys_sysctl
From: Rich Felker @ 2020-06-11 16:39 UTC (permalink / raw)
To: Eric W. Biederman
Cc: linux-sh, catalin.marinas, paulus, ak, paulburton, geert,
mattst88, brgerst, acme, cyphar, viro, luto, tglx, surenb, rth,
young.liuyang, linux-parisc, rdunlap, linux-kernel, mcgrof,
linux-fsdevel, akpm, mark.rutland, linux-ia64, linux-xtensa,
jongk, linux, James.Bottomley, jcmvbkbc, linux-s390, ysato,
deller, yzaikin, Xiaoming Ni, mszeredi, gor, linux-alpha,
linux-m68k, linux-arm-kernel, chris, tony.luck, linux-api,
zhouyanjie, minchan, sargun, alexander.shishkin, heiko.carstens,
alex.huangjianhui, will, krzk, borntraeger, vbabka, samitolvanen,
flameeyes, ravi.bangoria, elver, keescook, arnd, bp, christian,
tsbogend, jiri, martin.petersen, yamada.masahiro, oleg,
sudeep.holla, olof, shawnguo, davem, bauerman, fenghua.yu, peterz,
dhowells, hpa, sparclinux, jolsa, svens, x86, linux, mingo,
naveen.n.rao, paulmck, sfr, npiggin, namhyung, dvyukov, axboe,
monstr, haolee.swjtu, linux-mips, ink, linuxppc-dev
In-Reply-To: <87ftb1u8u3.fsf@x220.int.ebiederm.org>
On Thu, Jun 11, 2020 at 06:43:00AM -0500, Eric W. Biederman wrote:
> Xiaoming Ni <nixiaoming@huawei.com> writes:
>
> > Since the commit 61a47c1ad3a4dc ("sysctl: Remove the sysctl system call"),
> > sys_sysctl is actually unavailable: any input can only return an error.
> >
> > We have been warning about people using the sysctl system call for years
> > and believe there are no more users. Even if there are users of this
> > interface if they have not complained or fixed their code by now they
> > probably are not going to, so there is no point in warning them any
> > longer.
> >
> > So completely remove sys_sysctl on all architectures.
>
>
>
> >
> > Signed-off-by: Xiaoming Ni <nixiaoming@huawei.com>
> >
> > changes in v2:
> > According to Kees Cook's suggestion, completely remove sys_sysctl on all arch
> > According to Eric W. Biederman's suggestion, update the commit log
> >
> > V1: https://lore.kernel.org/lkml/1591683605-8585-1-git-send-email-nixiaoming@huawei.com/
> > Delete the code of sys_sysctl and return -ENOSYS directly at the function entry
> > ---
> > include/uapi/linux/sysctl.h | 15 --
> [snip]
>
> > diff --git a/include/uapi/linux/sysctl.h b/include/uapi/linux/sysctl.h
> > index 27c1ed2..84b44c3 100644
> > --- a/include/uapi/linux/sysctl.h
> > +++ b/include/uapi/linux/sysctl.h
> > @@ -27,21 +27,6 @@
> > #include <linux/types.h>
> > #include <linux/compiler.h>
> >
> > -#define CTL_MAXNAME 10 /* how many path components do we allow in a
> > - call to sysctl? In other words, what is
> > - the largest acceptable value for the nlen
> > - member of a struct __sysctl_args to have? */
> > -
> > -struct __sysctl_args {
> > - int __user *name;
> > - int nlen;
> > - void __user *oldval;
> > - size_t __user *oldlenp;
> > - void __user *newval;
> > - size_t newlen;
> > - unsigned long __unused[4];
> > -};
> > -
> > /* Define sysctl names first */
> >
> > /* Top-level names: */
> [snip]
>
> The uapi header change does not make sense. The entire point of the
> header is to allow userspace programs to be able to call sys_sysctl.
> It either needs to all stay or all go.
>
> As the concern with the uapi header is about userspace programs being
> able to compile please leave the header for now.
>
> We should leave auditing userspace and seeing if userspace code will
> still compile if we remove this header for a separate patch. The
> concerns and justifications for the uapi header are completely different
> then for the removing the sys_sysctl implementation.
>
> Otherwise
> Acked-by: "Eric W. Biederman" <ebiederm@xmission.com>
The UAPI header should be kept because it's defining an API not just
for the kernel the headers are supplied with, but for all past
kernels. In particular programs needing a failsafe CSPRNG source that
works on old kernels may (do) use this as a fallback only if modern
syscalls are missing. Removing the syscall is no problem since it
won't be used, but if you remove the types/macros from the UAPI
headers, they'll have to copy that into their own sources.
Rich
^ permalink raw reply
* Re: Linux powerpc new system call instruction and ABI
From: Segher Boessenkool @ 2020-06-11 21:02 UTC (permalink / raw)
To: Nicholas Piggin; +Cc: libc-dev, linux-api, linuxppc-dev, musl
In-Reply-To: <20200611081203.995112-1-npiggin@gmail.com>
Hi!
On Thu, Jun 11, 2020 at 06:12:01PM +1000, Nicholas Piggin wrote:
> Calling convention
> ------------------
> The proposal is for scv 0 to provide the standard Linux system call ABI
> with the following differences from sc convention[1]:
>
> - lr is to be volatile across scv calls. This is necessary because the
> scv instruction clobbers lr. From previous discussion, this should be
> possible to deal with in GCC clobbers and CFI.
>
> - cr1 and cr5-cr7 are volatile. This matches the C ABI and would allow the
> kernel system call exit to avoid restoring the volatile cr registers
> (although we probably still would anyway to avoid information leaks).
>
> - Error handling: The consensus among kernel, glibc, and musl is to move to
> using negative return values in r3 rather than CR0[SO]=1 to indicate error,
> which matches most other architectures, and is closer to a function call.
What about cr0 then? Will it be volatile as well (exactly like for
function calls)?
> Notes
> -----
> - r0,r4-r8 are documented as volatile in the ABI, but the kernel patch as
> submitted currently preserves them. This is to leave room for deciding
> which way to go with these.
The kernel has to set it to *something* that doesn't leak information ;-)
Segher
^ permalink raw reply
* Re: [PATCH v5 2/4] riscv: Introduce CONFIG_RELOCATABLE
From: Alex Ghiti @ 2020-06-11 19:43 UTC (permalink / raw)
To: Jerome Forissier, Michael Ellerman, Benjamin Herrenschmidt,
Paul Mackerras, Paul Walmsley, Palmer Dabbelt, Albert Ou,
Anup Patel, Atish Patra, Zong Li, linux-kernel, linuxppc-dev,
linux-riscv
Cc: Anup Patel
In-Reply-To: <b588dd9e-dff8-3458-0c7d-149e3990bca7@forissier.org>
Hi Jerome,
Le 6/10/20 à 10:10 AM, Jerome Forissier a écrit :
> On 6/7/20 9:59 AM, Alexandre Ghiti wrote:
> [...]
>
>> +config RELOCATABLE
>> + bool
>> + depends on MMU
>> + help
>> + This builds a kernel as a Position Independent Executable (PIE),
>> + which retains all relocation metadata required to relocate the
>> + kernel binary at runtime to a different virtual address than the
>> + address it was linked at.
>> + Since RISCV uses the RELA relocation format, this requires a
>> + relocation pass at runtime even if the kernel is loaded at the
>> + same address it was linked at.
> Is this true? I thought that the GNU linker would write the "proper"
> values by default, contrary to the LLVM linker (ld.lld) which would need
> a special flag: --apply-dynamic-relocs (by default the relocated places
> are set to zero). At least, it is my experience with Aarch64 on a
> different project. So, sorry if I'm talking nonsense here -- I have not
> looked at the details.
>
>
It seems that you're right, at least for aarch64, since they explicitly
pass the --no-apply-dynamic-relocs option. I tried again to boot without
relocating at runtime, and it fails on riscv. Could this be arch-specific?
Thanks,
Alex
^ permalink raw reply
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox