From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
To: linux-kernel@vger.kernel.org, akpm@linux-foundation.org,
torvalds@linux-foundation.org, stable@vger.kernel.org
Cc: lwn@lwn.net, jslaby@suse.cz,
Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Subject: Re: Linux 4.19.197
Date: Sun, 11 Jul 2021 13:21:12 +0200 [thread overview]
Message-ID: <162600247151114@kroah.com> (raw)
In-Reply-To: <162600247113716@kroah.com>
diff --git a/Makefile b/Makefile
index 63b0bc92a0fa..42073a4c6e2e 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
VERSION = 4
PATCHLEVEL = 19
-SUBLEVEL = 196
+SUBLEVEL = 197
EXTRAVERSION =
NAME = "People's Front"
diff --git a/arch/arm/boot/dts/dra7.dtsi b/arch/arm/boot/dts/dra7.dtsi
index 0c0781a37c5a..7f1fe4a72447 100644
--- a/arch/arm/boot/dts/dra7.dtsi
+++ b/arch/arm/boot/dts/dra7.dtsi
@@ -48,6 +48,7 @@
timer {
compatible = "arm,armv7-timer";
+ status = "disabled"; /* See ARM architected timer wrap erratum i940 */
interrupts = <GIC_PPI 13 (GIC_CPU_MASK_SIMPLE(2) | IRQ_TYPE_LEVEL_LOW)>,
<GIC_PPI 14 (GIC_CPU_MASK_SIMPLE(2) | IRQ_TYPE_LEVEL_LOW)>,
<GIC_PPI 11 (GIC_CPU_MASK_SIMPLE(2) | IRQ_TYPE_LEVEL_LOW)>,
@@ -910,6 +911,8 @@
reg = <0x48032000 0x80>;
interrupts = <GIC_SPI 33 IRQ_TYPE_LEVEL_HIGH>;
ti,hwmods = "timer2";
+ clock-names = "fck";
+ clocks = <&l4per_clkctrl DRA7_TIMER2_CLKCTRL 24>;
};
timer3: timer@48034000 {
@@ -917,6 +920,10 @@
reg = <0x48034000 0x80>;
interrupts = <GIC_SPI 34 IRQ_TYPE_LEVEL_HIGH>;
ti,hwmods = "timer3";
+ clock-names = "fck";
+ clocks = <&l4per_clkctrl DRA7_TIMER3_CLKCTRL 24>;
+ assigned-clocks = <&l4per_clkctrl DRA7_TIMER3_CLKCTRL 24>;
+ assigned-clock-parents = <&timer_sys_clk_div>;
};
timer4: timer@48036000 {
@@ -924,6 +931,10 @@
reg = <0x48036000 0x80>;
interrupts = <GIC_SPI 35 IRQ_TYPE_LEVEL_HIGH>;
ti,hwmods = "timer4";
+ clock-names = "fck";
+ clocks = <&l4per_clkctrl DRA7_TIMER4_CLKCTRL 24>;
+ assigned-clocks = <&l4per_clkctrl DRA7_TIMER4_CLKCTRL 24>;
+ assigned-clock-parents = <&timer_sys_clk_div>;
};
timer5: timer@48820000 {
diff --git a/arch/arm/boot/dts/imx6qdl-sabresd.dtsi b/arch/arm/boot/dts/imx6qdl-sabresd.dtsi
index 41384bbd2f60..03357d39870e 100644
--- a/arch/arm/boot/dts/imx6qdl-sabresd.dtsi
+++ b/arch/arm/boot/dts/imx6qdl-sabresd.dtsi
@@ -675,10 +675,6 @@
vin-supply = <&vgen5_reg>;
};
-®_vdd3p0 {
- vin-supply = <&sw2_reg>;
-};
-
®_vdd2p5 {
vin-supply = <&vgen5_reg>;
};
diff --git a/arch/arm/mach-omap1/pm.c b/arch/arm/mach-omap1/pm.c
index 3e1de14805e4..65b91a8379c9 100644
--- a/arch/arm/mach-omap1/pm.c
+++ b/arch/arm/mach-omap1/pm.c
@@ -610,11 +610,6 @@ static irqreturn_t omap_wakeup_interrupt(int irq, void *dev)
return IRQ_HANDLED;
}
-static struct irqaction omap_wakeup_irq = {
- .name = "peripheral wakeup",
- .handler = omap_wakeup_interrupt
-};
-
static const struct platform_suspend_ops omap_pm_ops = {
@@ -627,6 +622,7 @@ static const struct platform_suspend_ops omap_pm_ops = {
static int __init omap_pm_init(void)
{
int error = 0;
+ int irq;
if (!cpu_class_is_omap1())
return -ENODEV;
@@ -670,9 +666,12 @@ static int __init omap_pm_init(void)
arm_pm_idle = omap1_pm_idle;
if (cpu_is_omap7xx())
- setup_irq(INT_7XX_WAKE_UP_REQ, &omap_wakeup_irq);
+ irq = INT_7XX_WAKE_UP_REQ;
else if (cpu_is_omap16xx())
- setup_irq(INT_1610_WAKE_UP_REQ, &omap_wakeup_irq);
+ irq = INT_1610_WAKE_UP_REQ;
+ if (request_irq(irq, omap_wakeup_interrupt, 0, "peripheral wakeup",
+ NULL))
+ pr_err("Failed to request irq %d (peripheral wakeup)\n", irq);
/* Program new power ramp-up time
* (0 for most boards since we don't lower voltage when in deep sleep)
diff --git a/arch/arm/mach-omap1/time.c b/arch/arm/mach-omap1/time.c
index 524977a31a49..de590a85a42b 100644
--- a/arch/arm/mach-omap1/time.c
+++ b/arch/arm/mach-omap1/time.c
@@ -155,15 +155,11 @@ static irqreturn_t omap_mpu_timer1_interrupt(int irq, void *dev_id)
return IRQ_HANDLED;
}
-static struct irqaction omap_mpu_timer1_irq = {
- .name = "mpu_timer1",
- .flags = IRQF_TIMER | IRQF_IRQPOLL,
- .handler = omap_mpu_timer1_interrupt,
-};
-
static __init void omap_init_mpu_timer(unsigned long rate)
{
- setup_irq(INT_TIMER1, &omap_mpu_timer1_irq);
+ if (request_irq(INT_TIMER1, omap_mpu_timer1_interrupt,
+ IRQF_TIMER | IRQF_IRQPOLL, "mpu_timer1", NULL))
+ pr_err("Failed to request irq %d (mpu_timer1)\n", INT_TIMER1);
omap_mpu_timer_start(0, (rate / HZ) - 1, 1);
clockevent_mpu_timer1.cpumask = cpumask_of(0);
diff --git a/arch/arm/mach-omap1/timer32k.c b/arch/arm/mach-omap1/timer32k.c
index 0ae6c52a7d70..780fdf03c3ce 100644
--- a/arch/arm/mach-omap1/timer32k.c
+++ b/arch/arm/mach-omap1/timer32k.c
@@ -148,15 +148,11 @@ static irqreturn_t omap_32k_timer_interrupt(int irq, void *dev_id)
return IRQ_HANDLED;
}
-static struct irqaction omap_32k_timer_irq = {
- .name = "32KHz timer",
- .flags = IRQF_TIMER | IRQF_IRQPOLL,
- .handler = omap_32k_timer_interrupt,
-};
-
static __init void omap_init_32k_timer(void)
{
- setup_irq(INT_OS_TIMER, &omap_32k_timer_irq);
+ if (request_irq(INT_OS_TIMER, omap_32k_timer_interrupt,
+ IRQF_TIMER | IRQF_IRQPOLL, "32KHz timer", NULL))
+ pr_err("Failed to request irq %d(32KHz timer)\n", INT_OS_TIMER);
clockevent_32k_timer.cpumask = cpumask_of(0);
clockevents_config_and_register(&clockevent_32k_timer,
diff --git a/arch/arm/mach-omap2/board-generic.c b/arch/arm/mach-omap2/board-generic.c
index 6b4f4975cf7a..6e59c11131c4 100644
--- a/arch/arm/mach-omap2/board-generic.c
+++ b/arch/arm/mach-omap2/board-generic.c
@@ -330,7 +330,7 @@ DT_MACHINE_START(DRA74X_DT, "Generic DRA74X (Flattened Device Tree)")
.init_late = dra7xx_init_late,
.init_irq = omap_gic_of_init,
.init_machine = omap_generic_init,
- .init_time = omap5_realtime_timer_init,
+ .init_time = omap3_gptimer_timer_init,
.dt_compat = dra74x_boards_compat,
.restart = omap44xx_restart,
MACHINE_END
@@ -353,7 +353,7 @@ DT_MACHINE_START(DRA72X_DT, "Generic DRA72X (Flattened Device Tree)")
.init_late = dra7xx_init_late,
.init_irq = omap_gic_of_init,
.init_machine = omap_generic_init,
- .init_time = omap5_realtime_timer_init,
+ .init_time = omap3_gptimer_timer_init,
.dt_compat = dra72x_boards_compat,
.restart = omap44xx_restart,
MACHINE_END
diff --git a/arch/arm/mach-omap2/timer.c b/arch/arm/mach-omap2/timer.c
index 98ed5ac073bc..c4ba848e8af6 100644
--- a/arch/arm/mach-omap2/timer.c
+++ b/arch/arm/mach-omap2/timer.c
@@ -42,6 +42,7 @@
#include <linux/platform_device.h>
#include <linux/platform_data/dmtimer-omap.h>
#include <linux/sched_clock.h>
+#include <linux/cpu.h>
#include <asm/mach/time.h>
#include <asm/smp_twd.h>
@@ -64,15 +65,28 @@
/* Clockevent code */
-static struct omap_dm_timer clkev;
-static struct clock_event_device clockevent_gpt;
-
/* Clockevent hwmod for am335x and am437x suspend */
static struct omap_hwmod *clockevent_gpt_hwmod;
/* Clockesource hwmod for am437x suspend */
static struct omap_hwmod *clocksource_gpt_hwmod;
+struct dmtimer_clockevent {
+ struct clock_event_device dev;
+ struct omap_dm_timer timer;
+};
+
+static struct dmtimer_clockevent clockevent;
+
+static struct omap_dm_timer *to_dmtimer(struct clock_event_device *clockevent)
+{
+ struct dmtimer_clockevent *clkevt =
+ container_of(clockevent, struct dmtimer_clockevent, dev);
+ struct omap_dm_timer *timer = &clkevt->timer;
+
+ return timer;
+}
+
#ifdef CONFIG_SOC_HAS_REALTIME_COUNTER
static unsigned long arch_timer_freq;
@@ -84,24 +98,21 @@ void set_cntfreq(void)
static irqreturn_t omap2_gp_timer_interrupt(int irq, void *dev_id)
{
- struct clock_event_device *evt = &clockevent_gpt;
-
- __omap_dm_timer_write_status(&clkev, OMAP_TIMER_INT_OVERFLOW);
+ struct dmtimer_clockevent *clkevt = dev_id;
+ struct clock_event_device *evt = &clkevt->dev;
+ struct omap_dm_timer *timer = &clkevt->timer;
+ __omap_dm_timer_write_status(timer, OMAP_TIMER_INT_OVERFLOW);
evt->event_handler(evt);
return IRQ_HANDLED;
}
-static struct irqaction omap2_gp_timer_irq = {
- .name = "gp_timer",
- .flags = IRQF_TIMER | IRQF_IRQPOLL,
- .handler = omap2_gp_timer_interrupt,
-};
-
static int omap2_gp_timer_set_next_event(unsigned long cycles,
struct clock_event_device *evt)
{
- __omap_dm_timer_load_start(&clkev, OMAP_TIMER_CTRL_ST,
+ struct omap_dm_timer *timer = to_dmtimer(evt);
+
+ __omap_dm_timer_load_start(timer, OMAP_TIMER_CTRL_ST,
0xffffffff - cycles, OMAP_TIMER_POSTED);
return 0;
@@ -109,22 +120,26 @@ static int omap2_gp_timer_set_next_event(unsigned long cycles,
static int omap2_gp_timer_shutdown(struct clock_event_device *evt)
{
- __omap_dm_timer_stop(&clkev, OMAP_TIMER_POSTED, clkev.rate);
+ struct omap_dm_timer *timer = to_dmtimer(evt);
+
+ __omap_dm_timer_stop(timer, OMAP_TIMER_POSTED, timer->rate);
+
return 0;
}
static int omap2_gp_timer_set_periodic(struct clock_event_device *evt)
{
+ struct omap_dm_timer *timer = to_dmtimer(evt);
u32 period;
- __omap_dm_timer_stop(&clkev, OMAP_TIMER_POSTED, clkev.rate);
+ __omap_dm_timer_stop(timer, OMAP_TIMER_POSTED, timer->rate);
- period = clkev.rate / HZ;
+ period = timer->rate / HZ;
period -= 1;
/* Looks like we need to first set the load value separately */
- __omap_dm_timer_write(&clkev, OMAP_TIMER_LOAD_REG, 0xffffffff - period,
+ __omap_dm_timer_write(timer, OMAP_TIMER_LOAD_REG, 0xffffffff - period,
OMAP_TIMER_POSTED);
- __omap_dm_timer_load_start(&clkev,
+ __omap_dm_timer_load_start(timer,
OMAP_TIMER_CTRL_AR | OMAP_TIMER_CTRL_ST,
0xffffffff - period, OMAP_TIMER_POSTED);
return 0;
@@ -138,25 +153,16 @@ static void omap_clkevt_idle(struct clock_event_device *unused)
omap_hwmod_idle(clockevent_gpt_hwmod);
}
-static void omap_clkevt_unidle(struct clock_event_device *unused)
+static void omap_clkevt_unidle(struct clock_event_device *evt)
{
+ struct omap_dm_timer *timer = to_dmtimer(evt);
+
if (!clockevent_gpt_hwmod)
return;
omap_hwmod_enable(clockevent_gpt_hwmod);
- __omap_dm_timer_int_enable(&clkev, OMAP_TIMER_INT_OVERFLOW);
-}
-
-static struct clock_event_device clockevent_gpt = {
- .features = CLOCK_EVT_FEAT_PERIODIC |
- CLOCK_EVT_FEAT_ONESHOT,
- .rating = 300,
- .set_next_event = omap2_gp_timer_set_next_event,
- .set_state_shutdown = omap2_gp_timer_shutdown,
- .set_state_periodic = omap2_gp_timer_set_periodic,
- .set_state_oneshot = omap2_gp_timer_shutdown,
- .tick_resume = omap2_gp_timer_shutdown,
-};
+ __omap_dm_timer_int_enable(timer, OMAP_TIMER_INT_OVERFLOW);
+}
static const struct of_device_id omap_timer_match[] __initconst = {
{ .compatible = "ti,omap2420-timer", },
@@ -363,47 +369,104 @@ void tick_broadcast(const struct cpumask *mask)
}
#endif
-static void __init omap2_gp_clockevent_init(int gptimer_id,
- const char *fck_source,
- const char *property)
+static void __init dmtimer_clkevt_init_common(struct dmtimer_clockevent *clkevt,
+ int gptimer_id,
+ const char *fck_source,
+ unsigned int features,
+ const struct cpumask *cpumask,
+ const char *property,
+ int rating, const char *name)
{
+ struct omap_dm_timer *timer = &clkevt->timer;
int res;
- clkev.id = gptimer_id;
- clkev.errata = omap_dm_timer_get_errata();
+ timer->id = gptimer_id;
+ timer->errata = omap_dm_timer_get_errata();
+ clkevt->dev.features = features;
+ clkevt->dev.rating = rating;
+ clkevt->dev.set_next_event = omap2_gp_timer_set_next_event;
+ clkevt->dev.set_state_shutdown = omap2_gp_timer_shutdown;
+ clkevt->dev.set_state_periodic = omap2_gp_timer_set_periodic;
+ clkevt->dev.set_state_oneshot = omap2_gp_timer_shutdown;
+ clkevt->dev.tick_resume = omap2_gp_timer_shutdown;
/*
* For clock-event timers we never read the timer counter and
* so we are not impacted by errata i103 and i767. Therefore,
* we can safely ignore this errata for clock-event timers.
*/
- __omap_dm_timer_override_errata(&clkev, OMAP_TIMER_ERRATA_I103_I767);
+ __omap_dm_timer_override_errata(timer, OMAP_TIMER_ERRATA_I103_I767);
- res = omap_dm_timer_init_one(&clkev, fck_source, property,
- &clockevent_gpt.name, OMAP_TIMER_POSTED);
+ res = omap_dm_timer_init_one(timer, fck_source, property,
+ &clkevt->dev.name, OMAP_TIMER_POSTED);
BUG_ON(res);
- omap2_gp_timer_irq.dev_id = &clkev;
- setup_irq(clkev.irq, &omap2_gp_timer_irq);
+ clkevt->dev.cpumask = cpumask;
+ clkevt->dev.irq = omap_dm_timer_get_irq(timer);
- __omap_dm_timer_int_enable(&clkev, OMAP_TIMER_INT_OVERFLOW);
+ if (request_irq(clkevt->dev.irq, omap2_gp_timer_interrupt,
+ IRQF_TIMER | IRQF_IRQPOLL, name, clkevt))
+ pr_err("Failed to request irq %d (gp_timer)\n", clkevt->dev.irq);
- clockevent_gpt.cpumask = cpu_possible_mask;
- clockevent_gpt.irq = omap_dm_timer_get_irq(&clkev);
- clockevents_config_and_register(&clockevent_gpt, clkev.rate,
- 3, /* Timer internal resynch latency */
- 0xffffffff);
+ __omap_dm_timer_int_enable(timer, OMAP_TIMER_INT_OVERFLOW);
if (soc_is_am33xx() || soc_is_am43xx()) {
- clockevent_gpt.suspend = omap_clkevt_idle;
- clockevent_gpt.resume = omap_clkevt_unidle;
+ clkevt->dev.suspend = omap_clkevt_idle;
+ clkevt->dev.resume = omap_clkevt_unidle;
clockevent_gpt_hwmod =
- omap_hwmod_lookup(clockevent_gpt.name);
+ omap_hwmod_lookup(clkevt->dev.name);
+ }
+
+ pr_info("OMAP clockevent source: %s at %lu Hz\n", clkevt->dev.name,
+ timer->rate);
+}
+
+static DEFINE_PER_CPU(struct dmtimer_clockevent, dmtimer_percpu_timer);
+
+static int omap_gptimer_starting_cpu(unsigned int cpu)
+{
+ struct dmtimer_clockevent *clkevt = per_cpu_ptr(&dmtimer_percpu_timer, cpu);
+ struct clock_event_device *dev = &clkevt->dev;
+ struct omap_dm_timer *timer = &clkevt->timer;
+
+ clockevents_config_and_register(dev, timer->rate, 3, ULONG_MAX);
+ irq_force_affinity(dev->irq, cpumask_of(cpu));
+
+ return 0;
+}
+
+static int __init dmtimer_percpu_quirk_init(void)
+{
+ struct dmtimer_clockevent *clkevt;
+ struct clock_event_device *dev;
+ struct device_node *arm_timer;
+ struct omap_dm_timer *timer;
+ int cpu = 0;
+
+ arm_timer = of_find_compatible_node(NULL, NULL, "arm,armv7-timer");
+ if (of_device_is_available(arm_timer)) {
+ pr_warn_once("ARM architected timer wrap issue i940 detected\n");
+ return 0;
+ }
+
+ for_each_possible_cpu(cpu) {
+ clkevt = per_cpu_ptr(&dmtimer_percpu_timer, cpu);
+ dev = &clkevt->dev;
+ timer = &clkevt->timer;
+
+ dmtimer_clkevt_init_common(clkevt, 0, "timer_sys_ck",
+ CLOCK_EVT_FEAT_ONESHOT,
+ cpumask_of(cpu),
+ "assigned-clock-parents",
+ 500, "percpu timer");
}
- pr_info("OMAP clockevent source: %s at %lu Hz\n", clockevent_gpt.name,
- clkev.rate);
+ cpuhp_setup_state(CPUHP_AP_OMAP_DM_TIMER_STARTING,
+ "clockevents/omap/gptimer:starting",
+ omap_gptimer_starting_cpu, NULL);
+
+ return 0;
}
/* Clocksource code */
@@ -543,7 +606,15 @@ static void __init __omap_sync32k_timer_init(int clkev_nr, const char *clkev_src
{
omap_clk_init();
omap_dmtimer_init();
- omap2_gp_clockevent_init(clkev_nr, clkev_src, clkev_prop);
+ dmtimer_clkevt_init_common(&clockevent, clkev_nr, clkev_src,
+ CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT,
+ cpu_possible_mask, clkev_prop, 300, "clockevent");
+ clockevents_config_and_register(&clockevent.dev, clockevent.timer.rate,
+ 3, /* Timer internal resynch latency */
+ 0xffffffff);
+
+ if (soc_is_dra7xx())
+ dmtimer_percpu_quirk_init();
/* Enable the use of clocksource="gp_timer" kernel parameter */
if (use_gptimer_clksrc || gptimer)
@@ -572,7 +643,7 @@ void __init omap3_secure_sync32k_timer_init(void)
#endif /* CONFIG_ARCH_OMAP3 */
#if defined(CONFIG_ARCH_OMAP3) || defined(CONFIG_SOC_AM33XX) || \
- defined(CONFIG_SOC_AM43XX)
+ defined(CONFIG_SOC_AM43XX) || defined(CONFIG_SOC_DRA7XX)
void __init omap3_gptimer_timer_init(void)
{
__omap_sync32k_timer_init(2, "timer_sys_ck", NULL,
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index ad24e6777277..bd463d684237 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1791,9 +1791,25 @@ static void sev_asid_free(struct kvm *kvm)
__sev_asid_free(sev->asid);
}
-static void sev_unbind_asid(struct kvm *kvm, unsigned int handle)
+static void sev_decommission(unsigned int handle)
{
struct sev_data_decommission *decommission;
+
+ if (!handle)
+ return;
+
+ decommission = kzalloc(sizeof(*decommission), GFP_KERNEL);
+ if (!decommission)
+ return;
+
+ decommission->handle = handle;
+ sev_guest_decommission(decommission, NULL);
+
+ kfree(decommission);
+}
+
+static void sev_unbind_asid(struct kvm *kvm, unsigned int handle)
+{
struct sev_data_deactivate *data;
if (!handle)
@@ -1811,15 +1827,7 @@ static void sev_unbind_asid(struct kvm *kvm, unsigned int handle)
sev_guest_df_flush(NULL);
kfree(data);
- decommission = kzalloc(sizeof(*decommission), GFP_KERNEL);
- if (!decommission)
- return;
-
- /* decommission handle */
- decommission->handle = handle;
- sev_guest_decommission(decommission, NULL);
-
- kfree(decommission);
+ sev_decommission(handle);
}
static struct page **sev_pin_memory(struct kvm *kvm, unsigned long uaddr,
@@ -1954,6 +1962,7 @@ static void sev_vm_destroy(struct kvm *kvm)
list_for_each_safe(pos, q, head) {
__unregister_enc_region_locked(kvm,
list_entry(pos, struct enc_region, list));
+ cond_resched();
}
}
@@ -6468,8 +6477,10 @@ static int sev_launch_start(struct kvm *kvm, struct kvm_sev_cmd *argp)
/* Bind ASID to this guest */
ret = sev_bind_asid(kvm, start->handle, error);
- if (ret)
+ if (ret) {
+ sev_decommission(start->handle);
goto e_free_session;
+ }
/* return handle to userspace */
params.handle = start->handle;
diff --git a/drivers/clk/ti/clk-7xx.c b/drivers/clk/ti/clk-7xx.c
index 71a122b2dc67..b6d1ec49fa01 100644
--- a/drivers/clk/ti/clk-7xx.c
+++ b/drivers/clk/ti/clk-7xx.c
@@ -733,6 +733,7 @@ const struct omap_clkctrl_data dra7_clkctrl_data[] __initconst = {
static struct ti_dt_clk dra7xx_clks[] = {
DT_CLK(NULL, "timer_32k_ck", "sys_32k_ck"),
DT_CLK(NULL, "sys_clkin_ck", "timer_sys_clk_div"),
+ DT_CLK(NULL, "timer_sys_ck", "timer_sys_clk_div"),
DT_CLK(NULL, "sys_clkin", "sys_clkin1"),
DT_CLK(NULL, "atl_dpll_clk_mux", "atl_cm:0000:24"),
DT_CLK(NULL, "atl_gfclk_mux", "atl_cm:0000:26"),
diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c
index 7214022dfb91..d230536e7086 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -512,7 +512,7 @@ nouveau_bo_sync_for_device(struct nouveau_bo *nvbo)
struct ttm_dma_tt *ttm_dma = (struct ttm_dma_tt *)nvbo->bo.ttm;
int i;
- if (!ttm_dma)
+ if (!ttm_dma || !ttm_dma->dma_address)
return;
/* Don't waste time looping if the object is coherent */
@@ -532,7 +532,7 @@ nouveau_bo_sync_for_cpu(struct nouveau_bo *nvbo)
struct ttm_dma_tt *ttm_dma = (struct ttm_dma_tt *)nvbo->bo.ttm;
int i;
- if (!ttm_dma)
+ if (!ttm_dma || !ttm_dma->dma_address)
return;
/* Don't waste time looping if the object is coherent */
diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c
index 45c8bf39ad23..acf0c244141f 100644
--- a/drivers/scsi/sr.c
+++ b/drivers/scsi/sr.c
@@ -216,6 +216,8 @@ static unsigned int sr_get_events(struct scsi_device *sdev)
return DISK_EVENT_EJECT_REQUEST;
else if (med->media_event_code == 2)
return DISK_EVENT_MEDIA_CHANGE;
+ else if (med->media_event_code == 3)
+ return DISK_EVENT_EJECT_REQUEST;
return 0;
}
diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c
index b370144682ed..a2f8130e18fe 100644
--- a/drivers/xen/events/events_base.c
+++ b/drivers/xen/events/events_base.c
@@ -524,6 +524,9 @@ static void xen_irq_lateeoi_locked(struct irq_info *info, bool spurious)
}
info->eoi_time = 0;
+
+ /* is_active hasn't been reset yet, do it now. */
+ smp_store_release(&info->is_active, 0);
do_unmask(info, EVT_MASK_REASON_EOI_PENDING);
}
@@ -1780,10 +1783,22 @@ static void lateeoi_ack_dynirq(struct irq_data *data)
struct irq_info *info = info_for_irq(data->irq);
evtchn_port_t evtchn = info ? info->evtchn : 0;
- if (VALID_EVTCHN(evtchn)) {
- do_mask(info, EVT_MASK_REASON_EOI_PENDING);
- ack_dynirq(data);
- }
+ if (!VALID_EVTCHN(evtchn))
+ return;
+
+ do_mask(info, EVT_MASK_REASON_EOI_PENDING);
+
+ if (unlikely(irqd_is_setaffinity_pending(data)) &&
+ likely(!irqd_irq_disabled(data))) {
+ do_mask(info, EVT_MASK_REASON_TEMPORARY);
+
+ clear_evtchn(evtchn);
+
+ irq_move_masked_irq(data);
+
+ do_unmask(info, EVT_MASK_REASON_TEMPORARY);
+ } else
+ clear_evtchn(evtchn);
}
static void lateeoi_mask_ack_dynirq(struct irq_data *data)
diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c
index 1ea8fc9ff048..1bc65ecd4bd6 100644
--- a/fs/ext4/block_validity.c
+++ b/fs/ext4/block_validity.c
@@ -171,8 +171,10 @@ static int ext4_data_block_valid_rcu(struct ext4_sb_info *sbi,
else if (start_blk >= (entry->start_blk + entry->count))
n = n->rb_right;
else {
+ if (entry->ino == ino)
+ return 1;
sbi->s_es->s_last_error_block = cpu_to_le64(start_blk);
- return entry->ino == ino;
+ return 0;
}
}
return 1;
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index d67c0035165c..3d323c6c8526 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -118,6 +118,7 @@ enum cpuhp_state {
CPUHP_AP_ARM_L2X0_STARTING,
CPUHP_AP_EXYNOS4_MCT_TIMER_STARTING,
CPUHP_AP_ARM_ARCH_TIMER_STARTING,
+ CPUHP_AP_OMAP_DM_TIMER_STARTING,
CPUHP_AP_ARM_GLOBAL_TIMER_STARTING,
CPUHP_AP_JCORE_TIMER_STARTING,
CPUHP_AP_ARM_TWD_STARTING,
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index e375f2249f52..becf9b1eae5a 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -224,6 +224,7 @@ struct page *follow_devmap_pud(struct vm_area_struct *vma, unsigned long addr,
extern vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t orig_pmd);
extern struct page *huge_zero_page;
+extern unsigned long huge_zero_pfn;
static inline bool is_huge_zero_page(struct page *page)
{
@@ -232,7 +233,7 @@ static inline bool is_huge_zero_page(struct page *page)
static inline bool is_huge_zero_pmd(pmd_t pmd)
{
- return is_huge_zero_page(pmd_page(pmd));
+ return READ_ONCE(huge_zero_pfn) == pmd_pfn(pmd) && pmd_present(pmd);
}
static inline bool is_huge_zero_pud(pud_t pud)
@@ -342,6 +343,11 @@ static inline bool is_huge_zero_page(struct page *page)
return false;
}
+static inline bool is_huge_zero_pmd(pmd_t pmd)
+{
+ return false;
+}
+
static inline bool is_huge_zero_pud(pud_t pud)
{
return false;
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index c129c1c14c5f..2df83a659818 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -477,17 +477,6 @@ static inline int hstate_index(struct hstate *h)
return h - hstates;
}
-pgoff_t __basepage_index(struct page *page);
-
-/* Return page->index in PAGE_SIZE units */
-static inline pgoff_t basepage_index(struct page *page)
-{
- if (!PageCompound(page))
- return page->index;
-
- return __basepage_index(page);
-}
-
extern int dissolve_free_huge_page(struct page *page);
extern int dissolve_free_huge_pages(unsigned long start_pfn,
unsigned long end_pfn);
@@ -582,11 +571,6 @@ static inline int hstate_index(struct hstate *h)
return 0;
}
-static inline pgoff_t basepage_index(struct page *page)
-{
- return page->index;
-}
-
static inline int dissolve_free_huge_page(struct page *page)
{
return 0;
diff --git a/include/linux/mm.h b/include/linux/mm.h
index f6ecf41aea83..c736c677b876 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1338,6 +1338,7 @@ struct zap_details {
struct address_space *check_mapping; /* Check page->mapping if set */
pgoff_t first_index; /* Lowest page->index to unmap */
pgoff_t last_index; /* Highest page->index to unmap */
+ struct page *single_page; /* Locked page to be unmapped */
};
struct page *_vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
@@ -1428,6 +1429,7 @@ extern vm_fault_t handle_mm_fault(struct vm_area_struct *vma,
extern int fixup_user_fault(struct task_struct *tsk, struct mm_struct *mm,
unsigned long address, unsigned int fault_flags,
bool *unlocked);
+void unmap_mapping_page(struct page *page);
void unmap_mapping_pages(struct address_space *mapping,
pgoff_t start, pgoff_t nr, bool even_cows);
void unmap_mapping_range(struct address_space *mapping,
@@ -1448,6 +1450,7 @@ static inline int fixup_user_fault(struct task_struct *tsk,
BUG();
return -EFAULT;
}
+static inline void unmap_mapping_page(struct page *page) { }
static inline void unmap_mapping_pages(struct address_space *mapping,
pgoff_t start, pgoff_t nr, bool even_cows) { }
static inline void unmap_mapping_range(struct address_space *mapping,
diff --git a/include/linux/mmdebug.h b/include/linux/mmdebug.h
index 2ad72d2c8cc5..5d0767cb424a 100644
--- a/include/linux/mmdebug.h
+++ b/include/linux/mmdebug.h
@@ -37,6 +37,18 @@ void dump_mm(const struct mm_struct *mm);
BUG(); \
} \
} while (0)
+#define VM_WARN_ON_ONCE_PAGE(cond, page) ({ \
+ static bool __section(".data.once") __warned; \
+ int __ret_warn_once = !!(cond); \
+ \
+ if (unlikely(__ret_warn_once && !__warned)) { \
+ dump_page(page, "VM_WARN_ON_ONCE_PAGE(" __stringify(cond)")");\
+ __warned = true; \
+ WARN_ON(1); \
+ } \
+ unlikely(__ret_warn_once); \
+})
+
#define VM_WARN_ON(cond) (void)WARN_ON(cond)
#define VM_WARN_ON_ONCE(cond) (void)WARN_ON_ONCE(cond)
#define VM_WARN_ONCE(cond, format...) (void)WARN_ONCE(cond, format)
@@ -48,6 +60,7 @@ void dump_mm(const struct mm_struct *mm);
#define VM_BUG_ON_MM(cond, mm) VM_BUG_ON(cond)
#define VM_WARN_ON(cond) BUILD_BUG_ON_INVALID(cond)
#define VM_WARN_ON_ONCE(cond) BUILD_BUG_ON_INVALID(cond)
+#define VM_WARN_ON_ONCE_PAGE(cond, page) BUILD_BUG_ON_INVALID(cond)
#define VM_WARN_ONCE(cond, format...) BUILD_BUG_ON_INVALID(cond)
#define VM_WARN(cond, format...) BUILD_BUG_ON_INVALID(cond)
#endif
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index b1bd2186e6d2..33b63b2a163f 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -403,7 +403,7 @@ static inline struct page *read_mapping_page(struct address_space *mapping,
}
/*
- * Get index of the page with in radix-tree
+ * Get index of the page within radix-tree (but not for hugetlb pages).
* (TODO: remove once hugetlb pages will have ->index in PAGE_SIZE)
*/
static inline pgoff_t page_to_index(struct page *page)
@@ -422,15 +422,16 @@ static inline pgoff_t page_to_index(struct page *page)
return pgoff;
}
+extern pgoff_t hugetlb_basepage_index(struct page *page);
+
/*
- * Get the offset in PAGE_SIZE.
- * (TODO: hugepage should have ->index in PAGE_SIZE)
+ * Get the offset in PAGE_SIZE (even for hugetlb pages).
+ * (TODO: hugetlb pages should have ->index in PAGE_SIZE)
*/
static inline pgoff_t page_to_pgoff(struct page *page)
{
- if (unlikely(PageHeadHuge(page)))
- return page->index << compound_order(page);
-
+ if (unlikely(PageHuge(page)))
+ return hugetlb_basepage_index(page);
return page_to_index(page);
}
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index d7d6d4eb1794..91ccae946716 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -98,7 +98,8 @@ enum ttu_flags {
* do a final flush if necessary */
TTU_RMAP_LOCKED = 0x80, /* do not grab rmap lock:
* caller holds it */
- TTU_SPLIT_FREEZE = 0x100, /* freeze pte under splitting thp */
+ TTU_SPLIT_FREEZE = 0x100, /* freeze pte under splitting thp */
+ TTU_SYNC = 0x200, /* avoid racy checks with PVMW_SYNC */
};
#ifdef CONFIG_MMU
diff --git a/kernel/futex.c b/kernel/futex.c
index 526ebcff5a0a..3c67da9b8408 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -719,7 +719,7 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, int rw)
key->both.offset |= FUT_OFF_INODE; /* inode-based key */
key->shared.i_seq = get_inode_sequence_number(inode);
- key->shared.pgoff = basepage_index(tail);
+ key->shared.pgoff = page_to_pgoff(tail);
rcu_read_unlock();
}
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 81abfac35127..9750f4f7f901 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -1010,8 +1010,38 @@ void kthread_flush_work(struct kthread_work *work)
EXPORT_SYMBOL_GPL(kthread_flush_work);
/*
- * This function removes the work from the worker queue. Also it makes sure
- * that it won't get queued later via the delayed work's timer.
+ * Make sure that the timer is neither set nor running and could
+ * not manipulate the work list_head any longer.
+ *
+ * The function is called under worker->lock. The lock is temporary
+ * released but the timer can't be set again in the meantime.
+ */
+static void kthread_cancel_delayed_work_timer(struct kthread_work *work,
+ unsigned long *flags)
+{
+ struct kthread_delayed_work *dwork =
+ container_of(work, struct kthread_delayed_work, work);
+ struct kthread_worker *worker = work->worker;
+
+ /*
+ * del_timer_sync() must be called to make sure that the timer
+ * callback is not running. The lock must be temporary released
+ * to avoid a deadlock with the callback. In the meantime,
+ * any queuing is blocked by setting the canceling counter.
+ */
+ work->canceling++;
+ spin_unlock_irqrestore(&worker->lock, *flags);
+ del_timer_sync(&dwork->timer);
+ spin_lock_irqsave(&worker->lock, *flags);
+ work->canceling--;
+}
+
+/*
+ * This function removes the work from the worker queue.
+ *
+ * It is called under worker->lock. The caller must make sure that
+ * the timer used by delayed work is not running, e.g. by calling
+ * kthread_cancel_delayed_work_timer().
*
* The work might still be in use when this function finishes. See the
* current_work proceed by the worker.
@@ -1019,28 +1049,8 @@ EXPORT_SYMBOL_GPL(kthread_flush_work);
* Return: %true if @work was pending and successfully canceled,
* %false if @work was not pending
*/
-static bool __kthread_cancel_work(struct kthread_work *work, bool is_dwork,
- unsigned long *flags)
+static bool __kthread_cancel_work(struct kthread_work *work)
{
- /* Try to cancel the timer if exists. */
- if (is_dwork) {
- struct kthread_delayed_work *dwork =
- container_of(work, struct kthread_delayed_work, work);
- struct kthread_worker *worker = work->worker;
-
- /*
- * del_timer_sync() must be called to make sure that the timer
- * callback is not running. The lock must be temporary released
- * to avoid a deadlock with the callback. In the meantime,
- * any queuing is blocked by setting the canceling counter.
- */
- work->canceling++;
- spin_unlock_irqrestore(&worker->lock, *flags);
- del_timer_sync(&dwork->timer);
- spin_lock_irqsave(&worker->lock, *flags);
- work->canceling--;
- }
-
/*
* Try to remove the work from a worker list. It might either
* be from worker->work_list or from worker->delayed_work_list.
@@ -1093,11 +1103,23 @@ bool kthread_mod_delayed_work(struct kthread_worker *worker,
/* Work must not be used with >1 worker, see kthread_queue_work() */
WARN_ON_ONCE(work->worker != worker);
- /* Do not fight with another command that is canceling this work. */
+ /*
+ * Temporary cancel the work but do not fight with another command
+ * that is canceling the work as well.
+ *
+ * It is a bit tricky because of possible races with another
+ * mod_delayed_work() and cancel_delayed_work() callers.
+ *
+ * The timer must be canceled first because worker->lock is released
+ * when doing so. But the work can be removed from the queue (list)
+ * only when it can be queued again so that the return value can
+ * be used for reference counting.
+ */
+ kthread_cancel_delayed_work_timer(work, &flags);
if (work->canceling)
goto out;
+ ret = __kthread_cancel_work(work);
- ret = __kthread_cancel_work(work, true, &flags);
fast_queue:
__kthread_queue_delayed_work(worker, dwork, delay);
out:
@@ -1119,7 +1141,10 @@ static bool __kthread_cancel_work_sync(struct kthread_work *work, bool is_dwork)
/* Work must not be used with >1 worker, see kthread_queue_work(). */
WARN_ON_ONCE(work->worker != worker);
- ret = __kthread_cancel_work(work, is_dwork, &flags);
+ if (is_dwork)
+ kthread_cancel_delayed_work_timer(work, &flags);
+
+ ret = __kthread_cancel_work(work);
if (worker->current_work != work)
goto out_fast;
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 7c374c0fcf0c..4400957d8e4e 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -62,6 +62,7 @@ static struct shrinker deferred_split_shrinker;
static atomic_t huge_zero_refcount;
struct page *huge_zero_page __read_mostly;
+unsigned long huge_zero_pfn __read_mostly = ~0UL;
bool transparent_hugepage_enabled(struct vm_area_struct *vma)
{
@@ -93,6 +94,7 @@ static struct page *get_huge_zero_page(void)
__free_pages(zero_page, compound_order(zero_page));
goto retry;
}
+ WRITE_ONCE(huge_zero_pfn, page_to_pfn(zero_page));
/* We take additional reference here. It will be put back by shrinker */
atomic_set(&huge_zero_refcount, 2);
@@ -142,6 +144,7 @@ static unsigned long shrink_huge_zero_page_scan(struct shrinker *shrink,
if (atomic_cmpxchg(&huge_zero_refcount, 1, 0) == 1) {
struct page *zero_page = xchg(&huge_zero_page, NULL);
BUG_ON(zero_page == NULL);
+ WRITE_ONCE(huge_zero_pfn, ~0UL);
__free_pages(zero_page, compound_order(zero_page));
return HPAGE_PMD_NR;
}
@@ -2125,7 +2128,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
count_vm_event(THP_SPLIT_PMD);
if (!vma_is_anonymous(vma)) {
- _pmd = pmdp_huge_clear_flush_notify(vma, haddr, pmd);
+ old_pmd = pmdp_huge_clear_flush_notify(vma, haddr, pmd);
/*
* We are going to unmap this huge page. So
* just go ahead and zap it
@@ -2134,16 +2137,25 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
zap_deposited_table(mm, pmd);
if (vma_is_dax(vma))
return;
- page = pmd_page(_pmd);
- if (!PageDirty(page) && pmd_dirty(_pmd))
- set_page_dirty(page);
- if (!PageReferenced(page) && pmd_young(_pmd))
- SetPageReferenced(page);
- page_remove_rmap(page, true);
- put_page(page);
+ if (unlikely(is_pmd_migration_entry(old_pmd))) {
+ swp_entry_t entry;
+
+ entry = pmd_to_swp_entry(old_pmd);
+ page = migration_entry_to_page(entry);
+ } else {
+ page = pmd_page(old_pmd);
+ if (!PageDirty(page) && pmd_dirty(old_pmd))
+ set_page_dirty(page);
+ if (!PageReferenced(page) && pmd_young(old_pmd))
+ SetPageReferenced(page);
+ page_remove_rmap(page, true);
+ put_page(page);
+ }
add_mm_counter(mm, mm_counter_file(page), -HPAGE_PMD_NR);
return;
- } else if (pmd_trans_huge(*pmd) && is_huge_zero_pmd(*pmd)) {
+ }
+
+ if (is_huge_zero_pmd(*pmd)) {
/*
* FIXME: Do we want to invalidate secondary mmu by calling
* mmu_notifier_invalidate_range() see comments below inside
@@ -2418,16 +2430,16 @@ void vma_adjust_trans_huge(struct vm_area_struct *vma,
static void unmap_page(struct page *page)
{
enum ttu_flags ttu_flags = TTU_IGNORE_MLOCK | TTU_IGNORE_ACCESS |
- TTU_RMAP_LOCKED | TTU_SPLIT_HUGE_PMD;
- bool unmap_success;
+ TTU_RMAP_LOCKED | TTU_SPLIT_HUGE_PMD | TTU_SYNC;
VM_BUG_ON_PAGE(!PageHead(page), page);
if (PageAnon(page))
ttu_flags |= TTU_SPLIT_FREEZE;
- unmap_success = try_to_unmap(page, ttu_flags);
- VM_BUG_ON_PAGE(!unmap_success, page);
+ try_to_unmap(page, ttu_flags);
+
+ VM_WARN_ON_ONCE_PAGE(page_mapped(page), page);
}
static void remap_page(struct page *page)
@@ -2686,7 +2698,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
struct pglist_data *pgdata = NODE_DATA(page_to_nid(head));
struct anon_vma *anon_vma = NULL;
struct address_space *mapping = NULL;
- int count, mapcount, extra_pins, ret;
+ int extra_pins, ret;
bool mlocked;
unsigned long flags;
pgoff_t end;
@@ -2748,7 +2760,6 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
mlocked = PageMlocked(page);
unmap_page(head);
- VM_BUG_ON_PAGE(compound_mapcount(head), head);
/* Make sure the page is not on per-CPU pagevec as it takes pin */
if (mlocked)
@@ -2774,9 +2785,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
/* Prevent deferred_split_scan() touching ->_refcount */
spin_lock(&pgdata->split_queue_lock);
- count = page_count(head);
- mapcount = total_mapcount(head);
- if (!mapcount && page_ref_freeze(head, 1 + extra_pins)) {
+ if (page_ref_freeze(head, 1 + extra_pins)) {
if (!list_empty(page_deferred_list(head))) {
pgdata->split_queue_len--;
list_del(page_deferred_list(head));
@@ -2792,16 +2801,9 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
} else
ret = 0;
} else {
- if (IS_ENABLED(CONFIG_DEBUG_VM) && mapcount) {
- pr_alert("total_mapcount: %u, page_count(): %u\n",
- mapcount, count);
- if (PageTail(page))
- dump_page(head, NULL);
- dump_page(page, "total_mapcount(head) > 0");
- BUG();
- }
spin_unlock(&pgdata->split_queue_lock);
-fail: if (mapping)
+fail:
+ if (mapping)
xa_unlock(&mapping->i_pages);
spin_unlock_irqrestore(zone_lru_lock(page_zone(head)), flags);
remap_page(head);
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index c69f12e4c149..ebcf26bc4cd4 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1391,15 +1391,12 @@ int PageHeadHuge(struct page *page_head)
return get_compound_page_dtor(page_head) == free_huge_page;
}
-pgoff_t __basepage_index(struct page *page)
+pgoff_t hugetlb_basepage_index(struct page *page)
{
struct page *page_head = compound_head(page);
pgoff_t index = page_index(page_head);
unsigned long compound_idx;
- if (!PageHuge(page_head))
- return page_index(page);
-
if (compound_order(page_head) >= MAX_ORDER)
compound_idx = page_to_pfn(page) - page_to_pfn(page_head);
else
diff --git a/mm/internal.h b/mm/internal.h
index 397183c8fe47..3a2e973138d3 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -331,27 +331,52 @@ static inline void mlock_migrate_page(struct page *newpage, struct page *page)
extern pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma);
/*
- * At what user virtual address is page expected in @vma?
+ * At what user virtual address is page expected in vma?
+ * Returns -EFAULT if all of the page is outside the range of vma.
+ * If page is a compound head, the entire compound page is considered.
*/
static inline unsigned long
-__vma_address(struct page *page, struct vm_area_struct *vma)
+vma_address(struct page *page, struct vm_area_struct *vma)
{
- pgoff_t pgoff = page_to_pgoff(page);
- return vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
+ pgoff_t pgoff;
+ unsigned long address;
+
+ VM_BUG_ON_PAGE(PageKsm(page), page); /* KSM page->index unusable */
+ pgoff = page_to_pgoff(page);
+ if (pgoff >= vma->vm_pgoff) {
+ address = vma->vm_start +
+ ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
+ /* Check for address beyond vma (or wrapped through 0?) */
+ if (address < vma->vm_start || address >= vma->vm_end)
+ address = -EFAULT;
+ } else if (PageHead(page) &&
+ pgoff + (1UL << compound_order(page)) - 1 >= vma->vm_pgoff) {
+ /* Test above avoids possibility of wrap to 0 on 32-bit */
+ address = vma->vm_start;
+ } else {
+ address = -EFAULT;
+ }
+ return address;
}
+/*
+ * Then at what user virtual address will none of the page be found in vma?
+ * Assumes that vma_address() already returned a good starting address.
+ * If page is a compound head, the entire compound page is considered.
+ */
static inline unsigned long
-vma_address(struct page *page, struct vm_area_struct *vma)
+vma_address_end(struct page *page, struct vm_area_struct *vma)
{
- unsigned long start, end;
-
- start = __vma_address(page, vma);
- end = start + PAGE_SIZE * (hpage_nr_pages(page) - 1);
-
- /* page should be within @vma mapping range */
- VM_BUG_ON_VMA(end < vma->vm_start || start >= vma->vm_end, vma);
-
- return max(start, vma->vm_start);
+ pgoff_t pgoff;
+ unsigned long address;
+
+ VM_BUG_ON_PAGE(PageKsm(page), page); /* KSM page->index unusable */
+ pgoff = page_to_pgoff(page) + (1UL << compound_order(page));
+ address = vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
+ /* Check for address beyond vma (or wrapped through 0?) */
+ if (address < vma->vm_start || address > vma->vm_end)
+ address = vma->vm_end;
+ return address;
}
#else /* !CONFIG_MMU */
diff --git a/mm/memory.c b/mm/memory.c
index c2011c51f15d..49b546cdce0d 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1439,7 +1439,18 @@ static inline unsigned long zap_pmd_range(struct mmu_gather *tlb,
else if (zap_huge_pmd(tlb, vma, pmd, addr))
goto next;
/* fall through */
+ } else if (details && details->single_page &&
+ PageTransCompound(details->single_page) &&
+ next - addr == HPAGE_PMD_SIZE && pmd_none(*pmd)) {
+ spinlock_t *ptl = pmd_lock(tlb->mm, pmd);
+ /*
+ * Take and drop THP pmd lock so that we cannot return
+ * prematurely, while zap_huge_pmd() has cleared *pmd,
+ * but not yet decremented compound_mapcount().
+ */
+ spin_unlock(ptl);
}
+
/*
* Here there can be other concurrent MADV_DONTNEED or
* trans huge page faults running, and if the pmd is
@@ -2924,6 +2935,36 @@ static inline void unmap_mapping_range_tree(struct rb_root_cached *root,
}
}
+/**
+ * unmap_mapping_page() - Unmap single page from processes.
+ * @page: The locked page to be unmapped.
+ *
+ * Unmap this page from any userspace process which still has it mmaped.
+ * Typically, for efficiency, the range of nearby pages has already been
+ * unmapped by unmap_mapping_pages() or unmap_mapping_range(). But once
+ * truncation or invalidation holds the lock on a page, it may find that
+ * the page has been remapped again: and then uses unmap_mapping_page()
+ * to unmap it finally.
+ */
+void unmap_mapping_page(struct page *page)
+{
+ struct address_space *mapping = page->mapping;
+ struct zap_details details = { };
+
+ VM_BUG_ON(!PageLocked(page));
+ VM_BUG_ON(PageTail(page));
+
+ details.check_mapping = mapping;
+ details.first_index = page->index;
+ details.last_index = page->index + hpage_nr_pages(page) - 1;
+ details.single_page = page;
+
+ i_mmap_lock_write(mapping);
+ if (unlikely(!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root)))
+ unmap_mapping_range_tree(&mapping->i_mmap, &details);
+ i_mmap_unlock_write(mapping);
+}
+
/**
* unmap_mapping_pages() - Unmap pages from processes.
* @mapping: The address space containing pages to be unmapped.
diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c
index 11df03e71288..edca78609318 100644
--- a/mm/page_vma_mapped.c
+++ b/mm/page_vma_mapped.c
@@ -111,6 +111,13 @@ static bool check_pte(struct page_vma_mapped_walk *pvmw)
return pfn_in_hpage(pvmw->page, pfn);
}
+static void step_forward(struct page_vma_mapped_walk *pvmw, unsigned long size)
+{
+ pvmw->address = (pvmw->address + size) & ~(size - 1);
+ if (!pvmw->address)
+ pvmw->address = ULONG_MAX;
+}
+
/**
* page_vma_mapped_walk - check if @pvmw->page is mapped in @pvmw->vma at
* @pvmw->address
@@ -139,6 +146,7 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
{
struct mm_struct *mm = pvmw->vma->vm_mm;
struct page *page = pvmw->page;
+ unsigned long end;
pgd_t *pgd;
p4d_t *p4d;
pud_t *pud;
@@ -148,10 +156,11 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
if (pvmw->pmd && !pvmw->pte)
return not_found(pvmw);
- if (pvmw->pte)
- goto next_pte;
+ if (unlikely(PageHuge(page))) {
+ /* The only possible mapping was handled on last iteration */
+ if (pvmw->pte)
+ return not_found(pvmw);
- if (unlikely(PageHuge(pvmw->page))) {
/* when pud is not present, pte will be NULL */
pvmw->pte = huge_pte_offset(mm, pvmw->address,
PAGE_SIZE << compound_order(page));
@@ -164,78 +173,108 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
return not_found(pvmw);
return true;
}
-restart:
- pgd = pgd_offset(mm, pvmw->address);
- if (!pgd_present(*pgd))
- return false;
- p4d = p4d_offset(pgd, pvmw->address);
- if (!p4d_present(*p4d))
- return false;
- pud = pud_offset(p4d, pvmw->address);
- if (!pud_present(*pud))
- return false;
- pvmw->pmd = pmd_offset(pud, pvmw->address);
+
/*
- * Make sure the pmd value isn't cached in a register by the
- * compiler and used as a stale value after we've observed a
- * subsequent update.
+ * Seek to next pte only makes sense for THP.
+ * But more important than that optimization, is to filter out
+ * any PageKsm page: whose page->index misleads vma_address()
+ * and vma_address_end() to disaster.
*/
- pmde = READ_ONCE(*pvmw->pmd);
- if (pmd_trans_huge(pmde) || is_pmd_migration_entry(pmde)) {
- pvmw->ptl = pmd_lock(mm, pvmw->pmd);
- if (likely(pmd_trans_huge(*pvmw->pmd))) {
- if (pvmw->flags & PVMW_MIGRATION)
- return not_found(pvmw);
- if (pmd_page(*pvmw->pmd) != page)
- return not_found(pvmw);
- return true;
- } else if (!pmd_present(*pvmw->pmd)) {
- if (thp_migration_supported()) {
- if (!(pvmw->flags & PVMW_MIGRATION))
+ end = PageTransCompound(page) ?
+ vma_address_end(page, pvmw->vma) :
+ pvmw->address + PAGE_SIZE;
+ if (pvmw->pte)
+ goto next_pte;
+restart:
+ do {
+ pgd = pgd_offset(mm, pvmw->address);
+ if (!pgd_present(*pgd)) {
+ step_forward(pvmw, PGDIR_SIZE);
+ continue;
+ }
+ p4d = p4d_offset(pgd, pvmw->address);
+ if (!p4d_present(*p4d)) {
+ step_forward(pvmw, P4D_SIZE);
+ continue;
+ }
+ pud = pud_offset(p4d, pvmw->address);
+ if (!pud_present(*pud)) {
+ step_forward(pvmw, PUD_SIZE);
+ continue;
+ }
+
+ pvmw->pmd = pmd_offset(pud, pvmw->address);
+ /*
+ * Make sure the pmd value isn't cached in a register by the
+ * compiler and used as a stale value after we've observed a
+ * subsequent update.
+ */
+ pmde = READ_ONCE(*pvmw->pmd);
+
+ if (pmd_trans_huge(pmde) || is_pmd_migration_entry(pmde)) {
+ pvmw->ptl = pmd_lock(mm, pvmw->pmd);
+ pmde = *pvmw->pmd;
+ if (likely(pmd_trans_huge(pmde))) {
+ if (pvmw->flags & PVMW_MIGRATION)
return not_found(pvmw);
- if (is_migration_entry(pmd_to_swp_entry(*pvmw->pmd))) {
- swp_entry_t entry = pmd_to_swp_entry(*pvmw->pmd);
+ if (pmd_page(pmde) != page)
+ return not_found(pvmw);
+ return true;
+ }
+ if (!pmd_present(pmde)) {
+ swp_entry_t entry;
- if (migration_entry_to_page(entry) != page)
- return not_found(pvmw);
- return true;
- }
+ if (!thp_migration_supported() ||
+ !(pvmw->flags & PVMW_MIGRATION))
+ return not_found(pvmw);
+ entry = pmd_to_swp_entry(pmde);
+ if (!is_migration_entry(entry) ||
+ migration_entry_to_page(entry) != page)
+ return not_found(pvmw);
+ return true;
}
- return not_found(pvmw);
- } else {
/* THP pmd was split under us: handle on pte level */
spin_unlock(pvmw->ptl);
pvmw->ptl = NULL;
+ } else if (!pmd_present(pmde)) {
+ /*
+ * If PVMW_SYNC, take and drop THP pmd lock so that we
+ * cannot return prematurely, while zap_huge_pmd() has
+ * cleared *pmd but not decremented compound_mapcount().
+ */
+ if ((pvmw->flags & PVMW_SYNC) &&
+ PageTransCompound(page)) {
+ spinlock_t *ptl = pmd_lock(mm, pvmw->pmd);
+
+ spin_unlock(ptl);
+ }
+ step_forward(pvmw, PMD_SIZE);
+ continue;
}
- } else if (!pmd_present(pmde)) {
- return false;
- }
- if (!map_pte(pvmw))
- goto next_pte;
- while (1) {
+ if (!map_pte(pvmw))
+ goto next_pte;
+this_pte:
if (check_pte(pvmw))
return true;
next_pte:
- /* Seek to next pte only makes sense for THP */
- if (!PageTransHuge(pvmw->page) || PageHuge(pvmw->page))
- return not_found(pvmw);
do {
pvmw->address += PAGE_SIZE;
- if (pvmw->address >= pvmw->vma->vm_end ||
- pvmw->address >=
- __vma_address(pvmw->page, pvmw->vma) +
- hpage_nr_pages(pvmw->page) * PAGE_SIZE)
+ if (pvmw->address >= end)
return not_found(pvmw);
/* Did we cross page table boundary? */
- if (pvmw->address % PMD_SIZE == 0) {
- pte_unmap(pvmw->pte);
+ if ((pvmw->address & (PMD_SIZE - PAGE_SIZE)) == 0) {
if (pvmw->ptl) {
spin_unlock(pvmw->ptl);
pvmw->ptl = NULL;
}
+ pte_unmap(pvmw->pte);
+ pvmw->pte = NULL;
goto restart;
- } else {
- pvmw->pte++;
+ }
+ pvmw->pte++;
+ if ((pvmw->flags & PVMW_SYNC) && !pvmw->ptl) {
+ pvmw->ptl = pte_lockptr(mm, pvmw->pmd);
+ spin_lock(pvmw->ptl);
}
} while (pte_none(*pvmw->pte));
@@ -243,7 +282,10 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
pvmw->ptl = pte_lockptr(mm, pvmw->pmd);
spin_lock(pvmw->ptl);
}
- }
+ goto this_pte;
+ } while (pvmw->address < end);
+
+ return false;
}
/**
@@ -262,14 +304,10 @@ int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma)
.vma = vma,
.flags = PVMW_SYNC,
};
- unsigned long start, end;
-
- start = __vma_address(page, vma);
- end = start + PAGE_SIZE * (hpage_nr_pages(page) - 1);
- if (unlikely(end < vma->vm_start || start >= vma->vm_end))
+ pvmw.address = vma_address(page, vma);
+ if (pvmw.address == -EFAULT)
return 0;
- pvmw.address = max(start, vma->vm_start);
if (!page_vma_mapped_walk(&pvmw))
return 0;
page_vma_mapped_walk_done(&pvmw);
diff --git a/mm/pgtable-generic.c b/mm/pgtable-generic.c
index cf2af04b34b9..36770fcdc358 100644
--- a/mm/pgtable-generic.c
+++ b/mm/pgtable-generic.c
@@ -125,8 +125,8 @@ pmd_t pmdp_huge_clear_flush(struct vm_area_struct *vma, unsigned long address,
{
pmd_t pmd;
VM_BUG_ON(address & ~HPAGE_PMD_MASK);
- VM_BUG_ON((pmd_present(*pmdp) && !pmd_trans_huge(*pmdp) &&
- !pmd_devmap(*pmdp)) || !pmd_present(*pmdp));
+ VM_BUG_ON(pmd_present(*pmdp) && !pmd_trans_huge(*pmdp) &&
+ !pmd_devmap(*pmdp));
pmd = pmdp_huge_get_and_clear(vma->vm_mm, address, pmdp);
flush_pmd_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
return pmd;
diff --git a/mm/rmap.c b/mm/rmap.c
index 1bd94ea62f7f..699f445e3e78 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -686,7 +686,6 @@ static bool should_defer_flush(struct mm_struct *mm, enum ttu_flags flags)
*/
unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma)
{
- unsigned long address;
if (PageAnon(page)) {
struct anon_vma *page__anon_vma = page_anon_vma(page);
/*
@@ -696,15 +695,13 @@ unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma)
if (!vma->anon_vma || !page__anon_vma ||
vma->anon_vma->root != page__anon_vma->root)
return -EFAULT;
- } else if (page->mapping) {
- if (!vma->vm_file || vma->vm_file->f_mapping != page->mapping)
- return -EFAULT;
- } else
+ } else if (!vma->vm_file) {
return -EFAULT;
- address = __vma_address(page, vma);
- if (unlikely(address < vma->vm_start || address >= vma->vm_end))
+ } else if (vma->vm_file->f_mapping != compound_head(page)->mapping) {
return -EFAULT;
- return address;
+ }
+
+ return vma_address(page, vma);
}
pmd_t *mm_find_pmd(struct mm_struct *mm, unsigned long address)
@@ -896,7 +893,7 @@ static bool page_mkclean_one(struct page *page, struct vm_area_struct *vma,
* We have to assume the worse case ie pmd for invalidation. Note that
* the page can not be free from this function.
*/
- end = min(vma->vm_end, start + (PAGE_SIZE << compound_order(page)));
+ end = vma_address_end(page, vma);
mmu_notifier_invalidate_range_start(vma->vm_mm, start, end);
while (page_vma_mapped_walk(&pvmw)) {
@@ -1348,6 +1345,15 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
unsigned long start = address, end;
enum ttu_flags flags = (enum ttu_flags)arg;
+ /*
+ * When racing against e.g. zap_pte_range() on another cpu,
+ * in between its ptep_get_and_clear_full() and page_remove_rmap(),
+ * try_to_unmap() may return false when it is about to become true,
+ * if page table locking is skipped: use TTU_SYNC to wait for that.
+ */
+ if (flags & TTU_SYNC)
+ pvmw.flags = PVMW_SYNC;
+
/* munlock has nothing to gain from examining un-locked vmas */
if ((flags & TTU_MUNLOCK) && !(vma->vm_flags & VM_LOCKED))
return true;
@@ -1369,7 +1375,8 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
* Note that the page can not be free in this function as call of
* try_to_unmap() must hold a reference on the page.
*/
- end = min(vma->vm_end, start + (PAGE_SIZE << compound_order(page)));
+ end = PageKsm(page) ?
+ address + PAGE_SIZE : vma_address_end(page, vma);
if (PageHuge(page)) {
/*
* If sharing is possible, start and end will be adjusted
@@ -1682,9 +1689,9 @@ static bool invalid_migration_vma(struct vm_area_struct *vma, void *arg)
return is_vma_temporary_stack(vma);
}
-static int page_mapcount_is_zero(struct page *page)
+static int page_not_mapped(struct page *page)
{
- return !total_mapcount(page);
+ return !page_mapped(page);
}
/**
@@ -1702,7 +1709,7 @@ bool try_to_unmap(struct page *page, enum ttu_flags flags)
struct rmap_walk_control rwc = {
.rmap_one = try_to_unmap_one,
.arg = (void *)flags,
- .done = page_mapcount_is_zero,
+ .done = page_not_mapped,
.anon_lock = page_lock_anon_vma_read,
};
@@ -1723,14 +1730,15 @@ bool try_to_unmap(struct page *page, enum ttu_flags flags)
else
rmap_walk(page, &rwc);
- return !page_mapcount(page) ? true : false;
+ /*
+ * When racing against e.g. zap_pte_range() on another cpu,
+ * in between its ptep_get_and_clear_full() and page_remove_rmap(),
+ * try_to_unmap() may return false when it is about to become true,
+ * if page table locking is skipped: use TTU_SYNC to wait for that.
+ */
+ return !page_mapcount(page);
}
-static int page_not_mapped(struct page *page)
-{
- return !page_mapped(page);
-};
-
/**
* try_to_munlock - try to munlock a page
* @page: the page to be munlocked
@@ -1825,6 +1833,7 @@ static void rmap_walk_anon(struct page *page, struct rmap_walk_control *rwc,
struct vm_area_struct *vma = avc->vma;
unsigned long address = vma_address(page, vma);
+ VM_BUG_ON_VMA(address == -EFAULT, vma);
cond_resched();
if (rwc->invalid_vma && rwc->invalid_vma(vma, rwc->arg))
@@ -1879,6 +1888,7 @@ static void rmap_walk_file(struct page *page, struct rmap_walk_control *rwc,
pgoff_start, pgoff_end) {
unsigned long address = vma_address(page, vma);
+ VM_BUG_ON_VMA(address == -EFAULT, vma);
cond_resched();
if (rwc->invalid_vma && rwc->invalid_vma(vma, rwc->arg))
diff --git a/mm/truncate.c b/mm/truncate.c
index 71b65aab8077..43c73db17a0a 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -175,13 +175,10 @@ void do_invalidatepage(struct page *page, unsigned int offset,
* its lock, b) when a concurrent invalidate_mapping_pages got there first and
* c) when tmpfs swizzles a page between a tmpfs inode and swapper_space.
*/
-static void
-truncate_cleanup_page(struct address_space *mapping, struct page *page)
+static void truncate_cleanup_page(struct page *page)
{
- if (page_mapped(page)) {
- pgoff_t nr = PageTransHuge(page) ? HPAGE_PMD_NR : 1;
- unmap_mapping_pages(mapping, page->index, nr, false);
- }
+ if (page_mapped(page))
+ unmap_mapping_page(page);
if (page_has_private(page))
do_invalidatepage(page, 0, PAGE_SIZE);
@@ -226,7 +223,7 @@ int truncate_inode_page(struct address_space *mapping, struct page *page)
if (page->mapping != mapping)
return -EIO;
- truncate_cleanup_page(mapping, page);
+ truncate_cleanup_page(page);
delete_from_page_cache(page);
return 0;
}
@@ -364,7 +361,7 @@ void truncate_inode_pages_range(struct address_space *mapping,
pagevec_add(&locked_pvec, page);
}
for (i = 0; i < pagevec_count(&locked_pvec); i++)
- truncate_cleanup_page(mapping, locked_pvec.pages[i]);
+ truncate_cleanup_page(locked_pvec.pages[i]);
delete_from_page_cache_batch(mapping, &locked_pvec);
for (i = 0; i < pagevec_count(&locked_pvec); i++)
unlock_page(locked_pvec.pages[i]);
@@ -703,6 +700,16 @@ int invalidate_inode_pages2_range(struct address_space *mapping,
continue;
}
+ if (!did_range_unmap && page_mapped(page)) {
+ /*
+ * If page is mapped, before taking its lock,
+ * zap the rest of the file in one hit.
+ */
+ unmap_mapping_pages(mapping, index,
+ (1 + end - index), false);
+ did_range_unmap = 1;
+ }
+
lock_page(page);
WARN_ON(page_to_index(page) != index);
if (page->mapping != mapping) {
@@ -710,23 +717,11 @@ int invalidate_inode_pages2_range(struct address_space *mapping,
continue;
}
wait_on_page_writeback(page);
- if (page_mapped(page)) {
- if (!did_range_unmap) {
- /*
- * Zap the rest of the file in one hit.
- */
- unmap_mapping_pages(mapping, index,
- (1 + end - index), false);
- did_range_unmap = 1;
- } else {
- /*
- * Just zap this page
- */
- unmap_mapping_pages(mapping, index,
- 1, false);
- }
- }
+
+ if (page_mapped(page))
+ unmap_mapping_page(page);
BUG_ON(page_mapped(page));
+
ret2 = do_launder_page(mapping, page);
if (ret2 == 0) {
if (!invalidate_complete_page2(mapping, page))
prev parent reply other threads:[~2021-07-11 11:21 UTC|newest]
Thread overview: 2+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-07-11 11:21 Linux 4.19.197 Greg Kroah-Hartman
2021-07-11 11:21 ` Greg Kroah-Hartman [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=162600247151114@kroah.com \
--to=gregkh@linuxfoundation.org \
--cc=akpm@linux-foundation.org \
--cc=jslaby@suse.cz \
--cc=linux-kernel@vger.kernel.org \
--cc=lwn@lwn.net \
--cc=stable@vger.kernel.org \
--cc=torvalds@linux-foundation.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.