Re: Linux 4.19.197

From: Greg Kroah-Hartman
Date: Sun Jul 11 2021 - 07:21:27 EST

Next message: Greg Kroah-Hartman: "Linux 5.4.131"
Previous message: Greg Kroah-Hartman: "Linux 4.19.197"
In reply to: Greg Kroah-Hartman: "Linux 4.19.197"
Messages sorted by: [ date ] [ thread ] [ subject ] [ author ]

diff --git a/Makefile b/Makefile
index 63b0bc92a0fa..42073a4c6e2e 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
VERSION = 4
PATCHLEVEL = 19
-SUBLEVEL = 196
+SUBLEVEL = 197
EXTRAVERSION =
NAME = "People's Front"

diff --git a/arch/arm/boot/dts/dra7.dtsi b/arch/arm/boot/dts/dra7.dtsi
index 0c0781a37c5a..7f1fe4a72447 100644
--- a/arch/arm/boot/dts/dra7.dtsi
+++ b/arch/arm/boot/dts/dra7.dtsi
@@ -48,6 +48,7 @@

timer {
compatible = "arm,armv7-timer";
+ status = "disabled"; /* See ARM architected timer wrap erratum i940 */
interrupts = <GIC_PPI 13 (GIC_CPU_MASK_SIMPLE(2) | IRQ_TYPE_LEVEL_LOW)>,
<GIC_PPI 14 (GIC_CPU_MASK_SIMPLE(2) | IRQ_TYPE_LEVEL_LOW)>,
<GIC_PPI 11 (GIC_CPU_MASK_SIMPLE(2) | IRQ_TYPE_LEVEL_LOW)>,
@@ -910,6 +911,8 @@
reg = <0x48032000 0x80>;
interrupts = <GIC_SPI 33 IRQ_TYPE_LEVEL_HIGH>;
ti,hwmods = "timer2";
+ clock-names = "fck";
+ clocks = <&l4per_clkctrl DRA7_TIMER2_CLKCTRL 24>;
};

timer3: timer@48034000 {
@@ -917,6 +920,10 @@
reg = <0x48034000 0x80>;
interrupts = <GIC_SPI 34 IRQ_TYPE_LEVEL_HIGH>;
ti,hwmods = "timer3";
+ clock-names = "fck";
+ clocks = <&l4per_clkctrl DRA7_TIMER3_CLKCTRL 24>;
+ assigned-clocks = <&l4per_clkctrl DRA7_TIMER3_CLKCTRL 24>;
+ assigned-clock-parents = <&timer_sys_clk_div>;
};

timer4: timer@48036000 {
@@ -924,6 +931,10 @@
reg = <0x48036000 0x80>;
interrupts = <GIC_SPI 35 IRQ_TYPE_LEVEL_HIGH>;
ti,hwmods = "timer4";
+ clock-names = "fck";
+ clocks = <&l4per_clkctrl DRA7_TIMER4_CLKCTRL 24>;
+ assigned-clocks = <&l4per_clkctrl DRA7_TIMER4_CLKCTRL 24>;
+ assigned-clock-parents = <&timer_sys_clk_div>;
};

timer5: timer@48820000 {
diff --git a/arch/arm/boot/dts/imx6qdl-sabresd.dtsi b/arch/arm/boot/dts/imx6qdl-sabresd.dtsi
index 41384bbd2f60..03357d39870e 100644
--- a/arch/arm/boot/dts/imx6qdl-sabresd.dtsi
+++ b/arch/arm/boot/dts/imx6qdl-sabresd.dtsi
@@ -675,10 +675,6 @@
vin-supply = <&vgen5_reg>;
};

-&reg_vdd3p0 {
- vin-supply = <&sw2_reg>;
-};
-
&reg_vdd2p5 {
vin-supply = <&vgen5_reg>;
};
diff --git a/arch/arm/mach-omap1/pm.c b/arch/arm/mach-omap1/pm.c
index 3e1de14805e4..65b91a8379c9 100644
--- a/arch/arm/mach-omap1/pm.c
+++ b/arch/arm/mach-omap1/pm.c
@@ -610,11 +610,6 @@ static irqreturn_t omap_wakeup_interrupt(int irq, void *dev)
return IRQ_HANDLED;
}

-static struct irqaction omap_wakeup_irq = {
- .name = "peripheral wakeup",
- .handler = omap_wakeup_interrupt
-};
-

static const struct platform_suspend_ops omap_pm_ops = {
@@ -627,6 +622,7 @@ static const struct platform_suspend_ops omap_pm_ops = {
static int __init omap_pm_init(void)
{
int error = 0;
+ int irq;

if (!cpu_class_is_omap1())
return -ENODEV;
@@ -670,9 +666,12 @@ static int __init omap_pm_init(void)
arm_pm_idle = omap1_pm_idle;

if (cpu_is_omap7xx())
- setup_irq(INT_7XX_WAKE_UP_REQ, &omap_wakeup_irq);
+ irq = INT_7XX_WAKE_UP_REQ;
else if (cpu_is_omap16xx())
- setup_irq(INT_1610_WAKE_UP_REQ, &omap_wakeup_irq);
+ irq = INT_1610_WAKE_UP_REQ;
+ if (request_irq(irq, omap_wakeup_interrupt, 0, "peripheral wakeup",
+ NULL))
+ pr_err("Failed to request irq %d (peripheral wakeup)\n", irq);

/* Program new power ramp-up time
* (0 for most boards since we don't lower voltage when in deep sleep)
diff --git a/arch/arm/mach-omap1/time.c b/arch/arm/mach-omap1/time.c
index 524977a31a49..de590a85a42b 100644
--- a/arch/arm/mach-omap1/time.c
+++ b/arch/arm/mach-omap1/time.c
@@ -155,15 +155,11 @@ static irqreturn_t omap_mpu_timer1_interrupt(int irq, void *dev_id)
return IRQ_HANDLED;
}

-static struct irqaction omap_mpu_timer1_irq = {
- .name = "mpu_timer1",
- .flags = IRQF_TIMER | IRQF_IRQPOLL,
- .handler = omap_mpu_timer1_interrupt,
-};
-
static __init void omap_init_mpu_timer(unsigned long rate)
{
- setup_irq(INT_TIMER1, &omap_mpu_timer1_irq);
+ if (request_irq(INT_TIMER1, omap_mpu_timer1_interrupt,
+ IRQF_TIMER | IRQF_IRQPOLL, "mpu_timer1", NULL))
+ pr_err("Failed to request irq %d (mpu_timer1)\n", INT_TIMER1);
omap_mpu_timer_start(0, (rate / HZ) - 1, 1);

clockevent_mpu_timer1.cpumask = cpumask_of(0);
diff --git a/arch/arm/mach-omap1/timer32k.c b/arch/arm/mach-omap1/timer32k.c
index 0ae6c52a7d70..780fdf03c3ce 100644
--- a/arch/arm/mach-omap1/timer32k.c
+++ b/arch/arm/mach-omap1/timer32k.c
@@ -148,15 +148,11 @@ static irqreturn_t omap_32k_timer_interrupt(int irq, void *dev_id)
return IRQ_HANDLED;
}

-static struct irqaction omap_32k_timer_irq = {
- .name = "32KHz timer",
- .flags = IRQF_TIMER | IRQF_IRQPOLL,
- .handler = omap_32k_timer_interrupt,
-};
-
static __init void omap_init_32k_timer(void)
{
- setup_irq(INT_OS_TIMER, &omap_32k_timer_irq);
+ if (request_irq(INT_OS_TIMER, omap_32k_timer_interrupt,
+ IRQF_TIMER | IRQF_IRQPOLL, "32KHz timer", NULL))
+ pr_err("Failed to request irq %d(32KHz timer)\n", INT_OS_TIMER);

clockevent_32k_timer.cpumask = cpumask_of(0);
clockevents_config_and_register(&clockevent_32k_timer,
diff --git a/arch/arm/mach-omap2/board-generic.c b/arch/arm/mach-omap2/board-generic.c
index 6b4f4975cf7a..6e59c11131c4 100644
--- a/arch/arm/mach-omap2/board-generic.c
+++ b/arch/arm/mach-omap2/board-generic.c
@@ -330,7 +330,7 @@ DT_MACHINE_START(DRA74X_DT, "Generic DRA74X (Flattened Device Tree)")
.init_late = dra7xx_init_late,
.init_irq = omap_gic_of_init,
.init_machine = omap_generic_init,
- .init_time = omap5_realtime_timer_init,
+ .init_time = omap3_gptimer_timer_init,
.dt_compat = dra74x_boards_compat,
.restart = omap44xx_restart,
MACHINE_END
@@ -353,7 +353,7 @@ DT_MACHINE_START(DRA72X_DT, "Generic DRA72X (Flattened Device Tree)")
.init_late = dra7xx_init_late,
.init_irq = omap_gic_of_init,
.init_machine = omap_generic_init,
- .init_time = omap5_realtime_timer_init,
+ .init_time = omap3_gptimer_timer_init,
.dt_compat = dra72x_boards_compat,
.restart = omap44xx_restart,
MACHINE_END
diff --git a/arch/arm/mach-omap2/timer.c b/arch/arm/mach-omap2/timer.c
index 98ed5ac073bc..c4ba848e8af6 100644
--- a/arch/arm/mach-omap2/timer.c
+++ b/arch/arm/mach-omap2/timer.c
@@ -42,6 +42,7 @@
#include <linux/platform_device.h>
#include <linux/platform_data/dmtimer-omap.h>
#include <linux/sched_clock.h>
+#include <linux/cpu.h>

#include <asm/mach/time.h>
#include <asm/smp_twd.h>
@@ -64,15 +65,28 @@

/* Clockevent code */

-static struct omap_dm_timer clkev;
-static struct clock_event_device clockevent_gpt;
-
/* Clockevent hwmod for am335x and am437x suspend */
static struct omap_hwmod *clockevent_gpt_hwmod;

/* Clockesource hwmod for am437x suspend */
static struct omap_hwmod *clocksource_gpt_hwmod;

+struct dmtimer_clockevent {
+ struct clock_event_device dev;
+ struct omap_dm_timer timer;
+};
+
+static struct dmtimer_clockevent clockevent;
+
+static struct omap_dm_timer *to_dmtimer(struct clock_event_device *clockevent)
+{
+ struct dmtimer_clockevent *clkevt =
+ container_of(clockevent, struct dmtimer_clockevent, dev);
+ struct omap_dm_timer *timer = &clkevt->timer;
+
+ return timer;
+}
+
#ifdef CONFIG_SOC_HAS_REALTIME_COUNTER
static unsigned long arch_timer_freq;

@@ -84,24 +98,21 @@ void set_cntfreq(void)

static irqreturn_t omap2_gp_timer_interrupt(int irq, void *dev_id)
{
- struct clock_event_device *evt = &clockevent_gpt;
-
- __omap_dm_timer_write_status(&clkev, OMAP_TIMER_INT_OVERFLOW);
+ struct dmtimer_clockevent *clkevt = dev_id;
+ struct clock_event_device *evt = &clkevt->dev;
+ struct omap_dm_timer *timer = &clkevt->timer;

+ __omap_dm_timer_write_status(timer, OMAP_TIMER_INT_OVERFLOW);
evt->event_handler(evt);
return IRQ_HANDLED;
}

-static struct irqaction omap2_gp_timer_irq = {
- .name = "gp_timer",
- .flags = IRQF_TIMER | IRQF_IRQPOLL,
- .handler = omap2_gp_timer_interrupt,
-};
-
static int omap2_gp_timer_set_next_event(unsigned long cycles,
struct clock_event_device *evt)
{
- __omap_dm_timer_load_start(&clkev, OMAP_TIMER_CTRL_ST,
+ struct omap_dm_timer *timer = to_dmtimer(evt);
+
+ __omap_dm_timer_load_start(timer, OMAP_TIMER_CTRL_ST,
0xffffffff - cycles, OMAP_TIMER_POSTED);

return 0;
@@ -109,22 +120,26 @@ static int omap2_gp_timer_set_next_event(unsigned long cycles,

static int omap2_gp_timer_shutdown(struct clock_event_device *evt)
{
- __omap_dm_timer_stop(&clkev, OMAP_TIMER_POSTED, clkev.rate);
+ struct omap_dm_timer *timer = to_dmtimer(evt);
+
+ __omap_dm_timer_stop(timer, OMAP_TIMER_POSTED, timer->rate);
+
return 0;
}

static int omap2_gp_timer_set_periodic(struct clock_event_device *evt)
{
+ struct omap_dm_timer *timer = to_dmtimer(evt);
u32 period;

- __omap_dm_timer_stop(&clkev, OMAP_TIMER_POSTED, clkev.rate);
+ __omap_dm_timer_stop(timer, OMAP_TIMER_POSTED, timer->rate);

- period = clkev.rate / HZ;
+ period = timer->rate / HZ;
period -= 1;
/* Looks like we need to first set the load value separately */
- __omap_dm_timer_write(&clkev, OMAP_TIMER_LOAD_REG, 0xffffffff - period,
+ __omap_dm_timer_write(timer, OMAP_TIMER_LOAD_REG, 0xffffffff - period,
OMAP_TIMER_POSTED);
- __omap_dm_timer_load_start(&clkev,
+ __omap_dm_timer_load_start(timer,
OMAP_TIMER_CTRL_AR | OMAP_TIMER_CTRL_ST,
0xffffffff - period, OMAP_TIMER_POSTED);
return 0;
@@ -138,25 +153,16 @@ static void omap_clkevt_idle(struct clock_event_device *unused)
omap_hwmod_idle(clockevent_gpt_hwmod);
}

-static void omap_clkevt_unidle(struct clock_event_device *unused)
+static void omap_clkevt_unidle(struct clock_event_device *evt)
{
+ struct omap_dm_timer *timer = to_dmtimer(evt);
+
if (!clockevent_gpt_hwmod)
return;

omap_hwmod_enable(clockevent_gpt_hwmod);
- __omap_dm_timer_int_enable(&clkev, OMAP_TIMER_INT_OVERFLOW);
-}
-
-static struct clock_event_device clockevent_gpt = {
- .features = CLOCK_EVT_FEAT_PERIODIC |
- CLOCK_EVT_FEAT_ONESHOT,
- .rating = 300,
- .set_next_event = omap2_gp_timer_set_next_event,
- .set_state_shutdown = omap2_gp_timer_shutdown,
- .set_state_periodic = omap2_gp_timer_set_periodic,
- .set_state_oneshot = omap2_gp_timer_shutdown,
- .tick_resume = omap2_gp_timer_shutdown,
-};
+ __omap_dm_timer_int_enable(timer, OMAP_TIMER_INT_OVERFLOW);
+}

static const struct of_device_id omap_timer_match[] __initconst = {
{ .compatible = "ti,omap2420-timer", },
@@ -363,47 +369,104 @@ void tick_broadcast(const struct cpumask *mask)
}
#endif

-static void __init omap2_gp_clockevent_init(int gptimer_id,
- const char *fck_source,
- const char *property)
+static void __init dmtimer_clkevt_init_common(struct dmtimer_clockevent *clkevt,
+ int gptimer_id,
+ const char *fck_source,
+ unsigned int features,
+ const struct cpumask *cpumask,
+ const char *property,
+ int rating, const char *name)
{
+ struct omap_dm_timer *timer = &clkevt->timer;
int res;

- clkev.id = gptimer_id;
- clkev.errata = omap_dm_timer_get_errata();
+ timer->id = gptimer_id;
+ timer->errata = omap_dm_timer_get_errata();
+ clkevt->dev.features = features;
+ clkevt->dev.rating = rating;
+ clkevt->dev.set_next_event = omap2_gp_timer_set_next_event;
+ clkevt->dev.set_state_shutdown = omap2_gp_timer_shutdown;
+ clkevt->dev.set_state_periodic = omap2_gp_timer_set_periodic;
+ clkevt->dev.set_state_oneshot = omap2_gp_timer_shutdown;
+ clkevt->dev.tick_resume = omap2_gp_timer_shutdown;

/*
* For clock-event timers we never read the timer counter and
* so we are not impacted by errata i103 and i767. Therefore,
* we can safely ignore this errata for clock-event timers.
*/
- __omap_dm_timer_override_errata(&clkev, OMAP_TIMER_ERRATA_I103_I767);
+ __omap_dm_timer_override_errata(timer, OMAP_TIMER_ERRATA_I103_I767);

- res = omap_dm_timer_init_one(&clkev, fck_source, property,
- &clockevent_gpt.name, OMAP_TIMER_POSTED);
+ res = omap_dm_timer_init_one(timer, fck_source, property,
+ &clkevt->dev.name, OMAP_TIMER_POSTED);
BUG_ON(res);

- omap2_gp_timer_irq.dev_id = &clkev;
- setup_irq(clkev.irq, &omap2_gp_timer_irq);
+ clkevt->dev.cpumask = cpumask;
+ clkevt->dev.irq = omap_dm_timer_get_irq(timer);

- __omap_dm_timer_int_enable(&clkev, OMAP_TIMER_INT_OVERFLOW);
+ if (request_irq(clkevt->dev.irq, omap2_gp_timer_interrupt,
+ IRQF_TIMER | IRQF_IRQPOLL, name, clkevt))
+ pr_err("Failed to request irq %d (gp_timer)\n", clkevt->dev.irq);

- clockevent_gpt.cpumask = cpu_possible_mask;
- clockevent_gpt.irq = omap_dm_timer_get_irq(&clkev);
- clockevents_config_and_register(&clockevent_gpt, clkev.rate,
- 3, /* Timer internal resynch latency */
- 0xffffffff);
+ __omap_dm_timer_int_enable(timer, OMAP_TIMER_INT_OVERFLOW);

if (soc_is_am33xx() || soc_is_am43xx()) {
- clockevent_gpt.suspend = omap_clkevt_idle;
- clockevent_gpt.resume = omap_clkevt_unidle;
+ clkevt->dev.suspend = omap_clkevt_idle;
+ clkevt->dev.resume = omap_clkevt_unidle;

clockevent_gpt_hwmod =
- omap_hwmod_lookup(clockevent_gpt.name);
+ omap_hwmod_lookup(clkevt->dev.name);
+ }
+
+ pr_info("OMAP clockevent source: %s at %lu Hz\n", clkevt->dev.name,
+ timer->rate);
+}
+
+static DEFINE_PER_CPU(struct dmtimer_clockevent, dmtimer_percpu_timer);
+
+static int omap_gptimer_starting_cpu(unsigned int cpu)
+{
+ struct dmtimer_clockevent *clkevt = per_cpu_ptr(&dmtimer_percpu_timer, cpu);
+ struct clock_event_device *dev = &clkevt->dev;
+ struct omap_dm_timer *timer = &clkevt->timer;
+
+ clockevents_config_and_register(dev, timer->rate, 3, ULONG_MAX);
+ irq_force_affinity(dev->irq, cpumask_of(cpu));
+
+ return 0;
+}
+
+static int __init dmtimer_percpu_quirk_init(void)
+{
+ struct dmtimer_clockevent *clkevt;
+ struct clock_event_device *dev;
+ struct device_node *arm_timer;
+ struct omap_dm_timer *timer;
+ int cpu = 0;
+
+ arm_timer = of_find_compatible_node(NULL, NULL, "arm,armv7-timer");
+ if (of_device_is_available(arm_timer)) {
+ pr_warn_once("ARM architected timer wrap issue i940 detected\n");
+ return 0;
+ }
+
+ for_each_possible_cpu(cpu) {
+ clkevt = per_cpu_ptr(&dmtimer_percpu_timer, cpu);
+ dev = &clkevt->dev;
+ timer = &clkevt->timer;
+
+ dmtimer_clkevt_init_common(clkevt, 0, "timer_sys_ck",
+ CLOCK_EVT_FEAT_ONESHOT,
+ cpumask_of(cpu),
+ "assigned-clock-parents",
+ 500, "percpu timer");
}

- pr_info("OMAP clockevent source: %s at %lu Hz\n", clockevent_gpt.name,
- clkev.rate);
+ cpuhp_setup_state(CPUHP_AP_OMAP_DM_TIMER_STARTING,
+ "clockevents/omap/gptimer:starting",
+ omap_gptimer_starting_cpu, NULL);
+
+ return 0;
}

/* Clocksource code */
@@ -543,7 +606,15 @@ static void __init __omap_sync32k_timer_init(int clkev_nr, const char *clkev_src
{
omap_clk_init();
omap_dmtimer_init();
- omap2_gp_clockevent_init(clkev_nr, clkev_src, clkev_prop);
+ dmtimer_clkevt_init_common(&clockevent, clkev_nr, clkev_src,
+ CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT,
+ cpu_possible_mask, clkev_prop, 300, "clockevent");
+ clockevents_config_and_register(&clockevent.dev, clockevent.timer.rate,
+ 3, /* Timer internal resynch latency */
+ 0xffffffff);
+
+ if (soc_is_dra7xx())
+ dmtimer_percpu_quirk_init();

/* Enable the use of clocksource="gp_timer" kernel parameter */
if (use_gptimer_clksrc || gptimer)
@@ -572,7 +643,7 @@ void __init omap3_secure_sync32k_timer_init(void)
#endif /* CONFIG_ARCH_OMAP3 */

#if defined(CONFIG_ARCH_OMAP3) || defined(CONFIG_SOC_AM33XX) || \
- defined(CONFIG_SOC_AM43XX)
+ defined(CONFIG_SOC_AM43XX) || defined(CONFIG_SOC_DRA7XX)
void __init omap3_gptimer_timer_init(void)
{
__omap_sync32k_timer_init(2, "timer_sys_ck", NULL,
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index ad24e6777277..bd463d684237 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1791,9 +1791,25 @@ static void sev_asid_free(struct kvm *kvm)
__sev_asid_free(sev->asid);
}

-static void sev_unbind_asid(struct kvm *kvm, unsigned int handle)
+static void sev_decommission(unsigned int handle)
{
struct sev_data_decommission *decommission;
+
+ if (!handle)
+ return;
+
+ decommission = kzalloc(sizeof(*decommission), GFP_KERNEL);
+ if (!decommission)
+ return;
+
+ decommission->handle = handle;
+ sev_guest_decommission(decommission, NULL);
+
+ kfree(decommission);
+}
+
+static void sev_unbind_asid(struct kvm *kvm, unsigned int handle)
+{
struct sev_data_deactivate *data;

if (!handle)
@@ -1811,15 +1827,7 @@ static void sev_unbind_asid(struct kvm *kvm, unsigned int handle)
sev_guest_df_flush(NULL);
kfree(data);

- decommission = kzalloc(sizeof(*decommission), GFP_KERNEL);
- if (!decommission)
- return;
-
- /* decommission handle */
- decommission->handle = handle;
- sev_guest_decommission(decommission, NULL);
-
- kfree(decommission);
+ sev_decommission(handle);
}

static struct page **sev_pin_memory(struct kvm *kvm, unsigned long uaddr,
@@ -1954,6 +1962,7 @@ static void sev_vm_destroy(struct kvm *kvm)
list_for_each_safe(pos, q, head) {
__unregister_enc_region_locked(kvm,
list_entry(pos, struct enc_region, list));
+ cond_resched();
}
}

@@ -6468,8 +6477,10 @@ static int sev_launch_start(struct kvm *kvm, struct kvm_sev_cmd *argp)

/* Bind ASID to this guest */
ret = sev_bind_asid(kvm, start->handle, error);
- if (ret)
+ if (ret) {
+ sev_decommission(start->handle);
goto e_free_session;
+ }

/* return handle to userspace */
params.handle = start->handle;
diff --git a/drivers/clk/ti/clk-7xx.c b/drivers/clk/ti/clk-7xx.c
index 71a122b2dc67..b6d1ec49fa01 100644
--- a/drivers/clk/ti/clk-7xx.c
+++ b/drivers/clk/ti/clk-7xx.c
@@ -733,6 +733,7 @@ const struct omap_clkctrl_data dra7_clkctrl_data[] __initconst = {
static struct ti_dt_clk dra7xx_clks[] = {
DT_CLK(NULL, "timer_32k_ck", "sys_32k_ck"),
DT_CLK(NULL, "sys_clkin_ck", "timer_sys_clk_div"),
+ DT_CLK(NULL, "timer_sys_ck", "timer_sys_clk_div"),
DT_CLK(NULL, "sys_clkin", "sys_clkin1"),
DT_CLK(NULL, "atl_dpll_clk_mux", "atl_cm:0000:24"),
DT_CLK(NULL, "atl_gfclk_mux", "atl_cm:0000:26"),
diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c
index 7214022dfb91..d230536e7086 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -512,7 +512,7 @@ nouveau_bo_sync_for_device(struct nouveau_bo *nvbo)
struct ttm_dma_tt *ttm_dma = (struct ttm_dma_tt *)nvbo->bo.ttm;
int i;

- if (!ttm_dma)
+ if (!ttm_dma || !ttm_dma->dma_address)
return;

/* Don't waste time looping if the object is coherent */
@@ -532,7 +532,7 @@ nouveau_bo_sync_for_cpu(struct nouveau_bo *nvbo)
struct ttm_dma_tt *ttm_dma = (struct ttm_dma_tt *)nvbo->bo.ttm;
int i;

- if (!ttm_dma)
+ if (!ttm_dma || !ttm_dma->dma_address)
return;

/* Don't waste time looping if the object is coherent */
diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c
index 45c8bf39ad23..acf0c244141f 100644
--- a/drivers/scsi/sr.c
+++ b/drivers/scsi/sr.c
@@ -216,6 +216,8 @@ static unsigned int sr_get_events(struct scsi_device *sdev)
return DISK_EVENT_EJECT_REQUEST;
else if (med->media_event_code == 2)
return DISK_EVENT_MEDIA_CHANGE;
+ else if (med->media_event_code == 3)
+ return DISK_EVENT_EJECT_REQUEST;
return 0;
}

diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c
index b370144682ed..a2f8130e18fe 100644
--- a/drivers/xen/events/events_base.c
+++ b/drivers/xen/events/events_base.c
@@ -524,6 +524,9 @@ static void xen_irq_lateeoi_locked(struct irq_info *info, bool spurious)
}

info->eoi_time = 0;
+
+ /* is_active hasn't been reset yet, do it now. */
+ smp_store_release(&info->is_active, 0);
do_unmask(info, EVT_MASK_REASON_EOI_PENDING);
}

@@ -1780,10 +1783,22 @@ static void lateeoi_ack_dynirq(struct irq_data *data)
struct irq_info *info = info_for_irq(data->irq);
evtchn_port_t evtchn = info ? info->evtchn : 0;

- if (VALID_EVTCHN(evtchn)) {
- do_mask(info, EVT_MASK_REASON_EOI_PENDING);
- ack_dynirq(data);
- }
+ if (!VALID_EVTCHN(evtchn))
+ return;
+
+ do_mask(info, EVT_MASK_REASON_EOI_PENDING);
+
+ if (unlikely(irqd_is_setaffinity_pending(data)) &&
+ likely(!irqd_irq_disabled(data))) {
+ do_mask(info, EVT_MASK_REASON_TEMPORARY);
+
+ clear_evtchn(evtchn);
+
+ irq_move_masked_irq(data);
+
+ do_unmask(info, EVT_MASK_REASON_TEMPORARY);
+ } else
+ clear_evtchn(evtchn);
}

static void lateeoi_mask_ack_dynirq(struct irq_data *data)
diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c
index 1ea8fc9ff048..1bc65ecd4bd6 100644
--- a/fs/ext4/block_validity.c
+++ b/fs/ext4/block_validity.c
@@ -171,8 +171,10 @@ static int ext4_data_block_valid_rcu(struct ext4_sb_info *sbi,
else if (start_blk >= (entry->start_blk + entry->count))
n = n->rb_right;
else {
+ if (entry->ino == ino)
+ return 1;
sbi->s_es->s_last_error_block = cpu_to_le64(start_blk);
- return entry->ino == ino;
+ return 0;
}
}
return 1;
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index d67c0035165c..3d323c6c8526 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -118,6 +118,7 @@ enum cpuhp_state {
CPUHP_AP_ARM_L2X0_STARTING,
CPUHP_AP_EXYNOS4_MCT_TIMER_STARTING,
CPUHP_AP_ARM_ARCH_TIMER_STARTING,
+ CPUHP_AP_OMAP_DM_TIMER_STARTING,
CPUHP_AP_ARM_GLOBAL_TIMER_STARTING,
CPUHP_AP_JCORE_TIMER_STARTING,
CPUHP_AP_ARM_TWD_STARTING,
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index e375f2249f52..becf9b1eae5a 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -224,6 +224,7 @@ struct page *follow_devmap_pud(struct vm_area_struct *vma, unsigned long addr,
extern vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t orig_pmd);

extern struct page *huge_zero_page;
+extern unsigned long huge_zero_pfn;

static inline bool is_huge_zero_page(struct page *page)
{
@@ -232,7 +233,7 @@ static inline bool is_huge_zero_page(struct page *page)

static inline bool is_huge_zero_pmd(pmd_t pmd)
{
- return is_huge_zero_page(pmd_page(pmd));
+ return READ_ONCE(huge_zero_pfn) == pmd_pfn(pmd) && pmd_present(pmd);
}

static inline bool is_huge_zero_pud(pud_t pud)
@@ -342,6 +343,11 @@ static inline bool is_huge_zero_page(struct page *page)
return false;
}

+static inline bool is_huge_zero_pmd(pmd_t pmd)
+{
+ return false;
+}
+
static inline bool is_huge_zero_pud(pud_t pud)
{
return false;
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index c129c1c14c5f..2df83a659818 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -477,17 +477,6 @@ static inline int hstate_index(struct hstate *h)
return h - hstates;
}

-pgoff_t __basepage_index(struct page *page);
-
-/* Return page->index in PAGE_SIZE units */
-static inline pgoff_t basepage_index(struct page *page)
-{
- if (!PageCompound(page))
- return page->index;
-
- return __basepage_index(page);
-}
-
extern int dissolve_free_huge_page(struct page *page);
extern int dissolve_free_huge_pages(unsigned long start_pfn,
unsigned long end_pfn);
@@ -582,11 +571,6 @@ static inline int hstate_index(struct hstate *h)
return 0;
}

-static inline pgoff_t basepage_index(struct page *page)
-{
- return page->index;
-}
-
static inline int dissolve_free_huge_page(struct page *page)
{
return 0;
diff --git a/include/linux/mm.h b/include/linux/mm.h
index f6ecf41aea83..c736c677b876 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1338,6 +1338,7 @@ struct zap_details {
struct address_space *check_mapping; /* Check page->mapping if set */
pgoff_t first_index; /* Lowest page->index to unmap */
pgoff_t last_index; /* Highest page->index to unmap */
+ struct page *single_page; /* Locked page to be unmapped */
};

struct page *_vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
@@ -1428,6 +1429,7 @@ extern vm_fault_t handle_mm_fault(struct vm_area_struct *vma,
extern int fixup_user_fault(struct task_struct *tsk, struct mm_struct *mm,
unsigned long address, unsigned int fault_flags,
bool *unlocked);
+void unmap_mapping_page(struct page *page);
void unmap_mapping_pages(struct address_space *mapping,
pgoff_t start, pgoff_t nr, bool even_cows);
void unmap_mapping_range(struct address_space *mapping,
@@ -1448,6 +1450,7 @@ static inline int fixup_user_fault(struct task_struct *tsk,
BUG();
return -EFAULT;
}
+static inline void unmap_mapping_page(struct page *page) { }
static inline void unmap_mapping_pages(struct address_space *mapping,
pgoff_t start, pgoff_t nr, bool even_cows) { }
static inline void unmap_mapping_range(struct address_space *mapping,
diff --git a/include/linux/mmdebug.h b/include/linux/mmdebug.h
index 2ad72d2c8cc5..5d0767cb424a 100644
--- a/include/linux/mmdebug.h
+++ b/include/linux/mmdebug.h
@@ -37,6 +37,18 @@ void dump_mm(const struct mm_struct *mm);
BUG(); \
} \
} while (0)
+#define VM_WARN_ON_ONCE_PAGE(cond, page) ({ \
+ static bool __section(".data.once") __warned; \
+ int __ret_warn_once = !!(cond); \
+ \
+ if (unlikely(__ret_warn_once && !__warned)) { \
+ dump_page(page, "VM_WARN_ON_ONCE_PAGE(" __stringify(cond)")");\
+ __warned = true; \
+ WARN_ON(1); \
+ } \
+ unlikely(__ret_warn_once); \
+})
+
#define VM_WARN_ON(cond) (void)WARN_ON(cond)
#define VM_WARN_ON_ONCE(cond) (void)WARN_ON_ONCE(cond)
#define VM_WARN_ONCE(cond, format...) (void)WARN_ONCE(cond, format)
@@ -48,6 +60,7 @@ void dump_mm(const struct mm_struct *mm);
#define VM_BUG_ON_MM(cond, mm) VM_BUG_ON(cond)
#define VM_WARN_ON(cond) BUILD_BUG_ON_INVALID(cond)
#define VM_WARN_ON_ONCE(cond) BUILD_BUG_ON_INVALID(cond)
+#define VM_WARN_ON_ONCE_PAGE(cond, page) BUILD_BUG_ON_INVALID(cond)
#define VM_WARN_ONCE(cond, format...) BUILD_BUG_ON_INVALID(cond)
#define VM_WARN(cond, format...) BUILD_BUG_ON_INVALID(cond)
#endif
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index b1bd2186e6d2..33b63b2a163f 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -403,7 +403,7 @@ static inline struct page *read_mapping_page(struct address_space *mapping,
}

/*
- * Get index of the page with in radix-tree
+ * Get index of the page within radix-tree (but not for hugetlb pages).
* (TODO: remove once hugetlb pages will have ->index in PAGE_SIZE)
*/
static inline pgoff_t page_to_index(struct page *page)
@@ -422,15 +422,16 @@ static inline pgoff_t page_to_index(struct page *page)
return pgoff;
}

+extern pgoff_t hugetlb_basepage_index(struct page *page);
+
/*
- * Get the offset in PAGE_SIZE.
- * (TODO: hugepage should have ->index in PAGE_SIZE)
+ * Get the offset in PAGE_SIZE (even for hugetlb pages).
+ * (TODO: hugetlb pages should have ->index in PAGE_SIZE)
*/
static inline pgoff_t page_to_pgoff(struct page *page)
{
- if (unlikely(PageHeadHuge(page)))
- return page->index << compound_order(page);
-
+ if (unlikely(PageHuge(page)))
+ return hugetlb_basepage_index(page);
return page_to_index(page);
}

diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index d7d6d4eb1794..91ccae946716 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -98,7 +98,8 @@ enum ttu_flags {
* do a final flush if necessary */
TTU_RMAP_LOCKED = 0x80, /* do not grab rmap lock:
* caller holds it */
- TTU_SPLIT_FREEZE = 0x100, /* freeze pte under splitting thp */
+ TTU_SPLIT_FREEZE = 0x100, /* freeze pte under splitting thp */
+ TTU_SYNC = 0x200, /* avoid racy checks with PVMW_SYNC */
};

#ifdef CONFIG_MMU
diff --git a/kernel/futex.c b/kernel/futex.c
index 526ebcff5a0a..3c67da9b8408 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -719,7 +719,7 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, int rw)

key->both.offset |= FUT_OFF_INODE; /* inode-based key */
key->shared.i_seq = get_inode_sequence_number(inode);
- key->shared.pgoff = basepage_index(tail);
+ key->shared.pgoff = page_to_pgoff(tail);
rcu_read_unlock();
}

diff --git a/kernel/kthread.c b/kernel/kthread.c
index 81abfac35127..9750f4f7f901 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -1010,8 +1010,38 @@ void kthread_flush_work(struct kthread_work *work)
EXPORT_SYMBOL_GPL(kthread_flush_work);

/*
- * This function removes the work from the worker queue. Also it makes sure
- * that it won't get queued later via the delayed work's timer.
+ * Make sure that the timer is neither set nor running and could
+ * not manipulate the work list_head any longer.
+ *
+ * The function is called under worker->lock. The lock is temporary
+ * released but the timer can't be set again in the meantime.
+ */
+static void kthread_cancel_delayed_work_timer(struct kthread_work *work,
+ unsigned long *flags)
+{
+ struct kthread_delayed_work *dwork =
+ container_of(work, struct kthread_delayed_work, work);
+ struct kthread_worker *worker = work->worker;
+
+ /*
+ * del_timer_sync() must be called to make sure that the timer
+ * callback is not running. The lock must be temporary released
+ * to avoid a deadlock with the callback. In the meantime,
+ * any queuing is blocked by setting the canceling counter.
+ */
+ work->canceling++;
+ spin_unlock_irqrestore(&worker->lock, *flags);
+ del_timer_sync(&dwork->timer);
+ spin_lock_irqsave(&worker->lock, *flags);
+ work->canceling--;
+}
+
+/*
+ * This function removes the work from the worker queue.
+ *
+ * It is called under worker->lock. The caller must make sure that
+ * the timer used by delayed work is not running, e.g. by calling
+ * kthread_cancel_delayed_work_timer().
*
* The work might still be in use when this function finishes. See the
* current_work proceed by the worker.
@@ -1019,28 +1049,8 @@ EXPORT_SYMBOL_GPL(kthread_flush_work);
* Return: %true if @work was pending and successfully canceled,
* %false if @work was not pending
*/
-static bool __kthread_cancel_work(struct kthread_work *work, bool is_dwork,
- unsigned long *flags)
+static bool __kthread_cancel_work(struct kthread_work *work)
{
- /* Try to cancel the timer if exists. */
- if (is_dwork) {
- struct kthread_delayed_work *dwork =
- container_of(work, struct kthread_delayed_work, work);
- struct kthread_worker *worker = work->worker;
-
- /*
- * del_timer_sync() must be called to make sure that the timer
- * callback is not running. The lock must be temporary released
- * to avoid a deadlock with the callback. In the meantime,
- * any queuing is blocked by setting the canceling counter.
- */
- work->canceling++;
- spin_unlock_irqrestore(&worker->lock, *flags);
- del_timer_sync(&dwork->timer);
- spin_lock_irqsave(&worker->lock, *flags);
- work->canceling--;
- }
-
/*
* Try to remove the work from a worker list. It might either
* be from worker->work_list or from worker->delayed_work_list.
@@ -1093,11 +1103,23 @@ bool kthread_mod_delayed_work(struct kthread_worker *worker,
/* Work must not be used with >1 worker, see kthread_queue_work() */
WARN_ON_ONCE(work->worker != worker);

- /* Do not fight with another command that is canceling this work. */
+ /*
+ * Temporary cancel the work but do not fight with another command
+ * that is canceling the work as well.
+ *
+ * It is a bit tricky because of possible races with another
+ * mod_delayed_work() and cancel_delayed_work() callers.
+ *
+ * The timer must be canceled first because worker->lock is released
+ * when doing so. But the work can be removed from the queue (list)
+ * only when it can be queued again so that the return value can
+ * be used for reference counting.
+ */
+ kthread_cancel_delayed_work_timer(work, &flags);
if (work->canceling)
goto out;
+ ret = __kthread_cancel_work(work);

- ret = __kthread_cancel_work(work, true, &flags);
fast_queue:
__kthread_queue_delayed_work(worker, dwork, delay);
out:
@@ -1119,7 +1141,10 @@ static bool __kthread_cancel_work_sync(struct kthread_work *work, bool is_dwork)
/* Work must not be used with >1 worker, see kthread_queue_work(). */
WARN_ON_ONCE(work->worker != worker);

- ret = __kthread_cancel_work(work, is_dwork, &flags);
+ if (is_dwork)
+ kthread_cancel_delayed_work_timer(work, &flags);
+
+ ret = __kthread_cancel_work(work);

if (worker->current_work != work)
goto out_fast;
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 7c374c0fcf0c..4400957d8e4e 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -62,6 +62,7 @@ static struct shrinker deferred_split_shrinker;

static atomic_t huge_zero_refcount;
struct page *huge_zero_page __read_mostly;
+unsigned long huge_zero_pfn __read_mostly = ~0UL;

bool transparent_hugepage_enabled(struct vm_area_struct *vma)
{
@@ -93,6 +94,7 @@ static struct page *get_huge_zero_page(void)
__free_pages(zero_page, compound_order(zero_page));
goto retry;
}
+ WRITE_ONCE(huge_zero_pfn, page_to_pfn(zero_page));

/* We take additional reference here. It will be put back by shrinker */
atomic_set(&huge_zero_refcount, 2);
@@ -142,6 +144,7 @@ static unsigned long shrink_huge_zero_page_scan(struct shrinker *shrink,
if (atomic_cmpxchg(&huge_zero_refcount, 1, 0) == 1) {
struct page *zero_page = xchg(&huge_zero_page, NULL);
BUG_ON(zero_page == NULL);
+ WRITE_ONCE(huge_zero_pfn, ~0UL);
__free_pages(zero_page, compound_order(zero_page));
return HPAGE_PMD_NR;
}
@@ -2125,7 +2128,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
count_vm_event(THP_SPLIT_PMD);

if (!vma_is_anonymous(vma)) {
- _pmd = pmdp_huge_clear_flush_notify(vma, haddr, pmd);
+ old_pmd = pmdp_huge_clear_flush_notify(vma, haddr, pmd);
/*
* We are going to unmap this huge page. So
* just go ahead and zap it
@@ -2134,16 +2137,25 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
zap_deposited_table(mm, pmd);
if (vma_is_dax(vma))
return;
- page = pmd_page(_pmd);
- if (!PageDirty(page) && pmd_dirty(_pmd))
- set_page_dirty(page);
- if (!PageReferenced(page) && pmd_young(_pmd))
- SetPageReferenced(page);
- page_remove_rmap(page, true);
- put_page(page);
+ if (unlikely(is_pmd_migration_entry(old_pmd))) {
+ swp_entry_t entry;
+
+ entry = pmd_to_swp_entry(old_pmd);
+ page = migration_entry_to_page(entry);
+ } else {
+ page = pmd_page(old_pmd);
+ if (!PageDirty(page) && pmd_dirty(old_pmd))
+ set_page_dirty(page);
+ if (!PageReferenced(page) && pmd_young(old_pmd))
+ SetPageReferenced(page);
+ page_remove_rmap(page, true);
+ put_page(page);
+ }
add_mm_counter(mm, mm_counter_file(page), -HPAGE_PMD_NR);
return;
- } else if (pmd_trans_huge(*pmd) && is_huge_zero_pmd(*pmd)) {
+ }
+
+ if (is_huge_zero_pmd(*pmd)) {
/*
* FIXME: Do we want to invalidate secondary mmu by calling
* mmu_notifier_invalidate_range() see comments below inside
@@ -2418,16 +2430,16 @@ void vma_adjust_trans_huge(struct vm_area_struct *vma,
static void unmap_page(struct page *page)
{
enum ttu_flags ttu_flags = TTU_IGNORE_MLOCK | TTU_IGNORE_ACCESS |
- TTU_RMAP_LOCKED | TTU_SPLIT_HUGE_PMD;
- bool unmap_success;
+ TTU_RMAP_LOCKED | TTU_SPLIT_HUGE_PMD | TTU_SYNC;

VM_BUG_ON_PAGE(!PageHead(page), page);

if (PageAnon(page))
ttu_flags |= TTU_SPLIT_FREEZE;

- unmap_success = try_to_unmap(page, ttu_flags);
- VM_BUG_ON_PAGE(!unmap_success, page);
+ try_to_unmap(page, ttu_flags);
+
+ VM_WARN_ON_ONCE_PAGE(page_mapped(page), page);
}

static void remap_page(struct page *page)
@@ -2686,7 +2698,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
struct pglist_data *pgdata = NODE_DATA(page_to_nid(head));
struct anon_vma *anon_vma = NULL;
struct address_space *mapping = NULL;
- int count, mapcount, extra_pins, ret;
+ int extra_pins, ret;
bool mlocked;
unsigned long flags;
pgoff_t end;
@@ -2748,7 +2760,6 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)

mlocked = PageMlocked(page);
unmap_page(head);
- VM_BUG_ON_PAGE(compound_mapcount(head), head);

/* Make sure the page is not on per-CPU pagevec as it takes pin */
if (mlocked)
@@ -2774,9 +2785,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)

/* Prevent deferred_split_scan() touching ->_refcount */
spin_lock(&pgdata->split_queue_lock);
- count = page_count(head);
- mapcount = total_mapcount(head);
- if (!mapcount && page_ref_freeze(head, 1 + extra_pins)) {
+ if (page_ref_freeze(head, 1 + extra_pins)) {
if (!list_empty(page_deferred_list(head))) {
pgdata->split_queue_len--;
list_del(page_deferred_list(head));
@@ -2792,16 +2801,9 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
} else
ret = 0;
} else {
- if (IS_ENABLED(CONFIG_DEBUG_VM) && mapcount) {
- pr_alert("total_mapcount: %u, page_count(): %u\n",
- mapcount, count);
- if (PageTail(page))
- dump_page(head, NULL);
- dump_page(page, "total_mapcount(head) > 0");
- BUG();
- }
spin_unlock(&pgdata->split_queue_lock);
-fail: if (mapping)
+fail:
+ if (mapping)
xa_unlock(&mapping->i_pages);
spin_unlock_irqrestore(zone_lru_lock(page_zone(head)), flags);
remap_page(head);
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index c69f12e4c149..ebcf26bc4cd4 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1391,15 +1391,12 @@ int PageHeadHuge(struct page *page_head)
return get_compound_page_dtor(page_head) == free_huge_page;
}

-pgoff_t __basepage_index(struct page *page)
+pgoff_t hugetlb_basepage_index(struct page *page)
{
struct page *page_head = compound_head(page);
pgoff_t index = page_index(page_head);
unsigned long compound_idx;

- if (!PageHuge(page_head))
- return page_index(page);
-
if (compound_order(page_head) >= MAX_ORDER)
compound_idx = page_to_pfn(page) - page_to_pfn(page_head);
else
diff --git a/mm/internal.h b/mm/internal.h
index 397183c8fe47..3a2e973138d3 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -331,27 +331,52 @@ static inline void mlock_migrate_page(struct page *newpage, struct page *page)
extern pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma);

/*
- * At what user virtual address is page expected in @vma?
+ * At what user virtual address is page expected in vma?
+ * Returns -EFAULT if all of the page is outside the range of vma.
+ * If page is a compound head, the entire compound page is considered.
*/
static inline unsigned long
-__vma_address(struct page *page, struct vm_area_struct *vma)
+vma_address(struct page *page, struct vm_area_struct *vma)
{
- pgoff_t pgoff = page_to_pgoff(page);
- return vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
+ pgoff_t pgoff;
+ unsigned long address;
+
+ VM_BUG_ON_PAGE(PageKsm(page), page); /* KSM page->index unusable */
+ pgoff = page_to_pgoff(page);
+ if (pgoff >= vma->vm_pgoff) {
+ address = vma->vm_start +
+ ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
+ /* Check for address beyond vma (or wrapped through 0?) */
+ if (address < vma->vm_start || address >= vma->vm_end)
+ address = -EFAULT;
+ } else if (PageHead(page) &&
+ pgoff + (1UL << compound_order(page)) - 1 >= vma->vm_pgoff) {
+ /* Test above avoids possibility of wrap to 0 on 32-bit */
+ address = vma->vm_start;
+ } else {
+ address = -EFAULT;
+ }
+ return address;
}

+/*
+ * Then at what user virtual address will none of the page be found in vma?
+ * Assumes that vma_address() already returned a good starting address.
+ * If page is a compound head, the entire compound page is considered.
+ */
static inline unsigned long
-vma_address(struct page *page, struct vm_area_struct *vma)
+vma_address_end(struct page *page, struct vm_area_struct *vma)
{
- unsigned long start, end;
-
- start = __vma_address(page, vma);
- end = start + PAGE_SIZE * (hpage_nr_pages(page) - 1);
-
- /* page should be within @vma mapping range */
- VM_BUG_ON_VMA(end < vma->vm_start || start >= vma->vm_end, vma);
-
- return max(start, vma->vm_start);
+ pgoff_t pgoff;
+ unsigned long address;
+
+ VM_BUG_ON_PAGE(PageKsm(page), page); /* KSM page->index unusable */
+ pgoff = page_to_pgoff(page) + (1UL << compound_order(page));
+ address = vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
+ /* Check for address beyond vma (or wrapped through 0?) */
+ if (address < vma->vm_start || address > vma->vm_end)
+ address = vma->vm_end;
+ return address;
}

#else /* !CONFIG_MMU */
diff --git a/mm/memory.c b/mm/memory.c
index c2011c51f15d..49b546cdce0d 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1439,7 +1439,18 @@ static inline unsigned long zap_pmd_range(struct mmu_gather *tlb,
else if (zap_huge_pmd(tlb, vma, pmd, addr))
goto next;
/* fall through */
+ } else if (details && details->single_page &&
+ PageTransCompound(details->single_page) &&
+ next - addr == HPAGE_PMD_SIZE && pmd_none(*pmd)) {
+ spinlock_t *ptl = pmd_lock(tlb->mm, pmd);
+ /*
+ * Take and drop THP pmd lock so that we cannot return
+ * prematurely, while zap_huge_pmd() has cleared *pmd,
+ * but not yet decremented compound_mapcount().
+ */
+ spin_unlock(ptl);
}
+
/*
* Here there can be other concurrent MADV_DONTNEED or
* trans huge page faults running, and if the pmd is
@@ -2924,6 +2935,36 @@ static inline void unmap_mapping_range_tree(struct rb_root_cached *root,
}
}

+/**
+ * unmap_mapping_page() - Unmap single page from processes.
+ * @page: The locked page to be unmapped.
+ *
+ * Unmap this page from any userspace process which still has it mmaped.
+ * Typically, for efficiency, the range of nearby pages has already been
+ * unmapped by unmap_mapping_pages() or unmap_mapping_range(). But once
+ * truncation or invalidation holds the lock on a page, it may find that
+ * the page has been remapped again: and then uses unmap_mapping_page()
+ * to unmap it finally.
+ */
+void unmap_mapping_page(struct page *page)
+{
+ struct address_space *mapping = page->mapping;
+ struct zap_details details = { };
+
+ VM_BUG_ON(!PageLocked(page));
+ VM_BUG_ON(PageTail(page));
+
+ details.check_mapping = mapping;
+ details.first_index = page->index;
+ details.last_index = page->index + hpage_nr_pages(page) - 1;
+ details.single_page = page;
+
+ i_mmap_lock_write(mapping);
+ if (unlikely(!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root)))
+ unmap_mapping_range_tree(&mapping->i_mmap, &details);
+ i_mmap_unlock_write(mapping);
+}
+
/**
* unmap_mapping_pages() - Unmap pages from processes.
* @mapping: The address space containing pages to be unmapped.
diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c
index 11df03e71288..edca78609318 100644
--- a/mm/page_vma_mapped.c
+++ b/mm/page_vma_mapped.c
@@ -111,6 +111,13 @@ static bool check_pte(struct page_vma_mapped_walk *pvmw)
return pfn_in_hpage(pvmw->page, pfn);
}

+static void step_forward(struct page_vma_mapped_walk *pvmw, unsigned long size)
+{
+ pvmw->address = (pvmw->address + size) & ~(size - 1);
+ if (!pvmw->address)
+ pvmw->address = ULONG_MAX;
+}
+
/**
* page_vma_mapped_walk - check if @pvmw->page is mapped in @pvmw->vma at
* @pvmw->address
@@ -139,6 +146,7 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
{
struct mm_struct *mm = pvmw->vma->vm_mm;
struct page *page = pvmw->page;
+ unsigned long end;
pgd_t *pgd;
p4d_t *p4d;
pud_t *pud;
@@ -148,10 +156,11 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
if (pvmw->pmd && !pvmw->pte)
return not_found(pvmw);

- if (pvmw->pte)
- goto next_pte;
+ if (unlikely(PageHuge(page))) {
+ /* The only possible mapping was handled on last iteration */
+ if (pvmw->pte)
+ return not_found(pvmw);

- if (unlikely(PageHuge(pvmw->page))) {
/* when pud is not present, pte will be NULL */
pvmw->pte = huge_pte_offset(mm, pvmw->address,
PAGE_SIZE << compound_order(page));
@@ -164,78 +173,108 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
return not_found(pvmw);
return true;
}
-restart:
- pgd = pgd_offset(mm, pvmw->address);
- if (!pgd_present(*pgd))
- return false;
- p4d = p4d_offset(pgd, pvmw->address);
- if (!p4d_present(*p4d))
- return false;
- pud = pud_offset(p4d, pvmw->address);
- if (!pud_present(*pud))
- return false;
- pvmw->pmd = pmd_offset(pud, pvmw->address);
+
/*
- * Make sure the pmd value isn't cached in a register by the
- * compiler and used as a stale value after we've observed a
- * subsequent update.
+ * Seek to next pte only makes sense for THP.
+ * But more important than that optimization, is to filter out
+ * any PageKsm page: whose page->index misleads vma_address()
+ * and vma_address_end() to disaster.
*/
- pmde = READ_ONCE(*pvmw->pmd);
- if (pmd_trans_huge(pmde) || is_pmd_migration_entry(pmde)) {
- pvmw->ptl = pmd_lock(mm, pvmw->pmd);
- if (likely(pmd_trans_huge(*pvmw->pmd))) {
- if (pvmw->flags & PVMW_MIGRATION)
- return not_found(pvmw);
- if (pmd_page(*pvmw->pmd) != page)
- return not_found(pvmw);
- return true;
- } else if (!pmd_present(*pvmw->pmd)) {
- if (thp_migration_supported()) {
- if (!(pvmw->flags & PVMW_MIGRATION))
+ end = PageTransCompound(page) ?
+ vma_address_end(page, pvmw->vma) :
+ pvmw->address + PAGE_SIZE;
+ if (pvmw->pte)
+ goto next_pte;
+restart:
+ do {
+ pgd = pgd_offset(mm, pvmw->address);
+ if (!pgd_present(*pgd)) {
+ step_forward(pvmw, PGDIR_SIZE);
+ continue;
+ }
+ p4d = p4d_offset(pgd, pvmw->address);
+ if (!p4d_present(*p4d)) {
+ step_forward(pvmw, P4D_SIZE);
+ continue;
+ }
+ pud = pud_offset(p4d, pvmw->address);
+ if (!pud_present(*pud)) {
+ step_forward(pvmw, PUD_SIZE);
+ continue;
+ }
+
+ pvmw->pmd = pmd_offset(pud, pvmw->address);
+ /*
+ * Make sure the pmd value isn't cached in a register by the
+ * compiler and used as a stale value after we've observed a
+ * subsequent update.
+ */
+ pmde = READ_ONCE(*pvmw->pmd);
+
+ if (pmd_trans_huge(pmde) || is_pmd_migration_entry(pmde)) {
+ pvmw->ptl = pmd_lock(mm, pvmw->pmd);
+ pmde = *pvmw->pmd;
+ if (likely(pmd_trans_huge(pmde))) {
+ if (pvmw->flags & PVMW_MIGRATION)
return not_found(pvmw);
- if (is_migration_entry(pmd_to_swp_entry(*pvmw->pmd))) {
- swp_entry_t entry = pmd_to_swp_entry(*pvmw->pmd);
+ if (pmd_page(pmde) != page)
+ return not_found(pvmw);
+ return true;
+ }
+ if (!pmd_present(pmde)) {
+ swp_entry_t entry;

- if (migration_entry_to_page(entry) != page)
- return not_found(pvmw);
- return true;
- }
+ if (!thp_migration_supported() ||
+ !(pvmw->flags & PVMW_MIGRATION))
+ return not_found(pvmw);
+ entry = pmd_to_swp_entry(pmde);
+ if (!is_migration_entry(entry) ||
+ migration_entry_to_page(entry) != page)
+ return not_found(pvmw);
+ return true;
}
- return not_found(pvmw);
- } else {
/* THP pmd was split under us: handle on pte level */
spin_unlock(pvmw->ptl);
pvmw->ptl = NULL;
+ } else if (!pmd_present(pmde)) {
+ /*
+ * If PVMW_SYNC, take and drop THP pmd lock so that we
+ * cannot return prematurely, while zap_huge_pmd() has
+ * cleared *pmd but not decremented compound_mapcount().
+ */
+ if ((pvmw->flags & PVMW_SYNC) &&
+ PageTransCompound(page)) {
+ spinlock_t *ptl = pmd_lock(mm, pvmw->pmd);
+
+ spin_unlock(ptl);
+ }
+ step_forward(pvmw, PMD_SIZE);
+ continue;
}
- } else if (!pmd_present(pmde)) {
- return false;
- }
- if (!map_pte(pvmw))
- goto next_pte;
- while (1) {
+ if (!map_pte(pvmw))
+ goto next_pte;
+this_pte:
if (check_pte(pvmw))
return true;
next_pte:
- /* Seek to next pte only makes sense for THP */
- if (!PageTransHuge(pvmw->page) || PageHuge(pvmw->page))
- return not_found(pvmw);
do {
pvmw->address += PAGE_SIZE;
- if (pvmw->address >= pvmw->vma->vm_end ||
- pvmw->address >=
- __vma_address(pvmw->page, pvmw->vma) +
- hpage_nr_pages(pvmw->page) * PAGE_SIZE)
+ if (pvmw->address >= end)
return not_found(pvmw);
/* Did we cross page table boundary? */
- if (pvmw->address % PMD_SIZE == 0) {
- pte_unmap(pvmw->pte);
+ if ((pvmw->address & (PMD_SIZE - PAGE_SIZE)) == 0) {
if (pvmw->ptl) {
spin_unlock(pvmw->ptl);
pvmw->ptl = NULL;
}
+ pte_unmap(pvmw->pte);
+ pvmw->pte = NULL;
goto restart;
- } else {
- pvmw->pte++;
+ }
+ pvmw->pte++;
+ if ((pvmw->flags & PVMW_SYNC) && !pvmw->ptl) {
+ pvmw->ptl = pte_lockptr(mm, pvmw->pmd);
+ spin_lock(pvmw->ptl);
}
} while (pte_none(*pvmw->pte));

@@ -243,7 +282,10 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
pvmw->ptl = pte_lockptr(mm, pvmw->pmd);
spin_lock(pvmw->ptl);
}
- }
+ goto this_pte;
+ } while (pvmw->address < end);
+
+ return false;
}

/**
@@ -262,14 +304,10 @@ int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma)
.vma = vma,
.flags = PVMW_SYNC,
};
- unsigned long start, end;
-
- start = __vma_address(page, vma);
- end = start + PAGE_SIZE * (hpage_nr_pages(page) - 1);

- if (unlikely(end < vma->vm_start || start >= vma->vm_end))
+ pvmw.address = vma_address(page, vma);
+ if (pvmw.address == -EFAULT)
return 0;
- pvmw.address = max(start, vma->vm_start);
if (!page_vma_mapped_walk(&pvmw))
return 0;
page_vma_mapped_walk_done(&pvmw);
diff --git a/mm/pgtable-generic.c b/mm/pgtable-generic.c
index cf2af04b34b9..36770fcdc358 100644
--- a/mm/pgtable-generic.c
+++ b/mm/pgtable-generic.c
@@ -125,8 +125,8 @@ pmd_t pmdp_huge_clear_flush(struct vm_area_struct *vma, unsigned long address,
{
pmd_t pmd;
VM_BUG_ON(address & ~HPAGE_PMD_MASK);
- VM_BUG_ON((pmd_present(*pmdp) && !pmd_trans_huge(*pmdp) &&
- !pmd_devmap(*pmdp)) || !pmd_present(*pmdp));
+ VM_BUG_ON(pmd_present(*pmdp) && !pmd_trans_huge(*pmdp) &&
+ !pmd_devmap(*pmdp));
pmd = pmdp_huge_get_and_clear(vma->vm_mm, address, pmdp);
flush_pmd_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
return pmd;
diff --git a/mm/rmap.c b/mm/rmap.c
index 1bd94ea62f7f..699f445e3e78 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -686,7 +686,6 @@ static bool should_defer_flush(struct mm_struct *mm, enum ttu_flags flags)
*/
unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma)
{
- unsigned long address;
if (PageAnon(page)) {
struct anon_vma *page__anon_vma = page_anon_vma(page);
/*
@@ -696,15 +695,13 @@ unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma)
if (!vma->anon_vma || !page__anon_vma ||
vma->anon_vma->root != page__anon_vma->root)
return -EFAULT;
- } else if (page->mapping) {
- if (!vma->vm_file || vma->vm_file->f_mapping != page->mapping)
- return -EFAULT;
- } else
+ } else if (!vma->vm_file) {
return -EFAULT;
- address = __vma_address(page, vma);
- if (unlikely(address < vma->vm_start || address >= vma->vm_end))
+ } else if (vma->vm_file->f_mapping != compound_head(page)->mapping) {
return -EFAULT;
- return address;
+ }
+
+ return vma_address(page, vma);
}

pmd_t *mm_find_pmd(struct mm_struct *mm, unsigned long address)
@@ -896,7 +893,7 @@ static bool page_mkclean_one(struct page *page, struct vm_area_struct *vma,
* We have to assume the worse case ie pmd for invalidation. Note that
* the page can not be free from this function.
*/
- end = min(vma->vm_end, start + (PAGE_SIZE << compound_order(page)));
+ end = vma_address_end(page, vma);
mmu_notifier_invalidate_range_start(vma->vm_mm, start, end);

while (page_vma_mapped_walk(&pvmw)) {
@@ -1348,6 +1345,15 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
unsigned long start = address, end;
enum ttu_flags flags = (enum ttu_flags)arg;

+ /*
+ * When racing against e.g. zap_pte_range() on another cpu,
+ * in between its ptep_get_and_clear_full() and page_remove_rmap(),
+ * try_to_unmap() may return false when it is about to become true,
+ * if page table locking is skipped: use TTU_SYNC to wait for that.
+ */
+ if (flags & TTU_SYNC)
+ pvmw.flags = PVMW_SYNC;
+
/* munlock has nothing to gain from examining un-locked vmas */
if ((flags & TTU_MUNLOCK) && !(vma->vm_flags & VM_LOCKED))
return true;
@@ -1369,7 +1375,8 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
* Note that the page can not be free in this function as call of
* try_to_unmap() must hold a reference on the page.
*/
- end = min(vma->vm_end, start + (PAGE_SIZE << compound_order(page)));
+ end = PageKsm(page) ?
+ address + PAGE_SIZE : vma_address_end(page, vma);
if (PageHuge(page)) {
/*
* If sharing is possible, start and end will be adjusted
@@ -1682,9 +1689,9 @@ static bool invalid_migration_vma(struct vm_area_struct *vma, void *arg)
return is_vma_temporary_stack(vma);
}

-static int page_mapcount_is_zero(struct page *page)
+static int page_not_mapped(struct page *page)
{
- return !total_mapcount(page);
+ return !page_mapped(page);
}

/**
@@ -1702,7 +1709,7 @@ bool try_to_unmap(struct page *page, enum ttu_flags flags)
struct rmap_walk_control rwc = {
.rmap_one = try_to_unmap_one,
.arg = (void *)flags,
- .done = page_mapcount_is_zero,
+ .done = page_not_mapped,
.anon_lock = page_lock_anon_vma_read,
};

@@ -1723,14 +1730,15 @@ bool try_to_unmap(struct page *page, enum ttu_flags flags)
else
rmap_walk(page, &rwc);

- return !page_mapcount(page) ? true : false;
+ /*
+ * When racing against e.g. zap_pte_range() on another cpu,
+ * in between its ptep_get_and_clear_full() and page_remove_rmap(),
+ * try_to_unmap() may return false when it is about to become true,
+ * if page table locking is skipped: use TTU_SYNC to wait for that.
+ */
+ return !page_mapcount(page);
}

-static int page_not_mapped(struct page *page)
-{
- return !page_mapped(page);
-};
-
/**
* try_to_munlock - try to munlock a page
* @page: the page to be munlocked
@@ -1825,6 +1833,7 @@ static void rmap_walk_anon(struct page *page, struct rmap_walk_control *rwc,
struct vm_area_struct *vma = avc->vma;
unsigned long address = vma_address(page, vma);

+ VM_BUG_ON_VMA(address == -EFAULT, vma);
cond_resched();

if (rwc->invalid_vma && rwc->invalid_vma(vma, rwc->arg))
@@ -1879,6 +1888,7 @@ static void rmap_walk_file(struct page *page, struct rmap_walk_control *rwc,
pgoff_start, pgoff_end) {
unsigned long address = vma_address(page, vma);

+ VM_BUG_ON_VMA(address == -EFAULT, vma);
cond_resched();

if (rwc->invalid_vma && rwc->invalid_vma(vma, rwc->arg))
diff --git a/mm/truncate.c b/mm/truncate.c
index 71b65aab8077..43c73db17a0a 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -175,13 +175,10 @@ void do_invalidatepage(struct page *page, unsigned int offset,
* its lock, b) when a concurrent invalidate_mapping_pages got there first and
* c) when tmpfs swizzles a page between a tmpfs inode and swapper_space.
*/
-static void
-truncate_cleanup_page(struct address_space *mapping, struct page *page)
+static void truncate_cleanup_page(struct page *page)
{
- if (page_mapped(page)) {
- pgoff_t nr = PageTransHuge(page) ? HPAGE_PMD_NR : 1;
- unmap_mapping_pages(mapping, page->index, nr, false);
- }
+ if (page_mapped(page))
+ unmap_mapping_page(page);

if (page_has_private(page))
do_invalidatepage(page, 0, PAGE_SIZE);
@@ -226,7 +223,7 @@ int truncate_inode_page(struct address_space *mapping, struct page *page)
if (page->mapping != mapping)
return -EIO;

- truncate_cleanup_page(mapping, page);
+ truncate_cleanup_page(page);
delete_from_page_cache(page);
return 0;
}
@@ -364,7 +361,7 @@ void truncate_inode_pages_range(struct address_space *mapping,
pagevec_add(&locked_pvec, page);
}
for (i = 0; i < pagevec_count(&locked_pvec); i++)
- truncate_cleanup_page(mapping, locked_pvec.pages[i]);
+ truncate_cleanup_page(locked_pvec.pages[i]);
delete_from_page_cache_batch(mapping, &locked_pvec);
for (i = 0; i < pagevec_count(&locked_pvec); i++)
unlock_page(locked_pvec.pages[i]);
@@ -703,6 +700,16 @@ int invalidate_inode_pages2_range(struct address_space *mapping,
continue;
}

+ if (!did_range_unmap && page_mapped(page)) {
+ /*
+ * If page is mapped, before taking its lock,
+ * zap the rest of the file in one hit.
+ */
+ unmap_mapping_pages(mapping, index,
+ (1 + end - index), false);
+ did_range_unmap = 1;
+ }
+
lock_page(page);
WARN_ON(page_to_index(page) != index);
if (page->mapping != mapping) {
@@ -710,23 +717,11 @@ int invalidate_inode_pages2_range(struct address_space *mapping,
continue;
}
wait_on_page_writeback(page);
- if (page_mapped(page)) {
- if (!did_range_unmap) {
- /*
- * Zap the rest of the file in one hit.
- */
- unmap_mapping_pages(mapping, index,
- (1 + end - index), false);
- did_range_unmap = 1;
- } else {
- /*
- * Just zap this page
- */
- unmap_mapping_pages(mapping, index,
- 1, false);
- }
- }
+
+ if (page_mapped(page))
+ unmap_mapping_page(page);
BUG_ON(page_mapped(page));
+
ret2 = do_launder_page(mapping, page);
if (ret2 == 0) {
if (!invalidate_complete_page2(mapping, page))

Next message: Greg Kroah-Hartman: "Linux 5.4.131"
Previous message: Greg Kroah-Hartman: "Linux 4.19.197"
In reply to: Greg Kroah-Hartman: "Linux 4.19.197"
Messages sorted by: [ date ] [ thread ] [ subject ] [ author ]