From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
To: linux-kernel@vger.kernel.org, akpm@linux-foundation.org,
torvalds@linux-foundation.org, stable@vger.kernel.org
Cc: lwn@lwn.net, jslaby@suse.cz,
Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Subject: Re: Linux 5.19.6
Date: Wed, 31 Aug 2022 18:26:45 +0200 [thread overview]
Message-ID: <166196320419649@kroah.com> (raw)
In-Reply-To: <16619632046269@kroah.com>
diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu
index bcc974d276dc..3cda940108f6 100644
--- a/Documentation/ABI/testing/sysfs-devices-system-cpu
+++ b/Documentation/ABI/testing/sysfs-devices-system-cpu
@@ -527,6 +527,7 @@ What: /sys/devices/system/cpu/vulnerabilities
/sys/devices/system/cpu/vulnerabilities/tsx_async_abort
/sys/devices/system/cpu/vulnerabilities/itlb_multihit
/sys/devices/system/cpu/vulnerabilities/mmio_stale_data
+ /sys/devices/system/cpu/vulnerabilities/retbleed
Date: January 2018
Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>
Description: Information about CPU vulnerabilities
diff --git a/Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst b/Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst
index 9393c50b5afc..c98fd11907cc 100644
--- a/Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst
+++ b/Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst
@@ -230,6 +230,20 @@ The possible values in this file are:
* - 'Mitigation: Clear CPU buffers'
- The processor is vulnerable and the CPU buffer clearing mitigation is
enabled.
+ * - 'Unknown: No mitigations'
+ - The processor vulnerability status is unknown because it is
+ out of Servicing period. Mitigation is not attempted.
+
+Definitions:
+------------
+
+Servicing period: The process of providing functional and security updates to
+Intel processors or platforms, utilizing the Intel Platform Update (IPU)
+process or other similar mechanisms.
+
+End of Servicing Updates (ESU): ESU is the date at which Intel will no
+longer provide Servicing, such as through IPU or other similar update
+processes. ESU dates will typically be aligned to end of quarter.
If the processor is vulnerable then the following information is appended to
the above information:
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index e4fe443bea77..1b38d0f70677 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -5260,6 +5260,8 @@
rodata= [KNL]
on Mark read-only kernel memory as read-only (default).
off Leave read-only kernel memory writable for debugging.
+ full Mark read-only kernel memory and aliases as read-only
+ [arm64]
rockchip.usb_uart
Enable the uart passthrough on the designated usb port
diff --git a/Documentation/admin-guide/sysctl/net.rst b/Documentation/admin-guide/sysctl/net.rst
index fcd650bdbc7e..01d985819783 100644
--- a/Documentation/admin-guide/sysctl/net.rst
+++ b/Documentation/admin-guide/sysctl/net.rst
@@ -271,7 +271,7 @@ poll cycle or the number of packets processed reaches netdev_budget.
netdev_max_backlog
------------------
-Maximum number of packets, queued on the INPUT side, when the interface
+Maximum number of packets, queued on the INPUT side, when the interface
receives packets faster than kernel can process them.
netdev_rss_key
diff --git a/Makefile b/Makefile
index 1c4f1ecb9348..cb68101ea070 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
VERSION = 5
PATCHLEVEL = 19
-SUBLEVEL = 5
+SUBLEVEL = 6
EXTRAVERSION =
NAME = Superb Owl
diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h
index 9bb1873f5295..6f86b7ab6c28 100644
--- a/arch/arm64/include/asm/fpsimd.h
+++ b/arch/arm64/include/asm/fpsimd.h
@@ -153,7 +153,7 @@ struct vl_info {
#ifdef CONFIG_ARM64_SVE
-extern void sve_alloc(struct task_struct *task);
+extern void sve_alloc(struct task_struct *task, bool flush);
extern void fpsimd_release_task(struct task_struct *task);
extern void fpsimd_sync_to_sve(struct task_struct *task);
extern void fpsimd_force_sync_to_sve(struct task_struct *task);
@@ -256,7 +256,7 @@ size_t sve_state_size(struct task_struct const *task);
#else /* ! CONFIG_ARM64_SVE */
-static inline void sve_alloc(struct task_struct *task) { }
+static inline void sve_alloc(struct task_struct *task, bool flush) { }
static inline void fpsimd_release_task(struct task_struct *task) { }
static inline void sve_sync_to_fpsimd(struct task_struct *task) { }
static inline void sve_sync_from_fpsimd_zeropad(struct task_struct *task) { }
diff --git a/arch/arm64/include/asm/setup.h b/arch/arm64/include/asm/setup.h
index 6437df661700..f4af547ef54c 100644
--- a/arch/arm64/include/asm/setup.h
+++ b/arch/arm64/include/asm/setup.h
@@ -3,6 +3,8 @@
#ifndef __ARM64_ASM_SETUP_H
#define __ARM64_ASM_SETUP_H
+#include <linux/string.h>
+
#include <uapi/asm/setup.h>
void *get_early_fdt_ptr(void);
@@ -14,4 +16,19 @@ void early_fdt_map(u64 dt_phys);
extern phys_addr_t __fdt_pointer __initdata;
extern u64 __cacheline_aligned boot_args[4];
+static inline bool arch_parse_debug_rodata(char *arg)
+{
+ extern bool rodata_enabled;
+ extern bool rodata_full;
+
+ if (arg && !strcmp(arg, "full")) {
+ rodata_enabled = true;
+ rodata_full = true;
+ return true;
+ }
+
+ return false;
+}
+#define arch_parse_debug_rodata arch_parse_debug_rodata
+
#endif
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index 6b92989f4cc2..b374e258f705 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -208,6 +208,8 @@ static const struct arm64_cpu_capabilities arm64_repeat_tlbi_list[] = {
#ifdef CONFIG_ARM64_ERRATUM_1286807
{
ERRATA_MIDR_RANGE(MIDR_CORTEX_A76, 0, 0, 3, 0),
+ },
+ {
/* Kryo4xx Gold (rcpe to rfpe) => (r0p0 to r3p0) */
ERRATA_MIDR_RANGE(MIDR_QCOM_KRYO_4XX_GOLD, 0xc, 0xe, 0xf, 0xe),
},
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index aecf3071efdd..52f7ffdffbcb 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -716,10 +716,12 @@ size_t sve_state_size(struct task_struct const *task)
* do_sve_acc() case, there is no ABI requirement to hide stale data
* written previously be task.
*/
-void sve_alloc(struct task_struct *task)
+void sve_alloc(struct task_struct *task, bool flush)
{
if (task->thread.sve_state) {
- memset(task->thread.sve_state, 0, sve_state_size(task));
+ if (flush)
+ memset(task->thread.sve_state, 0,
+ sve_state_size(task));
return;
}
@@ -1389,7 +1391,7 @@ void do_sve_acc(unsigned long esr, struct pt_regs *regs)
return;
}
- sve_alloc(current);
+ sve_alloc(current, true);
if (!current->thread.sve_state) {
force_sig(SIGKILL);
return;
@@ -1440,7 +1442,7 @@ void do_sme_acc(unsigned long esr, struct pt_regs *regs)
return;
}
- sve_alloc(current);
+ sve_alloc(current, false);
sme_alloc(current);
if (!current->thread.sve_state || !current->thread.za_state) {
force_sig(SIGKILL);
@@ -1461,17 +1463,6 @@ void do_sme_acc(unsigned long esr, struct pt_regs *regs)
fpsimd_bind_task_to_cpu();
}
- /*
- * If SVE was not already active initialise the SVE registers,
- * any non-shared state between the streaming and regular SVE
- * registers is architecturally guaranteed to be zeroed when
- * we enter streaming mode. We do not need to initialize ZA
- * since ZA must be disabled at this point and enabling ZA is
- * architecturally defined to zero ZA.
- */
- if (system_supports_sve() && !test_thread_flag(TIF_SVE))
- sve_init_regs();
-
put_cpu_fpsimd_context();
}
diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
index 21da83187a60..eb7c08dfb834 100644
--- a/arch/arm64/kernel/ptrace.c
+++ b/arch/arm64/kernel/ptrace.c
@@ -882,7 +882,7 @@ static int sve_set_common(struct task_struct *target,
* state and ensure there's storage.
*/
if (target->thread.svcr != old_svcr)
- sve_alloc(target);
+ sve_alloc(target, true);
}
/* Registers: FPSIMD-only case */
@@ -912,7 +912,7 @@ static int sve_set_common(struct task_struct *target,
goto out;
}
- sve_alloc(target);
+ sve_alloc(target, true);
if (!target->thread.sve_state) {
ret = -ENOMEM;
clear_tsk_thread_flag(target, TIF_SVE);
@@ -1082,7 +1082,7 @@ static int za_set(struct task_struct *target,
/* Ensure there is some SVE storage for streaming mode */
if (!target->thread.sve_state) {
- sve_alloc(target);
+ sve_alloc(target, false);
if (!target->thread.sve_state) {
clear_thread_flag(TIF_SME);
ret = -ENOMEM;
diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
index b0980fbb6bc7..8bb631bf9464 100644
--- a/arch/arm64/kernel/signal.c
+++ b/arch/arm64/kernel/signal.c
@@ -307,7 +307,7 @@ static int restore_sve_fpsimd_context(struct user_ctxs *user)
fpsimd_flush_task_state(current);
/* From now, fpsimd_thread_switch() won't touch thread.sve_state */
- sve_alloc(current);
+ sve_alloc(current, true);
if (!current->thread.sve_state) {
clear_thread_flag(TIF_SVE);
return -ENOMEM;
@@ -922,6 +922,16 @@ static void setup_return(struct pt_regs *regs, struct k_sigaction *ka,
/* Signal handlers are invoked with ZA and streaming mode disabled */
if (system_supports_sme()) {
+ /*
+ * If we were in streaming mode the saved register
+ * state was SVE but we will exit SM and use the
+ * FPSIMD register state - flush the saved FPSIMD
+ * register state in case it gets loaded.
+ */
+ if (current->thread.svcr & SVCR_SM_MASK)
+ memset(¤t->thread.uw.fpsimd_state, 0,
+ sizeof(current->thread.uw.fpsimd_state));
+
current->thread.svcr &= ~(SVCR_ZA_MASK |
SVCR_SM_MASK);
sme_smstop();
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 626ec32873c6..1de896e4d334 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -625,24 +625,6 @@ static void __init map_kernel_segment(pgd_t *pgdp, void *va_start, void *va_end,
vm_area_add_early(vma);
}
-static int __init parse_rodata(char *arg)
-{
- int ret = strtobool(arg, &rodata_enabled);
- if (!ret) {
- rodata_full = false;
- return 0;
- }
-
- /* permit 'full' in addition to boolean options */
- if (strcmp(arg, "full"))
- return -EINVAL;
-
- rodata_enabled = true;
- rodata_full = true;
- return 0;
-}
-early_param("rodata", parse_rodata);
-
#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
static int __init map_entry_trampoline(void)
{
diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig
index fa400055b2d5..cd2b3fe15672 100644
--- a/arch/parisc/Kconfig
+++ b/arch/parisc/Kconfig
@@ -147,10 +147,10 @@ menu "Processor type and features"
choice
prompt "Processor type"
- default PA7000
+ default PA7000 if "$(ARCH)" = "parisc"
config PA7000
- bool "PA7000/PA7100"
+ bool "PA7000/PA7100" if "$(ARCH)" = "parisc"
help
This is the processor type of your CPU. This information is
used for optimizing purposes. In order to compile a kernel
@@ -161,21 +161,21 @@ config PA7000
which is required on some machines.
config PA7100LC
- bool "PA7100LC"
+ bool "PA7100LC" if "$(ARCH)" = "parisc"
help
Select this option for the PCX-L processor, as used in the
712, 715/64, 715/80, 715/100, 715/100XC, 725/100, 743, 748,
D200, D210, D300, D310 and E-class
config PA7200
- bool "PA7200"
+ bool "PA7200" if "$(ARCH)" = "parisc"
help
Select this option for the PCX-T' processor, as used in the
C100, C110, J100, J110, J210XC, D250, D260, D350, D360,
K100, K200, K210, K220, K400, K410 and K420
config PA7300LC
- bool "PA7300LC"
+ bool "PA7300LC" if "$(ARCH)" = "parisc"
help
Select this option for the PCX-L2 processor, as used in the
744, A180, B132L, B160L, B180L, C132L, C160L, C180L,
@@ -225,17 +225,8 @@ config MLONGCALLS
Enabling this option will probably slow down your kernel.
config 64BIT
- bool "64-bit kernel"
+ def_bool "$(ARCH)" = "parisc64"
depends on PA8X00
- help
- Enable this if you want to support 64bit kernel on PA-RISC platform.
-
- At the moment, only people willing to use more than 2GB of RAM,
- or having a 64bit-only capable PA-RISC machine should say Y here.
-
- Since there is no 64bit userland on PA-RISC, there is no point to
- enable this option otherwise. The 64bit kernel is significantly bigger
- and slower than the 32bit one.
choice
prompt "Kernel page size"
diff --git a/arch/parisc/kernel/unaligned.c b/arch/parisc/kernel/unaligned.c
index bac581b5ecfc..e8a4d77cff53 100644
--- a/arch/parisc/kernel/unaligned.c
+++ b/arch/parisc/kernel/unaligned.c
@@ -93,7 +93,7 @@
#define R1(i) (((i)>>21)&0x1f)
#define R2(i) (((i)>>16)&0x1f)
#define R3(i) ((i)&0x1f)
-#define FR3(i) ((((i)<<1)&0x1f)|(((i)>>6)&1))
+#define FR3(i) ((((i)&0x1f)<<1)|(((i)>>6)&1))
#define IM(i,n) (((i)>>1&((1<<(n-1))-1))|((i)&1?((0-1L)<<(n-1)):0))
#define IM5_2(i) IM((i)>>16,5)
#define IM5_3(i) IM((i),5)
diff --git a/arch/riscv/boot/dts/microchip/mpfs-icicle-kit.dts b/arch/riscv/boot/dts/microchip/mpfs-icicle-kit.dts
index 044982a11df5..f3f87ed2007f 100644
--- a/arch/riscv/boot/dts/microchip/mpfs-icicle-kit.dts
+++ b/arch/riscv/boot/dts/microchip/mpfs-icicle-kit.dts
@@ -84,12 +84,10 @@ &mac1 {
phy1: ethernet-phy@9 {
reg = <9>;
- ti,fifo-depth = <0x1>;
};
phy0: ethernet-phy@8 {
reg = <8>;
- ti,fifo-depth = <0x1>;
};
};
@@ -102,7 +100,6 @@ &mmc {
disable-wp;
cap-sd-highspeed;
cap-mmc-highspeed;
- card-detect-delay = <200>;
mmc-ddr-1_8v;
mmc-hs200-1_8v;
sd-uhs-sdr12;
diff --git a/arch/riscv/boot/dts/microchip/mpfs-polarberry.dts b/arch/riscv/boot/dts/microchip/mpfs-polarberry.dts
index 82c93c8f5c17..c87cc2d8fe29 100644
--- a/arch/riscv/boot/dts/microchip/mpfs-polarberry.dts
+++ b/arch/riscv/boot/dts/microchip/mpfs-polarberry.dts
@@ -54,12 +54,10 @@ &mac1 {
phy1: ethernet-phy@5 {
reg = <5>;
- ti,fifo-depth = <0x01>;
};
phy0: ethernet-phy@4 {
reg = <4>;
- ti,fifo-depth = <0x01>;
};
};
@@ -72,7 +70,6 @@ &mmc {
disable-wp;
cap-sd-highspeed;
cap-mmc-highspeed;
- card-detect-delay = <200>;
mmc-ddr-1_8v;
mmc-hs200-1_8v;
sd-uhs-sdr12;
diff --git a/arch/riscv/boot/dts/microchip/mpfs.dtsi b/arch/riscv/boot/dts/microchip/mpfs.dtsi
index 496d3b7642bd..9f5bce1488d9 100644
--- a/arch/riscv/boot/dts/microchip/mpfs.dtsi
+++ b/arch/riscv/boot/dts/microchip/mpfs.dtsi
@@ -169,7 +169,7 @@ cctrllr: cache-controller@2010000 {
cache-size = <2097152>;
cache-unified;
interrupt-parent = <&plic>;
- interrupts = <1>, <2>, <3>;
+ interrupts = <1>, <3>, <4>, <2>;
};
clint: clint@2000000 {
@@ -446,9 +446,8 @@ pcie: pcie@2000000000 {
ranges = <0x3000000 0x0 0x8000000 0x20 0x8000000 0x0 0x80000000>;
msi-parent = <&pcie>;
msi-controller;
- microchip,axi-m-atr0 = <0x10 0x0>;
status = "disabled";
- pcie_intc: legacy-interrupt-controller {
+ pcie_intc: interrupt-controller {
#address-cells = <0>;
#interrupt-cells = <1>;
interrupt-controller;
diff --git a/arch/riscv/include/asm/signal.h b/arch/riscv/include/asm/signal.h
new file mode 100644
index 000000000000..532c29ef0376
--- /dev/null
+++ b/arch/riscv/include/asm/signal.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef __ASM_SIGNAL_H
+#define __ASM_SIGNAL_H
+
+#include <uapi/asm/signal.h>
+#include <uapi/asm/ptrace.h>
+
+asmlinkage __visible
+void do_notify_resume(struct pt_regs *regs, unsigned long thread_info_flags);
+
+#endif
diff --git a/arch/riscv/include/asm/thread_info.h b/arch/riscv/include/asm/thread_info.h
index 78933ac04995..67322f878e0d 100644
--- a/arch/riscv/include/asm/thread_info.h
+++ b/arch/riscv/include/asm/thread_info.h
@@ -42,6 +42,8 @@
#ifndef __ASSEMBLY__
+extern long shadow_stack[SHADOW_OVERFLOW_STACK_SIZE / sizeof(long)];
+
#include <asm/processor.h>
#include <asm/csr.h>
diff --git a/arch/riscv/kernel/signal.c b/arch/riscv/kernel/signal.c
index 38b05ca6fe66..5a2de6b6f882 100644
--- a/arch/riscv/kernel/signal.c
+++ b/arch/riscv/kernel/signal.c
@@ -15,6 +15,7 @@
#include <asm/ucontext.h>
#include <asm/vdso.h>
+#include <asm/signal.h>
#include <asm/signal32.h>
#include <asm/switch_to.h>
#include <asm/csr.h>
diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c
index 39d0f8bba4b4..635e6ec26938 100644
--- a/arch/riscv/kernel/traps.c
+++ b/arch/riscv/kernel/traps.c
@@ -20,9 +20,10 @@
#include <asm/asm-prototypes.h>
#include <asm/bug.h>
+#include <asm/csr.h>
#include <asm/processor.h>
#include <asm/ptrace.h>
-#include <asm/csr.h>
+#include <asm/thread_info.h>
int show_unhandled_signals = 1;
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index 89949b9f3cf8..d5119e039d85 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -91,6 +91,18 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
memcpy(dst, src, arch_task_struct_size);
dst->thread.fpu.regs = dst->thread.fpu.fprs;
+
+ /*
+ * Don't transfer over the runtime instrumentation or the guarded
+ * storage control block pointers. These fields are cleared here instead
+ * of in copy_thread() to avoid premature freeing of associated memory
+ * on fork() failure. Wait to clear the RI flag because ->stack still
+ * refers to the source thread.
+ */
+ dst->thread.ri_cb = NULL;
+ dst->thread.gs_cb = NULL;
+ dst->thread.gs_bc_cb = NULL;
+
return 0;
}
@@ -150,13 +162,11 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
frame->childregs.flags = 0;
if (new_stackp)
frame->childregs.gprs[15] = new_stackp;
-
- /* Don't copy runtime instrumentation info */
- p->thread.ri_cb = NULL;
+ /*
+ * Clear the runtime instrumentation flag after the above childregs
+ * copy. The CB pointer was already cleared in arch_dup_task_struct().
+ */
frame->childregs.psw.mask &= ~PSW_MASK_RI;
- /* Don't copy guarded storage control block */
- p->thread.gs_cb = NULL;
- p->thread.gs_bc_cb = NULL;
/* Set a new TLS ? */
if (clone_flags & CLONE_SETTLS) {
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index e173b6187ad5..516d232e66d9 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -379,7 +379,9 @@ static inline vm_fault_t do_exception(struct pt_regs *regs, int access)
flags = FAULT_FLAG_DEFAULT;
if (user_mode(regs))
flags |= FAULT_FLAG_USER;
- if (access == VM_WRITE || is_write)
+ if (is_write)
+ access = VM_WRITE;
+ if (access == VM_WRITE)
flags |= FAULT_FLAG_WRITE;
mmap_read_lock(mm);
diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h
index 4910bf230d7b..62208ec04ca4 100644
--- a/arch/x86/boot/compressed/misc.h
+++ b/arch/x86/boot/compressed/misc.h
@@ -132,7 +132,17 @@ void snp_set_page_private(unsigned long paddr);
void snp_set_page_shared(unsigned long paddr);
void sev_prep_identity_maps(unsigned long top_level_pgt);
#else
-static inline void sev_enable(struct boot_params *bp) { }
+static inline void sev_enable(struct boot_params *bp)
+{
+ /*
+ * bp->cc_blob_address should only be set by boot/compressed kernel.
+ * Initialize it to 0 unconditionally (thus here in this stub too) to
+ * ensure that uninitialized values from buggy bootloaders aren't
+ * propagated.
+ */
+ if (bp)
+ bp->cc_blob_address = 0;
+}
static inline void sev_es_shutdown_ghcb(void) { }
static inline bool sev_es_check_ghcb_fault(unsigned long address)
{
diff --git a/arch/x86/boot/compressed/sev.c b/arch/x86/boot/compressed/sev.c
index 52f989f6acc2..c93930d5ccbd 100644
--- a/arch/x86/boot/compressed/sev.c
+++ b/arch/x86/boot/compressed/sev.c
@@ -276,6 +276,14 @@ void sev_enable(struct boot_params *bp)
struct msr m;
bool snp;
+ /*
+ * bp->cc_blob_address should only be set by boot/compressed kernel.
+ * Initialize it to 0 to ensure that uninitialized values from
+ * buggy bootloaders aren't propagated.
+ */
+ if (bp)
+ bp->cc_blob_address = 0;
+
/*
* Setup/preliminary detection of SNP. This will be sanity-checked
* against CPUID/MSR values later.
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index 682338e7e2a3..4dd19819053a 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -311,7 +311,7 @@ SYM_CODE_START(entry_INT80_compat)
* Interrupts are off on entry.
*/
ASM_CLAC /* Do this early to minimize exposure */
- SWAPGS
+ ALTERNATIVE "swapgs", "", X86_FEATURE_XENPV
/*
* User tracing code (ptrace or signal handlers) might assume that
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index ba60427caa6d..9b48d957d2b3 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -291,6 +291,7 @@ static u64 load_latency_data(struct perf_event *event, u64 status)
static u64 store_latency_data(struct perf_event *event, u64 status)
{
union intel_x86_pebs_dse dse;
+ union perf_mem_data_src src;
u64 val;
dse.val = status;
@@ -304,7 +305,14 @@ static u64 store_latency_data(struct perf_event *event, u64 status)
val |= P(BLK, NA);
- return val;
+ /*
+ * the pebs_data_source table is only for loads
+ * so override the mem_op to say STORE instead
+ */
+ src.val = val;
+ src.mem_op = P(OP,STORE);
+
+ return src.val;
}
struct pebs_record_core {
@@ -822,7 +830,7 @@ struct event_constraint intel_glm_pebs_event_constraints[] = {
struct event_constraint intel_grt_pebs_event_constraints[] = {
/* Allow all events as PEBS with no flags */
- INTEL_HYBRID_LAT_CONSTRAINT(0x5d0, 0xf),
+ INTEL_HYBRID_LAT_CONSTRAINT(0x5d0, 0x3),
INTEL_HYBRID_LAT_CONSTRAINT(0x6d0, 0xf),
EVENT_CONSTRAINT_END
};
diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c
index 4f70fb6c2c1e..47fca6a7a8bc 100644
--- a/arch/x86/events/intel/lbr.c
+++ b/arch/x86/events/intel/lbr.c
@@ -1097,6 +1097,14 @@ static int intel_pmu_setup_hw_lbr_filter(struct perf_event *event)
if (static_cpu_has(X86_FEATURE_ARCH_LBR)) {
reg->config = mask;
+
+ /*
+ * The Arch LBR HW can retrieve the common branch types
+ * from the LBR_INFO. It doesn't require the high overhead
+ * SW disassemble.
+ * Enable the branch type by default for the Arch LBR.
+ */
+ reg->reg |= X86_BR_TYPE_SAVE;
return 0;
}
diff --git a/arch/x86/events/intel/uncore_snb.c b/arch/x86/events/intel/uncore_snb.c
index ce440011cc4e..1ef4f7861e2e 100644
--- a/arch/x86/events/intel/uncore_snb.c
+++ b/arch/x86/events/intel/uncore_snb.c
@@ -841,6 +841,22 @@ int snb_pci2phy_map_init(int devid)
return 0;
}
+static u64 snb_uncore_imc_read_counter(struct intel_uncore_box *box, struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+
+ /*
+ * SNB IMC counters are 32-bit and are laid out back to back
+ * in MMIO space. Therefore we must use a 32-bit accessor function
+ * using readq() from uncore_mmio_read_counter() causes problems
+ * because it is reading 64-bit at a time. This is okay for the
+ * uncore_perf_event_update() function because it drops the upper
+ * 32-bits but not okay for plain uncore_read_counter() as invoked
+ * in uncore_pmu_event_start().
+ */
+ return (u64)readl(box->io_addr + hwc->event_base);
+}
+
static struct pmu snb_uncore_imc_pmu = {
.task_ctx_nr = perf_invalid_context,
.event_init = snb_uncore_imc_event_init,
@@ -860,7 +876,7 @@ static struct intel_uncore_ops snb_uncore_imc_ops = {
.disable_event = snb_uncore_imc_disable_event,
.enable_event = snb_uncore_imc_enable_event,
.hw_config = snb_uncore_imc_hw_config,
- .read_counter = uncore_mmio_read_counter,
+ .read_counter = snb_uncore_imc_read_counter,
};
static struct intel_uncore_type snb_uncore_imc = {
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index ede8990f3e41..ccbb838f995c 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -456,7 +456,8 @@
#define X86_BUG_ITLB_MULTIHIT X86_BUG(23) /* CPU may incur MCE during certain page attribute changes */
#define X86_BUG_SRBDS X86_BUG(24) /* CPU may leak RNG bits if not mitigated */
#define X86_BUG_MMIO_STALE_DATA X86_BUG(25) /* CPU is affected by Processor MMIO Stale Data vulnerabilities */
-#define X86_BUG_RETBLEED X86_BUG(26) /* CPU is affected by RETBleed */
-#define X86_BUG_EIBRS_PBRSB X86_BUG(27) /* EIBRS is vulnerable to Post Barrier RSB Predictions */
+#define X86_BUG_MMIO_UNKNOWN X86_BUG(26) /* CPU is too old and its MMIO Stale Data status is unknown */
+#define X86_BUG_RETBLEED X86_BUG(27) /* CPU is affected by RETBleed */
+#define X86_BUG_EIBRS_PBRSB X86_BUG(28) /* EIBRS is vulnerable to Post Barrier RSB Predictions */
#endif /* _ASM_X86_CPUFEATURES_H */
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index d3a3cc6772ee..c31083d77e09 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -35,33 +35,56 @@
#define RSB_CLEAR_LOOPS 32 /* To forcibly overwrite all entries */
/*
+ * Common helper for __FILL_RETURN_BUFFER and __FILL_ONE_RETURN.
+ */
+#define __FILL_RETURN_SLOT \
+ ANNOTATE_INTRA_FUNCTION_CALL; \
+ call 772f; \
+ int3; \
+772:
+
+/*
+ * Stuff the entire RSB.
+ *
* Google experimented with loop-unrolling and this turned out to be
* the optimal version - two calls, each with their own speculation
* trap should their return address end up getting used, in a loop.
*/
-#define __FILL_RETURN_BUFFER(reg, nr, sp) \
- mov $(nr/2), reg; \
-771: \
- ANNOTATE_INTRA_FUNCTION_CALL; \
- call 772f; \
-773: /* speculation trap */ \
- UNWIND_HINT_EMPTY; \
- pause; \
- lfence; \
- jmp 773b; \
-772: \
- ANNOTATE_INTRA_FUNCTION_CALL; \
- call 774f; \
-775: /* speculation trap */ \
- UNWIND_HINT_EMPTY; \
- pause; \
- lfence; \
- jmp 775b; \
-774: \
- add $(BITS_PER_LONG/8) * 2, sp; \
- dec reg; \
- jnz 771b; \
- /* barrier for jnz misprediction */ \
+#ifdef CONFIG_X86_64
+#define __FILL_RETURN_BUFFER(reg, nr) \
+ mov $(nr/2), reg; \
+771: \
+ __FILL_RETURN_SLOT \
+ __FILL_RETURN_SLOT \
+ add $(BITS_PER_LONG/8) * 2, %_ASM_SP; \
+ dec reg; \
+ jnz 771b; \
+ /* barrier for jnz misprediction */ \
+ lfence;
+#else
+/*
+ * i386 doesn't unconditionally have LFENCE, as such it can't
+ * do a loop.
+ */
+#define __FILL_RETURN_BUFFER(reg, nr) \
+ .rept nr; \
+ __FILL_RETURN_SLOT; \
+ .endr; \
+ add $(BITS_PER_LONG/8) * nr, %_ASM_SP;
+#endif
+
+/*
+ * Stuff a single RSB slot.
+ *
+ * To mitigate Post-Barrier RSB speculation, one CALL instruction must be
+ * forced to retire before letting a RET instruction execute.
+ *
+ * On PBRSB-vulnerable CPUs, it is not safe for a RET to be executed
+ * before this point.
+ */
+#define __FILL_ONE_RETURN \
+ __FILL_RETURN_SLOT \
+ add $(BITS_PER_LONG/8), %_ASM_SP; \
lfence;
#ifdef __ASSEMBLY__
@@ -120,28 +143,15 @@
#endif
.endm
-.macro ISSUE_UNBALANCED_RET_GUARD
- ANNOTATE_INTRA_FUNCTION_CALL
- call .Lunbalanced_ret_guard_\@
- int3
-.Lunbalanced_ret_guard_\@:
- add $(BITS_PER_LONG/8), %_ASM_SP
- lfence
-.endm
-
/*
* A simpler FILL_RETURN_BUFFER macro. Don't make people use the CPP
* monstrosity above, manually.
*/
-.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req ftr2
-.ifb \ftr2
- ALTERNATIVE "jmp .Lskip_rsb_\@", "", \ftr
-.else
- ALTERNATIVE_2 "jmp .Lskip_rsb_\@", "", \ftr, "jmp .Lunbalanced_\@", \ftr2
-.endif
- __FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP)
-.Lunbalanced_\@:
- ISSUE_UNBALANCED_RET_GUARD
+.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req ftr2=ALT_NOT(X86_FEATURE_ALWAYS)
+ ALTERNATIVE_2 "jmp .Lskip_rsb_\@", \
+ __stringify(__FILL_RETURN_BUFFER(\reg,\nr)), \ftr, \
+ __stringify(__FILL_ONE_RETURN), \ftr2
+
.Lskip_rsb_\@:
.endm
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 510d85261132..da7c361f47e0 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -433,7 +433,8 @@ static void __init mmio_select_mitigation(void)
u64 ia32_cap;
if (!boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA) ||
- cpu_mitigations_off()) {
+ boot_cpu_has_bug(X86_BUG_MMIO_UNKNOWN) ||
+ cpu_mitigations_off()) {
mmio_mitigation = MMIO_MITIGATION_OFF;
return;
}
@@ -538,6 +539,8 @@ static void __init md_clear_update_mitigation(void)
pr_info("TAA: %s\n", taa_strings[taa_mitigation]);
if (boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA))
pr_info("MMIO Stale Data: %s\n", mmio_strings[mmio_mitigation]);
+ else if (boot_cpu_has_bug(X86_BUG_MMIO_UNKNOWN))
+ pr_info("MMIO Stale Data: Unknown: No mitigations\n");
}
static void __init md_clear_select_mitigation(void)
@@ -2275,6 +2278,9 @@ static ssize_t tsx_async_abort_show_state(char *buf)
static ssize_t mmio_stale_data_show_state(char *buf)
{
+ if (boot_cpu_has_bug(X86_BUG_MMIO_UNKNOWN))
+ return sysfs_emit(buf, "Unknown: No mitigations\n");
+
if (mmio_mitigation == MMIO_MITIGATION_OFF)
return sysfs_emit(buf, "%s\n", mmio_strings[mmio_mitigation]);
@@ -2421,6 +2427,7 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr
return srbds_show_state(buf);
case X86_BUG_MMIO_STALE_DATA:
+ case X86_BUG_MMIO_UNKNOWN:
return mmio_stale_data_show_state(buf);
case X86_BUG_RETBLEED:
@@ -2480,7 +2487,10 @@ ssize_t cpu_show_srbds(struct device *dev, struct device_attribute *attr, char *
ssize_t cpu_show_mmio_stale_data(struct device *dev, struct device_attribute *attr, char *buf)
{
- return cpu_show_common(dev, attr, buf, X86_BUG_MMIO_STALE_DATA);
+ if (boot_cpu_has_bug(X86_BUG_MMIO_UNKNOWN))
+ return cpu_show_common(dev, attr, buf, X86_BUG_MMIO_UNKNOWN);
+ else
+ return cpu_show_common(dev, attr, buf, X86_BUG_MMIO_STALE_DATA);
}
ssize_t cpu_show_retbleed(struct device *dev, struct device_attribute *attr, char *buf)
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 64a73f415f03..3e508f239098 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1135,7 +1135,8 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c)
#define NO_SWAPGS BIT(6)
#define NO_ITLB_MULTIHIT BIT(7)
#define NO_SPECTRE_V2 BIT(8)
-#define NO_EIBRS_PBRSB BIT(9)
+#define NO_MMIO BIT(9)
+#define NO_EIBRS_PBRSB BIT(10)
#define VULNWL(vendor, family, model, whitelist) \
X86_MATCH_VENDOR_FAM_MODEL(vendor, family, model, whitelist)
@@ -1158,6 +1159,11 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = {
VULNWL(VORTEX, 6, X86_MODEL_ANY, NO_SPECULATION),
/* Intel Family 6 */
+ VULNWL_INTEL(TIGERLAKE, NO_MMIO),
+ VULNWL_INTEL(TIGERLAKE_L, NO_MMIO),
+ VULNWL_INTEL(ALDERLAKE, NO_MMIO),
+ VULNWL_INTEL(ALDERLAKE_L, NO_MMIO),
+
VULNWL_INTEL(ATOM_SALTWELL, NO_SPECULATION | NO_ITLB_MULTIHIT),
VULNWL_INTEL(ATOM_SALTWELL_TABLET, NO_SPECULATION | NO_ITLB_MULTIHIT),
VULNWL_INTEL(ATOM_SALTWELL_MID, NO_SPECULATION | NO_ITLB_MULTIHIT),
@@ -1176,9 +1182,9 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = {
VULNWL_INTEL(ATOM_AIRMONT_MID, NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT),
VULNWL_INTEL(ATOM_AIRMONT_NP, NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT),
- VULNWL_INTEL(ATOM_GOLDMONT, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT),
- VULNWL_INTEL(ATOM_GOLDMONT_D, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT),
- VULNWL_INTEL(ATOM_GOLDMONT_PLUS, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_EIBRS_PBRSB),
+ VULNWL_INTEL(ATOM_GOLDMONT, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
+ VULNWL_INTEL(ATOM_GOLDMONT_D, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
+ VULNWL_INTEL(ATOM_GOLDMONT_PLUS, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_EIBRS_PBRSB),
/*
* Technically, swapgs isn't serializing on AMD (despite it previously
@@ -1193,18 +1199,18 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = {
VULNWL_INTEL(ATOM_TREMONT_D, NO_ITLB_MULTIHIT | NO_EIBRS_PBRSB),
/* AMD Family 0xf - 0x12 */
- VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT),
- VULNWL_AMD(0x10, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT),
- VULNWL_AMD(0x11, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT),
- VULNWL_AMD(0x12, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT),
+ VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
+ VULNWL_AMD(0x10, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
+ VULNWL_AMD(0x11, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
+ VULNWL_AMD(0x12, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
/* FAMILY_ANY must be last, otherwise 0x0f - 0x12 matches won't work */
- VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT),
- VULNWL_HYGON(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT),
+ VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
+ VULNWL_HYGON(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
/* Zhaoxin Family 7 */
- VULNWL(CENTAUR, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS),
- VULNWL(ZHAOXIN, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS),
+ VULNWL(CENTAUR, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO),
+ VULNWL(ZHAOXIN, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO),
{}
};
@@ -1358,10 +1364,16 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
* Affected CPU list is generally enough to enumerate the vulnerability,
* but for virtualization case check for ARCH_CAP MSR bits also, VMM may
* not want the guest to enumerate the bug.
+ *
+ * Set X86_BUG_MMIO_UNKNOWN for CPUs that are neither in the blacklist,
+ * nor in the whitelist and also don't enumerate MSR ARCH_CAP MMIO bits.
*/
- if (cpu_matches(cpu_vuln_blacklist, MMIO) &&
- !arch_cap_mmio_immune(ia32_cap))
- setup_force_cpu_bug(X86_BUG_MMIO_STALE_DATA);
+ if (!arch_cap_mmio_immune(ia32_cap)) {
+ if (cpu_matches(cpu_vuln_blacklist, MMIO))
+ setup_force_cpu_bug(X86_BUG_MMIO_STALE_DATA);
+ else if (!cpu_matches(cpu_vuln_whitelist, NO_MMIO))
+ setup_force_cpu_bug(X86_BUG_MMIO_UNKNOWN);
+ }
if (!cpu_has(c, X86_FEATURE_BTC_NO)) {
if (cpu_matches(cpu_vuln_blacklist, RETBLEED) || (ia32_cap & ARCH_CAP_RSBA))
diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c
index 63dc626627a0..4f84c3f11af5 100644
--- a/arch/x86/kernel/sev.c
+++ b/arch/x86/kernel/sev.c
@@ -701,7 +701,13 @@ static void __init early_set_pages_state(unsigned long paddr, unsigned int npage
void __init early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr,
unsigned int npages)
{
- if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
+ /*
+ * This can be invoked in early boot while running identity mapped, so
+ * use an open coded check for SNP instead of using cc_platform_has().
+ * This eliminates worries about jump tables or checking boot_cpu_data
+ * in the cc_platform_has() function.
+ */
+ if (!(sev_status & MSR_AMD64_SEV_SNP_ENABLED))
return;
/*
@@ -717,7 +723,13 @@ void __init early_snp_set_memory_private(unsigned long vaddr, unsigned long padd
void __init early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr,
unsigned int npages)
{
- if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
+ /*
+ * This can be invoked in early boot while running identity mapped, so
+ * use an open coded check for SNP instead of using cc_platform_has().
+ * This eliminates worries about jump tables or checking boot_cpu_data
+ * in the cc_platform_has() function.
+ */
+ if (!(sev_status & MSR_AMD64_SEV_SNP_ENABLED))
return;
/* Invalidate the memory pages before they are marked shared in the RMP table. */
diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c
index 38185aedf7d1..0ea57da92940 100644
--- a/arch/x86/kernel/unwind_orc.c
+++ b/arch/x86/kernel/unwind_orc.c
@@ -93,22 +93,27 @@ static struct orc_entry *orc_find(unsigned long ip);
static struct orc_entry *orc_ftrace_find(unsigned long ip)
{
struct ftrace_ops *ops;
- unsigned long caller;
+ unsigned long tramp_addr, offset;
ops = ftrace_ops_trampoline(ip);
if (!ops)
return NULL;
+ /* Set tramp_addr to the start of the code copied by the trampoline */
if (ops->flags & FTRACE_OPS_FL_SAVE_REGS)
- caller = (unsigned long)ftrace_regs_call;
+ tramp_addr = (unsigned long)ftrace_regs_caller;
else
- caller = (unsigned long)ftrace_call;
+ tramp_addr = (unsigned long)ftrace_caller;
+
+ /* Now place tramp_addr to the location within the trampoline ip is at */
+ offset = ip - ops->trampoline;
+ tramp_addr += offset;
/* Prevent unlikely recursion */
- if (ip == caller)
+ if (ip == tramp_addr)
return NULL;
- return orc_find(caller);
+ return orc_find(tramp_addr);
}
#else
static struct orc_entry *orc_ftrace_find(unsigned long ip)
diff --git a/arch/x86/mm/pat/memtype.c b/arch/x86/mm/pat/memtype.c
index d5ef64ddd35e..66a209f7eb86 100644
--- a/arch/x86/mm/pat/memtype.c
+++ b/arch/x86/mm/pat/memtype.c
@@ -62,6 +62,7 @@
static bool __read_mostly pat_bp_initialized;
static bool __read_mostly pat_disabled = !IS_ENABLED(CONFIG_X86_PAT);
+static bool __initdata pat_force_disabled = !IS_ENABLED(CONFIG_X86_PAT);
static bool __read_mostly pat_bp_enabled;
static bool __read_mostly pat_cm_initialized;
@@ -86,6 +87,7 @@ void pat_disable(const char *msg_reason)
static int __init nopat(char *str)
{
pat_disable("PAT support disabled via boot option.");
+ pat_force_disabled = true;
return 0;
}
early_param("nopat", nopat);
@@ -272,7 +274,7 @@ static void pat_ap_init(u64 pat)
wrmsrl(MSR_IA32_CR_PAT, pat);
}
-void init_cache_modes(void)
+void __init init_cache_modes(void)
{
u64 pat = 0;
@@ -313,6 +315,12 @@ void init_cache_modes(void)
*/
pat = PAT(0, WB) | PAT(1, WT) | PAT(2, UC_MINUS) | PAT(3, UC) |
PAT(4, WB) | PAT(5, WT) | PAT(6, UC_MINUS) | PAT(7, UC);
+ } else if (!pat_force_disabled && cpu_feature_enabled(X86_FEATURE_HYPERVISOR)) {
+ /*
+ * Clearly PAT is enabled underneath. Allow pat_enabled() to
+ * reflect this.
+ */
+ pat_bp_enabled = true;
}
__init_cache_modes(pat);
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 1eb13d57a946..0a299941c622 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1925,7 +1925,8 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list,
/* If we didn't flush the entire list, we could have told the driver
* there was more coming, but that turned out to be a lie.
*/
- if ((!list_empty(list) || errors) && q->mq_ops->commit_rqs && queued)
+ if ((!list_empty(list) || errors || needs_resource ||
+ ret == BLK_STS_DEV_RESOURCE) && q->mq_ops->commit_rqs && queued)
q->mq_ops->commit_rqs(hctx);
/*
* Any items that need requeuing? Stuff them into hctx->dispatch,
@@ -2678,6 +2679,7 @@ void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx,
list_del_init(&rq->queuelist);
ret = blk_mq_request_issue_directly(rq, list_empty(list));
if (ret != BLK_STS_OK) {
+ errors++;
if (ret == BLK_STS_RESOURCE ||
ret == BLK_STS_DEV_RESOURCE) {
blk_mq_request_bypass_insert(rq, false,
@@ -2685,7 +2687,6 @@ void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx,
break;
}
blk_mq_end_request(rq, ret);
- errors++;
} else
queued++;
}
diff --git a/drivers/acpi/processor_thermal.c b/drivers/acpi/processor_thermal.c
index d8b2dfcd59b5..83a430ad2255 100644
--- a/drivers/acpi/processor_thermal.c
+++ b/drivers/acpi/processor_thermal.c
@@ -151,7 +151,7 @@ void acpi_thermal_cpufreq_exit(struct cpufreq_policy *policy)
unsigned int cpu;
for_each_cpu(cpu, policy->related_cpus) {
- struct acpi_processor *pr = per_cpu(processors, policy->cpu);
+ struct acpi_processor *pr = per_cpu(processors, cpu);
if (pr)
freq_qos_remove_request(&pr->thermal_req);
diff --git a/drivers/android/binder_alloc.c b/drivers/android/binder_alloc.c
index d044418294f9..7981a2598376 100644
--- a/drivers/android/binder_alloc.c
+++ b/drivers/android/binder_alloc.c
@@ -395,12 +395,15 @@ static struct binder_buffer *binder_alloc_new_buf_locked(
size_t size, data_offsets_size;
int ret;
+ mmap_read_lock(alloc->vma_vm_mm);
if (!binder_alloc_get_vma(alloc)) {
+ mmap_read_unlock(alloc->vma_vm_mm);
binder_alloc_debug(BINDER_DEBUG_USER_ERROR,
"%d: binder_alloc_buf, no vma\n",
alloc->pid);
return ERR_PTR(-ESRCH);
}
+ mmap_read_unlock(alloc->vma_vm_mm);
data_offsets_size = ALIGN(data_size, sizeof(void *)) +
ALIGN(offsets_size, sizeof(void *));
@@ -922,17 +925,25 @@ void binder_alloc_print_pages(struct seq_file *m,
* Make sure the binder_alloc is fully initialized, otherwise we might
* read inconsistent state.
*/
- if (binder_alloc_get_vma(alloc) != NULL) {
- for (i = 0; i < alloc->buffer_size / PAGE_SIZE; i++) {
- page = &alloc->pages[i];
- if (!page->page_ptr)
- free++;
- else if (list_empty(&page->lru))
- active++;
- else
- lru++;
- }
+
+ mmap_read_lock(alloc->vma_vm_mm);
+ if (binder_alloc_get_vma(alloc) == NULL) {
+ mmap_read_unlock(alloc->vma_vm_mm);
+ goto uninitialized;
}
+
+ mmap_read_unlock(alloc->vma_vm_mm);
+ for (i = 0; i < alloc->buffer_size / PAGE_SIZE; i++) {
+ page = &alloc->pages[i];
+ if (!page->page_ptr)
+ free++;
+ else if (list_empty(&page->lru))
+ active++;
+ else
+ lru++;
+ }
+
+uninitialized:
mutex_unlock(&alloc->mutex);
seq_printf(m, " pages: %d:%d:%d\n", active, lru, free);
seq_printf(m, " pages high watermark: %zu\n", alloc->pages_high);
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 084f9b8a0ba3..a59910ef948e 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -979,6 +979,11 @@ loop_set_status_from_info(struct loop_device *lo,
lo->lo_offset = info->lo_offset;
lo->lo_sizelimit = info->lo_sizelimit;
+
+ /* loff_t vars have been assigned __u64 */
+ if (lo->lo_offset < 0 || lo->lo_sizelimit < 0)
+ return -EOVERFLOW;
+
memcpy(lo->lo_file_name, info->lo_file_name, LO_NAME_SIZE);
lo->lo_file_name[LO_NAME_SIZE-1] = 0;
lo->lo_flags = info->lo_flags;
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index b8549c61ff2c..b144be41290e 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -1144,14 +1144,15 @@ static ssize_t bd_stat_show(struct device *dev,
static ssize_t debug_stat_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
- int version = 2;
+ int version = 1;
struct zram *zram = dev_to_zram(dev);
ssize_t ret;
down_read(&zram->init_lock);
ret = scnprintf(buf, PAGE_SIZE,
- "version: %d\n%8llu\n",
+ "version: %d\n%8llu %8llu\n",
version,
+ (u64)atomic64_read(&zram->stats.writestall),
(u64)atomic64_read(&zram->stats.miss_free));
up_read(&zram->init_lock);
@@ -1367,6 +1368,7 @@ static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec,
}
kunmap_atomic(mem);
+compress_again:
zstrm = zcomp_stream_get(zram->comp);
src = kmap_atomic(page);
ret = zcomp_compress(zstrm, src, &comp_len);
@@ -1375,20 +1377,39 @@ static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec,
if (unlikely(ret)) {
zcomp_stream_put(zram->comp);
pr_err("Compression failed! err=%d\n", ret);
+ zs_free(zram->mem_pool, handle);
return ret;
}
if (comp_len >= huge_class_size)
comp_len = PAGE_SIZE;
-
- handle = zs_malloc(zram->mem_pool, comp_len,
- __GFP_KSWAPD_RECLAIM |
- __GFP_NOWARN |
- __GFP_HIGHMEM |
- __GFP_MOVABLE);
-
- if (unlikely(!handle)) {
+ /*
+ * handle allocation has 2 paths:
+ * a) fast path is executed with preemption disabled (for
+ * per-cpu streams) and has __GFP_DIRECT_RECLAIM bit clear,
+ * since we can't sleep;
+ * b) slow path enables preemption and attempts to allocate
+ * the page with __GFP_DIRECT_RECLAIM bit set. we have to
+ * put per-cpu compression stream and, thus, to re-do
+ * the compression once handle is allocated.
+ *
+ * if we have a 'non-null' handle here then we are coming
+ * from the slow path and handle has already been allocated.
+ */
+ if (!handle)
+ handle = zs_malloc(zram->mem_pool, comp_len,
+ __GFP_KSWAPD_RECLAIM |
+ __GFP_NOWARN |
+ __GFP_HIGHMEM |
+ __GFP_MOVABLE);
+ if (!handle) {
zcomp_stream_put(zram->comp);
+ atomic64_inc(&zram->stats.writestall);
+ handle = zs_malloc(zram->mem_pool, comp_len,
+ GFP_NOIO | __GFP_HIGHMEM |
+ __GFP_MOVABLE);
+ if (handle)
+ goto compress_again;
return -ENOMEM;
}
@@ -1946,6 +1967,7 @@ static int zram_add(void)
if (ZRAM_LOGICAL_BLOCK_SIZE == PAGE_SIZE)
blk_queue_max_write_zeroes_sectors(zram->disk->queue, UINT_MAX);
+ blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, zram->disk->queue);
ret = device_add_disk(NULL, zram->disk, zram_disk_groups);
if (ret)
goto out_cleanup_disk;
diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h
index 158c91e54850..80c3b43b4828 100644
--- a/drivers/block/zram/zram_drv.h
+++ b/drivers/block/zram/zram_drv.h
@@ -81,6 +81,7 @@ struct zram_stats {
atomic64_t huge_pages_since; /* no. of huge pages since zram set up */
atomic64_t pages_stored; /* no. of pages currently stored */
atomic_long_t max_used_pages; /* no. of maximum pages stored */
+ atomic64_t writestall; /* no. of write slow paths */
atomic64_t miss_free; /* no. of missed free */
#ifdef CONFIG_ZRAM_WRITEBACK
atomic64_t bd_count; /* no. of pages in backing device */
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index d9f57a20a8bc..29ebc9e51305 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -377,12 +377,8 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf)
f2g = &gfx_v10_3_kfd2kgd;
break;
case IP_VERSION(10, 3, 6):
- gfx_target_version = 100306;
- if (!vf)
- f2g = &gfx_v10_3_kfd2kgd;
- break;
case IP_VERSION(10, 3, 7):
- gfx_target_version = 100307;
+ gfx_target_version = 100306;
if (!vf)
f2g = &gfx_v10_3_kfd2kgd;
break;
diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c
index 05076e530e7d..e29175e4b44c 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -820,6 +820,15 @@ nouveau_bo_move_m2mf(struct ttm_buffer_object *bo, int evict,
if (ret == 0) {
ret = nouveau_fence_new(chan, false, &fence);
if (ret == 0) {
+ /* TODO: figure out a better solution here
+ *
+ * wait on the fence here explicitly as going through
+ * ttm_bo_move_accel_cleanup somehow doesn't seem to do it.
+ *
+ * Without this the operation can timeout and we'll fallback to a
+ * software copy, which might take several minutes to finish.
+ */
+ nouveau_fence_wait(fence, false, false);
ret = ttm_bo_move_accel_cleanup(bo,
&fence->base,
evict, false,
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 522b3d6b8c46..91e7e80fce48 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -6244,11 +6244,11 @@ static void mddev_detach(struct mddev *mddev)
static void __md_stop(struct mddev *mddev)
{
struct md_personality *pers = mddev->pers;
+ md_bitmap_destroy(mddev);
mddev_detach(mddev);
/* Ensure ->event_work is done */
if (mddev->event_work.func)
flush_workqueue(md_misc_wq);
- md_bitmap_destroy(mddev);
spin_lock(&mddev->lock);
mddev->pers = NULL;
spin_unlock(&mddev->lock);
@@ -6266,6 +6266,7 @@ void md_stop(struct mddev *mddev)
/* stop the array and free an attached data structures.
* This is called from dm-raid
*/
+ __md_stop_writes(mddev);
__md_stop(mddev);
bioset_exit(&mddev->bio_set);
bioset_exit(&mddev->sync_set);
diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c
index d7fb33c078e8..1f0120cbe9e8 100644
--- a/drivers/net/bonding/bond_3ad.c
+++ b/drivers/net/bonding/bond_3ad.c
@@ -2007,30 +2007,24 @@ void bond_3ad_initiate_agg_selection(struct bonding *bond, int timeout)
*/
void bond_3ad_initialize(struct bonding *bond, u16 tick_resolution)
{
- /* check that the bond is not initialized yet */
- if (!MAC_ADDRESS_EQUAL(&(BOND_AD_INFO(bond).system.sys_mac_addr),
- bond->dev->dev_addr)) {
-
- BOND_AD_INFO(bond).aggregator_identifier = 0;
-
- BOND_AD_INFO(bond).system.sys_priority =
- bond->params.ad_actor_sys_prio;
- if (is_zero_ether_addr(bond->params.ad_actor_system))
- BOND_AD_INFO(bond).system.sys_mac_addr =
- *((struct mac_addr *)bond->dev->dev_addr);
- else
- BOND_AD_INFO(bond).system.sys_mac_addr =
- *((struct mac_addr *)bond->params.ad_actor_system);
+ BOND_AD_INFO(bond).aggregator_identifier = 0;
+ BOND_AD_INFO(bond).system.sys_priority =
+ bond->params.ad_actor_sys_prio;
+ if (is_zero_ether_addr(bond->params.ad_actor_system))
+ BOND_AD_INFO(bond).system.sys_mac_addr =
+ *((struct mac_addr *)bond->dev->dev_addr);
+ else
+ BOND_AD_INFO(bond).system.sys_mac_addr =
+ *((struct mac_addr *)bond->params.ad_actor_system);
- /* initialize how many times this module is called in one
- * second (should be about every 100ms)
- */
- ad_ticks_per_sec = tick_resolution;
+ /* initialize how many times this module is called in one
+ * second (should be about every 100ms)
+ */
+ ad_ticks_per_sec = tick_resolution;
- bond_3ad_initiate_agg_selection(bond,
- AD_AGGREGATOR_SELECTION_TIMER *
- ad_ticks_per_sec);
- }
+ bond_3ad_initiate_agg_selection(bond,
+ AD_AGGREGATOR_SELECTION_TIMER *
+ ad_ticks_per_sec);
}
/**
diff --git a/drivers/net/dsa/microchip/ksz8795.c b/drivers/net/dsa/microchip/ksz8795.c
index 12a599d5e61a..c771797fd902 100644
--- a/drivers/net/dsa/microchip/ksz8795.c
+++ b/drivers/net/dsa/microchip/ksz8795.c
@@ -898,17 +898,6 @@ static void ksz8_w_phy(struct ksz_device *dev, u16 phy, u16 reg, u16 val)
}
}
-static enum dsa_tag_protocol ksz8_get_tag_protocol(struct dsa_switch *ds,
- int port,
- enum dsa_tag_protocol mp)
-{
- struct ksz_device *dev = ds->priv;
-
- /* ksz88x3 uses the same tag schema as KSZ9893 */
- return ksz_is_ksz88x3(dev) ?
- DSA_TAG_PROTO_KSZ9893 : DSA_TAG_PROTO_KSZ8795;
-}
-
static u32 ksz8_sw_get_phy_flags(struct dsa_switch *ds, int port)
{
/* Silicon Errata Sheet (DS80000830A):
@@ -969,11 +958,9 @@ static void ksz8_flush_dyn_mac_table(struct ksz_device *dev, int port)
}
}
-static int ksz8_port_vlan_filtering(struct dsa_switch *ds, int port, bool flag,
+static int ksz8_port_vlan_filtering(struct ksz_device *dev, int port, bool flag,
struct netlink_ext_ack *extack)
{
- struct ksz_device *dev = ds->priv;
-
if (ksz_is_ksz88x3(dev))
return -ENOTSUPP;
@@ -998,12 +985,11 @@ static void ksz8_port_enable_pvid(struct ksz_device *dev, int port, bool state)
}
}
-static int ksz8_port_vlan_add(struct dsa_switch *ds, int port,
+static int ksz8_port_vlan_add(struct ksz_device *dev, int port,
const struct switchdev_obj_port_vlan *vlan,
struct netlink_ext_ack *extack)
{
bool untagged = vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED;
- struct ksz_device *dev = ds->priv;
struct ksz_port *p = &dev->ports[port];
u16 data, new_pvid = 0;
u8 fid, member, valid;
@@ -1071,10 +1057,9 @@ static int ksz8_port_vlan_add(struct dsa_switch *ds, int port,
return 0;
}
-static int ksz8_port_vlan_del(struct dsa_switch *ds, int port,
+static int ksz8_port_vlan_del(struct ksz_device *dev, int port,
const struct switchdev_obj_port_vlan *vlan)
{
- struct ksz_device *dev = ds->priv;
u16 data, pvid;
u8 fid, member, valid;
@@ -1104,12 +1089,10 @@ static int ksz8_port_vlan_del(struct dsa_switch *ds, int port,
return 0;
}
-static int ksz8_port_mirror_add(struct dsa_switch *ds, int port,
+static int ksz8_port_mirror_add(struct ksz_device *dev, int port,
struct dsa_mall_mirror_tc_entry *mirror,
bool ingress, struct netlink_ext_ack *extack)
{
- struct ksz_device *dev = ds->priv;
-
if (ingress) {
ksz_port_cfg(dev, port, P_MIRROR_CTRL, PORT_MIRROR_RX, true);
dev->mirror_rx |= BIT(port);
@@ -1128,10 +1111,9 @@ static int ksz8_port_mirror_add(struct dsa_switch *ds, int port,
return 0;
}
-static void ksz8_port_mirror_del(struct dsa_switch *ds, int port,
+static void ksz8_port_mirror_del(struct ksz_device *dev, int port,
struct dsa_mall_mirror_tc_entry *mirror)
{
- struct ksz_device *dev = ds->priv;
u8 data;
if (mirror->ingress) {
@@ -1272,7 +1254,7 @@ static void ksz8_config_cpu_port(struct dsa_switch *ds)
continue;
if (!ksz_is_ksz88x3(dev)) {
ksz_pread8(dev, i, regs[P_REMOTE_STATUS], &remote);
- if (remote & PORT_FIBER_MODE)
+ if (remote & KSZ8_PORT_FIBER_MODE)
p->fiber = 1;
}
if (p->fiber)
@@ -1371,13 +1353,9 @@ static int ksz8_setup(struct dsa_switch *ds)
return ksz8_handle_global_errata(ds);
}
-static void ksz8_get_caps(struct dsa_switch *ds, int port,
+static void ksz8_get_caps(struct ksz_device *dev, int port,
struct phylink_config *config)
{
- struct ksz_device *dev = ds->priv;
-
- ksz_phylink_get_caps(ds, port, config);
-
config->mac_capabilities = MAC_10 | MAC_100;
/* Silicon Errata Sheet (DS80000830A):
@@ -1394,12 +1372,12 @@ static void ksz8_get_caps(struct dsa_switch *ds, int port,
}
static const struct dsa_switch_ops ksz8_switch_ops = {
- .get_tag_protocol = ksz8_get_tag_protocol,
+ .get_tag_protocol = ksz_get_tag_protocol,
.get_phy_flags = ksz8_sw_get_phy_flags,
.setup = ksz8_setup,
.phy_read = ksz_phy_read16,
.phy_write = ksz_phy_write16,
- .phylink_get_caps = ksz8_get_caps,
+ .phylink_get_caps = ksz_phylink_get_caps,
.phylink_mac_link_down = ksz_mac_link_down,
.port_enable = ksz_enable_port,
.get_strings = ksz_get_strings,
@@ -1409,14 +1387,14 @@ static const struct dsa_switch_ops ksz8_switch_ops = {
.port_bridge_leave = ksz_port_bridge_leave,
.port_stp_state_set = ksz8_port_stp_state_set,
.port_fast_age = ksz_port_fast_age,
- .port_vlan_filtering = ksz8_port_vlan_filtering,
- .port_vlan_add = ksz8_port_vlan_add,
- .port_vlan_del = ksz8_port_vlan_del,
+ .port_vlan_filtering = ksz_port_vlan_filtering,
+ .port_vlan_add = ksz_port_vlan_add,
+ .port_vlan_del = ksz_port_vlan_del,
.port_fdb_dump = ksz_port_fdb_dump,
.port_mdb_add = ksz_port_mdb_add,
.port_mdb_del = ksz_port_mdb_del,
- .port_mirror_add = ksz8_port_mirror_add,
- .port_mirror_del = ksz8_port_mirror_del,
+ .port_mirror_add = ksz_port_mirror_add,
+ .port_mirror_del = ksz_port_mirror_del,
};
static u32 ksz8_get_port_addr(int port, int offset)
@@ -1424,51 +1402,6 @@ static u32 ksz8_get_port_addr(int port, int offset)
return PORT_CTRL_ADDR(port, offset);
}
-static int ksz8_switch_detect(struct ksz_device *dev)
-{
- u8 id1, id2;
- u16 id16;
- int ret;
-
- /* read chip id */
- ret = ksz_read16(dev, REG_CHIP_ID0, &id16);
- if (ret)
- return ret;
-
- id1 = id16 >> 8;
- id2 = id16 & SW_CHIP_ID_M;
-
- switch (id1) {
- case KSZ87_FAMILY_ID:
- if ((id2 != CHIP_ID_94 && id2 != CHIP_ID_95))
- return -ENODEV;
-
- if (id2 == CHIP_ID_95) {
- u8 val;
-
- id2 = 0x95;
- ksz_read8(dev, REG_PORT_STATUS_0, &val);
- if (val & PORT_FIBER_MODE)
- id2 = 0x65;
- } else if (id2 == CHIP_ID_94) {
- id2 = 0x94;
- }
- break;
- case KSZ88_FAMILY_ID:
- if (id2 != CHIP_ID_63)
- return -ENODEV;
- break;
- default:
- dev_err(dev->dev, "invalid family id: %d\n", id1);
- return -ENODEV;
- }
- id16 &= ~0xff;
- id16 |= id2;
- dev->chip_id = id16;
-
- return 0;
-}
-
static int ksz8_switch_init(struct ksz_device *dev)
{
struct ksz8 *ksz8 = dev->priv;
@@ -1521,8 +1454,13 @@ static const struct ksz_dev_ops ksz8_dev_ops = {
.r_mib_pkt = ksz8_r_mib_pkt,
.freeze_mib = ksz8_freeze_mib,
.port_init_cnt = ksz8_port_init_cnt,
+ .vlan_filtering = ksz8_port_vlan_filtering,
+ .vlan_add = ksz8_port_vlan_add,
+ .vlan_del = ksz8_port_vlan_del,
+ .mirror_add = ksz8_port_mirror_add,
+ .mirror_del = ksz8_port_mirror_del,
+ .get_caps = ksz8_get_caps,
.shutdown = ksz8_reset_switch,
- .detect = ksz8_switch_detect,
.init = ksz8_switch_init,
.exit = ksz8_switch_exit,
};
diff --git a/drivers/net/dsa/microchip/ksz8795_reg.h b/drivers/net/dsa/microchip/ksz8795_reg.h
index 4109433b6b6c..b8f6ad7581bc 100644
--- a/drivers/net/dsa/microchip/ksz8795_reg.h
+++ b/drivers/net/dsa/microchip/ksz8795_reg.h
@@ -14,23 +14,10 @@
#define KS_PRIO_M 0x3
#define KS_PRIO_S 2
-#define REG_CHIP_ID0 0x00
-
-#define KSZ87_FAMILY_ID 0x87
-#define KSZ88_FAMILY_ID 0x88
-
-#define REG_CHIP_ID1 0x01
-
-#define SW_CHIP_ID_M 0xF0
-#define SW_CHIP_ID_S 4
#define SW_REVISION_M 0x0E
#define SW_REVISION_S 1
#define SW_START 0x01
-#define CHIP_ID_94 0x60
-#define CHIP_ID_95 0x90
-#define CHIP_ID_63 0x30
-
#define KSZ8863_REG_SW_RESET 0x43
#define KSZ8863_GLOBAL_SOFTWARE_RESET BIT(4)
@@ -217,8 +204,6 @@
#define REG_PORT_4_STATUS_0 0x48
/* For KSZ8765. */
-#define PORT_FIBER_MODE BIT(7)
-
#define PORT_REMOTE_ASYM_PAUSE BIT(5)
#define PORT_REMOTE_SYM_PAUSE BIT(4)
#define PORT_REMOTE_100BTX_FD BIT(3)
@@ -322,7 +307,6 @@
#define REG_PORT_CTRL_5 0x05
-#define REG_PORT_STATUS_0 0x08
#define REG_PORT_STATUS_1 0x09
#define REG_PORT_LINK_MD_CTRL 0x0A
#define REG_PORT_LINK_MD_RESULT 0x0B
diff --git a/drivers/net/dsa/microchip/ksz9477.c b/drivers/net/dsa/microchip/ksz9477.c
index ebad795e4e95..125124fdefbf 100644
--- a/drivers/net/dsa/microchip/ksz9477.c
+++ b/drivers/net/dsa/microchip/ksz9477.c
@@ -276,18 +276,6 @@ static void ksz9477_port_init_cnt(struct ksz_device *dev, int port)
mutex_unlock(&mib->cnt_mutex);
}
-static enum dsa_tag_protocol ksz9477_get_tag_protocol(struct dsa_switch *ds,
- int port,
- enum dsa_tag_protocol mp)
-{
- enum dsa_tag_protocol proto = DSA_TAG_PROTO_KSZ9477;
- struct ksz_device *dev = ds->priv;
-
- if (dev->features & IS_9893)
- proto = DSA_TAG_PROTO_KSZ9893;
- return proto;
-}
-
static int ksz9477_phy_read16(struct dsa_switch *ds, int addr, int reg)
{
struct ksz_device *dev = ds->priv;
@@ -389,12 +377,10 @@ static void ksz9477_flush_dyn_mac_table(struct ksz_device *dev, int port)
}
}
-static int ksz9477_port_vlan_filtering(struct dsa_switch *ds, int port,
+static int ksz9477_port_vlan_filtering(struct ksz_device *dev, int port,
bool flag,
struct netlink_ext_ack *extack)
{
- struct ksz_device *dev = ds->priv;
-
if (flag) {
ksz_port_cfg(dev, port, REG_PORT_LUE_CTRL,
PORT_VLAN_LOOKUP_VID_0, true);
@@ -408,11 +394,10 @@ static int ksz9477_port_vlan_filtering(struct dsa_switch *ds, int port,
return 0;
}
-static int ksz9477_port_vlan_add(struct dsa_switch *ds, int port,
+static int ksz9477_port_vlan_add(struct ksz_device *dev, int port,
const struct switchdev_obj_port_vlan *vlan,
struct netlink_ext_ack *extack)
{
- struct ksz_device *dev = ds->priv;
u32 vlan_table[3];
bool untagged = vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED;
int err;
@@ -445,10 +430,9 @@ static int ksz9477_port_vlan_add(struct dsa_switch *ds, int port,
return 0;
}
-static int ksz9477_port_vlan_del(struct dsa_switch *ds, int port,
+static int ksz9477_port_vlan_del(struct ksz_device *dev, int port,
const struct switchdev_obj_port_vlan *vlan)
{
- struct ksz_device *dev = ds->priv;
bool untagged = vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED;
u32 vlan_table[3];
u16 pvid;
@@ -835,11 +819,10 @@ static int ksz9477_port_mdb_del(struct dsa_switch *ds, int port,
return ret;
}
-static int ksz9477_port_mirror_add(struct dsa_switch *ds, int port,
+static int ksz9477_port_mirror_add(struct ksz_device *dev, int port,
struct dsa_mall_mirror_tc_entry *mirror,
bool ingress, struct netlink_ext_ack *extack)
{
- struct ksz_device *dev = ds->priv;
u8 data;
int p;
@@ -875,10 +858,9 @@ static int ksz9477_port_mirror_add(struct dsa_switch *ds, int port,
return 0;
}
-static void ksz9477_port_mirror_del(struct dsa_switch *ds, int port,
+static void ksz9477_port_mirror_del(struct ksz_device *dev, int port,
struct dsa_mall_mirror_tc_entry *mirror)
{
- struct ksz_device *dev = ds->priv;
bool in_use = false;
u8 data;
int p;
@@ -1100,11 +1082,9 @@ static void ksz9477_phy_errata_setup(struct ksz_device *dev, int port)
ksz9477_port_mmd_write(dev, port, 0x1c, 0x20, 0xeeee);
}
-static void ksz9477_get_caps(struct dsa_switch *ds, int port,
+static void ksz9477_get_caps(struct ksz_device *dev, int port,
struct phylink_config *config)
{
- ksz_phylink_get_caps(ds, port, config);
-
config->mac_capabilities = MAC_10 | MAC_100 | MAC_1000FD |
MAC_ASYM_PAUSE | MAC_SYM_PAUSE;
}
@@ -1329,12 +1309,12 @@ static int ksz9477_setup(struct dsa_switch *ds)
}
static const struct dsa_switch_ops ksz9477_switch_ops = {
- .get_tag_protocol = ksz9477_get_tag_protocol,
+ .get_tag_protocol = ksz_get_tag_protocol,
.setup = ksz9477_setup,
.phy_read = ksz9477_phy_read16,
.phy_write = ksz9477_phy_write16,
.phylink_mac_link_down = ksz_mac_link_down,
- .phylink_get_caps = ksz9477_get_caps,
+ .phylink_get_caps = ksz_phylink_get_caps,
.port_enable = ksz_enable_port,
.get_strings = ksz_get_strings,
.get_ethtool_stats = ksz_get_ethtool_stats,
@@ -1343,16 +1323,16 @@ static const struct dsa_switch_ops ksz9477_switch_ops = {
.port_bridge_leave = ksz_port_bridge_leave,
.port_stp_state_set = ksz9477_port_stp_state_set,
.port_fast_age = ksz_port_fast_age,
- .port_vlan_filtering = ksz9477_port_vlan_filtering,
- .port_vlan_add = ksz9477_port_vlan_add,
- .port_vlan_del = ksz9477_port_vlan_del,
+ .port_vlan_filtering = ksz_port_vlan_filtering,
+ .port_vlan_add = ksz_port_vlan_add,
+ .port_vlan_del = ksz_port_vlan_del,
.port_fdb_dump = ksz9477_port_fdb_dump,
.port_fdb_add = ksz9477_port_fdb_add,
.port_fdb_del = ksz9477_port_fdb_del,
.port_mdb_add = ksz9477_port_mdb_add,
.port_mdb_del = ksz9477_port_mdb_del,
- .port_mirror_add = ksz9477_port_mirror_add,
- .port_mirror_del = ksz9477_port_mirror_del,
+ .port_mirror_add = ksz_port_mirror_add,
+ .port_mirror_del = ksz_port_mirror_del,
.get_stats64 = ksz_get_stats64,
.port_change_mtu = ksz9477_change_mtu,
.port_max_mtu = ksz9477_max_mtu,
@@ -1363,14 +1343,15 @@ static u32 ksz9477_get_port_addr(int port, int offset)
return PORT_CTRL_ADDR(port, offset);
}
-static int ksz9477_switch_detect(struct ksz_device *dev)
+static int ksz9477_switch_init(struct ksz_device *dev)
{
u8 data8;
- u8 id_hi;
- u8 id_lo;
- u32 id32;
int ret;
+ dev->ds->ops = &ksz9477_switch_ops;
+
+ dev->port_mask = (1 << dev->info->port_cnt) - 1;
+
/* turn off SPI DO Edge select */
ret = ksz_read8(dev, REG_SW_GLOBAL_SERIAL_CTRL_0, &data8);
if (ret)
@@ -1381,10 +1362,6 @@ static int ksz9477_switch_detect(struct ksz_device *dev)
if (ret)
return ret;
- /* read chip id */
- ret = ksz_read32(dev, REG_CHIP_ID0__1, &id32);
- if (ret)
- return ret;
ret = ksz_read8(dev, REG_GLOBAL_OPTIONS, &data8);
if (ret)
return ret;
@@ -1395,12 +1372,7 @@ static int ksz9477_switch_detect(struct ksz_device *dev)
/* Default capability is gigabit capable. */
dev->features = GBIT_SUPPORT;
- dev_dbg(dev->dev, "Switch detect: ID=%08x%02x\n", id32, data8);
- id_hi = (u8)(id32 >> 16);
- id_lo = (u8)(id32 >> 8);
- if ((id_lo & 0xf) == 3) {
- /* Chip is from KSZ9893 design. */
- dev_info(dev->dev, "Found KSZ9893\n");
+ if (dev->chip_id == KSZ9893_CHIP_ID) {
dev->features |= IS_9893;
/* Chip does not support gigabit. */
@@ -1408,7 +1380,6 @@ static int ksz9477_switch_detect(struct ksz_device *dev)
dev->features &= ~GBIT_SUPPORT;
dev->phy_port_cnt = 2;
} else {
- dev_info(dev->dev, "Found KSZ9477 or compatible\n");
/* Chip uses new XMII register definitions. */
dev->features |= NEW_XMII;
@@ -1416,21 +1387,6 @@ static int ksz9477_switch_detect(struct ksz_device *dev)
if (!(data8 & SW_GIGABIT_ABLE))
dev->features &= ~GBIT_SUPPORT;
}
-
- /* Change chip id to known ones so it can be matched against them. */
- id32 = (id_hi << 16) | (id_lo << 8);
-
- dev->chip_id = id32;
-
- return 0;
-}
-
-static int ksz9477_switch_init(struct ksz_device *dev)
-{
- dev->ds->ops = &ksz9477_switch_ops;
-
- dev->port_mask = (1 << dev->info->port_cnt) - 1;
-
return 0;
}
@@ -1449,8 +1405,13 @@ static const struct ksz_dev_ops ksz9477_dev_ops = {
.r_mib_stat64 = ksz_r_mib_stats64,
.freeze_mib = ksz9477_freeze_mib,
.port_init_cnt = ksz9477_port_init_cnt,
+ .vlan_filtering = ksz9477_port_vlan_filtering,
+ .vlan_add = ksz9477_port_vlan_add,
+ .vlan_del = ksz9477_port_vlan_del,
+ .mirror_add = ksz9477_port_mirror_add,
+ .mirror_del = ksz9477_port_mirror_del,
+ .get_caps = ksz9477_get_caps,
.shutdown = ksz9477_reset_switch,
- .detect = ksz9477_switch_detect,
.init = ksz9477_switch_init,
.exit = ksz9477_switch_exit,
};
diff --git a/drivers/net/dsa/microchip/ksz9477_reg.h b/drivers/net/dsa/microchip/ksz9477_reg.h
index 7a2c8d4767af..077e35ab11b5 100644
--- a/drivers/net/dsa/microchip/ksz9477_reg.h
+++ b/drivers/net/dsa/microchip/ksz9477_reg.h
@@ -25,7 +25,6 @@
#define REG_CHIP_ID2__1 0x0002
-#define CHIP_ID_63 0x63
#define CHIP_ID_66 0x66
#define CHIP_ID_67 0x67
#define CHIP_ID_77 0x77
diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c
index 92a500e1ccd2..c9389880ad1f 100644
--- a/drivers/net/dsa/microchip/ksz_common.c
+++ b/drivers/net/dsa/microchip/ksz_common.c
@@ -453,9 +453,18 @@ void ksz_phylink_get_caps(struct dsa_switch *ds, int port,
if (dev->info->supports_rgmii[port])
phy_interface_set_rgmii(config->supported_interfaces);
- if (dev->info->internal_phy[port])
+ if (dev->info->internal_phy[port]) {
__set_bit(PHY_INTERFACE_MODE_INTERNAL,
config->supported_interfaces);
+ /* Compatibility for phylib's default interface type when the
+ * phy-mode property is absent
+ */
+ __set_bit(PHY_INTERFACE_MODE_GMII,
+ config->supported_interfaces);
+ }
+
+ if (dev->dev_ops->get_caps)
+ dev->dev_ops->get_caps(dev, port, config);
}
EXPORT_SYMBOL_GPL(ksz_phylink_get_caps);
@@ -930,6 +939,156 @@ void ksz_port_stp_state_set(struct dsa_switch *ds, int port,
}
EXPORT_SYMBOL_GPL(ksz_port_stp_state_set);
+enum dsa_tag_protocol ksz_get_tag_protocol(struct dsa_switch *ds,
+ int port, enum dsa_tag_protocol mp)
+{
+ struct ksz_device *dev = ds->priv;
+ enum dsa_tag_protocol proto = DSA_TAG_PROTO_NONE;
+
+ if (dev->chip_id == KSZ8795_CHIP_ID ||
+ dev->chip_id == KSZ8794_CHIP_ID ||
+ dev->chip_id == KSZ8765_CHIP_ID)
+ proto = DSA_TAG_PROTO_KSZ8795;
+
+ if (dev->chip_id == KSZ8830_CHIP_ID ||
+ dev->chip_id == KSZ9893_CHIP_ID)
+ proto = DSA_TAG_PROTO_KSZ9893;
+
+ if (dev->chip_id == KSZ9477_CHIP_ID ||
+ dev->chip_id == KSZ9897_CHIP_ID ||
+ dev->chip_id == KSZ9567_CHIP_ID)
+ proto = DSA_TAG_PROTO_KSZ9477;
+
+ return proto;
+}
+EXPORT_SYMBOL_GPL(ksz_get_tag_protocol);
+
+int ksz_port_vlan_filtering(struct dsa_switch *ds, int port,
+ bool flag, struct netlink_ext_ack *extack)
+{
+ struct ksz_device *dev = ds->priv;
+
+ if (!dev->dev_ops->vlan_filtering)
+ return -EOPNOTSUPP;
+
+ return dev->dev_ops->vlan_filtering(dev, port, flag, extack);
+}
+EXPORT_SYMBOL_GPL(ksz_port_vlan_filtering);
+
+int ksz_port_vlan_add(struct dsa_switch *ds, int port,
+ const struct switchdev_obj_port_vlan *vlan,
+ struct netlink_ext_ack *extack)
+{
+ struct ksz_device *dev = ds->priv;
+
+ if (!dev->dev_ops->vlan_add)
+ return -EOPNOTSUPP;
+
+ return dev->dev_ops->vlan_add(dev, port, vlan, extack);
+}
+EXPORT_SYMBOL_GPL(ksz_port_vlan_add);
+
+int ksz_port_vlan_del(struct dsa_switch *ds, int port,
+ const struct switchdev_obj_port_vlan *vlan)
+{
+ struct ksz_device *dev = ds->priv;
+
+ if (!dev->dev_ops->vlan_del)
+ return -EOPNOTSUPP;
+
+ return dev->dev_ops->vlan_del(dev, port, vlan);
+}
+EXPORT_SYMBOL_GPL(ksz_port_vlan_del);
+
+int ksz_port_mirror_add(struct dsa_switch *ds, int port,
+ struct dsa_mall_mirror_tc_entry *mirror,
+ bool ingress, struct netlink_ext_ack *extack)
+{
+ struct ksz_device *dev = ds->priv;
+
+ if (!dev->dev_ops->mirror_add)
+ return -EOPNOTSUPP;
+
+ return dev->dev_ops->mirror_add(dev, port, mirror, ingress, extack);
+}
+EXPORT_SYMBOL_GPL(ksz_port_mirror_add);
+
+void ksz_port_mirror_del(struct dsa_switch *ds, int port,
+ struct dsa_mall_mirror_tc_entry *mirror)
+{
+ struct ksz_device *dev = ds->priv;
+
+ if (dev->dev_ops->mirror_del)
+ dev->dev_ops->mirror_del(dev, port, mirror);
+}
+EXPORT_SYMBOL_GPL(ksz_port_mirror_del);
+
+static int ksz_switch_detect(struct ksz_device *dev)
+{
+ u8 id1, id2;
+ u16 id16;
+ u32 id32;
+ int ret;
+
+ /* read chip id */
+ ret = ksz_read16(dev, REG_CHIP_ID0, &id16);
+ if (ret)
+ return ret;
+
+ id1 = FIELD_GET(SW_FAMILY_ID_M, id16);
+ id2 = FIELD_GET(SW_CHIP_ID_M, id16);
+
+ switch (id1) {
+ case KSZ87_FAMILY_ID:
+ if (id2 == KSZ87_CHIP_ID_95) {
+ u8 val;
+
+ dev->chip_id = KSZ8795_CHIP_ID;
+
+ ksz_read8(dev, KSZ8_PORT_STATUS_0, &val);
+ if (val & KSZ8_PORT_FIBER_MODE)
+ dev->chip_id = KSZ8765_CHIP_ID;
+ } else if (id2 == KSZ87_CHIP_ID_94) {
+ dev->chip_id = KSZ8794_CHIP_ID;
+ } else {
+ return -ENODEV;
+ }
+ break;
+ case KSZ88_FAMILY_ID:
+ if (id2 == KSZ88_CHIP_ID_63)
+ dev->chip_id = KSZ8830_CHIP_ID;
+ else
+ return -ENODEV;
+ break;
+ default:
+ ret = ksz_read32(dev, REG_CHIP_ID0, &id32);
+ if (ret)
+ return ret;
+
+ dev->chip_rev = FIELD_GET(SW_REV_ID_M, id32);
+ id32 &= ~0xFF;
+
+ switch (id32) {
+ case KSZ9477_CHIP_ID:
+ case KSZ9897_CHIP_ID:
+ case KSZ9893_CHIP_ID:
+ case KSZ9567_CHIP_ID:
+ case LAN9370_CHIP_ID:
+ case LAN9371_CHIP_ID:
+ case LAN9372_CHIP_ID:
+ case LAN9373_CHIP_ID:
+ case LAN9374_CHIP_ID:
+ dev->chip_id = id32;
+ break;
+ default:
+ dev_err(dev->dev,
+ "unsupported switch detected %x)\n", id32);
+ return -ENODEV;
+ }
+ }
+ return 0;
+}
+
struct ksz_device *ksz_switch_alloc(struct device *base, void *priv)
{
struct dsa_switch *ds;
@@ -986,10 +1145,9 @@ int ksz_switch_register(struct ksz_device *dev,
mutex_init(&dev->alu_mutex);
mutex_init(&dev->vlan_mutex);
- dev->dev_ops = ops;
-
- if (dev->dev_ops->detect(dev))
- return -EINVAL;
+ ret = ksz_switch_detect(dev);
+ if (ret)
+ return ret;
info = ksz_lookup_info(dev->chip_id);
if (!info)
@@ -998,10 +1156,15 @@ int ksz_switch_register(struct ksz_device *dev,
/* Update the compatible info with the probed one */
dev->info = info;
+ dev_info(dev->dev, "found switch: %s, rev %i\n",
+ dev->info->dev_name, dev->chip_rev);
+
ret = ksz_check_device_id(dev);
if (ret)
return ret;
+ dev->dev_ops = ops;
+
ret = dev->dev_ops->init(dev);
if (ret)
return ret;
diff --git a/drivers/net/dsa/microchip/ksz_common.h b/drivers/net/dsa/microchip/ksz_common.h
index 8500eaedad67..10f9ef2dbf1c 100644
--- a/drivers/net/dsa/microchip/ksz_common.h
+++ b/drivers/net/dsa/microchip/ksz_common.h
@@ -90,6 +90,7 @@ struct ksz_device {
/* chip specific data */
u32 chip_id;
+ u8 chip_rev;
int cpu_port; /* port connected to CPU */
int phy_port_cnt;
phy_interface_t compat_interface;
@@ -179,10 +180,23 @@ struct ksz_dev_ops {
void (*r_mib_pkt)(struct ksz_device *dev, int port, u16 addr,
u64 *dropped, u64 *cnt);
void (*r_mib_stat64)(struct ksz_device *dev, int port);
+ int (*vlan_filtering)(struct ksz_device *dev, int port,
+ bool flag, struct netlink_ext_ack *extack);
+ int (*vlan_add)(struct ksz_device *dev, int port,
+ const struct switchdev_obj_port_vlan *vlan,
+ struct netlink_ext_ack *extack);
+ int (*vlan_del)(struct ksz_device *dev, int port,
+ const struct switchdev_obj_port_vlan *vlan);
+ int (*mirror_add)(struct ksz_device *dev, int port,
+ struct dsa_mall_mirror_tc_entry *mirror,
+ bool ingress, struct netlink_ext_ack *extack);
+ void (*mirror_del)(struct ksz_device *dev, int port,
+ struct dsa_mall_mirror_tc_entry *mirror);
+ void (*get_caps)(struct ksz_device *dev, int port,
+ struct phylink_config *config);
void (*freeze_mib)(struct ksz_device *dev, int port, bool freeze);
void (*port_init_cnt)(struct ksz_device *dev, int port);
int (*shutdown)(struct ksz_device *dev);
- int (*detect)(struct ksz_device *dev);
int (*init)(struct ksz_device *dev);
void (*exit)(struct ksz_device *dev);
};
@@ -231,6 +245,20 @@ int ksz_port_mdb_del(struct dsa_switch *ds, int port,
int ksz_enable_port(struct dsa_switch *ds, int port, struct phy_device *phy);
void ksz_get_strings(struct dsa_switch *ds, int port,
u32 stringset, uint8_t *buf);
+enum dsa_tag_protocol ksz_get_tag_protocol(struct dsa_switch *ds,
+ int port, enum dsa_tag_protocol mp);
+int ksz_port_vlan_filtering(struct dsa_switch *ds, int port,
+ bool flag, struct netlink_ext_ack *extack);
+int ksz_port_vlan_add(struct dsa_switch *ds, int port,
+ const struct switchdev_obj_port_vlan *vlan,
+ struct netlink_ext_ack *extack);
+int ksz_port_vlan_del(struct dsa_switch *ds, int port,
+ const struct switchdev_obj_port_vlan *vlan);
+int ksz_port_mirror_add(struct dsa_switch *ds, int port,
+ struct dsa_mall_mirror_tc_entry *mirror,
+ bool ingress, struct netlink_ext_ack *extack);
+void ksz_port_mirror_del(struct dsa_switch *ds, int port,
+ struct dsa_mall_mirror_tc_entry *mirror);
/* Common register access functions */
@@ -353,6 +381,23 @@ static inline void ksz_regmap_unlock(void *__mtx)
#define PORT_RX_ENABLE BIT(1)
#define PORT_LEARN_DISABLE BIT(0)
+/* Switch ID Defines */
+#define REG_CHIP_ID0 0x00
+
+#define SW_FAMILY_ID_M GENMASK(15, 8)
+#define KSZ87_FAMILY_ID 0x87
+#define KSZ88_FAMILY_ID 0x88
+
+#define KSZ8_PORT_STATUS_0 0x08
+#define KSZ8_PORT_FIBER_MODE BIT(7)
+
+#define SW_CHIP_ID_M GENMASK(7, 4)
+#define KSZ87_CHIP_ID_94 0x6
+#define KSZ87_CHIP_ID_95 0x9
+#define KSZ88_CHIP_ID_63 0x3
+
+#define SW_REV_ID_M GENMASK(7, 4)
+
/* Regmap tables generation */
#define KSZ_SPI_OP_RD 3
#define KSZ_SPI_OP_WR 2
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index cf9b00576ed3..964354536f9c 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -11183,10 +11183,7 @@ static netdev_features_t bnxt_fix_features(struct net_device *dev,
if ((features & NETIF_F_NTUPLE) && !bnxt_rfs_capable(bp))
features &= ~NETIF_F_NTUPLE;
- if (bp->flags & BNXT_FLAG_NO_AGG_RINGS)
- features &= ~(NETIF_F_LRO | NETIF_F_GRO_HW);
-
- if (!(bp->flags & BNXT_FLAG_TPA))
+ if ((bp->flags & BNXT_FLAG_NO_AGG_RINGS) || bp->xdp_prog)
features &= ~(NETIF_F_LRO | NETIF_F_GRO_HW);
if (!(features & NETIF_F_GRO))
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
index 075c6206325c..b1b17f911300 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
@@ -2130,6 +2130,7 @@ struct bnxt {
#define BNXT_DUMP_CRASH 1
struct bpf_prog *xdp_prog;
+ u8 xdp_has_frags;
struct bnxt_ptp_cfg *ptp_cfg;
u8 ptp_all_rx_tstamp;
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c
index 6b3d4f4c2a75..d83be40785b8 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c
@@ -1246,6 +1246,7 @@ int bnxt_dl_register(struct bnxt *bp)
if (rc)
goto err_dl_port_unreg;
+ devlink_set_features(dl, DEVLINK_F_RELOAD);
out:
devlink_register(dl);
return 0;
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c
index a1a2c7a64fd5..c9cf0569451a 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c
@@ -623,7 +623,7 @@ static int bnxt_hwrm_func_vf_resc_cfg(struct bnxt *bp, int num_vfs, bool reset)
hw_resc->max_stat_ctxs -= le16_to_cpu(req->min_stat_ctx) * n;
hw_resc->max_vnics -= le16_to_cpu(req->min_vnics) * n;
if (bp->flags & BNXT_FLAG_CHIP_P5)
- hw_resc->max_irqs -= vf_msix * n;
+ hw_resc->max_nqs -= vf_msix;
rc = pf->active_vfs;
}
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
index f53387ed0167..c3065ec0a479 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
@@ -181,6 +181,7 @@ void bnxt_xdp_buff_init(struct bnxt *bp, struct bnxt_rx_ring_info *rxr,
struct xdp_buff *xdp)
{
struct bnxt_sw_rx_bd *rx_buf;
+ u32 buflen = PAGE_SIZE;
struct pci_dev *pdev;
dma_addr_t mapping;
u32 offset;
@@ -192,7 +193,10 @@ void bnxt_xdp_buff_init(struct bnxt *bp, struct bnxt_rx_ring_info *rxr,
mapping = rx_buf->mapping - bp->rx_dma_offset;
dma_sync_single_for_cpu(&pdev->dev, mapping + offset, *len, bp->rx_dir);
- xdp_init_buff(xdp, BNXT_PAGE_MODE_BUF_SIZE + offset, &rxr->xdp_rxq);
+ if (bp->xdp_has_frags)
+ buflen = BNXT_PAGE_MODE_BUF_SIZE + offset;
+
+ xdp_init_buff(xdp, buflen, &rxr->xdp_rxq);
xdp_prepare_buff(xdp, *data_ptr - offset, offset, *len, false);
}
@@ -397,8 +401,10 @@ static int bnxt_xdp_set(struct bnxt *bp, struct bpf_prog *prog)
netdev_warn(dev, "ethtool rx/tx channels must be combined to support XDP.\n");
return -EOPNOTSUPP;
}
- if (prog)
+ if (prog) {
tx_xdp = bp->rx_nr_rings;
+ bp->xdp_has_frags = prog->aux->xdp_has_frags;
+ }
tc = netdev_get_num_tc(dev);
if (!tc)
diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
index 19704f5c8291..22a61802a402 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
@@ -4395,7 +4395,7 @@ static int i40e_check_fdir_input_set(struct i40e_vsi *vsi,
(struct in6_addr *)&ipv6_full_mask))
new_mask |= I40E_L3_V6_DST_MASK;
else if (ipv6_addr_any((struct in6_addr *)
- &usr_ip6_spec->ip6src))
+ &usr_ip6_spec->ip6dst))
new_mask &= ~I40E_L3_V6_DST_MASK;
else
return -EOPNOTSUPP;
diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
index 60453b3b8d23..6911cbb7afa5 100644
--- a/drivers/net/ethernet/intel/ice/ice.h
+++ b/drivers/net/ethernet/intel/ice/ice.h
@@ -684,8 +684,8 @@ static inline void ice_set_ring_xdp(struct ice_tx_ring *ring)
* ice_xsk_pool - get XSK buffer pool bound to a ring
* @ring: Rx ring to use
*
- * Returns a pointer to xdp_umem structure if there is a buffer pool present,
- * NULL otherwise.
+ * Returns a pointer to xsk_buff_pool structure if there is a buffer pool
+ * present, NULL otherwise.
*/
static inline struct xsk_buff_pool *ice_xsk_pool(struct ice_rx_ring *ring)
{
@@ -699,23 +699,33 @@ static inline struct xsk_buff_pool *ice_xsk_pool(struct ice_rx_ring *ring)
}
/**
- * ice_tx_xsk_pool - get XSK buffer pool bound to a ring
- * @ring: Tx ring to use
+ * ice_tx_xsk_pool - assign XSK buff pool to XDP ring
+ * @vsi: pointer to VSI
+ * @qid: index of a queue to look at XSK buff pool presence
*
- * Returns a pointer to xdp_umem structure if there is a buffer pool present,
- * NULL otherwise. Tx equivalent of ice_xsk_pool.
+ * Sets XSK buff pool pointer on XDP ring.
+ *
+ * XDP ring is picked from Rx ring, whereas Rx ring is picked based on provided
+ * queue id. Reason for doing so is that queue vectors might have assigned more
+ * than one XDP ring, e.g. when user reduced the queue count on netdev; Rx ring
+ * carries a pointer to one of these XDP rings for its own purposes, such as
+ * handling XDP_TX action, therefore we can piggyback here on the
+ * rx_ring->xdp_ring assignment that was done during XDP rings initialization.
*/
-static inline struct xsk_buff_pool *ice_tx_xsk_pool(struct ice_tx_ring *ring)
+static inline void ice_tx_xsk_pool(struct ice_vsi *vsi, u16 qid)
{
- struct ice_vsi *vsi = ring->vsi;
- u16 qid;
+ struct ice_tx_ring *ring;
- qid = ring->q_index - vsi->alloc_txq;
+ ring = vsi->rx_rings[qid]->xdp_ring;
+ if (!ring)
+ return;
- if (!ice_is_xdp_ena_vsi(vsi) || !test_bit(qid, vsi->af_xdp_zc_qps))
- return NULL;
+ if (!ice_is_xdp_ena_vsi(vsi) || !test_bit(qid, vsi->af_xdp_zc_qps)) {
+ ring->xsk_pool = NULL;
+ return;
+ }
- return xsk_get_pool_from_qid(vsi->netdev, qid);
+ ring->xsk_pool = xsk_get_pool_from_qid(vsi->netdev, qid);
}
/**
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c
index d6aafa272fb0..6c4e1d45235e 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_lib.c
@@ -1983,8 +1983,8 @@ int ice_vsi_cfg_xdp_txqs(struct ice_vsi *vsi)
if (ret)
return ret;
- ice_for_each_xdp_txq(vsi, i)
- vsi->xdp_rings[i]->xsk_pool = ice_tx_xsk_pool(vsi->xdp_rings[i]);
+ ice_for_each_rxq(vsi, i)
+ ice_tx_xsk_pool(vsi, i);
return ret;
}
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index bfd97a9a8f2e..3d45e075204e 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -2581,7 +2581,6 @@ static int ice_xdp_alloc_setup_rings(struct ice_vsi *vsi)
if (ice_setup_tx_ring(xdp_ring))
goto free_xdp_rings;
ice_set_ring_xdp(xdp_ring);
- xdp_ring->xsk_pool = ice_tx_xsk_pool(xdp_ring);
spin_lock_init(&xdp_ring->tx_lock);
for (j = 0; j < xdp_ring->count; j++) {
tx_desc = ICE_TX_DESC(xdp_ring, j);
@@ -2589,13 +2588,6 @@ static int ice_xdp_alloc_setup_rings(struct ice_vsi *vsi)
}
}
- ice_for_each_rxq(vsi, i) {
- if (static_key_enabled(&ice_xdp_locking_key))
- vsi->rx_rings[i]->xdp_ring = vsi->xdp_rings[i % vsi->num_xdp_txq];
- else
- vsi->rx_rings[i]->xdp_ring = vsi->xdp_rings[i];
- }
-
return 0;
free_xdp_rings:
@@ -2685,6 +2677,23 @@ int ice_prepare_xdp_rings(struct ice_vsi *vsi, struct bpf_prog *prog)
xdp_rings_rem -= xdp_rings_per_v;
}
+ ice_for_each_rxq(vsi, i) {
+ if (static_key_enabled(&ice_xdp_locking_key)) {
+ vsi->rx_rings[i]->xdp_ring = vsi->xdp_rings[i % vsi->num_xdp_txq];
+ } else {
+ struct ice_q_vector *q_vector = vsi->rx_rings[i]->q_vector;
+ struct ice_tx_ring *ring;
+
+ ice_for_each_tx_ring(ring, q_vector->tx) {
+ if (ice_ring_is_xdp(ring)) {
+ vsi->rx_rings[i]->xdp_ring = ring;
+ break;
+ }
+ }
+ }
+ ice_tx_xsk_pool(vsi, i);
+ }
+
/* omit the scheduler update if in reset path; XDP queues will be
* taken into account at the end of ice_vsi_rebuild, where
* ice_cfg_vsi_lan is being called
diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c
index 49ba8bfdbf04..e48e29258450 100644
--- a/drivers/net/ethernet/intel/ice/ice_xsk.c
+++ b/drivers/net/ethernet/intel/ice/ice_xsk.c
@@ -243,7 +243,7 @@ static int ice_qp_ena(struct ice_vsi *vsi, u16 q_idx)
if (err)
goto free_buf;
ice_set_ring_xdp(xdp_ring);
- xdp_ring->xsk_pool = ice_tx_xsk_pool(xdp_ring);
+ ice_tx_xsk_pool(vsi, q_idx);
}
err = ice_vsi_cfg_rxq(rx_ring);
@@ -329,6 +329,12 @@ int ice_xsk_pool_setup(struct ice_vsi *vsi, struct xsk_buff_pool *pool, u16 qid)
bool if_running, pool_present = !!pool;
int ret = 0, pool_failure = 0;
+ if (qid >= vsi->num_rxq || qid >= vsi->num_txq) {
+ netdev_err(vsi->netdev, "Please use queue id in scope of combined queues count\n");
+ pool_failure = -EINVAL;
+ goto failure;
+ }
+
if (!is_power_of_2(vsi->rx_rings[qid]->count) ||
!is_power_of_2(vsi->tx_rings[qid]->count)) {
netdev_err(vsi->netdev, "Please align ring sizes to power of 2\n");
@@ -353,7 +359,7 @@ int ice_xsk_pool_setup(struct ice_vsi *vsi, struct xsk_buff_pool *pool, u16 qid)
if (if_running) {
ret = ice_qp_ena(vsi, qid);
if (!ret && pool_present)
- napi_schedule(&vsi->xdp_rings[qid]->q_vector->napi);
+ napi_schedule(&vsi->rx_rings[qid]->xdp_ring->q_vector->napi);
else if (ret)
netdev_err(vsi->netdev, "ice_qp_ena error = %d\n", ret);
}
@@ -944,13 +950,13 @@ ice_xsk_wakeup(struct net_device *netdev, u32 queue_id,
if (!ice_is_xdp_ena_vsi(vsi))
return -EINVAL;
- if (queue_id >= vsi->num_txq)
+ if (queue_id >= vsi->num_txq || queue_id >= vsi->num_rxq)
return -EINVAL;
- if (!vsi->xdp_rings[queue_id]->xsk_pool)
- return -EINVAL;
+ ring = vsi->rx_rings[queue_id]->xdp_ring;
- ring = vsi->xdp_rings[queue_id];
+ if (!ring->xsk_pool)
+ return -EINVAL;
/* The idea here is that if NAPI is running, mark a miss, so
* it will run again. If not, trigger an interrupt and
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c
index 336426a67ac1..38cda659f65f 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c
@@ -1208,7 +1208,6 @@ void ixgbe_ptp_start_cyclecounter(struct ixgbe_adapter *adapter)
struct cyclecounter cc;
unsigned long flags;
u32 incval = 0;
- u32 tsauxc = 0;
u32 fuse0 = 0;
/* For some of the boards below this mask is technically incorrect.
@@ -1243,18 +1242,6 @@ void ixgbe_ptp_start_cyclecounter(struct ixgbe_adapter *adapter)
case ixgbe_mac_x550em_a:
case ixgbe_mac_X550:
cc.read = ixgbe_ptp_read_X550;
-
- /* enable SYSTIME counter */
- IXGBE_WRITE_REG(hw, IXGBE_SYSTIMR, 0);
- IXGBE_WRITE_REG(hw, IXGBE_SYSTIML, 0);
- IXGBE_WRITE_REG(hw, IXGBE_SYSTIMH, 0);
- tsauxc = IXGBE_READ_REG(hw, IXGBE_TSAUXC);
- IXGBE_WRITE_REG(hw, IXGBE_TSAUXC,
- tsauxc & ~IXGBE_TSAUXC_DISABLE_SYSTIME);
- IXGBE_WRITE_REG(hw, IXGBE_TSIM, IXGBE_TSIM_TXTS);
- IXGBE_WRITE_REG(hw, IXGBE_EIMS, IXGBE_EIMS_TIMESYNC);
-
- IXGBE_WRITE_FLUSH(hw);
break;
case ixgbe_mac_X540:
cc.read = ixgbe_ptp_read_82599;
@@ -1286,6 +1273,50 @@ void ixgbe_ptp_start_cyclecounter(struct ixgbe_adapter *adapter)
spin_unlock_irqrestore(&adapter->tmreg_lock, flags);
}
+/**
+ * ixgbe_ptp_init_systime - Initialize SYSTIME registers
+ * @adapter: the ixgbe private board structure
+ *
+ * Initialize and start the SYSTIME registers.
+ */
+static void ixgbe_ptp_init_systime(struct ixgbe_adapter *adapter)
+{
+ struct ixgbe_hw *hw = &adapter->hw;
+ u32 tsauxc;
+
+ switch (hw->mac.type) {
+ case ixgbe_mac_X550EM_x:
+ case ixgbe_mac_x550em_a:
+ case ixgbe_mac_X550:
+ tsauxc = IXGBE_READ_REG(hw, IXGBE_TSAUXC);
+
+ /* Reset SYSTIME registers to 0 */
+ IXGBE_WRITE_REG(hw, IXGBE_SYSTIMR, 0);
+ IXGBE_WRITE_REG(hw, IXGBE_SYSTIML, 0);
+ IXGBE_WRITE_REG(hw, IXGBE_SYSTIMH, 0);
+
+ /* Reset interrupt settings */
+ IXGBE_WRITE_REG(hw, IXGBE_TSIM, IXGBE_TSIM_TXTS);
+ IXGBE_WRITE_REG(hw, IXGBE_EIMS, IXGBE_EIMS_TIMESYNC);
+
+ /* Activate the SYSTIME counter */
+ IXGBE_WRITE_REG(hw, IXGBE_TSAUXC,
+ tsauxc & ~IXGBE_TSAUXC_DISABLE_SYSTIME);
+ break;
+ case ixgbe_mac_X540:
+ case ixgbe_mac_82599EB:
+ /* Reset SYSTIME registers to 0 */
+ IXGBE_WRITE_REG(hw, IXGBE_SYSTIML, 0);
+ IXGBE_WRITE_REG(hw, IXGBE_SYSTIMH, 0);
+ break;
+ default:
+ /* Other devices aren't supported */
+ return;
+ };
+
+ IXGBE_WRITE_FLUSH(hw);
+}
+
/**
* ixgbe_ptp_reset
* @adapter: the ixgbe private board structure
@@ -1312,6 +1343,8 @@ void ixgbe_ptp_reset(struct ixgbe_adapter *adapter)
ixgbe_ptp_start_cyclecounter(adapter);
+ ixgbe_ptp_init_systime(adapter);
+
spin_lock_irqsave(&adapter->tmreg_lock, flags);
timecounter_init(&adapter->hw_tc, &adapter->hw_cc,
ktime_to_ns(ktime_get_real()));
diff --git a/drivers/net/ethernet/lantiq_xrx200.c b/drivers/net/ethernet/lantiq_xrx200.c
index 5edb68a8aab1..57f27cc7724e 100644
--- a/drivers/net/ethernet/lantiq_xrx200.c
+++ b/drivers/net/ethernet/lantiq_xrx200.c
@@ -193,6 +193,7 @@ static int xrx200_alloc_buf(struct xrx200_chan *ch, void *(*alloc)(unsigned int
ch->rx_buff[ch->dma.desc] = alloc(priv->rx_skb_size);
if (!ch->rx_buff[ch->dma.desc]) {
+ ch->rx_buff[ch->dma.desc] = buf;
ret = -ENOMEM;
goto skip;
}
@@ -239,6 +240,12 @@ static int xrx200_hw_receive(struct xrx200_chan *ch)
}
skb = build_skb(buf, priv->rx_skb_size);
+ if (!skb) {
+ skb_free_frag(buf);
+ net_dev->stats.rx_dropped++;
+ return -ENOMEM;
+ }
+
skb_reserve(skb, NET_SKB_PAD);
skb_put(skb, len);
@@ -288,7 +295,7 @@ static int xrx200_poll_rx(struct napi_struct *napi, int budget)
if (ret == XRX200_DMA_PACKET_IN_PROGRESS)
continue;
if (ret != XRX200_DMA_PACKET_COMPLETE)
- return ret;
+ break;
rx++;
} else {
break;
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index 59c9a10f83ba..dcf0aac0aa65 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -1444,8 +1444,8 @@ static int mtk_poll_rx(struct napi_struct *napi, int budget,
int done = 0, bytes = 0;
while (done < budget) {
+ unsigned int pktlen, *rxdcsum;
struct net_device *netdev;
- unsigned int pktlen;
dma_addr_t dma_addr;
u32 hash, reason;
int mac = 0;
@@ -1512,23 +1512,31 @@ static int mtk_poll_rx(struct napi_struct *napi, int budget,
pktlen = RX_DMA_GET_PLEN0(trxd.rxd2);
skb->dev = netdev;
skb_put(skb, pktlen);
- if (trxd.rxd4 & eth->soc->txrx.rx_dma_l4_valid)
+
+ if (MTK_HAS_CAPS(eth->soc->caps, MTK_NETSYS_V2)) {
+ hash = trxd.rxd5 & MTK_RXD5_FOE_ENTRY;
+ if (hash != MTK_RXD5_FOE_ENTRY)
+ skb_set_hash(skb, jhash_1word(hash, 0),
+ PKT_HASH_TYPE_L4);
+ rxdcsum = &trxd.rxd3;
+ } else {
+ hash = trxd.rxd4 & MTK_RXD4_FOE_ENTRY;
+ if (hash != MTK_RXD4_FOE_ENTRY)
+ skb_set_hash(skb, jhash_1word(hash, 0),
+ PKT_HASH_TYPE_L4);
+ rxdcsum = &trxd.rxd4;
+ }
+
+ if (*rxdcsum & eth->soc->txrx.rx_dma_l4_valid)
skb->ip_summed = CHECKSUM_UNNECESSARY;
else
skb_checksum_none_assert(skb);
skb->protocol = eth_type_trans(skb, netdev);
bytes += pktlen;
- hash = trxd.rxd4 & MTK_RXD4_FOE_ENTRY;
- if (hash != MTK_RXD4_FOE_ENTRY) {
- hash = jhash_1word(hash, 0);
- skb_set_hash(skb, hash, PKT_HASH_TYPE_L4);
- }
-
reason = FIELD_GET(MTK_RXD4_PPE_CPU_REASON, trxd.rxd4);
if (reason == MTK_PPE_CPU_REASON_HIT_UNBIND_RATE_REACHED)
- mtk_ppe_check_skb(eth->ppe, skb,
- trxd.rxd4 & MTK_RXD4_FOE_ENTRY);
+ mtk_ppe_check_skb(eth->ppe, skb, hash);
if (netdev->features & NETIF_F_HW_VLAN_CTAG_RX) {
if (MTK_HAS_CAPS(eth->soc->caps, MTK_NETSYS_V2)) {
@@ -3761,6 +3769,7 @@ static const struct mtk_soc_data mt7986_data = {
.txd_size = sizeof(struct mtk_tx_dma_v2),
.rxd_size = sizeof(struct mtk_rx_dma_v2),
.rx_irq_done_mask = MTK_RX_DONE_INT_V2,
+ .rx_dma_l4_valid = RX_DMA_L4_VALID_V2,
.dma_max_len = MTK_TX_DMA_BUF_LEN_V2,
.dma_len_offset = 8,
},
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
index 0a632896451a..98d6a6d047e3 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
@@ -307,6 +307,11 @@
#define RX_DMA_L4_VALID_PDMA BIT(30) /* when PDMA is used */
#define RX_DMA_SPECIAL_TAG BIT(22)
+/* PDMA descriptor rxd5 */
+#define MTK_RXD5_FOE_ENTRY GENMASK(14, 0)
+#define MTK_RXD5_PPE_CPU_REASON GENMASK(22, 18)
+#define MTK_RXD5_SRC_PORT GENMASK(29, 26)
+
#define RX_DMA_GET_SPORT(x) (((x) >> 19) & 0xf)
#define RX_DMA_GET_SPORT_V2(x) (((x) >> 26) & 0x7)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 087952b84ccb..9e6db779b6ef 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -3678,7 +3678,9 @@ static int set_feature_hw_tc(struct net_device *netdev, bool enable)
struct mlx5e_priv *priv = netdev_priv(netdev);
#if IS_ENABLED(CONFIG_MLX5_CLS_ACT)
- if (!enable && mlx5e_tc_num_filters(priv, MLX5_TC_FLAG(NIC_OFFLOAD))) {
+ int tc_flag = mlx5e_is_uplink_rep(priv) ? MLX5_TC_FLAG(ESW_OFFLOAD) :
+ MLX5_TC_FLAG(NIC_OFFLOAD);
+ if (!enable && mlx5e_tc_num_filters(priv, tc_flag)) {
netdev_err(netdev,
"Active offloaded tc filters, can't turn hw_tc_offload off\n");
return -EINVAL;
@@ -4733,14 +4735,6 @@ void mlx5e_build_nic_params(struct mlx5e_priv *priv, struct mlx5e_xsk *xsk, u16
/* RQ */
mlx5e_build_rq_params(mdev, params);
- /* HW LRO */
- if (MLX5_CAP_ETH(mdev, lro_cap) &&
- params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) {
- /* No XSK params: checking the availability of striding RQ in general. */
- if (!mlx5e_rx_mpwqe_is_linear_skb(mdev, params, NULL))
- params->packet_merge.type = slow_pci_heuristic(mdev) ?
- MLX5E_PACKET_MERGE_NONE : MLX5E_PACKET_MERGE_LRO;
- }
params->packet_merge.timeout = mlx5e_choose_lro_timeout(mdev, MLX5E_DEFAULT_LRO_TIMEOUT);
/* CQ moderation params */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index f797fd97d305..7da3dc626192 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -662,6 +662,8 @@ static void mlx5e_build_rep_params(struct net_device *netdev)
params->mqprio.num_tc = 1;
params->tunneled_offload_en = false;
+ if (rep->vport != MLX5_VPORT_UPLINK)
+ params->vlan_strip_disable = true;
mlx5_query_min_inline(mdev, ¶ms->tx_min_inline_mode);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index eb79810199d3..d04739cb793e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -427,7 +427,8 @@ esw_setup_vport_dest(struct mlx5_flow_destination *dest, struct mlx5_flow_act *f
dest[dest_idx].vport.vhca_id =
MLX5_CAP_GEN(esw_attr->dests[attr_idx].mdev, vhca_id);
dest[dest_idx].vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID;
- if (mlx5_lag_mpesw_is_activated(esw->dev))
+ if (dest[dest_idx].vport.num == MLX5_VPORT_UPLINK &&
+ mlx5_lag_mpesw_is_activated(esw->dev))
dest[dest_idx].type = MLX5_FLOW_DESTINATION_TYPE_UPLINK;
}
if (esw_attr->dests[attr_idx].flags & MLX5_ESW_DEST_ENCAP) {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
index 5d41e19378e0..d98acd68af2e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
@@ -1067,30 +1067,32 @@ static void mlx5_ldev_add_netdev(struct mlx5_lag *ldev,
struct net_device *netdev)
{
unsigned int fn = mlx5_get_dev_index(dev);
+ unsigned long flags;
if (fn >= ldev->ports)
return;
- spin_lock(&lag_lock);
+ spin_lock_irqsave(&lag_lock, flags);
ldev->pf[fn].netdev = netdev;
ldev->tracker.netdev_state[fn].link_up = 0;
ldev->tracker.netdev_state[fn].tx_enabled = 0;
- spin_unlock(&lag_lock);
+ spin_unlock_irqrestore(&lag_lock, flags);
}
static void mlx5_ldev_remove_netdev(struct mlx5_lag *ldev,
struct net_device *netdev)
{
+ unsigned long flags;
int i;
- spin_lock(&lag_lock);
+ spin_lock_irqsave(&lag_lock, flags);
for (i = 0; i < ldev->ports; i++) {
if (ldev->pf[i].netdev == netdev) {
ldev->pf[i].netdev = NULL;
break;
}
}
- spin_unlock(&lag_lock);
+ spin_unlock_irqrestore(&lag_lock, flags);
}
static void mlx5_ldev_add_mdev(struct mlx5_lag *ldev,
@@ -1234,7 +1236,7 @@ void mlx5_lag_add_netdev(struct mlx5_core_dev *dev,
mlx5_ldev_add_netdev(ldev, dev, netdev);
for (i = 0; i < ldev->ports; i++)
- if (!ldev->pf[i].dev)
+ if (!ldev->pf[i].netdev)
break;
if (i >= ldev->ports)
@@ -1246,12 +1248,13 @@ void mlx5_lag_add_netdev(struct mlx5_core_dev *dev,
bool mlx5_lag_is_roce(struct mlx5_core_dev *dev)
{
struct mlx5_lag *ldev;
+ unsigned long flags;
bool res;
- spin_lock(&lag_lock);
+ spin_lock_irqsave(&lag_lock, flags);
ldev = mlx5_lag_dev(dev);
res = ldev && __mlx5_lag_is_roce(ldev);
- spin_unlock(&lag_lock);
+ spin_unlock_irqrestore(&lag_lock, flags);
return res;
}
@@ -1260,12 +1263,13 @@ EXPORT_SYMBOL(mlx5_lag_is_roce);
bool mlx5_lag_is_active(struct mlx5_core_dev *dev)
{
struct mlx5_lag *ldev;
+ unsigned long flags;
bool res;
- spin_lock(&lag_lock);
+ spin_lock_irqsave(&lag_lock, flags);
ldev = mlx5_lag_dev(dev);
res = ldev && __mlx5_lag_is_active(ldev);
- spin_unlock(&lag_lock);
+ spin_unlock_irqrestore(&lag_lock, flags);
return res;
}
@@ -1274,13 +1278,14 @@ EXPORT_SYMBOL(mlx5_lag_is_active);
bool mlx5_lag_is_master(struct mlx5_core_dev *dev)
{
struct mlx5_lag *ldev;
+ unsigned long flags;
bool res;
- spin_lock(&lag_lock);
+ spin_lock_irqsave(&lag_lock, flags);
ldev = mlx5_lag_dev(dev);
res = ldev && __mlx5_lag_is_active(ldev) &&
dev == ldev->pf[MLX5_LAG_P1].dev;
- spin_unlock(&lag_lock);
+ spin_unlock_irqrestore(&lag_lock, flags);
return res;
}
@@ -1289,12 +1294,13 @@ EXPORT_SYMBOL(mlx5_lag_is_master);
bool mlx5_lag_is_sriov(struct mlx5_core_dev *dev)
{
struct mlx5_lag *ldev;
+ unsigned long flags;
bool res;
- spin_lock(&lag_lock);
+ spin_lock_irqsave(&lag_lock, flags);
ldev = mlx5_lag_dev(dev);
res = ldev && __mlx5_lag_is_sriov(ldev);
- spin_unlock(&lag_lock);
+ spin_unlock_irqrestore(&lag_lock, flags);
return res;
}
@@ -1303,13 +1309,14 @@ EXPORT_SYMBOL(mlx5_lag_is_sriov);
bool mlx5_lag_is_shared_fdb(struct mlx5_core_dev *dev)
{
struct mlx5_lag *ldev;
+ unsigned long flags;
bool res;
- spin_lock(&lag_lock);
+ spin_lock_irqsave(&lag_lock, flags);
ldev = mlx5_lag_dev(dev);
res = ldev && __mlx5_lag_is_sriov(ldev) &&
test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &ldev->mode_flags);
- spin_unlock(&lag_lock);
+ spin_unlock_irqrestore(&lag_lock, flags);
return res;
}
@@ -1352,9 +1359,10 @@ struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev)
{
struct net_device *ndev = NULL;
struct mlx5_lag *ldev;
+ unsigned long flags;
int i;
- spin_lock(&lag_lock);
+ spin_lock_irqsave(&lag_lock, flags);
ldev = mlx5_lag_dev(dev);
if (!(ldev && __mlx5_lag_is_roce(ldev)))
@@ -1373,7 +1381,7 @@ struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev)
dev_hold(ndev);
unlock:
- spin_unlock(&lag_lock);
+ spin_unlock_irqrestore(&lag_lock, flags);
return ndev;
}
@@ -1383,10 +1391,11 @@ u8 mlx5_lag_get_slave_port(struct mlx5_core_dev *dev,
struct net_device *slave)
{
struct mlx5_lag *ldev;
+ unsigned long flags;
u8 port = 0;
int i;
- spin_lock(&lag_lock);
+ spin_lock_irqsave(&lag_lock, flags);
ldev = mlx5_lag_dev(dev);
if (!(ldev && __mlx5_lag_is_roce(ldev)))
goto unlock;
@@ -1401,7 +1410,7 @@ u8 mlx5_lag_get_slave_port(struct mlx5_core_dev *dev,
port = ldev->v2p_map[port * ldev->buckets];
unlock:
- spin_unlock(&lag_lock);
+ spin_unlock_irqrestore(&lag_lock, flags);
return port;
}
EXPORT_SYMBOL(mlx5_lag_get_slave_port);
@@ -1422,8 +1431,9 @@ struct mlx5_core_dev *mlx5_lag_get_peer_mdev(struct mlx5_core_dev *dev)
{
struct mlx5_core_dev *peer_dev = NULL;
struct mlx5_lag *ldev;
+ unsigned long flags;
- spin_lock(&lag_lock);
+ spin_lock_irqsave(&lag_lock, flags);
ldev = mlx5_lag_dev(dev);
if (!ldev)
goto unlock;
@@ -1433,7 +1443,7 @@ struct mlx5_core_dev *mlx5_lag_get_peer_mdev(struct mlx5_core_dev *dev)
ldev->pf[MLX5_LAG_P1].dev;
unlock:
- spin_unlock(&lag_lock);
+ spin_unlock_irqrestore(&lag_lock, flags);
return peer_dev;
}
EXPORT_SYMBOL(mlx5_lag_get_peer_mdev);
@@ -1446,6 +1456,7 @@ int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev,
int outlen = MLX5_ST_SZ_BYTES(query_cong_statistics_out);
struct mlx5_core_dev **mdev;
struct mlx5_lag *ldev;
+ unsigned long flags;
int num_ports;
int ret, i, j;
void *out;
@@ -1462,7 +1473,7 @@ int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev,
memset(values, 0, sizeof(*values) * num_counters);
- spin_lock(&lag_lock);
+ spin_lock_irqsave(&lag_lock, flags);
ldev = mlx5_lag_dev(dev);
if (ldev && __mlx5_lag_is_active(ldev)) {
num_ports = ldev->ports;
@@ -1472,7 +1483,7 @@ int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev,
num_ports = 1;
mdev[MLX5_LAG_P1] = dev;
}
- spin_unlock(&lag_lock);
+ spin_unlock_irqrestore(&lag_lock, flags);
for (i = 0; i < num_ports; ++i) {
u32 in[MLX5_ST_SZ_DW(query_cong_statistics_in)] = {};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index ba2e5232b90b..616207c3b187 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -1472,7 +1472,9 @@ int mlx5_mdev_init(struct mlx5_core_dev *dev, int profile_idx)
memcpy(&dev->profile, &profile[profile_idx], sizeof(dev->profile));
INIT_LIST_HEAD(&priv->ctx_list);
spin_lock_init(&priv->ctx_lock);
+ lockdep_register_key(&dev->lock_key);
mutex_init(&dev->intf_state_mutex);
+ lockdep_set_class(&dev->intf_state_mutex, &dev->lock_key);
mutex_init(&priv->bfregs.reg_head.lock);
mutex_init(&priv->bfregs.wc_head.lock);
@@ -1527,6 +1529,7 @@ int mlx5_mdev_init(struct mlx5_core_dev *dev, int profile_idx)
mutex_destroy(&priv->bfregs.wc_head.lock);
mutex_destroy(&priv->bfregs.reg_head.lock);
mutex_destroy(&dev->intf_state_mutex);
+ lockdep_unregister_key(&dev->lock_key);
return err;
}
@@ -1545,6 +1548,7 @@ void mlx5_mdev_uninit(struct mlx5_core_dev *dev)
mutex_destroy(&priv->bfregs.wc_head.lock);
mutex_destroy(&priv->bfregs.reg_head.lock);
mutex_destroy(&dev->intf_state_mutex);
+ lockdep_unregister_key(&dev->lock_key);
}
static int probe_one(struct pci_dev *pdev, const struct pci_device_id *id)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
index ec76a8b1acc1..60596357bfc7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
@@ -376,8 +376,8 @@ static int give_pages(struct mlx5_core_dev *dev, u16 func_id, int npages,
goto out_dropped;
}
}
+ err = mlx5_cmd_check(dev, err, in, out);
if (err) {
- err = mlx5_cmd_check(dev, err, in, out);
mlx5_core_warn(dev, "func_id 0x%x, npages %d, err %d\n",
func_id, npages, err);
goto out_dropped;
@@ -524,10 +524,13 @@ static int reclaim_pages(struct mlx5_core_dev *dev, u16 func_id, int npages,
dev->priv.reclaim_pages_discard += npages;
}
/* if triggered by FW event and failed by FW then ignore */
- if (event && err == -EREMOTEIO)
+ if (event && err == -EREMOTEIO) {
err = 0;
+ goto out_free;
+ }
+
+ err = mlx5_cmd_check(dev, err, in, out);
if (err) {
- err = mlx5_cmd_check(dev, err, in, out);
mlx5_core_err(dev, "failed reclaiming pages: err %d\n", err);
goto out_free;
}
diff --git a/drivers/net/ethernet/moxa/moxart_ether.c b/drivers/net/ethernet/moxa/moxart_ether.c
index f11f1cb92025..3b6beb96ca85 100644
--- a/drivers/net/ethernet/moxa/moxart_ether.c
+++ b/drivers/net/ethernet/moxa/moxart_ether.c
@@ -74,11 +74,6 @@ static int moxart_set_mac_address(struct net_device *ndev, void *addr)
static void moxart_mac_free_memory(struct net_device *ndev)
{
struct moxart_mac_priv_t *priv = netdev_priv(ndev);
- int i;
-
- for (i = 0; i < RX_DESC_NUM; i++)
- dma_unmap_single(&priv->pdev->dev, priv->rx_mapping[i],
- priv->rx_buf_size, DMA_FROM_DEVICE);
if (priv->tx_desc_base)
dma_free_coherent(&priv->pdev->dev,
@@ -193,6 +188,7 @@ static int moxart_mac_open(struct net_device *ndev)
static int moxart_mac_stop(struct net_device *ndev)
{
struct moxart_mac_priv_t *priv = netdev_priv(ndev);
+ int i;
napi_disable(&priv->napi);
@@ -204,6 +200,11 @@ static int moxart_mac_stop(struct net_device *ndev)
/* disable all functions */
writel(0, priv->base + REG_MAC_CTRL);
+ /* unmap areas mapped in moxart_mac_setup_desc_ring() */
+ for (i = 0; i < RX_DESC_NUM; i++)
+ dma_unmap_single(&priv->pdev->dev, priv->rx_mapping[i],
+ priv->rx_buf_size, DMA_FROM_DEVICE);
+
return 0;
}
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
index 1443f788ee37..0be79c516781 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
@@ -1564,8 +1564,67 @@ static int ionic_set_features(struct net_device *netdev,
return err;
}
+static int ionic_set_attr_mac(struct ionic_lif *lif, u8 *mac)
+{
+ struct ionic_admin_ctx ctx = {
+ .work = COMPLETION_INITIALIZER_ONSTACK(ctx.work),
+ .cmd.lif_setattr = {
+ .opcode = IONIC_CMD_LIF_SETATTR,
+ .index = cpu_to_le16(lif->index),
+ .attr = IONIC_LIF_ATTR_MAC,
+ },
+ };
+
+ ether_addr_copy(ctx.cmd.lif_setattr.mac, mac);
+ return ionic_adminq_post_wait(lif, &ctx);
+}
+
+static int ionic_get_attr_mac(struct ionic_lif *lif, u8 *mac_addr)
+{
+ struct ionic_admin_ctx ctx = {
+ .work = COMPLETION_INITIALIZER_ONSTACK(ctx.work),
+ .cmd.lif_getattr = {
+ .opcode = IONIC_CMD_LIF_GETATTR,
+ .index = cpu_to_le16(lif->index),
+ .attr = IONIC_LIF_ATTR_MAC,
+ },
+ };
+ int err;
+
+ err = ionic_adminq_post_wait(lif, &ctx);
+ if (err)
+ return err;
+
+ ether_addr_copy(mac_addr, ctx.comp.lif_getattr.mac);
+ return 0;
+}
+
+static int ionic_program_mac(struct ionic_lif *lif, u8 *mac)
+{
+ u8 get_mac[ETH_ALEN];
+ int err;
+
+ err = ionic_set_attr_mac(lif, mac);
+ if (err)
+ return err;
+
+ err = ionic_get_attr_mac(lif, get_mac);
+ if (err)
+ return err;
+
+ /* To deal with older firmware that silently ignores the set attr mac:
+ * doesn't actually change the mac and doesn't return an error, so we
+ * do the get attr to verify whether or not the set actually happened
+ */
+ if (!ether_addr_equal(get_mac, mac))
+ return 1;
+
+ return 0;
+}
+
static int ionic_set_mac_address(struct net_device *netdev, void *sa)
{
+ struct ionic_lif *lif = netdev_priv(netdev);
struct sockaddr *addr = sa;
u8 *mac;
int err;
@@ -1574,6 +1633,14 @@ static int ionic_set_mac_address(struct net_device *netdev, void *sa)
if (ether_addr_equal(netdev->dev_addr, mac))
return 0;
+ err = ionic_program_mac(lif, mac);
+ if (err < 0)
+ return err;
+
+ if (err > 0)
+ netdev_dbg(netdev, "%s: SET and GET ATTR Mac are not equal-due to old FW running\n",
+ __func__);
+
err = eth_prepare_mac_addr_change(netdev, addr);
if (err)
return err;
@@ -2963,6 +3030,9 @@ static void ionic_lif_handle_fw_up(struct ionic_lif *lif)
mutex_lock(&lif->queue_lock);
+ if (test_and_clear_bit(IONIC_LIF_F_BROKEN, lif->state))
+ dev_info(ionic->dev, "FW Up: clearing broken state\n");
+
err = ionic_qcqs_alloc(lif);
if (err)
goto err_unlock;
@@ -3169,6 +3239,7 @@ static int ionic_station_set(struct ionic_lif *lif)
.attr = IONIC_LIF_ATTR_MAC,
},
};
+ u8 mac_address[ETH_ALEN];
struct sockaddr addr;
int err;
@@ -3177,8 +3248,23 @@ static int ionic_station_set(struct ionic_lif *lif)
return err;
netdev_dbg(lif->netdev, "found initial MAC addr %pM\n",
ctx.comp.lif_getattr.mac);
- if (is_zero_ether_addr(ctx.comp.lif_getattr.mac))
- return 0;
+ ether_addr_copy(mac_address, ctx.comp.lif_getattr.mac);
+
+ if (is_zero_ether_addr(mac_address)) {
+ eth_hw_addr_random(netdev);
+ netdev_dbg(netdev, "Random Mac generated: %pM\n", netdev->dev_addr);
+ ether_addr_copy(mac_address, netdev->dev_addr);
+
+ err = ionic_program_mac(lif, mac_address);
+ if (err < 0)
+ return err;
+
+ if (err > 0) {
+ netdev_dbg(netdev, "%s:SET/GET ATTR Mac are not same-due to old FW running\n",
+ __func__);
+ return 0;
+ }
+ }
if (!is_zero_ether_addr(netdev->dev_addr)) {
/* If the netdev mac is non-zero and doesn't match the default
@@ -3186,12 +3272,11 @@ static int ionic_station_set(struct ionic_lif *lif)
* likely here again after a fw-upgrade reset. We need to be
* sure the netdev mac is in our filter list.
*/
- if (!ether_addr_equal(ctx.comp.lif_getattr.mac,
- netdev->dev_addr))
+ if (!ether_addr_equal(mac_address, netdev->dev_addr))
ionic_lif_addr_add(lif, netdev->dev_addr);
} else {
/* Update the netdev mac with the device's mac */
- memcpy(addr.sa_data, ctx.comp.lif_getattr.mac, netdev->addr_len);
+ ether_addr_copy(addr.sa_data, mac_address);
addr.sa_family = AF_INET;
err = eth_prepare_mac_addr_change(netdev, &addr);
if (err) {
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_main.c b/drivers/net/ethernet/pensando/ionic/ionic_main.c
index 4029b4e021f8..56f93b030551 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_main.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_main.c
@@ -474,8 +474,8 @@ static int __ionic_dev_cmd_wait(struct ionic *ionic, unsigned long max_seconds,
ionic_opcode_to_str(opcode), opcode,
ionic_error_to_str(err), err);
- msleep(1000);
iowrite32(0, &idev->dev_cmd_regs->done);
+ msleep(1000);
iowrite32(1, &idev->dev_cmd_regs->doorbell);
goto try_again;
}
@@ -488,6 +488,8 @@ static int __ionic_dev_cmd_wait(struct ionic *ionic, unsigned long max_seconds,
return ionic_error_to_errno(err);
}
+ ionic_dev_cmd_clean(ionic);
+
return 0;
}
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c b/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c
index caa4bfc4c1d6..9b6138b11776 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c
@@ -258,14 +258,18 @@ EXPORT_SYMBOL_GPL(stmmac_set_mac_addr);
/* Enable disable MAC RX/TX */
void stmmac_set_mac(void __iomem *ioaddr, bool enable)
{
- u32 value = readl(ioaddr + MAC_CTRL_REG);
+ u32 old_val, value;
+
+ old_val = readl(ioaddr + MAC_CTRL_REG);
+ value = old_val;
if (enable)
value |= MAC_ENABLE_RX | MAC_ENABLE_TX;
else
value &= ~(MAC_ENABLE_TX | MAC_ENABLE_RX);
- writel(value, ioaddr + MAC_CTRL_REG);
+ if (value != old_val)
+ writel(value, ioaddr + MAC_CTRL_REG);
}
void stmmac_get_mac_addr(void __iomem *ioaddr, unsigned char *addr,
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index c5f33630e771..78f11dabca05 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -983,10 +983,10 @@ static void stmmac_mac_link_up(struct phylink_config *config,
bool tx_pause, bool rx_pause)
{
struct stmmac_priv *priv = netdev_priv(to_net_dev(config->dev));
- u32 ctrl;
+ u32 old_ctrl, ctrl;
- ctrl = readl(priv->ioaddr + MAC_CTRL_REG);
- ctrl &= ~priv->hw->link.speed_mask;
+ old_ctrl = readl(priv->ioaddr + MAC_CTRL_REG);
+ ctrl = old_ctrl & ~priv->hw->link.speed_mask;
if (interface == PHY_INTERFACE_MODE_USXGMII) {
switch (speed) {
@@ -1061,7 +1061,8 @@ static void stmmac_mac_link_up(struct phylink_config *config,
if (tx_pause && rx_pause)
stmmac_mac_flow_ctrl(priv, duplex);
- writel(ctrl, priv->ioaddr + MAC_CTRL_REG);
+ if (ctrl != old_ctrl)
+ writel(ctrl, priv->ioaddr + MAC_CTRL_REG);
stmmac_mac_set(priv, priv->ioaddr, true);
if (phy && priv->dma_cap.eee) {
diff --git a/drivers/net/ipa/ipa_mem.c b/drivers/net/ipa/ipa_mem.c
index 1e9eae208e44..53a1dbeaffa6 100644
--- a/drivers/net/ipa/ipa_mem.c
+++ b/drivers/net/ipa/ipa_mem.c
@@ -568,7 +568,7 @@ static int ipa_smem_init(struct ipa *ipa, u32 item, size_t size)
}
/* Align the address down and the size up to a page boundary */
- addr = qcom_smem_virt_to_phys(virt) & PAGE_MASK;
+ addr = qcom_smem_virt_to_phys(virt);
phys = addr & PAGE_MASK;
size = PAGE_ALIGN(size + addr - phys);
iova = phys; /* We just want a direct mapping */
diff --git a/drivers/net/ipvlan/ipvtap.c b/drivers/net/ipvlan/ipvtap.c
index ef02f2cf5ce1..cbabca167a07 100644
--- a/drivers/net/ipvlan/ipvtap.c
+++ b/drivers/net/ipvlan/ipvtap.c
@@ -194,7 +194,7 @@ static struct notifier_block ipvtap_notifier_block __read_mostly = {
.notifier_call = ipvtap_device_event,
};
-static int ipvtap_init(void)
+static int __init ipvtap_init(void)
{
int err;
@@ -228,7 +228,7 @@ static int ipvtap_init(void)
}
module_init(ipvtap_init);
-static void ipvtap_exit(void)
+static void __exit ipvtap_exit(void)
{
rtnl_link_unregister(&ipvtap_link_ops);
unregister_netdevice_notifier(&ipvtap_notifier_block);
diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c
index f354fad05714..5b0b23e55fa7 100644
--- a/drivers/net/macsec.c
+++ b/drivers/net/macsec.c
@@ -449,11 +449,6 @@ static struct macsec_eth_header *macsec_ethhdr(struct sk_buff *skb)
return (struct macsec_eth_header *)skb_mac_header(skb);
}
-static sci_t dev_to_sci(struct net_device *dev, __be16 port)
-{
- return make_sci(dev->dev_addr, port);
-}
-
static void __macsec_pn_wrapped(struct macsec_secy *secy,
struct macsec_tx_sa *tx_sa)
{
@@ -3622,7 +3617,6 @@ static int macsec_set_mac_address(struct net_device *dev, void *p)
out:
eth_hw_addr_set(dev, addr->sa_data);
- macsec->secy.sci = dev_to_sci(dev, MACSEC_PORT_ES);
/* If h/w offloading is available, propagate to the device */
if (macsec_is_offloaded(macsec)) {
@@ -3960,6 +3954,11 @@ static bool sci_exists(struct net_device *dev, sci_t sci)
return false;
}
+static sci_t dev_to_sci(struct net_device *dev, __be16 port)
+{
+ return make_sci(dev->dev_addr, port);
+}
+
static int macsec_add_dev(struct net_device *dev, sci_t sci, u8 icv_len)
{
struct macsec_dev *macsec = macsec_priv(dev);
diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index 608de5a94165..f90a21781d8d 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -316,11 +316,11 @@ static __maybe_unused int mdio_bus_phy_resume(struct device *dev)
phydev->suspended_by_mdio_bus = 0;
- /* If we managed to get here with the PHY state machine in a state other
- * than PHY_HALTED this is an indication that something went wrong and
- * we should most likely be using MAC managed PM and we are not.
+ /* If we manged to get here with the PHY state machine in a state neither
+ * PHY_HALTED nor PHY_READY this is an indication that something went wrong
+ * and we should most likely be using MAC managed PM and we are not.
*/
- WARN_ON(phydev->state != PHY_HALTED && !phydev->mac_managed_pm);
+ WARN_ON(phydev->state != PHY_HALTED && phydev->state != PHY_READY);
ret = phy_init_hw(phydev);
if (ret < 0)
diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c
index 0f6efaabaa32..d142ac8fcf6e 100644
--- a/drivers/net/usb/r8152.c
+++ b/drivers/net/usb/r8152.c
@@ -5906,6 +5906,11 @@ static void r8153_enter_oob(struct r8152 *tp)
ocp_data &= ~NOW_IS_OOB;
ocp_write_byte(tp, MCU_TYPE_PLA, PLA_OOB_CTRL, ocp_data);
+ /* RX FIFO settings for OOB */
+ ocp_write_dword(tp, MCU_TYPE_PLA, PLA_RXFIFO_CTRL0, RXFIFO_THR1_OOB);
+ ocp_write_word(tp, MCU_TYPE_PLA, PLA_RXFIFO_CTRL1, RXFIFO_THR2_OOB);
+ ocp_write_word(tp, MCU_TYPE_PLA, PLA_RXFIFO_CTRL2, RXFIFO_THR3_OOB);
+
rtl_disable(tp);
rtl_reset_bmu(tp);
@@ -6431,21 +6436,8 @@ static void r8156_fc_parameter(struct r8152 *tp)
u32 pause_on = tp->fc_pause_on ? tp->fc_pause_on : fc_pause_on_auto(tp);
u32 pause_off = tp->fc_pause_off ? tp->fc_pause_off : fc_pause_off_auto(tp);
- switch (tp->version) {
- case RTL_VER_10:
- case RTL_VER_11:
- ocp_write_word(tp, MCU_TYPE_PLA, PLA_RX_FIFO_FULL, pause_on / 8);
- ocp_write_word(tp, MCU_TYPE_PLA, PLA_RX_FIFO_EMPTY, pause_off / 8);
- break;
- case RTL_VER_12:
- case RTL_VER_13:
- case RTL_VER_15:
- ocp_write_word(tp, MCU_TYPE_PLA, PLA_RX_FIFO_FULL, pause_on / 16);
- ocp_write_word(tp, MCU_TYPE_PLA, PLA_RX_FIFO_EMPTY, pause_off / 16);
- break;
- default:
- break;
- }
+ ocp_write_word(tp, MCU_TYPE_PLA, PLA_RX_FIFO_FULL, pause_on / 16);
+ ocp_write_word(tp, MCU_TYPE_PLA, PLA_RX_FIFO_EMPTY, pause_off / 16);
}
static void rtl8156_change_mtu(struct r8152 *tp)
@@ -6557,6 +6549,11 @@ static void rtl8156_down(struct r8152 *tp)
ocp_data &= ~NOW_IS_OOB;
ocp_write_byte(tp, MCU_TYPE_PLA, PLA_OOB_CTRL, ocp_data);
+ /* RX FIFO settings for OOB */
+ ocp_write_word(tp, MCU_TYPE_PLA, PLA_RXFIFO_FULL, 64 / 16);
+ ocp_write_word(tp, MCU_TYPE_PLA, PLA_RX_FIFO_FULL, 1024 / 16);
+ ocp_write_word(tp, MCU_TYPE_PLA, PLA_RX_FIFO_EMPTY, 4096 / 16);
+
rtl_disable(tp);
rtl_reset_bmu(tp);
diff --git a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c
index faa279bbbcb2..7eb23805aa94 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76_connac_mcu.c
@@ -1403,6 +1403,8 @@ int mt76_connac_mcu_uni_add_bss(struct mt76_phy *phy,
else
conn_type = CONNECTION_INFRA_AP;
basic_req.basic.conn_type = cpu_to_le32(conn_type);
+ /* Fully active/deactivate BSS network in AP mode only */
+ basic_req.basic.active = enable;
break;
case NL80211_IFTYPE_STATION:
if (vif->p2p)
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/main.c b/drivers/net/wireless/mediatek/mt76/mt7921/main.c
index e86fe9ee4623..d3f310877248 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/main.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/main.c
@@ -653,15 +653,6 @@ static void mt7921_bss_info_changed(struct ieee80211_hw *hw,
}
}
- if (changed & BSS_CHANGED_BEACON_ENABLED && info->enable_beacon) {
- struct mt7921_vif *mvif = (struct mt7921_vif *)vif->drv_priv;
-
- mt76_connac_mcu_uni_add_bss(phy->mt76, vif, &mvif->sta.wcid,
- true);
- mt7921_mcu_sta_update(dev, NULL, vif, true,
- MT76_STA_INFO_STATE_NONE);
- }
-
if (changed & (BSS_CHANGED_BEACON |
BSS_CHANGED_BEACON_ENABLED))
mt7921_mcu_uni_add_beacon_offload(dev, hw, vif,
@@ -1500,6 +1491,42 @@ mt7921_channel_switch_beacon(struct ieee80211_hw *hw,
mt7921_mutex_release(dev);
}
+static int
+mt7921_start_ap(struct ieee80211_hw *hw, struct ieee80211_vif *vif)
+{
+ struct mt7921_vif *mvif = (struct mt7921_vif *)vif->drv_priv;
+ struct mt7921_phy *phy = mt7921_hw_phy(hw);
+ struct mt7921_dev *dev = mt7921_hw_dev(hw);
+ int err;
+
+ err = mt76_connac_mcu_uni_add_bss(phy->mt76, vif, &mvif->sta.wcid,
+ true);
+ if (err)
+ return err;
+
+ err = mt7921_mcu_set_bss_pm(dev, vif, true);
+ if (err)
+ return err;
+
+ return mt7921_mcu_sta_update(dev, NULL, vif, true,
+ MT76_STA_INFO_STATE_NONE);
+}
+
+static void
+mt7921_stop_ap(struct ieee80211_hw *hw, struct ieee80211_vif *vif)
+{
+ struct mt7921_vif *mvif = (struct mt7921_vif *)vif->drv_priv;
+ struct mt7921_phy *phy = mt7921_hw_phy(hw);
+ struct mt7921_dev *dev = mt7921_hw_dev(hw);
+ int err;
+
+ err = mt7921_mcu_set_bss_pm(dev, vif, false);
+ if (err)
+ return;
+
+ mt76_connac_mcu_uni_add_bss(phy->mt76, vif, &mvif->sta.wcid, false);
+}
+
const struct ieee80211_ops mt7921_ops = {
.tx = mt7921_tx,
.start = mt7921_start,
@@ -1510,6 +1537,8 @@ const struct ieee80211_ops mt7921_ops = {
.conf_tx = mt7921_conf_tx,
.configure_filter = mt7921_configure_filter,
.bss_info_changed = mt7921_bss_info_changed,
+ .start_ap = mt7921_start_ap,
+ .stop_ap = mt7921_stop_ap,
.sta_state = mt7921_sta_state,
.sta_pre_rcu_remove = mt76_sta_pre_rcu_remove,
.set_key = mt7921_set_key,
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c
index 613a94be8ea4..6d0aceb5226a 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c
@@ -1020,7 +1020,7 @@ mt7921_mcu_uni_bss_bcnft(struct mt7921_dev *dev, struct ieee80211_vif *vif,
&bcnft_req, sizeof(bcnft_req), true);
}
-static int
+int
mt7921_mcu_set_bss_pm(struct mt7921_dev *dev, struct ieee80211_vif *vif,
bool enable)
{
@@ -1049,9 +1049,6 @@ mt7921_mcu_set_bss_pm(struct mt7921_dev *dev, struct ieee80211_vif *vif,
};
int err;
- if (vif->type != NL80211_IFTYPE_STATION)
- return 0;
-
err = mt76_mcu_send_msg(&dev->mt76, MCU_CE_CMD(SET_BSS_ABORT),
&req_hdr, sizeof(req_hdr), false);
if (err < 0 || !enable)
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h b/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h
index 66054123bcc4..cebc3cfa01b8 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h
@@ -280,6 +280,8 @@ int mt7921_wpdma_reset(struct mt7921_dev *dev, bool force);
int mt7921_wpdma_reinit_cond(struct mt7921_dev *dev);
void mt7921_dma_cleanup(struct mt7921_dev *dev);
int mt7921_run_firmware(struct mt7921_dev *dev);
+int mt7921_mcu_set_bss_pm(struct mt7921_dev *dev, struct ieee80211_vif *vif,
+ bool enable);
int mt7921_mcu_sta_update(struct mt7921_dev *dev, struct ieee80211_sta *sta,
struct ieee80211_vif *vif, bool enable,
enum mt76_sta_info_state state);
diff --git a/drivers/nfc/pn533/uart.c b/drivers/nfc/pn533/uart.c
index 2caf997f9bc9..07596bf5f7d6 100644
--- a/drivers/nfc/pn533/uart.c
+++ b/drivers/nfc/pn533/uart.c
@@ -310,6 +310,7 @@ static void pn532_uart_remove(struct serdev_device *serdev)
pn53x_unregister_nfc(pn532->priv);
serdev_device_close(serdev);
pn53x_common_clean(pn532->priv);
+ del_timer_sync(&pn532->cmd_timeout);
kfree_skb(pn532->recv_skb);
kfree(pn532);
}
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 6ffc9e4258a8..78edb1ea4748 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -1549,7 +1549,6 @@ static blk_status_t scsi_prepare_cmd(struct request *req)
scsi_init_command(sdev, cmd);
cmd->eh_eflags = 0;
- cmd->allowed = 0;
cmd->prot_type = 0;
cmd->prot_flags = 0;
cmd->submitter = 0;
@@ -1600,6 +1599,8 @@ static blk_status_t scsi_prepare_cmd(struct request *req)
return ret;
}
+ /* Usually overridden by the ULP */
+ cmd->allowed = 0;
memset(cmd->cmnd, 0, sizeof(cmd->cmnd));
return scsi_cmd_to_driver(cmd)->init_command(cmd);
}
diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c
index fe000da11332..8ced292c4b96 100644
--- a/drivers/scsi/storvsc_drv.c
+++ b/drivers/scsi/storvsc_drv.c
@@ -2012,7 +2012,7 @@ static int storvsc_probe(struct hv_device *device,
*/
host_dev->handle_error_wq =
alloc_ordered_workqueue("storvsc_error_wq_%d",
- WQ_MEM_RECLAIM,
+ 0,
host->host_no);
if (!host_dev->handle_error_wq) {
ret = -ENOMEM;
diff --git a/drivers/video/fbdev/core/fbcon.c b/drivers/video/fbdev/core/fbcon.c
index b89075f3b6ab..2efaed36a3ad 100644
--- a/drivers/video/fbdev/core/fbcon.c
+++ b/drivers/video/fbdev/core/fbcon.c
@@ -2402,15 +2402,21 @@ static int fbcon_do_set_font(struct vc_data *vc, int w, int h, int charcount,
struct fb_info *info = fbcon_info_from_console(vc->vc_num);
struct fbcon_ops *ops = info->fbcon_par;
struct fbcon_display *p = &fb_display[vc->vc_num];
- int resize;
+ int resize, ret, old_userfont, old_width, old_height, old_charcount;
char *old_data = NULL;
resize = (w != vc->vc_font.width) || (h != vc->vc_font.height);
if (p->userfont)
old_data = vc->vc_font.data;
vc->vc_font.data = (void *)(p->fontdata = data);
+ old_userfont = p->userfont;
if ((p->userfont = userfont))
REFCOUNT(data)++;
+
+ old_width = vc->vc_font.width;
+ old_height = vc->vc_font.height;
+ old_charcount = vc->vc_font.charcount;
+
vc->vc_font.width = w;
vc->vc_font.height = h;
vc->vc_font.charcount = charcount;
@@ -2426,7 +2432,9 @@ static int fbcon_do_set_font(struct vc_data *vc, int w, int h, int charcount,
rows = FBCON_SWAP(ops->rotate, info->var.yres, info->var.xres);
cols /= w;
rows /= h;
- vc_resize(vc, cols, rows);
+ ret = vc_resize(vc, cols, rows);
+ if (ret)
+ goto err_out;
} else if (con_is_visible(vc)
&& vc->vc_mode == KD_TEXT) {
fbcon_clear_margins(vc, 0);
@@ -2436,6 +2444,21 @@ static int fbcon_do_set_font(struct vc_data *vc, int w, int h, int charcount,
if (old_data && (--REFCOUNT(old_data) == 0))
kfree(old_data - FONT_EXTRA_WORDS * sizeof(int));
return 0;
+
+err_out:
+ p->fontdata = old_data;
+ vc->vc_font.data = (void *)old_data;
+
+ if (userfont) {
+ p->userfont = old_userfont;
+ REFCOUNT(data)--;
+ }
+
+ vc->vc_font.width = old_width;
+ vc->vc_font.height = old_height;
+ vc->vc_font.charcount = old_charcount;
+
+ return ret;
}
/*
diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c
index 3369734108af..e88e8f6f0a33 100644
--- a/drivers/xen/privcmd.c
+++ b/drivers/xen/privcmd.c
@@ -581,27 +581,30 @@ static int lock_pages(
struct privcmd_dm_op_buf kbufs[], unsigned int num,
struct page *pages[], unsigned int nr_pages, unsigned int *pinned)
{
- unsigned int i;
+ unsigned int i, off = 0;
- for (i = 0; i < num; i++) {
+ for (i = 0; i < num; ) {
unsigned int requested;
int page_count;
requested = DIV_ROUND_UP(
offset_in_page(kbufs[i].uptr) + kbufs[i].size,
- PAGE_SIZE);
+ PAGE_SIZE) - off;
if (requested > nr_pages)
return -ENOSPC;
page_count = pin_user_pages_fast(
- (unsigned long) kbufs[i].uptr,
+ (unsigned long)kbufs[i].uptr + off * PAGE_SIZE,
requested, FOLL_WRITE, pages);
- if (page_count < 0)
- return page_count;
+ if (page_count <= 0)
+ return page_count ? : -EFAULT;
*pinned += page_count;
nr_pages -= page_count;
pages += page_count;
+
+ off = (requested == page_count) ? 0 : off + page_count;
+ i += !off;
}
return 0;
@@ -677,10 +680,8 @@ static long privcmd_ioctl_dm_op(struct file *file, void __user *udata)
}
rc = lock_pages(kbufs, kdata.num, pages, nr_pages, &pinned);
- if (rc < 0) {
- nr_pages = pinned;
+ if (rc < 0)
goto out;
- }
for (i = 0; i < kdata.num; i++) {
set_xen_guest_handle(xbufs[i].h, kbufs[i].uptr);
@@ -692,7 +693,7 @@ static long privcmd_ioctl_dm_op(struct file *file, void __user *udata)
xen_preemptible_hcall_end();
out:
- unlock_pages(pages, nr_pages);
+ unlock_pages(pages, pinned);
kfree(xbufs);
kfree(pages);
kfree(kbufs);
diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index deaed255f301..a8ecd83abb11 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -440,39 +440,26 @@ void btrfs_wait_block_group_cache_progress(struct btrfs_block_group *cache,
btrfs_put_caching_control(caching_ctl);
}
-int btrfs_wait_block_group_cache_done(struct btrfs_block_group *cache)
+static int btrfs_caching_ctl_wait_done(struct btrfs_block_group *cache,
+ struct btrfs_caching_control *caching_ctl)
+{
+ wait_event(caching_ctl->wait, btrfs_block_group_done(cache));
+ return cache->cached == BTRFS_CACHE_ERROR ? -EIO : 0;
+}
+
+static int btrfs_wait_block_group_cache_done(struct btrfs_block_group *cache)
{
struct btrfs_caching_control *caching_ctl;
- int ret = 0;
+ int ret;
caching_ctl = btrfs_get_caching_control(cache);
if (!caching_ctl)
return (cache->cached == BTRFS_CACHE_ERROR) ? -EIO : 0;
-
- wait_event(caching_ctl->wait, btrfs_block_group_done(cache));
- if (cache->cached == BTRFS_CACHE_ERROR)
- ret = -EIO;
+ ret = btrfs_caching_ctl_wait_done(cache, caching_ctl);
btrfs_put_caching_control(caching_ctl);
return ret;
}
-static bool space_cache_v1_done(struct btrfs_block_group *cache)
-{
- bool ret;
-
- spin_lock(&cache->lock);
- ret = cache->cached != BTRFS_CACHE_FAST;
- spin_unlock(&cache->lock);
-
- return ret;
-}
-
-void btrfs_wait_space_cache_v1_finished(struct btrfs_block_group *cache,
- struct btrfs_caching_control *caching_ctl)
-{
- wait_event(caching_ctl->wait, space_cache_v1_done(cache));
-}
-
#ifdef CONFIG_BTRFS_DEBUG
static void fragment_free_space(struct btrfs_block_group *block_group)
{
@@ -750,9 +737,8 @@ static noinline void caching_thread(struct btrfs_work *work)
btrfs_put_block_group(block_group);
}
-int btrfs_cache_block_group(struct btrfs_block_group *cache, int load_cache_only)
+int btrfs_cache_block_group(struct btrfs_block_group *cache, bool wait)
{
- DEFINE_WAIT(wait);
struct btrfs_fs_info *fs_info = cache->fs_info;
struct btrfs_caching_control *caching_ctl = NULL;
int ret = 0;
@@ -785,10 +771,7 @@ int btrfs_cache_block_group(struct btrfs_block_group *cache, int load_cache_only
}
WARN_ON(cache->caching_ctl);
cache->caching_ctl = caching_ctl;
- if (btrfs_test_opt(fs_info, SPACE_CACHE))
- cache->cached = BTRFS_CACHE_FAST;
- else
- cache->cached = BTRFS_CACHE_STARTED;
+ cache->cached = BTRFS_CACHE_STARTED;
cache->has_caching_ctl = 1;
spin_unlock(&cache->lock);
@@ -801,8 +784,8 @@ int btrfs_cache_block_group(struct btrfs_block_group *cache, int load_cache_only
btrfs_queue_work(fs_info->caching_workers, &caching_ctl->work);
out:
- if (load_cache_only && caching_ctl)
- btrfs_wait_space_cache_v1_finished(cache, caching_ctl);
+ if (wait && caching_ctl)
+ ret = btrfs_caching_ctl_wait_done(cache, caching_ctl);
if (caching_ctl)
btrfs_put_caching_control(caching_ctl);
@@ -3313,7 +3296,7 @@ int btrfs_update_block_group(struct btrfs_trans_handle *trans,
* space back to the block group, otherwise we will leak space.
*/
if (!alloc && !btrfs_block_group_done(cache))
- btrfs_cache_block_group(cache, 1);
+ btrfs_cache_block_group(cache, true);
byte_in_group = bytenr - cache->start;
WARN_ON(byte_in_group > cache->length);
diff --git a/fs/btrfs/block-group.h b/fs/btrfs/block-group.h
index 35e0e860cc0b..6b3cdc4cbc41 100644
--- a/fs/btrfs/block-group.h
+++ b/fs/btrfs/block-group.h
@@ -263,9 +263,7 @@ void btrfs_dec_nocow_writers(struct btrfs_block_group *bg);
void btrfs_wait_nocow_writers(struct btrfs_block_group *bg);
void btrfs_wait_block_group_cache_progress(struct btrfs_block_group *cache,
u64 num_bytes);
-int btrfs_wait_block_group_cache_done(struct btrfs_block_group *cache);
-int btrfs_cache_block_group(struct btrfs_block_group *cache,
- int load_cache_only);
+int btrfs_cache_block_group(struct btrfs_block_group *cache, bool wait);
void btrfs_put_caching_control(struct btrfs_caching_control *ctl);
struct btrfs_caching_control *btrfs_get_caching_control(
struct btrfs_block_group *cache);
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 3a51d0c13a95..7d3ca3ea0bce 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -494,7 +494,6 @@ struct btrfs_free_cluster {
enum btrfs_caching_type {
BTRFS_CACHE_NO,
BTRFS_CACHE_STARTED,
- BTRFS_CACHE_FAST,
BTRFS_CACHE_FINISHED,
BTRFS_CACHE_ERROR,
};
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index a7dd6ba25e99..c0f358b958ab 100644
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -165,7 +165,7 @@ int btrfs_init_dev_replace(struct btrfs_fs_info *fs_info)
*/
if (btrfs_find_device(fs_info->fs_devices, &args)) {
btrfs_err(fs_info,
- "replace devid present without an active replace item");
+"replace without active item, run 'device scan --forget' on the target device");
ret = -EUCLEAN;
} else {
dev_replace->srcdev = NULL;
@@ -1128,8 +1128,7 @@ int btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info)
up_write(&dev_replace->rwsem);
/* Scrub for replace must not be running in suspended state */
- ret = btrfs_scrub_cancel(fs_info);
- ASSERT(ret != -ENOTCONN);
+ btrfs_scrub_cancel(fs_info);
trans = btrfs_start_transaction(root, 0);
if (IS_ERR(trans)) {
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index f2c79838ebe5..ced3fc76063f 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2567,17 +2567,10 @@ int btrfs_pin_extent_for_log_replay(struct btrfs_trans_handle *trans,
return -EINVAL;
/*
- * pull in the free space cache (if any) so that our pin
- * removes the free space from the cache. We have load_only set
- * to one because the slow code to read in the free extents does check
- * the pinned extents.
+ * Fully cache the free space first so that our pin removes the free space
+ * from the cache.
*/
- btrfs_cache_block_group(cache, 1);
- /*
- * Make sure we wait until the cache is completely built in case it is
- * missing or is invalid and therefore needs to be rebuilt.
- */
- ret = btrfs_wait_block_group_cache_done(cache);
+ ret = btrfs_cache_block_group(cache, true);
if (ret)
goto out;
@@ -2600,12 +2593,7 @@ static int __exclude_logged_extent(struct btrfs_fs_info *fs_info,
if (!block_group)
return -EINVAL;
- btrfs_cache_block_group(block_group, 1);
- /*
- * Make sure we wait until the cache is completely built in case it is
- * missing or is invalid and therefore needs to be rebuilt.
- */
- ret = btrfs_wait_block_group_cache_done(block_group);
+ ret = btrfs_cache_block_group(block_group, true);
if (ret)
goto out;
@@ -4415,7 +4403,7 @@ static noinline int find_free_extent(struct btrfs_root *root,
ffe_ctl->cached = btrfs_block_group_done(block_group);
if (unlikely(!ffe_ctl->cached)) {
ffe_ctl->have_caching_bg = true;
- ret = btrfs_cache_block_group(block_group, 0);
+ ret = btrfs_cache_block_group(block_group, false);
/*
* If we get ENOMEM here or something else we want to
@@ -6169,13 +6157,7 @@ int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range)
if (end - start >= range->minlen) {
if (!btrfs_block_group_done(cache)) {
- ret = btrfs_cache_block_group(cache, 0);
- if (ret) {
- bg_failed++;
- bg_ret = ret;
- continue;
- }
- ret = btrfs_wait_block_group_cache_done(cache);
+ ret = btrfs_cache_block_group(cache, true);
if (ret) {
bg_failed++;
bg_ret = ret;
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 89c6d7ff1987..78df9b8557dd 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -2483,6 +2483,7 @@ static int fill_holes(struct btrfs_trans_handle *trans,
btrfs_set_file_extent_num_bytes(leaf, fi, num_bytes);
btrfs_set_file_extent_ram_bytes(leaf, fi, num_bytes);
btrfs_set_file_extent_offset(leaf, fi, 0);
+ btrfs_set_file_extent_generation(leaf, fi, trans->transid);
btrfs_mark_buffer_dirty(leaf);
goto out;
}
@@ -2499,6 +2500,7 @@ static int fill_holes(struct btrfs_trans_handle *trans,
btrfs_set_file_extent_num_bytes(leaf, fi, num_bytes);
btrfs_set_file_extent_ram_bytes(leaf, fi, num_bytes);
btrfs_set_file_extent_offset(leaf, fi, 0);
+ btrfs_set_file_extent_generation(leaf, fi, trans->transid);
btrfs_mark_buffer_dirty(leaf);
goto out;
}
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c
index a64b26b16904..d647cb2938c0 100644
--- a/fs/btrfs/root-tree.c
+++ b/fs/btrfs/root-tree.c
@@ -349,9 +349,10 @@ int btrfs_del_root_ref(struct btrfs_trans_handle *trans, u64 root_id,
key.offset = ref_id;
again:
ret = btrfs_search_slot(trans, tree_root, &key, path, -1, 1);
- if (ret < 0)
+ if (ret < 0) {
+ err = ret;
goto out;
- if (ret == 0) {
+ } else if (ret == 0) {
leaf = path->nodes[0];
ref = btrfs_item_ptr(leaf, path->slots[0],
struct btrfs_root_ref);
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 9cd9d06f5469..3460fd674380 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -2344,8 +2344,11 @@ int btrfs_get_dev_args_from_path(struct btrfs_fs_info *fs_info,
ret = btrfs_get_bdev_and_sb(path, FMODE_READ, fs_info->bdev_holder, 0,
&bdev, &disk_super);
- if (ret)
+ if (ret) {
+ btrfs_put_dev_args_from_path(args);
return ret;
+ }
+
args->devid = btrfs_stack_device_id(&disk_super->dev_item);
memcpy(args->uuid, disk_super->dev_item.uuid, BTRFS_UUID_SIZE);
if (btrfs_fs_incompat(fs_info, METADATA_UUID))
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c
index 7421abcf325a..5bb8d8c86311 100644
--- a/fs/btrfs/xattr.c
+++ b/fs/btrfs/xattr.c
@@ -371,6 +371,9 @@ static int btrfs_xattr_handler_set(const struct xattr_handler *handler,
const char *name, const void *buffer,
size_t size, int flags)
{
+ if (btrfs_root_readonly(BTRFS_I(inode)->root))
+ return -EROFS;
+
name = xattr_full_name(handler, name);
return btrfs_setxattr_trans(inode, name, buffer, size, flags);
}
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index aa4c1d403708..3898ec2632dc 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -3671,7 +3671,7 @@ static long smb3_zero_range(struct file *file, struct cifs_tcon *tcon,
static long smb3_punch_hole(struct file *file, struct cifs_tcon *tcon,
loff_t offset, loff_t len)
{
- struct inode *inode;
+ struct inode *inode = file_inode(file);
struct cifsFileInfo *cfile = file->private_data;
struct file_zero_data_information fsctl_buf;
long rc;
@@ -3680,14 +3680,12 @@ static long smb3_punch_hole(struct file *file, struct cifs_tcon *tcon,
xid = get_xid();
- inode = d_inode(cfile->dentry);
-
+ inode_lock(inode);
/* Need to make file sparse, if not already, before freeing range. */
/* Consider adding equivalent for compressed since it could also work */
if (!smb2_set_sparse(xid, tcon, cfile, inode, set_sparse)) {
rc = -EOPNOTSUPP;
- free_xid(xid);
- return rc;
+ goto out;
}
filemap_invalidate_lock(inode->i_mapping);
@@ -3707,8 +3705,10 @@ static long smb3_punch_hole(struct file *file, struct cifs_tcon *tcon,
true /* is_fctl */, (char *)&fsctl_buf,
sizeof(struct file_zero_data_information),
CIFSMaxBufSize, NULL, NULL);
- free_xid(xid);
filemap_invalidate_unlock(inode->i_mapping);
+out:
+ inode_unlock(inode);
+ free_xid(xid);
return rc;
}
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index c705de32e225..c7614ade875b 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -2571,19 +2571,15 @@ alloc_path_with_tree_prefix(__le16 **out_path, int *out_size, int *out_len,
path_len = UniStrnlen((wchar_t *)path, PATH_MAX);
- /*
- * make room for one path separator between the treename and
- * path
- */
- *out_len = treename_len + 1 + path_len;
+ /* make room for one path separator only if @path isn't empty */
+ *out_len = treename_len + (path[0] ? 1 : 0) + path_len;
/*
- * final path needs to be null-terminated UTF16 with a
- * size aligned to 8
+ * final path needs to be 8-byte aligned as specified in
+ * MS-SMB2 2.2.13 SMB2 CREATE Request.
*/
-
- *out_size = roundup((*out_len+1)*2, 8);
- *out_path = kzalloc(*out_size, GFP_KERNEL);
+ *out_size = roundup(*out_len * sizeof(__le16), 8);
+ *out_path = kzalloc(*out_size + sizeof(__le16) /* null */, GFP_KERNEL);
if (!*out_path)
return -ENOMEM;
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 05221366a16d..08a1993ab7fd 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -134,10 +134,10 @@ static bool inode_io_list_move_locked(struct inode *inode,
static void wb_wakeup(struct bdi_writeback *wb)
{
- spin_lock_bh(&wb->work_lock);
+ spin_lock_irq(&wb->work_lock);
if (test_bit(WB_registered, &wb->state))
mod_delayed_work(bdi_wq, &wb->dwork, 0);
- spin_unlock_bh(&wb->work_lock);
+ spin_unlock_irq(&wb->work_lock);
}
static void finish_writeback_work(struct bdi_writeback *wb,
@@ -164,7 +164,7 @@ static void wb_queue_work(struct bdi_writeback *wb,
if (work->done)
atomic_inc(&work->done->cnt);
- spin_lock_bh(&wb->work_lock);
+ spin_lock_irq(&wb->work_lock);
if (test_bit(WB_registered, &wb->state)) {
list_add_tail(&work->list, &wb->work_list);
@@ -172,7 +172,7 @@ static void wb_queue_work(struct bdi_writeback *wb,
} else
finish_writeback_work(wb, work);
- spin_unlock_bh(&wb->work_lock);
+ spin_unlock_irq(&wb->work_lock);
}
/**
@@ -2082,13 +2082,13 @@ static struct wb_writeback_work *get_next_work_item(struct bdi_writeback *wb)
{
struct wb_writeback_work *work = NULL;
- spin_lock_bh(&wb->work_lock);
+ spin_lock_irq(&wb->work_lock);
if (!list_empty(&wb->work_list)) {
work = list_entry(wb->work_list.next,
struct wb_writeback_work, list);
list_del_init(&work->list);
}
- spin_unlock_bh(&wb->work_lock);
+ spin_unlock_irq(&wb->work_lock);
return work;
}
diff --git a/fs/namespace.c b/fs/namespace.c
index e6a7e769d25d..a59f8d645654 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -4238,6 +4238,13 @@ static int build_mount_idmapped(const struct mount_attr *attr, size_t usize,
err = -EPERM;
goto out_fput;
}
+
+ /* We're not controlling the target namespace. */
+ if (!ns_capable(mnt_userns, CAP_SYS_ADMIN)) {
+ err = -EPERM;
+ goto out_fput;
+ }
+
kattr->mnt_userns = get_user_ns(mnt_userns);
out_fput:
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 2d72b1b7ed74..9a0e4a89cdf1 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -221,8 +221,10 @@ nfs_file_fsync_commit(struct file *file, int datasync)
int
nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync)
{
- struct nfs_open_context *ctx = nfs_file_open_context(file);
struct inode *inode = file_inode(file);
+ struct nfs_inode *nfsi = NFS_I(inode);
+ long save_nredirtied = atomic_long_read(&nfsi->redirtied_pages);
+ long nredirtied;
int ret;
trace_nfs_fsync_enter(inode);
@@ -237,15 +239,10 @@ nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync)
ret = pnfs_sync_inode(inode, !!datasync);
if (ret != 0)
break;
- if (!test_and_clear_bit(NFS_CONTEXT_RESEND_WRITES, &ctx->flags))
+ nredirtied = atomic_long_read(&nfsi->redirtied_pages);
+ if (nredirtied == save_nredirtied)
break;
- /*
- * If nfs_file_fsync_commit detected a server reboot, then
- * resend all dirty pages that might have been covered by
- * the NFS_CONTEXT_RESEND_WRITES flag
- */
- start = 0;
- end = LLONG_MAX;
+ save_nredirtied = nredirtied;
}
trace_nfs_fsync_exit(inode, ret);
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index b4e46b0ffa2d..bea7c005119c 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -426,6 +426,7 @@ nfs_ilookup(struct super_block *sb, struct nfs_fattr *fattr, struct nfs_fh *fh)
static void nfs_inode_init_regular(struct nfs_inode *nfsi)
{
atomic_long_set(&nfsi->nrequests, 0);
+ atomic_long_set(&nfsi->redirtied_pages, 0);
INIT_LIST_HEAD(&nfsi->commit_info.list);
atomic_long_set(&nfsi->commit_info.ncommit, 0);
atomic_set(&nfsi->commit_info.rpcs_out, 0);
diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c
index e88f6b18445e..9eb181287879 100644
--- a/fs/nfs/nfs4file.c
+++ b/fs/nfs/nfs4file.c
@@ -340,6 +340,11 @@ static struct file *__nfs42_ssc_open(struct vfsmount *ss_mnt,
goto out;
}
+ if (!S_ISREG(fattr->mode)) {
+ res = ERR_PTR(-EBADF);
+ goto out;
+ }
+
res = ERR_PTR(-ENOMEM);
len = strlen(SSC_READ_NAME_BODY) + 16;
read_name = kzalloc(len, GFP_KERNEL);
@@ -357,6 +362,7 @@ static struct file *__nfs42_ssc_open(struct vfsmount *ss_mnt,
r_ino->i_fop);
if (IS_ERR(filep)) {
res = ERR_CAST(filep);
+ iput(r_ino);
goto out_free_name;
}
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 1c706465d090..5d7e1c206184 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1419,10 +1419,12 @@ static void nfs_initiate_write(struct nfs_pgio_header *hdr,
*/
static void nfs_redirty_request(struct nfs_page *req)
{
+ struct nfs_inode *nfsi = NFS_I(page_file_mapping(req->wb_page)->host);
+
/* Bump the transmission count */
req->wb_nio++;
nfs_mark_request_dirty(req);
- set_bit(NFS_CONTEXT_RESEND_WRITES, &nfs_req_openctx(req)->flags);
+ atomic_long_inc(&nfsi->redirtied_pages);
nfs_end_page_writeback(req);
nfs_release_request(req);
}
@@ -1892,7 +1894,7 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data)
/* We have a mismatch. Write the page again */
dprintk_cont(" mismatch\n");
nfs_mark_request_dirty(req);
- set_bit(NFS_CONTEXT_RESEND_WRITES, &nfs_req_openctx(req)->flags);
+ atomic_long_inc(&NFS_I(data->inode)->redirtied_pages);
next:
nfs_unlock_and_release_request(req);
/* Latency breaker */
diff --git a/fs/ntfs3/xattr.c b/fs/ntfs3/xattr.c
index 1b8c89dbf668..3629049decac 100644
--- a/fs/ntfs3/xattr.c
+++ b/fs/ntfs3/xattr.c
@@ -478,8 +478,7 @@ static noinline int ntfs_set_ea(struct inode *inode, const char *name,
}
#ifdef CONFIG_NTFS3_FS_POSIX_ACL
-static struct posix_acl *ntfs_get_acl_ex(struct user_namespace *mnt_userns,
- struct inode *inode, int type,
+static struct posix_acl *ntfs_get_acl_ex(struct inode *inode, int type,
int locked)
{
struct ntfs_inode *ni = ntfs_i(inode);
@@ -514,7 +513,7 @@ static struct posix_acl *ntfs_get_acl_ex(struct user_namespace *mnt_userns,
/* Translate extended attribute to acl. */
if (err >= 0) {
- acl = posix_acl_from_xattr(mnt_userns, buf, err);
+ acl = posix_acl_from_xattr(&init_user_ns, buf, err);
} else if (err == -ENODATA) {
acl = NULL;
} else {
@@ -537,8 +536,7 @@ struct posix_acl *ntfs_get_acl(struct inode *inode, int type, bool rcu)
if (rcu)
return ERR_PTR(-ECHILD);
- /* TODO: init_user_ns? */
- return ntfs_get_acl_ex(&init_user_ns, inode, type, 0);
+ return ntfs_get_acl_ex(inode, type, 0);
}
static noinline int ntfs_set_acl_ex(struct user_namespace *mnt_userns,
@@ -590,7 +588,7 @@ static noinline int ntfs_set_acl_ex(struct user_namespace *mnt_userns,
value = kmalloc(size, GFP_NOFS);
if (!value)
return -ENOMEM;
- err = posix_acl_to_xattr(mnt_userns, acl, value, size);
+ err = posix_acl_to_xattr(&init_user_ns, acl, value, size);
if (err < 0)
goto out;
flags = 0;
@@ -641,7 +639,7 @@ static int ntfs_xattr_get_acl(struct user_namespace *mnt_userns,
if (!acl)
return -ENODATA;
- err = posix_acl_to_xattr(mnt_userns, acl, buffer, size);
+ err = posix_acl_to_xattr(&init_user_ns, acl, buffer, size);
posix_acl_release(acl);
return err;
@@ -665,12 +663,12 @@ static int ntfs_xattr_set_acl(struct user_namespace *mnt_userns,
if (!value) {
acl = NULL;
} else {
- acl = posix_acl_from_xattr(mnt_userns, value, size);
+ acl = posix_acl_from_xattr(&init_user_ns, value, size);
if (IS_ERR(acl))
return PTR_ERR(acl);
if (acl) {
- err = posix_acl_valid(mnt_userns, acl);
+ err = posix_acl_valid(&init_user_ns, acl);
if (err)
goto release_and_out;
}
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 801e60bab955..c28bc983a7b1 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -3403,10 +3403,12 @@ void ocfs2_dlm_shutdown(struct ocfs2_super *osb,
ocfs2_lock_res_free(&osb->osb_nfs_sync_lockres);
ocfs2_lock_res_free(&osb->osb_orphan_scan.os_lockres);
- ocfs2_cluster_disconnect(osb->cconn, hangup_pending);
- osb->cconn = NULL;
+ if (osb->cconn) {
+ ocfs2_cluster_disconnect(osb->cconn, hangup_pending);
+ osb->cconn = NULL;
- ocfs2_dlm_shutdown_debug(osb);
+ ocfs2_dlm_shutdown_debug(osb);
+ }
}
static int ocfs2_drop_lock(struct ocfs2_super *osb,
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 438be028935d..bc18c27e9683 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -1914,8 +1914,7 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err)
!ocfs2_is_hard_readonly(osb))
hangup_needed = 1;
- if (osb->cconn)
- ocfs2_dlm_shutdown(osb, hangup_needed);
+ ocfs2_dlm_shutdown(osb, hangup_needed);
ocfs2_blockcheck_stats_debugfs_remove(&osb->osb_ecc_stats);
debugfs_remove_recursive(osb->osb_debug_root);
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 2d04e3470d4c..313788bc0c30 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -525,10 +525,12 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr,
struct vm_area_struct *vma = walk->vma;
bool locked = !!(vma->vm_flags & VM_LOCKED);
struct page *page = NULL;
- bool migration = false;
+ bool migration = false, young = false, dirty = false;
if (pte_present(*pte)) {
page = vm_normal_page(vma, addr, *pte);
+ young = pte_young(*pte);
+ dirty = pte_dirty(*pte);
} else if (is_swap_pte(*pte)) {
swp_entry_t swpent = pte_to_swp_entry(*pte);
@@ -558,8 +560,7 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr,
if (!page)
return;
- smaps_account(mss, page, false, pte_young(*pte), pte_dirty(*pte),
- locked, migration);
+ smaps_account(mss, page, false, young, dirty, locked, migration);
}
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index de86f5b2859f..ab0576d372d6 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -1601,6 +1601,10 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx,
wake_userfault(vma->vm_userfaultfd_ctx.ctx, &range);
}
+ /* Reset ptes for the whole vma range if wr-protected */
+ if (userfaultfd_wp(vma))
+ uffd_wp_range(mm, vma, start, vma_end - start, false);
+
new_flags = vma->vm_flags & ~__VM_UFFD_FLAGS;
prev = vma_merge(mm, prev, start, vma_end, new_flags,
vma->anon_vma, vma->vm_file, vma->vm_pgoff,
diff --git a/include/asm-generic/sections.h b/include/asm-generic/sections.h
index d0f7bdd2fdf2..db13bb620f52 100644
--- a/include/asm-generic/sections.h
+++ b/include/asm-generic/sections.h
@@ -97,7 +97,7 @@ static inline bool memory_contains(void *begin, void *end, void *virt,
/**
* memory_intersects - checks if the region occupied by an object intersects
* with another memory region
- * @begin: virtual address of the beginning of the memory regien
+ * @begin: virtual address of the beginning of the memory region
* @end: virtual address of the end of the memory region
* @virt: virtual address of the memory object
* @size: size of the memory object
@@ -110,7 +110,10 @@ static inline bool memory_intersects(void *begin, void *end, void *virt,
{
void *vend = virt + size;
- return (virt >= begin && virt < end) || (vend >= begin && vend < end);
+ if (virt < end && vend > begin)
+ return true;
+
+ return false;
}
/**
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 9ecead1042b9..c322b98260f5 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -978,19 +978,30 @@ static inline void mod_memcg_page_state(struct page *page,
static inline unsigned long memcg_page_state(struct mem_cgroup *memcg, int idx)
{
- return READ_ONCE(memcg->vmstats.state[idx]);
+ long x = READ_ONCE(memcg->vmstats.state[idx]);
+#ifdef CONFIG_SMP
+ if (x < 0)
+ x = 0;
+#endif
+ return x;
}
static inline unsigned long lruvec_page_state(struct lruvec *lruvec,
enum node_stat_item idx)
{
struct mem_cgroup_per_node *pn;
+ long x;
if (mem_cgroup_disabled())
return node_page_state(lruvec_pgdat(lruvec), idx);
pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
- return READ_ONCE(pn->lruvec_stats.state[idx]);
+ x = READ_ONCE(pn->lruvec_stats.state[idx]);
+#ifdef CONFIG_SMP
+ if (x < 0)
+ x = 0;
+#endif
+ return x;
}
static inline unsigned long lruvec_page_state_local(struct lruvec *lruvec,
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 5040cd774c5a..b0b4ac92354a 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -773,6 +773,7 @@ struct mlx5_core_dev {
enum mlx5_device_state state;
/* sync interface state */
struct mutex intf_state_mutex;
+ struct lock_class_key lock_key;
unsigned long intf_state;
struct mlx5_priv priv;
struct mlx5_profile profile;
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 7898e29bcfb5..25b8860f47cc 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2939,7 +2939,6 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
#define FOLL_MIGRATION 0x400 /* wait for page to replace migration entry */
#define FOLL_TRIED 0x800 /* a retry, previous pass started an IO */
#define FOLL_REMOTE 0x2000 /* we are working on non-current tsk/mm */
-#define FOLL_COW 0x4000 /* internal GUP flag */
#define FOLL_ANON 0x8000 /* don't do file mappings */
#define FOLL_LONGTERM 0x10000 /* mapping lifetime is indefinite: see below */
#define FOLL_SPLIT_PMD 0x20000 /* split huge pmd before returning */
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 2563d30736e9..db40bc62213b 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -640,9 +640,23 @@ extern int sysctl_devconf_inherit_init_net;
*/
static inline bool net_has_fallback_tunnels(const struct net *net)
{
- return !IS_ENABLED(CONFIG_SYSCTL) ||
- !sysctl_fb_tunnels_only_for_init_net ||
- (net == &init_net && sysctl_fb_tunnels_only_for_init_net == 1);
+#if IS_ENABLED(CONFIG_SYSCTL)
+ int fb_tunnels_only_for_init_net = READ_ONCE(sysctl_fb_tunnels_only_for_init_net);
+
+ return !fb_tunnels_only_for_init_net ||
+ (net_eq(net, &init_net) && fb_tunnels_only_for_init_net == 1);
+#else
+ return true;
+#endif
+}
+
+static inline int net_inherit_devconf(void)
+{
+#if IS_ENABLED(CONFIG_SYSCTL)
+ return READ_ONCE(sysctl_devconf_inherit_init_net);
+#else
+ return 0;
+#endif
}
static inline int netdev_queue_numa_node_read(const struct netdev_queue *q)
diff --git a/include/linux/netfilter_bridge/ebtables.h b/include/linux/netfilter_bridge/ebtables.h
index a13296d6c7ce..fd533552a062 100644
--- a/include/linux/netfilter_bridge/ebtables.h
+++ b/include/linux/netfilter_bridge/ebtables.h
@@ -94,10 +94,6 @@ struct ebt_table {
struct ebt_replace_kernel *table;
unsigned int valid_hooks;
rwlock_t lock;
- /* e.g. could be the table explicitly only allows certain
- * matches, targets, ... 0 == let it in */
- int (*check)(const struct ebt_table_info *info,
- unsigned int valid_hooks);
/* the data used by the kernel */
struct ebt_table_info *private;
struct nf_hook_ops *ops;
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index a17c337dbdf1..b1f83697699e 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -182,6 +182,7 @@ struct nfs_inode {
/* Regular file */
struct {
atomic_long_t nrequests;
+ atomic_long_t redirtied_pages;
struct nfs_mds_commit_info commit_info;
struct mutex commit_mutex;
};
diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h
index 732b522bacb7..e1b8a915e9e9 100644
--- a/include/linux/userfaultfd_k.h
+++ b/include/linux/userfaultfd_k.h
@@ -73,6 +73,8 @@ extern ssize_t mcopy_continue(struct mm_struct *dst_mm, unsigned long dst_start,
extern int mwriteprotect_range(struct mm_struct *dst_mm,
unsigned long start, unsigned long len,
bool enable_wp, atomic_t *mmap_changing);
+extern void uffd_wp_range(struct mm_struct *dst_mm, struct vm_area_struct *vma,
+ unsigned long start, unsigned long len, bool enable_wp);
/* mm helpers */
static inline bool is_mergeable_vm_userfaultfd_ctx(struct vm_area_struct *vma,
diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h
index c4898fcbf923..f90f0021f5f2 100644
--- a/include/net/busy_poll.h
+++ b/include/net/busy_poll.h
@@ -33,7 +33,7 @@ extern unsigned int sysctl_net_busy_poll __read_mostly;
static inline bool net_busy_loop_on(void)
{
- return sysctl_net_busy_poll;
+ return READ_ONCE(sysctl_net_busy_poll);
}
static inline bool sk_can_busy_loop(const struct sock *sk)
diff --git a/include/net/gro.h b/include/net/gro.h
index 867656b0739c..24003dea8fa4 100644
--- a/include/net/gro.h
+++ b/include/net/gro.h
@@ -439,7 +439,7 @@ static inline void gro_normal_one(struct napi_struct *napi, struct sk_buff *skb,
{
list_add_tail(&skb->list, &napi->rx_list);
napi->rx_count += segs;
- if (napi->rx_count >= gro_normal_batch)
+ if (napi->rx_count >= READ_ONCE(gro_normal_batch))
gro_normal_list(napi);
}
diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h
index 64daafd1fc41..9c93e4981b68 100644
--- a/include/net/netfilter/nf_flow_table.h
+++ b/include/net/netfilter/nf_flow_table.h
@@ -270,6 +270,7 @@ void flow_offload_refresh(struct nf_flowtable *flow_table,
struct flow_offload_tuple_rhash *flow_offload_lookup(struct nf_flowtable *flow_table,
struct flow_offload_tuple *tuple);
+void nf_flow_table_gc_run(struct nf_flowtable *flow_table);
void nf_flow_table_gc_cleanup(struct nf_flowtable *flowtable,
struct net_device *dev);
void nf_flow_table_cleanup(struct net_device *dev);
@@ -306,6 +307,8 @@ void nf_flow_offload_stats(struct nf_flowtable *flowtable,
struct flow_offload *flow);
void nf_flow_table_offload_flush(struct nf_flowtable *flowtable);
+void nf_flow_table_offload_flush_cleanup(struct nf_flowtable *flowtable);
+
int nf_flow_table_offload_setup(struct nf_flowtable *flowtable,
struct net_device *dev,
enum flow_block_command cmd);
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index b8890ace0f87..0daad6e63ccb 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -1635,6 +1635,7 @@ struct nftables_pernet {
struct list_head module_list;
struct list_head notify_list;
struct mutex commit_mutex;
+ u64 table_handle;
unsigned int base_seq;
u8 validate_state;
};
diff --git a/include/ufs/ufshci.h b/include/ufs/ufshci.h
index f81aa95ffbc4..f525566a0864 100644
--- a/include/ufs/ufshci.h
+++ b/include/ufs/ufshci.h
@@ -135,11 +135,7 @@ static inline u32 ufshci_version(u32 major, u32 minor)
#define UFSHCD_UIC_MASK (UIC_COMMAND_COMPL | UFSHCD_UIC_PWR_MASK)
-#define UFSHCD_ERROR_MASK (UIC_ERROR |\
- DEVICE_FATAL_ERROR |\
- CONTROLLER_FATAL_ERROR |\
- SYSTEM_BUS_FATAL_ERROR |\
- CRYPTO_ENGINE_FATAL_ERROR)
+#define UFSHCD_ERROR_MASK (UIC_ERROR | INT_FATAL_ERRORS)
#define INT_FATAL_ERRORS (DEVICE_FATAL_ERROR |\
CONTROLLER_FATAL_ERROR |\
diff --git a/init/main.c b/init/main.c
index 91642a4e69be..1fe7942f5d4a 100644
--- a/init/main.c
+++ b/init/main.c
@@ -1446,13 +1446,25 @@ static noinline void __init kernel_init_freeable(void);
#if defined(CONFIG_STRICT_KERNEL_RWX) || defined(CONFIG_STRICT_MODULE_RWX)
bool rodata_enabled __ro_after_init = true;
+
+#ifndef arch_parse_debug_rodata
+static inline bool arch_parse_debug_rodata(char *str) { return false; }
+#endif
+
static int __init set_debug_rodata(char *str)
{
- if (strtobool(str, &rodata_enabled))
+ if (arch_parse_debug_rodata(str))
+ return 0;
+
+ if (str && !strcmp(str, "on"))
+ rodata_enabled = true;
+ else if (str && !strcmp(str, "off"))
+ rodata_enabled = false;
+ else
pr_warn("Invalid option string for rodata: '%s'\n", str);
- return 1;
+ return 0;
}
-__setup("rodata=", set_debug_rodata);
+early_param("rodata", set_debug_rodata);
#endif
#ifdef CONFIG_STRICT_KERNEL_RWX
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 6a67dbf5195f..cd155b7e1346 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -4331,7 +4331,12 @@ static int io_write(struct io_kiocb *req, unsigned int issue_flags)
copy_iov:
iov_iter_restore(&s->iter, &s->iter_state);
ret = io_setup_async_rw(req, iovec, s, false);
- return ret ?: -EAGAIN;
+ if (!ret) {
+ if (kiocb->ki_flags & IOCB_WRITE)
+ kiocb_end_write(req);
+ return -EAGAIN;
+ }
+ return ret;
}
out_free:
/* it's reportedly faster than delegating the null check to kfree() */
diff --git a/kernel/audit_fsnotify.c b/kernel/audit_fsnotify.c
index 6432a37ac1c9..c565fbf66ac8 100644
--- a/kernel/audit_fsnotify.c
+++ b/kernel/audit_fsnotify.c
@@ -102,6 +102,7 @@ struct audit_fsnotify_mark *audit_alloc_mark(struct audit_krule *krule, char *pa
ret = fsnotify_add_inode_mark(&audit_mark->mark, inode, 0);
if (ret < 0) {
+ audit_mark->path = NULL;
fsnotify_put_mark(&audit_mark->mark);
audit_mark = ERR_PTR(ret);
}
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 3a8c9d744800..0c33e04c293a 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -1965,6 +1965,7 @@ void __audit_uring_exit(int success, long code)
goto out;
}
+ audit_return_fixup(ctx, success, code);
if (ctx->context == AUDIT_CTX_SYSCALL) {
/*
* NOTE: See the note in __audit_uring_entry() about the case
@@ -2006,7 +2007,6 @@ void __audit_uring_exit(int success, long code)
audit_filter_inodes(current, ctx);
if (ctx->current_state != AUDIT_STATE_RECORD)
goto out;
- audit_return_fixup(ctx, success, code);
audit_log_exit();
out:
@@ -2090,13 +2090,13 @@ void __audit_syscall_exit(int success, long return_code)
if (!list_empty(&context->killed_trees))
audit_kill_trees(context);
+ audit_return_fixup(context, success, return_code);
/* run through both filters to ensure we set the filterkey properly */
audit_filter_syscall(current, context);
audit_filter_inodes(current, context);
if (context->current_state < AUDIT_STATE_RECORD)
goto out;
- audit_return_fixup(context, success, return_code);
audit_log_exit();
out:
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index e91d2faef160..0e45d405f151 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -6999,8 +6999,7 @@ record_func_key(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx];
struct bpf_reg_state *regs = cur_regs(env), *reg;
struct bpf_map *map = meta->map_ptr;
- struct tnum range;
- u64 val;
+ u64 val, max;
int err;
if (func_id != BPF_FUNC_tail_call)
@@ -7010,10 +7009,11 @@ record_func_key(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
return -EINVAL;
}
- range = tnum_range(0, map->max_entries - 1);
reg = ®s[BPF_REG_3];
+ val = reg->var_off.value;
+ max = map->max_entries;
- if (!register_is_const(reg) || !tnum_in(range, reg->var_off)) {
+ if (!(register_is_const(reg) && val < max)) {
bpf_map_key_store(aux, BPF_MAP_KEY_POISON);
return 0;
}
@@ -7021,8 +7021,6 @@ record_func_key(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
err = mark_chain_precision(env, BPF_REG_3);
if (err)
return err;
-
- val = reg->var_off.value;
if (bpf_map_key_unseen(aux))
bpf_map_key_store(aux, val);
else if (!bpf_map_key_poisoned(aux) &&
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index 13c8e91d7862..ce95aee05e8a 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -1811,6 +1811,7 @@ int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask)
if (ss->css_rstat_flush) {
list_del_rcu(&css->rstat_css_node);
+ synchronize_rcu();
list_add_rcu(&css->rstat_css_node,
&dcgrp->rstat_css_list);
}
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 80697e5e03e4..08350e35aba2 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -1707,11 +1707,12 @@ static struct kprobe *__disable_kprobe(struct kprobe *p)
/* Try to disarm and disable this/parent probe */
if (p == orig_p || aggr_kprobe_disabled(orig_p)) {
/*
- * If 'kprobes_all_disarmed' is set, 'orig_p'
- * should have already been disarmed, so
- * skip unneed disarming process.
+ * Don't be lazy here. Even if 'kprobes_all_disarmed'
+ * is false, 'orig_p' might not have been armed yet.
+ * Note arm_all_kprobes() __tries__ to arm all kprobes
+ * on the best effort basis.
*/
- if (!kprobes_all_disarmed) {
+ if (!kprobes_all_disarmed && !kprobe_disabled(orig_p)) {
ret = disarm_kprobe(orig_p, true);
if (ret) {
p->flags &= ~KPROBE_FLAG_DISABLED;
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index a492f159624f..860b2dcf3ac4 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -277,6 +277,7 @@ COND_SYSCALL(landlock_restrict_self);
/* mm/fadvise.c */
COND_SYSCALL(fadvise64_64);
+COND_SYSCALL_COMPAT(fadvise64_64);
/* mm/, CONFIG_MMU only */
COND_SYSCALL(swapon);
diff --git a/lib/ratelimit.c b/lib/ratelimit.c
index e01a93f46f83..ce945c17980b 100644
--- a/lib/ratelimit.c
+++ b/lib/ratelimit.c
@@ -26,10 +26,16 @@
*/
int ___ratelimit(struct ratelimit_state *rs, const char *func)
{
+ /* Paired with WRITE_ONCE() in .proc_handler().
+ * Changing two values seperately could be inconsistent
+ * and some message could be lost. (See: net_ratelimit_state).
+ */
+ int interval = READ_ONCE(rs->interval);
+ int burst = READ_ONCE(rs->burst);
unsigned long flags;
int ret;
- if (!rs->interval)
+ if (!interval)
return 1;
/*
@@ -44,7 +50,7 @@ int ___ratelimit(struct ratelimit_state *rs, const char *func)
if (!rs->begin)
rs->begin = jiffies;
- if (time_is_before_jiffies(rs->begin + rs->interval)) {
+ if (time_is_before_jiffies(rs->begin + interval)) {
if (rs->missed) {
if (!(rs->flags & RATELIMIT_MSG_ON_RELEASE)) {
printk_deferred(KERN_WARNING
@@ -56,7 +62,7 @@ int ___ratelimit(struct ratelimit_state *rs, const char *func)
rs->begin = jiffies;
rs->printed = 0;
}
- if (rs->burst && rs->burst > rs->printed) {
+ if (burst && burst > rs->printed) {
rs->printed++;
ret = 1;
} else {
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 95550b8fa7fe..de65cb1e5f76 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -260,10 +260,10 @@ void wb_wakeup_delayed(struct bdi_writeback *wb)
unsigned long timeout;
timeout = msecs_to_jiffies(dirty_writeback_interval * 10);
- spin_lock_bh(&wb->work_lock);
+ spin_lock_irq(&wb->work_lock);
if (test_bit(WB_registered, &wb->state))
queue_delayed_work(bdi_wq, &wb->dwork, timeout);
- spin_unlock_bh(&wb->work_lock);
+ spin_unlock_irq(&wb->work_lock);
}
static void wb_update_bandwidth_workfn(struct work_struct *work)
@@ -334,12 +334,12 @@ static void cgwb_remove_from_bdi_list(struct bdi_writeback *wb);
static void wb_shutdown(struct bdi_writeback *wb)
{
/* Make sure nobody queues further work */
- spin_lock_bh(&wb->work_lock);
+ spin_lock_irq(&wb->work_lock);
if (!test_and_clear_bit(WB_registered, &wb->state)) {
- spin_unlock_bh(&wb->work_lock);
+ spin_unlock_irq(&wb->work_lock);
return;
}
- spin_unlock_bh(&wb->work_lock);
+ spin_unlock_irq(&wb->work_lock);
cgwb_remove_from_bdi_list(wb);
/*
diff --git a/mm/bootmem_info.c b/mm/bootmem_info.c
index f18a631e7479..b1efebfcf94b 100644
--- a/mm/bootmem_info.c
+++ b/mm/bootmem_info.c
@@ -12,6 +12,7 @@
#include <linux/memblock.h>
#include <linux/bootmem_info.h>
#include <linux/memory_hotplug.h>
+#include <linux/kmemleak.h>
void get_page_bootmem(unsigned long info, struct page *page, unsigned long type)
{
@@ -33,6 +34,7 @@ void put_page_bootmem(struct page *page)
ClearPagePrivate(page);
set_page_private(page, 0);
INIT_LIST_HEAD(&page->lru);
+ kmemleak_free_part(page_to_virt(page), PAGE_SIZE);
free_reserved_page(page);
}
}
diff --git a/mm/damon/dbgfs.c b/mm/damon/dbgfs.c
index a0dab8b5e45f..53ba8b1e619c 100644
--- a/mm/damon/dbgfs.c
+++ b/mm/damon/dbgfs.c
@@ -787,6 +787,9 @@ static int dbgfs_mk_context(char *name)
return -ENOENT;
new_dir = debugfs_create_dir(name, root);
+ /* Below check is required for a potential duplicated name case */
+ if (IS_ERR(new_dir))
+ return PTR_ERR(new_dir);
dbgfs_dirs[dbgfs_nr_ctxs] = new_dir;
new_ctx = dbgfs_new_ctx();
diff --git a/mm/gup.c b/mm/gup.c
index fd3262ae92fc..38effce68b48 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -478,14 +478,43 @@ static int follow_pfn_pte(struct vm_area_struct *vma, unsigned long address,
return -EEXIST;
}
-/*
- * FOLL_FORCE can write to even unwritable pte's, but only
- * after we've gone through a COW cycle and they are dirty.
- */
-static inline bool can_follow_write_pte(pte_t pte, unsigned int flags)
+/* FOLL_FORCE can write to even unwritable PTEs in COW mappings. */
+static inline bool can_follow_write_pte(pte_t pte, struct page *page,
+ struct vm_area_struct *vma,
+ unsigned int flags)
{
- return pte_write(pte) ||
- ((flags & FOLL_FORCE) && (flags & FOLL_COW) && pte_dirty(pte));
+ /* If the pte is writable, we can write to the page. */
+ if (pte_write(pte))
+ return true;
+
+ /* Maybe FOLL_FORCE is set to override it? */
+ if (!(flags & FOLL_FORCE))
+ return false;
+
+ /* But FOLL_FORCE has no effect on shared mappings */
+ if (vma->vm_flags & (VM_MAYSHARE | VM_SHARED))
+ return false;
+
+ /* ... or read-only private ones */
+ if (!(vma->vm_flags & VM_MAYWRITE))
+ return false;
+
+ /* ... or already writable ones that just need to take a write fault */
+ if (vma->vm_flags & VM_WRITE)
+ return false;
+
+ /*
+ * See can_change_pte_writable(): we broke COW and could map the page
+ * writable if we have an exclusive anonymous page ...
+ */
+ if (!page || !PageAnon(page) || !PageAnonExclusive(page))
+ return false;
+
+ /* ... and a write-fault isn't required for other reasons. */
+ if (IS_ENABLED(CONFIG_MEM_SOFT_DIRTY) &&
+ !(vma->vm_flags & VM_SOFTDIRTY) && !pte_soft_dirty(pte))
+ return false;
+ return !userfaultfd_pte_wp(vma, pte);
}
static struct page *follow_page_pte(struct vm_area_struct *vma,
@@ -528,12 +557,19 @@ static struct page *follow_page_pte(struct vm_area_struct *vma,
}
if ((flags & FOLL_NUMA) && pte_protnone(pte))
goto no_page;
- if ((flags & FOLL_WRITE) && !can_follow_write_pte(pte, flags)) {
- pte_unmap_unlock(ptep, ptl);
- return NULL;
- }
page = vm_normal_page(vma, address, pte);
+
+ /*
+ * We only care about anon pages in can_follow_write_pte() and don't
+ * have to worry about pte_devmap() because they are never anon.
+ */
+ if ((flags & FOLL_WRITE) &&
+ !can_follow_write_pte(pte, page, vma, flags)) {
+ page = NULL;
+ goto out;
+ }
+
if (!page && pte_devmap(pte) && (flags & (FOLL_GET | FOLL_PIN))) {
/*
* Only return device mapping pages in the FOLL_GET or FOLL_PIN
@@ -967,17 +1003,6 @@ static int faultin_page(struct vm_area_struct *vma,
return -EBUSY;
}
- /*
- * The VM_FAULT_WRITE bit tells us that do_wp_page has broken COW when
- * necessary, even if maybe_mkwrite decided not to set pte_write. We
- * can thus safely do subsequent page lookups as if they were reads.
- * But only do so when looping for pte_write is futile: in some cases
- * userspace may also be wanting to write to the gotten user page,
- * which a read fault here might prevent (a readonly page might get
- * reCOWed by userspace write).
- */
- if ((ret & VM_FAULT_WRITE) && !(vma->vm_flags & VM_WRITE))
- *flags |= FOLL_COW;
return 0;
}
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 15965084816d..0f465e70349c 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -978,12 +978,6 @@ struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr,
assert_spin_locked(pmd_lockptr(mm, pmd));
- /*
- * When we COW a devmap PMD entry, we split it into PTEs, so we should
- * not be in this function with `flags & FOLL_COW` set.
- */
- WARN_ONCE(flags & FOLL_COW, "mm: In follow_devmap_pmd with FOLL_COW set");
-
/* FOLL_GET and FOLL_PIN are mutually exclusive. */
if (WARN_ON_ONCE((flags & (FOLL_PIN | FOLL_GET)) ==
(FOLL_PIN | FOLL_GET)))
@@ -1349,14 +1343,43 @@ vm_fault_t do_huge_pmd_wp_page(struct vm_fault *vmf)
return VM_FAULT_FALLBACK;
}
-/*
- * FOLL_FORCE can write to even unwritable pmd's, but only
- * after we've gone through a COW cycle and they are dirty.
- */
-static inline bool can_follow_write_pmd(pmd_t pmd, unsigned int flags)
+/* FOLL_FORCE can write to even unwritable PMDs in COW mappings. */
+static inline bool can_follow_write_pmd(pmd_t pmd, struct page *page,
+ struct vm_area_struct *vma,
+ unsigned int flags)
{
- return pmd_write(pmd) ||
- ((flags & FOLL_FORCE) && (flags & FOLL_COW) && pmd_dirty(pmd));
+ /* If the pmd is writable, we can write to the page. */
+ if (pmd_write(pmd))
+ return true;
+
+ /* Maybe FOLL_FORCE is set to override it? */
+ if (!(flags & FOLL_FORCE))
+ return false;
+
+ /* But FOLL_FORCE has no effect on shared mappings */
+ if (vma->vm_flags & (VM_MAYSHARE | VM_SHARED))
+ return false;
+
+ /* ... or read-only private ones */
+ if (!(vma->vm_flags & VM_MAYWRITE))
+ return false;
+
+ /* ... or already writable ones that just need to take a write fault */
+ if (vma->vm_flags & VM_WRITE)
+ return false;
+
+ /*
+ * See can_change_pte_writable(): we broke COW and could map the page
+ * writable if we have an exclusive anonymous page ...
+ */
+ if (!page || !PageAnon(page) || !PageAnonExclusive(page))
+ return false;
+
+ /* ... and a write-fault isn't required for other reasons. */
+ if (IS_ENABLED(CONFIG_MEM_SOFT_DIRTY) &&
+ !(vma->vm_flags & VM_SOFTDIRTY) && !pmd_soft_dirty(pmd))
+ return false;
+ return !userfaultfd_huge_pmd_wp(vma, pmd);
}
struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
@@ -1365,12 +1388,16 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
unsigned int flags)
{
struct mm_struct *mm = vma->vm_mm;
- struct page *page = NULL;
+ struct page *page;
assert_spin_locked(pmd_lockptr(mm, pmd));
- if (flags & FOLL_WRITE && !can_follow_write_pmd(*pmd, flags))
- goto out;
+ page = pmd_page(*pmd);
+ VM_BUG_ON_PAGE(!PageHead(page) && !is_zone_device_page(page), page);
+
+ if ((flags & FOLL_WRITE) &&
+ !can_follow_write_pmd(*pmd, page, vma, flags))
+ return NULL;
/* Avoid dumping huge zero page */
if ((flags & FOLL_DUMP) && is_huge_zero_pmd(*pmd))
@@ -1378,10 +1405,7 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
/* Full NUMA hinting faults to serialise migration in fault paths */
if ((flags & FOLL_NUMA) && pmd_protnone(*pmd))
- goto out;
-
- page = pmd_page(*pmd);
- VM_BUG_ON_PAGE(!PageHead(page) && !is_zone_device_page(page), page);
+ return NULL;
if (!pmd_write(*pmd) && gup_must_unshare(flags, page))
return ERR_PTR(-EMLINK);
@@ -1398,7 +1422,6 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
page += (addr & ~HPAGE_PMD_MASK) >> PAGE_SHIFT;
VM_BUG_ON_PAGE(!PageCompound(page) && !is_zone_device_page(page), page);
-out:
return page;
}
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 474bfbe9929e..299dcfaa35b2 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -5232,6 +5232,21 @@ static vm_fault_t hugetlb_wp(struct mm_struct *mm, struct vm_area_struct *vma,
VM_BUG_ON(unshare && (flags & FOLL_WRITE));
VM_BUG_ON(!unshare && !(flags & FOLL_WRITE));
+ /*
+ * hugetlb does not support FOLL_FORCE-style write faults that keep the
+ * PTE mapped R/O such as maybe_mkwrite() would do.
+ */
+ if (WARN_ON_ONCE(!unshare && !(vma->vm_flags & VM_WRITE)))
+ return VM_FAULT_SIGSEGV;
+
+ /* Let's take out MAP_SHARED mappings first. */
+ if (vma->vm_flags & VM_MAYSHARE) {
+ if (unlikely(unshare))
+ return 0;
+ set_huge_ptep_writable(vma, haddr, ptep);
+ return 0;
+ }
+
pte = huge_ptep_get(ptep);
old_page = pte_page(pte);
@@ -5766,12 +5781,11 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
* If we are going to COW/unshare the mapping later, we examine the
* pending reservations for this page now. This will ensure that any
* allocations necessary to record that reservation occur outside the
- * spinlock. For private mappings, we also lookup the pagecache
- * page now as it is used to determine if a reservation has been
- * consumed.
+ * spinlock. Also lookup the pagecache page now as it is used to
+ * determine if a reservation has been consumed.
*/
if ((flags & (FAULT_FLAG_WRITE|FAULT_FLAG_UNSHARE)) &&
- !huge_pte_write(entry)) {
+ !(vma->vm_flags & VM_MAYSHARE) && !huge_pte_write(entry)) {
if (vma_needs_reservation(h, vma, haddr) < 0) {
ret = VM_FAULT_OOM;
goto out_mutex;
@@ -5779,9 +5793,7 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
/* Just decrements count, does not deallocate */
vma_end_reservation(h, vma, haddr);
- if (!(vma->vm_flags & VM_MAYSHARE))
- pagecache_page = hugetlbfs_pagecache_page(h,
- vma, haddr);
+ pagecache_page = hugetlbfs_pagecache_page(h, vma, haddr);
}
ptl = huge_pte_lock(h, mm, ptep);
@@ -6014,7 +6026,7 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
if (!huge_pte_none_mostly(huge_ptep_get(dst_pte)))
goto out_release_unlock;
- if (vm_shared) {
+ if (page_in_pagecache) {
page_dup_file_rmap(page, true);
} else {
ClearHPageRestoreReserve(page);
diff --git a/mm/mmap.c b/mm/mmap.c
index 7c59ec73acc3..3b284b091bb7 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1693,8 +1693,12 @@ int vma_wants_writenotify(struct vm_area_struct *vma, pgprot_t vm_page_prot)
pgprot_val(vm_pgprot_modify(vm_page_prot, vm_flags)))
return 0;
- /* Do we need to track softdirty? */
- if (IS_ENABLED(CONFIG_MEM_SOFT_DIRTY) && !(vm_flags & VM_SOFTDIRTY))
+ /*
+ * Do we need to track softdirty? hugetlb does not support softdirty
+ * tracking yet.
+ */
+ if (IS_ENABLED(CONFIG_MEM_SOFT_DIRTY) && !(vm_flags & VM_SOFTDIRTY) &&
+ !is_vm_hugetlb_page(vma))
return 1;
/* Specialty mapping? */
diff --git a/mm/mprotect.c b/mm/mprotect.c
index ba5592655ee3..0d38d5b63762 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -158,10 +158,11 @@ static unsigned long change_pte_range(struct mmu_gather *tlb,
pages++;
} else if (is_swap_pte(oldpte)) {
swp_entry_t entry = pte_to_swp_entry(oldpte);
- struct page *page = pfn_swap_entry_to_page(entry);
pte_t newpte;
if (is_writable_migration_entry(entry)) {
+ struct page *page = pfn_swap_entry_to_page(entry);
+
/*
* A protection check is difficult so
* just be safe and disable write
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 55c2776ae699..3c34db15cf70 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -2867,6 +2867,7 @@ static void wb_inode_writeback_start(struct bdi_writeback *wb)
static void wb_inode_writeback_end(struct bdi_writeback *wb)
{
+ unsigned long flags;
atomic_dec(&wb->writeback_inodes);
/*
* Make sure estimate of writeback throughput gets updated after
@@ -2875,7 +2876,10 @@ static void wb_inode_writeback_end(struct bdi_writeback *wb)
* that if multiple inodes end writeback at a similar time, they get
* batched into one bandwidth update.
*/
- queue_delayed_work(bdi_wq, &wb->bw_dwork, BANDWIDTH_INTERVAL);
+ spin_lock_irqsave(&wb->work_lock, flags);
+ if (test_bit(WB_registered, &wb->state))
+ queue_delayed_work(bdi_wq, &wb->bw_dwork, BANDWIDTH_INTERVAL);
+ spin_unlock_irqrestore(&wb->work_lock, flags);
}
bool __folio_end_writeback(struct folio *folio)
diff --git a/mm/shmem.c b/mm/shmem.c
index b7f2d4a56867..f152375e770b 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1771,6 +1771,7 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index,
if (shmem_should_replace_folio(folio, gfp)) {
error = shmem_replace_page(&page, gfp, info, index);
+ folio = page_folio(page);
if (error)
goto failed;
}
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index 07d3befc80e4..7327b2573f7c 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -703,14 +703,29 @@ ssize_t mcopy_continue(struct mm_struct *dst_mm, unsigned long start,
mmap_changing, 0);
}
+void uffd_wp_range(struct mm_struct *dst_mm, struct vm_area_struct *dst_vma,
+ unsigned long start, unsigned long len, bool enable_wp)
+{
+ struct mmu_gather tlb;
+ pgprot_t newprot;
+
+ if (enable_wp)
+ newprot = vm_get_page_prot(dst_vma->vm_flags & ~(VM_WRITE));
+ else
+ newprot = vm_get_page_prot(dst_vma->vm_flags);
+
+ tlb_gather_mmu(&tlb, dst_mm);
+ change_protection(&tlb, dst_vma, start, start + len, newprot,
+ enable_wp ? MM_CP_UFFD_WP : MM_CP_UFFD_WP_RESOLVE);
+ tlb_finish_mmu(&tlb);
+}
+
int mwriteprotect_range(struct mm_struct *dst_mm, unsigned long start,
unsigned long len, bool enable_wp,
atomic_t *mmap_changing)
{
struct vm_area_struct *dst_vma;
unsigned long page_mask;
- struct mmu_gather tlb;
- pgprot_t newprot;
int err;
/*
@@ -750,15 +765,7 @@ int mwriteprotect_range(struct mm_struct *dst_mm, unsigned long start,
goto out_unlock;
}
- if (enable_wp)
- newprot = vm_get_page_prot(dst_vma->vm_flags & ~(VM_WRITE));
- else
- newprot = vm_get_page_prot(dst_vma->vm_flags);
-
- tlb_gather_mmu(&tlb, dst_mm);
- change_protection(&tlb, dst_vma, start, start + len, newprot,
- enable_wp ? MM_CP_UFFD_WP : MM_CP_UFFD_WP_RESOLVE);
- tlb_finish_mmu(&tlb);
+ uffd_wp_range(dst_mm, dst_vma, start, len, enable_wp);
err = 0;
out_unlock:
diff --git a/net/bridge/netfilter/ebtable_broute.c b/net/bridge/netfilter/ebtable_broute.c
index 1a11064f9990..8f19253024b0 100644
--- a/net/bridge/netfilter/ebtable_broute.c
+++ b/net/bridge/netfilter/ebtable_broute.c
@@ -36,18 +36,10 @@ static struct ebt_replace_kernel initial_table = {
.entries = (char *)&initial_chain,
};
-static int check(const struct ebt_table_info *info, unsigned int valid_hooks)
-{
- if (valid_hooks & ~(1 << NF_BR_BROUTING))
- return -EINVAL;
- return 0;
-}
-
static const struct ebt_table broute_table = {
.name = "broute",
.table = &initial_table,
.valid_hooks = 1 << NF_BR_BROUTING,
- .check = check,
.me = THIS_MODULE,
};
diff --git a/net/bridge/netfilter/ebtable_filter.c b/net/bridge/netfilter/ebtable_filter.c
index cb949436bc0e..278f324e6752 100644
--- a/net/bridge/netfilter/ebtable_filter.c
+++ b/net/bridge/netfilter/ebtable_filter.c
@@ -43,18 +43,10 @@ static struct ebt_replace_kernel initial_table = {
.entries = (char *)initial_chains,
};
-static int check(const struct ebt_table_info *info, unsigned int valid_hooks)
-{
- if (valid_hooks & ~FILTER_VALID_HOOKS)
- return -EINVAL;
- return 0;
-}
-
static const struct ebt_table frame_filter = {
.name = "filter",
.table = &initial_table,
.valid_hooks = FILTER_VALID_HOOKS,
- .check = check,
.me = THIS_MODULE,
};
diff --git a/net/bridge/netfilter/ebtable_nat.c b/net/bridge/netfilter/ebtable_nat.c
index 5ee0531ae506..9066f7f376d5 100644
--- a/net/bridge/netfilter/ebtable_nat.c
+++ b/net/bridge/netfilter/ebtable_nat.c
@@ -43,18 +43,10 @@ static struct ebt_replace_kernel initial_table = {
.entries = (char *)initial_chains,
};
-static int check(const struct ebt_table_info *info, unsigned int valid_hooks)
-{
- if (valid_hooks & ~NAT_VALID_HOOKS)
- return -EINVAL;
- return 0;
-}
-
static const struct ebt_table frame_nat = {
.name = "nat",
.table = &initial_table,
.valid_hooks = NAT_VALID_HOOKS,
- .check = check,
.me = THIS_MODULE,
};
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index f2dbefb61ce8..9a0ae59cdc50 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -1040,8 +1040,7 @@ static int do_replace_finish(struct net *net, struct ebt_replace *repl,
goto free_iterate;
}
- /* the table doesn't like it */
- if (t->check && (ret = t->check(newinfo, repl->valid_hooks)))
+ if (repl->valid_hooks != t->valid_hooks)
goto free_unlock;
if (repl->num_counters && repl->num_counters != t->private->nentries) {
@@ -1231,11 +1230,6 @@ int ebt_register_table(struct net *net, const struct ebt_table *input_table,
if (ret != 0)
goto free_chainstack;
- if (table->check && table->check(newinfo, table->valid_hooks)) {
- ret = -EINVAL;
- goto free_chainstack;
- }
-
table->private = newinfo;
rwlock_init(&table->lock);
mutex_lock(&ebt_mutex);
diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c
index 1b7f385643b4..94374d529ea4 100644
--- a/net/core/bpf_sk_storage.c
+++ b/net/core/bpf_sk_storage.c
@@ -310,11 +310,12 @@ BPF_CALL_2(bpf_sk_storage_delete, struct bpf_map *, map, struct sock *, sk)
static int bpf_sk_storage_charge(struct bpf_local_storage_map *smap,
void *owner, u32 size)
{
+ int optmem_max = READ_ONCE(sysctl_optmem_max);
struct sock *sk = (struct sock *)owner;
/* same check as in sock_kmalloc() */
- if (size <= sysctl_optmem_max &&
- atomic_read(&sk->sk_omem_alloc) + size < sysctl_optmem_max) {
+ if (size <= optmem_max &&
+ atomic_read(&sk->sk_omem_alloc) + size < optmem_max) {
atomic_add(size, &sk->sk_omem_alloc);
return 0;
}
diff --git a/net/core/dev.c b/net/core/dev.c
index 30a1603a7225..a77a979a4bf7 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4623,7 +4623,7 @@ static bool skb_flow_limit(struct sk_buff *skb, unsigned int qlen)
struct softnet_data *sd;
unsigned int old_flow, new_flow;
- if (qlen < (netdev_max_backlog >> 1))
+ if (qlen < (READ_ONCE(netdev_max_backlog) >> 1))
return false;
sd = this_cpu_ptr(&softnet_data);
@@ -4671,7 +4671,7 @@ static int enqueue_to_backlog(struct sk_buff *skb, int cpu,
if (!netif_running(skb->dev))
goto drop;
qlen = skb_queue_len(&sd->input_pkt_queue);
- if (qlen <= netdev_max_backlog && !skb_flow_limit(skb, qlen)) {
+ if (qlen <= READ_ONCE(netdev_max_backlog) && !skb_flow_limit(skb, qlen)) {
if (qlen) {
enqueue:
__skb_queue_tail(&sd->input_pkt_queue, skb);
@@ -4927,7 +4927,7 @@ static int netif_rx_internal(struct sk_buff *skb)
{
int ret;
- net_timestamp_check(netdev_tstamp_prequeue, skb);
+ net_timestamp_check(READ_ONCE(netdev_tstamp_prequeue), skb);
trace_netif_rx(skb);
@@ -5280,7 +5280,7 @@ static int __netif_receive_skb_core(struct sk_buff **pskb, bool pfmemalloc,
int ret = NET_RX_DROP;
__be16 type;
- net_timestamp_check(!netdev_tstamp_prequeue, skb);
+ net_timestamp_check(!READ_ONCE(netdev_tstamp_prequeue), skb);
trace_netif_receive_skb(skb);
@@ -5663,7 +5663,7 @@ static int netif_receive_skb_internal(struct sk_buff *skb)
{
int ret;
- net_timestamp_check(netdev_tstamp_prequeue, skb);
+ net_timestamp_check(READ_ONCE(netdev_tstamp_prequeue), skb);
if (skb_defer_rx_timestamp(skb))
return NET_RX_SUCCESS;
@@ -5693,7 +5693,7 @@ void netif_receive_skb_list_internal(struct list_head *head)
INIT_LIST_HEAD(&sublist);
list_for_each_entry_safe(skb, next, head, list) {
- net_timestamp_check(netdev_tstamp_prequeue, skb);
+ net_timestamp_check(READ_ONCE(netdev_tstamp_prequeue), skb);
skb_list_del_init(skb);
if (!skb_defer_rx_timestamp(skb))
list_add_tail(&skb->list, &sublist);
@@ -5917,7 +5917,7 @@ static int process_backlog(struct napi_struct *napi, int quota)
net_rps_action_and_irq_enable(sd);
}
- napi->weight = dev_rx_weight;
+ napi->weight = READ_ONCE(dev_rx_weight);
while (again) {
struct sk_buff *skb;
@@ -6646,8 +6646,8 @@ static __latent_entropy void net_rx_action(struct softirq_action *h)
{
struct softnet_data *sd = this_cpu_ptr(&softnet_data);
unsigned long time_limit = jiffies +
- usecs_to_jiffies(netdev_budget_usecs);
- int budget = netdev_budget;
+ usecs_to_jiffies(READ_ONCE(netdev_budget_usecs));
+ int budget = READ_ONCE(netdev_budget);
LIST_HEAD(list);
LIST_HEAD(repoll);
@@ -10265,7 +10265,7 @@ static struct net_device *netdev_wait_allrefs_any(struct list_head *list)
return dev;
if (time_after(jiffies, warning_time +
- netdev_unregister_timeout_secs * HZ)) {
+ READ_ONCE(netdev_unregister_timeout_secs) * HZ)) {
list_for_each_entry(dev, list, todo_list) {
pr_emerg("unregister_netdevice: waiting for %s to become free. Usage count = %d\n",
dev->name, netdev_refcnt_read(dev));
diff --git a/net/core/filter.c b/net/core/filter.c
index 74f05ed6aff2..063176428086 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1214,10 +1214,11 @@ void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp)
static bool __sk_filter_charge(struct sock *sk, struct sk_filter *fp)
{
u32 filter_size = bpf_prog_size(fp->prog->len);
+ int optmem_max = READ_ONCE(sysctl_optmem_max);
/* same check as in sock_kmalloc() */
- if (filter_size <= sysctl_optmem_max &&
- atomic_read(&sk->sk_omem_alloc) + filter_size < sysctl_optmem_max) {
+ if (filter_size <= optmem_max &&
+ atomic_read(&sk->sk_omem_alloc) + filter_size < optmem_max) {
atomic_add(filter_size, &sk->sk_omem_alloc);
return true;
}
@@ -1548,7 +1549,7 @@ int sk_reuseport_attach_filter(struct sock_fprog *fprog, struct sock *sk)
if (IS_ERR(prog))
return PTR_ERR(prog);
- if (bpf_prog_size(prog->len) > sysctl_optmem_max)
+ if (bpf_prog_size(prog->len) > READ_ONCE(sysctl_optmem_max))
err = -ENOMEM;
else
err = reuseport_attach_prog(sk, prog);
@@ -1615,7 +1616,7 @@ int sk_reuseport_attach_bpf(u32 ufd, struct sock *sk)
}
} else {
/* BPF_PROG_TYPE_SOCKET_FILTER */
- if (bpf_prog_size(prog->len) > sysctl_optmem_max) {
+ if (bpf_prog_size(prog->len) > READ_ONCE(sysctl_optmem_max)) {
err = -ENOMEM;
goto err_prog_put;
}
@@ -5036,14 +5037,14 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname,
/* Only some socketops are supported */
switch (optname) {
case SO_RCVBUF:
- val = min_t(u32, val, sysctl_rmem_max);
+ val = min_t(u32, val, READ_ONCE(sysctl_rmem_max));
val = min_t(int, val, INT_MAX / 2);
sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
WRITE_ONCE(sk->sk_rcvbuf,
max_t(int, val * 2, SOCK_MIN_RCVBUF));
break;
case SO_SNDBUF:
- val = min_t(u32, val, sysctl_wmem_max);
+ val = min_t(u32, val, READ_ONCE(sysctl_wmem_max));
val = min_t(int, val, INT_MAX / 2);
sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
WRITE_ONCE(sk->sk_sndbuf,
diff --git a/net/core/gro_cells.c b/net/core/gro_cells.c
index 541c7a72a28a..21619c70a82b 100644
--- a/net/core/gro_cells.c
+++ b/net/core/gro_cells.c
@@ -26,7 +26,7 @@ int gro_cells_receive(struct gro_cells *gcells, struct sk_buff *skb)
cell = this_cpu_ptr(gcells->cells);
- if (skb_queue_len(&cell->napi_skbs) > netdev_max_backlog) {
+ if (skb_queue_len(&cell->napi_skbs) > READ_ONCE(netdev_max_backlog)) {
drop:
dev_core_stats_rx_dropped_inc(dev);
kfree_skb(skb);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 5b3559cb1d82..bebf58464d66 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -4772,7 +4772,7 @@ static bool skb_may_tx_timestamp(struct sock *sk, bool tsonly)
{
bool ret;
- if (likely(sysctl_tstamp_allow_data || tsonly))
+ if (likely(READ_ONCE(sysctl_tstamp_allow_data) || tsonly))
return true;
read_lock_bh(&sk->sk_callback_lock);
diff --git a/net/core/sock.c b/net/core/sock.c
index 2ff40dd0a7a6..16ab5ef749c6 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1100,7 +1100,7 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
* play 'guess the biggest size' games. RCVBUF/SNDBUF
* are treated in BSD as hints
*/
- val = min_t(u32, val, sysctl_wmem_max);
+ val = min_t(u32, val, READ_ONCE(sysctl_wmem_max));
set_sndbuf:
/* Ensure val * 2 fits into an int, to prevent max_t()
* from treating it as a negative value.
@@ -1132,7 +1132,7 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
* play 'guess the biggest size' games. RCVBUF/SNDBUF
* are treated in BSD as hints
*/
- __sock_set_rcvbuf(sk, min_t(u32, val, sysctl_rmem_max));
+ __sock_set_rcvbuf(sk, min_t(u32, val, READ_ONCE(sysctl_rmem_max)));
break;
case SO_RCVBUFFORCE:
@@ -2535,7 +2535,7 @@ struct sk_buff *sock_omalloc(struct sock *sk, unsigned long size,
/* small safe race: SKB_TRUESIZE may differ from final skb->truesize */
if (atomic_read(&sk->sk_omem_alloc) + SKB_TRUESIZE(size) >
- sysctl_optmem_max)
+ READ_ONCE(sysctl_optmem_max))
return NULL;
skb = alloc_skb(size, priority);
@@ -2553,8 +2553,10 @@ struct sk_buff *sock_omalloc(struct sock *sk, unsigned long size,
*/
void *sock_kmalloc(struct sock *sk, int size, gfp_t priority)
{
- if ((unsigned int)size <= sysctl_optmem_max &&
- atomic_read(&sk->sk_omem_alloc) + size < sysctl_optmem_max) {
+ int optmem_max = READ_ONCE(sysctl_optmem_max);
+
+ if ((unsigned int)size <= optmem_max &&
+ atomic_read(&sk->sk_omem_alloc) + size < optmem_max) {
void *mem;
/* First do the add, to avoid the race if kmalloc
* might sleep.
@@ -3307,8 +3309,8 @@ void sock_init_data(struct socket *sock, struct sock *sk)
timer_setup(&sk->sk_timer, NULL, 0);
sk->sk_allocation = GFP_KERNEL;
- sk->sk_rcvbuf = sysctl_rmem_default;
- sk->sk_sndbuf = sysctl_wmem_default;
+ sk->sk_rcvbuf = READ_ONCE(sysctl_rmem_default);
+ sk->sk_sndbuf = READ_ONCE(sysctl_wmem_default);
sk->sk_state = TCP_CLOSE;
sk_set_socket(sk, sock);
@@ -3363,7 +3365,7 @@ void sock_init_data(struct socket *sock, struct sock *sk)
#ifdef CONFIG_NET_RX_BUSY_POLL
sk->sk_napi_id = 0;
- sk->sk_ll_usec = sysctl_net_busy_read;
+ sk->sk_ll_usec = READ_ONCE(sysctl_net_busy_read);
#endif
sk->sk_max_pacing_rate = ~0UL;
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 71a13596ea2b..725891527814 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -234,14 +234,17 @@ static int set_default_qdisc(struct ctl_table *table, int write,
static int proc_do_dev_weight(struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
- int ret;
+ static DEFINE_MUTEX(dev_weight_mutex);
+ int ret, weight;
+ mutex_lock(&dev_weight_mutex);
ret = proc_dointvec(table, write, buffer, lenp, ppos);
- if (ret != 0)
- return ret;
-
- dev_rx_weight = weight_p * dev_weight_rx_bias;
- dev_tx_weight = weight_p * dev_weight_tx_bias;
+ if (!ret && write) {
+ weight = READ_ONCE(weight_p);
+ WRITE_ONCE(dev_rx_weight, weight * dev_weight_rx_bias);
+ WRITE_ONCE(dev_tx_weight, weight * dev_weight_tx_bias);
+ }
+ mutex_unlock(&dev_weight_mutex);
return ret;
}
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index b2366ad540e6..787a44e3222d 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -2682,23 +2682,27 @@ static __net_init int devinet_init_net(struct net *net)
#endif
if (!net_eq(net, &init_net)) {
- if (IS_ENABLED(CONFIG_SYSCTL) &&
- sysctl_devconf_inherit_init_net == 3) {
+ switch (net_inherit_devconf()) {
+ case 3:
/* copy from the current netns */
memcpy(all, current->nsproxy->net_ns->ipv4.devconf_all,
sizeof(ipv4_devconf));
memcpy(dflt,
current->nsproxy->net_ns->ipv4.devconf_dflt,
sizeof(ipv4_devconf_dflt));
- } else if (!IS_ENABLED(CONFIG_SYSCTL) ||
- sysctl_devconf_inherit_init_net != 2) {
- /* inherit == 0 or 1: copy from init_net */
+ break;
+ case 0:
+ case 1:
+ /* copy from init_net */
memcpy(all, init_net.ipv4.devconf_all,
sizeof(ipv4_devconf));
memcpy(dflt, init_net.ipv4.devconf_dflt,
sizeof(ipv4_devconf_dflt));
+ break;
+ case 2:
+ /* use compiled values */
+ break;
}
- /* else inherit == 2: use compiled values */
}
#ifdef CONFIG_SYSCTL
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 00b4bf26fd93..da8b3cc67234 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -1712,7 +1712,7 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb,
sk->sk_protocol = ip_hdr(skb)->protocol;
sk->sk_bound_dev_if = arg->bound_dev_if;
- sk->sk_sndbuf = sysctl_wmem_default;
+ sk->sk_sndbuf = READ_ONCE(sysctl_wmem_default);
ipc.sockc.mark = fl4.flowi4_mark;
err = ip_append_data(sk, &fl4, ip_reply_glue_bits, arg->iov->iov_base,
len, 0, &ipc, &rt, MSG_DONTWAIT);
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index a8a323ecbb54..e49a61a053a6 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -772,7 +772,7 @@ static int ip_set_mcast_msfilter(struct sock *sk, sockptr_t optval, int optlen)
if (optlen < GROUP_FILTER_SIZE(0))
return -EINVAL;
- if (optlen > sysctl_optmem_max)
+ if (optlen > READ_ONCE(sysctl_optmem_max))
return -ENOBUFS;
gsf = memdup_sockptr(optval, optlen);
@@ -808,7 +808,7 @@ static int compat_ip_set_mcast_msfilter(struct sock *sk, sockptr_t optval,
if (optlen < size0)
return -EINVAL;
- if (optlen > sysctl_optmem_max - 4)
+ if (optlen > READ_ONCE(sysctl_optmem_max) - 4)
return -ENOBUFS;
p = kmalloc(optlen + 4, GFP_KERNEL);
@@ -1233,7 +1233,7 @@ static int do_ip_setsockopt(struct sock *sk, int level, int optname,
if (optlen < IP_MSFILTER_SIZE(0))
goto e_inval;
- if (optlen > sysctl_optmem_max) {
+ if (optlen > READ_ONCE(sysctl_optmem_max)) {
err = -ENOBUFS;
break;
}
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 3ae2ea048883..3d446773ff2a 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1000,7 +1000,7 @@ static struct sk_buff *tcp_build_frag(struct sock *sk, int size_goal, int flags,
i = skb_shinfo(skb)->nr_frags;
can_coalesce = skb_can_coalesce(skb, i, page, offset);
- if (!can_coalesce && i >= sysctl_max_skb_frags) {
+ if (!can_coalesce && i >= READ_ONCE(sysctl_max_skb_frags)) {
tcp_mark_push(tp, skb);
goto new_segment;
}
@@ -1348,7 +1348,7 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
if (!skb_can_coalesce(skb, i, pfrag->page,
pfrag->offset)) {
- if (i >= sysctl_max_skb_frags) {
+ if (i >= READ_ONCE(sysctl_max_skb_frags)) {
tcp_mark_push(tp, skb);
goto new_segment;
}
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index aed0c5f828be..84314de754f8 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -239,7 +239,7 @@ void tcp_select_initial_window(const struct sock *sk, int __space, __u32 mss,
if (wscale_ok) {
/* Set window scaling on max possible window */
space = max_t(u32, space, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2]));
- space = max_t(u32, space, sysctl_rmem_max);
+ space = max_t(u32, space, READ_ONCE(sysctl_rmem_max));
space = min_t(u32, space, *window_clamp);
*rcv_wscale = clamp_t(int, ilog2(space) - 15,
0, TCP_MAX_WSCALE);
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 49cc6587dd77..b738eb7e1cae 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -7158,9 +7158,8 @@ static int __net_init addrconf_init_net(struct net *net)
if (!dflt)
goto err_alloc_dflt;
- if (IS_ENABLED(CONFIG_SYSCTL) &&
- !net_eq(net, &init_net)) {
- switch (sysctl_devconf_inherit_init_net) {
+ if (!net_eq(net, &init_net)) {
+ switch (net_inherit_devconf()) {
case 1: /* copy from init_net */
memcpy(all, init_net.ipv6.devconf_all,
sizeof(ipv6_devconf));
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 222f6bf220ba..e0dcc7a193df 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -210,7 +210,7 @@ static int ipv6_set_mcast_msfilter(struct sock *sk, sockptr_t optval,
if (optlen < GROUP_FILTER_SIZE(0))
return -EINVAL;
- if (optlen > sysctl_optmem_max)
+ if (optlen > READ_ONCE(sysctl_optmem_max))
return -ENOBUFS;
gsf = memdup_sockptr(optval, optlen);
@@ -244,7 +244,7 @@ static int compat_ipv6_set_mcast_msfilter(struct sock *sk, sockptr_t optval,
if (optlen < size0)
return -EINVAL;
- if (optlen > sysctl_optmem_max - 4)
+ if (optlen > READ_ONCE(sysctl_optmem_max) - 4)
return -ENOBUFS;
p = kmalloc(optlen + 4, GFP_KERNEL);
diff --git a/net/key/af_key.c b/net/key/af_key.c
index fb16d7c4e1b8..20e73643b9c8 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -1697,9 +1697,12 @@ static int pfkey_register(struct sock *sk, struct sk_buff *skb, const struct sad
pfk->registered |= (1<<hdr->sadb_msg_satype);
}
+ mutex_lock(&pfkey_mutex);
xfrm_probe_algs();
supp_skb = compose_sadb_supported(hdr, GFP_KERNEL | __GFP_ZERO);
+ mutex_unlock(&pfkey_mutex);
+
if (!supp_skb) {
if (hdr->sadb_msg_satype != SADB_SATYPE_UNSPEC)
pfk->registered &= ~(1<<hdr->sadb_msg_satype);
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 3d90fa9653ef..513f571a082b 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -1299,7 +1299,7 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
i = skb_shinfo(skb)->nr_frags;
can_coalesce = skb_can_coalesce(skb, i, dfrag->page, offset);
- if (!can_coalesce && i >= sysctl_max_skb_frags) {
+ if (!can_coalesce && i >= READ_ONCE(sysctl_max_skb_frags)) {
tcp_mark_push(tcp_sk(ssk), skb);
goto alloc_skb;
}
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index 9d43277b8b4f..a56fd0b5a430 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -1280,12 +1280,12 @@ static void set_sock_size(struct sock *sk, int mode, int val)
lock_sock(sk);
if (mode) {
val = clamp_t(int, val, (SOCK_MIN_SNDBUF + 1) / 2,
- sysctl_wmem_max);
+ READ_ONCE(sysctl_wmem_max));
sk->sk_sndbuf = val * 2;
sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
} else {
val = clamp_t(int, val, (SOCK_MIN_RCVBUF + 1) / 2,
- sysctl_rmem_max);
+ READ_ONCE(sysctl_rmem_max));
sk->sk_rcvbuf = val * 2;
sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
}
diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
index f2def06d1070..483b18d35cad 100644
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -442,12 +442,17 @@ static void nf_flow_offload_gc_step(struct nf_flowtable *flow_table,
}
}
+void nf_flow_table_gc_run(struct nf_flowtable *flow_table)
+{
+ nf_flow_table_iterate(flow_table, nf_flow_offload_gc_step, NULL);
+}
+
static void nf_flow_offload_work_gc(struct work_struct *work)
{
struct nf_flowtable *flow_table;
flow_table = container_of(work, struct nf_flowtable, gc_work.work);
- nf_flow_table_iterate(flow_table, nf_flow_offload_gc_step, NULL);
+ nf_flow_table_gc_run(flow_table);
queue_delayed_work(system_power_efficient_wq, &flow_table->gc_work, HZ);
}
@@ -605,11 +610,11 @@ void nf_flow_table_free(struct nf_flowtable *flow_table)
mutex_unlock(&flowtable_lock);
cancel_delayed_work_sync(&flow_table->gc_work);
- nf_flow_table_iterate(flow_table, nf_flow_table_do_cleanup, NULL);
- nf_flow_table_iterate(flow_table, nf_flow_offload_gc_step, NULL);
nf_flow_table_offload_flush(flow_table);
- if (nf_flowtable_hw_offload(flow_table))
- nf_flow_table_iterate(flow_table, nf_flow_offload_gc_step, NULL);
+ /* ... no more pending work after this stage ... */
+ nf_flow_table_iterate(flow_table, nf_flow_table_do_cleanup, NULL);
+ nf_flow_table_gc_run(flow_table);
+ nf_flow_table_offload_flush_cleanup(flow_table);
rhashtable_destroy(&flow_table->rhashtable);
}
EXPORT_SYMBOL_GPL(nf_flow_table_free);
diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c
index 11b6e1942092..4d1169b634c5 100644
--- a/net/netfilter/nf_flow_table_offload.c
+++ b/net/netfilter/nf_flow_table_offload.c
@@ -1063,6 +1063,14 @@ void nf_flow_offload_stats(struct nf_flowtable *flowtable,
flow_offload_queue_work(offload);
}
+void nf_flow_table_offload_flush_cleanup(struct nf_flowtable *flowtable)
+{
+ if (nf_flowtable_hw_offload(flowtable)) {
+ flush_workqueue(nf_flow_offload_del_wq);
+ nf_flow_table_gc_run(flowtable);
+ }
+}
+
void nf_flow_table_offload_flush(struct nf_flowtable *flowtable)
{
if (nf_flowtable_hw_offload(flowtable)) {
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 4bd6e9427c91..bc690238a3c5 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -32,7 +32,6 @@ static LIST_HEAD(nf_tables_objects);
static LIST_HEAD(nf_tables_flowtables);
static LIST_HEAD(nf_tables_destroy_list);
static DEFINE_SPINLOCK(nf_tables_destroy_list_lock);
-static u64 table_handle;
enum {
NFT_VALIDATE_SKIP = 0,
@@ -1235,7 +1234,7 @@ static int nf_tables_newtable(struct sk_buff *skb, const struct nfnl_info *info,
INIT_LIST_HEAD(&table->flowtables);
table->family = family;
table->flags = flags;
- table->handle = ++table_handle;
+ table->handle = ++nft_net->table_handle;
if (table->flags & NFT_TABLE_F_OWNER)
table->nlpid = NETLINK_CB(skb).portid;
@@ -2196,9 +2195,9 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
struct netlink_ext_ack *extack)
{
const struct nlattr * const *nla = ctx->nla;
+ struct nft_stats __percpu *stats = NULL;
struct nft_table *table = ctx->table;
struct nft_base_chain *basechain;
- struct nft_stats __percpu *stats;
struct net *net = ctx->net;
char name[NFT_NAME_MAXLEN];
struct nft_rule_blob *blob;
@@ -2236,7 +2235,6 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
return PTR_ERR(stats);
}
rcu_assign_pointer(basechain->stats, stats);
- static_branch_inc(&nft_counters_enabled);
}
err = nft_basechain_init(basechain, family, &hook, flags);
@@ -2319,6 +2317,9 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
goto err_unregister_hook;
}
+ if (stats)
+ static_branch_inc(&nft_counters_enabled);
+
table->use++;
return 0;
@@ -2574,6 +2575,9 @@ static int nf_tables_newchain(struct sk_buff *skb, const struct nfnl_info *info,
nft_ctx_init(&ctx, net, skb, info->nlh, family, table, chain, nla);
if (chain != NULL) {
+ if (chain->flags & NFT_CHAIN_BINDING)
+ return -EINVAL;
+
if (info->nlh->nlmsg_flags & NLM_F_EXCL) {
NL_SET_BAD_ATTR(extack, attr);
return -EEXIST;
@@ -9653,6 +9657,8 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data,
return PTR_ERR(chain);
if (nft_is_base_chain(chain))
return -EOPNOTSUPP;
+ if (nft_chain_is_bound(chain))
+ return -EINVAL;
if (desc->flags & NFT_DATA_DESC_SETELEM &&
chain->flags & NFT_CHAIN_BINDING)
return -EINVAL;
diff --git a/net/netfilter/nft_osf.c b/net/netfilter/nft_osf.c
index 5eed18f90b02..175d666c8d87 100644
--- a/net/netfilter/nft_osf.c
+++ b/net/netfilter/nft_osf.c
@@ -115,9 +115,21 @@ static int nft_osf_validate(const struct nft_ctx *ctx,
const struct nft_expr *expr,
const struct nft_data **data)
{
- return nft_chain_validate_hooks(ctx->chain, (1 << NF_INET_LOCAL_IN) |
- (1 << NF_INET_PRE_ROUTING) |
- (1 << NF_INET_FORWARD));
+ unsigned int hooks;
+
+ switch (ctx->family) {
+ case NFPROTO_IPV4:
+ case NFPROTO_IPV6:
+ case NFPROTO_INET:
+ hooks = (1 << NF_INET_LOCAL_IN) |
+ (1 << NF_INET_PRE_ROUTING) |
+ (1 << NF_INET_FORWARD);
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ return nft_chain_validate_hooks(ctx->chain, hooks);
}
static bool nft_osf_reduce(struct nft_regs_track *track,
diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c
index 2e7ac007cb30..eb0e40c29712 100644
--- a/net/netfilter/nft_payload.c
+++ b/net/netfilter/nft_payload.c
@@ -740,17 +740,23 @@ static int nft_payload_set_init(const struct nft_ctx *ctx,
const struct nlattr * const tb[])
{
struct nft_payload_set *priv = nft_expr_priv(expr);
+ u32 csum_offset, csum_type = NFT_PAYLOAD_CSUM_NONE;
+ int err;
priv->base = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_BASE]));
priv->offset = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_OFFSET]));
priv->len = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_LEN]));
if (tb[NFTA_PAYLOAD_CSUM_TYPE])
- priv->csum_type =
- ntohl(nla_get_be32(tb[NFTA_PAYLOAD_CSUM_TYPE]));
- if (tb[NFTA_PAYLOAD_CSUM_OFFSET])
- priv->csum_offset =
- ntohl(nla_get_be32(tb[NFTA_PAYLOAD_CSUM_OFFSET]));
+ csum_type = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_CSUM_TYPE]));
+ if (tb[NFTA_PAYLOAD_CSUM_OFFSET]) {
+ err = nft_parse_u32_check(tb[NFTA_PAYLOAD_CSUM_OFFSET], U8_MAX,
+ &csum_offset);
+ if (err < 0)
+ return err;
+
+ priv->csum_offset = csum_offset;
+ }
if (tb[NFTA_PAYLOAD_CSUM_FLAGS]) {
u32 flags;
@@ -761,7 +767,7 @@ static int nft_payload_set_init(const struct nft_ctx *ctx,
priv->csum_flags = flags;
}
- switch (priv->csum_type) {
+ switch (csum_type) {
case NFT_PAYLOAD_CSUM_NONE:
case NFT_PAYLOAD_CSUM_INET:
break;
@@ -775,6 +781,7 @@ static int nft_payload_set_init(const struct nft_ctx *ctx,
default:
return -EOPNOTSUPP;
}
+ priv->csum_type = csum_type;
return nft_parse_register_load(tb[NFTA_PAYLOAD_SREG], &priv->sreg,
priv->len);
@@ -833,6 +840,7 @@ nft_payload_select_ops(const struct nft_ctx *ctx,
{
enum nft_payload_bases base;
unsigned int offset, len;
+ int err;
if (tb[NFTA_PAYLOAD_BASE] == NULL ||
tb[NFTA_PAYLOAD_OFFSET] == NULL ||
@@ -859,8 +867,13 @@ nft_payload_select_ops(const struct nft_ctx *ctx,
if (tb[NFTA_PAYLOAD_DREG] == NULL)
return ERR_PTR(-EINVAL);
- offset = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_OFFSET]));
- len = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_LEN]));
+ err = nft_parse_u32_check(tb[NFTA_PAYLOAD_OFFSET], U8_MAX, &offset);
+ if (err < 0)
+ return ERR_PTR(err);
+
+ err = nft_parse_u32_check(tb[NFTA_PAYLOAD_LEN], U8_MAX, &len);
+ if (err < 0)
+ return ERR_PTR(err);
if (len <= 4 && is_power_of_2(len) && IS_ALIGNED(offset, len) &&
base != NFT_PAYLOAD_LL_HEADER && base != NFT_PAYLOAD_INNER_HEADER)
diff --git a/net/netfilter/nft_tproxy.c b/net/netfilter/nft_tproxy.c
index 801f013971df..a701ad64f10a 100644
--- a/net/netfilter/nft_tproxy.c
+++ b/net/netfilter/nft_tproxy.c
@@ -312,6 +312,13 @@ static int nft_tproxy_dump(struct sk_buff *skb,
return 0;
}
+static int nft_tproxy_validate(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nft_data **data)
+{
+ return nft_chain_validate_hooks(ctx->chain, 1 << NF_INET_PRE_ROUTING);
+}
+
static struct nft_expr_type nft_tproxy_type;
static const struct nft_expr_ops nft_tproxy_ops = {
.type = &nft_tproxy_type,
@@ -321,6 +328,7 @@ static const struct nft_expr_ops nft_tproxy_ops = {
.destroy = nft_tproxy_destroy,
.dump = nft_tproxy_dump,
.reduce = NFT_REDUCE_READONLY,
+ .validate = nft_tproxy_validate,
};
static struct nft_expr_type nft_tproxy_type __read_mostly = {
diff --git a/net/netfilter/nft_tunnel.c b/net/netfilter/nft_tunnel.c
index d0f9b1d51b0e..96b03e0bf74f 100644
--- a/net/netfilter/nft_tunnel.c
+++ b/net/netfilter/nft_tunnel.c
@@ -161,6 +161,7 @@ static const struct nft_expr_ops nft_tunnel_get_ops = {
static struct nft_expr_type nft_tunnel_type __read_mostly = {
.name = "tunnel",
+ .family = NFPROTO_NETDEV,
.ops = &nft_tunnel_get_ops,
.policy = nft_tunnel_policy,
.maxattr = NFTA_TUNNEL_MAX,
diff --git a/net/rose/rose_loopback.c b/net/rose/rose_loopback.c
index 11c45c8c6c16..036d92c0ad79 100644
--- a/net/rose/rose_loopback.c
+++ b/net/rose/rose_loopback.c
@@ -96,7 +96,8 @@ static void rose_loopback_timer(struct timer_list *unused)
}
if (frametype == ROSE_CALL_REQUEST) {
- if (!rose_loopback_neigh->dev) {
+ if (!rose_loopback_neigh->dev &&
+ !rose_loopback_neigh->loopback) {
kfree_skb(skb);
continue;
}
diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c
index 84d0a4109645..6401cdf7a624 100644
--- a/net/rxrpc/call_object.c
+++ b/net/rxrpc/call_object.c
@@ -285,8 +285,10 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx,
_enter("%p,%lx", rx, p->user_call_ID);
limiter = rxrpc_get_call_slot(p, gfp);
- if (!limiter)
+ if (!limiter) {
+ release_sock(&rx->sk);
return ERR_PTR(-ERESTARTSYS);
+ }
call = rxrpc_alloc_client_call(rx, srx, gfp, debug_id);
if (IS_ERR(call)) {
diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c
index 1d38e279e2ef..3c3a626459de 100644
--- a/net/rxrpc/sendmsg.c
+++ b/net/rxrpc/sendmsg.c
@@ -51,10 +51,7 @@ static int rxrpc_wait_for_tx_window_intr(struct rxrpc_sock *rx,
return sock_intr_errno(*timeo);
trace_rxrpc_transmit(call, rxrpc_transmit_wait);
- mutex_unlock(&call->user_mutex);
*timeo = schedule_timeout(*timeo);
- if (mutex_lock_interruptible(&call->user_mutex) < 0)
- return sock_intr_errno(*timeo);
}
}
@@ -290,37 +287,48 @@ static int rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call,
static int rxrpc_send_data(struct rxrpc_sock *rx,
struct rxrpc_call *call,
struct msghdr *msg, size_t len,
- rxrpc_notify_end_tx_t notify_end_tx)
+ rxrpc_notify_end_tx_t notify_end_tx,
+ bool *_dropped_lock)
{
struct rxrpc_skb_priv *sp;
struct sk_buff *skb;
struct sock *sk = &rx->sk;
+ enum rxrpc_call_state state;
long timeo;
- bool more;
- int ret, copied;
+ bool more = msg->msg_flags & MSG_MORE;
+ int ret, copied = 0;
timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
/* this should be in poll */
sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
+reload:
+ ret = -EPIPE;
if (sk->sk_shutdown & SEND_SHUTDOWN)
- return -EPIPE;
-
- more = msg->msg_flags & MSG_MORE;
-
+ goto maybe_error;
+ state = READ_ONCE(call->state);
+ ret = -ESHUTDOWN;
+ if (state >= RXRPC_CALL_COMPLETE)
+ goto maybe_error;
+ ret = -EPROTO;
+ if (state != RXRPC_CALL_CLIENT_SEND_REQUEST &&
+ state != RXRPC_CALL_SERVER_ACK_REQUEST &&
+ state != RXRPC_CALL_SERVER_SEND_REPLY)
+ goto maybe_error;
+
+ ret = -EMSGSIZE;
if (call->tx_total_len != -1) {
- if (len > call->tx_total_len)
- return -EMSGSIZE;
- if (!more && len != call->tx_total_len)
- return -EMSGSIZE;
+ if (len - copied > call->tx_total_len)
+ goto maybe_error;
+ if (!more && len - copied != call->tx_total_len)
+ goto maybe_error;
}
skb = call->tx_pending;
call->tx_pending = NULL;
rxrpc_see_skb(skb, rxrpc_skb_seen);
- copied = 0;
do {
/* Check to see if there's a ping ACK to reply to. */
if (call->ackr_reason == RXRPC_ACK_PING_RESPONSE)
@@ -331,16 +339,8 @@ static int rxrpc_send_data(struct rxrpc_sock *rx,
_debug("alloc");
- if (!rxrpc_check_tx_space(call, NULL)) {
- ret = -EAGAIN;
- if (msg->msg_flags & MSG_DONTWAIT)
- goto maybe_error;
- ret = rxrpc_wait_for_tx_window(rx, call,
- &timeo,
- msg->msg_flags & MSG_WAITALL);
- if (ret < 0)
- goto maybe_error;
- }
+ if (!rxrpc_check_tx_space(call, NULL))
+ goto wait_for_space;
/* Work out the maximum size of a packet. Assume that
* the security header is going to be in the padded
@@ -468,6 +468,27 @@ static int rxrpc_send_data(struct rxrpc_sock *rx,
efault:
ret = -EFAULT;
goto out;
+
+wait_for_space:
+ ret = -EAGAIN;
+ if (msg->msg_flags & MSG_DONTWAIT)
+ goto maybe_error;
+ mutex_unlock(&call->user_mutex);
+ *_dropped_lock = true;
+ ret = rxrpc_wait_for_tx_window(rx, call, &timeo,
+ msg->msg_flags & MSG_WAITALL);
+ if (ret < 0)
+ goto maybe_error;
+ if (call->interruptibility == RXRPC_INTERRUPTIBLE) {
+ if (mutex_lock_interruptible(&call->user_mutex) < 0) {
+ ret = sock_intr_errno(timeo);
+ goto maybe_error;
+ }
+ } else {
+ mutex_lock(&call->user_mutex);
+ }
+ *_dropped_lock = false;
+ goto reload;
}
/*
@@ -629,6 +650,7 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
enum rxrpc_call_state state;
struct rxrpc_call *call;
unsigned long now, j;
+ bool dropped_lock = false;
int ret;
struct rxrpc_send_params p = {
@@ -737,21 +759,13 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
ret = rxrpc_send_abort_packet(call);
} else if (p.command != RXRPC_CMD_SEND_DATA) {
ret = -EINVAL;
- } else if (rxrpc_is_client_call(call) &&
- state != RXRPC_CALL_CLIENT_SEND_REQUEST) {
- /* request phase complete for this client call */
- ret = -EPROTO;
- } else if (rxrpc_is_service_call(call) &&
- state != RXRPC_CALL_SERVER_ACK_REQUEST &&
- state != RXRPC_CALL_SERVER_SEND_REPLY) {
- /* Reply phase not begun or not complete for service call. */
- ret = -EPROTO;
} else {
- ret = rxrpc_send_data(rx, call, msg, len, NULL);
+ ret = rxrpc_send_data(rx, call, msg, len, NULL, &dropped_lock);
}
out_put_unlock:
- mutex_unlock(&call->user_mutex);
+ if (!dropped_lock)
+ mutex_unlock(&call->user_mutex);
error_put:
rxrpc_put_call(call, rxrpc_call_put);
_leave(" = %d", ret);
@@ -779,6 +793,7 @@ int rxrpc_kernel_send_data(struct socket *sock, struct rxrpc_call *call,
struct msghdr *msg, size_t len,
rxrpc_notify_end_tx_t notify_end_tx)
{
+ bool dropped_lock = false;
int ret;
_enter("{%d,%s},", call->debug_id, rxrpc_call_states[call->state]);
@@ -796,7 +811,7 @@ int rxrpc_kernel_send_data(struct socket *sock, struct rxrpc_call *call,
case RXRPC_CALL_SERVER_ACK_REQUEST:
case RXRPC_CALL_SERVER_SEND_REPLY:
ret = rxrpc_send_data(rxrpc_sk(sock->sk), call, msg, len,
- notify_end_tx);
+ notify_end_tx, &dropped_lock);
break;
case RXRPC_CALL_COMPLETE:
read_lock_bh(&call->state_lock);
@@ -810,7 +825,8 @@ int rxrpc_kernel_send_data(struct socket *sock, struct rxrpc_call *call,
break;
}
- mutex_unlock(&call->user_mutex);
+ if (!dropped_lock)
+ mutex_unlock(&call->user_mutex);
_leave(" = %d", ret);
return ret;
}
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index dba0b3e24af5..a64c3c154111 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -409,7 +409,7 @@ static inline bool qdisc_restart(struct Qdisc *q, int *packets)
void __qdisc_run(struct Qdisc *q)
{
- int quota = dev_tx_weight;
+ int quota = READ_ONCE(dev_tx_weight);
int packets;
while (qdisc_restart(q, &packets)) {
diff --git a/net/socket.c b/net/socket.c
index 96300cdc0625..34102aa4ab0a 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -1801,7 +1801,7 @@ int __sys_listen(int fd, int backlog)
sock = sockfd_lookup_light(fd, &err, &fput_needed);
if (sock) {
- somaxconn = sock_net(sock->sk)->core.sysctl_somaxconn;
+ somaxconn = READ_ONCE(sock_net(sock->sk)->core.sysctl_somaxconn);
if ((unsigned int)backlog > somaxconn)
backlog = somaxconn;
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 733f9f226092..c1a01947530f 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1888,7 +1888,7 @@ call_encode(struct rpc_task *task)
break;
case -EKEYEXPIRED:
if (!task->tk_cred_retry) {
- rpc_exit(task, task->tk_status);
+ rpc_call_rpcerror(task, task->tk_status);
} else {
task->tk_action = call_refresh;
task->tk_cred_retry--;
diff --git a/net/xfrm/espintcp.c b/net/xfrm/espintcp.c
index 82d14eea1b5a..974eb97b77d2 100644
--- a/net/xfrm/espintcp.c
+++ b/net/xfrm/espintcp.c
@@ -168,7 +168,7 @@ int espintcp_queue_out(struct sock *sk, struct sk_buff *skb)
{
struct espintcp_ctx *ctx = espintcp_getctx(sk);
- if (skb_queue_len(&ctx->out_queue) >= netdev_max_backlog)
+ if (skb_queue_len(&ctx->out_queue) >= READ_ONCE(netdev_max_backlog))
return -ENOBUFS;
__skb_queue_tail(&ctx->out_queue, skb);
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index 144238a50f3d..b2f4ec9c537f 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -669,7 +669,6 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
x->curlft.bytes += skb->len;
x->curlft.packets++;
- x->curlft.use_time = ktime_get_real_seconds();
spin_unlock(&x->lock);
@@ -783,7 +782,7 @@ int xfrm_trans_queue_net(struct net *net, struct sk_buff *skb,
trans = this_cpu_ptr(&xfrm_trans_tasklet);
- if (skb_queue_len(&trans->queue) >= netdev_max_backlog)
+ if (skb_queue_len(&trans->queue) >= READ_ONCE(netdev_max_backlog))
return -ENOBUFS;
BUILD_BUG_ON(sizeof(struct xfrm_trans_cb) > sizeof(skb->cb));
diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
index 555ab35cd119..9a5e79a38c67 100644
--- a/net/xfrm/xfrm_output.c
+++ b/net/xfrm/xfrm_output.c
@@ -534,7 +534,6 @@ static int xfrm_output_one(struct sk_buff *skb, int err)
x->curlft.bytes += skb->len;
x->curlft.packets++;
- x->curlft.use_time = ktime_get_real_seconds();
spin_unlock_bh(&x->lock);
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index f1a0bab920a5..cc6ab79609e2 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -3162,7 +3162,7 @@ struct dst_entry *xfrm_lookup_with_ifid(struct net *net,
return dst;
nopol:
- if (!(dst_orig->dev->flags & IFF_LOOPBACK) &&
+ if ((!dst_orig->dev || !(dst_orig->dev->flags & IFF_LOOPBACK)) &&
net->xfrm.policy_default[dir] == XFRM_USERPOLICY_BLOCK) {
err = -EPERM;
goto error;
@@ -3599,6 +3599,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
if (pols[1]) {
if (IS_ERR(pols[1])) {
XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR);
+ xfrm_pol_put(pols[0]);
return 0;
}
pols[1]->curlft.use_time = ktime_get_real_seconds();
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index ccfb172eb5b8..11d89af9cb55 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -1592,6 +1592,7 @@ static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig,
x->replay = orig->replay;
x->preplay = orig->preplay;
x->mapping_maxage = orig->mapping_maxage;
+ x->lastused = orig->lastused;
x->new_mapping = 0;
x->new_mapping_sport = 0;
diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config
index 73e0762092fe..02a6000a82bb 100644
--- a/tools/perf/Makefile.config
+++ b/tools/perf/Makefile.config
@@ -265,7 +265,7 @@ endif
# defined. get-executable-or-default fails with an error if the first argument is supplied but
# doesn't exist.
override PYTHON_CONFIG := $(call get-executable-or-default,PYTHON_CONFIG,$(PYTHON_AUTO))
-override PYTHON := $(call get-executable-or-default,PYTHON,$(subst -config,,$(PYTHON_AUTO)))
+override PYTHON := $(call get-executable-or-default,PYTHON,$(subst -config,,$(PYTHON_CONFIG)))
grep-libs = $(filter -l%,$(1))
strip-libs = $(filter-out -l%,$(1))
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 86f838c5661e..5f0333a8acd8 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -826,6 +826,7 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx)
}
evlist__for_each_entry(evsel_list, counter) {
+ counter->reset_group = false;
if (bpf_counter__load(counter, &target))
return -1;
if (!evsel__is_bpf(counter))
prev parent reply other threads:[~2022-08-31 16:27 UTC|newest]
Thread overview: 2+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-08-31 16:26 Linux 5.19.6 Greg Kroah-Hartman
2022-08-31 16:26 ` Greg Kroah-Hartman [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=166196320419649@kroah.com \
--to=gregkh@linuxfoundation.org \
--cc=akpm@linux-foundation.org \
--cc=jslaby@suse.cz \
--cc=linux-kernel@vger.kernel.org \
--cc=lwn@lwn.net \
--cc=stable@vger.kernel.org \
--cc=torvalds@linux-foundation.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.