Re: Linux 6.1.57

From: Greg Kroah-Hartman
Date: Tue Oct 10 2023 - 16:17:47 EST


diff --git a/Documentation/arm64/silicon-errata.rst b/Documentation/arm64/silicon-errata.rst
index 9000640f7f7a..d9fce65b2f04 100644
--- a/Documentation/arm64/silicon-errata.rst
+++ b/Documentation/arm64/silicon-errata.rst
@@ -63,6 +63,8 @@ stable kernels.
+----------------+-----------------+-----------------+-----------------------------+
| ARM | Cortex-A510 | #1902691 | ARM64_ERRATUM_1902691 |
+----------------+-----------------+-----------------+-----------------------------+
+| ARM | Cortex-A520 | #2966298 | ARM64_ERRATUM_2966298 |
++----------------+-----------------+-----------------+-----------------------------+
| ARM | Cortex-A53 | #826319 | ARM64_ERRATUM_826319 |
+----------------+-----------------+-----------------+-----------------------------+
| ARM | Cortex-A53 | #827319 | ARM64_ERRATUM_827319 |
diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst
index 3301288a7c69..f5f7a464605f 100644
--- a/Documentation/networking/ip-sysctl.rst
+++ b/Documentation/networking/ip-sysctl.rst
@@ -2148,6 +2148,14 @@ accept_ra_min_hop_limit - INTEGER

Default: 1

+accept_ra_min_lft - INTEGER
+ Minimum acceptable lifetime value in Router Advertisement.
+
+ RA sections with a lifetime less than this value shall be
+ ignored. Zero lifetimes stay unaffected.
+
+ Default: 0
+
accept_ra_pinfo - BOOLEAN
Learn Prefix Information in Router Advertisement.

diff --git a/Makefile b/Makefile
index 9ceda3dad5eb..b435b56594f0 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
VERSION = 6
PATCHLEVEL = 1
-SUBLEVEL = 56
+SUBLEVEL = 57
EXTRAVERSION =
NAME = Curry Ramen

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index d5eb2fbab473..9ee9e17eb2ca 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -983,6 +983,19 @@ config ARM64_ERRATUM_2457168

If unsure, say Y.

+config ARM64_ERRATUM_2966298
+ bool "Cortex-A520: 2966298: workaround for speculatively executed unprivileged load"
+ default y
+ help
+ This option adds the workaround for ARM Cortex-A520 erratum 2966298.
+
+ On an affected Cortex-A520 core, a speculatively executed unprivileged
+ load might leak data from a privileged level via a cache side channel.
+
+ Work around this problem by executing a TLBI before returning to EL0.
+
+ If unsure, say Y.
+
config CAVIUM_ERRATUM_22375
bool "Cavium erratum 22375, 24313"
default y
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index f73f11b55042..a0badda3a8d1 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -670,7 +670,7 @@ static inline bool supports_clearbhb(int scope)
isar2 = read_sanitised_ftr_reg(SYS_ID_AA64ISAR2_EL1);

return cpuid_feature_extract_unsigned_field(isar2,
- ID_AA64ISAR2_EL1_BC_SHIFT);
+ ID_AA64ISAR2_EL1_CLRBHB_SHIFT);
}

const struct cpumask *system_32bit_el0_cpumask(void);
@@ -863,7 +863,11 @@ static inline bool cpu_has_hw_af(void)
if (!IS_ENABLED(CONFIG_ARM64_HW_AFDBM))
return false;

- mmfr1 = read_cpuid(ID_AA64MMFR1_EL1);
+ /*
+ * Use cached version to avoid emulated msr operation on KVM
+ * guests.
+ */
+ mmfr1 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1);
return cpuid_feature_extract_unsigned_field(mmfr1,
ID_AA64MMFR1_EL1_HAFDBS_SHIFT);
}
diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h
index 65e53ef5a396..357932938b5a 100644
--- a/arch/arm64/include/asm/cputype.h
+++ b/arch/arm64/include/asm/cputype.h
@@ -79,6 +79,7 @@
#define ARM_CPU_PART_CORTEX_A78AE 0xD42
#define ARM_CPU_PART_CORTEX_X1 0xD44
#define ARM_CPU_PART_CORTEX_A510 0xD46
+#define ARM_CPU_PART_CORTEX_A520 0xD80
#define ARM_CPU_PART_CORTEX_A710 0xD47
#define ARM_CPU_PART_CORTEX_X2 0xD48
#define ARM_CPU_PART_NEOVERSE_N2 0xD49
@@ -141,6 +142,7 @@
#define MIDR_CORTEX_A78AE MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78AE)
#define MIDR_CORTEX_X1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X1)
#define MIDR_CORTEX_A510 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A510)
+#define MIDR_CORTEX_A520 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A520)
#define MIDR_CORTEX_A710 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A710)
#define MIDR_CORTEX_X2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X2)
#define MIDR_NEOVERSE_N2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_N2)
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index 8dbf3c21ea22..3f917124684c 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -723,6 +723,14 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
.cpu_enable = cpu_clear_bf16_from_user_emulation,
},
#endif
+#ifdef CONFIG_ARM64_ERRATUM_2966298
+ {
+ .desc = "ARM erratum 2966298",
+ .capability = ARM64_WORKAROUND_2966298,
+ /* Cortex-A520 r0p0 - r0p1 */
+ ERRATA_MIDR_REV_RANGE(MIDR_CORTEX_A520, 0, 0, 1),
+ },
+#endif
#ifdef CONFIG_AMPERE_ERRATUM_AC03_CPU_38
{
.desc = "AmpereOne erratum AC03_CPU_38",
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index b3eb53847c96..770a31c6ed81 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -212,7 +212,8 @@ static const struct arm64_ftr_bits ftr_id_aa64isar1[] = {
};

static const struct arm64_ftr_bits ftr_id_aa64isar2[] = {
- ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_HIGHER_SAFE, ID_AA64ISAR2_EL1_BC_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_EL1_CLRBHB_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_EL1_BC_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_PTR_AUTH),
FTR_STRICT, FTR_EXACT, ID_AA64ISAR2_EL1_APA3_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_PTR_AUTH),
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index beb4db21c89c..de16fa917e1b 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -419,6 +419,10 @@ alternative_else_nop_endif
ldp x28, x29, [sp, #16 * 14]

.if \el == 0
+alternative_if ARM64_WORKAROUND_2966298
+ tlbi vale1, xzr
+ dsb nsh
+alternative_else_nop_endif
alternative_if_not ARM64_UNMAP_KERNEL_AT_EL0
ldr lr, [sp, #S_LR]
add sp, sp, #PT_REGS_SIZE // restore sp
diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps
index 14d31d1b2ff0..e73830d9f136 100644
--- a/arch/arm64/tools/cpucaps
+++ b/arch/arm64/tools/cpucaps
@@ -71,6 +71,7 @@ WORKAROUND_2064142
WORKAROUND_2077057
WORKAROUND_2457168
WORKAROUND_2658417
+WORKAROUND_2966298
WORKAROUND_AMPERE_AC03_CPU_38
WORKAROUND_TRBE_OVERWRITE_FILL_MODE
WORKAROUND_TSB_FLUSH_FAILURE
diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg
index 384757a7eda9..11c3f7a7cec7 100644
--- a/arch/arm64/tools/sysreg
+++ b/arch/arm64/tools/sysreg
@@ -484,7 +484,11 @@ EndEnum
EndSysreg

Sysreg ID_AA64ISAR2_EL1 3 0 0 6 2
-Res0 63:28
+Res0 63:32
+Enum 31:28 CLRBHB
+ 0b0000 NI
+ 0b0001 IMP
+EndEnum
Enum 27:24 PAC_frac
0b0000 NI
0b0001 IMP
diff --git a/arch/parisc/include/asm/ldcw.h b/arch/parisc/include/asm/ldcw.h
index 6d28b5514699..10a061d6899c 100644
--- a/arch/parisc/include/asm/ldcw.h
+++ b/arch/parisc/include/asm/ldcw.h
@@ -2,14 +2,28 @@
#ifndef __PARISC_LDCW_H
#define __PARISC_LDCW_H

-#ifndef CONFIG_PA20
/* Because kmalloc only guarantees 8-byte alignment for kmalloc'd data,
and GCC only guarantees 8-byte alignment for stack locals, we can't
be assured of 16-byte alignment for atomic lock data even if we
specify "__attribute ((aligned(16)))" in the type declaration. So,
we use a struct containing an array of four ints for the atomic lock
type and dynamically select the 16-byte aligned int from the array
- for the semaphore. */
+ for the semaphore. */
+
+/* From: "Jim Hull" <jim.hull of hp.com>
+ I've attached a summary of the change, but basically, for PA 2.0, as
+ long as the ",CO" (coherent operation) completer is implemented, then the
+ 16-byte alignment requirement for ldcw and ldcd is relaxed, and instead
+ they only require "natural" alignment (4-byte for ldcw, 8-byte for
+ ldcd).
+
+ Although the cache control hint is accepted by all PA 2.0 processors,
+ it is only implemented on PA8800/PA8900 CPUs. Prior PA8X00 CPUs still
+ require 16-byte alignment. If the address is unaligned, the operation
+ of the instruction is undefined. The ldcw instruction does not generate
+ unaligned data reference traps so misaligned accesses are not detected.
+ This hid the problem for years. So, restore the 16-byte alignment dropped
+ by Kyle McMartin in "Remove __ldcw_align for PA-RISC 2.0 processors". */

#define __PA_LDCW_ALIGNMENT 16
#define __PA_LDCW_ALIGN_ORDER 4
@@ -19,22 +33,12 @@
& ~(__PA_LDCW_ALIGNMENT - 1); \
(volatile unsigned int *) __ret; \
})
-#define __LDCW "ldcw"

-#else /*CONFIG_PA20*/
-/* From: "Jim Hull" <jim.hull of hp.com>
- I've attached a summary of the change, but basically, for PA 2.0, as
- long as the ",CO" (coherent operation) completer is specified, then the
- 16-byte alignment requirement for ldcw and ldcd is relaxed, and instead
- they only require "natural" alignment (4-byte for ldcw, 8-byte for
- ldcd). */
-
-#define __PA_LDCW_ALIGNMENT 4
-#define __PA_LDCW_ALIGN_ORDER 2
-#define __ldcw_align(a) (&(a)->slock)
+#ifdef CONFIG_PA20
#define __LDCW "ldcw,co"
-
-#endif /*!CONFIG_PA20*/
+#else
+#define __LDCW "ldcw"
+#endif

/* LDCW, the only atomic read-write operation PA-RISC has. *sigh*.
We don't explicitly expose that "*a" may be written as reload
diff --git a/arch/parisc/include/asm/spinlock_types.h b/arch/parisc/include/asm/spinlock_types.h
index ca39ee350c3f..35c5086b74d7 100644
--- a/arch/parisc/include/asm/spinlock_types.h
+++ b/arch/parisc/include/asm/spinlock_types.h
@@ -3,13 +3,8 @@
#define __ASM_SPINLOCK_TYPES_H

typedef struct {
-#ifdef CONFIG_PA20
- volatile unsigned int slock;
-# define __ARCH_SPIN_LOCK_UNLOCKED { 1 }
-#else
volatile unsigned int lock[4];
# define __ARCH_SPIN_LOCK_UNLOCKED { { 1, 1, 1, 1 } }
-#endif
} arch_spinlock_t;


diff --git a/arch/parisc/kernel/smp.c b/arch/parisc/kernel/smp.c
index 7dbd92cafae3..e37ec0548730 100644
--- a/arch/parisc/kernel/smp.c
+++ b/arch/parisc/kernel/smp.c
@@ -443,7 +443,9 @@ int __cpu_up(unsigned int cpu, struct task_struct *tidle)
if (cpu_online(cpu))
return 0;

- if (num_online_cpus() < setup_max_cpus && smp_boot_one_cpu(cpu, tidle))
+ if (num_online_cpus() < nr_cpu_ids &&
+ num_online_cpus() < setup_max_cpus &&
+ smp_boot_one_cpu(cpu, tidle))
return -EIO;

return cpu_online(cpu) ? 0 : -EIO;
diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c
index 6672a3f05fc6..04f4b96dec6d 100644
--- a/arch/x86/events/amd/core.c
+++ b/arch/x86/events/amd/core.c
@@ -534,8 +534,12 @@ static void amd_pmu_cpu_reset(int cpu)
/* Clear enable bits i.e. PerfCntrGlobalCtl.PerfCntrEn */
wrmsrl(MSR_AMD64_PERF_CNTR_GLOBAL_CTL, 0);

- /* Clear overflow bits i.e. PerfCntrGLobalStatus.PerfCntrOvfl */
- wrmsrl(MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR, amd_pmu_global_cntr_mask);
+ /*
+ * Clear freeze and overflow bits i.e. PerfCntrGlobalStatus.LbrFreeze
+ * and PerfCntrGlobalStatus.PerfCntrOvfl
+ */
+ wrmsrl(MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR,
+ GLOBAL_STATUS_LBRS_FROZEN | amd_pmu_global_cntr_mask);
}

static int amd_pmu_cpu_prepare(int cpu)
@@ -570,6 +574,7 @@ static void amd_pmu_cpu_starting(int cpu)
int i, nb_id;

cpuc->perf_ctr_virt_mask = AMD64_EVENTSEL_HOSTONLY;
+ amd_pmu_cpu_reset(cpu);

if (!x86_pmu.amd_nb_constraints)
return;
@@ -591,8 +596,6 @@ static void amd_pmu_cpu_starting(int cpu)

cpuc->amd_nb->nb_id = nb_id;
cpuc->amd_nb->refcnt++;
-
- amd_pmu_cpu_reset(cpu);
}

static void amd_pmu_cpu_dead(int cpu)
@@ -601,6 +604,7 @@ static void amd_pmu_cpu_dead(int cpu)

kfree(cpuhw->lbr_sel);
cpuhw->lbr_sel = NULL;
+ amd_pmu_cpu_reset(cpu);

if (!x86_pmu.amd_nb_constraints)
return;
@@ -613,8 +617,6 @@ static void amd_pmu_cpu_dead(int cpu)

cpuhw->amd_nb = NULL;
}
-
- amd_pmu_cpu_reset(cpu);
}

static inline void amd_pmu_set_global_ctl(u64 ctl)
@@ -884,7 +886,7 @@ static int amd_pmu_v2_handle_irq(struct pt_regs *regs)
struct hw_perf_event *hwc;
struct perf_event *event;
int handled = 0, idx;
- u64 status, mask;
+ u64 reserved, status, mask;
bool pmu_enabled;

/*
@@ -909,6 +911,14 @@ static int amd_pmu_v2_handle_irq(struct pt_regs *regs)
status &= ~GLOBAL_STATUS_LBRS_FROZEN;
}

+ reserved = status & ~amd_pmu_global_cntr_mask;
+ if (reserved)
+ pr_warn_once("Reserved PerfCntrGlobalStatus bits are set (0x%llx), please consider updating microcode\n",
+ reserved);
+
+ /* Clear any reserved bits set by buggy microcode */
+ status &= amd_pmu_global_cntr_mask;
+
for (idx = 0; idx < x86_pmu.num_counters; idx++) {
if (!test_bit(idx, cpuc->active_mask))
continue;
diff --git a/arch/x86/kernel/sev-shared.c b/arch/x86/kernel/sev-shared.c
index 3a5b0c9c4fcc..7dce812ce253 100644
--- a/arch/x86/kernel/sev-shared.c
+++ b/arch/x86/kernel/sev-shared.c
@@ -253,7 +253,7 @@ static int __sev_cpuid_hv(u32 fn, int reg_idx, u32 *reg)
return 0;
}

-static int sev_cpuid_hv(struct cpuid_leaf *leaf)
+static int __sev_cpuid_hv_msr(struct cpuid_leaf *leaf)
{
int ret;

@@ -276,6 +276,45 @@ static int sev_cpuid_hv(struct cpuid_leaf *leaf)
return ret;
}

+static int __sev_cpuid_hv_ghcb(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid_leaf *leaf)
+{
+ u32 cr4 = native_read_cr4();
+ int ret;
+
+ ghcb_set_rax(ghcb, leaf->fn);
+ ghcb_set_rcx(ghcb, leaf->subfn);
+
+ if (cr4 & X86_CR4_OSXSAVE)
+ /* Safe to read xcr0 */
+ ghcb_set_xcr0(ghcb, xgetbv(XCR_XFEATURE_ENABLED_MASK));
+ else
+ /* xgetbv will cause #UD - use reset value for xcr0 */
+ ghcb_set_xcr0(ghcb, 1);
+
+ ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_CPUID, 0, 0);
+ if (ret != ES_OK)
+ return ret;
+
+ if (!(ghcb_rax_is_valid(ghcb) &&
+ ghcb_rbx_is_valid(ghcb) &&
+ ghcb_rcx_is_valid(ghcb) &&
+ ghcb_rdx_is_valid(ghcb)))
+ return ES_VMM_ERROR;
+
+ leaf->eax = ghcb->save.rax;
+ leaf->ebx = ghcb->save.rbx;
+ leaf->ecx = ghcb->save.rcx;
+ leaf->edx = ghcb->save.rdx;
+
+ return ES_OK;
+}
+
+static int sev_cpuid_hv(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid_leaf *leaf)
+{
+ return ghcb ? __sev_cpuid_hv_ghcb(ghcb, ctxt, leaf)
+ : __sev_cpuid_hv_msr(leaf);
+}
+
/*
* This may be called early while still running on the initial identity
* mapping. Use RIP-relative addressing to obtain the correct address
@@ -385,19 +424,20 @@ snp_cpuid_get_validated_func(struct cpuid_leaf *leaf)
return false;
}

-static void snp_cpuid_hv(struct cpuid_leaf *leaf)
+static void snp_cpuid_hv(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid_leaf *leaf)
{
- if (sev_cpuid_hv(leaf))
+ if (sev_cpuid_hv(ghcb, ctxt, leaf))
sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_CPUID_HV);
}

-static int snp_cpuid_postprocess(struct cpuid_leaf *leaf)
+static int snp_cpuid_postprocess(struct ghcb *ghcb, struct es_em_ctxt *ctxt,
+ struct cpuid_leaf *leaf)
{
struct cpuid_leaf leaf_hv = *leaf;

switch (leaf->fn) {
case 0x1:
- snp_cpuid_hv(&leaf_hv);
+ snp_cpuid_hv(ghcb, ctxt, &leaf_hv);

/* initial APIC ID */
leaf->ebx = (leaf_hv.ebx & GENMASK(31, 24)) | (leaf->ebx & GENMASK(23, 0));
@@ -416,7 +456,7 @@ static int snp_cpuid_postprocess(struct cpuid_leaf *leaf)
break;
case 0xB:
leaf_hv.subfn = 0;
- snp_cpuid_hv(&leaf_hv);
+ snp_cpuid_hv(ghcb, ctxt, &leaf_hv);

/* extended APIC ID */
leaf->edx = leaf_hv.edx;
@@ -464,7 +504,7 @@ static int snp_cpuid_postprocess(struct cpuid_leaf *leaf)
}
break;
case 0x8000001E:
- snp_cpuid_hv(&leaf_hv);
+ snp_cpuid_hv(ghcb, ctxt, &leaf_hv);

/* extended APIC ID */
leaf->eax = leaf_hv.eax;
@@ -485,7 +525,7 @@ static int snp_cpuid_postprocess(struct cpuid_leaf *leaf)
* Returns -EOPNOTSUPP if feature not enabled. Any other non-zero return value
* should be treated as fatal by caller.
*/
-static int snp_cpuid(struct cpuid_leaf *leaf)
+static int snp_cpuid(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid_leaf *leaf)
{
const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table();

@@ -519,7 +559,7 @@ static int snp_cpuid(struct cpuid_leaf *leaf)
return 0;
}

- return snp_cpuid_postprocess(leaf);
+ return snp_cpuid_postprocess(ghcb, ctxt, leaf);
}

/*
@@ -541,14 +581,14 @@ void __init do_vc_no_ghcb(struct pt_regs *regs, unsigned long exit_code)
leaf.fn = fn;
leaf.subfn = subfn;

- ret = snp_cpuid(&leaf);
+ ret = snp_cpuid(NULL, NULL, &leaf);
if (!ret)
goto cpuid_done;

if (ret != -EOPNOTSUPP)
goto fail;

- if (sev_cpuid_hv(&leaf))
+ if (__sev_cpuid_hv_msr(&leaf))
goto fail;

cpuid_done:
@@ -845,14 +885,15 @@ static enum es_result vc_handle_ioio(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
return ret;
}

-static int vc_handle_cpuid_snp(struct pt_regs *regs)
+static int vc_handle_cpuid_snp(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
{
+ struct pt_regs *regs = ctxt->regs;
struct cpuid_leaf leaf;
int ret;

leaf.fn = regs->ax;
leaf.subfn = regs->cx;
- ret = snp_cpuid(&leaf);
+ ret = snp_cpuid(ghcb, ctxt, &leaf);
if (!ret) {
regs->ax = leaf.eax;
regs->bx = leaf.ebx;
@@ -871,7 +912,7 @@ static enum es_result vc_handle_cpuid(struct ghcb *ghcb,
enum es_result ret;
int snp_cpuid_ret;

- snp_cpuid_ret = vc_handle_cpuid_snp(regs);
+ snp_cpuid_ret = vc_handle_cpuid_snp(ghcb, ctxt);
if (!snp_cpuid_ret)
return ES_OK;
if (snp_cpuid_ret != -EOPNOTSUPP)
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index a582ea0da74f..a82bdec923b2 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -737,6 +737,7 @@ static void blk_free_queue_rcu(struct rcu_head *rcu_head)
struct request_queue *q = container_of(rcu_head, struct request_queue,
rcu_head);

+ percpu_ref_exit(&q->q_usage_counter);
kmem_cache_free(blk_get_queue_kmem_cache(blk_queue_has_srcu(q)), q);
}

@@ -762,8 +763,6 @@ static void blk_release_queue(struct kobject *kobj)

might_sleep();

- percpu_ref_exit(&q->q_usage_counter);
-
if (q->poll_stat)
blk_stat_remove_callback(q, q->poll_cb);
blk_stat_free_callback(q->poll_cb);
diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index 25b9bdf2fc38..6a053cd0cf41 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -5022,11 +5022,27 @@ static const unsigned int ata_port_suspend_ehi = ATA_EHI_QUIET

static void ata_port_suspend(struct ata_port *ap, pm_message_t mesg)
{
+ /*
+ * We are about to suspend the port, so we do not care about
+ * scsi_rescan_device() calls scheduled by previous resume operations.
+ * The next resume will schedule the rescan again. So cancel any rescan
+ * that is not done yet.
+ */
+ cancel_delayed_work_sync(&ap->scsi_rescan_task);
+
ata_port_request_pm(ap, mesg, 0, ata_port_suspend_ehi, false);
}

static void ata_port_suspend_async(struct ata_port *ap, pm_message_t mesg)
{
+ /*
+ * We are about to suspend the port, so we do not care about
+ * scsi_rescan_device() calls scheduled by previous resume operations.
+ * The next resume will schedule the rescan again. So cancel any rescan
+ * that is not done yet.
+ */
+ cancel_delayed_work_sync(&ap->scsi_rescan_task);
+
ata_port_request_pm(ap, mesg, 0, ata_port_suspend_ehi, true);
}

diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
index d28628b964e2..7b9c9264b9a7 100644
--- a/drivers/ata/libata-scsi.c
+++ b/drivers/ata/libata-scsi.c
@@ -1081,7 +1081,15 @@ int ata_scsi_dev_config(struct scsi_device *sdev, struct ata_device *dev)
}
} else {
sdev->sector_size = ata_id_logical_sector_size(dev->id);
- sdev->manage_start_stop = 1;
+ /*
+ * Stop the drive on suspend but do not issue START STOP UNIT
+ * on resume as this is not necessary and may fail: the device
+ * will be woken up by ata_port_pm_resume() with a port reset
+ * and device revalidation.
+ */
+ sdev->manage_system_start_stop = true;
+ sdev->manage_runtime_start_stop = true;
+ sdev->no_start_on_resume = 1;
}

/*
@@ -4640,7 +4648,7 @@ void ata_scsi_dev_rescan(struct work_struct *work)
struct ata_link *link;
struct ata_device *dev;
unsigned long flags;
- bool delay_rescan = false;
+ int ret = 0;

mutex_lock(&ap->scsi_scan_mutex);
spin_lock_irqsave(ap->lock, flags);
@@ -4649,37 +4657,34 @@ void ata_scsi_dev_rescan(struct work_struct *work)
ata_for_each_dev(dev, link, ENABLED) {
struct scsi_device *sdev = dev->sdev;

+ /*
+ * If the port was suspended before this was scheduled,
+ * bail out.
+ */
+ if (ap->pflags & ATA_PFLAG_SUSPENDED)
+ goto unlock;
+
if (!sdev)
continue;
if (scsi_device_get(sdev))
continue;

- /*
- * If the rescan work was scheduled because of a resume
- * event, the port is already fully resumed, but the
- * SCSI device may not yet be fully resumed. In such
- * case, executing scsi_rescan_device() may cause a
- * deadlock with the PM code on device_lock(). Prevent
- * this by giving up and retrying rescan after a short
- * delay.
- */
- delay_rescan = sdev->sdev_gendev.power.is_suspended;
- if (delay_rescan) {
- scsi_device_put(sdev);
- break;
- }
-
spin_unlock_irqrestore(ap->lock, flags);
- scsi_rescan_device(&(sdev->sdev_gendev));
+ ret = scsi_rescan_device(sdev);
scsi_device_put(sdev);
spin_lock_irqsave(ap->lock, flags);
+
+ if (ret)
+ goto unlock;
}
}

+unlock:
spin_unlock_irqrestore(ap->lock, flags);
mutex_unlock(&ap->scsi_scan_mutex);

- if (delay_rescan)
+ /* Reschedule with a delay if scsi_rescan_device() returned an error */
+ if (ret)
schedule_delayed_work(&ap->scsi_rescan_task,
msecs_to_jiffies(5));
}
diff --git a/drivers/base/regmap/regcache-rbtree.c b/drivers/base/regmap/regcache-rbtree.c
index ae6b8788d5f3..d65715b9e129 100644
--- a/drivers/base/regmap/regcache-rbtree.c
+++ b/drivers/base/regmap/regcache-rbtree.c
@@ -453,7 +453,8 @@ static int regcache_rbtree_write(struct regmap *map, unsigned int reg,
if (!rbnode)
return -ENOMEM;
regcache_rbtree_set_register(map, rbnode,
- reg - rbnode->base_reg, value);
+ (reg - rbnode->base_reg) / map->reg_stride,
+ value);
regcache_rbtree_insert(map, &rbtree_ctx->root, rbnode);
rbtree_ctx->cached_rbnode = rbnode;
}
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 74ef3da54536..afc92869cba4 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -632,9 +632,8 @@ void rbd_warn(struct rbd_device *rbd_dev, const char *fmt, ...)
static void rbd_dev_remove_parent(struct rbd_device *rbd_dev);

static int rbd_dev_refresh(struct rbd_device *rbd_dev);
-static int rbd_dev_v2_header_onetime(struct rbd_device *rbd_dev);
-static int rbd_dev_header_info(struct rbd_device *rbd_dev);
-static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev);
+static int rbd_dev_v2_header_onetime(struct rbd_device *rbd_dev,
+ struct rbd_image_header *header);
static const char *rbd_dev_v2_snap_name(struct rbd_device *rbd_dev,
u64 snap_id);
static int _rbd_dev_v2_snap_size(struct rbd_device *rbd_dev, u64 snap_id,
@@ -995,15 +994,24 @@ static void rbd_init_layout(struct rbd_device *rbd_dev)
RCU_INIT_POINTER(rbd_dev->layout.pool_ns, NULL);
}

+static void rbd_image_header_cleanup(struct rbd_image_header *header)
+{
+ kfree(header->object_prefix);
+ ceph_put_snap_context(header->snapc);
+ kfree(header->snap_sizes);
+ kfree(header->snap_names);
+
+ memset(header, 0, sizeof(*header));
+}
+
/*
* Fill an rbd image header with information from the given format 1
* on-disk header.
*/
-static int rbd_header_from_disk(struct rbd_device *rbd_dev,
- struct rbd_image_header_ondisk *ondisk)
+static int rbd_header_from_disk(struct rbd_image_header *header,
+ struct rbd_image_header_ondisk *ondisk,
+ bool first_time)
{
- struct rbd_image_header *header = &rbd_dev->header;
- bool first_time = header->object_prefix == NULL;
struct ceph_snap_context *snapc;
char *object_prefix = NULL;
char *snap_names = NULL;
@@ -1070,11 +1078,6 @@ static int rbd_header_from_disk(struct rbd_device *rbd_dev,
if (first_time) {
header->object_prefix = object_prefix;
header->obj_order = ondisk->options.order;
- rbd_init_layout(rbd_dev);
- } else {
- ceph_put_snap_context(header->snapc);
- kfree(header->snap_names);
- kfree(header->snap_sizes);
}

/* The remaining fields always get updated (when we refresh) */
@@ -4860,7 +4863,9 @@ static int rbd_obj_read_sync(struct rbd_device *rbd_dev,
* return, the rbd_dev->header field will contain up-to-date
* information about the image.
*/
-static int rbd_dev_v1_header_info(struct rbd_device *rbd_dev)
+static int rbd_dev_v1_header_info(struct rbd_device *rbd_dev,
+ struct rbd_image_header *header,
+ bool first_time)
{
struct rbd_image_header_ondisk *ondisk = NULL;
u32 snap_count = 0;
@@ -4908,7 +4913,7 @@ static int rbd_dev_v1_header_info(struct rbd_device *rbd_dev)
snap_count = le32_to_cpu(ondisk->snap_count);
} while (snap_count != want_count);

- ret = rbd_header_from_disk(rbd_dev, ondisk);
+ ret = rbd_header_from_disk(header, ondisk, first_time);
out:
kfree(ondisk);

@@ -4932,39 +4937,6 @@ static void rbd_dev_update_size(struct rbd_device *rbd_dev)
}
}

-static int rbd_dev_refresh(struct rbd_device *rbd_dev)
-{
- u64 mapping_size;
- int ret;
-
- down_write(&rbd_dev->header_rwsem);
- mapping_size = rbd_dev->mapping.size;
-
- ret = rbd_dev_header_info(rbd_dev);
- if (ret)
- goto out;
-
- /*
- * If there is a parent, see if it has disappeared due to the
- * mapped image getting flattened.
- */
- if (rbd_dev->parent) {
- ret = rbd_dev_v2_parent_info(rbd_dev);
- if (ret)
- goto out;
- }
-
- rbd_assert(!rbd_is_snap(rbd_dev));
- rbd_dev->mapping.size = rbd_dev->header.image_size;
-
-out:
- up_write(&rbd_dev->header_rwsem);
- if (!ret && mapping_size != rbd_dev->mapping.size)
- rbd_dev_update_size(rbd_dev);
-
- return ret;
-}
-
static const struct blk_mq_ops rbd_mq_ops = {
.queue_rq = rbd_queue_rq,
};
@@ -5504,17 +5476,12 @@ static int _rbd_dev_v2_snap_size(struct rbd_device *rbd_dev, u64 snap_id,
return 0;
}

-static int rbd_dev_v2_image_size(struct rbd_device *rbd_dev)
-{
- return _rbd_dev_v2_snap_size(rbd_dev, CEPH_NOSNAP,
- &rbd_dev->header.obj_order,
- &rbd_dev->header.image_size);
-}
-
-static int rbd_dev_v2_object_prefix(struct rbd_device *rbd_dev)
+static int rbd_dev_v2_object_prefix(struct rbd_device *rbd_dev,
+ char **pobject_prefix)
{
size_t size;
void *reply_buf;
+ char *object_prefix;
int ret;
void *p;

@@ -5532,16 +5499,16 @@ static int rbd_dev_v2_object_prefix(struct rbd_device *rbd_dev)
goto out;

p = reply_buf;
- rbd_dev->header.object_prefix = ceph_extract_encoded_string(&p,
- p + ret, NULL, GFP_NOIO);
+ object_prefix = ceph_extract_encoded_string(&p, p + ret, NULL,
+ GFP_NOIO);
+ if (IS_ERR(object_prefix)) {
+ ret = PTR_ERR(object_prefix);
+ goto out;
+ }
ret = 0;

- if (IS_ERR(rbd_dev->header.object_prefix)) {
- ret = PTR_ERR(rbd_dev->header.object_prefix);
- rbd_dev->header.object_prefix = NULL;
- } else {
- dout(" object_prefix = %s\n", rbd_dev->header.object_prefix);
- }
+ *pobject_prefix = object_prefix;
+ dout(" object_prefix = %s\n", object_prefix);
out:
kfree(reply_buf);

@@ -5592,13 +5559,6 @@ static int _rbd_dev_v2_snap_features(struct rbd_device *rbd_dev, u64 snap_id,
return 0;
}

-static int rbd_dev_v2_features(struct rbd_device *rbd_dev)
-{
- return _rbd_dev_v2_snap_features(rbd_dev, CEPH_NOSNAP,
- rbd_is_ro(rbd_dev),
- &rbd_dev->header.features);
-}
-
/*
* These are generic image flags, but since they are used only for
* object map, store them in rbd_dev->object_map_flags.
@@ -5635,6 +5595,14 @@ struct parent_image_info {
u64 overlap;
};

+static void rbd_parent_info_cleanup(struct parent_image_info *pii)
+{
+ kfree(pii->pool_ns);
+ kfree(pii->image_id);
+
+ memset(pii, 0, sizeof(*pii));
+}
+
/*
* The caller is responsible for @pii.
*/
@@ -5704,6 +5672,9 @@ static int __get_parent_info(struct rbd_device *rbd_dev,
if (pii->has_overlap)
ceph_decode_64_safe(&p, end, pii->overlap, e_inval);

+ dout("%s pool_id %llu pool_ns %s image_id %s snap_id %llu has_overlap %d overlap %llu\n",
+ __func__, pii->pool_id, pii->pool_ns, pii->image_id, pii->snap_id,
+ pii->has_overlap, pii->overlap);
return 0;

e_inval:
@@ -5742,14 +5713,17 @@ static int __get_parent_info_legacy(struct rbd_device *rbd_dev,
pii->has_overlap = true;
ceph_decode_64_safe(&p, end, pii->overlap, e_inval);

+ dout("%s pool_id %llu pool_ns %s image_id %s snap_id %llu has_overlap %d overlap %llu\n",
+ __func__, pii->pool_id, pii->pool_ns, pii->image_id, pii->snap_id,
+ pii->has_overlap, pii->overlap);
return 0;

e_inval:
return -EINVAL;
}

-static int get_parent_info(struct rbd_device *rbd_dev,
- struct parent_image_info *pii)
+static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev,
+ struct parent_image_info *pii)
{
struct page *req_page, *reply_page;
void *p;
@@ -5777,7 +5751,7 @@ static int get_parent_info(struct rbd_device *rbd_dev,
return ret;
}

-static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev)
+static int rbd_dev_setup_parent(struct rbd_device *rbd_dev)
{
struct rbd_spec *parent_spec;
struct parent_image_info pii = { 0 };
@@ -5787,37 +5761,12 @@ static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev)
if (!parent_spec)
return -ENOMEM;

- ret = get_parent_info(rbd_dev, &pii);
+ ret = rbd_dev_v2_parent_info(rbd_dev, &pii);
if (ret)
goto out_err;

- dout("%s pool_id %llu pool_ns %s image_id %s snap_id %llu has_overlap %d overlap %llu\n",
- __func__, pii.pool_id, pii.pool_ns, pii.image_id, pii.snap_id,
- pii.has_overlap, pii.overlap);
-
- if (pii.pool_id == CEPH_NOPOOL || !pii.has_overlap) {
- /*
- * Either the parent never existed, or we have
- * record of it but the image got flattened so it no
- * longer has a parent. When the parent of a
- * layered image disappears we immediately set the
- * overlap to 0. The effect of this is that all new
- * requests will be treated as if the image had no
- * parent.
- *
- * If !pii.has_overlap, the parent image spec is not
- * applicable. It's there to avoid duplication in each
- * snapshot record.
- */
- if (rbd_dev->parent_overlap) {
- rbd_dev->parent_overlap = 0;
- rbd_dev_parent_put(rbd_dev);
- pr_info("%s: clone image has been flattened\n",
- rbd_dev->disk->disk_name);
- }
-
+ if (pii.pool_id == CEPH_NOPOOL || !pii.has_overlap)
goto out; /* No parent? No problem. */
- }

/* The ceph file layout needs to fit pool id in 32 bits */

@@ -5829,58 +5778,46 @@ static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev)
}

/*
- * The parent won't change (except when the clone is
- * flattened, already handled that). So we only need to
- * record the parent spec we have not already done so.
+ * The parent won't change except when the clone is flattened,
+ * so we only need to record the parent image spec once.
*/
- if (!rbd_dev->parent_spec) {
- parent_spec->pool_id = pii.pool_id;
- if (pii.pool_ns && *pii.pool_ns) {
- parent_spec->pool_ns = pii.pool_ns;
- pii.pool_ns = NULL;
- }
- parent_spec->image_id = pii.image_id;
- pii.image_id = NULL;
- parent_spec->snap_id = pii.snap_id;
-
- rbd_dev->parent_spec = parent_spec;
- parent_spec = NULL; /* rbd_dev now owns this */
+ parent_spec->pool_id = pii.pool_id;
+ if (pii.pool_ns && *pii.pool_ns) {
+ parent_spec->pool_ns = pii.pool_ns;
+ pii.pool_ns = NULL;
}
+ parent_spec->image_id = pii.image_id;
+ pii.image_id = NULL;
+ parent_spec->snap_id = pii.snap_id;
+
+ rbd_assert(!rbd_dev->parent_spec);
+ rbd_dev->parent_spec = parent_spec;
+ parent_spec = NULL; /* rbd_dev now owns this */

/*
- * We always update the parent overlap. If it's zero we issue
- * a warning, as we will proceed as if there was no parent.
+ * Record the parent overlap. If it's zero, issue a warning as
+ * we will proceed as if there is no parent.
*/
- if (!pii.overlap) {
- if (parent_spec) {
- /* refresh, careful to warn just once */
- if (rbd_dev->parent_overlap)
- rbd_warn(rbd_dev,
- "clone now standalone (overlap became 0)");
- } else {
- /* initial probe */
- rbd_warn(rbd_dev, "clone is standalone (overlap 0)");
- }
- }
+ if (!pii.overlap)
+ rbd_warn(rbd_dev, "clone is standalone (overlap 0)");
rbd_dev->parent_overlap = pii.overlap;

out:
ret = 0;
out_err:
- kfree(pii.pool_ns);
- kfree(pii.image_id);
+ rbd_parent_info_cleanup(&pii);
rbd_spec_put(parent_spec);
return ret;
}

-static int rbd_dev_v2_striping_info(struct rbd_device *rbd_dev)
+static int rbd_dev_v2_striping_info(struct rbd_device *rbd_dev,
+ u64 *stripe_unit, u64 *stripe_count)
{
struct {
__le64 stripe_unit;
__le64 stripe_count;
} __attribute__ ((packed)) striping_info_buf = { 0 };
size_t size = sizeof (striping_info_buf);
- void *p;
int ret;

ret = rbd_obj_method_sync(rbd_dev, &rbd_dev->header_oid,
@@ -5892,27 +5829,33 @@ static int rbd_dev_v2_striping_info(struct rbd_device *rbd_dev)
if (ret < size)
return -ERANGE;

- p = &striping_info_buf;
- rbd_dev->header.stripe_unit = ceph_decode_64(&p);
- rbd_dev->header.stripe_count = ceph_decode_64(&p);
+ *stripe_unit = le64_to_cpu(striping_info_buf.stripe_unit);
+ *stripe_count = le64_to_cpu(striping_info_buf.stripe_count);
+ dout(" stripe_unit = %llu stripe_count = %llu\n", *stripe_unit,
+ *stripe_count);
+
return 0;
}

-static int rbd_dev_v2_data_pool(struct rbd_device *rbd_dev)
+static int rbd_dev_v2_data_pool(struct rbd_device *rbd_dev, s64 *data_pool_id)
{
- __le64 data_pool_id;
+ __le64 data_pool_buf;
int ret;

ret = rbd_obj_method_sync(rbd_dev, &rbd_dev->header_oid,
&rbd_dev->header_oloc, "get_data_pool",
- NULL, 0, &data_pool_id, sizeof(data_pool_id));
+ NULL, 0, &data_pool_buf,
+ sizeof(data_pool_buf));
+ dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret);
if (ret < 0)
return ret;
- if (ret < sizeof(data_pool_id))
+ if (ret < sizeof(data_pool_buf))
return -EBADMSG;

- rbd_dev->header.data_pool_id = le64_to_cpu(data_pool_id);
- WARN_ON(rbd_dev->header.data_pool_id == CEPH_NOPOOL);
+ *data_pool_id = le64_to_cpu(data_pool_buf);
+ dout(" data_pool_id = %lld\n", *data_pool_id);
+ WARN_ON(*data_pool_id == CEPH_NOPOOL);
+
return 0;
}

@@ -6104,7 +6047,8 @@ static int rbd_spec_fill_names(struct rbd_device *rbd_dev)
return ret;
}

-static int rbd_dev_v2_snap_context(struct rbd_device *rbd_dev)
+static int rbd_dev_v2_snap_context(struct rbd_device *rbd_dev,
+ struct ceph_snap_context **psnapc)
{
size_t size;
int ret;
@@ -6165,9 +6109,7 @@ static int rbd_dev_v2_snap_context(struct rbd_device *rbd_dev)
for (i = 0; i < snap_count; i++)
snapc->snaps[i] = ceph_decode_64(&p);

- ceph_put_snap_context(rbd_dev->header.snapc);
- rbd_dev->header.snapc = snapc;
-
+ *psnapc = snapc;
dout(" snap context seq = %llu, snap_count = %u\n",
(unsigned long long)seq, (unsigned int)snap_count);
out:
@@ -6216,38 +6158,42 @@ static const char *rbd_dev_v2_snap_name(struct rbd_device *rbd_dev,
return snap_name;
}

-static int rbd_dev_v2_header_info(struct rbd_device *rbd_dev)
+static int rbd_dev_v2_header_info(struct rbd_device *rbd_dev,
+ struct rbd_image_header *header,
+ bool first_time)
{
- bool first_time = rbd_dev->header.object_prefix == NULL;
int ret;

- ret = rbd_dev_v2_image_size(rbd_dev);
+ ret = _rbd_dev_v2_snap_size(rbd_dev, CEPH_NOSNAP,
+ first_time ? &header->obj_order : NULL,
+ &header->image_size);
if (ret)
return ret;

if (first_time) {
- ret = rbd_dev_v2_header_onetime(rbd_dev);
+ ret = rbd_dev_v2_header_onetime(rbd_dev, header);
if (ret)
return ret;
}

- ret = rbd_dev_v2_snap_context(rbd_dev);
- if (ret && first_time) {
- kfree(rbd_dev->header.object_prefix);
- rbd_dev->header.object_prefix = NULL;
- }
+ ret = rbd_dev_v2_snap_context(rbd_dev, &header->snapc);
+ if (ret)
+ return ret;

- return ret;
+ return 0;
}

-static int rbd_dev_header_info(struct rbd_device *rbd_dev)
+static int rbd_dev_header_info(struct rbd_device *rbd_dev,
+ struct rbd_image_header *header,
+ bool first_time)
{
rbd_assert(rbd_image_format_valid(rbd_dev->image_format));
+ rbd_assert(!header->object_prefix && !header->snapc);

if (rbd_dev->image_format == 1)
- return rbd_dev_v1_header_info(rbd_dev);
+ return rbd_dev_v1_header_info(rbd_dev, header, first_time);

- return rbd_dev_v2_header_info(rbd_dev);
+ return rbd_dev_v2_header_info(rbd_dev, header, first_time);
}

/*
@@ -6735,60 +6681,49 @@ static int rbd_dev_image_id(struct rbd_device *rbd_dev)
*/
static void rbd_dev_unprobe(struct rbd_device *rbd_dev)
{
- struct rbd_image_header *header;
-
rbd_dev_parent_put(rbd_dev);
rbd_object_map_free(rbd_dev);
rbd_dev_mapping_clear(rbd_dev);

/* Free dynamic fields from the header, then zero it out */

- header = &rbd_dev->header;
- ceph_put_snap_context(header->snapc);
- kfree(header->snap_sizes);
- kfree(header->snap_names);
- kfree(header->object_prefix);
- memset(header, 0, sizeof (*header));
+ rbd_image_header_cleanup(&rbd_dev->header);
}

-static int rbd_dev_v2_header_onetime(struct rbd_device *rbd_dev)
+static int rbd_dev_v2_header_onetime(struct rbd_device *rbd_dev,
+ struct rbd_image_header *header)
{
int ret;

- ret = rbd_dev_v2_object_prefix(rbd_dev);
+ ret = rbd_dev_v2_object_prefix(rbd_dev, &header->object_prefix);
if (ret)
- goto out_err;
+ return ret;

/*
* Get the and check features for the image. Currently the
* features are assumed to never change.
*/
- ret = rbd_dev_v2_features(rbd_dev);
+ ret = _rbd_dev_v2_snap_features(rbd_dev, CEPH_NOSNAP,
+ rbd_is_ro(rbd_dev), &header->features);
if (ret)
- goto out_err;
+ return ret;

/* If the image supports fancy striping, get its parameters */

- if (rbd_dev->header.features & RBD_FEATURE_STRIPINGV2) {
- ret = rbd_dev_v2_striping_info(rbd_dev);
- if (ret < 0)
- goto out_err;
+ if (header->features & RBD_FEATURE_STRIPINGV2) {
+ ret = rbd_dev_v2_striping_info(rbd_dev, &header->stripe_unit,
+ &header->stripe_count);
+ if (ret)
+ return ret;
}

- if (rbd_dev->header.features & RBD_FEATURE_DATA_POOL) {
- ret = rbd_dev_v2_data_pool(rbd_dev);
+ if (header->features & RBD_FEATURE_DATA_POOL) {
+ ret = rbd_dev_v2_data_pool(rbd_dev, &header->data_pool_id);
if (ret)
- goto out_err;
+ return ret;
}

- rbd_init_layout(rbd_dev);
return 0;
-
-out_err:
- rbd_dev->header.features = 0;
- kfree(rbd_dev->header.object_prefix);
- rbd_dev->header.object_prefix = NULL;
- return ret;
}

/*
@@ -6983,13 +6918,15 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, int depth)
if (!depth)
down_write(&rbd_dev->header_rwsem);

- ret = rbd_dev_header_info(rbd_dev);
+ ret = rbd_dev_header_info(rbd_dev, &rbd_dev->header, true);
if (ret) {
if (ret == -ENOENT && !need_watch)
rbd_print_dne(rbd_dev, false);
goto err_out_probe;
}

+ rbd_init_layout(rbd_dev);
+
/*
* If this image is the one being mapped, we have pool name and
* id, image name and id, and snap name - need to fill snap id.
@@ -7018,7 +6955,7 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, int depth)
}

if (rbd_dev->header.features & RBD_FEATURE_LAYERING) {
- ret = rbd_dev_v2_parent_info(rbd_dev);
+ ret = rbd_dev_setup_parent(rbd_dev);
if (ret)
goto err_out_probe;
}
@@ -7044,6 +6981,107 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, int depth)
return ret;
}

+static void rbd_dev_update_header(struct rbd_device *rbd_dev,
+ struct rbd_image_header *header)
+{
+ rbd_assert(rbd_image_format_valid(rbd_dev->image_format));
+ rbd_assert(rbd_dev->header.object_prefix); /* !first_time */
+
+ if (rbd_dev->header.image_size != header->image_size) {
+ rbd_dev->header.image_size = header->image_size;
+
+ if (!rbd_is_snap(rbd_dev)) {
+ rbd_dev->mapping.size = header->image_size;
+ rbd_dev_update_size(rbd_dev);
+ }
+ }
+
+ ceph_put_snap_context(rbd_dev->header.snapc);
+ rbd_dev->header.snapc = header->snapc;
+ header->snapc = NULL;
+
+ if (rbd_dev->image_format == 1) {
+ kfree(rbd_dev->header.snap_names);
+ rbd_dev->header.snap_names = header->snap_names;
+ header->snap_names = NULL;
+
+ kfree(rbd_dev->header.snap_sizes);
+ rbd_dev->header.snap_sizes = header->snap_sizes;
+ header->snap_sizes = NULL;
+ }
+}
+
+static void rbd_dev_update_parent(struct rbd_device *rbd_dev,
+ struct parent_image_info *pii)
+{
+ if (pii->pool_id == CEPH_NOPOOL || !pii->has_overlap) {
+ /*
+ * Either the parent never existed, or we have a
+ * record of it but the image got flattened so it no
+ * longer has a parent. When the parent of a
+ * layered image disappears we immediately set the
+ * overlap to 0. The effect of this is that all new
+ * requests will be treated as if the image had no
+ * parent.
+ *
+ * If !pii.has_overlap, the parent image spec is not
+ * applicable. It's there to avoid duplication in each
+ * snapshot record.
+ */
+ if (rbd_dev->parent_overlap) {
+ rbd_dev->parent_overlap = 0;
+ rbd_dev_parent_put(rbd_dev);
+ pr_info("%s: clone has been flattened\n",
+ rbd_dev->disk->disk_name);
+ }
+ } else {
+ rbd_assert(rbd_dev->parent_spec);
+
+ /*
+ * Update the parent overlap. If it became zero, issue
+ * a warning as we will proceed as if there is no parent.
+ */
+ if (!pii->overlap && rbd_dev->parent_overlap)
+ rbd_warn(rbd_dev,
+ "clone has become standalone (overlap 0)");
+ rbd_dev->parent_overlap = pii->overlap;
+ }
+}
+
+static int rbd_dev_refresh(struct rbd_device *rbd_dev)
+{
+ struct rbd_image_header header = { 0 };
+ struct parent_image_info pii = { 0 };
+ int ret;
+
+ dout("%s rbd_dev %p\n", __func__, rbd_dev);
+
+ ret = rbd_dev_header_info(rbd_dev, &header, false);
+ if (ret)
+ goto out;
+
+ /*
+ * If there is a parent, see if it has disappeared due to the
+ * mapped image getting flattened.
+ */
+ if (rbd_dev->parent) {
+ ret = rbd_dev_v2_parent_info(rbd_dev, &pii);
+ if (ret)
+ goto out;
+ }
+
+ down_write(&rbd_dev->header_rwsem);
+ rbd_dev_update_header(rbd_dev, &header);
+ if (rbd_dev->parent)
+ rbd_dev_update_parent(rbd_dev, &pii);
+ up_write(&rbd_dev->header_rwsem);
+
+out:
+ rbd_parent_info_cleanup(&pii);
+ rbd_image_header_cleanup(&header);
+ return ret;
+}
+
static ssize_t do_rbd_add(struct bus_type *bus,
const char *buf,
size_t count)
diff --git a/drivers/firewire/sbp2.c b/drivers/firewire/sbp2.c
index 60051c0cabea..e322a326546b 100644
--- a/drivers/firewire/sbp2.c
+++ b/drivers/firewire/sbp2.c
@@ -81,7 +81,8 @@ MODULE_PARM_DESC(exclusive_login, "Exclusive login to sbp2 device "
*
* - power condition
* Set the power condition field in the START STOP UNIT commands sent by
- * sd_mod on suspend, resume, and shutdown (if manage_start_stop is on).
+ * sd_mod on suspend, resume, and shutdown (if manage_system_start_stop or
+ * manage_runtime_start_stop is on).
* Some disks need this to spin down or to resume properly.
*
* - override internal blacklist
@@ -1517,8 +1518,10 @@ static int sbp2_scsi_slave_configure(struct scsi_device *sdev)

sdev->use_10_for_rw = 1;

- if (sbp2_param_exclusive_login)
- sdev->manage_start_stop = 1;
+ if (sbp2_param_exclusive_login) {
+ sdev->manage_system_start_stop = true;
+ sdev->manage_runtime_start_stop = true;
+ }

if (sdev->type == TYPE_ROM)
sdev->use_10_for_ms = 1;
diff --git a/drivers/gpio/gpio-aspeed.c b/drivers/gpio/gpio-aspeed.c
index 318a7d95a1a8..42d3e1cf7352 100644
--- a/drivers/gpio/gpio-aspeed.c
+++ b/drivers/gpio/gpio-aspeed.c
@@ -963,7 +963,7 @@ static int aspeed_gpio_set_config(struct gpio_chip *chip, unsigned int offset,
else if (param == PIN_CONFIG_BIAS_DISABLE ||
param == PIN_CONFIG_BIAS_PULL_DOWN ||
param == PIN_CONFIG_DRIVE_STRENGTH)
- return pinctrl_gpio_set_config(offset, config);
+ return pinctrl_gpio_set_config(chip->base + offset, config);
else if (param == PIN_CONFIG_DRIVE_OPEN_DRAIN ||
param == PIN_CONFIG_DRIVE_OPEN_SOURCE)
/* Return -ENOTSUPP to trigger emulation, as per datasheet */
diff --git a/drivers/gpio/gpio-pxa.c b/drivers/gpio/gpio-pxa.c
index 1198ab0305d0..b90357774dc0 100644
--- a/drivers/gpio/gpio-pxa.c
+++ b/drivers/gpio/gpio-pxa.c
@@ -243,6 +243,7 @@ static bool pxa_gpio_has_pinctrl(void)
switch (gpio_type) {
case PXA3XX_GPIO:
case MMP2_GPIO:
+ case MMP_GPIO:
return false;

default:
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 5f5999cea7d2..92fa2faf63e4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2179,7 +2179,7 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
adev->flags |= AMD_IS_PX;

if (!(adev->flags & AMD_IS_APU)) {
- parent = pci_upstream_bridge(adev->pdev);
+ parent = pcie_find_root_port(adev->pdev);
adev->has_pr3 = parent ? pci_pr3_present(parent) : false;
}

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 18274ff5082a..339f1f5a0833 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -2344,14 +2344,62 @@ static int dm_late_init(void *handle)
return detect_mst_link_for_all_connectors(adev_to_drm(adev));
}

+static void resume_mst_branch_status(struct drm_dp_mst_topology_mgr *mgr)
+{
+ int ret;
+ u8 guid[16];
+ u64 tmp64;
+
+ mutex_lock(&mgr->lock);
+ if (!mgr->mst_primary)
+ goto out_fail;
+
+ if (drm_dp_read_dpcd_caps(mgr->aux, mgr->dpcd) < 0) {
+ drm_dbg_kms(mgr->dev, "dpcd read failed - undocked during suspend?\n");
+ goto out_fail;
+ }
+
+ ret = drm_dp_dpcd_writeb(mgr->aux, DP_MSTM_CTRL,
+ DP_MST_EN |
+ DP_UP_REQ_EN |
+ DP_UPSTREAM_IS_SRC);
+ if (ret < 0) {
+ drm_dbg_kms(mgr->dev, "mst write failed - undocked during suspend?\n");
+ goto out_fail;
+ }
+
+ /* Some hubs forget their guids after they resume */
+ ret = drm_dp_dpcd_read(mgr->aux, DP_GUID, guid, 16);
+ if (ret != 16) {
+ drm_dbg_kms(mgr->dev, "dpcd read failed - undocked during suspend?\n");
+ goto out_fail;
+ }
+
+ if (memchr_inv(guid, 0, 16) == NULL) {
+ tmp64 = get_jiffies_64();
+ memcpy(&guid[0], &tmp64, sizeof(u64));
+ memcpy(&guid[8], &tmp64, sizeof(u64));
+
+ ret = drm_dp_dpcd_write(mgr->aux, DP_GUID, guid, 16);
+
+ if (ret != 16) {
+ drm_dbg_kms(mgr->dev, "check mstb guid failed - undocked during suspend?\n");
+ goto out_fail;
+ }
+ }
+
+ memcpy(mgr->mst_primary->guid, guid, 16);
+
+out_fail:
+ mutex_unlock(&mgr->lock);
+}
+
static void s3_handle_mst(struct drm_device *dev, bool suspend)
{
struct amdgpu_dm_connector *aconnector;
struct drm_connector *connector;
struct drm_connector_list_iter iter;
struct drm_dp_mst_topology_mgr *mgr;
- int ret;
- bool need_hotplug = false;

drm_connector_list_iter_begin(dev, &iter);
drm_for_each_connector_iter(connector, &iter) {
@@ -2373,18 +2421,15 @@ static void s3_handle_mst(struct drm_device *dev, bool suspend)
if (!dp_is_lttpr_present(aconnector->dc_link))
dc_link_aux_try_to_configure_timeout(aconnector->dc_link->ddc, LINK_AUX_DEFAULT_TIMEOUT_PERIOD);

- ret = drm_dp_mst_topology_mgr_resume(mgr, true);
- if (ret < 0) {
- dm_helpers_dp_mst_stop_top_mgr(aconnector->dc_link->ctx,
- aconnector->dc_link);
- need_hotplug = true;
- }
+ /* TODO: move resume_mst_branch_status() into drm mst resume again
+ * once topology probing work is pulled out from mst resume into mst
+ * resume 2nd step. mst resume 2nd step should be called after the old
+ * state has been restored (i.e. drm_atomic_helper_resume()).
+ */
+ resume_mst_branch_status(mgr);
}
}
drm_connector_list_iter_end(&iter);
-
- if (need_hotplug)
- drm_kms_helper_hotplug_event(dev);
}

static int amdgpu_dm_smu_write_watermarks_table(struct amdgpu_device *adev)
@@ -2773,7 +2818,8 @@ static int dm_resume(void *handle)
struct dm_atomic_state *dm_state = to_dm_atomic_state(dm->atomic_obj.state);
enum dc_connection_type new_connection_type = dc_connection_none;
struct dc_state *dc_state;
- int i, r, j;
+ int i, r, j, ret;
+ bool need_hotplug = false;

if (amdgpu_in_reset(adev)) {
dc_state = dm->cached_dc_state;
@@ -2871,7 +2917,7 @@ static int dm_resume(void *handle)
continue;

/*
- * this is the case when traversing through already created
+ * this is the case when traversing through already created end sink
* MST connectors, should be skipped
*/
if (aconnector && aconnector->mst_port)
@@ -2931,6 +2977,27 @@ static int dm_resume(void *handle)

dm->cached_state = NULL;

+ /* Do mst topology probing after resuming cached state*/
+ drm_connector_list_iter_begin(ddev, &iter);
+ drm_for_each_connector_iter(connector, &iter) {
+ aconnector = to_amdgpu_dm_connector(connector);
+ if (aconnector->dc_link->type != dc_connection_mst_branch ||
+ aconnector->mst_port)
+ continue;
+
+ ret = drm_dp_mst_topology_mgr_resume(&aconnector->mst_mgr, true);
+
+ if (ret < 0) {
+ dm_helpers_dp_mst_stop_top_mgr(aconnector->dc_link->ctx,
+ aconnector->dc_link);
+ need_hotplug = true;
+ }
+ }
+ drm_connector_list_iter_end(&iter);
+
+ if (need_hotplug)
+ drm_kms_helper_hotplug_event(ddev);
+
amdgpu_dm_irq_resume_late(adev);

amdgpu_dm_smu_write_watermarks_table(adev);
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
index 839a812e0da3..fbc4d706748b 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
@@ -2081,36 +2081,41 @@ static int sienna_cichlid_display_disable_memory_clock_switch(struct smu_context
return ret;
}

+#define MAX(a, b) ((a) > (b) ? (a) : (b))
+
static int sienna_cichlid_update_pcie_parameters(struct smu_context *smu,
uint32_t pcie_gen_cap,
uint32_t pcie_width_cap)
{
struct smu_11_0_dpm_context *dpm_context = smu->smu_dpm.dpm_context;
struct smu_11_0_pcie_table *pcie_table = &dpm_context->dpm_tables.pcie_table;
- u32 smu_pcie_arg;
+ uint8_t *table_member1, *table_member2;
+ uint32_t min_gen_speed, max_gen_speed;
+ uint32_t min_lane_width, max_lane_width;
+ uint32_t smu_pcie_arg;
int ret, i;

- /* PCIE gen speed and lane width override */
- if (!amdgpu_device_pcie_dynamic_switching_supported()) {
- if (pcie_table->pcie_gen[NUM_LINK_LEVELS - 1] < pcie_gen_cap)
- pcie_gen_cap = pcie_table->pcie_gen[NUM_LINK_LEVELS - 1];
+ GET_PPTABLE_MEMBER(PcieGenSpeed, &table_member1);
+ GET_PPTABLE_MEMBER(PcieLaneCount, &table_member2);

- if (pcie_table->pcie_lane[NUM_LINK_LEVELS - 1] < pcie_width_cap)
- pcie_width_cap = pcie_table->pcie_lane[NUM_LINK_LEVELS - 1];
+ min_gen_speed = MAX(0, table_member1[0]);
+ max_gen_speed = MIN(pcie_gen_cap, table_member1[1]);
+ min_gen_speed = min_gen_speed > max_gen_speed ?
+ max_gen_speed : min_gen_speed;
+ min_lane_width = MAX(1, table_member2[0]);
+ max_lane_width = MIN(pcie_width_cap, table_member2[1]);
+ min_lane_width = min_lane_width > max_lane_width ?
+ max_lane_width : min_lane_width;

- /* Force all levels to use the same settings */
- for (i = 0; i < NUM_LINK_LEVELS; i++) {
- pcie_table->pcie_gen[i] = pcie_gen_cap;
- pcie_table->pcie_lane[i] = pcie_width_cap;
- }
+ if (!amdgpu_device_pcie_dynamic_switching_supported()) {
+ pcie_table->pcie_gen[0] = max_gen_speed;
+ pcie_table->pcie_lane[0] = max_lane_width;
} else {
- for (i = 0; i < NUM_LINK_LEVELS; i++) {
- if (pcie_table->pcie_gen[i] > pcie_gen_cap)
- pcie_table->pcie_gen[i] = pcie_gen_cap;
- if (pcie_table->pcie_lane[i] > pcie_width_cap)
- pcie_table->pcie_lane[i] = pcie_width_cap;
- }
+ pcie_table->pcie_gen[0] = min_gen_speed;
+ pcie_table->pcie_lane[0] = min_lane_width;
}
+ pcie_table->pcie_gen[1] = max_gen_speed;
+ pcie_table->pcie_lane[1] = max_lane_width;

for (i = 0; i < NUM_LINK_LEVELS; i++) {
smu_pcie_arg = (i << 16 |
diff --git a/drivers/hid/hid-sony.c b/drivers/hid/hid-sony.c
index 03691cdcfb8e..f7f7252d839e 100644
--- a/drivers/hid/hid-sony.c
+++ b/drivers/hid/hid-sony.c
@@ -3074,6 +3074,8 @@ static int sony_probe(struct hid_device *hdev, const struct hid_device_id *id)
return ret;

err:
+ usb_free_urb(sc->ghl_urb);
+
hid_hw_stop(hdev);
return ret;
}
diff --git a/drivers/hid/intel-ish-hid/ipc/pci-ish.c b/drivers/hid/intel-ish-hid/ipc/pci-ish.c
index 55cb25038e63..710fda5f19e1 100644
--- a/drivers/hid/intel-ish-hid/ipc/pci-ish.c
+++ b/drivers/hid/intel-ish-hid/ipc/pci-ish.c
@@ -133,6 +133,14 @@ static int enable_gpe(struct device *dev)
}
wakeup = &adev->wakeup;

+ /*
+ * Call acpi_disable_gpe(), so that reference count
+ * gpe_event_info->runtime_count doesn't overflow.
+ * When gpe_event_info->runtime_count = 0, the call
+ * to acpi_disable_gpe() simply returns.
+ */
+ acpi_disable_gpe(wakeup->gpe_device, wakeup->gpe_number);
+
acpi_sts = acpi_enable_gpe(wakeup->gpe_device, wakeup->gpe_number);
if (ACPI_FAILURE(acpi_sts)) {
dev_err(dev, "enable ose_gpe failed\n");
diff --git a/drivers/hwmon/nzxt-smart2.c b/drivers/hwmon/nzxt-smart2.c
index 533f38b0b4e9..a8e72d8fd060 100644
--- a/drivers/hwmon/nzxt-smart2.c
+++ b/drivers/hwmon/nzxt-smart2.c
@@ -791,6 +791,8 @@ static const struct hid_device_id nzxt_smart2_hid_id_table[] = {
{ HID_USB_DEVICE(0x1e71, 0x2009) }, /* NZXT RGB & Fan Controller */
{ HID_USB_DEVICE(0x1e71, 0x200e) }, /* NZXT RGB & Fan Controller */
{ HID_USB_DEVICE(0x1e71, 0x2010) }, /* NZXT RGB & Fan Controller */
+ { HID_USB_DEVICE(0x1e71, 0x2011) }, /* NZXT RGB & Fan Controller (6 RGB) */
+ { HID_USB_DEVICE(0x1e71, 0x2019) }, /* NZXT RGB & Fan Controller (6 RGB) */
{},
};

diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c
index cfeb24d40d37..bb3d10099ba4 100644
--- a/drivers/idle/intel_idle.c
+++ b/drivers/idle/intel_idle.c
@@ -1430,6 +1430,7 @@ static const struct x86_cpu_id intel_idle_ids[] __initconst = {
X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, &idle_cpu_adl_l),
X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_N, &idle_cpu_adl_n),
X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &idle_cpu_spr),
+ X86_MATCH_INTEL_FAM6_MODEL(EMERALDRAPIDS_X, &idle_cpu_spr),
X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL, &idle_cpu_knl),
X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNM, &idle_cpu_knl),
X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT, &idle_cpu_bxt),
@@ -1862,6 +1863,7 @@ static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv)
skx_idle_state_table_update();
break;
case INTEL_FAM6_SAPPHIRERAPIDS_X:
+ case INTEL_FAM6_EMERALDRAPIDS_X:
spr_idle_state_table_update();
break;
case INTEL_FAM6_ALDERLAKE:
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 4632b1833381..0773ca7ace24 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -4936,7 +4936,7 @@ static int cma_iboe_join_multicast(struct rdma_id_private *id_priv,
int err = 0;
struct sockaddr *addr = (struct sockaddr *)&mc->addr;
struct net_device *ndev = NULL;
- struct ib_sa_multicast ib;
+ struct ib_sa_multicast ib = {};
enum ib_gid_type gid_type;
bool send_only;

diff --git a/drivers/infiniband/core/cma_configfs.c b/drivers/infiniband/core/cma_configfs.c
index 7b68b3ea979f..f2fb2d8a6597 100644
--- a/drivers/infiniband/core/cma_configfs.c
+++ b/drivers/infiniband/core/cma_configfs.c
@@ -217,7 +217,7 @@ static int make_cma_ports(struct cma_dev_group *cma_dev_group,
return -ENOMEM;

for (i = 0; i < ports_num; i++) {
- char port_str[10];
+ char port_str[11];

ports[i].port_num = i + 1;
snprintf(port_str, sizeof(port_str), "%u", i + 1);
diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c
index 222733a83ddb..1adf20198afd 100644
--- a/drivers/infiniband/core/nldev.c
+++ b/drivers/infiniband/core/nldev.c
@@ -2501,6 +2501,7 @@ static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = {
},
[RDMA_NLDEV_CMD_SYS_SET] = {
.doit = nldev_set_sys_set_doit,
+ .flags = RDMA_NL_ADMIN_PERM,
},
[RDMA_NLDEV_CMD_STAT_SET] = {
.doit = nldev_stat_set_doit,
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index fa937cd26821..6fe825800494 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -535,7 +535,7 @@ static ssize_t verify_hdr(struct ib_uverbs_cmd_hdr *hdr,
if (hdr->in_words * 4 != count)
return -EINVAL;

- if (count < method_elm->req_size + sizeof(hdr)) {
+ if (count < method_elm->req_size + sizeof(*hdr)) {
/*
* rdma-core v18 and v19 have a bug where they send DESTROY_CQ
* with a 16 byte write instead of 24. Old kernels didn't
diff --git a/drivers/infiniband/hw/mlx4/sysfs.c b/drivers/infiniband/hw/mlx4/sysfs.c
index 24ee79aa2122..88f534cf690e 100644
--- a/drivers/infiniband/hw/mlx4/sysfs.c
+++ b/drivers/infiniband/hw/mlx4/sysfs.c
@@ -223,7 +223,7 @@ void del_sysfs_port_mcg_attr(struct mlx4_ib_dev *device, int port_num,
static int add_port_entries(struct mlx4_ib_dev *device, int port_num)
{
int i;
- char buff[11];
+ char buff[12];
struct mlx4_ib_iov_port *port = NULL;
int ret = 0 ;
struct ib_port_attr attr;
diff --git a/drivers/infiniband/hw/mlx5/fs.c b/drivers/infiniband/hw/mlx5/fs.c
index 5a13d902b064..1022cebd0a46 100644
--- a/drivers/infiniband/hw/mlx5/fs.c
+++ b/drivers/infiniband/hw/mlx5/fs.c
@@ -2471,8 +2471,8 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_STEERING_ANCHOR_CREATE)(
mlx5_steering_anchor_destroy_res(ft_prio);
put_flow_table:
put_flow_table(dev, ft_prio, true);
- mutex_unlock(&dev->flow_db->lock);
free_obj:
+ mutex_unlock(&dev->flow_db->lock);
kfree(obj);

return err;
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 3178df55c4d8..0baf3b5518b4 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -2074,7 +2074,7 @@ static inline char *mmap_cmd2str(enum mlx5_ib_mmap_cmd cmd)
case MLX5_IB_MMAP_DEVICE_MEM:
return "Device Memory";
default:
- return NULL;
+ return "Unknown";
}
}

diff --git a/drivers/infiniband/sw/siw/siw_cm.c b/drivers/infiniband/sw/siw/siw_cm.c
index 552d8271e423..dc679c34ceef 100644
--- a/drivers/infiniband/sw/siw/siw_cm.c
+++ b/drivers/infiniband/sw/siw/siw_cm.c
@@ -973,6 +973,7 @@ static void siw_accept_newconn(struct siw_cep *cep)
siw_cep_put(cep);
new_cep->listen_cep = NULL;
if (rv) {
+ siw_cancel_mpatimer(new_cep);
siw_cep_set_free(new_cep);
goto error;
}
@@ -1097,9 +1098,12 @@ static void siw_cm_work_handler(struct work_struct *w)
/*
* Socket close before MPA request received.
*/
- siw_dbg_cep(cep, "no mpareq: drop listener\n");
- siw_cep_put(cep->listen_cep);
- cep->listen_cep = NULL;
+ if (cep->listen_cep) {
+ siw_dbg_cep(cep,
+ "no mpareq: drop listener\n");
+ siw_cep_put(cep->listen_cep);
+ cep->listen_cep = NULL;
+ }
}
}
release_cep = 1;
@@ -1222,7 +1226,11 @@ static void siw_cm_llp_data_ready(struct sock *sk)
if (!cep)
goto out;

- siw_dbg_cep(cep, "state: %d\n", cep->state);
+ siw_dbg_cep(cep, "cep state: %d, socket state %d\n",
+ cep->state, sk->sk_state);
+
+ if (sk->sk_state != TCP_ESTABLISHED)
+ goto out;

switch (cep->state) {
case SIW_EPSTATE_RDMA_MODE:
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index a7580c4855fe..c4dcef76e964 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -2789,7 +2789,6 @@ static int srp_abort(struct scsi_cmnd *scmnd)
u32 tag;
u16 ch_idx;
struct srp_rdma_ch *ch;
- int ret;

shost_printk(KERN_ERR, target->scsi_host, "SRP abort called\n");

@@ -2803,19 +2802,14 @@ static int srp_abort(struct scsi_cmnd *scmnd)
shost_printk(KERN_ERR, target->scsi_host,
"Sending SRP abort for tag %#x\n", tag);
if (srp_send_tsk_mgmt(ch, tag, scmnd->device->lun,
- SRP_TSK_ABORT_TASK, NULL) == 0)
- ret = SUCCESS;
- else if (target->rport->state == SRP_RPORT_LOST)
- ret = FAST_IO_FAIL;
- else
- ret = FAILED;
- if (ret == SUCCESS) {
+ SRP_TSK_ABORT_TASK, NULL) == 0) {
srp_free_req(ch, req, scmnd, 0);
- scmnd->result = DID_ABORT << 16;
- scsi_done(scmnd);
+ return SUCCESS;
}
+ if (target->rport->state == SRP_RPORT_LOST)
+ return FAST_IO_FAIL;

- return ret;
+ return FAILED;
}

static int srp_reset_device(struct scsi_cmnd *scmnd)
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index db33dc87f69e..8966f7d5aab6 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -1886,13 +1886,23 @@ static void __arm_smmu_tlb_inv_range(struct arm_smmu_cmdq_ent *cmd,
/* Get the leaf page size */
tg = __ffs(smmu_domain->domain.pgsize_bitmap);

+ num_pages = size >> tg;
+
/* Convert page size of 12,14,16 (log2) to 1,2,3 */
cmd->tlbi.tg = (tg - 10) / 2;

- /* Determine what level the granule is at */
- cmd->tlbi.ttl = 4 - ((ilog2(granule) - 3) / (tg - 3));
-
- num_pages = size >> tg;
+ /*
+ * Determine what level the granule is at. For non-leaf, both
+ * io-pgtable and SVA pass a nominal last-level granule because
+ * they don't know what level(s) actually apply, so ignore that
+ * and leave TTL=0. However for various errata reasons we still
+ * want to use a range command, so avoid the SVA corner case
+ * where both scale and num could be 0 as well.
+ */
+ if (cmd->tlbi.leaf)
+ cmd->tlbi.ttl = 4 - ((ilog2(granule) - 3) / (tg - 3));
+ else if ((num_pages & CMDQ_TLBI_RANGE_NUM_MAX) == 1)
+ num_pages++;
}

cmds.num = 0;
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index d4b5d20bd6dd..5c4f5aa8e87e 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -3163,13 +3163,6 @@ static int iommu_suspend(void)
struct intel_iommu *iommu = NULL;
unsigned long flag;

- for_each_active_iommu(iommu, drhd) {
- iommu->iommu_state = kcalloc(MAX_SR_DMAR_REGS, sizeof(u32),
- GFP_KERNEL);
- if (!iommu->iommu_state)
- goto nomem;
- }
-
iommu_flush_all();

for_each_active_iommu(iommu, drhd) {
@@ -3189,12 +3182,6 @@ static int iommu_suspend(void)
raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
}
return 0;
-
-nomem:
- for_each_active_iommu(iommu, drhd)
- kfree(iommu->iommu_state);
-
- return -ENOMEM;
}

static void iommu_resume(void)
@@ -3226,9 +3213,6 @@ static void iommu_resume(void)

raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
}
-
- for_each_active_iommu(iommu, drhd)
- kfree(iommu->iommu_state);
}

static struct syscore_ops iommu_syscore_ops = {
diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h
index db9df7c3790c..c99cb715bd9a 100644
--- a/drivers/iommu/intel/iommu.h
+++ b/drivers/iommu/intel/iommu.h
@@ -595,7 +595,7 @@ struct intel_iommu {
struct iopf_queue *iopf_queue;
unsigned char iopfq_name[16];
struct q_inval *qi; /* Queued invalidation info */
- u32 *iommu_state; /* Store iommu states between suspend and resume.*/
+ u32 iommu_state[MAX_SR_DMAR_REGS]; /* Store iommu states between suspend and resume.*/

#ifdef CONFIG_IRQ_REMAP
struct ir_table *ir_table; /* Interrupt remapping info */
diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
index 9673cd60c84f..0ba2a63a9538 100644
--- a/drivers/iommu/mtk_iommu.c
+++ b/drivers/iommu/mtk_iommu.c
@@ -223,7 +223,7 @@ struct mtk_iommu_data {
struct device *smicomm_dev;

struct mtk_iommu_bank_data *bank;
- struct mtk_iommu_domain *share_dom; /* For 2 HWs share pgtable */
+ struct mtk_iommu_domain *share_dom;

struct regmap *pericfg;
struct mutex mutex; /* Protect m4u_group/m4u_dom above */
@@ -579,8 +579,8 @@ static int mtk_iommu_domain_finalise(struct mtk_iommu_domain *dom,
struct mtk_iommu_domain *share_dom = data->share_dom;
const struct mtk_iommu_iova_region *region;

- /* Always use share domain in sharing pgtable case */
- if (MTK_IOMMU_HAS_FLAG(data->plat_data, SHARE_PGTABLE) && share_dom) {
+ /* Share pgtable when 2 MM IOMMUs share the pgtable or one IOMMU uses multiple iova ranges */
+ if (share_dom) {
dom->iop = share_dom->iop;
dom->cfg = share_dom->cfg;
dom->domain.pgsize_bitmap = share_dom->cfg.pgsize_bitmap;
@@ -613,8 +613,7 @@ static int mtk_iommu_domain_finalise(struct mtk_iommu_domain *dom,
/* Update our support page sizes bitmap */
dom->domain.pgsize_bitmap = dom->cfg.pgsize_bitmap;

- if (MTK_IOMMU_HAS_FLAG(data->plat_data, SHARE_PGTABLE))
- data->share_dom = dom;
+ data->share_dom = dom;

update_iova_region:
/* Update the iova region for this domain */
diff --git a/drivers/leds/led-core.c b/drivers/leds/led-core.c
index aad8bc44459f..d94d60b52646 100644
--- a/drivers/leds/led-core.c
+++ b/drivers/leds/led-core.c
@@ -424,10 +424,6 @@ int led_compose_name(struct device *dev, struct led_init_data *init_data,

led_parse_fwnode_props(dev, fwnode, &props);

- /* We want to label LEDs that can produce full range of colors
- * as RGB, not multicolor */
- BUG_ON(props.color == LED_COLOR_ID_MULTI);
-
if (props.label) {
/*
* If init_data.devicename is NULL, then it indicates that
diff --git a/drivers/md/dm-zoned-target.c b/drivers/md/dm-zoned-target.c
index 95b132b52f33..4abe1e2f8ad8 100644
--- a/drivers/md/dm-zoned-target.c
+++ b/drivers/md/dm-zoned-target.c
@@ -748,17 +748,16 @@ static int dmz_get_zoned_device(struct dm_target *ti, char *path,
/*
* Cleanup zoned device information.
*/
-static void dmz_put_zoned_device(struct dm_target *ti)
+static void dmz_put_zoned_devices(struct dm_target *ti)
{
struct dmz_target *dmz = ti->private;
int i;

- for (i = 0; i < dmz->nr_ddevs; i++) {
- if (dmz->ddev[i]) {
+ for (i = 0; i < dmz->nr_ddevs; i++)
+ if (dmz->ddev[i])
dm_put_device(ti, dmz->ddev[i]);
- dmz->ddev[i] = NULL;
- }
- }
+
+ kfree(dmz->ddev);
}

static int dmz_fixup_devices(struct dm_target *ti)
@@ -948,7 +947,7 @@ static int dmz_ctr(struct dm_target *ti, unsigned int argc, char **argv)
err_meta:
dmz_dtr_metadata(dmz->metadata);
err_dev:
- dmz_put_zoned_device(ti);
+ dmz_put_zoned_devices(ti);
err:
kfree(dmz->dev);
kfree(dmz);
@@ -978,7 +977,7 @@ static void dmz_dtr(struct dm_target *ti)

bioset_exit(&dmz->bio_set);

- dmz_put_zoned_device(ti);
+ dmz_put_zoned_devices(ti);

mutex_destroy(&dmz->chunk_lock);

diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index fbef3c9badb6..98d4e93efa31 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -854,6 +854,13 @@ struct stripe_head *raid5_get_active_stripe(struct r5conf *conf,

set_bit(R5_INACTIVE_BLOCKED, &conf->cache_state);
r5l_wake_reclaim(conf->log, 0);
+
+ /* release batch_last before wait to avoid risk of deadlock */
+ if (ctx && ctx->batch_last) {
+ raid5_release_stripe(ctx->batch_last);
+ ctx->batch_last = NULL;
+ }
+
wait_event_lock_irq(conf->wait_for_stripe,
is_inactive_blocked(conf, hash),
*(conf->hash_locks + hash));
diff --git a/drivers/mtd/ubi/build.c b/drivers/mtd/ubi/build.c
index 1662c12e24ad..6fbd77dc1d18 100644
--- a/drivers/mtd/ubi/build.c
+++ b/drivers/mtd/ubi/build.c
@@ -893,6 +893,13 @@ int ubi_attach_mtd_dev(struct mtd_info *mtd, int ubi_num,
return -EINVAL;
}

+ /* UBI cannot work on flashes with zero erasesize. */
+ if (!mtd->erasesize) {
+ pr_err("ubi: refuse attaching mtd%d - zero erasesize flash is not supported\n",
+ mtd->index);
+ return -EINVAL;
+ }
+
if (ubi_num == UBI_DEV_NUM_AUTO) {
/* Search for an empty slot in the @ubi_devices array */
for (ubi_num = 0; ubi_num < UBI_MAX_DEVICES; ubi_num++)
diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index a73008b9e0b3..ba906dfab055 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -3012,14 +3012,16 @@ static void mv88e6xxx_hardware_reset(struct mv88e6xxx_chip *chip)
* from the wrong location resulting in the switch booting
* to wrong mode and inoperable.
*/
- mv88e6xxx_g1_wait_eeprom_done(chip);
+ if (chip->info->ops->get_eeprom)
+ mv88e6xxx_g2_eeprom_wait(chip);

gpiod_set_value_cansleep(gpiod, 1);
usleep_range(10000, 20000);
gpiod_set_value_cansleep(gpiod, 0);
usleep_range(10000, 20000);

- mv88e6xxx_g1_wait_eeprom_done(chip);
+ if (chip->info->ops->get_eeprom)
+ mv88e6xxx_g2_eeprom_wait(chip);
}
}

diff --git a/drivers/net/dsa/mv88e6xxx/global1.c b/drivers/net/dsa/mv88e6xxx/global1.c
index 5848112036b0..964928285782 100644
--- a/drivers/net/dsa/mv88e6xxx/global1.c
+++ b/drivers/net/dsa/mv88e6xxx/global1.c
@@ -75,37 +75,6 @@ static int mv88e6xxx_g1_wait_init_ready(struct mv88e6xxx_chip *chip)
return mv88e6xxx_g1_wait_bit(chip, MV88E6XXX_G1_STS, bit, 1);
}

-void mv88e6xxx_g1_wait_eeprom_done(struct mv88e6xxx_chip *chip)
-{
- const unsigned long timeout = jiffies + 1 * HZ;
- u16 val;
- int err;
-
- /* Wait up to 1 second for the switch to finish reading the
- * EEPROM.
- */
- while (time_before(jiffies, timeout)) {
- err = mv88e6xxx_g1_read(chip, MV88E6XXX_G1_STS, &val);
- if (err) {
- dev_err(chip->dev, "Error reading status");
- return;
- }
-
- /* If the switch is still resetting, it may not
- * respond on the bus, and so MDIO read returns
- * 0xffff. Differentiate between that, and waiting for
- * the EEPROM to be done by bit 0 being set.
- */
- if (val != 0xffff &&
- val & BIT(MV88E6XXX_G1_STS_IRQ_EEPROM_DONE))
- return;
-
- usleep_range(1000, 2000);
- }
-
- dev_err(chip->dev, "Timeout waiting for EEPROM done");
-}
-
/* Offset 0x01: Switch MAC Address Register Bytes 0 & 1
* Offset 0x02: Switch MAC Address Register Bytes 2 & 3
* Offset 0x03: Switch MAC Address Register Bytes 4 & 5
diff --git a/drivers/net/dsa/mv88e6xxx/global1.h b/drivers/net/dsa/mv88e6xxx/global1.h
index 65958b2a0d3a..04b57a21f786 100644
--- a/drivers/net/dsa/mv88e6xxx/global1.h
+++ b/drivers/net/dsa/mv88e6xxx/global1.h
@@ -281,7 +281,6 @@ int mv88e6xxx_g1_set_switch_mac(struct mv88e6xxx_chip *chip, u8 *addr);
int mv88e6185_g1_reset(struct mv88e6xxx_chip *chip);
int mv88e6352_g1_reset(struct mv88e6xxx_chip *chip);
int mv88e6250_g1_reset(struct mv88e6xxx_chip *chip);
-void mv88e6xxx_g1_wait_eeprom_done(struct mv88e6xxx_chip *chip);

int mv88e6185_g1_ppu_enable(struct mv88e6xxx_chip *chip);
int mv88e6185_g1_ppu_disable(struct mv88e6xxx_chip *chip);
diff --git a/drivers/net/dsa/mv88e6xxx/global2.c b/drivers/net/dsa/mv88e6xxx/global2.c
index ec49939968fa..ac302a935ce6 100644
--- a/drivers/net/dsa/mv88e6xxx/global2.c
+++ b/drivers/net/dsa/mv88e6xxx/global2.c
@@ -340,7 +340,7 @@ int mv88e6xxx_g2_pot_clear(struct mv88e6xxx_chip *chip)
* Offset 0x15: EEPROM Addr (for 8-bit data access)
*/

-static int mv88e6xxx_g2_eeprom_wait(struct mv88e6xxx_chip *chip)
+int mv88e6xxx_g2_eeprom_wait(struct mv88e6xxx_chip *chip)
{
int bit = __bf_shf(MV88E6XXX_G2_EEPROM_CMD_BUSY);
int err;
diff --git a/drivers/net/dsa/mv88e6xxx/global2.h b/drivers/net/dsa/mv88e6xxx/global2.h
index c05fad5c9f19..751a6c988de4 100644
--- a/drivers/net/dsa/mv88e6xxx/global2.h
+++ b/drivers/net/dsa/mv88e6xxx/global2.h
@@ -359,6 +359,7 @@ int mv88e6xxx_g2_trunk_clear(struct mv88e6xxx_chip *chip);

int mv88e6xxx_g2_device_mapping_write(struct mv88e6xxx_chip *chip, int target,
int port);
+int mv88e6xxx_g2_eeprom_wait(struct mv88e6xxx_chip *chip);

extern const struct mv88e6xxx_irq_ops mv88e6097_watchdog_ops;
extern const struct mv88e6xxx_irq_ops mv88e6250_watchdog_ops;
diff --git a/drivers/net/ethernet/ibm/ibmveth.c b/drivers/net/ethernet/ibm/ibmveth.c
index 0b4ec6e41eb4..1d21a281222d 100644
--- a/drivers/net/ethernet/ibm/ibmveth.c
+++ b/drivers/net/ethernet/ibm/ibmveth.c
@@ -1308,24 +1308,23 @@ static void ibmveth_rx_csum_helper(struct sk_buff *skb,
* the user space for finding a flow. During this process, OVS computes
* checksum on the first packet when CHECKSUM_PARTIAL flag is set.
*
- * So, re-compute TCP pseudo header checksum when configured for
- * trunk mode.
+ * So, re-compute TCP pseudo header checksum.
*/
+
if (iph_proto == IPPROTO_TCP) {
struct tcphdr *tcph = (struct tcphdr *)(skb->data + iphlen);
+
if (tcph->check == 0x0000) {
/* Recompute TCP pseudo header checksum */
- if (adapter->is_active_trunk) {
- tcphdrlen = skb->len - iphlen;
- if (skb_proto == ETH_P_IP)
- tcph->check =
- ~csum_tcpudp_magic(iph->saddr,
- iph->daddr, tcphdrlen, iph_proto, 0);
- else if (skb_proto == ETH_P_IPV6)
- tcph->check =
- ~csum_ipv6_magic(&iph6->saddr,
- &iph6->daddr, tcphdrlen, iph_proto, 0);
- }
+ tcphdrlen = skb->len - iphlen;
+ if (skb_proto == ETH_P_IP)
+ tcph->check =
+ ~csum_tcpudp_magic(iph->saddr,
+ iph->daddr, tcphdrlen, iph_proto, 0);
+ else if (skb_proto == ETH_P_IPV6)
+ tcph->check =
+ ~csum_ipv6_magic(&iph6->saddr,
+ &iph6->daddr, tcphdrlen, iph_proto, 0);
/* Setup SKB fields for checksum offload */
skb_partial_csum_set(skb, iphlen,
offsetof(struct tcphdr, check));
diff --git a/drivers/net/ethernet/intel/i40e/i40e_ptp.c b/drivers/net/ethernet/intel/i40e/i40e_ptp.c
index ffea0c9c82f1..97a9efe7b713 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ptp.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ptp.c
@@ -361,9 +361,9 @@ static int i40e_ptp_adjfine(struct ptp_clock_info *ptp, long scaled_ppm)
1000000ULL << 16);

if (neg_adj)
- adj = I40E_PTP_40GB_INCVAL - diff;
+ adj = freq - diff;
else
- adj = I40E_PTP_40GB_INCVAL + diff;
+ adj = freq + diff;

wr32(hw, I40E_PRTTSYN_INC_L, adj & 0xFFFFFFFF);
wr32(hw, I40E_PRTTSYN_INC_H, adj >> 32);
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index 0ac5ae16308f..17e6ac4445af 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -2862,8 +2862,8 @@ static irqreturn_t mtk_handle_irq_rx(int irq, void *_eth)

eth->rx_events++;
if (likely(napi_schedule_prep(&eth->rx_napi))) {
- __napi_schedule(&eth->rx_napi);
mtk_rx_irq_disable(eth, eth->soc->txrx.rx_irq_done_mask);
+ __napi_schedule(&eth->rx_napi);
}

return IRQ_HANDLED;
@@ -2875,8 +2875,8 @@ static irqreturn_t mtk_handle_irq_tx(int irq, void *_eth)

eth->tx_events++;
if (likely(napi_schedule_prep(&eth->tx_napi))) {
- __napi_schedule(&eth->tx_napi);
mtk_tx_irq_disable(eth, MTK_TX_DONE_INT);
+ __napi_schedule(&eth->tx_napi);
}

return IRQ_HANDLED;
diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.h b/drivers/net/ethernet/qlogic/qed/qed_ll2.h
index 0bfc375161ed..a174c6fc626a 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_ll2.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.h
@@ -110,9 +110,9 @@ struct qed_ll2_info {
enum core_tx_dest tx_dest;
u8 tx_stats_en;
bool main_func_queue;
+ struct qed_ll2_cbs cbs;
struct qed_ll2_rx_queue rx_queue;
struct qed_ll2_tx_queue tx_queue;
- struct qed_ll2_cbs cbs;
};

extern const struct qed_ll2_ops qed_ll2_ops_pass;
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c
index 2b38a499a404..533f5245ad94 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c
@@ -105,6 +105,7 @@ struct stm32_ops {
int (*parse_data)(struct stm32_dwmac *dwmac,
struct device *dev);
u32 syscfg_eth_mask;
+ bool clk_rx_enable_in_suspend;
};

static int stm32_dwmac_init(struct plat_stmmacenet_data *plat_dat)
@@ -122,7 +123,8 @@ static int stm32_dwmac_init(struct plat_stmmacenet_data *plat_dat)
if (ret)
return ret;

- if (!dwmac->dev->power.is_suspended) {
+ if (!dwmac->ops->clk_rx_enable_in_suspend ||
+ !dwmac->dev->power.is_suspended) {
ret = clk_prepare_enable(dwmac->clk_rx);
if (ret) {
clk_disable_unprepare(dwmac->clk_tx);
@@ -515,7 +517,8 @@ static struct stm32_ops stm32mp1_dwmac_data = {
.suspend = stm32mp1_suspend,
.resume = stm32mp1_resume,
.parse_data = stm32mp1_parse_data,
- .syscfg_eth_mask = SYSCFG_MP1_ETH_MASK
+ .syscfg_eth_mask = SYSCFG_MP1_ETH_MASK,
+ .clk_rx_enable_in_suspend = true
};

static const struct of_device_id stm32_dwmac_match[] = {
diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c
index 25466cbdc16b..9f2553799895 100644
--- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c
+++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c
@@ -1614,6 +1614,7 @@ static int am65_cpsw_nuss_init_tx_chns(struct am65_cpsw_common *common)
if (tx_chn->irq <= 0) {
dev_err(dev, "Failed to get tx dma irq %d\n",
tx_chn->irq);
+ ret = tx_chn->irq ?: -ENXIO;
goto err;
}

diff --git a/drivers/net/usb/smsc75xx.c b/drivers/net/usb/smsc75xx.c
index 5d6454fedb3f..78ad2da3ee29 100644
--- a/drivers/net/usb/smsc75xx.c
+++ b/drivers/net/usb/smsc75xx.c
@@ -90,7 +90,9 @@ static int __must_check __smsc75xx_read_reg(struct usbnet *dev, u32 index,
ret = fn(dev, USB_VENDOR_REQUEST_READ_REGISTER, USB_DIR_IN
| USB_TYPE_VENDOR | USB_RECIP_DEVICE,
0, index, &buf, 4);
- if (unlikely(ret < 0)) {
+ if (unlikely(ret < 4)) {
+ ret = ret < 0 ? ret : -ENODATA;
+
netdev_warn(dev->net, "Failed to read reg index 0x%08x: %d\n",
index, ret);
return ret;
diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c
index f6dcec66f0a4..208df4d41939 100644
--- a/drivers/net/vrf.c
+++ b/drivers/net/vrf.c
@@ -664,7 +664,7 @@ static int vrf_finish_output6(struct net *net, struct sock *sk,
skb->protocol = htons(ETH_P_IPV6);
skb->dev = dev;

- rcu_read_lock_bh();
+ rcu_read_lock();
nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr);
neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
if (unlikely(!neigh))
@@ -672,10 +672,10 @@ static int vrf_finish_output6(struct net *net, struct sock *sk,
if (!IS_ERR(neigh)) {
sock_confirm_neigh(skb, neigh);
ret = neigh_output(neigh, skb, false);
- rcu_read_unlock_bh();
+ rcu_read_unlock();
return ret;
}
- rcu_read_unlock_bh();
+ rcu_read_unlock();

IP6_INC_STATS(dev_net(dst->dev),
ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
@@ -889,7 +889,7 @@ static int vrf_finish_output(struct net *net, struct sock *sk, struct sk_buff *s
}
}

- rcu_read_lock_bh();
+ rcu_read_lock();

neigh = ip_neigh_for_gw(rt, skb, &is_v6gw);
if (!IS_ERR(neigh)) {
@@ -898,11 +898,11 @@ static int vrf_finish_output(struct net *net, struct sock *sk, struct sk_buff *s
sock_confirm_neigh(skb, neigh);
/* if crossing protocols, can not use the cached header */
ret = neigh_output(neigh, skb, is_v6gw);
- rcu_read_unlock_bh();
+ rcu_read_unlock();
return ret;
}

- rcu_read_unlock_bh();
+ rcu_read_unlock();
vrf_tx_error(skb->dev, skb);
return -EINVAL;
}
diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c
index 0c3eb850fcb7..619dd71c9d75 100644
--- a/drivers/net/vxlan/vxlan_core.c
+++ b/drivers/net/vxlan/vxlan_core.c
@@ -1910,7 +1910,7 @@ static int arp_reduce(struct net_device *dev, struct sk_buff *skb, __be32 vni)
struct vxlan_fdb *f;
struct sk_buff *reply;

- if (!(n->nud_state & NUD_CONNECTED)) {
+ if (!(READ_ONCE(n->nud_state) & NUD_CONNECTED)) {
neigh_release(n);
goto out;
}
@@ -2074,7 +2074,7 @@ static int neigh_reduce(struct net_device *dev, struct sk_buff *skb, __be32 vni)
struct vxlan_fdb *f;
struct sk_buff *reply;

- if (!(n->nud_state & NUD_CONNECTED)) {
+ if (!(READ_ONCE(n->nud_state) & NUD_CONNECTED)) {
neigh_release(n);
goto out;
}
diff --git a/drivers/net/wan/fsl_ucc_hdlc.c b/drivers/net/wan/fsl_ucc_hdlc.c
index 1c53b5546927..5fec8abe8e1d 100644
--- a/drivers/net/wan/fsl_ucc_hdlc.c
+++ b/drivers/net/wan/fsl_ucc_hdlc.c
@@ -34,6 +34,8 @@
#define TDM_PPPOHT_SLIC_MAXIN
#define RX_BD_ERRORS (R_CD_S | R_OV_S | R_CR_S | R_AB_S | R_NO_S | R_LG_S)

+static int uhdlc_close(struct net_device *dev);
+
static struct ucc_tdm_info utdm_primary_info = {
.uf_info = {
.tsa = 0,
@@ -708,6 +710,7 @@ static int uhdlc_open(struct net_device *dev)
hdlc_device *hdlc = dev_to_hdlc(dev);
struct ucc_hdlc_private *priv = hdlc->priv;
struct ucc_tdm *utdm = priv->utdm;
+ int rc = 0;

if (priv->hdlc_busy != 1) {
if (request_irq(priv->ut_info->uf_info.irq,
@@ -731,10 +734,13 @@ static int uhdlc_open(struct net_device *dev)
napi_enable(&priv->napi);
netdev_reset_queue(dev);
netif_start_queue(dev);
- hdlc_open(dev);
+
+ rc = hdlc_open(dev);
+ if (rc)
+ uhdlc_close(dev);
}

- return 0;
+ return rc;
}

static void uhdlc_memclean(struct ucc_hdlc_private *priv)
@@ -824,6 +830,8 @@ static int uhdlc_close(struct net_device *dev)
netdev_reset_queue(dev);
priv->hdlc_busy = 0;

+ hdlc_close(dev);
+
return 0;
}

diff --git a/drivers/net/wireless/intel/iwlwifi/fw/error-dump.h b/drivers/net/wireless/intel/iwlwifi/fw/error-dump.h
index c62576e442bd..2d481849a9c2 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/error-dump.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/error-dump.h
@@ -295,9 +295,9 @@ struct iwl_fw_ini_fifo_hdr {
struct iwl_fw_ini_error_dump_range {
__le32 range_data_size;
union {
- __le32 internal_base_addr;
- __le64 dram_base_addr;
- __le32 page_num;
+ __le32 internal_base_addr __packed;
+ __le64 dram_base_addr __packed;
+ __le32 page_num __packed;
struct iwl_fw_ini_fifo_hdr fifo_hdr;
struct iwl_cmd_header fw_pkt_hdr;
};
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
index 887d0789c96c..2e3c98eaa400 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
@@ -796,7 +796,7 @@ int iwl_run_init_mvm_ucode(struct iwl_mvm *mvm)
mvm->nvm_data->bands[0].n_channels = 1;
mvm->nvm_data->bands[0].n_bitrates = 1;
mvm->nvm_data->bands[0].bitrates =
- (void *)((u8 *)mvm->nvm_data->channels + 1);
+ (void *)(mvm->nvm_data->channels + 1);
mvm->nvm_data->bands[0].bitrates->hw_value = 10;
}

diff --git a/drivers/net/wireless/marvell/mwifiex/11n_rxreorder.c b/drivers/net/wireless/marvell/mwifiex/11n_rxreorder.c
index a04b66284af4..7351acac6932 100644
--- a/drivers/net/wireless/marvell/mwifiex/11n_rxreorder.c
+++ b/drivers/net/wireless/marvell/mwifiex/11n_rxreorder.c
@@ -965,8 +965,8 @@ void mwifiex_11n_rxba_sync_event(struct mwifiex_private *priv,
}
}

- tlv_buf_left -= (sizeof(*tlv_rxba) + tlv_len);
- tmp = (u8 *)tlv_rxba + tlv_len + sizeof(*tlv_rxba);
+ tlv_buf_left -= (sizeof(tlv_rxba->header) + tlv_len);
+ tmp = (u8 *)tlv_rxba + sizeof(tlv_rxba->header) + tlv_len;
tlv_rxba = (struct mwifiex_ie_types_rxba_sync *)tmp;
}
}
diff --git a/drivers/net/wireless/marvell/mwifiex/sta_rx.c b/drivers/net/wireless/marvell/mwifiex/sta_rx.c
index 65420ad67416..257737137cd7 100644
--- a/drivers/net/wireless/marvell/mwifiex/sta_rx.c
+++ b/drivers/net/wireless/marvell/mwifiex/sta_rx.c
@@ -86,7 +86,8 @@ int mwifiex_process_rx_packet(struct mwifiex_private *priv,
rx_pkt_len = le16_to_cpu(local_rx_pd->rx_pkt_length);
rx_pkt_hdr = (void *)local_rx_pd + rx_pkt_off;

- if (sizeof(*rx_pkt_hdr) + rx_pkt_off > skb->len) {
+ if (sizeof(rx_pkt_hdr->eth803_hdr) + sizeof(rfc1042_header) +
+ rx_pkt_off > skb->len) {
mwifiex_dbg(priv->adapter, ERROR,
"wrong rx packet offset: len=%d, rx_pkt_off=%d\n",
skb->len, rx_pkt_off);
@@ -95,12 +96,13 @@ int mwifiex_process_rx_packet(struct mwifiex_private *priv,
return -1;
}

- if ((!memcmp(&rx_pkt_hdr->rfc1042_hdr, bridge_tunnel_header,
- sizeof(bridge_tunnel_header))) ||
- (!memcmp(&rx_pkt_hdr->rfc1042_hdr, rfc1042_header,
- sizeof(rfc1042_header)) &&
- ntohs(rx_pkt_hdr->rfc1042_hdr.snap_type) != ETH_P_AARP &&
- ntohs(rx_pkt_hdr->rfc1042_hdr.snap_type) != ETH_P_IPX)) {
+ if (sizeof(*rx_pkt_hdr) + rx_pkt_off <= skb->len &&
+ ((!memcmp(&rx_pkt_hdr->rfc1042_hdr, bridge_tunnel_header,
+ sizeof(bridge_tunnel_header))) ||
+ (!memcmp(&rx_pkt_hdr->rfc1042_hdr, rfc1042_header,
+ sizeof(rfc1042_header)) &&
+ ntohs(rx_pkt_hdr->rfc1042_hdr.snap_type) != ETH_P_AARP &&
+ ntohs(rx_pkt_hdr->rfc1042_hdr.snap_type) != ETH_P_IPX))) {
/*
* Replace the 803 header and rfc1042 header (llc/snap) with an
* EthernetII header, keep the src/dst and snap_type
diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_eeprom.c b/drivers/net/wireless/mediatek/mt76/mt76x02_eeprom.c
index 0acabba2d1a5..5d402cf2951c 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76x02_eeprom.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76x02_eeprom.c
@@ -131,15 +131,8 @@ u8 mt76x02_get_lna_gain(struct mt76x02_dev *dev,
s8 *lna_2g, s8 *lna_5g,
struct ieee80211_channel *chan)
{
- u16 val;
u8 lna;

- val = mt76x02_eeprom_get(dev, MT_EE_NIC_CONF_1);
- if (val & MT_EE_NIC_CONF_1_LNA_EXT_2G)
- *lna_2g = 0;
- if (val & MT_EE_NIC_CONF_1_LNA_EXT_5G)
- memset(lna_5g, 0, sizeof(s8) * 3);
-
if (chan->band == NL80211_BAND_2GHZ)
lna = *lna_2g;
else if (chan->hw_value <= 64)
diff --git a/drivers/net/wireless/mediatek/mt76/mt76x2/eeprom.c b/drivers/net/wireless/mediatek/mt76/mt76x2/eeprom.c
index c57e05a5c65e..91807bf662dd 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76x2/eeprom.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76x2/eeprom.c
@@ -256,7 +256,8 @@ void mt76x2_read_rx_gain(struct mt76x02_dev *dev)
struct ieee80211_channel *chan = dev->mphy.chandef.chan;
int channel = chan->hw_value;
s8 lna_5g[3], lna_2g;
- u8 lna;
+ bool use_lna;
+ u8 lna = 0;
u16 val;

if (chan->band == NL80211_BAND_2GHZ)
@@ -275,7 +276,15 @@ void mt76x2_read_rx_gain(struct mt76x02_dev *dev)
dev->cal.rx.mcu_gain |= (lna_5g[1] & 0xff) << 16;
dev->cal.rx.mcu_gain |= (lna_5g[2] & 0xff) << 24;

- lna = mt76x02_get_lna_gain(dev, &lna_2g, lna_5g, chan);
+ val = mt76x02_eeprom_get(dev, MT_EE_NIC_CONF_1);
+ if (chan->band == NL80211_BAND_2GHZ)
+ use_lna = !(val & MT_EE_NIC_CONF_1_LNA_EXT_2G);
+ else
+ use_lna = !(val & MT_EE_NIC_CONF_1_LNA_EXT_5G);
+
+ if (use_lna)
+ lna = mt76x02_get_lna_gain(dev, &lna_2g, lna_5g, chan);
+
dev->cal.rx.lna_gain = mt76x02_sign_extend(lna, 8);
}
EXPORT_SYMBOL_GPL(mt76x2_read_rx_gain);
diff --git a/drivers/of/dynamic.c b/drivers/of/dynamic.c
index 95501b77ef31..0fbf331a748f 100644
--- a/drivers/of/dynamic.c
+++ b/drivers/of/dynamic.c
@@ -902,13 +902,13 @@ int of_changeset_action(struct of_changeset *ocs, unsigned long action,
{
struct of_changeset_entry *ce;

+ if (WARN_ON(action >= ARRAY_SIZE(action_names)))
+ return -EINVAL;
+
ce = kzalloc(sizeof(*ce), GFP_KERNEL);
if (!ce)
return -ENOMEM;

- if (WARN_ON(action >= ARRAY_SIZE(action_names)))
- return -EINVAL;
-
/* get a reference to the node */
ce->action = action;
ce->np = of_node_get(np);
diff --git a/drivers/pci/controller/dwc/pcie-qcom.c b/drivers/pci/controller/dwc/pcie-qcom.c
index d24712a76ba7..0ccd92faf078 100644
--- a/drivers/pci/controller/dwc/pcie-qcom.c
+++ b/drivers/pci/controller/dwc/pcie-qcom.c
@@ -40,7 +40,6 @@
#define PARF_PHY_REFCLK 0x4c
#define PARF_CONFIG_BITS 0x50
#define PARF_DBI_BASE_ADDR 0x168
-#define PARF_SLV_ADDR_SPACE_SIZE_2_3_3 0x16c /* Register offset specific to IP ver 2.3.3 */
#define PARF_MHI_CLOCK_RESET_CTRL 0x174
#define PARF_AXI_MSTR_WR_ADDR_HALT 0x178
#define PARF_AXI_MSTR_WR_ADDR_HALT_V2 0x1a8
@@ -1148,8 +1147,7 @@ static int qcom_pcie_post_init_2_3_3(struct qcom_pcie *pcie)
u16 offset = dw_pcie_find_capability(pci, PCI_CAP_ID_EXP);
u32 val;

- writel(SLV_ADDR_SPACE_SZ,
- pcie->parf + PARF_SLV_ADDR_SPACE_SIZE_2_3_3);
+ writel(SLV_ADDR_SPACE_SZ, pcie->parf + PARF_SLV_ADDR_SPACE_SIZE);

val = readl(pcie->parf + PARF_PHY_CTRL);
val &= ~BIT(0);
diff --git a/drivers/ptp/ptp_ocp.c b/drivers/ptp/ptp_ocp.c
index a48d9b7d2921..8fee9b330b61 100644
--- a/drivers/ptp/ptp_ocp.c
+++ b/drivers/ptp/ptp_ocp.c
@@ -3532,7 +3532,6 @@ ptp_ocp_device_init(struct ptp_ocp *bp, struct pci_dev *pdev)
return 0;

out:
- ptp_ocp_dev_release(&bp->dev);
put_device(&bp->dev);
return err;
}
diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c
index 351f0fd225b1..f6a95f72af18 100644
--- a/drivers/regulator/core.c
+++ b/drivers/regulator/core.c
@@ -5543,6 +5543,8 @@ regulator_register(struct device *dev,
goto rinse;
}
device_initialize(&rdev->dev);
+ dev_set_drvdata(&rdev->dev, rdev);
+ rdev->dev.class = &regulator_class;
spin_lock_init(&rdev->err_lock);

/*
@@ -5604,11 +5606,9 @@ regulator_register(struct device *dev,
rdev->supply_name = regulator_desc->supply_name;

/* register with sysfs */
- rdev->dev.class = &regulator_class;
rdev->dev.parent = config->dev;
dev_set_name(&rdev->dev, "regulator.%lu",
(unsigned long) atomic_inc_return(&regulator_no));
- dev_set_drvdata(&rdev->dev, rdev);

/* set regulator constraints */
if (init_data)
diff --git a/drivers/regulator/mt6358-regulator.c b/drivers/regulator/mt6358-regulator.c
index 8a5ce990f1bf..a0441b808671 100644
--- a/drivers/regulator/mt6358-regulator.c
+++ b/drivers/regulator/mt6358-regulator.c
@@ -35,19 +35,19 @@ struct mt6358_regulator_info {
};

#define MT6358_BUCK(match, vreg, min, max, step, \
- volt_ranges, vosel_mask, _da_vsel_reg, _da_vsel_mask, \
+ vosel_mask, _da_vsel_reg, _da_vsel_mask, \
_modeset_reg, _modeset_shift) \
[MT6358_ID_##vreg] = { \
.desc = { \
.name = #vreg, \
.of_match = of_match_ptr(match), \
- .ops = &mt6358_volt_range_ops, \
+ .ops = &mt6358_buck_ops, \
.type = REGULATOR_VOLTAGE, \
.id = MT6358_ID_##vreg, \
.owner = THIS_MODULE, \
.n_voltages = ((max) - (min)) / (step) + 1, \
- .linear_ranges = volt_ranges, \
- .n_linear_ranges = ARRAY_SIZE(volt_ranges), \
+ .min_uV = (min), \
+ .uV_step = (step), \
.vsel_reg = MT6358_BUCK_##vreg##_ELR0, \
.vsel_mask = vosel_mask, \
.enable_reg = MT6358_BUCK_##vreg##_CON0, \
@@ -87,7 +87,7 @@ struct mt6358_regulator_info {
}

#define MT6358_LDO1(match, vreg, min, max, step, \
- volt_ranges, _da_vsel_reg, _da_vsel_mask, \
+ _da_vsel_reg, _da_vsel_mask, \
vosel, vosel_mask) \
[MT6358_ID_##vreg] = { \
.desc = { \
@@ -98,8 +98,8 @@ struct mt6358_regulator_info {
.id = MT6358_ID_##vreg, \
.owner = THIS_MODULE, \
.n_voltages = ((max) - (min)) / (step) + 1, \
- .linear_ranges = volt_ranges, \
- .n_linear_ranges = ARRAY_SIZE(volt_ranges), \
+ .min_uV = (min), \
+ .uV_step = (step), \
.vsel_reg = vosel, \
.vsel_mask = vosel_mask, \
.enable_reg = MT6358_LDO_##vreg##_CON0, \
@@ -131,19 +131,19 @@ struct mt6358_regulator_info {
}

#define MT6366_BUCK(match, vreg, min, max, step, \
- volt_ranges, vosel_mask, _da_vsel_reg, _da_vsel_mask, \
+ vosel_mask, _da_vsel_reg, _da_vsel_mask, \
_modeset_reg, _modeset_shift) \
[MT6366_ID_##vreg] = { \
.desc = { \
.name = #vreg, \
.of_match = of_match_ptr(match), \
- .ops = &mt6358_volt_range_ops, \
+ .ops = &mt6358_buck_ops, \
.type = REGULATOR_VOLTAGE, \
.id = MT6366_ID_##vreg, \
.owner = THIS_MODULE, \
.n_voltages = ((max) - (min)) / (step) + 1, \
- .linear_ranges = volt_ranges, \
- .n_linear_ranges = ARRAY_SIZE(volt_ranges), \
+ .min_uV = (min), \
+ .uV_step = (step), \
.vsel_reg = MT6358_BUCK_##vreg##_ELR0, \
.vsel_mask = vosel_mask, \
.enable_reg = MT6358_BUCK_##vreg##_CON0, \
@@ -183,7 +183,7 @@ struct mt6358_regulator_info {
}

#define MT6366_LDO1(match, vreg, min, max, step, \
- volt_ranges, _da_vsel_reg, _da_vsel_mask, \
+ _da_vsel_reg, _da_vsel_mask, \
vosel, vosel_mask) \
[MT6366_ID_##vreg] = { \
.desc = { \
@@ -194,8 +194,8 @@ struct mt6358_regulator_info {
.id = MT6366_ID_##vreg, \
.owner = THIS_MODULE, \
.n_voltages = ((max) - (min)) / (step) + 1, \
- .linear_ranges = volt_ranges, \
- .n_linear_ranges = ARRAY_SIZE(volt_ranges), \
+ .min_uV = (min), \
+ .uV_step = (step), \
.vsel_reg = vosel, \
.vsel_mask = vosel_mask, \
.enable_reg = MT6358_LDO_##vreg##_CON0, \
@@ -226,21 +226,6 @@ struct mt6358_regulator_info {
.qi = BIT(15), \
}

-static const struct linear_range buck_volt_range1[] = {
- REGULATOR_LINEAR_RANGE(500000, 0, 0x7f, 6250),
-};
-
-static const struct linear_range buck_volt_range2[] = {
- REGULATOR_LINEAR_RANGE(500000, 0, 0x7f, 12500),
-};
-
-static const struct linear_range buck_volt_range3[] = {
- REGULATOR_LINEAR_RANGE(500000, 0, 0x3f, 50000),
-};
-
-static const struct linear_range buck_volt_range4[] = {
- REGULATOR_LINEAR_RANGE(1000000, 0, 0x7f, 12500),
-};

static const unsigned int vdram2_voltages[] = {
600000, 1800000,
@@ -463,9 +448,9 @@ static unsigned int mt6358_regulator_get_mode(struct regulator_dev *rdev)
}
}

-static const struct regulator_ops mt6358_volt_range_ops = {
- .list_voltage = regulator_list_voltage_linear_range,
- .map_voltage = regulator_map_voltage_linear_range,
+static const struct regulator_ops mt6358_buck_ops = {
+ .list_voltage = regulator_list_voltage_linear,
+ .map_voltage = regulator_map_voltage_linear,
.set_voltage_sel = regulator_set_voltage_sel_regmap,
.get_voltage_sel = mt6358_get_buck_voltage_sel,
.set_voltage_time_sel = regulator_set_voltage_time_sel,
@@ -477,6 +462,18 @@ static const struct regulator_ops mt6358_volt_range_ops = {
.get_mode = mt6358_regulator_get_mode,
};

+static const struct regulator_ops mt6358_volt_range_ops = {
+ .list_voltage = regulator_list_voltage_linear,
+ .map_voltage = regulator_map_voltage_linear,
+ .set_voltage_sel = regulator_set_voltage_sel_regmap,
+ .get_voltage_sel = mt6358_get_buck_voltage_sel,
+ .set_voltage_time_sel = regulator_set_voltage_time_sel,
+ .enable = regulator_enable_regmap,
+ .disable = regulator_disable_regmap,
+ .is_enabled = regulator_is_enabled_regmap,
+ .get_status = mt6358_get_status,
+};
+
static const struct regulator_ops mt6358_volt_table_ops = {
.list_voltage = regulator_list_voltage_table,
.map_voltage = regulator_map_voltage_iterate,
@@ -500,35 +497,23 @@ static const struct regulator_ops mt6358_volt_fixed_ops = {
/* The array is indexed by id(MT6358_ID_XXX) */
static struct mt6358_regulator_info mt6358_regulators[] = {
MT6358_BUCK("buck_vdram1", VDRAM1, 500000, 2087500, 12500,
- buck_volt_range2, 0x7f, MT6358_BUCK_VDRAM1_DBG0, 0x7f,
- MT6358_VDRAM1_ANA_CON0, 8),
+ 0x7f, MT6358_BUCK_VDRAM1_DBG0, 0x7f, MT6358_VDRAM1_ANA_CON0, 8),
MT6358_BUCK("buck_vcore", VCORE, 500000, 1293750, 6250,
- buck_volt_range1, 0x7f, MT6358_BUCK_VCORE_DBG0, 0x7f,
- MT6358_VCORE_VGPU_ANA_CON0, 1),
- MT6358_BUCK("buck_vcore_sshub", VCORE_SSHUB, 500000, 1293750, 6250,
- buck_volt_range1, 0x7f, MT6358_BUCK_VCORE_SSHUB_ELR0, 0x7f,
- MT6358_VCORE_VGPU_ANA_CON0, 1),
+ 0x7f, MT6358_BUCK_VCORE_DBG0, 0x7f, MT6358_VCORE_VGPU_ANA_CON0, 1),
MT6358_BUCK("buck_vpa", VPA, 500000, 3650000, 50000,
- buck_volt_range3, 0x3f, MT6358_BUCK_VPA_DBG0, 0x3f,
- MT6358_VPA_ANA_CON0, 3),
+ 0x3f, MT6358_BUCK_VPA_DBG0, 0x3f, MT6358_VPA_ANA_CON0, 3),
MT6358_BUCK("buck_vproc11", VPROC11, 500000, 1293750, 6250,
- buck_volt_range1, 0x7f, MT6358_BUCK_VPROC11_DBG0, 0x7f,
- MT6358_VPROC_ANA_CON0, 1),
+ 0x7f, MT6358_BUCK_VPROC11_DBG0, 0x7f, MT6358_VPROC_ANA_CON0, 1),
MT6358_BUCK("buck_vproc12", VPROC12, 500000, 1293750, 6250,
- buck_volt_range1, 0x7f, MT6358_BUCK_VPROC12_DBG0, 0x7f,
- MT6358_VPROC_ANA_CON0, 2),
+ 0x7f, MT6358_BUCK_VPROC12_DBG0, 0x7f, MT6358_VPROC_ANA_CON0, 2),
MT6358_BUCK("buck_vgpu", VGPU, 500000, 1293750, 6250,
- buck_volt_range1, 0x7f, MT6358_BUCK_VGPU_ELR0, 0x7f,
- MT6358_VCORE_VGPU_ANA_CON0, 2),
+ 0x7f, MT6358_BUCK_VGPU_ELR0, 0x7f, MT6358_VCORE_VGPU_ANA_CON0, 2),
MT6358_BUCK("buck_vs2", VS2, 500000, 2087500, 12500,
- buck_volt_range2, 0x7f, MT6358_BUCK_VS2_DBG0, 0x7f,
- MT6358_VS2_ANA_CON0, 8),
+ 0x7f, MT6358_BUCK_VS2_DBG0, 0x7f, MT6358_VS2_ANA_CON0, 8),
MT6358_BUCK("buck_vmodem", VMODEM, 500000, 1293750, 6250,
- buck_volt_range1, 0x7f, MT6358_BUCK_VMODEM_DBG0, 0x7f,
- MT6358_VMODEM_ANA_CON0, 8),
+ 0x7f, MT6358_BUCK_VMODEM_DBG0, 0x7f, MT6358_VMODEM_ANA_CON0, 8),
MT6358_BUCK("buck_vs1", VS1, 1000000, 2587500, 12500,
- buck_volt_range4, 0x7f, MT6358_BUCK_VS1_DBG0, 0x7f,
- MT6358_VS1_ANA_CON0, 8),
+ 0x7f, MT6358_BUCK_VS1_DBG0, 0x7f, MT6358_VS1_ANA_CON0, 8),
MT6358_REG_FIXED("ldo_vrf12", VRF12,
MT6358_LDO_VRF12_CON0, 0, 1200000),
MT6358_REG_FIXED("ldo_vio18", VIO18,
@@ -582,55 +567,35 @@ static struct mt6358_regulator_info mt6358_regulators[] = {
MT6358_LDO("ldo_vsim2", VSIM2, vsim_voltages, vsim_idx,
MT6358_LDO_VSIM2_CON0, 0, MT6358_VSIM2_ANA_CON0, 0xf00),
MT6358_LDO1("ldo_vsram_proc11", VSRAM_PROC11, 500000, 1293750, 6250,
- buck_volt_range1, MT6358_LDO_VSRAM_PROC11_DBG0, 0x7f00,
- MT6358_LDO_VSRAM_CON0, 0x7f),
+ MT6358_LDO_VSRAM_PROC11_DBG0, 0x7f00, MT6358_LDO_VSRAM_CON0, 0x7f),
MT6358_LDO1("ldo_vsram_others", VSRAM_OTHERS, 500000, 1293750, 6250,
- buck_volt_range1, MT6358_LDO_VSRAM_OTHERS_DBG0, 0x7f00,
- MT6358_LDO_VSRAM_CON2, 0x7f),
- MT6358_LDO1("ldo_vsram_others_sshub", VSRAM_OTHERS_SSHUB, 500000,
- 1293750, 6250, buck_volt_range1,
- MT6358_LDO_VSRAM_OTHERS_SSHUB_CON1, 0x7f,
- MT6358_LDO_VSRAM_OTHERS_SSHUB_CON1, 0x7f),
+ MT6358_LDO_VSRAM_OTHERS_DBG0, 0x7f00, MT6358_LDO_VSRAM_CON2, 0x7f),
MT6358_LDO1("ldo_vsram_gpu", VSRAM_GPU, 500000, 1293750, 6250,
- buck_volt_range1, MT6358_LDO_VSRAM_GPU_DBG0, 0x7f00,
- MT6358_LDO_VSRAM_CON3, 0x7f),
+ MT6358_LDO_VSRAM_GPU_DBG0, 0x7f00, MT6358_LDO_VSRAM_CON3, 0x7f),
MT6358_LDO1("ldo_vsram_proc12", VSRAM_PROC12, 500000, 1293750, 6250,
- buck_volt_range1, MT6358_LDO_VSRAM_PROC12_DBG0, 0x7f00,
- MT6358_LDO_VSRAM_CON1, 0x7f),
+ MT6358_LDO_VSRAM_PROC12_DBG0, 0x7f00, MT6358_LDO_VSRAM_CON1, 0x7f),
};

/* The array is indexed by id(MT6366_ID_XXX) */
static struct mt6358_regulator_info mt6366_regulators[] = {
MT6366_BUCK("buck_vdram1", VDRAM1, 500000, 2087500, 12500,
- buck_volt_range2, 0x7f, MT6358_BUCK_VDRAM1_DBG0, 0x7f,
- MT6358_VDRAM1_ANA_CON0, 8),
+ 0x7f, MT6358_BUCK_VDRAM1_DBG0, 0x7f, MT6358_VDRAM1_ANA_CON0, 8),
MT6366_BUCK("buck_vcore", VCORE, 500000, 1293750, 6250,
- buck_volt_range1, 0x7f, MT6358_BUCK_VCORE_DBG0, 0x7f,
- MT6358_VCORE_VGPU_ANA_CON0, 1),
- MT6366_BUCK("buck_vcore_sshub", VCORE_SSHUB, 500000, 1293750, 6250,
- buck_volt_range1, 0x7f, MT6358_BUCK_VCORE_SSHUB_ELR0, 0x7f,
- MT6358_VCORE_VGPU_ANA_CON0, 1),
+ 0x7f, MT6358_BUCK_VCORE_DBG0, 0x7f, MT6358_VCORE_VGPU_ANA_CON0, 1),
MT6366_BUCK("buck_vpa", VPA, 500000, 3650000, 50000,
- buck_volt_range3, 0x3f, MT6358_BUCK_VPA_DBG0, 0x3f,
- MT6358_VPA_ANA_CON0, 3),
+ 0x3f, MT6358_BUCK_VPA_DBG0, 0x3f, MT6358_VPA_ANA_CON0, 3),
MT6366_BUCK("buck_vproc11", VPROC11, 500000, 1293750, 6250,
- buck_volt_range1, 0x7f, MT6358_BUCK_VPROC11_DBG0, 0x7f,
- MT6358_VPROC_ANA_CON0, 1),
+ 0x7f, MT6358_BUCK_VPROC11_DBG0, 0x7f, MT6358_VPROC_ANA_CON0, 1),
MT6366_BUCK("buck_vproc12", VPROC12, 500000, 1293750, 6250,
- buck_volt_range1, 0x7f, MT6358_BUCK_VPROC12_DBG0, 0x7f,
- MT6358_VPROC_ANA_CON0, 2),
+ 0x7f, MT6358_BUCK_VPROC12_DBG0, 0x7f, MT6358_VPROC_ANA_CON0, 2),
MT6366_BUCK("buck_vgpu", VGPU, 500000, 1293750, 6250,
- buck_volt_range1, 0x7f, MT6358_BUCK_VGPU_ELR0, 0x7f,
- MT6358_VCORE_VGPU_ANA_CON0, 2),
+ 0x7f, MT6358_BUCK_VGPU_ELR0, 0x7f, MT6358_VCORE_VGPU_ANA_CON0, 2),
MT6366_BUCK("buck_vs2", VS2, 500000, 2087500, 12500,
- buck_volt_range2, 0x7f, MT6358_BUCK_VS2_DBG0, 0x7f,
- MT6358_VS2_ANA_CON0, 8),
+ 0x7f, MT6358_BUCK_VS2_DBG0, 0x7f, MT6358_VS2_ANA_CON0, 8),
MT6366_BUCK("buck_vmodem", VMODEM, 500000, 1293750, 6250,
- buck_volt_range1, 0x7f, MT6358_BUCK_VMODEM_DBG0, 0x7f,
- MT6358_VMODEM_ANA_CON0, 8),
+ 0x7f, MT6358_BUCK_VMODEM_DBG0, 0x7f, MT6358_VMODEM_ANA_CON0, 8),
MT6366_BUCK("buck_vs1", VS1, 1000000, 2587500, 12500,
- buck_volt_range4, 0x7f, MT6358_BUCK_VS1_DBG0, 0x7f,
- MT6358_VS1_ANA_CON0, 8),
+ 0x7f, MT6358_BUCK_VS1_DBG0, 0x7f, MT6358_VS1_ANA_CON0, 8),
MT6366_REG_FIXED("ldo_vrf12", VRF12,
MT6358_LDO_VRF12_CON0, 0, 1200000),
MT6366_REG_FIXED("ldo_vio18", VIO18,
@@ -673,21 +638,13 @@ static struct mt6358_regulator_info mt6366_regulators[] = {
MT6366_LDO("ldo_vsim2", VSIM2, vsim_voltages, vsim_idx,
MT6358_LDO_VSIM2_CON0, 0, MT6358_VSIM2_ANA_CON0, 0xf00),
MT6366_LDO1("ldo_vsram_proc11", VSRAM_PROC11, 500000, 1293750, 6250,
- buck_volt_range1, MT6358_LDO_VSRAM_PROC11_DBG0, 0x7f00,
- MT6358_LDO_VSRAM_CON0, 0x7f),
+ MT6358_LDO_VSRAM_PROC11_DBG0, 0x7f00, MT6358_LDO_VSRAM_CON0, 0x7f),
MT6366_LDO1("ldo_vsram_others", VSRAM_OTHERS, 500000, 1293750, 6250,
- buck_volt_range1, MT6358_LDO_VSRAM_OTHERS_DBG0, 0x7f00,
- MT6358_LDO_VSRAM_CON2, 0x7f),
- MT6366_LDO1("ldo_vsram_others_sshub", VSRAM_OTHERS_SSHUB, 500000,
- 1293750, 6250, buck_volt_range1,
- MT6358_LDO_VSRAM_OTHERS_SSHUB_CON1, 0x7f,
- MT6358_LDO_VSRAM_OTHERS_SSHUB_CON1, 0x7f),
+ MT6358_LDO_VSRAM_OTHERS_DBG0, 0x7f00, MT6358_LDO_VSRAM_CON2, 0x7f),
MT6366_LDO1("ldo_vsram_gpu", VSRAM_GPU, 500000, 1293750, 6250,
- buck_volt_range1, MT6358_LDO_VSRAM_GPU_DBG0, 0x7f00,
- MT6358_LDO_VSRAM_CON3, 0x7f),
+ MT6358_LDO_VSRAM_GPU_DBG0, 0x7f00, MT6358_LDO_VSRAM_CON3, 0x7f),
MT6366_LDO1("ldo_vsram_proc12", VSRAM_PROC12, 500000, 1293750, 6250,
- buck_volt_range1, MT6358_LDO_VSRAM_PROC12_DBG0, 0x7f00,
- MT6358_LDO_VSRAM_CON1, 0x7f),
+ MT6358_LDO_VSRAM_PROC12_DBG0, 0x7f00, MT6358_LDO_VSRAM_CON1, 0x7f),
};

static int mt6358_regulator_probe(struct platform_device *pdev)
diff --git a/drivers/s390/scsi/zfcp_aux.c b/drivers/s390/scsi/zfcp_aux.c
index df782646e856..ab2f35bc294d 100644
--- a/drivers/s390/scsi/zfcp_aux.c
+++ b/drivers/s390/scsi/zfcp_aux.c
@@ -518,12 +518,12 @@ struct zfcp_port *zfcp_port_enqueue(struct zfcp_adapter *adapter, u64 wwpn,
if (port) {
put_device(&port->dev);
retval = -EEXIST;
- goto err_out;
+ goto err_put;
}

port = kzalloc(sizeof(struct zfcp_port), GFP_KERNEL);
if (!port)
- goto err_out;
+ goto err_put;

rwlock_init(&port->unit_list_lock);
INIT_LIST_HEAD(&port->unit_list);
@@ -546,7 +546,7 @@ struct zfcp_port *zfcp_port_enqueue(struct zfcp_adapter *adapter, u64 wwpn,

if (dev_set_name(&port->dev, "0x%016llx", (unsigned long long)wwpn)) {
kfree(port);
- goto err_out;
+ goto err_put;
}
retval = -EINVAL;

@@ -563,7 +563,8 @@ struct zfcp_port *zfcp_port_enqueue(struct zfcp_adapter *adapter, u64 wwpn,

return port;

-err_out:
+err_put:
zfcp_ccw_adapter_put(adapter);
+err_out:
return ERR_PTR(retval);
}
diff --git a/drivers/scsi/aacraid/commsup.c b/drivers/scsi/aacraid/commsup.c
index 3f062e4013ab..013a9a334972 100644
--- a/drivers/scsi/aacraid/commsup.c
+++ b/drivers/scsi/aacraid/commsup.c
@@ -1451,7 +1451,7 @@ static void aac_handle_aif(struct aac_dev * dev, struct fib * fibptr)
#endif
break;
}
- scsi_rescan_device(&device->sdev_gendev);
+ scsi_rescan_device(device);
break;

default:
diff --git a/drivers/scsi/mvumi.c b/drivers/scsi/mvumi.c
index 05d3ce9b72db..c4acf65379d2 100644
--- a/drivers/scsi/mvumi.c
+++ b/drivers/scsi/mvumi.c
@@ -1500,7 +1500,7 @@ static void mvumi_rescan_devices(struct mvumi_hba *mhba, int id)

sdev = scsi_device_lookup(mhba->shost, 0, id, 0);
if (sdev) {
- scsi_rescan_device(&sdev->sdev_gendev);
+ scsi_rescan_device(sdev);
scsi_device_put(sdev);
}
}
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index fb6e9a7a7f58..d25e1c247253 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -2445,7 +2445,7 @@ static void scsi_evt_emit(struct scsi_device *sdev, struct scsi_event *evt)
envp[idx++] = "SDEV_MEDIA_CHANGE=1";
break;
case SDEV_EVT_INQUIRY_CHANGE_REPORTED:
- scsi_rescan_device(&sdev->sdev_gendev);
+ scsi_rescan_device(sdev);
envp[idx++] = "SDEV_UA=INQUIRY_DATA_HAS_CHANGED";
break;
case SDEV_EVT_CAPACITY_CHANGE_REPORTED:
diff --git a/drivers/scsi/scsi_priv.h b/drivers/scsi/scsi_priv.h
index c52de9a973e4..b14545acb40f 100644
--- a/drivers/scsi/scsi_priv.h
+++ b/drivers/scsi/scsi_priv.h
@@ -132,7 +132,6 @@ extern int scsi_complete_async_scans(void);
extern int scsi_scan_host_selected(struct Scsi_Host *, unsigned int,
unsigned int, u64, enum scsi_scan_mode);
extern void scsi_forget_host(struct Scsi_Host *);
-extern void scsi_rescan_device(struct device *);

/* scsi_sysctl.c */
#ifdef CONFIG_SYSCTL
diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c
index d12f2dcb4040..ed26c52ed847 100644
--- a/drivers/scsi/scsi_scan.c
+++ b/drivers/scsi/scsi_scan.c
@@ -1611,12 +1611,24 @@ int scsi_add_device(struct Scsi_Host *host, uint channel,
}
EXPORT_SYMBOL(scsi_add_device);

-void scsi_rescan_device(struct device *dev)
+int scsi_rescan_device(struct scsi_device *sdev)
{
- struct scsi_device *sdev = to_scsi_device(dev);
+ struct device *dev = &sdev->sdev_gendev;
+ int ret = 0;

device_lock(dev);

+ /*
+ * Bail out if the device is not running. Otherwise, the rescan may
+ * block waiting for commands to be executed, with us holding the
+ * device lock. This can result in a potential deadlock in the power
+ * management core code when system resume is on-going.
+ */
+ if (sdev->sdev_state != SDEV_RUNNING) {
+ ret = -EWOULDBLOCK;
+ goto unlock;
+ }
+
scsi_attach_vpd(sdev);

if (sdev->handler && sdev->handler->rescan)
@@ -1629,7 +1641,11 @@ void scsi_rescan_device(struct device *dev)
drv->rescan(dev);
module_put(dev->driver->owner);
}
+
+unlock:
device_unlock(dev);
+
+ return ret;
}
EXPORT_SYMBOL(scsi_rescan_device);

diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c
index cac7c902cf70..1f531063d633 100644
--- a/drivers/scsi/scsi_sysfs.c
+++ b/drivers/scsi/scsi_sysfs.c
@@ -762,7 +762,7 @@ static ssize_t
store_rescan_field (struct device *dev, struct device_attribute *attr,
const char *buf, size_t count)
{
- scsi_rescan_device(dev);
+ scsi_rescan_device(to_scsi_device(dev));
return count;
}
static DEVICE_ATTR(rescan, S_IWUSR, NULL, store_rescan_field);
@@ -855,7 +855,7 @@ store_state_field(struct device *dev, struct device_attribute *attr,
* waiting for pending I/O to finish.
*/
blk_mq_run_hw_queues(sdev->request_queue, true);
- scsi_rescan_device(dev);
+ scsi_rescan_device(sdev);
}

return ret == 0 ? count : -EINVAL;
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index e934779bf05c..30184f7b762c 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -213,18 +213,32 @@ cache_type_store(struct device *dev, struct device_attribute *attr,
}

static ssize_t
-manage_start_stop_show(struct device *dev, struct device_attribute *attr,
- char *buf)
+manage_start_stop_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
{
struct scsi_disk *sdkp = to_scsi_disk(dev);
struct scsi_device *sdp = sdkp->device;

- return sprintf(buf, "%u\n", sdp->manage_start_stop);
+ return sysfs_emit(buf, "%u\n",
+ sdp->manage_system_start_stop &&
+ sdp->manage_runtime_start_stop);
}
+static DEVICE_ATTR_RO(manage_start_stop);

static ssize_t
-manage_start_stop_store(struct device *dev, struct device_attribute *attr,
- const char *buf, size_t count)
+manage_system_start_stop_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct scsi_disk *sdkp = to_scsi_disk(dev);
+ struct scsi_device *sdp = sdkp->device;
+
+ return sysfs_emit(buf, "%u\n", sdp->manage_system_start_stop);
+}
+
+static ssize_t
+manage_system_start_stop_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
{
struct scsi_disk *sdkp = to_scsi_disk(dev);
struct scsi_device *sdp = sdkp->device;
@@ -236,11 +250,42 @@ manage_start_stop_store(struct device *dev, struct device_attribute *attr,
if (kstrtobool(buf, &v))
return -EINVAL;

- sdp->manage_start_stop = v;
+ sdp->manage_system_start_stop = v;

return count;
}
-static DEVICE_ATTR_RW(manage_start_stop);
+static DEVICE_ATTR_RW(manage_system_start_stop);
+
+static ssize_t
+manage_runtime_start_stop_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct scsi_disk *sdkp = to_scsi_disk(dev);
+ struct scsi_device *sdp = sdkp->device;
+
+ return sysfs_emit(buf, "%u\n", sdp->manage_runtime_start_stop);
+}
+
+static ssize_t
+manage_runtime_start_stop_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct scsi_disk *sdkp = to_scsi_disk(dev);
+ struct scsi_device *sdp = sdkp->device;
+ bool v;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EACCES;
+
+ if (kstrtobool(buf, &v))
+ return -EINVAL;
+
+ sdp->manage_runtime_start_stop = v;
+
+ return count;
+}
+static DEVICE_ATTR_RW(manage_runtime_start_stop);

static ssize_t
allow_restart_show(struct device *dev, struct device_attribute *attr, char *buf)
@@ -572,6 +617,8 @@ static struct attribute *sd_disk_attrs[] = {
&dev_attr_FUA.attr,
&dev_attr_allow_restart.attr,
&dev_attr_manage_start_stop.attr,
+ &dev_attr_manage_system_start_stop.attr,
+ &dev_attr_manage_runtime_start_stop.attr,
&dev_attr_protection_type.attr,
&dev_attr_protection_mode.attr,
&dev_attr_app_tag_own.attr,
@@ -3579,7 +3626,8 @@ static int sd_remove(struct device *dev)

device_del(&sdkp->disk_dev);
del_gendisk(sdkp->disk);
- sd_shutdown(dev);
+ if (!sdkp->suspended)
+ sd_shutdown(dev);

put_disk(sdkp->disk);
return 0;
@@ -3652,13 +3700,20 @@ static void sd_shutdown(struct device *dev)
sd_sync_cache(sdkp, NULL);
}

- if (system_state != SYSTEM_RESTART && sdkp->device->manage_start_stop) {
+ if (system_state != SYSTEM_RESTART &&
+ sdkp->device->manage_system_start_stop) {
sd_printk(KERN_NOTICE, sdkp, "Stopping disk\n");
sd_start_stop_device(sdkp, 0);
}
}

-static int sd_suspend_common(struct device *dev, bool ignore_stop_errors)
+static inline bool sd_do_start_stop(struct scsi_device *sdev, bool runtime)
+{
+ return (sdev->manage_system_start_stop && !runtime) ||
+ (sdev->manage_runtime_start_stop && runtime);
+}
+
+static int sd_suspend_common(struct device *dev, bool runtime)
{
struct scsi_disk *sdkp = dev_get_drvdata(dev);
struct scsi_sense_hdr sshdr;
@@ -3690,15 +3745,18 @@ static int sd_suspend_common(struct device *dev, bool ignore_stop_errors)
}
}

- if (sdkp->device->manage_start_stop) {
+ if (sd_do_start_stop(sdkp->device, runtime)) {
if (!sdkp->device->silence_suspend)
sd_printk(KERN_NOTICE, sdkp, "Stopping disk\n");
/* an error is not worth aborting a system sleep */
ret = sd_start_stop_device(sdkp, 0);
- if (ignore_stop_errors)
+ if (!runtime)
ret = 0;
}

+ if (!ret)
+ sdkp->suspended = true;
+
return ret;
}

@@ -3707,29 +3765,37 @@ static int sd_suspend_system(struct device *dev)
if (pm_runtime_suspended(dev))
return 0;

- return sd_suspend_common(dev, true);
+ return sd_suspend_common(dev, false);
}

static int sd_suspend_runtime(struct device *dev)
{
- return sd_suspend_common(dev, false);
+ return sd_suspend_common(dev, true);
}

-static int sd_resume(struct device *dev)
+static int sd_resume(struct device *dev, bool runtime)
{
struct scsi_disk *sdkp = dev_get_drvdata(dev);
- int ret;
+ int ret = 0;

if (!sdkp) /* E.g.: runtime resume at the start of sd_probe() */
return 0;

- if (!sdkp->device->manage_start_stop)
+ if (!sd_do_start_stop(sdkp->device, runtime)) {
+ sdkp->suspended = false;
return 0;
+ }

- sd_printk(KERN_NOTICE, sdkp, "Starting disk\n");
- ret = sd_start_stop_device(sdkp, 1);
- if (!ret)
+ if (!sdkp->device->no_start_on_resume) {
+ sd_printk(KERN_NOTICE, sdkp, "Starting disk\n");
+ ret = sd_start_stop_device(sdkp, 1);
+ }
+
+ if (!ret) {
opal_unlock_from_suspend(sdkp->opal_dev);
+ sdkp->suspended = false;
+ }
+
return ret;
}

@@ -3738,7 +3804,7 @@ static int sd_resume_system(struct device *dev)
if (pm_runtime_suspended(dev))
return 0;

- return sd_resume(dev);
+ return sd_resume(dev, false);
}

static int sd_resume_runtime(struct device *dev)
@@ -3762,7 +3828,7 @@ static int sd_resume_runtime(struct device *dev)
"Failed to clear sense data\n");
}

- return sd_resume(dev);
+ return sd_resume(dev, true);
}

/**
diff --git a/drivers/scsi/sd.h b/drivers/scsi/sd.h
index 5eea762f84d1..409dda5350d1 100644
--- a/drivers/scsi/sd.h
+++ b/drivers/scsi/sd.h
@@ -131,6 +131,7 @@ struct scsi_disk {
u8 provisioning_mode;
u8 zeroing_mode;
u8 nr_actuators; /* Number of actuators */
+ bool suspended; /* Disk is suspended (stopped) */
unsigned ATO : 1; /* state of disk ATO bit */
unsigned cache_override : 1; /* temp override of WCE,RCD */
unsigned WCE : 1; /* state of disk WCE bit */
diff --git a/drivers/scsi/smartpqi/smartpqi_init.c b/drivers/scsi/smartpqi/smartpqi_init.c
index 9f0f69c1ed66..47d487729635 100644
--- a/drivers/scsi/smartpqi/smartpqi_init.c
+++ b/drivers/scsi/smartpqi/smartpqi_init.c
@@ -2278,7 +2278,7 @@ static void pqi_update_device_list(struct pqi_ctrl_info *ctrl_info,
device->advertised_queue_depth = device->queue_depth;
scsi_change_queue_depth(device->sdev, device->advertised_queue_depth);
if (device->rescan) {
- scsi_rescan_device(&device->sdev->sdev_gendev);
+ scsi_rescan_device(device->sdev);
device->rescan = false;
}
}
diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c
index 7a1dc5c7c49e..c2d981d5a2dd 100644
--- a/drivers/scsi/storvsc_drv.c
+++ b/drivers/scsi/storvsc_drv.c
@@ -471,7 +471,7 @@ static void storvsc_device_scan(struct work_struct *work)
sdev = scsi_device_lookup(wrk->host, 0, wrk->tgt_id, wrk->lun);
if (!sdev)
goto done;
- scsi_rescan_device(&sdev->sdev_gendev);
+ scsi_rescan_device(sdev);
scsi_device_put(sdev);

done:
diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c
index 2a79ab16134b..3f8c553f3d91 100644
--- a/drivers/scsi/virtio_scsi.c
+++ b/drivers/scsi/virtio_scsi.c
@@ -325,7 +325,7 @@ static void virtscsi_handle_param_change(struct virtio_scsi *vscsi,
/* Handle "Parameters changed", "Mode parameters changed", and
"Capacity data has changed". */
if (asc == 0x2a && (ascq == 0x00 || ascq == 0x01 || ascq == 0x09))
- scsi_rescan_device(&sdev->sdev_gendev);
+ scsi_rescan_device(sdev);

scsi_device_put(sdev);
}
diff --git a/drivers/spi/spi-zynqmp-gqspi.c b/drivers/spi/spi-zynqmp-gqspi.c
index c760aac070e5..3b56d5e7080e 100644
--- a/drivers/spi/spi-zynqmp-gqspi.c
+++ b/drivers/spi/spi-zynqmp-gqspi.c
@@ -1218,9 +1218,9 @@ static int zynqmp_qspi_probe(struct platform_device *pdev)
return 0;

clk_dis_all:
- pm_runtime_put_sync(&pdev->dev);
- pm_runtime_set_suspended(&pdev->dev);
pm_runtime_disable(&pdev->dev);
+ pm_runtime_put_noidle(&pdev->dev);
+ pm_runtime_set_suspended(&pdev->dev);
clk_disable_unprepare(xqspi->refclk);
clk_dis_pclk:
clk_disable_unprepare(xqspi->pclk);
@@ -1244,11 +1244,15 @@ static int zynqmp_qspi_remove(struct platform_device *pdev)
{
struct zynqmp_qspi *xqspi = platform_get_drvdata(pdev);

+ pm_runtime_get_sync(&pdev->dev);
+
zynqmp_gqspi_write(xqspi, GQSPI_EN_OFST, 0x0);
+
+ pm_runtime_disable(&pdev->dev);
+ pm_runtime_put_noidle(&pdev->dev);
+ pm_runtime_set_suspended(&pdev->dev);
clk_disable_unprepare(xqspi->refclk);
clk_disable_unprepare(xqspi->pclk);
- pm_runtime_set_suspended(&pdev->dev);
- pm_runtime_disable(&pdev->dev);

return 0;
}
diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c
index d21f88de197c..301fe376a120 100644
--- a/drivers/target/target_core_device.c
+++ b/drivers/target/target_core_device.c
@@ -883,7 +883,6 @@ sector_t target_to_linux_sector(struct se_device *dev, sector_t lb)
EXPORT_SYMBOL(target_to_linux_sector);

struct devices_idr_iter {
- struct config_item *prev_item;
int (*fn)(struct se_device *dev, void *data);
void *data;
};
@@ -893,11 +892,9 @@ static int target_devices_idr_iter(int id, void *p, void *data)
{
struct devices_idr_iter *iter = data;
struct se_device *dev = p;
+ struct config_item *item;
int ret;

- config_item_put(iter->prev_item);
- iter->prev_item = NULL;
-
/*
* We add the device early to the idr, so it can be used
* by backend modules during configuration. We do not want
@@ -907,12 +904,13 @@ static int target_devices_idr_iter(int id, void *p, void *data)
if (!target_dev_configured(dev))
return 0;

- iter->prev_item = config_item_get_unless_zero(&dev->dev_group.cg_item);
- if (!iter->prev_item)
+ item = config_item_get_unless_zero(&dev->dev_group.cg_item);
+ if (!item)
return 0;
mutex_unlock(&device_mutex);

ret = iter->fn(dev, iter->data);
+ config_item_put(item);

mutex_lock(&device_mutex);
return ret;
@@ -935,7 +933,6 @@ int target_for_each_device(int (*fn)(struct se_device *dev, void *data),
mutex_lock(&device_mutex);
ret = idr_for_each(&devices_idr, target_devices_idr_iter, &iter);
mutex_unlock(&device_mutex);
- config_item_put(iter.prev_item);
return ret;
}

diff --git a/drivers/vhost/vringh.c b/drivers/vhost/vringh.c
index 33eb941fcf15..10bfc5f1c50d 100644
--- a/drivers/vhost/vringh.c
+++ b/drivers/vhost/vringh.c
@@ -123,8 +123,18 @@ static inline ssize_t vringh_iov_xfer(struct vringh *vrh,
done += partlen;
len -= partlen;
ptr += partlen;
+ iov->consumed += partlen;
+ iov->iov[iov->i].iov_len -= partlen;
+ iov->iov[iov->i].iov_base += partlen;

- vringh_kiov_advance(iov, partlen);
+ if (!iov->iov[iov->i].iov_len) {
+ /* Fix up old iov element then increment. */
+ iov->iov[iov->i].iov_len = iov->consumed;
+ iov->iov[iov->i].iov_base -= iov->consumed;
+
+ iov->consumed = 0;
+ iov->i++;
+ }
}
return done;
}
diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c
index c443f04aaad7..80b46de14f41 100644
--- a/drivers/xen/events/events_base.c
+++ b/drivers/xen/events/events_base.c
@@ -33,6 +33,7 @@
#include <linux/slab.h>
#include <linux/irqnr.h>
#include <linux/pci.h>
+#include <linux/rcupdate.h>
#include <linux/spinlock.h>
#include <linux/cpuhotplug.h>
#include <linux/atomic.h>
@@ -96,6 +97,7 @@ enum xen_irq_type {
struct irq_info {
struct list_head list;
struct list_head eoi_list;
+ struct rcu_work rwork;
short refcnt;
u8 spurious_cnt;
u8 is_accounted;
@@ -145,23 +147,13 @@ const struct evtchn_ops *evtchn_ops;
*/
static DEFINE_MUTEX(irq_mapping_update_lock);

-/*
- * Lock protecting event handling loop against removing event channels.
- * Adding of event channels is no issue as the associated IRQ becomes active
- * only after everything is setup (before request_[threaded_]irq() the handler
- * can't be entered for an event, as the event channel will be unmasked only
- * then).
- */
-static DEFINE_RWLOCK(evtchn_rwlock);
-
/*
* Lock hierarchy:
*
* irq_mapping_update_lock
- * evtchn_rwlock
- * IRQ-desc lock
- * percpu eoi_list_lock
- * irq_info->lock
+ * IRQ-desc lock
+ * percpu eoi_list_lock
+ * irq_info->lock
*/

static LIST_HEAD(xen_irq_list_head);
@@ -305,6 +297,22 @@ static void channels_on_cpu_inc(struct irq_info *info)
info->is_accounted = 1;
}

+static void delayed_free_irq(struct work_struct *work)
+{
+ struct irq_info *info = container_of(to_rcu_work(work), struct irq_info,
+ rwork);
+ unsigned int irq = info->irq;
+
+ /* Remove the info pointer only now, with no potential users left. */
+ set_info_for_irq(irq, NULL);
+
+ kfree(info);
+
+ /* Legacy IRQ descriptors are managed by the arch. */
+ if (irq >= nr_legacy_irqs())
+ irq_free_desc(irq);
+}
+
/* Constructors for packed IRQ information. */
static int xen_irq_info_common_setup(struct irq_info *info,
unsigned irq,
@@ -667,33 +675,36 @@ static void xen_irq_lateeoi_worker(struct work_struct *work)

eoi = container_of(to_delayed_work(work), struct lateeoi_work, delayed);

- read_lock_irqsave(&evtchn_rwlock, flags);
+ rcu_read_lock();

while (true) {
- spin_lock(&eoi->eoi_list_lock);
+ spin_lock_irqsave(&eoi->eoi_list_lock, flags);

info = list_first_entry_or_null(&eoi->eoi_list, struct irq_info,
eoi_list);

- if (info == NULL || now < info->eoi_time) {
- spin_unlock(&eoi->eoi_list_lock);
+ if (info == NULL)
+ break;
+
+ if (now < info->eoi_time) {
+ mod_delayed_work_on(info->eoi_cpu, system_wq,
+ &eoi->delayed,
+ info->eoi_time - now);
break;
}

list_del_init(&info->eoi_list);

- spin_unlock(&eoi->eoi_list_lock);
+ spin_unlock_irqrestore(&eoi->eoi_list_lock, flags);

info->eoi_time = 0;

xen_irq_lateeoi_locked(info, false);
}

- if (info)
- mod_delayed_work_on(info->eoi_cpu, system_wq,
- &eoi->delayed, info->eoi_time - now);
+ spin_unlock_irqrestore(&eoi->eoi_list_lock, flags);

- read_unlock_irqrestore(&evtchn_rwlock, flags);
+ rcu_read_unlock();
}

static void xen_cpu_init_eoi(unsigned int cpu)
@@ -708,16 +719,15 @@ static void xen_cpu_init_eoi(unsigned int cpu)
void xen_irq_lateeoi(unsigned int irq, unsigned int eoi_flags)
{
struct irq_info *info;
- unsigned long flags;

- read_lock_irqsave(&evtchn_rwlock, flags);
+ rcu_read_lock();

info = info_for_irq(irq);

if (info)
xen_irq_lateeoi_locked(info, eoi_flags & XEN_EOI_FLAG_SPURIOUS);

- read_unlock_irqrestore(&evtchn_rwlock, flags);
+ rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(xen_irq_lateeoi);

@@ -731,6 +741,7 @@ static void xen_irq_init(unsigned irq)

info->type = IRQT_UNBOUND;
info->refcnt = -1;
+ INIT_RCU_WORK(&info->rwork, delayed_free_irq);

set_info_for_irq(irq, info);
/*
@@ -788,31 +799,18 @@ static int __must_check xen_allocate_irq_gsi(unsigned gsi)
static void xen_free_irq(unsigned irq)
{
struct irq_info *info = info_for_irq(irq);
- unsigned long flags;

if (WARN_ON(!info))
return;

- write_lock_irqsave(&evtchn_rwlock, flags);
-
if (!list_empty(&info->eoi_list))
lateeoi_list_del(info);

list_del(&info->list);

- set_info_for_irq(irq, NULL);
-
WARN_ON(info->refcnt > 0);

- write_unlock_irqrestore(&evtchn_rwlock, flags);
-
- kfree(info);
-
- /* Legacy IRQ descriptors are managed by the arch. */
- if (irq < nr_legacy_irqs())
- return;
-
- irq_free_desc(irq);
+ queue_rcu_work(system_wq, &info->rwork);
}

static void xen_evtchn_close(evtchn_port_t port)
@@ -1716,7 +1714,14 @@ static void __xen_evtchn_do_upcall(void)
int cpu = smp_processor_id();
struct evtchn_loop_ctrl ctrl = { 0 };

- read_lock(&evtchn_rwlock);
+ /*
+ * When closing an event channel the associated IRQ must not be freed
+ * until all cpus have left the event handling loop. This is ensured
+ * by taking the rcu_read_lock() while handling events, as freeing of
+ * the IRQ is handled via queue_rcu_work() _after_ closing the event
+ * channel.
+ */
+ rcu_read_lock();

do {
vcpu_info->evtchn_upcall_pending = 0;
@@ -1729,7 +1734,7 @@ static void __xen_evtchn_do_upcall(void)

} while (vcpu_info->evtchn_upcall_pending);

- read_unlock(&evtchn_rwlock);
+ rcu_read_unlock();

/*
* Increment irq_epoch only now to defer EOIs only for
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 3bcef0c4d6fc..27d06bb5e5c0 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -28,6 +28,7 @@
#include <linux/refcount.h>
#include <linux/crc32c.h>
#include <linux/iomap.h>
+#include <linux/fscrypt.h>
#include "extent-io-tree.h"
#include "extent_io.h"
#include "extent_map.h"
@@ -3238,11 +3239,11 @@ static inline void btrfs_clear_sb_rdonly(struct super_block *sb)

/* root-item.c */
int btrfs_add_root_ref(struct btrfs_trans_handle *trans, u64 root_id,
- u64 ref_id, u64 dirid, u64 sequence, const char *name,
- int name_len);
+ u64 ref_id, u64 dirid, u64 sequence,
+ const struct fscrypt_str *name);
int btrfs_del_root_ref(struct btrfs_trans_handle *trans, u64 root_id,
- u64 ref_id, u64 dirid, u64 *sequence, const char *name,
- int name_len);
+ u64 ref_id, u64 dirid, u64 *sequence,
+ const struct fscrypt_str *name);
int btrfs_del_root(struct btrfs_trans_handle *trans,
const struct btrfs_key *key);
int btrfs_insert_root(struct btrfs_trans_handle *trans, struct btrfs_root *root,
@@ -3271,25 +3272,23 @@ int btrfs_uuid_tree_iterate(struct btrfs_fs_info *fs_info);

/* dir-item.c */
int btrfs_check_dir_item_collision(struct btrfs_root *root, u64 dir,
- const char *name, int name_len);
-int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, const char *name,
- int name_len, struct btrfs_inode *dir,
+ const struct fscrypt_str *name);
+int btrfs_insert_dir_item(struct btrfs_trans_handle *trans,
+ const struct fscrypt_str *name, struct btrfs_inode *dir,
struct btrfs_key *location, u8 type, u64 index);
struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path, u64 dir,
- const char *name, int name_len,
- int mod);
+ const struct fscrypt_str *name, int mod);
struct btrfs_dir_item *
btrfs_lookup_dir_index_item(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path, u64 dir,
- u64 index, const char *name, int name_len,
- int mod);
+ u64 index, const struct fscrypt_str *name, int mod);
struct btrfs_dir_item *
btrfs_search_dir_index_item(struct btrfs_root *root,
struct btrfs_path *path, u64 dirid,
- const char *name, int name_len);
+ const struct fscrypt_str *name);
int btrfs_delete_one_dir_name(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path,
@@ -3370,10 +3369,10 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry);
int btrfs_set_inode_index(struct btrfs_inode *dir, u64 *index);
int btrfs_unlink_inode(struct btrfs_trans_handle *trans,
struct btrfs_inode *dir, struct btrfs_inode *inode,
- const char *name, int name_len);
+ const struct fscrypt_str *name);
int btrfs_add_link(struct btrfs_trans_handle *trans,
struct btrfs_inode *parent_inode, struct btrfs_inode *inode,
- const char *name, int name_len, int add_backref, u64 index);
+ const struct fscrypt_str *name, int add_backref, u64 index);
int btrfs_delete_subvolume(struct inode *dir, struct dentry *dentry);
int btrfs_truncate_block(struct btrfs_inode *inode, loff_t from, loff_t len,
int front);
@@ -3398,6 +3397,7 @@ struct btrfs_new_inode_args {
*/
struct posix_acl *default_acl;
struct posix_acl *acl;
+ struct fscrypt_name fname;
};
int btrfs_new_inode_prepare(struct btrfs_new_inode_args *args,
unsigned int *trans_num_items);
diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c
index 72fb2c518a2b..fdab48c1abb8 100644
--- a/fs/btrfs/dir-item.c
+++ b/fs/btrfs/dir-item.c
@@ -103,8 +103,8 @@ int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans,
* to use for the second index (if one is created).
* Will return 0 or -ENOMEM
*/
-int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, const char *name,
- int name_len, struct btrfs_inode *dir,
+int btrfs_insert_dir_item(struct btrfs_trans_handle *trans,
+ const struct fscrypt_str *name, struct btrfs_inode *dir,
struct btrfs_key *location, u8 type, u64 index)
{
int ret = 0;
@@ -120,7 +120,7 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, const char *name,

key.objectid = btrfs_ino(dir);
key.type = BTRFS_DIR_ITEM_KEY;
- key.offset = btrfs_name_hash(name, name_len);
+ key.offset = btrfs_name_hash(name->name, name->len);

path = btrfs_alloc_path();
if (!path)
@@ -128,9 +128,9 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, const char *name,

btrfs_cpu_key_to_disk(&disk_key, location);

- data_size = sizeof(*dir_item) + name_len;
+ data_size = sizeof(*dir_item) + name->len;
dir_item = insert_with_overflow(trans, root, path, &key, data_size,
- name, name_len);
+ name->name, name->len);
if (IS_ERR(dir_item)) {
ret = PTR_ERR(dir_item);
if (ret == -EEXIST)
@@ -142,11 +142,11 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, const char *name,
btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
btrfs_set_dir_type(leaf, dir_item, type);
btrfs_set_dir_data_len(leaf, dir_item, 0);
- btrfs_set_dir_name_len(leaf, dir_item, name_len);
+ btrfs_set_dir_name_len(leaf, dir_item, name->len);
btrfs_set_dir_transid(leaf, dir_item, trans->transid);
name_ptr = (unsigned long)(dir_item + 1);

- write_extent_buffer(leaf, name, name_ptr, name_len);
+ write_extent_buffer(leaf, name->name, name_ptr, name->len);
btrfs_mark_buffer_dirty(leaf);

second_insert:
@@ -157,7 +157,7 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, const char *name,
}
btrfs_release_path(path);

- ret2 = btrfs_insert_delayed_dir_index(trans, name, name_len, dir,
+ ret2 = btrfs_insert_delayed_dir_index(trans, name->name, name->len, dir,
&disk_key, type, index);
out_free:
btrfs_free_path(path);
@@ -206,7 +206,7 @@ static struct btrfs_dir_item *btrfs_lookup_match_dir(
struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path, u64 dir,
- const char *name, int name_len,
+ const struct fscrypt_str *name,
int mod)
{
struct btrfs_key key;
@@ -214,9 +214,10 @@ struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans,

key.objectid = dir;
key.type = BTRFS_DIR_ITEM_KEY;
- key.offset = btrfs_name_hash(name, name_len);
+ key.offset = btrfs_name_hash(name->name, name->len);

- di = btrfs_lookup_match_dir(trans, root, path, &key, name, name_len, mod);
+ di = btrfs_lookup_match_dir(trans, root, path, &key, name->name,
+ name->len, mod);
if (IS_ERR(di) && PTR_ERR(di) == -ENOENT)
return NULL;

@@ -224,7 +225,7 @@ struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans,
}

int btrfs_check_dir_item_collision(struct btrfs_root *root, u64 dir,
- const char *name, int name_len)
+ const struct fscrypt_str *name)
{
int ret;
struct btrfs_key key;
@@ -240,9 +241,10 @@ int btrfs_check_dir_item_collision(struct btrfs_root *root, u64 dir,

key.objectid = dir;
key.type = BTRFS_DIR_ITEM_KEY;
- key.offset = btrfs_name_hash(name, name_len);
+ key.offset = btrfs_name_hash(name->name, name->len);

- di = btrfs_lookup_match_dir(NULL, root, path, &key, name, name_len, 0);
+ di = btrfs_lookup_match_dir(NULL, root, path, &key, name->name,
+ name->len, 0);
if (IS_ERR(di)) {
ret = PTR_ERR(di);
/* Nothing found, we're safe */
@@ -262,11 +264,8 @@ int btrfs_check_dir_item_collision(struct btrfs_root *root, u64 dir,
goto out;
}

- /*
- * see if there is room in the item to insert this
- * name
- */
- data_size = sizeof(*di) + name_len;
+ /* See if there is room in the item to insert this name. */
+ data_size = sizeof(*di) + name->len;
leaf = path->nodes[0];
slot = path->slots[0];
if (data_size + btrfs_item_size(leaf, slot) +
@@ -303,8 +302,7 @@ struct btrfs_dir_item *
btrfs_lookup_dir_index_item(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path, u64 dir,
- u64 index, const char *name, int name_len,
- int mod)
+ u64 index, const struct fscrypt_str *name, int mod)
{
struct btrfs_dir_item *di;
struct btrfs_key key;
@@ -313,7 +311,8 @@ btrfs_lookup_dir_index_item(struct btrfs_trans_handle *trans,
key.type = BTRFS_DIR_INDEX_KEY;
key.offset = index;

- di = btrfs_lookup_match_dir(trans, root, path, &key, name, name_len, mod);
+ di = btrfs_lookup_match_dir(trans, root, path, &key, name->name,
+ name->len, mod);
if (di == ERR_PTR(-ENOENT))
return NULL;

@@ -321,9 +320,8 @@ btrfs_lookup_dir_index_item(struct btrfs_trans_handle *trans,
}

struct btrfs_dir_item *
-btrfs_search_dir_index_item(struct btrfs_root *root,
- struct btrfs_path *path, u64 dirid,
- const char *name, int name_len)
+btrfs_search_dir_index_item(struct btrfs_root *root, struct btrfs_path *path,
+ u64 dirid, const struct fscrypt_str *name)
{
struct btrfs_dir_item *di;
struct btrfs_key key;
@@ -338,7 +336,7 @@ btrfs_search_dir_index_item(struct btrfs_root *root,
break;

di = btrfs_match_dir_item_name(root->fs_info, path,
- name, name_len);
+ name->name, name->len);
if (di)
return di;
}
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 77202addead8..0a46fff3dd06 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1458,8 +1458,13 @@ static ssize_t btrfs_direct_write(struct kiocb *iocb, struct iov_iter *from)
if (iocb->ki_flags & IOCB_NOWAIT)
ilock_flags |= BTRFS_ILOCK_TRY;

- /* If the write DIO is within EOF, use a shared lock */
- if (iocb->ki_pos + iov_iter_count(from) <= i_size_read(inode))
+ /*
+ * Use a shared lock only if the DIO write is within EOF and the security
+ * bits will likely not be dropped by file_remove_privs() called from
+ * btrfs_write_check(). Both conditions need to be rechecked after the
+ * lock has been acquired.
+ */
+ if (iocb->ki_pos + iov_iter_count(from) <= i_size_read(inode) && IS_NOSEC(inode))
ilock_flags |= BTRFS_ILOCK_SHARED;

relock:
@@ -1467,6 +1472,13 @@ static ssize_t btrfs_direct_write(struct kiocb *iocb, struct iov_iter *from)
if (err < 0)
return err;

+ /* Shared lock cannot be used with security bits set. */
+ if ((ilock_flags & BTRFS_ILOCK_SHARED) && !IS_NOSEC(inode)) {
+ btrfs_inode_unlock(inode, ilock_flags);
+ ilock_flags &= ~BTRFS_ILOCK_SHARED;
+ goto relock;
+ }
+
err = generic_write_checks(iocb, from);
if (err <= 0) {
btrfs_inode_unlock(inode, ilock_flags);
diff --git a/fs/btrfs/inode-item.c b/fs/btrfs/inode-item.c
index 0eeb5ea87894..5add022d3534 100644
--- a/fs/btrfs/inode-item.c
+++ b/fs/btrfs/inode-item.c
@@ -10,8 +10,8 @@
#include "print-tree.h"

struct btrfs_inode_ref *btrfs_find_name_in_backref(struct extent_buffer *leaf,
- int slot, const char *name,
- int name_len)
+ int slot,
+ const struct fscrypt_str *name)
{
struct btrfs_inode_ref *ref;
unsigned long ptr;
@@ -27,9 +27,10 @@ struct btrfs_inode_ref *btrfs_find_name_in_backref(struct extent_buffer *leaf,
len = btrfs_inode_ref_name_len(leaf, ref);
name_ptr = (unsigned long)(ref + 1);
cur_offset += len + sizeof(*ref);
- if (len != name_len)
+ if (len != name->len)
continue;
- if (memcmp_extent_buffer(leaf, name, name_ptr, name_len) == 0)
+ if (memcmp_extent_buffer(leaf, name->name, name_ptr,
+ name->len) == 0)
return ref;
}
return NULL;
@@ -37,7 +38,7 @@ struct btrfs_inode_ref *btrfs_find_name_in_backref(struct extent_buffer *leaf,

struct btrfs_inode_extref *btrfs_find_name_in_ext_backref(
struct extent_buffer *leaf, int slot, u64 ref_objectid,
- const char *name, int name_len)
+ const struct fscrypt_str *name)
{
struct btrfs_inode_extref *extref;
unsigned long ptr;
@@ -60,9 +61,10 @@ struct btrfs_inode_extref *btrfs_find_name_in_ext_backref(
name_ptr = (unsigned long)(&extref->name);
ref_name_len = btrfs_inode_extref_name_len(leaf, extref);

- if (ref_name_len == name_len &&
+ if (ref_name_len == name->len &&
btrfs_inode_extref_parent(leaf, extref) == ref_objectid &&
- (memcmp_extent_buffer(leaf, name, name_ptr, name_len) == 0))
+ (memcmp_extent_buffer(leaf, name->name, name_ptr,
+ name->len) == 0))
return extref;

cur_offset += ref_name_len + sizeof(*extref);
@@ -75,7 +77,7 @@ struct btrfs_inode_extref *
btrfs_lookup_inode_extref(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path,
- const char *name, int name_len,
+ const struct fscrypt_str *name,
u64 inode_objectid, u64 ref_objectid, int ins_len,
int cow)
{
@@ -84,7 +86,7 @@ btrfs_lookup_inode_extref(struct btrfs_trans_handle *trans,

key.objectid = inode_objectid;
key.type = BTRFS_INODE_EXTREF_KEY;
- key.offset = btrfs_extref_hash(ref_objectid, name, name_len);
+ key.offset = btrfs_extref_hash(ref_objectid, name->name, name->len);

ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow);
if (ret < 0)
@@ -92,13 +94,13 @@ btrfs_lookup_inode_extref(struct btrfs_trans_handle *trans,
if (ret > 0)
return NULL;
return btrfs_find_name_in_ext_backref(path->nodes[0], path->slots[0],
- ref_objectid, name, name_len);
+ ref_objectid, name);

}

static int btrfs_del_inode_extref(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
- const char *name, int name_len,
+ const struct fscrypt_str *name,
u64 inode_objectid, u64 ref_objectid,
u64 *index)
{
@@ -107,14 +109,14 @@ static int btrfs_del_inode_extref(struct btrfs_trans_handle *trans,
struct btrfs_inode_extref *extref;
struct extent_buffer *leaf;
int ret;
- int del_len = name_len + sizeof(*extref);
+ int del_len = name->len + sizeof(*extref);
unsigned long ptr;
unsigned long item_start;
u32 item_size;

key.objectid = inode_objectid;
key.type = BTRFS_INODE_EXTREF_KEY;
- key.offset = btrfs_extref_hash(ref_objectid, name, name_len);
+ key.offset = btrfs_extref_hash(ref_objectid, name->name, name->len);

path = btrfs_alloc_path();
if (!path)
@@ -132,7 +134,7 @@ static int btrfs_del_inode_extref(struct btrfs_trans_handle *trans,
* readonly.
*/
extref = btrfs_find_name_in_ext_backref(path->nodes[0], path->slots[0],
- ref_objectid, name, name_len);
+ ref_objectid, name);
if (!extref) {
btrfs_handle_fs_error(root->fs_info, -ENOENT, NULL);
ret = -EROFS;
@@ -168,8 +170,7 @@ static int btrfs_del_inode_extref(struct btrfs_trans_handle *trans,
}

int btrfs_del_inode_ref(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- const char *name, int name_len,
+ struct btrfs_root *root, const struct fscrypt_str *name,
u64 inode_objectid, u64 ref_objectid, u64 *index)
{
struct btrfs_path *path;
@@ -182,7 +183,7 @@ int btrfs_del_inode_ref(struct btrfs_trans_handle *trans,
u32 sub_item_len;
int ret;
int search_ext_refs = 0;
- int del_len = name_len + sizeof(*ref);
+ int del_len = name->len + sizeof(*ref);

key.objectid = inode_objectid;
key.offset = ref_objectid;
@@ -201,8 +202,7 @@ int btrfs_del_inode_ref(struct btrfs_trans_handle *trans,
goto out;
}

- ref = btrfs_find_name_in_backref(path->nodes[0], path->slots[0], name,
- name_len);
+ ref = btrfs_find_name_in_backref(path->nodes[0], path->slots[0], name);
if (!ref) {
ret = -ENOENT;
search_ext_refs = 1;
@@ -219,7 +219,7 @@ int btrfs_del_inode_ref(struct btrfs_trans_handle *trans,
goto out;
}
ptr = (unsigned long)ref;
- sub_item_len = name_len + sizeof(*ref);
+ sub_item_len = name->len + sizeof(*ref);
item_start = btrfs_item_ptr_offset(leaf, path->slots[0]);
memmove_extent_buffer(leaf, ptr, ptr + sub_item_len,
item_size - (ptr + sub_item_len - item_start));
@@ -233,7 +233,7 @@ int btrfs_del_inode_ref(struct btrfs_trans_handle *trans,
* name in our ref array. Find and remove the extended
* inode ref then.
*/
- return btrfs_del_inode_extref(trans, root, name, name_len,
+ return btrfs_del_inode_extref(trans, root, name,
inode_objectid, ref_objectid, index);
}

@@ -247,12 +247,13 @@ int btrfs_del_inode_ref(struct btrfs_trans_handle *trans,
*/
static int btrfs_insert_inode_extref(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
- const char *name, int name_len,
- u64 inode_objectid, u64 ref_objectid, u64 index)
+ const struct fscrypt_str *name,
+ u64 inode_objectid, u64 ref_objectid,
+ u64 index)
{
struct btrfs_inode_extref *extref;
int ret;
- int ins_len = name_len + sizeof(*extref);
+ int ins_len = name->len + sizeof(*extref);
unsigned long ptr;
struct btrfs_path *path;
struct btrfs_key key;
@@ -260,7 +261,7 @@ static int btrfs_insert_inode_extref(struct btrfs_trans_handle *trans,

key.objectid = inode_objectid;
key.type = BTRFS_INODE_EXTREF_KEY;
- key.offset = btrfs_extref_hash(ref_objectid, name, name_len);
+ key.offset = btrfs_extref_hash(ref_objectid, name->name, name->len);

path = btrfs_alloc_path();
if (!path)
@@ -272,7 +273,7 @@ static int btrfs_insert_inode_extref(struct btrfs_trans_handle *trans,
if (btrfs_find_name_in_ext_backref(path->nodes[0],
path->slots[0],
ref_objectid,
- name, name_len))
+ name))
goto out;

btrfs_extend_item(path, ins_len);
@@ -286,12 +287,12 @@ static int btrfs_insert_inode_extref(struct btrfs_trans_handle *trans,
ptr += btrfs_item_size(leaf, path->slots[0]) - ins_len;
extref = (struct btrfs_inode_extref *)ptr;

- btrfs_set_inode_extref_name_len(path->nodes[0], extref, name_len);
+ btrfs_set_inode_extref_name_len(path->nodes[0], extref, name->len);
btrfs_set_inode_extref_index(path->nodes[0], extref, index);
btrfs_set_inode_extref_parent(path->nodes[0], extref, ref_objectid);

ptr = (unsigned long)&extref->name;
- write_extent_buffer(path->nodes[0], name, ptr, name_len);
+ write_extent_buffer(path->nodes[0], name->name, ptr, name->len);
btrfs_mark_buffer_dirty(path->nodes[0]);

out:
@@ -301,8 +302,7 @@ static int btrfs_insert_inode_extref(struct btrfs_trans_handle *trans,

/* Will return 0, -ENOMEM, -EMLINK, or -EEXIST or anything from the CoW path */
int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- const char *name, int name_len,
+ struct btrfs_root *root, const struct fscrypt_str *name,
u64 inode_objectid, u64 ref_objectid, u64 index)
{
struct btrfs_fs_info *fs_info = root->fs_info;
@@ -311,7 +311,7 @@ int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans,
struct btrfs_inode_ref *ref;
unsigned long ptr;
int ret;
- int ins_len = name_len + sizeof(*ref);
+ int ins_len = name->len + sizeof(*ref);

key.objectid = inode_objectid;
key.offset = ref_objectid;
@@ -327,7 +327,7 @@ int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans,
if (ret == -EEXIST) {
u32 old_size;
ref = btrfs_find_name_in_backref(path->nodes[0], path->slots[0],
- name, name_len);
+ name);
if (ref)
goto out;

@@ -336,7 +336,7 @@ int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans,
ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
struct btrfs_inode_ref);
ref = (struct btrfs_inode_ref *)((unsigned long)ref + old_size);
- btrfs_set_inode_ref_name_len(path->nodes[0], ref, name_len);
+ btrfs_set_inode_ref_name_len(path->nodes[0], ref, name->len);
btrfs_set_inode_ref_index(path->nodes[0], ref, index);
ptr = (unsigned long)(ref + 1);
ret = 0;
@@ -344,7 +344,7 @@ int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans,
if (ret == -EOVERFLOW) {
if (btrfs_find_name_in_backref(path->nodes[0],
path->slots[0],
- name, name_len))
+ name))
ret = -EEXIST;
else
ret = -EMLINK;
@@ -353,11 +353,11 @@ int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans,
} else {
ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
struct btrfs_inode_ref);
- btrfs_set_inode_ref_name_len(path->nodes[0], ref, name_len);
+ btrfs_set_inode_ref_name_len(path->nodes[0], ref, name->len);
btrfs_set_inode_ref_index(path->nodes[0], ref, index);
ptr = (unsigned long)(ref + 1);
}
- write_extent_buffer(path->nodes[0], name, ptr, name_len);
+ write_extent_buffer(path->nodes[0], name->name, ptr, name->len);
btrfs_mark_buffer_dirty(path->nodes[0]);

out:
@@ -370,7 +370,6 @@ int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans,
if (btrfs_super_incompat_flags(disk_super)
& BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF)
ret = btrfs_insert_inode_extref(trans, root, name,
- name_len,
inode_objectid,
ref_objectid, index);
}
diff --git a/fs/btrfs/inode-item.h b/fs/btrfs/inode-item.h
index a8fc16d0147f..b80aeb715701 100644
--- a/fs/btrfs/inode-item.h
+++ b/fs/btrfs/inode-item.h
@@ -64,33 +64,31 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_truncate_control *control);
int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- const char *name, int name_len,
+ struct btrfs_root *root, const struct fscrypt_str *name,
u64 inode_objectid, u64 ref_objectid, u64 index);
int btrfs_del_inode_ref(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- const char *name, int name_len,
- u64 inode_objectid, u64 ref_objectid, u64 *index);
+ struct btrfs_root *root, const struct fscrypt_str *name,
+ u64 inode_objectid, u64 ref_objectid, u64 *index);
int btrfs_insert_empty_inode(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path, u64 objectid);
-int btrfs_lookup_inode(struct btrfs_trans_handle *trans, struct btrfs_root
- *root, struct btrfs_path *path,
+int btrfs_lookup_inode(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, struct btrfs_path *path,
struct btrfs_key *location, int mod);

struct btrfs_inode_extref *btrfs_lookup_inode_extref(
struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path,
- const char *name, int name_len,
+ const struct fscrypt_str *name,
u64 inode_objectid, u64 ref_objectid, int ins_len,
int cow);

struct btrfs_inode_ref *btrfs_find_name_in_backref(struct extent_buffer *leaf,
- int slot, const char *name,
- int name_len);
+ int slot,
+ const struct fscrypt_str *name);
struct btrfs_inode_extref *btrfs_find_name_in_ext_backref(
struct extent_buffer *leaf, int slot, u64 ref_objectid,
- const char *name, int name_len);
+ const struct fscrypt_str *name);

#endif
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 222068bf8003..4063447217f9 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -3627,7 +3627,7 @@ void btrfs_run_delayed_iputs(struct btrfs_fs_info *fs_info)
spin_unlock(&fs_info->delayed_iput_lock);
}

-/**
+/*
* Wait for flushing all delayed iputs
*
* @fs_info: the filesystem
@@ -4272,7 +4272,7 @@ int btrfs_update_inode_fallback(struct btrfs_trans_handle *trans,
static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans,
struct btrfs_inode *dir,
struct btrfs_inode *inode,
- const char *name, int name_len,
+ const struct fscrypt_str *name,
struct btrfs_rename_ctx *rename_ctx)
{
struct btrfs_root *root = dir->root;
@@ -4290,8 +4290,7 @@ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans,
goto out;
}

- di = btrfs_lookup_dir_item(trans, root, path, dir_ino,
- name, name_len, -1);
+ di = btrfs_lookup_dir_item(trans, root, path, dir_ino, name, -1);
if (IS_ERR_OR_NULL(di)) {
ret = di ? PTR_ERR(di) : -ENOENT;
goto err;
@@ -4319,12 +4318,11 @@ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans,
}
}

- ret = btrfs_del_inode_ref(trans, root, name, name_len, ino,
- dir_ino, &index);
+ ret = btrfs_del_inode_ref(trans, root, name, ino, dir_ino, &index);
if (ret) {
btrfs_info(fs_info,
"failed to delete reference to %.*s, inode %llu parent %llu",
- name_len, name, ino, dir_ino);
+ name->len, name->name, ino, dir_ino);
btrfs_abort_transaction(trans, ret);
goto err;
}
@@ -4345,10 +4343,8 @@ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans,
* operations on the log tree, increasing latency for applications.
*/
if (!rename_ctx) {
- btrfs_del_inode_ref_in_log(trans, root, name, name_len, inode,
- dir_ino);
- btrfs_del_dir_entries_in_log(trans, root, name, name_len, dir,
- index);
+ btrfs_del_inode_ref_in_log(trans, root, name, inode, dir_ino);
+ btrfs_del_dir_entries_in_log(trans, root, name, dir, index);
}

/*
@@ -4366,7 +4362,7 @@ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans,
if (ret)
goto out;

- btrfs_i_size_write(dir, dir->vfs_inode.i_size - name_len * 2);
+ btrfs_i_size_write(dir, dir->vfs_inode.i_size - name->len * 2);
inode_inc_iversion(&inode->vfs_inode);
inode_inc_iversion(&dir->vfs_inode);
inode->vfs_inode.i_ctime = current_time(&inode->vfs_inode);
@@ -4379,10 +4375,11 @@ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans,

int btrfs_unlink_inode(struct btrfs_trans_handle *trans,
struct btrfs_inode *dir, struct btrfs_inode *inode,
- const char *name, int name_len)
+ const struct fscrypt_str *name)
{
int ret;
- ret = __btrfs_unlink_inode(trans, dir, inode, name, name_len, NULL);
+
+ ret = __btrfs_unlink_inode(trans, dir, inode, name, NULL);
if (!ret) {
drop_nlink(&inode->vfs_inode);
ret = btrfs_update_inode(trans, inode->root, inode);
@@ -4418,29 +4415,39 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
struct btrfs_trans_handle *trans;
struct inode *inode = d_inode(dentry);
int ret;
+ struct fscrypt_name fname;
+
+ ret = fscrypt_setup_filename(dir, &dentry->d_name, 1, &fname);
+ if (ret)
+ return ret;
+
+ /* This needs to handle no-key deletions later on */

trans = __unlink_start_trans(dir);
- if (IS_ERR(trans))
- return PTR_ERR(trans);
+ if (IS_ERR(trans)) {
+ ret = PTR_ERR(trans);
+ goto fscrypt_free;
+ }

btrfs_record_unlink_dir(trans, BTRFS_I(dir), BTRFS_I(d_inode(dentry)),
0);

- ret = btrfs_unlink_inode(trans, BTRFS_I(dir),
- BTRFS_I(d_inode(dentry)), dentry->d_name.name,
- dentry->d_name.len);
+ ret = btrfs_unlink_inode(trans, BTRFS_I(dir), BTRFS_I(d_inode(dentry)),
+ &fname.disk_name);
if (ret)
- goto out;
+ goto end_trans;

if (inode->i_nlink == 0) {
ret = btrfs_orphan_add(trans, BTRFS_I(inode));
if (ret)
- goto out;
+ goto end_trans;
}

-out:
+end_trans:
btrfs_end_transaction(trans);
btrfs_btree_balance_dirty(BTRFS_I(dir)->root->fs_info);
+fscrypt_free:
+ fscrypt_free_filename(&fname);
return ret;
}

@@ -4453,12 +4460,17 @@ static int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
struct extent_buffer *leaf;
struct btrfs_dir_item *di;
struct btrfs_key key;
- const char *name = dentry->d_name.name;
- int name_len = dentry->d_name.len;
u64 index;
int ret;
u64 objectid;
u64 dir_ino = btrfs_ino(BTRFS_I(dir));
+ struct fscrypt_name fname;
+
+ ret = fscrypt_setup_filename(dir, &dentry->d_name, 1, &fname);
+ if (ret)
+ return ret;
+
+ /* This needs to handle no-key deletions later on */

if (btrfs_ino(inode) == BTRFS_FIRST_FREE_OBJECTID) {
objectid = inode->root->root_key.objectid;
@@ -4466,15 +4478,18 @@ static int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
objectid = inode->location.objectid;
} else {
WARN_ON(1);
+ fscrypt_free_filename(&fname);
return -EINVAL;
}

path = btrfs_alloc_path();
- if (!path)
- return -ENOMEM;
+ if (!path) {
+ ret = -ENOMEM;
+ goto out;
+ }

di = btrfs_lookup_dir_item(trans, root, path, dir_ino,
- name, name_len, -1);
+ &fname.disk_name, -1);
if (IS_ERR_OR_NULL(di)) {
ret = di ? PTR_ERR(di) : -ENOENT;
goto out;
@@ -4500,8 +4515,7 @@ static int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
* call btrfs_del_root_ref, and it _shouldn't_ fail.
*/
if (btrfs_ino(inode) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID) {
- di = btrfs_search_dir_index_item(root, path, dir_ino,
- name, name_len);
+ di = btrfs_search_dir_index_item(root, path, dir_ino, &fname.disk_name);
if (IS_ERR_OR_NULL(di)) {
if (!di)
ret = -ENOENT;
@@ -4518,7 +4532,7 @@ static int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
} else {
ret = btrfs_del_root_ref(trans, objectid,
root->root_key.objectid, dir_ino,
- &index, name, name_len);
+ &index, &fname.disk_name);
if (ret) {
btrfs_abort_transaction(trans, ret);
goto out;
@@ -4531,7 +4545,7 @@ static int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
goto out;
}

- btrfs_i_size_write(BTRFS_I(dir), dir->i_size - name_len * 2);
+ btrfs_i_size_write(BTRFS_I(dir), dir->i_size - fname.disk_name.len * 2);
inode_inc_iversion(dir);
dir->i_mtime = current_time(dir);
dir->i_ctime = dir->i_mtime;
@@ -4540,6 +4554,7 @@ static int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
btrfs_abort_transaction(trans, ret);
out:
btrfs_free_path(path);
+ fscrypt_free_filename(&fname);
return ret;
}

@@ -4553,6 +4568,7 @@ static noinline int may_destroy_subvol(struct btrfs_root *root)
struct btrfs_path *path;
struct btrfs_dir_item *di;
struct btrfs_key key;
+ struct fscrypt_str name = FSTR_INIT("default", 7);
u64 dir_id;
int ret;

@@ -4563,7 +4579,7 @@ static noinline int may_destroy_subvol(struct btrfs_root *root)
/* Make sure this root isn't set as the default subvol */
dir_id = btrfs_super_root_dir(fs_info->super_copy);
di = btrfs_lookup_dir_item(NULL, fs_info->tree_root, path,
- dir_id, "default", 7, 0);
+ dir_id, &name, 0);
if (di && !IS_ERR(di)) {
btrfs_dir_item_key_to_cpu(path->nodes[0], di, &key);
if (key.objectid == root->root_key.objectid) {
@@ -4802,6 +4818,7 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
int err = 0;
struct btrfs_trans_handle *trans;
u64 last_unlink_trans;
+ struct fscrypt_name fname;

if (inode->i_size > BTRFS_EMPTY_DIR_SIZE)
return -ENOTEMPTY;
@@ -4814,9 +4831,17 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
return btrfs_delete_subvolume(dir, dentry);
}

+ err = fscrypt_setup_filename(dir, &dentry->d_name, 1, &fname);
+ if (err)
+ return err;
+
+ /* This needs to handle no-key deletions later on */
+
trans = __unlink_start_trans(dir);
- if (IS_ERR(trans))
- return PTR_ERR(trans);
+ if (IS_ERR(trans)) {
+ err = PTR_ERR(trans);
+ goto out_notrans;
+ }

if (unlikely(btrfs_ino(BTRFS_I(inode)) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) {
err = btrfs_unlink_subvol(trans, dir, dentry);
@@ -4830,9 +4855,8 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
last_unlink_trans = BTRFS_I(inode)->last_unlink_trans;

/* now the directory is empty */
- err = btrfs_unlink_inode(trans, BTRFS_I(dir),
- BTRFS_I(d_inode(dentry)), dentry->d_name.name,
- dentry->d_name.len);
+ err = btrfs_unlink_inode(trans, BTRFS_I(dir), BTRFS_I(d_inode(dentry)),
+ &fname.disk_name);
if (!err) {
btrfs_i_size_write(BTRFS_I(inode), 0);
/*
@@ -4851,7 +4875,9 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
}
out:
btrfs_end_transaction(trans);
+out_notrans:
btrfs_btree_balance_dirty(fs_info);
+ fscrypt_free_filename(&fname);

return err;
}
@@ -5532,19 +5558,24 @@ void btrfs_evict_inode(struct inode *inode)
static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry,
struct btrfs_key *location, u8 *type)
{
- const char *name = dentry->d_name.name;
- int namelen = dentry->d_name.len;
struct btrfs_dir_item *di;
struct btrfs_path *path;
struct btrfs_root *root = BTRFS_I(dir)->root;
int ret = 0;
+ struct fscrypt_name fname;

path = btrfs_alloc_path();
if (!path)
return -ENOMEM;

+ ret = fscrypt_setup_filename(dir, &dentry->d_name, 1, &fname);
+ if (ret)
+ goto out;
+
+ /* This needs to handle no-key deletions later on */
+
di = btrfs_lookup_dir_item(NULL, root, path, btrfs_ino(BTRFS_I(dir)),
- name, namelen, 0);
+ &fname.disk_name, 0);
if (IS_ERR_OR_NULL(di)) {
ret = di ? PTR_ERR(di) : -ENOENT;
goto out;
@@ -5556,12 +5587,13 @@ static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry,
ret = -EUCLEAN;
btrfs_warn(root->fs_info,
"%s gets something invalid in DIR_ITEM (name %s, directory ino %llu, location(%llu %u %llu))",
- __func__, name, btrfs_ino(BTRFS_I(dir)),
+ __func__, fname.disk_name.name, btrfs_ino(BTRFS_I(dir)),
location->objectid, location->type, location->offset);
}
if (!ret)
*type = btrfs_dir_type(path->nodes[0], di);
out:
+ fscrypt_free_filename(&fname);
btrfs_free_path(path);
return ret;
}
@@ -5584,6 +5616,11 @@ static int fixup_tree_root_location(struct btrfs_fs_info *fs_info,
struct btrfs_key key;
int ret;
int err = 0;
+ struct fscrypt_name fname;
+
+ ret = fscrypt_setup_filename(dir, &dentry->d_name, 0, &fname);
+ if (ret)
+ return ret;

path = btrfs_alloc_path();
if (!path) {
@@ -5606,12 +5643,11 @@ static int fixup_tree_root_location(struct btrfs_fs_info *fs_info,
leaf = path->nodes[0];
ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_root_ref);
if (btrfs_root_ref_dirid(leaf, ref) != btrfs_ino(BTRFS_I(dir)) ||
- btrfs_root_ref_name_len(leaf, ref) != dentry->d_name.len)
+ btrfs_root_ref_name_len(leaf, ref) != fname.disk_name.len)
goto out;

- ret = memcmp_extent_buffer(leaf, dentry->d_name.name,
- (unsigned long)(ref + 1),
- dentry->d_name.len);
+ ret = memcmp_extent_buffer(leaf, fname.disk_name.name,
+ (unsigned long)(ref + 1), fname.disk_name.len);
if (ret)
goto out;

@@ -5630,6 +5666,7 @@ static int fixup_tree_root_location(struct btrfs_fs_info *fs_info,
err = 0;
out:
btrfs_free_path(path);
+ fscrypt_free_filename(&fname);
return err;
}

@@ -6238,9 +6275,18 @@ int btrfs_new_inode_prepare(struct btrfs_new_inode_args *args,
struct inode *inode = args->inode;
int ret;

+ if (!args->orphan) {
+ ret = fscrypt_setup_filename(dir, &args->dentry->d_name, 0,
+ &args->fname);
+ if (ret)
+ return ret;
+ }
+
ret = posix_acl_create(dir, &inode->i_mode, &args->default_acl, &args->acl);
- if (ret)
+ if (ret) {
+ fscrypt_free_filename(&args->fname);
return ret;
+ }

/* 1 to add inode item */
*trans_num_items = 1;
@@ -6280,6 +6326,7 @@ void btrfs_new_inode_args_destroy(struct btrfs_new_inode_args *args)
{
posix_acl_release(args->acl);
posix_acl_release(args->default_acl);
+ fscrypt_free_filename(&args->fname);
}

/*
@@ -6315,8 +6362,7 @@ int btrfs_create_new_inode(struct btrfs_trans_handle *trans,
{
struct inode *dir = args->dir;
struct inode *inode = args->inode;
- const char *name = args->orphan ? NULL : args->dentry->d_name.name;
- int name_len = args->orphan ? 0 : args->dentry->d_name.len;
+ const struct fscrypt_str *name = args->orphan ? NULL : &args->fname.disk_name;
struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb);
struct btrfs_root *root;
struct btrfs_inode_item *inode_item;
@@ -6417,7 +6463,7 @@ int btrfs_create_new_inode(struct btrfs_trans_handle *trans,
sizes[1] = 2 + sizeof(*ref);
} else {
key[1].offset = btrfs_ino(BTRFS_I(dir));
- sizes[1] = name_len + sizeof(*ref);
+ sizes[1] = name->len + sizeof(*ref);
}
}

@@ -6456,10 +6502,12 @@ int btrfs_create_new_inode(struct btrfs_trans_handle *trans,
btrfs_set_inode_ref_index(path->nodes[0], ref, 0);
write_extent_buffer(path->nodes[0], "..", ptr, 2);
} else {
- btrfs_set_inode_ref_name_len(path->nodes[0], ref, name_len);
+ btrfs_set_inode_ref_name_len(path->nodes[0], ref,
+ name->len);
btrfs_set_inode_ref_index(path->nodes[0], ref,
BTRFS_I(inode)->dir_index);
- write_extent_buffer(path->nodes[0], name, ptr, name_len);
+ write_extent_buffer(path->nodes[0], name->name, ptr,
+ name->len);
}
}

@@ -6520,7 +6568,7 @@ int btrfs_create_new_inode(struct btrfs_trans_handle *trans,
ret = btrfs_orphan_add(trans, BTRFS_I(inode));
} else {
ret = btrfs_add_link(trans, BTRFS_I(dir), BTRFS_I(inode), name,
- name_len, 0, BTRFS_I(inode)->dir_index);
+ 0, BTRFS_I(inode)->dir_index);
}
if (ret) {
btrfs_abort_transaction(trans, ret);
@@ -6549,7 +6597,7 @@ int btrfs_create_new_inode(struct btrfs_trans_handle *trans,
*/
int btrfs_add_link(struct btrfs_trans_handle *trans,
struct btrfs_inode *parent_inode, struct btrfs_inode *inode,
- const char *name, int name_len, int add_backref, u64 index)
+ const struct fscrypt_str *name, int add_backref, u64 index)
{
int ret = 0;
struct btrfs_key key;
@@ -6568,17 +6616,17 @@ int btrfs_add_link(struct btrfs_trans_handle *trans,
if (unlikely(ino == BTRFS_FIRST_FREE_OBJECTID)) {
ret = btrfs_add_root_ref(trans, key.objectid,
root->root_key.objectid, parent_ino,
- index, name, name_len);
+ index, name);
} else if (add_backref) {
- ret = btrfs_insert_inode_ref(trans, root, name, name_len, ino,
- parent_ino, index);
+ ret = btrfs_insert_inode_ref(trans, root, name,
+ ino, parent_ino, index);
}

/* Nothing to clean up yet */
if (ret)
return ret;

- ret = btrfs_insert_dir_item(trans, name, name_len, parent_inode, &key,
+ ret = btrfs_insert_dir_item(trans, name, parent_inode, &key,
btrfs_inode_type(&inode->vfs_inode), index);
if (ret == -EEXIST || ret == -EOVERFLOW)
goto fail_dir_item;
@@ -6588,7 +6636,7 @@ int btrfs_add_link(struct btrfs_trans_handle *trans,
}

btrfs_i_size_write(parent_inode, parent_inode->vfs_inode.i_size +
- name_len * 2);
+ name->len * 2);
inode_inc_iversion(&parent_inode->vfs_inode);
/*
* If we are replaying a log tree, we do not want to update the mtime
@@ -6613,15 +6661,15 @@ int btrfs_add_link(struct btrfs_trans_handle *trans,
int err;
err = btrfs_del_root_ref(trans, key.objectid,
root->root_key.objectid, parent_ino,
- &local_index, name, name_len);
+ &local_index, name);
if (err)
btrfs_abort_transaction(trans, err);
} else if (add_backref) {
u64 local_index;
int err;

- err = btrfs_del_inode_ref(trans, root, name, name_len,
- ino, parent_ino, &local_index);
+ err = btrfs_del_inode_ref(trans, root, name, ino, parent_ino,
+ &local_index);
if (err)
btrfs_abort_transaction(trans, err);
}
@@ -6704,6 +6752,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
struct btrfs_root *root = BTRFS_I(dir)->root;
struct inode *inode = d_inode(old_dentry);
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct fscrypt_name fname;
u64 index;
int err;
int drop_inode = 0;
@@ -6715,6 +6764,10 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
if (inode->i_nlink >= BTRFS_LINK_MAX)
return -EMLINK;

+ err = fscrypt_setup_filename(dir, &dentry->d_name, 0, &fname);
+ if (err)
+ goto fail;
+
err = btrfs_set_inode_index(BTRFS_I(dir), &index);
if (err)
goto fail;
@@ -6741,7 +6794,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
set_bit(BTRFS_INODE_COPY_EVERYTHING, &BTRFS_I(inode)->runtime_flags);

err = btrfs_add_link(trans, BTRFS_I(dir), BTRFS_I(inode),
- dentry->d_name.name, dentry->d_name.len, 1, index);
+ &fname.disk_name, 1, index);

if (err) {
drop_inode = 1;
@@ -6765,6 +6818,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
}

fail:
+ fscrypt_free_filename(&fname);
if (trans)
btrfs_end_transaction(trans);
if (drop_inode) {
@@ -9037,6 +9091,8 @@ static int btrfs_rename_exchange(struct inode *old_dir,
int ret;
int ret2;
bool need_abort = false;
+ struct fscrypt_name old_fname, new_fname;
+ struct fscrypt_str *old_name, *new_name;

/*
* For non-subvolumes allow exchange only within one subvolume, in the
@@ -9048,6 +9104,19 @@ static int btrfs_rename_exchange(struct inode *old_dir,
new_ino != BTRFS_FIRST_FREE_OBJECTID))
return -EXDEV;

+ ret = fscrypt_setup_filename(old_dir, &old_dentry->d_name, 0, &old_fname);
+ if (ret)
+ return ret;
+
+ ret = fscrypt_setup_filename(new_dir, &new_dentry->d_name, 0, &new_fname);
+ if (ret) {
+ fscrypt_free_filename(&old_fname);
+ return ret;
+ }
+
+ old_name = &old_fname.disk_name;
+ new_name = &new_fname.disk_name;
+
/* close the race window with snapshot create/destroy ioctl */
if (old_ino == BTRFS_FIRST_FREE_OBJECTID ||
new_ino == BTRFS_FIRST_FREE_OBJECTID)
@@ -9115,10 +9184,7 @@ static int btrfs_rename_exchange(struct inode *old_dir,
/* force full log commit if subvolume involved. */
btrfs_set_log_full_commit(trans);
} else {
- ret = btrfs_insert_inode_ref(trans, dest,
- new_dentry->d_name.name,
- new_dentry->d_name.len,
- old_ino,
+ ret = btrfs_insert_inode_ref(trans, dest, new_name, old_ino,
btrfs_ino(BTRFS_I(new_dir)),
old_idx);
if (ret)
@@ -9131,10 +9197,7 @@ static int btrfs_rename_exchange(struct inode *old_dir,
/* force full log commit if subvolume involved. */
btrfs_set_log_full_commit(trans);
} else {
- ret = btrfs_insert_inode_ref(trans, root,
- old_dentry->d_name.name,
- old_dentry->d_name.len,
- new_ino,
+ ret = btrfs_insert_inode_ref(trans, root, old_name, new_ino,
btrfs_ino(BTRFS_I(old_dir)),
new_idx);
if (ret) {
@@ -9169,9 +9232,7 @@ static int btrfs_rename_exchange(struct inode *old_dir,
} else { /* src is an inode */
ret = __btrfs_unlink_inode(trans, BTRFS_I(old_dir),
BTRFS_I(old_dentry->d_inode),
- old_dentry->d_name.name,
- old_dentry->d_name.len,
- &old_rename_ctx);
+ old_name, &old_rename_ctx);
if (!ret)
ret = btrfs_update_inode(trans, root, BTRFS_I(old_inode));
}
@@ -9186,9 +9247,7 @@ static int btrfs_rename_exchange(struct inode *old_dir,
} else { /* dest is an inode */
ret = __btrfs_unlink_inode(trans, BTRFS_I(new_dir),
BTRFS_I(new_dentry->d_inode),
- new_dentry->d_name.name,
- new_dentry->d_name.len,
- &new_rename_ctx);
+ new_name, &new_rename_ctx);
if (!ret)
ret = btrfs_update_inode(trans, dest, BTRFS_I(new_inode));
}
@@ -9198,16 +9257,14 @@ static int btrfs_rename_exchange(struct inode *old_dir,
}

ret = btrfs_add_link(trans, BTRFS_I(new_dir), BTRFS_I(old_inode),
- new_dentry->d_name.name,
- new_dentry->d_name.len, 0, old_idx);
+ new_name, 0, old_idx);
if (ret) {
btrfs_abort_transaction(trans, ret);
goto out_fail;
}

ret = btrfs_add_link(trans, BTRFS_I(old_dir), BTRFS_I(new_inode),
- old_dentry->d_name.name,
- old_dentry->d_name.len, 0, new_idx);
+ old_name, 0, new_idx);
if (ret) {
btrfs_abort_transaction(trans, ret);
goto out_fail;
@@ -9250,6 +9307,8 @@ static int btrfs_rename_exchange(struct inode *old_dir,
old_ino == BTRFS_FIRST_FREE_OBJECTID)
up_read(&fs_info->subvol_sem);

+ fscrypt_free_filename(&new_fname);
+ fscrypt_free_filename(&old_fname);
return ret;
}

@@ -9289,6 +9348,7 @@ static int btrfs_rename(struct user_namespace *mnt_userns,
int ret;
int ret2;
u64 old_ino = btrfs_ino(BTRFS_I(old_inode));
+ struct fscrypt_name old_fname, new_fname;

if (btrfs_ino(BTRFS_I(new_dir)) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)
return -EPERM;
@@ -9305,22 +9365,28 @@ static int btrfs_rename(struct user_namespace *mnt_userns,
new_inode->i_size > BTRFS_EMPTY_DIR_SIZE)
return -ENOTEMPTY;

+ ret = fscrypt_setup_filename(old_dir, &old_dentry->d_name, 0, &old_fname);
+ if (ret)
+ return ret;

- /* check for collisions, even if the name isn't there */
- ret = btrfs_check_dir_item_collision(dest, new_dir->i_ino,
- new_dentry->d_name.name,
- new_dentry->d_name.len);
+ ret = fscrypt_setup_filename(new_dir, &new_dentry->d_name, 0, &new_fname);
+ if (ret) {
+ fscrypt_free_filename(&old_fname);
+ return ret;
+ }

+ /* check for collisions, even if the name isn't there */
+ ret = btrfs_check_dir_item_collision(dest, new_dir->i_ino, &new_fname.disk_name);
if (ret) {
if (ret == -EEXIST) {
/* we shouldn't get
* eexist without a new_inode */
if (WARN_ON(!new_inode)) {
- return ret;
+ goto out_fscrypt_names;
}
} else {
/* maybe -EOVERFLOW */
- return ret;
+ goto out_fscrypt_names;
}
}
ret = 0;
@@ -9334,8 +9400,10 @@ static int btrfs_rename(struct user_namespace *mnt_userns,

if (flags & RENAME_WHITEOUT) {
whiteout_args.inode = new_whiteout_inode(mnt_userns, old_dir);
- if (!whiteout_args.inode)
- return -ENOMEM;
+ if (!whiteout_args.inode) {
+ ret = -ENOMEM;
+ goto out_fscrypt_names;
+ }
ret = btrfs_new_inode_prepare(&whiteout_args, &trans_num_items);
if (ret)
goto out_whiteout_inode;
@@ -9403,11 +9471,9 @@ static int btrfs_rename(struct user_namespace *mnt_userns,
/* force full log commit if subvolume involved. */
btrfs_set_log_full_commit(trans);
} else {
- ret = btrfs_insert_inode_ref(trans, dest,
- new_dentry->d_name.name,
- new_dentry->d_name.len,
- old_ino,
- btrfs_ino(BTRFS_I(new_dir)), index);
+ ret = btrfs_insert_inode_ref(trans, dest, &new_fname.disk_name,
+ old_ino, btrfs_ino(BTRFS_I(new_dir)),
+ index);
if (ret)
goto out_fail;
}
@@ -9429,10 +9495,8 @@ static int btrfs_rename(struct user_namespace *mnt_userns,
ret = btrfs_unlink_subvol(trans, old_dir, old_dentry);
} else {
ret = __btrfs_unlink_inode(trans, BTRFS_I(old_dir),
- BTRFS_I(d_inode(old_dentry)),
- old_dentry->d_name.name,
- old_dentry->d_name.len,
- &rename_ctx);
+ BTRFS_I(d_inode(old_dentry)),
+ &old_fname.disk_name, &rename_ctx);
if (!ret)
ret = btrfs_update_inode(trans, root, BTRFS_I(old_inode));
}
@@ -9451,8 +9515,7 @@ static int btrfs_rename(struct user_namespace *mnt_userns,
} else {
ret = btrfs_unlink_inode(trans, BTRFS_I(new_dir),
BTRFS_I(d_inode(new_dentry)),
- new_dentry->d_name.name,
- new_dentry->d_name.len);
+ &new_fname.disk_name);
}
if (!ret && new_inode->i_nlink == 0)
ret = btrfs_orphan_add(trans,
@@ -9464,8 +9527,7 @@ static int btrfs_rename(struct user_namespace *mnt_userns,
}

ret = btrfs_add_link(trans, BTRFS_I(new_dir), BTRFS_I(old_inode),
- new_dentry->d_name.name,
- new_dentry->d_name.len, 0, index);
+ &new_fname.disk_name, 0, index);
if (ret) {
btrfs_abort_transaction(trans, ret);
goto out_fail;
@@ -9500,6 +9562,9 @@ static int btrfs_rename(struct user_namespace *mnt_userns,
out_whiteout_inode:
if (flags & RENAME_WHITEOUT)
iput(whiteout_args.inode);
+out_fscrypt_names:
+ fscrypt_free_filename(&old_fname);
+ fscrypt_free_filename(&new_fname);
return ret;
}

diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 2e29fafe0e7d..9e323420c96d 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -951,6 +951,7 @@ static noinline int btrfs_mksubvol(const struct path *parent,
struct inode *dir = d_inode(parent->dentry);
struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb);
struct dentry *dentry;
+ struct fscrypt_str name_str = FSTR_INIT((char *)name, namelen);
int error;

error = down_write_killable_nested(&dir->i_rwsem, I_MUTEX_PARENT);
@@ -971,8 +972,7 @@ static noinline int btrfs_mksubvol(const struct path *parent,
* check for them now when we can safely fail
*/
error = btrfs_check_dir_item_collision(BTRFS_I(dir)->root,
- dir->i_ino, name,
- namelen);
+ dir->i_ino, &name_str);
if (error)
goto out_dput;

@@ -3782,6 +3782,7 @@ static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp)
struct btrfs_trans_handle *trans;
struct btrfs_path *path = NULL;
struct btrfs_disk_key disk_key;
+ struct fscrypt_str name = FSTR_INIT("default", 7);
u64 objectid = 0;
u64 dir_id;
int ret;
@@ -3825,7 +3826,7 @@ static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp)

dir_id = btrfs_super_root_dir(fs_info->super_copy);
di = btrfs_lookup_dir_item(trans, fs_info->tree_root, path,
- dir_id, "default", 7, 1);
+ dir_id, &name, 1);
if (IS_ERR_OR_NULL(di)) {
btrfs_release_path(path);
btrfs_end_transaction(trans);
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c
index e1f599d7a916..7d783f094306 100644
--- a/fs/btrfs/root-tree.c
+++ b/fs/btrfs/root-tree.c
@@ -327,9 +327,8 @@ int btrfs_del_root(struct btrfs_trans_handle *trans,
}

int btrfs_del_root_ref(struct btrfs_trans_handle *trans, u64 root_id,
- u64 ref_id, u64 dirid, u64 *sequence, const char *name,
- int name_len)
-
+ u64 ref_id, u64 dirid, u64 *sequence,
+ const struct fscrypt_str *name)
{
struct btrfs_root *tree_root = trans->fs_info->tree_root;
struct btrfs_path *path;
@@ -356,8 +355,8 @@ int btrfs_del_root_ref(struct btrfs_trans_handle *trans, u64 root_id,
struct btrfs_root_ref);
ptr = (unsigned long)(ref + 1);
if ((btrfs_root_ref_dirid(leaf, ref) != dirid) ||
- (btrfs_root_ref_name_len(leaf, ref) != name_len) ||
- memcmp_extent_buffer(leaf, name, ptr, name_len)) {
+ (btrfs_root_ref_name_len(leaf, ref) != name->len) ||
+ memcmp_extent_buffer(leaf, name->name, ptr, name->len)) {
ret = -ENOENT;
goto out;
}
@@ -400,8 +399,8 @@ int btrfs_del_root_ref(struct btrfs_trans_handle *trans, u64 root_id,
* Will return 0, -ENOMEM, or anything from the CoW path
*/
int btrfs_add_root_ref(struct btrfs_trans_handle *trans, u64 root_id,
- u64 ref_id, u64 dirid, u64 sequence, const char *name,
- int name_len)
+ u64 ref_id, u64 dirid, u64 sequence,
+ const struct fscrypt_str *name)
{
struct btrfs_root *tree_root = trans->fs_info->tree_root;
struct btrfs_key key;
@@ -420,7 +419,7 @@ int btrfs_add_root_ref(struct btrfs_trans_handle *trans, u64 root_id,
key.offset = ref_id;
again:
ret = btrfs_insert_empty_item(trans, tree_root, path, &key,
- sizeof(*ref) + name_len);
+ sizeof(*ref) + name->len);
if (ret) {
btrfs_abort_transaction(trans, ret);
btrfs_free_path(path);
@@ -431,9 +430,9 @@ int btrfs_add_root_ref(struct btrfs_trans_handle *trans, u64 root_id,
ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_root_ref);
btrfs_set_root_ref_dirid(leaf, ref, dirid);
btrfs_set_root_ref_sequence(leaf, ref, sequence);
- btrfs_set_root_ref_name_len(leaf, ref, name_len);
+ btrfs_set_root_ref_name_len(leaf, ref, name->len);
ptr = (unsigned long)(ref + 1);
- write_extent_buffer(leaf, name, ptr, name_len);
+ write_extent_buffer(leaf, name->name, ptr, name->len);
btrfs_mark_buffer_dirty(leaf);

if (key.type == BTRFS_ROOT_BACKREF_KEY) {
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index 35e889fe2a95..547b5c229218 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -1596,13 +1596,17 @@ static int gen_unique_name(struct send_ctx *sctx,
return -ENOMEM;

while (1) {
+ struct fscrypt_str tmp_name;
+
len = snprintf(tmp, sizeof(tmp), "o%llu-%llu-%llu",
ino, gen, idx);
ASSERT(len < sizeof(tmp));
+ tmp_name.name = tmp;
+ tmp_name.len = strlen(tmp);

di = btrfs_lookup_dir_item(NULL, sctx->send_root,
path, BTRFS_FIRST_FREE_OBJECTID,
- tmp, strlen(tmp), 0);
+ &tmp_name, 0);
btrfs_release_path(path);
if (IS_ERR(di)) {
ret = PTR_ERR(di);
@@ -1622,7 +1626,7 @@ static int gen_unique_name(struct send_ctx *sctx,

di = btrfs_lookup_dir_item(NULL, sctx->parent_root,
path, BTRFS_FIRST_FREE_OBJECTID,
- tmp, strlen(tmp), 0);
+ &tmp_name, 0);
btrfs_release_path(path);
if (IS_ERR(di)) {
ret = PTR_ERR(di);
@@ -1752,13 +1756,13 @@ static int lookup_dir_item_inode(struct btrfs_root *root,
struct btrfs_dir_item *di;
struct btrfs_key key;
struct btrfs_path *path;
+ struct fscrypt_str name_str = FSTR_INIT((char *)name, name_len);

path = alloc_path_for_send();
if (!path)
return -ENOMEM;

- di = btrfs_lookup_dir_item(NULL, root, path,
- dir, name, name_len, 0);
+ di = btrfs_lookup_dir_item(NULL, root, path, dir, &name_str, 0);
if (IS_ERR_OR_NULL(di)) {
ret = di ? PTR_ERR(di) : -ENOENT;
goto out;
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 582b71b7fa77..2c562febd801 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -1398,6 +1398,7 @@ static int get_default_subvol_objectid(struct btrfs_fs_info *fs_info, u64 *objec
struct btrfs_dir_item *di;
struct btrfs_path *path;
struct btrfs_key location;
+ struct fscrypt_str name = FSTR_INIT("default", 7);
u64 dir_id;

path = btrfs_alloc_path();
@@ -1410,7 +1411,7 @@ static int get_default_subvol_objectid(struct btrfs_fs_info *fs_info, u64 *objec
* to mount.
*/
dir_id = btrfs_super_root_dir(fs_info->super_copy);
- di = btrfs_lookup_dir_item(NULL, root, path, dir_id, "default", 7, 0);
+ di = btrfs_lookup_dir_item(NULL, root, path, dir_id, &name, 0);
if (IS_ERR(di)) {
btrfs_free_path(path);
return PTR_ERR(di);
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index a55556759441..1193214ba8c1 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -6,6 +6,7 @@
#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/sched.h>
+#include <linux/sched/mm.h>
#include <linux/writeback.h>
#include <linux/pagemap.h>
#include <linux/blkdev.h>
@@ -1627,10 +1628,9 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
struct btrfs_root *root = pending->root;
struct btrfs_root *parent_root;
struct btrfs_block_rsv *rsv;
- struct inode *parent_inode;
+ struct inode *parent_inode = pending->dir;
struct btrfs_path *path;
struct btrfs_dir_item *dir_item;
- struct dentry *dentry;
struct extent_buffer *tmp;
struct extent_buffer *old;
struct timespec64 cur_time;
@@ -1639,6 +1639,8 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
u64 index = 0;
u64 objectid;
u64 root_flags;
+ unsigned int nofs_flags;
+ struct fscrypt_name fname;

ASSERT(pending->path);
path = pending->path;
@@ -1646,9 +1648,22 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
ASSERT(pending->root_item);
new_root_item = pending->root_item;

+ /*
+ * We're inside a transaction and must make sure that any potential
+ * allocations with GFP_KERNEL in fscrypt won't recurse back to
+ * filesystem.
+ */
+ nofs_flags = memalloc_nofs_save();
+ pending->error = fscrypt_setup_filename(parent_inode,
+ &pending->dentry->d_name, 0,
+ &fname);
+ memalloc_nofs_restore(nofs_flags);
+ if (pending->error)
+ goto free_pending;
+
pending->error = btrfs_get_free_objectid(tree_root, &objectid);
if (pending->error)
- goto no_free_objectid;
+ goto free_fname;

/*
* Make qgroup to skip current new snapshot's qgroupid, as it is
@@ -1677,8 +1692,6 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
trace_btrfs_space_reservation(fs_info, "transaction",
trans->transid,
trans->bytes_reserved, 1);
- dentry = pending->dentry;
- parent_inode = pending->dir;
parent_root = BTRFS_I(parent_inode)->root;
ret = record_root_in_trans(trans, parent_root, 0);
if (ret)
@@ -1694,8 +1707,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
/* check if there is a file/dir which has the same name. */
dir_item = btrfs_lookup_dir_item(NULL, parent_root, path,
btrfs_ino(BTRFS_I(parent_inode)),
- dentry->d_name.name,
- dentry->d_name.len, 0);
+ &fname.disk_name, 0);
if (dir_item != NULL && !IS_ERR(dir_item)) {
pending->error = -EEXIST;
goto dir_item_existed;
@@ -1790,7 +1802,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
ret = btrfs_add_root_ref(trans, objectid,
parent_root->root_key.objectid,
btrfs_ino(BTRFS_I(parent_inode)), index,
- dentry->d_name.name, dentry->d_name.len);
+ &fname.disk_name);
if (ret) {
btrfs_abort_transaction(trans, ret);
goto fail;
@@ -1822,9 +1834,9 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
if (ret < 0)
goto fail;

- ret = btrfs_insert_dir_item(trans, dentry->d_name.name,
- dentry->d_name.len, BTRFS_I(parent_inode),
- &key, BTRFS_FT_DIR, index);
+ ret = btrfs_insert_dir_item(trans, &fname.disk_name,
+ BTRFS_I(parent_inode), &key, BTRFS_FT_DIR,
+ index);
/* We have check then name at the beginning, so it is impossible. */
BUG_ON(ret == -EEXIST || ret == -EOVERFLOW);
if (ret) {
@@ -1833,7 +1845,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
}

btrfs_i_size_write(BTRFS_I(parent_inode), parent_inode->i_size +
- dentry->d_name.len * 2);
+ fname.disk_name.len * 2);
parent_inode->i_mtime = current_time(parent_inode);
parent_inode->i_ctime = parent_inode->i_mtime;
ret = btrfs_update_inode_fallback(trans, parent_root, BTRFS_I(parent_inode));
@@ -1865,7 +1877,9 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
trans->bytes_reserved = 0;
clear_skip_qgroup:
btrfs_clear_skip_qgroup(trans);
-no_free_objectid:
+free_fname:
+ fscrypt_free_filename(&fname);
+free_pending:
kfree(new_root_item);
pending->root_item = NULL;
btrfs_free_path(path);
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 00be69ce7b90..c03ff6a5a7f6 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -595,6 +595,21 @@ static int overwrite_item(struct btrfs_trans_handle *trans,
return do_overwrite_item(trans, root, path, eb, slot, key);
}

+static int read_alloc_one_name(struct extent_buffer *eb, void *start, int len,
+ struct fscrypt_str *name)
+{
+ char *buf;
+
+ buf = kmalloc(len, GFP_NOFS);
+ if (!buf)
+ return -ENOMEM;
+
+ read_extent_buffer(eb, buf, (unsigned long)start, len);
+ name->name = buf;
+ name->len = len;
+ return 0;
+}
+
/*
* simple helper to read an inode off the disk from a given root
* This can only be called for subvolume roots and not for the log
@@ -901,12 +916,11 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
static int unlink_inode_for_log_replay(struct btrfs_trans_handle *trans,
struct btrfs_inode *dir,
struct btrfs_inode *inode,
- const char *name,
- int name_len)
+ const struct fscrypt_str *name)
{
int ret;

- ret = btrfs_unlink_inode(trans, dir, inode, name, name_len);
+ ret = btrfs_unlink_inode(trans, dir, inode, name);
if (ret)
return ret;
/*
@@ -933,8 +947,7 @@ static noinline int drop_one_dir_item(struct btrfs_trans_handle *trans,
{
struct btrfs_root *root = dir->root;
struct inode *inode;
- char *name;
- int name_len;
+ struct fscrypt_str name;
struct extent_buffer *leaf;
struct btrfs_key location;
int ret;
@@ -942,12 +955,10 @@ static noinline int drop_one_dir_item(struct btrfs_trans_handle *trans,
leaf = path->nodes[0];

btrfs_dir_item_key_to_cpu(leaf, di, &location);
- name_len = btrfs_dir_name_len(leaf, di);
- name = kmalloc(name_len, GFP_NOFS);
- if (!name)
+ ret = read_alloc_one_name(leaf, di + 1, btrfs_dir_name_len(leaf, di), &name);
+ if (ret)
return -ENOMEM;

- read_extent_buffer(leaf, name, (unsigned long)(di + 1), name_len);
btrfs_release_path(path);

inode = read_one_inode(root, location.objectid);
@@ -960,10 +971,9 @@ static noinline int drop_one_dir_item(struct btrfs_trans_handle *trans,
if (ret)
goto out;

- ret = unlink_inode_for_log_replay(trans, dir, BTRFS_I(inode), name,
- name_len);
+ ret = unlink_inode_for_log_replay(trans, dir, BTRFS_I(inode), &name);
out:
- kfree(name);
+ kfree(name.name);
iput(inode);
return ret;
}
@@ -978,14 +988,14 @@ static noinline int drop_one_dir_item(struct btrfs_trans_handle *trans,
static noinline int inode_in_dir(struct btrfs_root *root,
struct btrfs_path *path,
u64 dirid, u64 objectid, u64 index,
- const char *name, int name_len)
+ struct fscrypt_str *name)
{
struct btrfs_dir_item *di;
struct btrfs_key location;
int ret = 0;

di = btrfs_lookup_dir_index_item(NULL, root, path, dirid,
- index, name, name_len, 0);
+ index, name, 0);
if (IS_ERR(di)) {
ret = PTR_ERR(di);
goto out;
@@ -998,7 +1008,7 @@ static noinline int inode_in_dir(struct btrfs_root *root,
}

btrfs_release_path(path);
- di = btrfs_lookup_dir_item(NULL, root, path, dirid, name, name_len, 0);
+ di = btrfs_lookup_dir_item(NULL, root, path, dirid, name, 0);
if (IS_ERR(di)) {
ret = PTR_ERR(di);
goto out;
@@ -1025,7 +1035,7 @@ static noinline int inode_in_dir(struct btrfs_root *root,
static noinline int backref_in_log(struct btrfs_root *log,
struct btrfs_key *key,
u64 ref_objectid,
- const char *name, int namelen)
+ const struct fscrypt_str *name)
{
struct btrfs_path *path;
int ret;
@@ -1045,12 +1055,10 @@ static noinline int backref_in_log(struct btrfs_root *log,
if (key->type == BTRFS_INODE_EXTREF_KEY)
ret = !!btrfs_find_name_in_ext_backref(path->nodes[0],
path->slots[0],
- ref_objectid,
- name, namelen);
+ ref_objectid, name);
else
ret = !!btrfs_find_name_in_backref(path->nodes[0],
- path->slots[0],
- name, namelen);
+ path->slots[0], name);
out:
btrfs_free_path(path);
return ret;
@@ -1063,11 +1071,9 @@ static inline int __add_inode_ref(struct btrfs_trans_handle *trans,
struct btrfs_inode *dir,
struct btrfs_inode *inode,
u64 inode_objectid, u64 parent_objectid,
- u64 ref_index, char *name, int namelen)
+ u64 ref_index, struct fscrypt_str *name)
{
int ret;
- char *victim_name;
- int victim_name_len;
struct extent_buffer *leaf;
struct btrfs_dir_item *di;
struct btrfs_key search_key;
@@ -1099,43 +1105,40 @@ static inline int __add_inode_ref(struct btrfs_trans_handle *trans,
ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
ptr_end = ptr + btrfs_item_size(leaf, path->slots[0]);
while (ptr < ptr_end) {
- victim_ref = (struct btrfs_inode_ref *)ptr;
- victim_name_len = btrfs_inode_ref_name_len(leaf,
- victim_ref);
- victim_name = kmalloc(victim_name_len, GFP_NOFS);
- if (!victim_name)
- return -ENOMEM;
+ struct fscrypt_str victim_name;

- read_extent_buffer(leaf, victim_name,
- (unsigned long)(victim_ref + 1),
- victim_name_len);
+ victim_ref = (struct btrfs_inode_ref *)ptr;
+ ret = read_alloc_one_name(leaf, (victim_ref + 1),
+ btrfs_inode_ref_name_len(leaf, victim_ref),
+ &victim_name);
+ if (ret)
+ return ret;

ret = backref_in_log(log_root, &search_key,
- parent_objectid, victim_name,
- victim_name_len);
+ parent_objectid, &victim_name);
if (ret < 0) {
- kfree(victim_name);
+ kfree(victim_name.name);
return ret;
} else if (!ret) {
inc_nlink(&inode->vfs_inode);
btrfs_release_path(path);

ret = unlink_inode_for_log_replay(trans, dir, inode,
- victim_name, victim_name_len);
- kfree(victim_name);
+ &victim_name);
+ kfree(victim_name.name);
if (ret)
return ret;
goto again;
}
- kfree(victim_name);
+ kfree(victim_name.name);

- ptr = (unsigned long)(victim_ref + 1) + victim_name_len;
+ ptr = (unsigned long)(victim_ref + 1) + victim_name.len;
}
}
btrfs_release_path(path);

/* Same search but for extended refs */
- extref = btrfs_lookup_inode_extref(NULL, root, path, name, namelen,
+ extref = btrfs_lookup_inode_extref(NULL, root, path, name,
inode_objectid, parent_objectid, 0,
0);
if (IS_ERR(extref)) {
@@ -1152,29 +1155,28 @@ static inline int __add_inode_ref(struct btrfs_trans_handle *trans,
base = btrfs_item_ptr_offset(leaf, path->slots[0]);

while (cur_offset < item_size) {
- extref = (struct btrfs_inode_extref *)(base + cur_offset);
+ struct fscrypt_str victim_name;

- victim_name_len = btrfs_inode_extref_name_len(leaf, extref);
+ extref = (struct btrfs_inode_extref *)(base + cur_offset);

if (btrfs_inode_extref_parent(leaf, extref) != parent_objectid)
goto next;

- victim_name = kmalloc(victim_name_len, GFP_NOFS);
- if (!victim_name)
- return -ENOMEM;
- read_extent_buffer(leaf, victim_name, (unsigned long)&extref->name,
- victim_name_len);
+ ret = read_alloc_one_name(leaf, &extref->name,
+ btrfs_inode_extref_name_len(leaf, extref),
+ &victim_name);
+ if (ret)
+ return ret;

search_key.objectid = inode_objectid;
search_key.type = BTRFS_INODE_EXTREF_KEY;
search_key.offset = btrfs_extref_hash(parent_objectid,
- victim_name,
- victim_name_len);
+ victim_name.name,
+ victim_name.len);
ret = backref_in_log(log_root, &search_key,
- parent_objectid, victim_name,
- victim_name_len);
+ parent_objectid, &victim_name);
if (ret < 0) {
- kfree(victim_name);
+ kfree(victim_name.name);
return ret;
} else if (!ret) {
ret = -ENOENT;
@@ -1186,26 +1188,24 @@ static inline int __add_inode_ref(struct btrfs_trans_handle *trans,

ret = unlink_inode_for_log_replay(trans,
BTRFS_I(victim_parent),
- inode,
- victim_name,
- victim_name_len);
+ inode, &victim_name);
}
iput(victim_parent);
- kfree(victim_name);
+ kfree(victim_name.name);
if (ret)
return ret;
goto again;
}
- kfree(victim_name);
+ kfree(victim_name.name);
next:
- cur_offset += victim_name_len + sizeof(*extref);
+ cur_offset += victim_name.len + sizeof(*extref);
}
}
btrfs_release_path(path);

/* look for a conflicting sequence number */
di = btrfs_lookup_dir_index_item(trans, root, path, btrfs_ino(dir),
- ref_index, name, namelen, 0);
+ ref_index, name, 0);
if (IS_ERR(di)) {
return PTR_ERR(di);
} else if (di) {
@@ -1216,8 +1216,7 @@ static inline int __add_inode_ref(struct btrfs_trans_handle *trans,
btrfs_release_path(path);

/* look for a conflicting name */
- di = btrfs_lookup_dir_item(trans, root, path, btrfs_ino(dir),
- name, namelen, 0);
+ di = btrfs_lookup_dir_item(trans, root, path, btrfs_ino(dir), name, 0);
if (IS_ERR(di)) {
return PTR_ERR(di);
} else if (di) {
@@ -1231,20 +1230,18 @@ static inline int __add_inode_ref(struct btrfs_trans_handle *trans,
}

static int extref_get_fields(struct extent_buffer *eb, unsigned long ref_ptr,
- u32 *namelen, char **name, u64 *index,
+ struct fscrypt_str *name, u64 *index,
u64 *parent_objectid)
{
struct btrfs_inode_extref *extref;
+ int ret;

extref = (struct btrfs_inode_extref *)ref_ptr;

- *namelen = btrfs_inode_extref_name_len(eb, extref);
- *name = kmalloc(*namelen, GFP_NOFS);
- if (*name == NULL)
- return -ENOMEM;
-
- read_extent_buffer(eb, *name, (unsigned long)&extref->name,
- *namelen);
+ ret = read_alloc_one_name(eb, &extref->name,
+ btrfs_inode_extref_name_len(eb, extref), name);
+ if (ret)
+ return ret;

if (index)
*index = btrfs_inode_extref_index(eb, extref);
@@ -1255,18 +1252,17 @@ static int extref_get_fields(struct extent_buffer *eb, unsigned long ref_ptr,
}

static int ref_get_fields(struct extent_buffer *eb, unsigned long ref_ptr,
- u32 *namelen, char **name, u64 *index)
+ struct fscrypt_str *name, u64 *index)
{
struct btrfs_inode_ref *ref;
+ int ret;

ref = (struct btrfs_inode_ref *)ref_ptr;

- *namelen = btrfs_inode_ref_name_len(eb, ref);
- *name = kmalloc(*namelen, GFP_NOFS);
- if (*name == NULL)
- return -ENOMEM;
-
- read_extent_buffer(eb, *name, (unsigned long)(ref + 1), *namelen);
+ ret = read_alloc_one_name(eb, ref + 1, btrfs_inode_ref_name_len(eb, ref),
+ name);
+ if (ret)
+ return ret;

if (index)
*index = btrfs_inode_ref_index(eb, ref);
@@ -1308,28 +1304,24 @@ static int unlink_old_inode_refs(struct btrfs_trans_handle *trans,
ref_ptr = btrfs_item_ptr_offset(eb, path->slots[0]);
ref_end = ref_ptr + btrfs_item_size(eb, path->slots[0]);
while (ref_ptr < ref_end) {
- char *name = NULL;
- int namelen;
+ struct fscrypt_str name;
u64 parent_id;

if (key->type == BTRFS_INODE_EXTREF_KEY) {
- ret = extref_get_fields(eb, ref_ptr, &namelen, &name,
+ ret = extref_get_fields(eb, ref_ptr, &name,
NULL, &parent_id);
} else {
parent_id = key->offset;
- ret = ref_get_fields(eb, ref_ptr, &namelen, &name,
- NULL);
+ ret = ref_get_fields(eb, ref_ptr, &name, NULL);
}
if (ret)
goto out;

if (key->type == BTRFS_INODE_EXTREF_KEY)
ret = !!btrfs_find_name_in_ext_backref(log_eb, log_slot,
- parent_id, name,
- namelen);
+ parent_id, &name);
else
- ret = !!btrfs_find_name_in_backref(log_eb, log_slot,
- name, namelen);
+ ret = !!btrfs_find_name_in_backref(log_eb, log_slot, &name);

if (!ret) {
struct inode *dir;
@@ -1338,20 +1330,20 @@ static int unlink_old_inode_refs(struct btrfs_trans_handle *trans,
dir = read_one_inode(root, parent_id);
if (!dir) {
ret = -ENOENT;
- kfree(name);
+ kfree(name.name);
goto out;
}
ret = unlink_inode_for_log_replay(trans, BTRFS_I(dir),
- inode, name, namelen);
- kfree(name);
+ inode, &name);
+ kfree(name.name);
iput(dir);
if (ret)
goto out;
goto again;
}

- kfree(name);
- ref_ptr += namelen;
+ kfree(name.name);
+ ref_ptr += name.len;
if (key->type == BTRFS_INODE_EXTREF_KEY)
ref_ptr += sizeof(struct btrfs_inode_extref);
else
@@ -1380,8 +1372,7 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
struct inode *inode = NULL;
unsigned long ref_ptr;
unsigned long ref_end;
- char *name = NULL;
- int namelen;
+ struct fscrypt_str name;
int ret;
int log_ref_ver = 0;
u64 parent_objectid;
@@ -1425,7 +1416,7 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,

while (ref_ptr < ref_end) {
if (log_ref_ver) {
- ret = extref_get_fields(eb, ref_ptr, &namelen, &name,
+ ret = extref_get_fields(eb, ref_ptr, &name,
&ref_index, &parent_objectid);
/*
* parent object can change from one array
@@ -1438,15 +1429,13 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
goto out;
}
} else {
- ret = ref_get_fields(eb, ref_ptr, &namelen, &name,
- &ref_index);
+ ret = ref_get_fields(eb, ref_ptr, &name, &ref_index);
}
if (ret)
goto out;

ret = inode_in_dir(root, path, btrfs_ino(BTRFS_I(dir)),
- btrfs_ino(BTRFS_I(inode)), ref_index,
- name, namelen);
+ btrfs_ino(BTRFS_I(inode)), ref_index, &name);
if (ret < 0) {
goto out;
} else if (ret == 0) {
@@ -1460,7 +1449,7 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
ret = __add_inode_ref(trans, root, path, log,
BTRFS_I(dir), BTRFS_I(inode),
inode_objectid, parent_objectid,
- ref_index, name, namelen);
+ ref_index, &name);
if (ret) {
if (ret == 1)
ret = 0;
@@ -1469,7 +1458,7 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,

/* insert our name */
ret = btrfs_add_link(trans, BTRFS_I(dir), BTRFS_I(inode),
- name, namelen, 0, ref_index);
+ &name, 0, ref_index);
if (ret)
goto out;

@@ -1479,9 +1468,9 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
}
/* Else, ret == 1, we already have a perfect match, we're done. */

- ref_ptr = (unsigned long)(ref_ptr + ref_struct_size) + namelen;
- kfree(name);
- name = NULL;
+ ref_ptr = (unsigned long)(ref_ptr + ref_struct_size) + name.len;
+ kfree(name.name);
+ name.name = NULL;
if (log_ref_ver) {
iput(dir);
dir = NULL;
@@ -1505,7 +1494,7 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
ret = overwrite_item(trans, root, path, eb, slot, key);
out:
btrfs_release_path(path);
- kfree(name);
+ kfree(name.name);
iput(dir);
iput(inode);
return ret;
@@ -1777,7 +1766,7 @@ static noinline int link_to_fixup_dir(struct btrfs_trans_handle *trans,
static noinline int insert_one_name(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 dirid, u64 index,
- char *name, int name_len,
+ const struct fscrypt_str *name,
struct btrfs_key *location)
{
struct inode *inode;
@@ -1795,7 +1784,7 @@ static noinline int insert_one_name(struct btrfs_trans_handle *trans,
}

ret = btrfs_add_link(trans, BTRFS_I(dir), BTRFS_I(inode), name,
- name_len, 1, index);
+ 1, index);

/* FIXME, put inode into FIXUP list */

@@ -1855,8 +1844,7 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans,
struct btrfs_dir_item *di,
struct btrfs_key *key)
{
- char *name;
- int name_len;
+ struct fscrypt_str name;
struct btrfs_dir_item *dir_dst_di;
struct btrfs_dir_item *index_dst_di;
bool dir_dst_matches = false;
@@ -1874,17 +1862,11 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans,
if (!dir)
return -EIO;

- name_len = btrfs_dir_name_len(eb, di);
- name = kmalloc(name_len, GFP_NOFS);
- if (!name) {
- ret = -ENOMEM;
+ ret = read_alloc_one_name(eb, di + 1, btrfs_dir_name_len(eb, di), &name);
+ if (ret)
goto out;
- }

log_type = btrfs_dir_type(eb, di);
- read_extent_buffer(eb, name, (unsigned long)(di + 1),
- name_len);
-
btrfs_dir_item_key_to_cpu(eb, di, &log_key);
ret = btrfs_lookup_inode(trans, root, path, &log_key, 0);
btrfs_release_path(path);
@@ -1894,7 +1876,7 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans,
ret = 0;

dir_dst_di = btrfs_lookup_dir_item(trans, root, path, key->objectid,
- name, name_len, 1);
+ &name, 1);
if (IS_ERR(dir_dst_di)) {
ret = PTR_ERR(dir_dst_di);
goto out;
@@ -1911,7 +1893,7 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans,

index_dst_di = btrfs_lookup_dir_index_item(trans, root, path,
key->objectid, key->offset,
- name, name_len, 1);
+ &name, 1);
if (IS_ERR(index_dst_di)) {
ret = PTR_ERR(index_dst_di);
goto out;
@@ -1939,7 +1921,7 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans,
search_key.objectid = log_key.objectid;
search_key.type = BTRFS_INODE_REF_KEY;
search_key.offset = key->objectid;
- ret = backref_in_log(root->log_root, &search_key, 0, name, name_len);
+ ret = backref_in_log(root->log_root, &search_key, 0, &name);
if (ret < 0) {
goto out;
} else if (ret) {
@@ -1952,8 +1934,7 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans,
search_key.objectid = log_key.objectid;
search_key.type = BTRFS_INODE_EXTREF_KEY;
search_key.offset = key->objectid;
- ret = backref_in_log(root->log_root, &search_key, key->objectid, name,
- name_len);
+ ret = backref_in_log(root->log_root, &search_key, key->objectid, &name);
if (ret < 0) {
goto out;
} else if (ret) {
@@ -1964,7 +1945,7 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans,
}
btrfs_release_path(path);
ret = insert_one_name(trans, root, key->objectid, key->offset,
- name, name_len, &log_key);
+ &name, &log_key);
if (ret && ret != -ENOENT && ret != -EEXIST)
goto out;
if (!ret)
@@ -1974,10 +1955,10 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans,

out:
if (!ret && update_size) {
- btrfs_i_size_write(BTRFS_I(dir), dir->i_size + name_len * 2);
+ btrfs_i_size_write(BTRFS_I(dir), dir->i_size + name.len * 2);
ret = btrfs_update_inode(trans, root, BTRFS_I(dir));
}
- kfree(name);
+ kfree(name.name);
iput(dir);
if (!ret && name_added)
ret = 1;
@@ -2143,8 +2124,7 @@ static noinline int check_item_in_log(struct btrfs_trans_handle *trans,
struct extent_buffer *eb;
int slot;
struct btrfs_dir_item *di;
- int name_len;
- char *name;
+ struct fscrypt_str name;
struct inode *inode = NULL;
struct btrfs_key location;

@@ -2159,22 +2139,16 @@ static noinline int check_item_in_log(struct btrfs_trans_handle *trans,
eb = path->nodes[0];
slot = path->slots[0];
di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
- name_len = btrfs_dir_name_len(eb, di);
- name = kmalloc(name_len, GFP_NOFS);
- if (!name) {
- ret = -ENOMEM;
+ ret = read_alloc_one_name(eb, di + 1, btrfs_dir_name_len(eb, di), &name);
+ if (ret)
goto out;
- }
-
- read_extent_buffer(eb, name, (unsigned long)(di + 1), name_len);

if (log) {
struct btrfs_dir_item *log_di;

log_di = btrfs_lookup_dir_index_item(trans, log, log_path,
dir_key->objectid,
- dir_key->offset,
- name, name_len, 0);
+ dir_key->offset, &name, 0);
if (IS_ERR(log_di)) {
ret = PTR_ERR(log_di);
goto out;
@@ -2200,7 +2174,7 @@ static noinline int check_item_in_log(struct btrfs_trans_handle *trans,

inc_nlink(inode);
ret = unlink_inode_for_log_replay(trans, BTRFS_I(dir), BTRFS_I(inode),
- name, name_len);
+ &name);
/*
* Unlike dir item keys, dir index keys can only have one name (entry) in
* them, as there are no key collisions since each key has a unique offset
@@ -2209,7 +2183,7 @@ static noinline int check_item_in_log(struct btrfs_trans_handle *trans,
out:
btrfs_release_path(path);
btrfs_release_path(log_path);
- kfree(name);
+ kfree(name.name);
iput(inode);
return ret;
}
@@ -3443,7 +3417,7 @@ static int del_logged_dentry(struct btrfs_trans_handle *trans,
struct btrfs_root *log,
struct btrfs_path *path,
u64 dir_ino,
- const char *name, int name_len,
+ const struct fscrypt_str *name,
u64 index)
{
struct btrfs_dir_item *di;
@@ -3453,7 +3427,7 @@ static int del_logged_dentry(struct btrfs_trans_handle *trans,
* for dir item keys.
*/
di = btrfs_lookup_dir_index_item(trans, log, path, dir_ino,
- index, name, name_len, -1);
+ index, name, -1);
if (IS_ERR(di))
return PTR_ERR(di);
else if (!di)
@@ -3490,7 +3464,7 @@ static int del_logged_dentry(struct btrfs_trans_handle *trans,
*/
void btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
- const char *name, int name_len,
+ const struct fscrypt_str *name,
struct btrfs_inode *dir, u64 index)
{
struct btrfs_path *path;
@@ -3517,7 +3491,7 @@ void btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans,
}

ret = del_logged_dentry(trans, root->log_root, path, btrfs_ino(dir),
- name, name_len, index);
+ name, index);
btrfs_free_path(path);
out_unlock:
mutex_unlock(&dir->log_mutex);
@@ -3529,7 +3503,7 @@ void btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans,
/* see comments for btrfs_del_dir_entries_in_log */
void btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
- const char *name, int name_len,
+ const struct fscrypt_str *name,
struct btrfs_inode *inode, u64 dirid)
{
struct btrfs_root *log;
@@ -3550,7 +3524,7 @@ void btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans,
log = root->log_root;
mutex_lock(&inode->log_mutex);

- ret = btrfs_del_inode_ref(trans, log, name, name_len, btrfs_ino(inode),
+ ret = btrfs_del_inode_ref(trans, log, name, btrfs_ino(inode),
dirid, &index);
mutex_unlock(&inode->log_mutex);
if (ret < 0 && ret != -ENOENT)
@@ -5293,6 +5267,7 @@ static int btrfs_check_ref_name_override(struct extent_buffer *eb,
u32 this_len;
unsigned long name_ptr;
struct btrfs_dir_item *di;
+ struct fscrypt_str name_str;

if (key->type == BTRFS_INODE_REF_KEY) {
struct btrfs_inode_ref *iref;
@@ -5326,8 +5301,11 @@ static int btrfs_check_ref_name_override(struct extent_buffer *eb,
}

read_extent_buffer(eb, name, name_ptr, this_name_len);
+
+ name_str.name = name;
+ name_str.len = this_name_len;
di = btrfs_lookup_dir_item(NULL, inode->root, search_path,
- parent, name, this_name_len, 0);
+ parent, &name_str, 0);
if (di && !IS_ERR(di)) {
struct btrfs_key di_key;

@@ -7493,9 +7471,14 @@ void btrfs_log_new_name(struct btrfs_trans_handle *trans,
if (old_dir && old_dir->logged_trans == trans->transid) {
struct btrfs_root *log = old_dir->root->log_root;
struct btrfs_path *path;
+ struct fscrypt_name fname;

ASSERT(old_dir_index >= BTRFS_DIR_START_INDEX);

+ ret = fscrypt_setup_filename(&old_dir->vfs_inode,
+ &old_dentry->d_name, 0, &fname);
+ if (ret)
+ goto out;
/*
* We have two inodes to update in the log, the old directory and
* the inode that got renamed, so we must pin the log to prevent
@@ -7508,13 +7491,17 @@ void btrfs_log_new_name(struct btrfs_trans_handle *trans,
* not fail, but if it does, it's not serious, just bail out and
* mark the log for a full commit.
*/
- if (WARN_ON_ONCE(ret < 0))
+ if (WARN_ON_ONCE(ret < 0)) {
+ fscrypt_free_filename(&fname);
goto out;
+ }
+
log_pinned = true;

path = btrfs_alloc_path();
if (!path) {
ret = -ENOMEM;
+ fscrypt_free_filename(&fname);
goto out;
}

@@ -7530,8 +7517,7 @@ void btrfs_log_new_name(struct btrfs_trans_handle *trans,
*/
mutex_lock(&old_dir->log_mutex);
ret = del_logged_dentry(trans, log, path, btrfs_ino(old_dir),
- old_dentry->d_name.name,
- old_dentry->d_name.len, old_dir_index);
+ &fname.disk_name, old_dir_index);
if (ret > 0) {
/*
* The dentry does not exist in the log, so record its
@@ -7545,6 +7531,7 @@ void btrfs_log_new_name(struct btrfs_trans_handle *trans,
mutex_unlock(&old_dir->log_mutex);

btrfs_free_path(path);
+ fscrypt_free_filename(&fname);
if (ret < 0)
goto out;
}
diff --git a/fs/btrfs/tree-log.h b/fs/btrfs/tree-log.h
index bcca74128c3b..8adebf4c9ada 100644
--- a/fs/btrfs/tree-log.h
+++ b/fs/btrfs/tree-log.h
@@ -84,11 +84,11 @@ int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans,
struct btrfs_log_ctx *ctx);
void btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
- const char *name, int name_len,
+ const struct fscrypt_str *name,
struct btrfs_inode *dir, u64 index);
void btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
- const char *name, int name_len,
+ const struct fscrypt_str *name,
struct btrfs_inode *inode, u64 dirid);
void btrfs_end_log_trans(struct btrfs_root *root);
void btrfs_pin_log_trans(struct btrfs_root *root);
diff --git a/fs/erofs/decompressor_lzma.c b/fs/erofs/decompressor_lzma.c
index 5cd612a8f858..49addc345aeb 100644
--- a/fs/erofs/decompressor_lzma.c
+++ b/fs/erofs/decompressor_lzma.c
@@ -217,9 +217,12 @@ int z_erofs_lzma_decompress(struct z_erofs_decompress_req *rq,
strm->buf.out_size = min_t(u32, outlen,
PAGE_SIZE - pageofs);
outlen -= strm->buf.out_size;
- if (!rq->out[no] && rq->fillgaps) /* deduped */
+ if (!rq->out[no] && rq->fillgaps) { /* deduped */
rq->out[no] = erofs_allocpage(pagepool,
GFP_KERNEL | __GFP_NOFAIL);
+ set_page_private(rq->out[no],
+ Z_EROFS_SHORTLIVED_PAGE);
+ }
if (rq->out[no])
strm->buf.out = kmap(rq->out[no]) + pageofs;
pageofs = 0;
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index be570c65ae15..e1297c6bcfbe 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -7157,7 +7157,6 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata)
{
struct nfs4_lockdata *data = calldata;
struct nfs4_lock_state *lsp = data->lsp;
- struct nfs_server *server = NFS_SERVER(d_inode(data->ctx->dentry));

if (!nfs4_sequence_done(task, &data->res.seq_res))
return;
@@ -7165,7 +7164,8 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata)
data->rpc_status = task->tk_status;
switch (task->tk_status) {
case 0:
- renew_lease(server, data->timestamp);
+ renew_lease(NFS_SERVER(d_inode(data->ctx->dentry)),
+ data->timestamp);
if (data->arg.new_lock && !data->cancelled) {
data->fl.fl_flags &= ~(FL_SLEEP | FL_ACCESS);
if (locks_lock_inode_wait(lsp->ls_state->inode, &data->fl) < 0)
@@ -7193,8 +7193,6 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata)
if (!nfs4_stateid_match(&data->arg.open_stateid,
&lsp->ls_state->open_stateid))
goto out_restart;
- else if (nfs4_async_handle_error(task, server, lsp->ls_state, NULL) == -EAGAIN)
- goto out_restart;
} else if (!nfs4_stateid_match(&data->arg.lock_stateid,
&lsp->ls_stateid))
goto out_restart;
@@ -10629,7 +10627,9 @@ static void nfs4_disable_swap(struct inode *inode)
*/
struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;

- nfs4_schedule_state_manager(clp);
+ set_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state);
+ clear_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state);
+ wake_up_var(&clp->cl_state);
}

static const struct inode_operations nfs4_dir_inode_operations = {
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 5b49e5365bb3..457b2b2f804a 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -1209,17 +1209,23 @@ void nfs4_schedule_state_manager(struct nfs_client *clp)
{
struct task_struct *task;
char buf[INET6_ADDRSTRLEN + sizeof("-manager") + 1];
- struct rpc_clnt *cl = clp->cl_rpcclient;
-
- while (cl != cl->cl_parent)
- cl = cl->cl_parent;
+ struct rpc_clnt *clnt = clp->cl_rpcclient;
+ bool swapon = false;

set_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state);
- if (test_and_set_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state) != 0) {
- wake_up_var(&clp->cl_state);
- return;
+
+ if (atomic_read(&clnt->cl_swapper)) {
+ swapon = !test_and_set_bit(NFS4CLNT_MANAGER_AVAILABLE,
+ &clp->cl_state);
+ if (!swapon) {
+ wake_up_var(&clp->cl_state);
+ return;
+ }
}
- set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state);
+
+ if (test_and_set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) != 0)
+ return;
+
__module_get(THIS_MODULE);
refcount_inc(&clp->cl_count);

@@ -1236,8 +1242,9 @@ void nfs4_schedule_state_manager(struct nfs_client *clp)
__func__, PTR_ERR(task));
if (!nfs_client_init_is_complete(clp))
nfs_mark_client_ready(clp, PTR_ERR(task));
+ if (swapon)
+ clear_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state);
nfs4_clear_state_manager_bit(clp);
- clear_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state);
nfs_put_client(clp);
module_put(THIS_MODULE);
}
@@ -2703,6 +2710,13 @@ static void nfs4_state_manager(struct nfs_client *clp)
nfs4_end_drain_session(clp);
nfs4_clear_state_manager_bit(clp);

+ if (test_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state) &&
+ !test_and_set_bit(NFS4CLNT_MANAGER_RUNNING,
+ &clp->cl_state)) {
+ memflags = memalloc_nofs_save();
+ continue;
+ }
+
if (!test_and_set_bit(NFS4CLNT_RECALL_RUNNING, &clp->cl_state)) {
if (test_and_clear_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state)) {
nfs_client_return_marked_delegations(clp);
@@ -2741,22 +2755,25 @@ static int nfs4_run_state_manager(void *ptr)

allow_signal(SIGKILL);
again:
- set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state);
nfs4_state_manager(clp);
- if (atomic_read(&cl->cl_swapper)) {
+
+ if (test_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state) &&
+ !test_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state)) {
wait_var_event_interruptible(&clp->cl_state,
test_bit(NFS4CLNT_RUN_MANAGER,
&clp->cl_state));
- if (atomic_read(&cl->cl_swapper) &&
- test_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state))
+ if (!atomic_read(&cl->cl_swapper))
+ clear_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state);
+ if (refcount_read(&clp->cl_count) > 1 && !signalled() &&
+ !test_and_set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state))
goto again;
/* Either no longer a swapper, or were signalled */
+ clear_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state);
}
- clear_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state);

if (refcount_read(&clp->cl_count) > 1 && !signalled() &&
test_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state) &&
- !test_and_set_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state))
+ !test_and_set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state))
goto again;

nfs_put_client(clp);
diff --git a/fs/nfs/sysfs.c b/fs/nfs/sysfs.c
index a6f740366963..edb535a0ff97 100644
--- a/fs/nfs/sysfs.c
+++ b/fs/nfs/sysfs.c
@@ -18,7 +18,7 @@
#include "sysfs.h"

struct kobject *nfs_client_kobj;
-static struct kset *nfs_client_kset;
+static struct kset *nfs_kset;

static void nfs_netns_object_release(struct kobject *kobj)
{
@@ -55,13 +55,13 @@ static struct kobject *nfs_netns_object_alloc(const char *name,

int nfs_sysfs_init(void)
{
- nfs_client_kset = kset_create_and_add("nfs", NULL, fs_kobj);
- if (!nfs_client_kset)
+ nfs_kset = kset_create_and_add("nfs", NULL, fs_kobj);
+ if (!nfs_kset)
return -ENOMEM;
- nfs_client_kobj = nfs_netns_object_alloc("net", nfs_client_kset, NULL);
+ nfs_client_kobj = nfs_netns_object_alloc("net", nfs_kset, NULL);
if (!nfs_client_kobj) {
- kset_unregister(nfs_client_kset);
- nfs_client_kset = NULL;
+ kset_unregister(nfs_kset);
+ nfs_kset = NULL;
return -ENOMEM;
}
return 0;
@@ -70,7 +70,7 @@ int nfs_sysfs_init(void)
void nfs_sysfs_exit(void)
{
kobject_put(nfs_client_kobj);
- kset_unregister(nfs_client_kset);
+ kset_unregister(nfs_kset);
}

static ssize_t nfs_netns_identifier_show(struct kobject *kobj,
@@ -159,7 +159,7 @@ static struct nfs_netns_client *nfs_netns_client_alloc(struct kobject *parent,
p = kzalloc(sizeof(*p), GFP_KERNEL);
if (p) {
p->net = net;
- p->kobject.kset = nfs_client_kset;
+ p->kobject.kset = nfs_kset;
if (kobject_init_and_add(&p->kobject, &nfs_netns_client_type,
parent, "nfs_client") == 0)
return p;
diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c
index 57da4f23c1e4..acb8951eb757 100644
--- a/fs/smb/client/connect.c
+++ b/fs/smb/client/connect.c
@@ -2901,9 +2901,9 @@ bind_socket(struct TCP_Server_Info *server)
if (server->srcaddr.ss_family != AF_UNSPEC) {
/* Bind to the specified local IP address */
struct socket *socket = server->ssocket;
- rc = socket->ops->bind(socket,
- (struct sockaddr *) &server->srcaddr,
- sizeof(server->srcaddr));
+ rc = kernel_bind(socket,
+ (struct sockaddr *) &server->srcaddr,
+ sizeof(server->srcaddr));
if (rc < 0) {
struct sockaddr_in *saddr4;
struct sockaddr_in6 *saddr6;
@@ -3050,8 +3050,8 @@ generic_ip_connect(struct TCP_Server_Info *server)
socket->sk->sk_sndbuf,
socket->sk->sk_rcvbuf, socket->sk->sk_rcvtimeo);

- rc = socket->ops->connect(socket, saddr, slen,
- server->noblockcnt ? O_NONBLOCK : 0);
+ rc = kernel_connect(socket, saddr, slen,
+ server->noblockcnt ? O_NONBLOCK : 0);
/*
* When mounting SMB root file systems, we do not want to block in
* connect. Otherwise bail out and then let cifs_reconnect() perform
diff --git a/fs/smb/server/connection.c b/fs/smb/server/connection.c
index e1d2be19cddf..ff97cad8d5b4 100644
--- a/fs/smb/server/connection.c
+++ b/fs/smb/server/connection.c
@@ -84,6 +84,8 @@ struct ksmbd_conn *ksmbd_conn_alloc(void)
spin_lock_init(&conn->llist_lock);
INIT_LIST_HEAD(&conn->lock_list);

+ init_rwsem(&conn->session_lock);
+
down_write(&conn_list_lock);
list_add(&conn->conns_list, &conn_list);
up_write(&conn_list_lock);
diff --git a/fs/smb/server/connection.h b/fs/smb/server/connection.h
index ad8dfaa48ffb..335fdd714d59 100644
--- a/fs/smb/server/connection.h
+++ b/fs/smb/server/connection.h
@@ -50,6 +50,7 @@ struct ksmbd_conn {
struct nls_table *local_nls;
struct unicode_map *um;
struct list_head conns_list;
+ struct rw_semaphore session_lock;
/* smb session 1 per user */
struct xarray sessions;
unsigned long last_active;
diff --git a/fs/smb/server/mgmt/user_session.c b/fs/smb/server/mgmt/user_session.c
index ea4b56d570fb..cf6621e21ba3 100644
--- a/fs/smb/server/mgmt/user_session.c
+++ b/fs/smb/server/mgmt/user_session.c
@@ -183,7 +183,7 @@ static void ksmbd_expire_session(struct ksmbd_conn *conn)
unsigned long id;
struct ksmbd_session *sess;

- down_write(&sessions_table_lock);
+ down_write(&conn->session_lock);
xa_for_each(&conn->sessions, id, sess) {
if (sess->state != SMB2_SESSION_VALID ||
time_after(jiffies,
@@ -194,7 +194,7 @@ static void ksmbd_expire_session(struct ksmbd_conn *conn)
continue;
}
}
- up_write(&sessions_table_lock);
+ up_write(&conn->session_lock);
}

int ksmbd_session_register(struct ksmbd_conn *conn,
@@ -236,7 +236,9 @@ void ksmbd_sessions_deregister(struct ksmbd_conn *conn)
}
}
}
+ up_write(&sessions_table_lock);

+ down_write(&conn->session_lock);
xa_for_each(&conn->sessions, id, sess) {
unsigned long chann_id;
struct channel *chann;
@@ -253,7 +255,7 @@ void ksmbd_sessions_deregister(struct ksmbd_conn *conn)
ksmbd_session_destroy(sess);
}
}
- up_write(&sessions_table_lock);
+ up_write(&conn->session_lock);
}

struct ksmbd_session *ksmbd_session_lookup(struct ksmbd_conn *conn,
@@ -261,9 +263,11 @@ struct ksmbd_session *ksmbd_session_lookup(struct ksmbd_conn *conn,
{
struct ksmbd_session *sess;

+ down_read(&conn->session_lock);
sess = xa_load(&conn->sessions, id);
if (sess)
sess->last_active = jiffies;
+ up_read(&conn->session_lock);
return sess;
}

diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c
index f6fd5cf976a5..683152007566 100644
--- a/fs/smb/server/smb2pdu.c
+++ b/fs/smb/server/smb2pdu.c
@@ -8128,10 +8128,10 @@ static void smb20_oplock_break_ack(struct ksmbd_work *work)
goto err_out;
}

- opinfo_put(opinfo);
- ksmbd_fd_put(work, fp);
opinfo->op_state = OPLOCK_STATE_NONE;
wake_up_interruptible_all(&opinfo->oplock_q);
+ opinfo_put(opinfo);
+ ksmbd_fd_put(work, fp);

rsp->StructureSize = cpu_to_le16(24);
rsp->OplockLevel = rsp_oplevel;
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 1ed2ec035e77..1fba826f0ace 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1065,7 +1065,7 @@ static inline int bpf_trampoline_unlink_prog(struct bpf_tramp_link *link,
static inline struct bpf_trampoline *bpf_trampoline_get(u64 key,
struct bpf_attach_target_info *tgt_info)
{
- return ERR_PTR(-EOPNOTSUPP);
+ return NULL;
}
static inline void bpf_trampoline_put(struct bpf_trampoline *tr) {}
#define DEFINE_BPF_DISPATCHER(name)
diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index 15d7529ac953..9a44de45cc1f 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -33,6 +33,7 @@ struct ipv6_devconf {
__s32 accept_ra_defrtr;
__u32 ra_defrtr_metric;
__s32 accept_ra_min_hop_limit;
+ __s32 accept_ra_min_lft;
__s32 accept_ra_pinfo;
__s32 ignore_routes_with_linkdown;
#ifdef CONFIG_IPV6_ROUTER_PREF
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 104ec00823da..eefb0948110a 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1906,6 +1906,8 @@ static inline bool can_do_mlock(void) { return false; }
extern int user_shm_lock(size_t, struct ucounts *);
extern void user_shm_unlock(size_t, struct ucounts *);

+struct folio *vm_normal_folio(struct vm_area_struct *vma, unsigned long addr,
+ pte_t pte);
struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
pte_t pte);
struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr,
diff --git a/include/linux/netfilter/nf_conntrack_sctp.h b/include/linux/netfilter/nf_conntrack_sctp.h
index 625f491b95de..fb31312825ae 100644
--- a/include/linux/netfilter/nf_conntrack_sctp.h
+++ b/include/linux/netfilter/nf_conntrack_sctp.h
@@ -9,6 +9,7 @@ struct ip_ct_sctp {
enum sctp_conntrack state;

__be32 vtag[IP_CT_DIR_MAX];
+ u8 init[IP_CT_DIR_MAX];
u8 last_dir;
u8 flags;
};
diff --git a/include/linux/regulator/mt6358-regulator.h b/include/linux/regulator/mt6358-regulator.h
index bdcf83cd719e..be9f61e3e8e6 100644
--- a/include/linux/regulator/mt6358-regulator.h
+++ b/include/linux/regulator/mt6358-regulator.h
@@ -48,8 +48,6 @@ enum {
MT6358_ID_VLDO28,
MT6358_ID_VAUD28,
MT6358_ID_VSIM2,
- MT6358_ID_VCORE_SSHUB,
- MT6358_ID_VSRAM_OTHERS_SSHUB,
MT6358_ID_RG_MAX,
};

@@ -90,8 +88,6 @@ enum {
MT6366_ID_VMC,
MT6366_ID_VAUD28,
MT6366_ID_VSIM2,
- MT6366_ID_VCORE_SSHUB,
- MT6366_ID_VSRAM_OTHERS_SSHUB,
MT6366_ID_RG_MAX,
};

diff --git a/include/net/arp.h b/include/net/arp.h
index d7ef4ec71dfe..e8747e0713c7 100644
--- a/include/net/arp.h
+++ b/include/net/arp.h
@@ -38,11 +38,11 @@ static inline struct neighbour *__ipv4_neigh_lookup(struct net_device *dev, u32
{
struct neighbour *n;

- rcu_read_lock_bh();
+ rcu_read_lock();
n = __ipv4_neigh_lookup_noref(dev, key);
if (n && !refcount_inc_not_zero(&n->refcnt))
n = NULL;
- rcu_read_unlock_bh();
+ rcu_read_unlock();

return n;
}
@@ -51,10 +51,10 @@ static inline void __ipv4_confirm_neigh(struct net_device *dev, u32 key)
{
struct neighbour *n;

- rcu_read_lock_bh();
+ rcu_read_lock();
n = __ipv4_neigh_lookup_noref(dev, key);
neigh_confirm(n);
- rcu_read_unlock_bh();
+ rcu_read_unlock();
}

void arp_init(void);
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 5976545aa26b..7a6c3059d50b 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -5621,12 +5621,17 @@ struct cfg80211_cqm_config;
* wiphy_lock - lock the wiphy
* @wiphy: the wiphy to lock
*
- * This is mostly exposed so it can be done around registering and
- * unregistering netdevs that aren't created through cfg80211 calls,
- * since that requires locking in cfg80211 when the notifiers is
- * called, but that cannot differentiate which way it's called.
+ * This is needed around registering and unregistering netdevs that
+ * aren't created through cfg80211 calls, since that requires locking
+ * in cfg80211 when the notifier is called, but that cannot
+ * differentiate which way it's called.
+ *
+ * It can also be used by drivers for their own purposes.
*
* When cfg80211 ops are called, the wiphy is already locked.
+ *
+ * Note that this makes sure that no workers that have been queued
+ * with wiphy_work_queue() are running.
*/
static inline void wiphy_lock(struct wiphy *wiphy)
__acquires(&wiphy->mtx)
@@ -5646,6 +5651,88 @@ static inline void wiphy_unlock(struct wiphy *wiphy)
mutex_unlock(&wiphy->mtx);
}

+struct wiphy_work;
+typedef void (*wiphy_work_func_t)(struct wiphy *, struct wiphy_work *);
+
+struct wiphy_work {
+ struct list_head entry;
+ wiphy_work_func_t func;
+};
+
+static inline void wiphy_work_init(struct wiphy_work *work,
+ wiphy_work_func_t func)
+{
+ INIT_LIST_HEAD(&work->entry);
+ work->func = func;
+}
+
+/**
+ * wiphy_work_queue - queue work for the wiphy
+ * @wiphy: the wiphy to queue for
+ * @work: the work item
+ *
+ * This is useful for work that must be done asynchronously, and work
+ * queued here has the special property that the wiphy mutex will be
+ * held as if wiphy_lock() was called, and that it cannot be running
+ * after wiphy_lock() was called. Therefore, wiphy_work_cancel() can
+ * use just cancel_work() instead of cancel_work_sync(); it requires
+ * being in a section protected by wiphy_lock().
+ */
+void wiphy_work_queue(struct wiphy *wiphy, struct wiphy_work *work);
+
+/**
+ * wiphy_work_cancel - cancel previously queued work
+ * @wiphy: the wiphy, for debug purposes
+ * @work: the work to cancel
+ *
+ * Cancel the work *without* waiting for it; this assumes being
+ * called under the wiphy mutex acquired by wiphy_lock().
+ */
+void wiphy_work_cancel(struct wiphy *wiphy, struct wiphy_work *work);
+
+struct wiphy_delayed_work {
+ struct wiphy_work work;
+ struct wiphy *wiphy;
+ struct timer_list timer;
+};
+
+void wiphy_delayed_work_timer(struct timer_list *t);
+
+static inline void wiphy_delayed_work_init(struct wiphy_delayed_work *dwork,
+ wiphy_work_func_t func)
+{
+ timer_setup(&dwork->timer, wiphy_delayed_work_timer, 0);
+ wiphy_work_init(&dwork->work, func);
+}
+
+/**
+ * wiphy_delayed_work_queue - queue delayed work for the wiphy
+ * @wiphy: the wiphy to queue for
+ * @dwork: the delayable worker
+ * @delay: number of jiffies to wait before queueing
+ *
+ * This is useful for work that must be done asynchronously, and work
+ * queued here has the special property that the wiphy mutex will be
+ * held as if wiphy_lock() was called, and that it cannot be running
+ * after wiphy_lock() was called. Therefore, wiphy_delayed_work_cancel()
+ * can use just cancel_work() instead of cancel_work_sync(); it requires
+ * being in a section protected by wiphy_lock().
+ */
+void wiphy_delayed_work_queue(struct wiphy *wiphy,
+ struct wiphy_delayed_work *dwork,
+ unsigned long delay);
+
+/**
+ * wiphy_delayed_work_cancel - cancel previously queued delayed work
+ * @wiphy: the wiphy, for debug purposes
+ * @dwork: the delayed work to cancel
+ *
+ * Cancel the work *without* waiting for it; this assumes being
+ * called under the wiphy mutex acquired by wiphy_lock().
+ */
+void wiphy_delayed_work_cancel(struct wiphy *wiphy,
+ struct wiphy_delayed_work *dwork);
+
/**
* struct wireless_dev - wireless device state
*
@@ -5718,6 +5805,7 @@ static inline void wiphy_unlock(struct wiphy *wiphy)
* @event_lock: (private) lock for event list
* @owner_nlportid: (private) owner socket port ID
* @nl_owner_dead: (private) owner socket went away
+ * @cqm_rssi_work: (private) CQM RSSI reporting work
* @cqm_config: (private) nl80211 RSSI monitor state
* @pmsr_list: (private) peer measurement requests
* @pmsr_lock: (private) peer measurements requests/results lock
@@ -5790,7 +5878,8 @@ struct wireless_dev {
} wext;
#endif

- struct cfg80211_cqm_config *cqm_config;
+ struct wiphy_work cqm_rssi_work;
+ struct cfg80211_cqm_config __rcu *cqm_config;

struct list_head pmsr_list;
spinlock_t pmsr_lock;
diff --git a/include/net/ndisc.h b/include/net/ndisc.h
index da7eec8669ec..325a6fb65c89 100644
--- a/include/net/ndisc.h
+++ b/include/net/ndisc.h
@@ -395,11 +395,11 @@ static inline struct neighbour *__ipv6_neigh_lookup(struct net_device *dev, cons
{
struct neighbour *n;

- rcu_read_lock_bh();
+ rcu_read_lock();
n = __ipv6_neigh_lookup_noref(dev, pkey);
if (n && !refcount_inc_not_zero(&n->refcnt))
n = NULL;
- rcu_read_unlock_bh();
+ rcu_read_unlock();

return n;
}
@@ -409,10 +409,10 @@ static inline void __ipv6_confirm_neigh(struct net_device *dev,
{
struct neighbour *n;

- rcu_read_lock_bh();
+ rcu_read_lock();
n = __ipv6_neigh_lookup_noref(dev, pkey);
neigh_confirm(n);
- rcu_read_unlock_bh();
+ rcu_read_unlock();
}

static inline void __ipv6_confirm_neigh_stub(struct net_device *dev,
@@ -420,10 +420,10 @@ static inline void __ipv6_confirm_neigh_stub(struct net_device *dev,
{
struct neighbour *n;

- rcu_read_lock_bh();
+ rcu_read_lock();
n = __ipv6_neigh_lookup_noref_stub(dev, pkey);
neigh_confirm(n);
- rcu_read_unlock_bh();
+ rcu_read_unlock();
}

/* uses ipv6_stub and is meant for use outside of IPv6 core */
diff --git a/include/net/neighbour.h b/include/net/neighbour.h
index 794e45981891..ccc4a0f8b4ad 100644
--- a/include/net/neighbour.h
+++ b/include/net/neighbour.h
@@ -299,14 +299,14 @@ static inline struct neighbour *___neigh_lookup_noref(
const void *pkey,
struct net_device *dev)
{
- struct neigh_hash_table *nht = rcu_dereference_bh(tbl->nht);
+ struct neigh_hash_table *nht = rcu_dereference(tbl->nht);
struct neighbour *n;
u32 hash_val;

hash_val = hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift);
- for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
+ for (n = rcu_dereference(nht->hash_buckets[hash_val]);
n != NULL;
- n = rcu_dereference_bh(n->next)) {
+ n = rcu_dereference(n->next)) {
if (n->dev == dev && key_eq(n, pkey))
return n;
}
@@ -464,7 +464,7 @@ static __always_inline int neigh_event_send_probe(struct neighbour *neigh,

if (READ_ONCE(neigh->used) != now)
WRITE_ONCE(neigh->used, now);
- if (!(neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE)))
+ if (!(READ_ONCE(neigh->nud_state) & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE)))
return __neigh_event_send(neigh, skb, immediate_ok);
return 0;
}
@@ -541,7 +541,7 @@ static inline int neigh_output(struct neighbour *n, struct sk_buff *skb,
READ_ONCE(hh->hh_len))
return neigh_hh_output(hh, skb);

- return n->output(n, skb);
+ return READ_ONCE(n->output)(n, skb);
}

static inline struct neighbour *
diff --git a/include/net/netlink.h b/include/net/netlink.h
index 6bfa972f2fbf..a686c9041ddc 100644
--- a/include/net/netlink.h
+++ b/include/net/netlink.h
@@ -937,6 +937,27 @@ static inline struct nlmsghdr *nlmsg_put(struct sk_buff *skb, u32 portid, u32 se
return __nlmsg_put(skb, portid, seq, type, payload, flags);
}

+/**
+ * nlmsg_append - Add more data to a nlmsg in a skb
+ * @skb: socket buffer to store message in
+ * @size: length of message payload
+ *
+ * Append data to an existing nlmsg, used when constructing a message
+ * with multiple fixed-format headers (which is rare).
+ * Returns NULL if the tailroom of the skb is insufficient to store
+ * the extra payload.
+ */
+static inline void *nlmsg_append(struct sk_buff *skb, u32 size)
+{
+ if (unlikely(skb_tailroom(skb) < NLMSG_ALIGN(size)))
+ return NULL;
+
+ if (NLMSG_ALIGN(size) - size)
+ memset(skb_tail_pointer(skb) + size, 0,
+ NLMSG_ALIGN(size) - size);
+ return __skb_put(skb, NLMSG_ALIGN(size));
+}
+
/**
* nlmsg_put_answer - Add a new callback based netlink message to an skb
* @skb: socket buffer to store message in
diff --git a/include/net/nexthop.h b/include/net/nexthop.h
index 28085b995ddc..2b12725de9c0 100644
--- a/include/net/nexthop.h
+++ b/include/net/nexthop.h
@@ -497,29 +497,6 @@ static inline struct fib6_nh *nexthop_fib6_nh(struct nexthop *nh)
return NULL;
}

-/* Variant of nexthop_fib6_nh().
- * Caller should either hold rcu_read_lock_bh(), or RTNL.
- */
-static inline struct fib6_nh *nexthop_fib6_nh_bh(struct nexthop *nh)
-{
- struct nh_info *nhi;
-
- if (nh->is_group) {
- struct nh_group *nh_grp;
-
- nh_grp = rcu_dereference_bh_rtnl(nh->nh_grp);
- nh = nexthop_mpath_select(nh_grp, 0);
- if (!nh)
- return NULL;
- }
-
- nhi = rcu_dereference_bh_rtnl(nh->nh_info);
- if (nhi->family == AF_INET6)
- return &nhi->fib6_nh;
-
- return NULL;
-}
-
static inline struct net_device *fib6_info_nh_dev(struct fib6_info *f6i)
{
struct fib6_nh *fib6_nh;
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 5fd69f2342a4..9ebb54122bb7 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -355,12 +355,14 @@ ssize_t tcp_splice_read(struct socket *sk, loff_t *ppos,
struct sk_buff *tcp_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp,
bool force_schedule);

-static inline void tcp_dec_quickack_mode(struct sock *sk,
- const unsigned int pkts)
+static inline void tcp_dec_quickack_mode(struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);

if (icsk->icsk_ack.quick) {
+ /* How many ACKs S/ACKing new data have we sent? */
+ const unsigned int pkts = inet_csk_ack_scheduled(sk) ? 1 : 0;
+
if (pkts >= icsk->icsk_ack.quick) {
icsk->icsk_ack.quick = 0;
/* Leaving quickack mode we deflate ATO. */
diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h
index 006858ed04e8..dc2cff18b68b 100644
--- a/include/scsi/scsi_device.h
+++ b/include/scsi/scsi_device.h
@@ -161,6 +161,10 @@ struct scsi_device {
* pass settings from slave_alloc to scsi
* core. */
unsigned int eh_timeout; /* Error handling timeout */
+
+ bool manage_system_start_stop; /* Let HLD (sd) manage system start/stop */
+ bool manage_runtime_start_stop; /* Let HLD (sd) manage runtime start/stop */
+
unsigned removable:1;
unsigned changed:1; /* Data invalid due to media change */
unsigned busy:1; /* Used to prevent races */
@@ -192,7 +196,7 @@ struct scsi_device {
unsigned use_192_bytes_for_3f:1; /* ask for 192 bytes from page 0x3f */
unsigned no_start_on_add:1; /* do not issue start on add */
unsigned allow_restart:1; /* issue START_UNIT in error handler */
- unsigned manage_start_stop:1; /* Let HLD (sd) manage start/stop */
+ unsigned no_start_on_resume:1; /* Do not issue START_STOP_UNIT on resume */
unsigned start_stop_pwr_cond:1; /* Set power cond. in START_STOP_UNIT */
unsigned no_uld_attach:1; /* disable connecting to upper level drivers */
unsigned select_no_atn:1;
diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h
index d27d9fb7174c..71def41b1ad7 100644
--- a/include/scsi/scsi_host.h
+++ b/include/scsi/scsi_host.h
@@ -752,7 +752,7 @@ extern int __must_check scsi_add_host_with_dma(struct Scsi_Host *,
struct device *,
struct device *);
extern void scsi_scan_host(struct Scsi_Host *);
-extern void scsi_rescan_device(struct device *);
+extern int scsi_rescan_device(struct scsi_device *sdev);
extern void scsi_remove_host(struct Scsi_Host *);
extern struct Scsi_Host *scsi_host_get(struct Scsi_Host *);
extern int scsi_host_busy(struct Scsi_Host *shost);
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 53bc48794719..92dbe89dafbf 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -3112,6 +3112,11 @@ union bpf_attr {
* **BPF_FIB_LOOKUP_OUTPUT**
* Perform lookup from an egress perspective (default is
* ingress).
+ * **BPF_FIB_LOOKUP_SKIP_NEIGH**
+ * Skip the neighbour table lookup. *params*->dmac
+ * and *params*->smac will not be set as output. A common
+ * use case is to call **bpf_redirect_neigh**\ () after
+ * doing **bpf_fib_lookup**\ ().
*
* *ctx* is either **struct xdp_md** for XDP programs or
* **struct sk_buff** tc cls_act programs.
@@ -6678,6 +6683,7 @@ struct bpf_raw_tracepoint_args {
enum {
BPF_FIB_LOOKUP_DIRECT = (1U << 0),
BPF_FIB_LOOKUP_OUTPUT = (1U << 1),
+ BPF_FIB_LOOKUP_SKIP_NEIGH = (1U << 2),
};

enum {
diff --git a/include/uapi/linux/ipv6.h b/include/uapi/linux/ipv6.h
index 53326dfc59ec..4fa8511b1e35 100644
--- a/include/uapi/linux/ipv6.h
+++ b/include/uapi/linux/ipv6.h
@@ -198,6 +198,7 @@ enum {
DEVCONF_IOAM6_ID_WIDE,
DEVCONF_NDISC_EVICT_NOCARRIER,
DEVCONF_ACCEPT_UNTRACKED_NA,
+ DEVCONF_ACCEPT_RA_MIN_LFT,
DEVCONF_MAX
};

diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 2f562cf961e0..b7383358c4ea 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -354,10 +354,11 @@ static void rb_init_page(struct buffer_data_page *bpage)
local_set(&bpage->commit, 0);
}

-/*
- * Also stolen from mm/slob.c. Thanks to Mathieu Desnoyers for pointing
- * this issue out.
- */
+static __always_inline unsigned int rb_page_commit(struct buffer_page *bpage)
+{
+ return local_read(&bpage->page->commit);
+}
+
static void free_buffer_page(struct buffer_page *bpage)
{
free_page((unsigned long)bpage->page);
@@ -2024,7 +2025,7 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned long nr_pages)
* Increment overrun to account for the lost events.
*/
local_add(page_entries, &cpu_buffer->overrun);
- local_sub(BUF_PAGE_SIZE, &cpu_buffer->entries_bytes);
+ local_sub(rb_page_commit(to_remove_page), &cpu_buffer->entries_bytes);
local_inc(&cpu_buffer->pages_lost);
}

@@ -2368,11 +2369,6 @@ rb_reader_event(struct ring_buffer_per_cpu *cpu_buffer)
cpu_buffer->reader_page->read);
}

-static __always_inline unsigned rb_page_commit(struct buffer_page *bpage)
-{
- return local_read(&bpage->page->commit);
-}
-
static struct ring_buffer_event *
rb_iter_head_event(struct ring_buffer_iter *iter)
{
@@ -2518,7 +2514,7 @@ rb_handle_head_page(struct ring_buffer_per_cpu *cpu_buffer,
* the counters.
*/
local_add(entries, &cpu_buffer->overrun);
- local_sub(BUF_PAGE_SIZE, &cpu_buffer->entries_bytes);
+ local_sub(rb_page_commit(next_page), &cpu_buffer->entries_bytes);
local_inc(&cpu_buffer->pages_lost);

/*
@@ -2661,9 +2657,6 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer,

event = __rb_page_index(tail_page, tail);

- /* account for padding bytes */
- local_add(BUF_PAGE_SIZE - tail, &cpu_buffer->entries_bytes);
-
/*
* Save the original length to the meta data.
* This will be used by the reader to add lost event
@@ -2677,7 +2670,8 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer,
* write counter enough to allow another writer to slip
* in on this page.
* We put in a discarded commit instead, to make sure
- * that this space is not used again.
+ * that this space is not used again, and this space will
+ * not be accounted into 'entries_bytes'.
*
* If we are less than the minimum size, we don't need to
* worry about it.
@@ -2702,6 +2696,9 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer,
/* time delta must be non zero */
event->time_delta = 1;

+ /* account for padding bytes */
+ local_add(BUF_PAGE_SIZE - tail, &cpu_buffer->entries_bytes);
+
/* Make sure the padding is visible before the tail_page->write update */
smp_wmb();

@@ -4219,7 +4216,7 @@ u64 ring_buffer_oldest_event_ts(struct trace_buffer *buffer, int cpu)
EXPORT_SYMBOL_GPL(ring_buffer_oldest_event_ts);

/**
- * ring_buffer_bytes_cpu - get the number of bytes consumed in a cpu buffer
+ * ring_buffer_bytes_cpu - get the number of bytes unconsumed in a cpu buffer
* @buffer: The ring buffer
* @cpu: The per CPU buffer to read from.
*/
@@ -4729,6 +4726,7 @@ static void rb_advance_reader(struct ring_buffer_per_cpu *cpu_buffer)

length = rb_event_length(event);
cpu_buffer->reader_page->read += length;
+ cpu_buffer->read_bytes += length;
}

static void rb_advance_iter(struct ring_buffer_iter *iter)
@@ -5824,7 +5822,7 @@ int ring_buffer_read_page(struct trace_buffer *buffer,
} else {
/* update the entry counter */
cpu_buffer->read += rb_page_entries(reader);
- cpu_buffer->read_bytes += BUF_PAGE_SIZE;
+ cpu_buffer->read_bytes += rb_page_commit(reader);

/* swap the pages */
rb_init_page(bpage);
diff --git a/mm/memory.c b/mm/memory.c
index 2083078cd061..0d1b3ee8fcd7 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -672,6 +672,16 @@ struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
return pfn_to_page(pfn);
}

+struct folio *vm_normal_folio(struct vm_area_struct *vma, unsigned long addr,
+ pte_t pte)
+{
+ struct page *page = vm_normal_page(vma, addr, pte);
+
+ if (page)
+ return page_folio(page);
+ return NULL;
+}
+
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr,
pmd_t pmd)
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 7d36dd95d1ff..bfe2d1d50fbe 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -414,7 +414,7 @@ static const struct mempolicy_operations mpol_ops[MPOL_MAX] = {
},
};

-static int migrate_page_add(struct page *page, struct list_head *pagelist,
+static int migrate_folio_add(struct folio *folio, struct list_head *foliolist,
unsigned long flags);

struct queue_pages {
@@ -424,6 +424,7 @@ struct queue_pages {
unsigned long start;
unsigned long end;
struct vm_area_struct *first;
+ bool has_unmovable;
};

/*
@@ -442,21 +443,20 @@ static inline bool queue_pages_required(struct page *page,
}

/*
- * queue_pages_pmd() has three possible return values:
- * 0 - pages are placed on the right node or queued successfully, or
- * special page is met, i.e. huge zero page.
- * 1 - there is unmovable page, and MPOL_MF_MOVE* & MPOL_MF_STRICT were
- * specified.
+ * queue_folios_pmd() has two possible return values:
+ * 0 - folios are placed on the right node or queued successfully, or
+ * special page is met, i.e. zero page, or unmovable page is found
+ * but continue walking (indicated by queue_pages.has_unmovable).
* -EIO - is migration entry or only MPOL_MF_STRICT was specified and an
- * existing page was already on a node that does not follow the
+ * existing folio was already on a node that does not follow the
* policy.
*/
-static int queue_pages_pmd(pmd_t *pmd, spinlock_t *ptl, unsigned long addr,
+static int queue_folios_pmd(pmd_t *pmd, spinlock_t *ptl, unsigned long addr,
unsigned long end, struct mm_walk *walk)
__releases(ptl)
{
int ret = 0;
- struct page *page;
+ struct folio *folio;
struct queue_pages *qp = walk->private;
unsigned long flags;

@@ -464,20 +464,20 @@ static int queue_pages_pmd(pmd_t *pmd, spinlock_t *ptl, unsigned long addr,
ret = -EIO;
goto unlock;
}
- page = pmd_page(*pmd);
- if (is_huge_zero_page(page)) {
+ folio = pfn_folio(pmd_pfn(*pmd));
+ if (is_huge_zero_page(&folio->page)) {
walk->action = ACTION_CONTINUE;
goto unlock;
}
- if (!queue_pages_required(page, qp))
+ if (!queue_pages_required(&folio->page, qp))
goto unlock;

flags = qp->flags;
- /* go to thp migration */
+ /* go to folio migration */
if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) {
if (!vma_migratable(walk->vma) ||
- migrate_page_add(page, qp->pagelist, flags)) {
- ret = 1;
+ migrate_folio_add(folio, qp->pagelist, flags)) {
+ qp->has_unmovable = true;
goto unlock;
}
} else
@@ -491,28 +491,26 @@ static int queue_pages_pmd(pmd_t *pmd, spinlock_t *ptl, unsigned long addr,
* Scan through pages checking if pages follow certain conditions,
* and move them to the pagelist if they do.
*
- * queue_pages_pte_range() has three possible return values:
- * 0 - pages are placed on the right node or queued successfully, or
- * special page is met, i.e. zero page.
- * 1 - there is unmovable page, and MPOL_MF_MOVE* & MPOL_MF_STRICT were
- * specified.
- * -EIO - only MPOL_MF_STRICT was specified and an existing page was already
+ * queue_folios_pte_range() has two possible return values:
+ * 0 - folios are placed on the right node or queued successfully, or
+ * special page is met, i.e. zero page, or unmovable page is found
+ * but continue walking (indicated by queue_pages.has_unmovable).
+ * -EIO - only MPOL_MF_STRICT was specified and an existing folio was already
* on a node that does not follow the policy.
*/
-static int queue_pages_pte_range(pmd_t *pmd, unsigned long addr,
+static int queue_folios_pte_range(pmd_t *pmd, unsigned long addr,
unsigned long end, struct mm_walk *walk)
{
struct vm_area_struct *vma = walk->vma;
- struct page *page;
+ struct folio *folio;
struct queue_pages *qp = walk->private;
unsigned long flags = qp->flags;
- bool has_unmovable = false;
pte_t *pte, *mapped_pte;
spinlock_t *ptl;

ptl = pmd_trans_huge_lock(pmd, vma);
if (ptl)
- return queue_pages_pmd(pmd, ptl, addr, end, walk);
+ return queue_folios_pmd(pmd, ptl, addr, end, walk);

if (pmd_trans_unstable(pmd))
return 0;
@@ -521,40 +519,38 @@ static int queue_pages_pte_range(pmd_t *pmd, unsigned long addr,
for (; addr != end; pte++, addr += PAGE_SIZE) {
if (!pte_present(*pte))
continue;
- page = vm_normal_page(vma, addr, *pte);
- if (!page || is_zone_device_page(page))
+ folio = vm_normal_folio(vma, addr, *pte);
+ if (!folio || folio_is_zone_device(folio))
continue;
/*
- * vm_normal_page() filters out zero pages, but there might
- * still be PageReserved pages to skip, perhaps in a VDSO.
+ * vm_normal_folio() filters out zero pages, but there might
+ * still be reserved folios to skip, perhaps in a VDSO.
*/
- if (PageReserved(page))
+ if (folio_test_reserved(folio))
continue;
- if (!queue_pages_required(page, qp))
+ if (!queue_pages_required(&folio->page, qp))
continue;
if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) {
- /* MPOL_MF_STRICT must be specified if we get here */
- if (!vma_migratable(vma)) {
- has_unmovable = true;
- break;
- }
+ /*
+ * MPOL_MF_STRICT must be specified if we get here.
+ * Continue walking vmas due to MPOL_MF_MOVE* flags.
+ */
+ if (!vma_migratable(vma))
+ qp->has_unmovable = true;

/*
* Do not abort immediately since there may be
* temporary off LRU pages in the range. Still
* need migrate other LRU pages.
*/
- if (migrate_page_add(page, qp->pagelist, flags))
- has_unmovable = true;
+ if (migrate_folio_add(folio, qp->pagelist, flags))
+ qp->has_unmovable = true;
} else
break;
}
pte_unmap_unlock(mapped_pte, ptl);
cond_resched();

- if (has_unmovable)
- return 1;
-
return addr != end ? -EIO : 0;
}

@@ -594,7 +590,7 @@ static int queue_pages_hugetlb(pte_t *pte, unsigned long hmask,
* Detecting misplaced page but allow migrating pages which
* have been queued.
*/
- ret = 1;
+ qp->has_unmovable = true;
goto unlock;
}

@@ -608,7 +604,7 @@ static int queue_pages_hugetlb(pte_t *pte, unsigned long hmask,
* Failed to isolate page but allow migrating pages
* which have been queued.
*/
- ret = 1;
+ qp->has_unmovable = true;
}
unlock:
spin_unlock(ptl);
@@ -705,7 +701,7 @@ static int queue_pages_test_walk(unsigned long start, unsigned long end,

static const struct mm_walk_ops queue_pages_walk_ops = {
.hugetlb_entry = queue_pages_hugetlb,
- .pmd_entry = queue_pages_pte_range,
+ .pmd_entry = queue_folios_pte_range,
.test_walk = queue_pages_test_walk,
};

@@ -737,10 +733,13 @@ queue_pages_range(struct mm_struct *mm, unsigned long start, unsigned long end,
.start = start,
.end = end,
.first = NULL,
+ .has_unmovable = false,
};

err = walk_page_range(mm, start, end, &queue_pages_walk_ops, &qp);

+ if (qp.has_unmovable)
+ err = 1;
if (!qp.first)
/* whole range in hole */
err = -EFAULT;
@@ -1012,27 +1011,28 @@ static long do_get_mempolicy(int *policy, nodemask_t *nmask,
}

#ifdef CONFIG_MIGRATION
-/*
- * page migration, thp tail pages can be passed.
- */
-static int migrate_page_add(struct page *page, struct list_head *pagelist,
+static int migrate_folio_add(struct folio *folio, struct list_head *foliolist,
unsigned long flags)
{
- struct page *head = compound_head(page);
/*
- * Avoid migrating a page that is shared with others.
+ * We try to migrate only unshared folios. If it is shared it
+ * is likely not worth migrating.
+ *
+ * To check if the folio is shared, ideally we want to make sure
+ * every page is mapped to the same process. Doing that is very
+ * expensive, so check the estimated mapcount of the folio instead.
*/
- if ((flags & MPOL_MF_MOVE_ALL) || page_mapcount(head) == 1) {
- if (!isolate_lru_page(head)) {
- list_add_tail(&head->lru, pagelist);
- mod_node_page_state(page_pgdat(head),
- NR_ISOLATED_ANON + page_is_file_lru(head),
- thp_nr_pages(head));
+ if ((flags & MPOL_MF_MOVE_ALL) || folio_estimated_sharers(folio) == 1) {
+ if (!folio_isolate_lru(folio)) {
+ list_add_tail(&folio->lru, foliolist);
+ node_stat_mod_folio(folio,
+ NR_ISOLATED_ANON + folio_is_file_lru(folio),
+ folio_nr_pages(folio));
} else if (flags & MPOL_MF_STRICT) {
/*
- * Non-movable page may reach here. And, there may be
- * temporary off LRU pages or non-LRU movable pages.
- * Treat them as unmovable pages since they can't be
+ * Non-movable folio may reach here. And, there may be
+ * temporary off LRU folios or non-LRU movable folios.
+ * Treat them as unmovable folios since they can't be
* isolated, so they can't be moved at the moment. It
* should return -EIO for this case too.
*/
@@ -1224,7 +1224,7 @@ static struct page *new_page(struct page *page, unsigned long start)
}
#else

-static int migrate_page_add(struct page *page, struct list_head *pagelist,
+static int migrate_folio_add(struct folio *folio, struct list_head *foliolist,
unsigned long flags)
{
return -EIO;
@@ -1337,7 +1337,7 @@ static long do_mbind(unsigned long start, unsigned long len,
putback_movable_pages(&pagelist);
}

- if ((ret > 0) || (nr_failed && (flags & MPOL_MF_STRICT)))
+ if (((ret > 0) || nr_failed) && (flags & MPOL_MF_STRICT))
err = -EIO;
} else {
up_out:
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 69668817fed3..ca017c6008b7 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -170,21 +170,12 @@ static DEFINE_MUTEX(pcp_batch_high_lock);
_ret; \
})

-#define pcpu_spin_lock_irqsave(type, member, ptr, flags) \
+#define pcpu_spin_trylock(type, member, ptr) \
({ \
type *_ret; \
pcpu_task_pin(); \
_ret = this_cpu_ptr(ptr); \
- spin_lock_irqsave(&_ret->member, flags); \
- _ret; \
-})
-
-#define pcpu_spin_trylock_irqsave(type, member, ptr, flags) \
-({ \
- type *_ret; \
- pcpu_task_pin(); \
- _ret = this_cpu_ptr(ptr); \
- if (!spin_trylock_irqsave(&_ret->member, flags)) { \
+ if (!spin_trylock(&_ret->member)) { \
pcpu_task_unpin(); \
_ret = NULL; \
} \
@@ -197,27 +188,16 @@ static DEFINE_MUTEX(pcp_batch_high_lock);
pcpu_task_unpin(); \
})

-#define pcpu_spin_unlock_irqrestore(member, ptr, flags) \
-({ \
- spin_unlock_irqrestore(&ptr->member, flags); \
- pcpu_task_unpin(); \
-})
-
/* struct per_cpu_pages specific helpers. */
#define pcp_spin_lock(ptr) \
pcpu_spin_lock(struct per_cpu_pages, lock, ptr)

-#define pcp_spin_lock_irqsave(ptr, flags) \
- pcpu_spin_lock_irqsave(struct per_cpu_pages, lock, ptr, flags)
-
-#define pcp_spin_trylock_irqsave(ptr, flags) \
- pcpu_spin_trylock_irqsave(struct per_cpu_pages, lock, ptr, flags)
+#define pcp_spin_trylock(ptr) \
+ pcpu_spin_trylock(struct per_cpu_pages, lock, ptr)

#define pcp_spin_unlock(ptr) \
pcpu_spin_unlock(lock, ptr)

-#define pcp_spin_unlock_irqrestore(ptr, flags) \
- pcpu_spin_unlock_irqrestore(lock, ptr, flags)
#ifdef CONFIG_USE_PERCPU_NUMA_NODE_ID
DEFINE_PER_CPU(int, numa_node);
EXPORT_PER_CPU_SYMBOL(numa_node);
@@ -1548,6 +1528,7 @@ static void free_pcppages_bulk(struct zone *zone, int count,
struct per_cpu_pages *pcp,
int pindex)
{
+ unsigned long flags;
int min_pindex = 0;
int max_pindex = NR_PCP_LISTS - 1;
unsigned int order;
@@ -1563,8 +1544,7 @@ static void free_pcppages_bulk(struct zone *zone, int count,
/* Ensure requested pindex is drained first. */
pindex = pindex - 1;

- /* Caller must hold IRQ-safe pcp->lock so IRQs are disabled. */
- spin_lock(&zone->lock);
+ spin_lock_irqsave(&zone->lock, flags);
isolated_pageblocks = has_isolate_pageblock(zone);

while (count > 0) {
@@ -1612,7 +1592,7 @@ static void free_pcppages_bulk(struct zone *zone, int count,
} while (count > 0 && !list_empty(list));
}

- spin_unlock(&zone->lock);
+ spin_unlock_irqrestore(&zone->lock, flags);
}

static void free_one_page(struct zone *zone,
@@ -3126,10 +3106,10 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
unsigned long count, struct list_head *list,
int migratetype, unsigned int alloc_flags)
{
+ unsigned long flags;
int i, allocated = 0;

- /* Caller must hold IRQ-safe pcp->lock so IRQs are disabled. */
- spin_lock(&zone->lock);
+ spin_lock_irqsave(&zone->lock, flags);
for (i = 0; i < count; ++i) {
struct page *page = __rmqueue(zone, order, migratetype,
alloc_flags);
@@ -3163,7 +3143,7 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
* pages added to the pcp list.
*/
__mod_zone_page_state(zone, NR_FREE_PAGES, -(i << order));
- spin_unlock(&zone->lock);
+ spin_unlock_irqrestore(&zone->lock, flags);
return allocated;
}

@@ -3180,16 +3160,9 @@ void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp)
batch = READ_ONCE(pcp->batch);
to_drain = min(pcp->count, batch);
if (to_drain > 0) {
- unsigned long flags;
-
- /*
- * free_pcppages_bulk expects IRQs disabled for zone->lock
- * so even though pcp->lock is not intended to be IRQ-safe,
- * it's needed in this context.
- */
- spin_lock_irqsave(&pcp->lock, flags);
+ spin_lock(&pcp->lock);
free_pcppages_bulk(zone, to_drain, pcp, 0);
- spin_unlock_irqrestore(&pcp->lock, flags);
+ spin_unlock(&pcp->lock);
}
}
#endif
@@ -3203,12 +3176,9 @@ static void drain_pages_zone(unsigned int cpu, struct zone *zone)

pcp = per_cpu_ptr(zone->per_cpu_pageset, cpu);
if (pcp->count) {
- unsigned long flags;
-
- /* See drain_zone_pages on why this is disabling IRQs */
- spin_lock_irqsave(&pcp->lock, flags);
+ spin_lock(&pcp->lock);
free_pcppages_bulk(zone, pcp->count, pcp, 0);
- spin_unlock_irqrestore(&pcp->lock, flags);
+ spin_unlock(&pcp->lock);
}
}

@@ -3474,12 +3444,11 @@ static void free_unref_page_commit(struct zone *zone, struct per_cpu_pages *pcp,
*/
void free_unref_page(struct page *page, unsigned int order)
{
- unsigned long flags;
unsigned long __maybe_unused UP_flags;
struct per_cpu_pages *pcp;
struct zone *zone;
unsigned long pfn = page_to_pfn(page);
- int migratetype;
+ int migratetype, pcpmigratetype;

if (!free_unref_page_prepare(page, pfn, order))
return;
@@ -3487,25 +3456,25 @@ void free_unref_page(struct page *page, unsigned int order)
/*
* We only track unmovable, reclaimable and movable on pcp lists.
* Place ISOLATE pages on the isolated list because they are being
- * offlined but treat HIGHATOMIC as movable pages so we can get those
- * areas back if necessary. Otherwise, we may have to free
+ * offlined but treat HIGHATOMIC and CMA as movable pages so we can
+ * get those areas back if necessary. Otherwise, we may have to free
* excessively into the page allocator
*/
- migratetype = get_pcppage_migratetype(page);
+ migratetype = pcpmigratetype = get_pcppage_migratetype(page);
if (unlikely(migratetype >= MIGRATE_PCPTYPES)) {
if (unlikely(is_migrate_isolate(migratetype))) {
free_one_page(page_zone(page), page, pfn, order, migratetype, FPI_NONE);
return;
}
- migratetype = MIGRATE_MOVABLE;
+ pcpmigratetype = MIGRATE_MOVABLE;
}

zone = page_zone(page);
pcp_trylock_prepare(UP_flags);
- pcp = pcp_spin_trylock_irqsave(zone->per_cpu_pageset, flags);
+ pcp = pcp_spin_trylock(zone->per_cpu_pageset);
if (pcp) {
- free_unref_page_commit(zone, pcp, page, migratetype, order);
- pcp_spin_unlock_irqrestore(pcp, flags);
+ free_unref_page_commit(zone, pcp, page, pcpmigratetype, order);
+ pcp_spin_unlock(pcp);
} else {
free_one_page(zone, page, pfn, order, migratetype, FPI_NONE);
}
@@ -3517,10 +3486,10 @@ void free_unref_page(struct page *page, unsigned int order)
*/
void free_unref_page_list(struct list_head *list)
{
+ unsigned long __maybe_unused UP_flags;
struct page *page, *next;
struct per_cpu_pages *pcp = NULL;
struct zone *locked_zone = NULL;
- unsigned long flags;
int batch_count = 0;
int migratetype;

@@ -3547,20 +3516,37 @@ void free_unref_page_list(struct list_head *list)
list_for_each_entry_safe(page, next, list, lru) {
struct zone *zone = page_zone(page);

+ list_del(&page->lru);
+ migratetype = get_pcppage_migratetype(page);
+
/* Different zone, different pcp lock. */
if (zone != locked_zone) {
- if (pcp)
- pcp_spin_unlock_irqrestore(pcp, flags);
+ if (pcp) {
+ pcp_spin_unlock(pcp);
+ pcp_trylock_finish(UP_flags);
+ }

+ /*
+ * trylock is necessary as pages may be getting freed
+ * from IRQ or SoftIRQ context after an IO completion.
+ */
+ pcp_trylock_prepare(UP_flags);
+ pcp = pcp_spin_trylock(zone->per_cpu_pageset);
+ if (unlikely(!pcp)) {
+ pcp_trylock_finish(UP_flags);
+ free_one_page(zone, page, page_to_pfn(page),
+ 0, migratetype, FPI_NONE);
+ locked_zone = NULL;
+ continue;
+ }
locked_zone = zone;
- pcp = pcp_spin_lock_irqsave(locked_zone->per_cpu_pageset, flags);
+ batch_count = 0;
}

/*
* Non-isolated types over MIGRATE_PCPTYPES get added
* to the MIGRATE_MOVABLE pcp list.
*/
- migratetype = get_pcppage_migratetype(page);
if (unlikely(migratetype >= MIGRATE_PCPTYPES))
migratetype = MIGRATE_MOVABLE;

@@ -3568,18 +3554,23 @@ void free_unref_page_list(struct list_head *list)
free_unref_page_commit(zone, pcp, page, migratetype, 0);

/*
- * Guard against excessive IRQ disabled times when we get
- * a large list of pages to free.
+ * Guard against excessive lock hold times when freeing
+ * a large list of pages. Lock will be reacquired if
+ * necessary on the next iteration.
*/
if (++batch_count == SWAP_CLUSTER_MAX) {
- pcp_spin_unlock_irqrestore(pcp, flags);
+ pcp_spin_unlock(pcp);
+ pcp_trylock_finish(UP_flags);
batch_count = 0;
- pcp = pcp_spin_lock_irqsave(locked_zone->per_cpu_pageset, flags);
+ pcp = NULL;
+ locked_zone = NULL;
}
}

- if (pcp)
- pcp_spin_unlock_irqrestore(pcp, flags);
+ if (pcp) {
+ pcp_spin_unlock(pcp);
+ pcp_trylock_finish(UP_flags);
+ }
}

/*
@@ -3780,15 +3771,11 @@ static struct page *rmqueue_pcplist(struct zone *preferred_zone,
struct per_cpu_pages *pcp;
struct list_head *list;
struct page *page;
- unsigned long flags;
unsigned long __maybe_unused UP_flags;

- /*
- * spin_trylock may fail due to a parallel drain. In the future, the
- * trylock will also protect against IRQ reentrancy.
- */
+ /* spin_trylock may fail due to a parallel drain or IRQ reentrancy. */
pcp_trylock_prepare(UP_flags);
- pcp = pcp_spin_trylock_irqsave(zone->per_cpu_pageset, flags);
+ pcp = pcp_spin_trylock(zone->per_cpu_pageset);
if (!pcp) {
pcp_trylock_finish(UP_flags);
return NULL;
@@ -3802,7 +3789,7 @@ static struct page *rmqueue_pcplist(struct zone *preferred_zone,
pcp->free_factor >>= 1;
list = &pcp->lists[order_to_pindex(migratetype, order)];
page = __rmqueue_pcplist(zone, order, migratetype, alloc_flags, pcp, list);
- pcp_spin_unlock_irqrestore(pcp, flags);
+ pcp_spin_unlock(pcp);
pcp_trylock_finish(UP_flags);
if (page) {
__count_zid_vm_events(PGALLOC, page_zonenum(page), 1 << order);
@@ -5373,7 +5360,6 @@ unsigned long __alloc_pages_bulk(gfp_t gfp, int preferred_nid,
struct page **page_array)
{
struct page *page;
- unsigned long flags;
unsigned long __maybe_unused UP_flags;
struct zone *zone;
struct zoneref *z;
@@ -5455,9 +5441,9 @@ unsigned long __alloc_pages_bulk(gfp_t gfp, int preferred_nid,
if (unlikely(!zone))
goto failed;

- /* Is a parallel drain in progress? */
+ /* spin_trylock may fail due to a parallel drain or IRQ reentrancy. */
pcp_trylock_prepare(UP_flags);
- pcp = pcp_spin_trylock_irqsave(zone->per_cpu_pageset, flags);
+ pcp = pcp_spin_trylock(zone->per_cpu_pageset);
if (!pcp)
goto failed_irq;

@@ -5476,7 +5462,7 @@ unsigned long __alloc_pages_bulk(gfp_t gfp, int preferred_nid,
if (unlikely(!page)) {
/* Try and allocate at least one page */
if (!nr_account) {
- pcp_spin_unlock_irqrestore(pcp, flags);
+ pcp_spin_unlock(pcp);
goto failed_irq;
}
break;
@@ -5491,7 +5477,7 @@ unsigned long __alloc_pages_bulk(gfp_t gfp, int preferred_nid,
nr_populated++;
}

- pcp_spin_unlock_irqrestore(pcp, flags);
+ pcp_spin_unlock(pcp);
pcp_trylock_finish(UP_flags);

__count_zid_vm_events(PGALLOC, zone_idx(zone), nr_account);
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index fa4dd5fab0d4..d13b498f148c 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -2783,6 +2783,7 @@ void hci_release_dev(struct hci_dev *hdev)
hci_conn_params_clear_all(hdev);
hci_discovery_filter_clear(hdev);
hci_blocked_keys_clear(hdev);
+ hci_codec_list_clear(&hdev->local_codecs);
hci_dev_unlock(hdev);

ida_simple_remove(&hci_index_ida, hdev->id);
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 83eaf25ece46..e4d8857716eb 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -32,6 +32,7 @@

#include "hci_request.h"
#include "hci_debugfs.h"
+#include "hci_codec.h"
#include "a2mp.h"
#include "amp.h"
#include "smp.h"
diff --git a/net/bluetooth/hci_request.h b/net/bluetooth/hci_request.h
index b9c5a9823837..0be75cf0efed 100644
--- a/net/bluetooth/hci_request.h
+++ b/net/bluetooth/hci_request.h
@@ -71,7 +71,5 @@ struct sk_buff *hci_prepare_cmd(struct hci_dev *hdev, u16 opcode, u32 plen,
void hci_req_add_le_scan_disable(struct hci_request *req, bool rpa_le_conn);
void hci_req_add_le_passive_scan(struct hci_request *req);

-void hci_req_prepare_suspend(struct hci_dev *hdev, enum suspended_state next);
-
void hci_request_setup(struct hci_dev *hdev);
void hci_request_cancel_all(struct hci_dev *hdev);
diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c
index 2ae038dfc39f..5218c4dfe0a8 100644
--- a/net/bluetooth/hci_sync.c
+++ b/net/bluetooth/hci_sync.c
@@ -412,11 +412,6 @@ static int hci_le_scan_restart_sync(struct hci_dev *hdev)
LE_SCAN_FILTER_DUP_ENABLE);
}

-static int le_scan_restart_sync(struct hci_dev *hdev, void *data)
-{
- return hci_le_scan_restart_sync(hdev);
-}
-
static void le_scan_restart(struct work_struct *work)
{
struct hci_dev *hdev = container_of(work, struct hci_dev,
@@ -426,15 +421,15 @@ static void le_scan_restart(struct work_struct *work)

bt_dev_dbg(hdev, "");

- hci_dev_lock(hdev);
-
- status = hci_cmd_sync_queue(hdev, le_scan_restart_sync, NULL, NULL);
+ status = hci_le_scan_restart_sync(hdev);
if (status) {
bt_dev_err(hdev, "failed to restart LE scan: status %d",
status);
- goto unlock;
+ return;
}

+ hci_dev_lock(hdev);
+
if (!test_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, &hdev->quirks) ||
!hdev->discovery.scan_start)
goto unlock;
@@ -5033,6 +5028,7 @@ int hci_dev_close_sync(struct hci_dev *hdev)
memset(hdev->eir, 0, sizeof(hdev->eir));
memset(hdev->dev_class, 0, sizeof(hdev->dev_class));
bacpy(&hdev->random_addr, BDADDR_ANY);
+ hci_codec_list_clear(&hdev->local_codecs);

hci_dev_put(hdev);
return err;
diff --git a/net/bluetooth/iso.c b/net/bluetooth/iso.c
index 5cd2e775915b..91e990accbf2 100644
--- a/net/bluetooth/iso.c
+++ b/net/bluetooth/iso.c
@@ -458,7 +458,7 @@ static void iso_recv_frame(struct iso_conn *conn, struct sk_buff *skb)
}

/* -------- Socket interface ---------- */
-static struct sock *__iso_get_sock_listen_by_addr(bdaddr_t *ba)
+static struct sock *__iso_get_sock_listen_by_addr(bdaddr_t *src, bdaddr_t *dst)
{
struct sock *sk;

@@ -466,7 +466,10 @@ static struct sock *__iso_get_sock_listen_by_addr(bdaddr_t *ba)
if (sk->sk_state != BT_LISTEN)
continue;

- if (!bacmp(&iso_pi(sk)->src, ba))
+ if (bacmp(&iso_pi(sk)->dst, dst))
+ continue;
+
+ if (!bacmp(&iso_pi(sk)->src, src))
return sk;
}

@@ -910,7 +913,7 @@ static int iso_listen_cis(struct sock *sk)

write_lock(&iso_sk_list.lock);

- if (__iso_get_sock_listen_by_addr(&iso_pi(sk)->src))
+ if (__iso_get_sock_listen_by_addr(&iso_pi(sk)->src, &iso_pi(sk)->dst))
err = -EADDRINUSE;

write_unlock(&iso_sk_list.lock);
diff --git a/net/bridge/br_arp_nd_proxy.c b/net/bridge/br_arp_nd_proxy.c
index e5e48c6e35d7..b45c00c01dea 100644
--- a/net/bridge/br_arp_nd_proxy.c
+++ b/net/bridge/br_arp_nd_proxy.c
@@ -192,7 +192,7 @@ void br_do_proxy_suppress_arp(struct sk_buff *skb, struct net_bridge *br,
if (n) {
struct net_bridge_fdb_entry *f;

- if (!(n->nud_state & NUD_VALID)) {
+ if (!(READ_ONCE(n->nud_state) & NUD_VALID)) {
neigh_release(n);
return;
}
@@ -452,7 +452,7 @@ void br_do_suppress_nd(struct sk_buff *skb, struct net_bridge *br,
if (n) {
struct net_bridge_fdb_entry *f;

- if (!(n->nud_state & NUD_VALID)) {
+ if (!(READ_ONCE(n->nud_state) & NUD_VALID)) {
neigh_release(n);
return;
}
diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
index 812bd7e1750b..01d690d9fe5f 100644
--- a/net/bridge/br_netfilter_hooks.c
+++ b/net/bridge/br_netfilter_hooks.c
@@ -277,7 +277,8 @@ int br_nf_pre_routing_finish_bridge(struct net *net, struct sock *sk, struct sk_
struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
int ret;

- if ((neigh->nud_state & NUD_CONNECTED) && neigh->hh.hh_len) {
+ if ((READ_ONCE(neigh->nud_state) & NUD_CONNECTED) &&
+ READ_ONCE(neigh->hh.hh_len)) {
neigh_hh_bridge(&neigh->hh, skb);
skb->dev = nf_bridge->physindev;
ret = br_handle_frame_finish(net, sk, skb);
@@ -293,7 +294,7 @@ int br_nf_pre_routing_finish_bridge(struct net *net, struct sock *sk, struct sk_
/* tell br_dev_xmit to continue with forwarding */
nf_bridge->bridged_dnat = 1;
/* FIXME Need to refragment */
- ret = neigh->output(neigh, skb);
+ ret = READ_ONCE(neigh->output)(neigh, skb);
}
neigh_release(neigh);
return ret;
diff --git a/net/core/filter.c b/net/core/filter.c
index 9fd7c88b5db4..adc327f4af1e 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2197,7 +2197,7 @@ static int bpf_out_neigh_v6(struct net *net, struct sk_buff *skb,
return -ENOMEM;
}

- rcu_read_lock_bh();
+ rcu_read_lock();
if (!nh) {
dst = skb_dst(skb);
nexthop = rt6_nexthop(container_of(dst, struct rt6_info, dst),
@@ -2210,10 +2210,12 @@ static int bpf_out_neigh_v6(struct net *net, struct sk_buff *skb,
int ret;

sock_confirm_neigh(skb, neigh);
+ local_bh_disable();
dev_xmit_recursion_inc();
ret = neigh_output(neigh, skb, false);
dev_xmit_recursion_dec();
- rcu_read_unlock_bh();
+ local_bh_enable();
+ rcu_read_unlock();
return ret;
}
- rcu_read_unlock_bh();
+ rcu_read_unlock();
@@ -2295,7 +2297,7 @@ static int bpf_out_neigh_v4(struct net *net, struct sk_buff *skb,
return -ENOMEM;
}

- rcu_read_lock_bh();
+ rcu_read_lock();
if (!nh) {
struct dst_entry *dst = skb_dst(skb);
struct rtable *rt = container_of(dst, struct rtable, dst);
@@ -2307,7 +2309,7 @@ static int bpf_out_neigh_v4(struct net *net, struct sk_buff *skb,
} else if (nh->nh_family == AF_INET) {
neigh = ip_neigh_gw4(dev, nh->ipv4_nh);
} else {
- rcu_read_unlock_bh();
+ rcu_read_unlock();
goto out_drop;
}

@@ -2315,13 +2317,15 @@ static int bpf_out_neigh_v4(struct net *net, struct sk_buff *skb,
int ret;

sock_confirm_neigh(skb, neigh);
+ local_bh_disable();
dev_xmit_recursion_inc();
ret = neigh_output(neigh, skb, is_v6gw);
dev_xmit_recursion_dec();
- rcu_read_unlock_bh();
+ local_bh_enable();
+ rcu_read_unlock();
return ret;
}
- rcu_read_unlock_bh();
+ rcu_read_unlock();
out_drop:
kfree_skb(skb);
return -ENETDOWN;
@@ -5674,12 +5678,8 @@ static const struct bpf_func_proto bpf_skb_get_xfrm_state_proto = {
#endif

#if IS_ENABLED(CONFIG_INET) || IS_ENABLED(CONFIG_IPV6)
-static int bpf_fib_set_fwd_params(struct bpf_fib_lookup *params,
- const struct neighbour *neigh,
- const struct net_device *dev, u32 mtu)
+static int bpf_fib_set_fwd_params(struct bpf_fib_lookup *params, u32 mtu)
{
- memcpy(params->dmac, neigh->ha, ETH_ALEN);
- memcpy(params->smac, dev->dev_addr, ETH_ALEN);
params->h_vlan_TCI = 0;
params->h_vlan_proto = 0;
if (mtu)
@@ -5790,21 +5790,29 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
if (likely(nhc->nhc_gw_family != AF_INET6)) {
if (nhc->nhc_gw_family)
params->ipv4_dst = nhc->nhc_gw.ipv4;
-
- neigh = __ipv4_neigh_lookup_noref(dev,
- (__force u32)params->ipv4_dst);
} else {
struct in6_addr *dst = (struct in6_addr *)params->ipv6_dst;

params->family = AF_INET6;
*dst = nhc->nhc_gw.ipv6;
- neigh = __ipv6_neigh_lookup_noref_stub(dev, dst);
}

- if (!neigh || !(neigh->nud_state & NUD_VALID))
+ if (flags & BPF_FIB_LOOKUP_SKIP_NEIGH)
+ goto set_fwd_params;
+
+ if (likely(nhc->nhc_gw_family != AF_INET6))
+ neigh = __ipv4_neigh_lookup_noref(dev,
+ (__force u32)params->ipv4_dst);
+ else
+ neigh = __ipv6_neigh_lookup_noref_stub(dev, params->ipv6_dst);
+
+ if (!neigh || !(READ_ONCE(neigh->nud_state) & NUD_VALID))
return BPF_FIB_LKUP_RET_NO_NEIGH;
+ memcpy(params->dmac, neigh->ha, ETH_ALEN);
+ memcpy(params->smac, dev->dev_addr, ETH_ALEN);

- return bpf_fib_set_fwd_params(params, neigh, dev, mtu);
+set_fwd_params:
+ return bpf_fib_set_fwd_params(params, mtu);
}
#endif

@@ -5912,24 +5920,33 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
params->rt_metric = res.f6i->fib6_metric;
params->ifindex = dev->ifindex;

+ if (flags & BPF_FIB_LOOKUP_SKIP_NEIGH)
+ goto set_fwd_params;
+
/* xdp and cls_bpf programs are run in RCU-bh so rcu_read_lock_bh is
* not needed here.
*/
neigh = __ipv6_neigh_lookup_noref_stub(dev, dst);
- if (!neigh || !(neigh->nud_state & NUD_VALID))
+ if (!neigh || !(READ_ONCE(neigh->nud_state) & NUD_VALID))
return BPF_FIB_LKUP_RET_NO_NEIGH;
+ memcpy(params->dmac, neigh->ha, ETH_ALEN);
+ memcpy(params->smac, dev->dev_addr, ETH_ALEN);

- return bpf_fib_set_fwd_params(params, neigh, dev, mtu);
+set_fwd_params:
+ return bpf_fib_set_fwd_params(params, mtu);
}
#endif

+#define BPF_FIB_LOOKUP_MASK (BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_OUTPUT | \
+ BPF_FIB_LOOKUP_SKIP_NEIGH)
+
BPF_CALL_4(bpf_xdp_fib_lookup, struct xdp_buff *, ctx,
struct bpf_fib_lookup *, params, int, plen, u32, flags)
{
if (plen < sizeof(*params))
return -EINVAL;

- if (flags & ~(BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_OUTPUT))
+ if (flags & ~BPF_FIB_LOOKUP_MASK)
return -EINVAL;

switch (params->family) {
@@ -5967,7 +5984,7 @@ BPF_CALL_4(bpf_skb_fib_lookup, struct sk_buff *, skb,
if (plen < sizeof(*params))
return -EINVAL;

- if (flags & ~(BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_OUTPUT))
+ if (flags & ~BPF_FIB_LOOKUP_MASK)
return -EINVAL;

if (params->tot_len)
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 6c0f2149f2c7..b20c9768d9f3 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -410,7 +410,7 @@ static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev,
*/
__skb_queue_purge(&n->arp_queue);
n->arp_queue_len_bytes = 0;
- n->output = neigh_blackhole;
+ WRITE_ONCE(n->output, neigh_blackhole);
if (n->nud_state & NUD_VALID)
n->nud_state = NUD_NOARP;
else
@@ -614,7 +614,7 @@ struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,

NEIGH_CACHE_STAT_INC(tbl, lookups);

- rcu_read_lock_bh();
+ rcu_read_lock();
n = __neigh_lookup_noref(tbl, pkey, dev);
if (n) {
if (!refcount_inc_not_zero(&n->refcnt))
@@ -622,7 +622,7 @@ struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
NEIGH_CACHE_STAT_INC(tbl, hits);
}

- rcu_read_unlock_bh();
+ rcu_read_unlock();
return n;
}
EXPORT_SYMBOL(neigh_lookup);
@@ -920,7 +920,7 @@ static void neigh_suspect(struct neighbour *neigh)
{
neigh_dbg(2, "neigh %p is suspected\n", neigh);

- neigh->output = neigh->ops->output;
+ WRITE_ONCE(neigh->output, neigh->ops->output);
}

/* Neighbour state is OK;
@@ -932,7 +932,7 @@ static void neigh_connect(struct neighbour *neigh)
{
neigh_dbg(2, "neigh %p is connected\n", neigh);

- neigh->output = neigh->ops->connected_output;
+ WRITE_ONCE(neigh->output, neigh->ops->connected_output);
}

static void neigh_periodic_work(struct work_struct *work)
@@ -988,7 +988,9 @@ static void neigh_periodic_work(struct work_struct *work)
(state == NUD_FAILED ||
!time_in_range_open(jiffies, n->used,
n->used + NEIGH_VAR(n->parms, GC_STALETIME)))) {
- *np = n->next;
+ rcu_assign_pointer(*np,
+ rcu_dereference_protected(n->next,
+ lockdep_is_held(&tbl->lock)));
neigh_mark_dead(n);
write_unlock(&n->lock);
neigh_cleanup_and_release(n);
@@ -1093,13 +1095,13 @@ static void neigh_timer_handler(struct timer_list *t)
neigh->used +
NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
neigh_dbg(2, "neigh %p is delayed\n", neigh);
- neigh->nud_state = NUD_DELAY;
+ WRITE_ONCE(neigh->nud_state, NUD_DELAY);
neigh->updated = jiffies;
neigh_suspect(neigh);
next = now + NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME);
} else {
neigh_dbg(2, "neigh %p is suspected\n", neigh);
- neigh->nud_state = NUD_STALE;
+ WRITE_ONCE(neigh->nud_state, NUD_STALE);
neigh->updated = jiffies;
neigh_suspect(neigh);
notify = 1;
@@ -1109,14 +1111,14 @@ static void neigh_timer_handler(struct timer_list *t)
neigh->confirmed +
NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
neigh_dbg(2, "neigh %p is now reachable\n", neigh);
- neigh->nud_state = NUD_REACHABLE;
+ WRITE_ONCE(neigh->nud_state, NUD_REACHABLE);
neigh->updated = jiffies;
neigh_connect(neigh);
notify = 1;
next = neigh->confirmed + neigh->parms->reachable_time;
} else {
neigh_dbg(2, "neigh %p is probed\n", neigh);
- neigh->nud_state = NUD_PROBE;
+ WRITE_ONCE(neigh->nud_state, NUD_PROBE);
neigh->updated = jiffies;
atomic_set(&neigh->probes, 0);
notify = 1;
@@ -1130,7 +1132,7 @@ static void neigh_timer_handler(struct timer_list *t)

if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) &&
atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
- neigh->nud_state = NUD_FAILED;
+ WRITE_ONCE(neigh->nud_state, NUD_FAILED);
notify = 1;
neigh_invalidate(neigh);
goto out;
@@ -1179,7 +1181,7 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb,
atomic_set(&neigh->probes,
NEIGH_VAR(neigh->parms, UCAST_PROBES));
neigh_del_timer(neigh);
- neigh->nud_state = NUD_INCOMPLETE;
+ WRITE_ONCE(neigh->nud_state, NUD_INCOMPLETE);
neigh->updated = now;
if (!immediate_ok) {
next = now + 1;
@@ -1191,7 +1193,7 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb,
}
neigh_add_timer(neigh, next);
} else {
- neigh->nud_state = NUD_FAILED;
+ WRITE_ONCE(neigh->nud_state, NUD_FAILED);
neigh->updated = jiffies;
write_unlock_bh(&neigh->lock);

@@ -1201,7 +1203,7 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb,
} else if (neigh->nud_state & NUD_STALE) {
neigh_dbg(2, "neigh %p is delayed\n", neigh);
neigh_del_timer(neigh);
- neigh->nud_state = NUD_DELAY;
+ WRITE_ONCE(neigh->nud_state, NUD_DELAY);
neigh->updated = jiffies;
neigh_add_timer(neigh, jiffies +
NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME));
@@ -1313,7 +1315,7 @@ static int __neigh_update(struct neighbour *neigh, const u8 *lladdr,
neigh_update_flags(neigh, flags, &notify, &gc_update, &managed_update);
if (flags & (NEIGH_UPDATE_F_USE | NEIGH_UPDATE_F_MANAGED)) {
new = old & ~NUD_PERMANENT;
- neigh->nud_state = new;
+ WRITE_ONCE(neigh->nud_state, new);
err = 0;
goto out;
}
@@ -1322,7 +1324,7 @@ static int __neigh_update(struct neighbour *neigh, const u8 *lladdr,
neigh_del_timer(neigh);
if (old & NUD_CONNECTED)
neigh_suspect(neigh);
- neigh->nud_state = new;
+ WRITE_ONCE(neigh->nud_state, new);
err = 0;
notify = old & NUD_VALID;
if ((old & (NUD_INCOMPLETE | NUD_PROBE)) &&
@@ -1401,7 +1403,7 @@ static int __neigh_update(struct neighbour *neigh, const u8 *lladdr,
((new & NUD_REACHABLE) ?
neigh->parms->reachable_time :
0)));
- neigh->nud_state = new;
+ WRITE_ONCE(neigh->nud_state, new);
notify = 1;
}

@@ -1447,7 +1449,7 @@ static int __neigh_update(struct neighbour *neigh, const u8 *lladdr,
if (n2)
n1 = n2;
}
- n1->output(n1, skb);
+ READ_ONCE(n1->output)(n1, skb);
if (n2)
neigh_release(n2);
rcu_read_unlock();
@@ -1488,7 +1490,7 @@ void __neigh_set_probe_once(struct neighbour *neigh)
neigh->updated = jiffies;
if (!(neigh->nud_state & NUD_FAILED))
return;
- neigh->nud_state = NUD_INCOMPLETE;
+ WRITE_ONCE(neigh->nud_state, NUD_INCOMPLETE);
atomic_set(&neigh->probes, neigh_max_probes(neigh));
neigh_add_timer(neigh,
jiffies + max(NEIGH_VAR(neigh->parms, RETRANS_TIME),
@@ -2174,11 +2176,11 @@ static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
.ndtc_proxy_qlen = tbl->proxy_queue.qlen,
};

- rcu_read_lock_bh();
- nht = rcu_dereference_bh(tbl->nht);
+ rcu_read_lock();
+ nht = rcu_dereference(tbl->nht);
ndc.ndtc_hash_rnd = nht->hash_rnd[0];
ndc.ndtc_hash_mask = ((1 << nht->hash_shift) - 1);
- rcu_read_unlock_bh();
+ rcu_read_unlock();

if (nla_put(skb, NDTA_CONFIG, sizeof(ndc), &ndc))
goto nla_put_failure;
@@ -2693,15 +2695,15 @@ static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
if (filter->dev_idx || filter->master_idx)
flags |= NLM_F_DUMP_FILTERED;

- rcu_read_lock_bh();
- nht = rcu_dereference_bh(tbl->nht);
+ rcu_read_lock();
+ nht = rcu_dereference(tbl->nht);

for (h = s_h; h < (1 << nht->hash_shift); h++) {
if (h > s_h)
s_idx = 0;
- for (n = rcu_dereference_bh(nht->hash_buckets[h]), idx = 0;
+ for (n = rcu_dereference(nht->hash_buckets[h]), idx = 0;
n != NULL;
- n = rcu_dereference_bh(n->next)) {
+ n = rcu_dereference(n->next)) {
if (idx < s_idx || !net_eq(dev_net(n->dev), net))
goto next;
if (neigh_ifindex_filtered(n->dev, filter->dev_idx) ||
@@ -2720,7 +2722,7 @@ static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
}
rc = skb->len;
out:
- rcu_read_unlock_bh();
+ rcu_read_unlock();
cb->args[1] = h;
cb->args[2] = idx;
return rc;
@@ -3065,20 +3067,20 @@ void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void
int chain;
struct neigh_hash_table *nht;

- rcu_read_lock_bh();
- nht = rcu_dereference_bh(tbl->nht);
+ rcu_read_lock();
+ nht = rcu_dereference(tbl->nht);

- read_lock(&tbl->lock); /* avoid resizes */
+ read_lock_bh(&tbl->lock); /* avoid resizes */
for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
struct neighbour *n;

- for (n = rcu_dereference_bh(nht->hash_buckets[chain]);
+ for (n = rcu_dereference(nht->hash_buckets[chain]);
n != NULL;
- n = rcu_dereference_bh(n->next))
+ n = rcu_dereference(n->next))
cb(n, cookie);
}
- read_unlock(&tbl->lock);
- rcu_read_unlock_bh();
+ read_unlock_bh(&tbl->lock);
+ rcu_read_unlock();
}
EXPORT_SYMBOL(neigh_for_each);

@@ -3128,7 +3130,7 @@ int neigh_xmit(int index, struct net_device *dev,
tbl = neigh_tables[index];
if (!tbl)
goto out;
- rcu_read_lock_bh();
+ rcu_read_lock();
if (index == NEIGH_ARP_TABLE) {
u32 key = *((u32 *)addr);

@@ -3140,11 +3142,11 @@ int neigh_xmit(int index, struct net_device *dev,
neigh = __neigh_create(tbl, addr, dev, false);
err = PTR_ERR(neigh);
if (IS_ERR(neigh)) {
- rcu_read_unlock_bh();
+ rcu_read_unlock();
goto out_kfree_skb;
}
- err = neigh->output(neigh, skb);
- rcu_read_unlock_bh();
+ err = READ_ONCE(neigh->output)(neigh, skb);
+ rcu_read_unlock();
}
else if (index == NEIGH_LINK_TABLE) {
err = dev_hard_header(skb, dev, ntohs(skb->protocol),
@@ -3173,7 +3175,7 @@ static struct neighbour *neigh_get_first(struct seq_file *seq)

state->flags &= ~NEIGH_SEQ_IS_PNEIGH;
for (bucket = 0; bucket < (1 << nht->hash_shift); bucket++) {
- n = rcu_dereference_bh(nht->hash_buckets[bucket]);
+ n = rcu_dereference(nht->hash_buckets[bucket]);

while (n) {
if (!net_eq(dev_net(n->dev), net))
@@ -3188,10 +3190,10 @@ static struct neighbour *neigh_get_first(struct seq_file *seq)
}
if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
break;
- if (n->nud_state & ~NUD_NOARP)
+ if (READ_ONCE(n->nud_state) & ~NUD_NOARP)
break;
next:
- n = rcu_dereference_bh(n->next);
+ n = rcu_dereference(n->next);
}

if (n)
@@ -3215,7 +3217,7 @@ static struct neighbour *neigh_get_next(struct seq_file *seq,
if (v)
return n;
}
- n = rcu_dereference_bh(n->next);
+ n = rcu_dereference(n->next);

while (1) {
while (n) {
@@ -3230,10 +3232,10 @@ static struct neighbour *neigh_get_next(struct seq_file *seq,
if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
break;

- if (n->nud_state & ~NUD_NOARP)
+ if (READ_ONCE(n->nud_state) & ~NUD_NOARP)
break;
next:
- n = rcu_dereference_bh(n->next);
+ n = rcu_dereference(n->next);
}

if (n)
@@ -3242,7 +3244,7 @@ static struct neighbour *neigh_get_next(struct seq_file *seq,
if (++state->bucket >= (1 << nht->hash_shift))
break;

- n = rcu_dereference_bh(nht->hash_buckets[state->bucket]);
+ n = rcu_dereference(nht->hash_buckets[state->bucket]);
}

if (n && pos)
@@ -3344,7 +3346,7 @@ static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos)

void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags)
__acquires(tbl->lock)
- __acquires(rcu_bh)
+ __acquires(rcu)
{
struct neigh_seq_state *state = seq->private;

@@ -3352,9 +3354,9 @@ void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl
state->bucket = 0;
state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH);

- rcu_read_lock_bh();
- state->nht = rcu_dereference_bh(tbl->nht);
- read_lock(&tbl->lock);
+ rcu_read_lock();
+ state->nht = rcu_dereference(tbl->nht);
+ read_lock_bh(&tbl->lock);

return *pos ? neigh_get_idx_any(seq, pos) : SEQ_START_TOKEN;
}
@@ -3389,13 +3391,13 @@ EXPORT_SYMBOL(neigh_seq_next);

void neigh_seq_stop(struct seq_file *seq, void *v)
__releases(tbl->lock)
- __releases(rcu_bh)
+ __releases(rcu)
{
struct neigh_seq_state *state = seq->private;
struct neigh_table *tbl = state->tbl;

- read_unlock(&tbl->lock);
- rcu_read_unlock_bh();
+ read_unlock_bh(&tbl->lock);
+ rcu_read_unlock();
}
EXPORT_SYMBOL(neigh_seq_stop);

diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index 96db7409baa1..38e01f82f2ef 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -670,6 +670,8 @@ BPF_CALL_4(bpf_msg_redirect_map, struct sk_msg *, msg,
sk = __sock_map_lookup_elem(map, key);
if (unlikely(!sk || !sock_map_redirect_allowed(sk)))
return SK_DROP;
+ if (!(flags & BPF_F_INGRESS) && !sk_is_tcp(sk))
+ return SK_DROP;

msg->flags = flags;
msg->sk_redir = sk;
@@ -1262,6 +1264,8 @@ BPF_CALL_4(bpf_msg_redirect_hash, struct sk_msg *, msg,
sk = __sock_hash_lookup_elem(map, key);
if (unlikely(!sk || !sock_map_redirect_allowed(sk)))
return SK_DROP;
+ if (!(flags & BPF_F_INGRESS) && !sk_is_tcp(sk))
+ return SK_DROP;

msg->flags = flags;
msg->sk_redir = sk;
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 4f7237661afb..9456f5bb35e5 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -375,7 +375,7 @@ static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb)

probes -= NEIGH_VAR(neigh->parms, UCAST_PROBES);
if (probes < 0) {
- if (!(neigh->nud_state & NUD_VALID))
+ if (!(READ_ONCE(neigh->nud_state) & NUD_VALID))
pr_debug("trying to ucast probe in NUD_INVALID\n");
neigh_ha_snapshot(dst_ha, neigh, dev);
dst_hw = dst_ha;
@@ -1123,7 +1123,7 @@ static int arp_req_get(struct arpreq *r, struct net_device *dev)

neigh = neigh_lookup(&arp_tbl, &ip, dev);
if (neigh) {
- if (!(neigh->nud_state & NUD_NOARP)) {
+ if (!(READ_ONCE(neigh->nud_state) & NUD_NOARP)) {
read_lock_bh(&neigh->lock);
memcpy(r->arp_ha.sa_data, neigh->ha, dev->addr_len);
r->arp_flags = arp_state_to_flags(neigh);
@@ -1144,12 +1144,12 @@ int arp_invalidate(struct net_device *dev, __be32 ip, bool force)
struct neigh_table *tbl = &arp_tbl;

if (neigh) {
- if ((neigh->nud_state & NUD_VALID) && !force) {
+ if ((READ_ONCE(neigh->nud_state) & NUD_VALID) && !force) {
neigh_release(neigh);
return 0;
}

- if (neigh->nud_state & ~NUD_NOARP)
+ if (READ_ONCE(neigh->nud_state) & ~NUD_NOARP)
err = neigh_update(neigh, NULL, NUD_FAILED,
NEIGH_UPDATE_F_OVERRIDE|
NEIGH_UPDATE_F_ADMIN, 0);
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 3b6e6bc80dc1..eafa4a033515 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -564,7 +564,7 @@ static int fib_detect_death(struct fib_info *fi, int order,
n = NULL;

if (n) {
- state = n->nud_state;
+ state = READ_ONCE(n->nud_state);
neigh_release(n);
} else {
return 0;
@@ -2194,7 +2194,7 @@ static bool fib_good_nh(const struct fib_nh *nh)
if (nh->fib_nh_scope == RT_SCOPE_LINK) {
struct neighbour *n;

- rcu_read_lock_bh();
+ rcu_read_lock();

if (likely(nh->fib_nh_gw_family == AF_INET))
n = __ipv4_neigh_lookup_noref(nh->fib_nh_dev,
@@ -2205,9 +2205,9 @@ static bool fib_good_nh(const struct fib_nh *nh)
else
n = NULL;
if (n)
- state = n->nud_state;
+ state = READ_ONCE(n->nud_state);

- rcu_read_unlock_bh();
+ rcu_read_unlock();
}

return !!(state & NUD_VALID);
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 66908ce2dd11..493c679ea54f 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -218,7 +218,7 @@ static int ip_finish_output2(struct net *net, struct sock *sk, struct sk_buff *s
return res;
}

- rcu_read_lock_bh();
+ rcu_read_lock();
neigh = ip_neigh_for_gw(rt, skb, &is_v6gw);
if (!IS_ERR(neigh)) {
int res;
@@ -226,10 +226,10 @@ static int ip_finish_output2(struct net *net, struct sock *sk, struct sk_buff *s
sock_confirm_neigh(skb, neigh);
/* if crossing protocols, can not use the cached header */
res = neigh_output(neigh, skb, is_v6gw);
- rcu_read_unlock_bh();
+ rcu_read_unlock();
return res;
}
- rcu_read_unlock_bh();
+ rcu_read_unlock();

net_dbg_ratelimited("%s: No header cache and no neighbour!\n",
__func__);
diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c
index 9cc287902454..be5498f5dd31 100644
--- a/net/ipv4/nexthop.c
+++ b/net/ipv4/nexthop.c
@@ -1124,13 +1124,13 @@ static bool ipv6_good_nh(const struct fib6_nh *nh)
int state = NUD_REACHABLE;
struct neighbour *n;

- rcu_read_lock_bh();
+ rcu_read_lock();

n = __ipv6_neigh_lookup_noref_stub(nh->fib_nh_dev, &nh->fib_nh_gw6);
if (n)
- state = n->nud_state;
+ state = READ_ONCE(n->nud_state);

- rcu_read_unlock_bh();
+ rcu_read_unlock();

return !!(state & NUD_VALID);
}
@@ -1140,14 +1140,14 @@ static bool ipv4_good_nh(const struct fib_nh *nh)
int state = NUD_REACHABLE;
struct neighbour *n;

- rcu_read_lock_bh();
+ rcu_read_lock();

n = __ipv4_neigh_lookup_noref(nh->fib_nh_dev,
(__force u32)nh->fib_nh_gw4);
if (n)
- state = n->nud_state;
+ state = READ_ONCE(n->nud_state);

- rcu_read_unlock_bh();
+ rcu_read_unlock();

return !!(state & NUD_VALID);
}
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 84a0a71a6f4e..9cbaae4f5ee7 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -408,7 +408,7 @@ static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst,
struct net_device *dev = dst->dev;
struct neighbour *n;

- rcu_read_lock_bh();
+ rcu_read_lock();

if (likely(rt->rt_gw_family == AF_INET)) {
n = ip_neigh_gw4(dev, rt->rt_gw4);
@@ -424,7 +424,7 @@ static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst,
if (!IS_ERR(n) && !refcount_inc_not_zero(&n->refcnt))
n = NULL;

- rcu_read_unlock_bh();
+ rcu_read_unlock();

return n;
}
@@ -784,7 +784,7 @@ static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flow
if (!n)
n = neigh_create(&arp_tbl, &new_gw, rt->dst.dev);
if (!IS_ERR(n)) {
- if (!(n->nud_state & NUD_VALID)) {
+ if (!(READ_ONCE(n->nud_state) & NUD_VALID)) {
neigh_event_send(n, NULL);
} else {
if (fib_lookup(net, fl4, &res, 0) == 0) {
@@ -3421,6 +3421,8 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
fa->fa_type == fri.type) {
fri.offload = READ_ONCE(fa->offload);
fri.trap = READ_ONCE(fa->trap);
+ fri.offload_failed =
+ READ_ONCE(fa->offload_failed);
break;
}
}
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index fab25d4f3a6f..96fdde6e42b1 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1755,16 +1755,13 @@ EXPORT_SYMBOL(tcp_read_sock);

int tcp_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
{
- struct tcp_sock *tp = tcp_sk(sk);
- u32 seq = tp->copied_seq;
struct sk_buff *skb;
int copied = 0;
- u32 offset;

if (sk->sk_state == TCP_LISTEN)
return -ENOTCONN;

- while ((skb = tcp_recv_skb(sk, seq, &offset)) != NULL) {
+ while ((skb = skb_peek(&sk->sk_receive_queue)) != NULL) {
u8 tcp_flags;
int used;

@@ -1777,13 +1774,10 @@ int tcp_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
copied = used;
break;
}
- seq += used;
copied += used;

- if (tcp_flags & TCPHDR_FIN) {
- ++seq;
+ if (tcp_flags & TCPHDR_FIN)
break;
- }
}
return copied;
}
diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c
index 5f93918c063c..f53380fd89bc 100644
--- a/net/ipv4/tcp_bpf.c
+++ b/net/ipv4/tcp_bpf.c
@@ -217,6 +217,7 @@ static int tcp_bpf_recvmsg_parser(struct sock *sk,
int *addr_len)
{
struct tcp_sock *tcp = tcp_sk(sk);
+ int peek = flags & MSG_PEEK;
u32 seq = tcp->copied_seq;
struct sk_psock *psock;
int copied = 0;
@@ -306,7 +307,8 @@ static int tcp_bpf_recvmsg_parser(struct sock *sk,
copied = -EAGAIN;
}
out:
- WRITE_ONCE(tcp->copied_seq, seq);
+ if (!peek)
+ WRITE_ONCE(tcp->copied_seq, seq);
tcp_rcv_space_adjust(sk);
if (copied > 0)
__tcp_cleanup_rbuf(sk, copied);
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index c697836f2b5b..068221e74242 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -243,6 +243,19 @@ static void tcp_measure_rcv_mss(struct sock *sk, const struct sk_buff *skb)
if (unlikely(len > icsk->icsk_ack.rcv_mss +
MAX_TCP_OPTION_SPACE))
tcp_gro_dev_warn(sk, skb, len);
+ /* If the skb has a len of exactly 1*MSS and has the PSH bit
+ * set then it is likely the end of an application write. So
+ * more data may not be arriving soon, and yet the data sender
+ * may be waiting for an ACK if cwnd-bound or using TX zero
+ * copy. So we set ICSK_ACK_PUSHED here so that
+ * tcp_cleanup_rbuf() will send an ACK immediately if the app
+ * reads all of the data and is not ping-pong. If len > MSS
+ * then this logic does not matter (and does not hurt) because
+ * tcp_cleanup_rbuf() will always ACK immediately if the app
+ * reads data and there is more than an MSS of unACKed data.
+ */
+ if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_PSH)
+ icsk->icsk_ack.pending |= ICSK_ACK_PUSHED;
} else {
/* Otherwise, we make more careful check taking into account,
* that SACKs block is variable.
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index dc3166e56169..5921b0f6f9f4 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -177,8 +177,7 @@ static void tcp_event_data_sent(struct tcp_sock *tp,
}

/* Account for an ACK we sent. */
-static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts,
- u32 rcv_nxt)
+static inline void tcp_event_ack_sent(struct sock *sk, u32 rcv_nxt)
{
struct tcp_sock *tp = tcp_sk(sk);

@@ -192,7 +191,7 @@ static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts,

if (unlikely(rcv_nxt != tp->rcv_nxt))
return; /* Special ACK sent by DCTCP to reflect ECN */
- tcp_dec_quickack_mode(sk, pkts);
+ tcp_dec_quickack_mode(sk);
inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
}

@@ -1373,7 +1372,7 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb,
sk, skb);

if (likely(tcb->tcp_flags & TCPHDR_ACK))
- tcp_event_ack_sent(sk, tcp_skb_pcount(skb), rcv_nxt);
+ tcp_event_ack_sent(sk, rcv_nxt);

if (skb->len != tcp_header_size) {
tcp_event_data_sent(tp, sk);
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 83be84219824..c63ccd39fc55 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -202,6 +202,7 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = {
.ra_defrtr_metric = IP6_RT_PRIO_USER,
.accept_ra_from_local = 0,
.accept_ra_min_hop_limit= 1,
+ .accept_ra_min_lft = 0,
.accept_ra_pinfo = 1,
#ifdef CONFIG_IPV6_ROUTER_PREF
.accept_ra_rtr_pref = 1,
@@ -262,6 +263,7 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
.ra_defrtr_metric = IP6_RT_PRIO_USER,
.accept_ra_from_local = 0,
.accept_ra_min_hop_limit= 1,
+ .accept_ra_min_lft = 0,
.accept_ra_pinfo = 1,
#ifdef CONFIG_IPV6_ROUTER_PREF
.accept_ra_rtr_pref = 1,
@@ -1033,7 +1035,7 @@ static int ipv6_add_addr_hash(struct net_device *dev, struct inet6_ifaddr *ifa)
unsigned int hash = inet6_addr_hash(net, &ifa->addr);
int err = 0;

- spin_lock(&net->ipv6.addrconf_hash_lock);
+ spin_lock_bh(&net->ipv6.addrconf_hash_lock);

/* Ignore adding duplicate addresses on an interface */
if (ipv6_chk_same_addr(net, &ifa->addr, dev, hash)) {
@@ -1043,7 +1045,7 @@ static int ipv6_add_addr_hash(struct net_device *dev, struct inet6_ifaddr *ifa)
hlist_add_head_rcu(&ifa->addr_lst, &net->ipv6.inet6_addr_lst[hash]);
}

- spin_unlock(&net->ipv6.addrconf_hash_lock);
+ spin_unlock_bh(&net->ipv6.addrconf_hash_lock);

return err;
}
@@ -1138,15 +1140,15 @@ ipv6_add_addr(struct inet6_dev *idev, struct ifa6_config *cfg,
/* For caller */
refcount_set(&ifa->refcnt, 1);

- rcu_read_lock_bh();
+ rcu_read_lock();

err = ipv6_add_addr_hash(idev->dev, ifa);
if (err < 0) {
- rcu_read_unlock_bh();
+ rcu_read_unlock();
goto out;
}

- write_lock(&idev->lock);
+ write_lock_bh(&idev->lock);

/* Add to inet6_dev unicast addr list. */
ipv6_link_dev_addr(idev, ifa);
@@ -1157,9 +1159,9 @@ ipv6_add_addr(struct inet6_dev *idev, struct ifa6_config *cfg,
}

in6_ifa_hold(ifa);
- write_unlock(&idev->lock);
+ write_unlock_bh(&idev->lock);

- rcu_read_unlock_bh();
+ rcu_read_unlock();

inet6addr_notifier_call_chain(NETDEV_UP, ifa);
out:
@@ -2731,6 +2733,9 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao)
return;
}

+ if (valid_lft != 0 && valid_lft < in6_dev->cnf.accept_ra_min_lft)
+ goto put;
+
/*
* Two things going on here:
* 1) Add routes for on-link prefixes
@@ -5601,6 +5606,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
array[DEVCONF_IOAM6_ID_WIDE] = cnf->ioam6_id_wide;
array[DEVCONF_NDISC_EVICT_NOCARRIER] = cnf->ndisc_evict_nocarrier;
array[DEVCONF_ACCEPT_UNTRACKED_NA] = cnf->accept_untracked_na;
+ array[DEVCONF_ACCEPT_RA_MIN_LFT] = cnf->accept_ra_min_lft;
}

static inline size_t inet6_ifla6_size(void)
@@ -6794,6 +6800,13 @@ static const struct ctl_table addrconf_sysctl[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
+ {
+ .procname = "accept_ra_min_lft",
+ .data = &ipv6_devconf.accept_ra_min_lft,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
{
.procname = "accept_ra_pinfo",
.data = &ipv6_devconf.accept_ra_pinfo,
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 413f66781e50..eb6640f9a792 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -2492,7 +2492,7 @@ static int ipv6_route_native_seq_show(struct seq_file *seq, void *v)
const struct net_device *dev;

if (rt->nh)
- fib6_nh = nexthop_fib6_nh_bh(rt->nh);
+ fib6_nh = nexthop_fib6_nh(rt->nh);

seq_printf(seq, "%pi6 %02x ", &rt->fib6_dst.addr, rt->fib6_dst.plen);

@@ -2557,14 +2557,14 @@ static struct fib6_table *ipv6_route_seq_next_table(struct fib6_table *tbl,

if (tbl) {
h = (tbl->tb6_id & (FIB6_TABLE_HASHSZ - 1)) + 1;
- node = rcu_dereference_bh(hlist_next_rcu(&tbl->tb6_hlist));
+ node = rcu_dereference(hlist_next_rcu(&tbl->tb6_hlist));
} else {
h = 0;
node = NULL;
}

while (!node && h < FIB6_TABLE_HASHSZ) {
- node = rcu_dereference_bh(
+ node = rcu_dereference(
hlist_first_rcu(&net->ipv6.fib_table_hash[h++]));
}
return hlist_entry_safe(node, struct fib6_table, tb6_hlist);
@@ -2594,7 +2594,7 @@ static void *ipv6_route_seq_next(struct seq_file *seq, void *v, loff_t *pos)
if (!v)
goto iter_table;

- n = rcu_dereference_bh(((struct fib6_info *)v)->fib6_next);
+ n = rcu_dereference(((struct fib6_info *)v)->fib6_next);
if (n)
return n;

@@ -2620,12 +2620,12 @@ static void *ipv6_route_seq_next(struct seq_file *seq, void *v, loff_t *pos)
}

static void *ipv6_route_seq_start(struct seq_file *seq, loff_t *pos)
- __acquires(RCU_BH)
+ __acquires(RCU)
{
struct net *net = seq_file_net(seq);
struct ipv6_route_iter *iter = seq->private;

- rcu_read_lock_bh();
+ rcu_read_lock();
iter->tbl = ipv6_route_seq_next_table(NULL, net);
iter->skip = *pos;

@@ -2646,7 +2646,7 @@ static bool ipv6_route_iter_active(struct ipv6_route_iter *iter)
}

static void ipv6_route_native_seq_stop(struct seq_file *seq, void *v)
- __releases(RCU_BH)
+ __releases(RCU)
{
struct net *net = seq_file_net(seq);
struct ipv6_route_iter *iter = seq->private;
@@ -2654,7 +2654,7 @@ static void ipv6_route_native_seq_stop(struct seq_file *seq, void *v)
if (ipv6_route_iter_active(iter))
fib6_walker_unlink(net, &iter->w);

- rcu_read_unlock_bh();
+ rcu_read_unlock();
}

#if IS_BUILTIN(CONFIG_IPV6) && defined(CONFIG_BPF_SYSCALL)
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 34192f7a166f..ce2c5e728745 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -116,7 +116,7 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *
return res;
}

- rcu_read_lock_bh();
+ rcu_read_lock();
nexthop = rt6_nexthop((struct rt6_info *)dst, daddr);
neigh = __ipv6_neigh_lookup_noref(dev, nexthop);

@@ -124,7 +124,7 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *
if (unlikely(!neigh))
neigh = __neigh_create(&nd_tbl, nexthop, dev, false);
if (IS_ERR(neigh)) {
- rcu_read_unlock_bh();
+ rcu_read_unlock();
IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTNOROUTES);
kfree_skb_reason(skb, SKB_DROP_REASON_NEIGH_CREATEFAIL);
return -EINVAL;
@@ -132,7 +132,7 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *
}
sock_confirm_neigh(skb, neigh);
ret = neigh_output(neigh, skb, false);
- rcu_read_unlock_bh();
+ rcu_read_unlock();
return ret;
}

@@ -1150,11 +1150,11 @@ static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
* dst entry of the nexthop router
*/
rt = (struct rt6_info *) *dst;
- rcu_read_lock_bh();
+ rcu_read_lock();
n = __ipv6_neigh_lookup_noref(rt->dst.dev,
rt6_nexthop(rt, &fl6->daddr));
- err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0;
- rcu_read_unlock_bh();
+ err = n && !(READ_ONCE(n->nud_state) & NUD_VALID) ? -EINVAL : 0;
+ rcu_read_unlock();

if (err) {
struct inet6_ifaddr *ifp;
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index a4d43eb45a9d..8c5a99fe6803 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -746,7 +746,7 @@ static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb)
saddr = &ipv6_hdr(skb)->saddr;
probes -= NEIGH_VAR(neigh->parms, UCAST_PROBES);
if (probes < 0) {
- if (!(neigh->nud_state & NUD_VALID)) {
+ if (!(READ_ONCE(neigh->nud_state) & NUD_VALID)) {
ND_PRINTK(1, dbg,
"%s: trying to ucast probe in NUD_INVALID: %pI6\n",
__func__, target);
@@ -1092,7 +1092,7 @@ static void ndisc_recv_na(struct sk_buff *skb)
u8 old_flags = neigh->flags;
struct net *net = dev_net(dev);

- if (neigh->nud_state & NUD_FAILED)
+ if (READ_ONCE(neigh->nud_state) & NUD_FAILED)
goto out;

/*
@@ -1331,6 +1331,14 @@ static void ndisc_router_discovery(struct sk_buff *skb)
goto skip_defrtr;
}

+ lifetime = ntohs(ra_msg->icmph.icmp6_rt_lifetime);
+ if (lifetime != 0 && lifetime < in6_dev->cnf.accept_ra_min_lft) {
+ ND_PRINTK(2, info,
+ "RA: router lifetime (%ds) is too short: %s\n",
+ lifetime, skb->dev->name);
+ goto skip_defrtr;
+ }
+
/* Do not accept RA with source-addr found on local machine unless
* accept_ra_from_local is set to true.
*/
@@ -1343,8 +1351,6 @@ static void ndisc_router_discovery(struct sk_buff *skb)
goto skip_defrtr;
}

- lifetime = ntohs(ra_msg->icmph.icmp6_rt_lifetime);
-
#ifdef CONFIG_IPV6_ROUTER_PREF
pref = ra_msg->icmph.icmp6_router_pref;
/* 10b is handled as if it were 00b (medium) */
@@ -1519,6 +1525,9 @@ static void ndisc_router_discovery(struct sk_buff *skb)
if (ri->prefix_len == 0 &&
!in6_dev->cnf.accept_ra_defrtr)
continue;
+ if (ri->lifetime != 0 &&
+ ntohl(ri->lifetime) < in6_dev->cnf.accept_ra_min_lft)
+ continue;
if (ri->prefix_len < in6_dev->cnf.accept_ra_rt_info_min_plen)
continue;
if (ri->prefix_len > in6_dev->cnf.accept_ra_rt_info_max_plen)
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 93957b20fccc..0bcdb675ba2c 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -636,15 +636,15 @@ static void rt6_probe(struct fib6_nh *fib6_nh)

nh_gw = &fib6_nh->fib_nh_gw6;
dev = fib6_nh->fib_nh_dev;
- rcu_read_lock_bh();
+ rcu_read_lock();
last_probe = READ_ONCE(fib6_nh->last_probe);
idev = __in6_dev_get(dev);
neigh = __ipv6_neigh_lookup_noref(dev, nh_gw);
if (neigh) {
- if (neigh->nud_state & NUD_VALID)
+ if (READ_ONCE(neigh->nud_state) & NUD_VALID)
goto out;

- write_lock(&neigh->lock);
+ write_lock_bh(&neigh->lock);
if (!(neigh->nud_state & NUD_VALID) &&
time_after(jiffies,
neigh->updated + idev->cnf.rtr_probe_interval)) {
@@ -652,7 +652,7 @@ static void rt6_probe(struct fib6_nh *fib6_nh)
if (work)
__neigh_set_probe_once(neigh);
}
- write_unlock(&neigh->lock);
+ write_unlock_bh(&neigh->lock);
} else if (time_after(jiffies, last_probe +
idev->cnf.rtr_probe_interval)) {
work = kmalloc(sizeof(*work), GFP_ATOMIC);
@@ -670,7 +670,7 @@ static void rt6_probe(struct fib6_nh *fib6_nh)
}

out:
- rcu_read_unlock_bh();
+ rcu_read_unlock();
}
#else
static inline void rt6_probe(struct fib6_nh *fib6_nh)
@@ -686,25 +686,25 @@ static enum rt6_nud_state rt6_check_neigh(const struct fib6_nh *fib6_nh)
enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
struct neighbour *neigh;

- rcu_read_lock_bh();
+ rcu_read_lock();
neigh = __ipv6_neigh_lookup_noref(fib6_nh->fib_nh_dev,
&fib6_nh->fib_nh_gw6);
if (neigh) {
- read_lock(&neigh->lock);
- if (neigh->nud_state & NUD_VALID)
+ u8 nud_state = READ_ONCE(neigh->nud_state);
+
+ if (nud_state & NUD_VALID)
ret = RT6_NUD_SUCCEED;
#ifdef CONFIG_IPV6_ROUTER_PREF
- else if (!(neigh->nud_state & NUD_FAILED))
+ else if (!(nud_state & NUD_FAILED))
ret = RT6_NUD_SUCCEED;
else
ret = RT6_NUD_FAIL_PROBE;
#endif
- read_unlock(&neigh->lock);
} else {
ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
}
- rcu_read_unlock_bh();
+ rcu_read_unlock();

return ret;
}
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 4bdd356bb5c4..7be89dcfd5fc 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1644,9 +1644,12 @@ INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
struct sock *nsk;

sk = req->rsk_listener;
- drop_reason = tcp_inbound_md5_hash(sk, skb,
- &hdr->saddr, &hdr->daddr,
- AF_INET6, dif, sdif);
+ if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
+ drop_reason = SKB_DROP_REASON_XFRM_POLICY;
+ else
+ drop_reason = tcp_inbound_md5_hash(sk, skb,
+ &hdr->saddr, &hdr->daddr,
+ AF_INET6, dif, sdif);
if (drop_reason) {
sk_drops_add(sk, skb);
reqsk_put(req);
@@ -1693,6 +1696,7 @@ INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
}
goto discard_and_relse;
}
+ nf_reset_ct(skb);
if (nsk == sk) {
reqsk_put(req);
tcp_v6_restore_cb(skb);
diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index bce4132b0a5c..314ec3a51e8d 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -510,7 +510,6 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
*/
if (len > INT_MAX - transhdrlen)
return -EMSGSIZE;
- ulen = len + transhdrlen;

/* Mirror BSD error message compatibility */
if (msg->msg_flags & MSG_OOB)
@@ -631,6 +630,8 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)

back_from_confirm:
lock_sock(sk);
+ /* Parenthesise the ?: so transhdrlen is added to len, not selected by it. */
+ ulen = len + (skb_queue_empty(&sk->sk_write_queue) ? transhdrlen : 0);
err = ip6_append_data(sk, ip_generic_getfrag, msg,
ulen, transhdrlen, &ipc6,
&fl6, (struct rt6_info *)dst,
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index cf3453b532d6..0167413d5697 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -566,6 +566,9 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev,
}

err = ieee80211_key_link(key, link, sta);
+ /* KRACK protection, shouldn't happen but just silently accept key */
+ if (err == -EALREADY)
+ err = 0;

out_unlock:
mutex_unlock(&local->sta_mtx);
diff --git a/net/mac80211/key.c b/net/mac80211/key.c
index e8f6c1e5eabf..23bb24243c6e 100644
--- a/net/mac80211/key.c
+++ b/net/mac80211/key.c
@@ -901,7 +901,7 @@ int ieee80211_key_link(struct ieee80211_key *key,
*/
if (ieee80211_key_identical(sdata, old_key, key)) {
ieee80211_key_free_unused(key);
- ret = 0;
+ ret = -EALREADY;
goto out;
}

diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index 9127a7fd5269..5d845fcf3d09 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -2047,7 +2047,7 @@ static int mptcp_event_put_token_and_ssk(struct sk_buff *skb,
nla_put_s32(skb, MPTCP_ATTR_IF_IDX, ssk->sk_bound_dev_if))
return -EMSGSIZE;

- sk_err = ssk->sk_err;
+ sk_err = READ_ONCE(ssk->sk_err);
if (sk_err && sk->sk_state == TCP_ESTABLISHED &&
nla_put_u8(skb, MPTCP_ATTR_ERROR, sk_err))
return -EMSGSIZE;
diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c
index 8a2aa63caa51..38cbdc66d8bf 100644
--- a/net/mptcp/pm_userspace.c
+++ b/net/mptcp/pm_userspace.c
@@ -309,12 +309,6 @@ int mptcp_nl_cmd_sf_create(struct sk_buff *skb, struct genl_info *info)
goto create_err;
}

- if (addr_l.id == 0) {
- NL_SET_ERR_MSG_ATTR(info->extack, laddr, "missing local addr id");
- err = -EINVAL;
- goto create_err;
- }
-
err = mptcp_pm_parse_addr(raddr, info, &addr_r);
if (err < 0) {
NL_SET_ERR_MSG_ATTR(info->extack, raddr, "error parsing remote addr");
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 6dd880d6b051..b6e0579e7264 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -401,7 +401,7 @@ static bool __mptcp_move_skb(struct mptcp_sock *msk, struct sock *ssk,
return false;
}

-static void mptcp_stop_timer(struct sock *sk)
+static void mptcp_stop_rtx_timer(struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);

@@ -765,6 +765,46 @@ static bool __mptcp_ofo_queue(struct mptcp_sock *msk)
return moved;
}

+static bool __mptcp_subflow_error_report(struct sock *sk, struct sock *ssk)
+{
+ int err = sock_error(ssk);
+ int ssk_state;
+
+ if (!err)
+ return false;
+
+ /* only propagate errors on fallen-back sockets or
+ * on MPC connect
+ */
+ if (sk->sk_state != TCP_SYN_SENT && !__mptcp_check_fallback(mptcp_sk(sk)))
+ return false;
+
+ /* We need to propagate only transition to CLOSE state.
+ * Orphaned socket will see such state change via
+ * subflow_sched_work_if_closed() and that path will properly
+ * destroy the msk as needed.
+ */
+ ssk_state = inet_sk_state_load(ssk);
+ if (ssk_state == TCP_CLOSE && !sock_flag(sk, SOCK_DEAD))
+ inet_sk_state_store(sk, ssk_state);
+ WRITE_ONCE(sk->sk_err, -err);
+
+ /* This barrier is coupled with smp_rmb() in mptcp_poll() */
+ smp_wmb();
+ sk_error_report(sk);
+ return true;
+}
+
+void __mptcp_error_report(struct sock *sk)
+{
+ struct mptcp_subflow_context *subflow;
+ struct mptcp_sock *msk = mptcp_sk(sk);
+
+ mptcp_for_each_subflow(msk, subflow)
+ if (__mptcp_subflow_error_report(sk, mptcp_subflow_tcp_sock(subflow)))
+ break;
+}
+
/* In most cases we will be able to lock the mptcp socket. If its already
* owned, we need to defer to the work queue to avoid ABBA deadlock.
*/
@@ -846,6 +886,7 @@ static bool __mptcp_finish_join(struct mptcp_sock *msk, struct sock *ssk)

mptcp_sockopt_sync_locked(msk, ssk);
mptcp_subflow_joined(msk, ssk);
+ mptcp_stop_tout_timer(sk);
return true;
}

@@ -865,12 +906,12 @@ static void __mptcp_flush_join_list(struct sock *sk, struct list_head *join_list
}
}

-static bool mptcp_timer_pending(struct sock *sk)
+static bool mptcp_rtx_timer_pending(struct sock *sk)
{
return timer_pending(&inet_csk(sk)->icsk_retransmit_timer);
}

-static void mptcp_reset_timer(struct sock *sk)
+static void mptcp_reset_rtx_timer(struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);
unsigned long tout;
@@ -1054,10 +1095,10 @@ static void __mptcp_clean_una(struct sock *sk)
out:
if (snd_una == READ_ONCE(msk->snd_nxt) &&
snd_una == READ_ONCE(msk->write_seq)) {
- if (mptcp_timer_pending(sk) && !mptcp_data_fin_enabled(msk))
- mptcp_stop_timer(sk);
+ if (mptcp_rtx_timer_pending(sk) && !mptcp_data_fin_enabled(msk))
+ mptcp_stop_rtx_timer(sk);
} else {
- mptcp_reset_timer(sk);
+ mptcp_reset_rtx_timer(sk);
}
}

@@ -1606,8 +1647,8 @@ void __mptcp_push_pending(struct sock *sk, unsigned int flags)

out:
/* ensure the rtx timer is running */
- if (!mptcp_timer_pending(sk))
- mptcp_reset_timer(sk);
+ if (!mptcp_rtx_timer_pending(sk))
+ mptcp_reset_rtx_timer(sk);
if (do_check_data_fin)
mptcp_check_send_data_fin(sk);
}
@@ -1665,8 +1706,8 @@ static void __mptcp_subflow_push_pending(struct sock *sk, struct sock *ssk)
if (copied) {
tcp_push(ssk, 0, info.mss_now, tcp_sk(ssk)->nonagle,
info.size_goal);
- if (!mptcp_timer_pending(sk))
- mptcp_reset_timer(sk);
+ if (!mptcp_rtx_timer_pending(sk))
+ mptcp_reset_rtx_timer(sk);

if (msk->snd_data_fin_enable &&
msk->snd_nxt + 1 == msk->write_seq)
@@ -2227,7 +2268,7 @@ static void mptcp_retransmit_timer(struct timer_list *t)
sock_put(sk);
}

-static void mptcp_timeout_timer(struct timer_list *t)
+static void mptcp_tout_timer(struct timer_list *t)
{
struct sock *sk = from_timer(sk, t, sk_timer);

@@ -2349,18 +2390,14 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
bool dispose_it, need_push = false;

/* If the first subflow moved to a close state before accept, e.g. due
- * to an incoming reset, mptcp either:
- * - if either the subflow or the msk are dead, destroy the context
- * (the subflow socket is deleted by inet_child_forget) and the msk
- * - otherwise do nothing at the moment and take action at accept and/or
- * listener shutdown - user-space must be able to accept() the closed
- * socket.
+ * to an incoming reset or listener shutdown, the subflow socket is
+ * already deleted by inet_child_forget() and the mptcp socket can't
+ * survive too.
*/
- if (msk->in_accept_queue && msk->first == ssk) {
- if (!sock_flag(sk, SOCK_DEAD) && !sock_flag(ssk, SOCK_DEAD))
- return;
-
+ if (msk->in_accept_queue && msk->first == ssk &&
+ (sock_flag(sk, SOCK_DEAD) || sock_flag(ssk, SOCK_DEAD))) {
/* ensure later check in mptcp_worker() will dispose the msk */
+ mptcp_set_close_tout(sk, tcp_jiffies32 - (TCP_TIMEWAIT_LEN + 1));
sock_set_flag(sk, SOCK_DEAD);
lock_sock_nested(ssk, SINGLE_DEPTH_NESTING);
mptcp_subflow_drop_ctx(ssk);
@@ -2413,6 +2450,7 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
}

out_release:
+ __mptcp_subflow_error_report(sk, ssk);
release_sock(ssk);

sock_put(ssk);
@@ -2426,6 +2464,22 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,

if (need_push)
__mptcp_push_pending(sk, 0);
+
+ /* Catch every 'all subflows closed' scenario, including peers silently
+ * closing them, e.g. due to timeout.
+ * For established sockets, allow an additional timeout before closing,
+ * as the protocol can still create more subflows.
+ */
+ if (list_is_singular(&msk->conn_list) && msk->first &&
+ inet_sk_state_load(msk->first) == TCP_CLOSE) {
+ if (sk->sk_state != TCP_ESTABLISHED ||
+ msk->in_accept_queue || sock_flag(sk, SOCK_DEAD)) {
+ inet_sk_state_store(sk, TCP_CLOSE);
+ mptcp_close_wake_up(sk);
+ } else {
+ mptcp_start_tout_timer(sk);
+ }
+ }
}

void mptcp_close_ssk(struct sock *sk, struct sock *ssk,
@@ -2469,23 +2523,14 @@ static void __mptcp_close_subflow(struct sock *sk)

}

-static bool mptcp_should_close(const struct sock *sk)
+static bool mptcp_close_tout_expired(const struct sock *sk)
{
- s32 delta = tcp_jiffies32 - inet_csk(sk)->icsk_mtup.probe_timestamp;
- struct mptcp_subflow_context *subflow;
-
- if (delta >= TCP_TIMEWAIT_LEN || mptcp_sk(sk)->in_accept_queue)
- return true;
+ if (!inet_csk(sk)->icsk_mtup.probe_timestamp ||
+ sk->sk_state == TCP_CLOSE)
+ return false;

- /* if all subflows are in closed status don't bother with additional
- * timeout
- */
- mptcp_for_each_subflow(mptcp_sk(sk), subflow) {
- if (inet_sk_state_load(mptcp_subflow_tcp_sock(subflow)) !=
- TCP_CLOSE)
- return false;
- }
- return true;
+ return time_after32(tcp_jiffies32,
+ inet_csk(sk)->icsk_mtup.probe_timestamp + TCP_TIMEWAIT_LEN);
}

static void mptcp_check_fastclose(struct mptcp_sock *msk)
@@ -2513,15 +2558,15 @@ static void mptcp_check_fastclose(struct mptcp_sock *msk)
/* Mirror the tcp_reset() error propagation */
switch (sk->sk_state) {
case TCP_SYN_SENT:
- sk->sk_err = ECONNREFUSED;
+ WRITE_ONCE(sk->sk_err, ECONNREFUSED);
break;
case TCP_CLOSE_WAIT:
- sk->sk_err = EPIPE;
+ WRITE_ONCE(sk->sk_err, EPIPE);
break;
case TCP_CLOSE:
return;
default:
- sk->sk_err = ECONNRESET;
+ WRITE_ONCE(sk->sk_err, ECONNRESET);
}

inet_sk_state_store(sk, TCP_CLOSE);
@@ -2597,27 +2642,28 @@ static void __mptcp_retrans(struct sock *sk)
reset_timer:
mptcp_check_and_set_pending(sk);

- if (!mptcp_timer_pending(sk))
- mptcp_reset_timer(sk);
+ if (!mptcp_rtx_timer_pending(sk))
+ mptcp_reset_rtx_timer(sk);
}

/* schedule the timeout timer for the relevant event: either close timeout
* or mp_fail timeout. The close timeout takes precedence on the mp_fail one
*/
-void mptcp_reset_timeout(struct mptcp_sock *msk, unsigned long fail_tout)
+void mptcp_reset_tout_timer(struct mptcp_sock *msk, unsigned long fail_tout)
{
struct sock *sk = (struct sock *)msk;
unsigned long timeout, close_timeout;

- if (!fail_tout && !sock_flag(sk, SOCK_DEAD))
+ if (!fail_tout && !inet_csk(sk)->icsk_mtup.probe_timestamp)
return;

- close_timeout = inet_csk(sk)->icsk_mtup.probe_timestamp - tcp_jiffies32 + jiffies + TCP_TIMEWAIT_LEN;
+ close_timeout = inet_csk(sk)->icsk_mtup.probe_timestamp - tcp_jiffies32 + jiffies +
+ TCP_TIMEWAIT_LEN;

/* the close timeout takes precedence on the fail one, and here at least one of
* them is active
*/
- timeout = sock_flag(sk, SOCK_DEAD) ? close_timeout : fail_tout;
+ timeout = inet_csk(sk)->icsk_mtup.probe_timestamp ? close_timeout : fail_tout;

sk_reset_timer(sk, &sk->sk_timer, timeout);
}
@@ -2636,8 +2682,6 @@ static void mptcp_mp_fail_no_response(struct mptcp_sock *msk)
mptcp_subflow_reset(ssk);
WRITE_ONCE(mptcp_subflow_ctx(ssk)->fail_tout, 0);
unlock_sock_fast(ssk, slow);
-
- mptcp_reset_timeout(msk, 0);
}

static void mptcp_do_fastclose(struct sock *sk)
@@ -2676,19 +2720,15 @@ static void mptcp_worker(struct work_struct *work)
if (test_and_clear_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags))
__mptcp_close_subflow(sk);

- /* There is no point in keeping around an orphaned sk timedout or
- * closed, but we need the msk around to reply to incoming DATA_FIN,
- * even if it is orphaned and in FIN_WAIT2 state
- */
- if (sock_flag(sk, SOCK_DEAD)) {
- if (mptcp_should_close(sk)) {
- inet_sk_state_store(sk, TCP_CLOSE);
- mptcp_do_fastclose(sk);
- }
- if (sk->sk_state == TCP_CLOSE) {
- __mptcp_destroy_sock(sk);
- goto unlock;
- }
+ if (mptcp_close_tout_expired(sk)) {
+ inet_sk_state_store(sk, TCP_CLOSE);
+ mptcp_do_fastclose(sk);
+ mptcp_close_wake_up(sk);
+ }
+
+ if (sock_flag(sk, SOCK_DEAD) && sk->sk_state == TCP_CLOSE) {
+ __mptcp_destroy_sock(sk);
+ goto unlock;
}

if (test_and_clear_bit(MPTCP_WORK_RTX, &msk->flags))
@@ -2728,7 +2768,7 @@ static int __mptcp_init_sock(struct sock *sk)

/* re-use the csk retrans timer for MPTCP-level retrans */
timer_setup(&msk->sk.icsk_retransmit_timer, mptcp_retransmit_timer, 0);
- timer_setup(&sk->sk_timer, mptcp_timeout_timer, 0);
+ timer_setup(&sk->sk_timer, mptcp_tout_timer, 0);

return 0;
}
@@ -2820,8 +2860,8 @@ void mptcp_subflow_shutdown(struct sock *sk, struct sock *ssk, int how)
} else {
pr_debug("Sending DATA_FIN on subflow %p", ssk);
tcp_send_ack(ssk);
- if (!mptcp_timer_pending(sk))
- mptcp_reset_timer(sk);
+ if (!mptcp_rtx_timer_pending(sk))
+ mptcp_reset_rtx_timer(sk);
}
break;
}
@@ -2904,7 +2944,7 @@ static void __mptcp_destroy_sock(struct sock *sk)

might_sleep();

- mptcp_stop_timer(sk);
+ mptcp_stop_rtx_timer(sk);
sk_stop_timer(sk, &sk->sk_timer);
msk->pm.status = 0;

@@ -2984,7 +3024,6 @@ bool __mptcp_close(struct sock *sk, long timeout)

cleanup:
/* orphan all the subflows */
- inet_csk(sk)->icsk_mtup.probe_timestamp = tcp_jiffies32;
mptcp_for_each_subflow(msk, subflow) {
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
bool slow = lock_sock_fast_nested(ssk);
@@ -3021,7 +3060,7 @@ bool __mptcp_close(struct sock *sk, long timeout)
__mptcp_destroy_sock(sk);
do_cancel_work = true;
} else {
- mptcp_reset_timeout(msk, 0);
+ mptcp_start_tout_timer(sk);
}

return do_cancel_work;
@@ -3084,8 +3123,8 @@ static int mptcp_disconnect(struct sock *sk, int flags)
mptcp_check_listen_stop(sk);
inet_sk_state_store(sk, TCP_CLOSE);

- mptcp_stop_timer(sk);
- sk_stop_timer(sk, &sk->sk_timer);
+ mptcp_stop_rtx_timer(sk);
+ mptcp_stop_tout_timer(sk);

if (mptcp_sk(sk)->token)
mptcp_event(MPTCP_EVENT_CLOSED, mptcp_sk(sk), NULL, GFP_KERNEL);
@@ -3895,7 +3934,7 @@ static __poll_t mptcp_poll(struct file *file, struct socket *sock,

/* This barrier is coupled with smp_wmb() in __mptcp_error_report() */
smp_rmb();
- if (sk->sk_err)
+ if (READ_ONCE(sk->sk_err))
mask |= EPOLLERR;

return mask;
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index d77b25636125..91d89a0aeb58 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -681,7 +681,29 @@ void mptcp_get_options(const struct sk_buff *skb,

void mptcp_finish_connect(struct sock *sk);
void __mptcp_set_connected(struct sock *sk);
-void mptcp_reset_timeout(struct mptcp_sock *msk, unsigned long fail_tout);
+void mptcp_reset_tout_timer(struct mptcp_sock *msk, unsigned long fail_tout);
+
+static inline void mptcp_stop_tout_timer(struct sock *sk)
+{
+ if (!inet_csk(sk)->icsk_mtup.probe_timestamp)
+ return;
+
+ sk_stop_timer(sk, &sk->sk_timer);
+ inet_csk(sk)->icsk_mtup.probe_timestamp = 0;
+}
+
+static inline void mptcp_set_close_tout(struct sock *sk, unsigned long tout)
+{
+ /* avoid 0 timestamp, as that means no close timeout */
+ inet_csk(sk)->icsk_mtup.probe_timestamp = tout ? : 1;
+}
+
+static inline void mptcp_start_tout_timer(struct sock *sk)
+{
+ mptcp_set_close_tout(sk, tcp_jiffies32);
+ mptcp_reset_tout_timer(mptcp_sk(sk), 0);
+}
+
static inline bool mptcp_is_fully_established(struct sock *sk)
{
return inet_sk_state_load(sk) == TCP_ESTABLISHED &&
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 52a747a80e88..b93b08a75017 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -1161,7 +1161,7 @@ static void mptcp_subflow_fail(struct mptcp_sock *msk, struct sock *ssk)
WRITE_ONCE(subflow->fail_tout, fail_tout);
tcp_send_ack(ssk);

- mptcp_reset_timeout(msk, subflow->fail_tout);
+ mptcp_reset_tout_timer(msk, subflow->fail_tout);
}

static bool subflow_check_data_avail(struct sock *ssk)
@@ -1248,7 +1248,7 @@ static bool subflow_check_data_avail(struct sock *ssk)
subflow->reset_reason = MPTCP_RST_EMPTCP;

reset:
- ssk->sk_err = EBADMSG;
+ WRITE_ONCE(ssk->sk_err, EBADMSG);
tcp_set_state(ssk, TCP_CLOSE);
while ((skb = skb_peek(&ssk->sk_receive_queue)))
sk_eat_skb(ssk, skb);
@@ -1305,42 +1305,6 @@ void mptcp_space(const struct sock *ssk, int *space, int *full_space)
*full_space = tcp_full_space(sk);
}

-void __mptcp_error_report(struct sock *sk)
-{
- struct mptcp_subflow_context *subflow;
- struct mptcp_sock *msk = mptcp_sk(sk);
-
- mptcp_for_each_subflow(msk, subflow) {
- struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
- int err = sock_error(ssk);
- int ssk_state;
-
- if (!err)
- continue;
-
- /* only propagate errors on fallen-back sockets or
- * on MPC connect
- */
- if (sk->sk_state != TCP_SYN_SENT && !__mptcp_check_fallback(msk))
- continue;
-
- /* We need to propagate only transition to CLOSE state.
- * Orphaned socket will see such state change via
- * subflow_sched_work_if_closed() and that path will properly
- * destroy the msk as needed.
- */
- ssk_state = inet_sk_state_load(ssk);
- if (ssk_state == TCP_CLOSE && !sock_flag(sk, SOCK_DEAD))
- inet_sk_state_store(sk, ssk_state);
- sk->sk_err = -err;
-
- /* This barrier is coupled with smp_rmb() in mptcp_poll() */
- smp_wmb();
- sk_error_report(sk);
- break;
- }
-}
-
static void subflow_error_report(struct sock *ssk)
{
struct sock *sk = mptcp_subflow_ctx(ssk)->conn;
@@ -1527,6 +1491,7 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
mptcp_sock_graft(ssk, sk->sk_socket);
iput(SOCK_INODE(sf));
WRITE_ONCE(msk->allow_infinite_fallback, false);
+ mptcp_stop_tout_timer(sk);
return 0;

failed_unlink:
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index d4fe7bb4f853..6574f4e651b1 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -1507,8 +1507,8 @@ static int make_send_sock(struct netns_ipvs *ipvs, int id,
}

get_mcast_sockaddr(&mcast_addr, &salen, &ipvs->mcfg, id);
- result = sock->ops->connect(sock, (struct sockaddr *) &mcast_addr,
- salen, 0);
+ result = kernel_connect(sock, (struct sockaddr *)&mcast_addr,
+ salen, 0);
if (result < 0) {
pr_err("Error connecting to the multicast addr\n");
goto error;
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index 7247af51bdfc..c94a9971d790 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -112,7 +112,7 @@ static const u8 sctp_conntracks[2][11][SCTP_CONNTRACK_MAX] = {
/* shutdown_ack */ {sSA, sCL, sCW, sCE, sES, sSA, sSA, sSA, sSA},
/* error */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCL},/* Can't have Stale cookie*/
/* cookie_echo */ {sCL, sCL, sCE, sCE, sES, sSS, sSR, sSA, sCL},/* 5.2.4 - Big TODO */
-/* cookie_ack */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCL},/* Can't come in orig dir */
+/* cookie_ack */ {sCL, sCL, sCW, sES, sES, sSS, sSR, sSA, sCL},/* Can't come in orig dir */
/* shutdown_comp*/ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sCL, sCL},
/* heartbeat */ {sHS, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS},
/* heartbeat_ack*/ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS},
@@ -126,7 +126,7 @@ static const u8 sctp_conntracks[2][11][SCTP_CONNTRACK_MAX] = {
/* shutdown */ {sIV, sCL, sCW, sCE, sSR, sSS, sSR, sSA, sIV},
/* shutdown_ack */ {sIV, sCL, sCW, sCE, sES, sSA, sSA, sSA, sIV},
/* error */ {sIV, sCL, sCW, sCL, sES, sSS, sSR, sSA, sIV},
-/* cookie_echo */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sIV},/* Can't come in reply dir */
+/* cookie_echo */ {sIV, sCL, sCE, sCE, sES, sSS, sSR, sSA, sIV},/* Can't come in reply dir */
/* cookie_ack */ {sIV, sCL, sCW, sES, sES, sSS, sSR, sSA, sIV},
/* shutdown_comp*/ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sCL, sIV},
/* heartbeat */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS},
@@ -426,6 +426,9 @@ int nf_conntrack_sctp_packet(struct nf_conn *ct,
/* (D) vtag must be same as init_vtag as found in INIT_ACK */
if (sh->vtag != ct->proto.sctp.vtag[dir])
goto out_unlock;
+ } else if (sch->type == SCTP_CID_COOKIE_ACK) {
+ ct->proto.sctp.init[dir] = 0;
+ ct->proto.sctp.init[!dir] = 0;
} else if (sch->type == SCTP_CID_HEARTBEAT) {
if (ct->proto.sctp.vtag[dir] == 0) {
pr_debug("Setting %d vtag %x for dir %d\n", sch->type, sh->vtag, dir);
@@ -474,16 +477,18 @@ int nf_conntrack_sctp_packet(struct nf_conn *ct,
}

/* If it is an INIT or an INIT ACK note down the vtag */
- if (sch->type == SCTP_CID_INIT ||
- sch->type == SCTP_CID_INIT_ACK) {
- struct sctp_inithdr _inithdr, *ih;
+ if (sch->type == SCTP_CID_INIT) {
+ struct sctp_inithdr _ih, *ih;

- ih = skb_header_pointer(skb, offset + sizeof(_sch),
- sizeof(_inithdr), &_inithdr);
- if (ih == NULL)
+ ih = skb_header_pointer(skb, offset + sizeof(_sch), sizeof(*ih), &_ih);
+ if (!ih)
goto out_unlock;
- pr_debug("Setting vtag %x for dir %d\n",
- ih->init_tag, !dir);
+
+ if (ct->proto.sctp.init[dir] && ct->proto.sctp.init[!dir])
+ ct->proto.sctp.init[!dir] = 0;
+ ct->proto.sctp.init[dir] = 1;
+
+ pr_debug("Setting vtag %x for dir %d\n", ih->init_tag, !dir);
ct->proto.sctp.vtag[!dir] = ih->init_tag;

/* don't renew timeout on init retransmit so
@@ -494,6 +499,24 @@ int nf_conntrack_sctp_packet(struct nf_conn *ct,
old_state == SCTP_CONNTRACK_CLOSED &&
nf_ct_is_confirmed(ct))
ignore = true;
+ } else if (sch->type == SCTP_CID_INIT_ACK) {
+ struct sctp_inithdr _ih, *ih;
+ __be32 vtag;
+
+ ih = skb_header_pointer(skb, offset + sizeof(_sch), sizeof(*ih), &_ih);
+ if (!ih)
+ goto out_unlock;
+
+ vtag = ct->proto.sctp.vtag[!dir];
+ if (!ct->proto.sctp.init[!dir] && vtag && vtag != ih->init_tag)
+ goto out_unlock;
+ /* collision */
+ if (ct->proto.sctp.init[dir] && ct->proto.sctp.init[!dir] &&
+ vtag != ih->init_tag)
+ goto out_unlock;
+
+ pr_debug("Setting vtag %x for dir %d\n", ih->init_tag, !dir);
+ ct->proto.sctp.vtag[!dir] = ih->init_tag;
}

ct->proto.sctp.state = new_state;
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 52b81dc1fcf5..5e3dbe2652db 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -7576,24 +7576,14 @@ static int nf_tables_delobj(struct sk_buff *skb, const struct nfnl_info *info,
return nft_delobj(&ctx, obj);
}

-void nft_obj_notify(struct net *net, const struct nft_table *table,
- struct nft_object *obj, u32 portid, u32 seq, int event,
- u16 flags, int family, int report, gfp_t gfp)
+static void
+__nft_obj_notify(struct net *net, const struct nft_table *table,
+ struct nft_object *obj, u32 portid, u32 seq, int event,
+ u16 flags, int family, int report, gfp_t gfp)
{
struct nftables_pernet *nft_net = nft_pernet(net);
struct sk_buff *skb;
int err;
- char *buf = kasprintf(gfp, "%s:%u",
- table->name, nft_net->base_seq);
-
- audit_log_nfcfg(buf,
- family,
- obj->handle,
- event == NFT_MSG_NEWOBJ ?
- AUDIT_NFT_OP_OBJ_REGISTER :
- AUDIT_NFT_OP_OBJ_UNREGISTER,
- gfp);
- kfree(buf);

if (!report &&
!nfnetlink_has_listeners(net, NFNLGRP_NFTABLES))
@@ -7616,13 +7606,35 @@ void nft_obj_notify(struct net *net, const struct nft_table *table,
err:
nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, -ENOBUFS);
}
+
+void nft_obj_notify(struct net *net, const struct nft_table *table,
+ struct nft_object *obj, u32 portid, u32 seq, int event,
+ u16 flags, int family, int report, gfp_t gfp)
+{
+ struct nftables_pernet *nft_net = nft_pernet(net);
+ char *buf = kasprintf(gfp, "%s:%u",
+ table->name, nft_net->base_seq);
+
+ audit_log_nfcfg(buf,
+ family,
+ obj->handle,
+ event == NFT_MSG_NEWOBJ ?
+ AUDIT_NFT_OP_OBJ_REGISTER :
+ AUDIT_NFT_OP_OBJ_UNREGISTER,
+ gfp);
+ kfree(buf);
+
+ __nft_obj_notify(net, table, obj, portid, seq, event,
+ flags, family, report, gfp);
+}
EXPORT_SYMBOL_GPL(nft_obj_notify);

static void nf_tables_obj_notify(const struct nft_ctx *ctx,
struct nft_object *obj, int event)
{
- nft_obj_notify(ctx->net, ctx->table, obj, ctx->portid, ctx->seq, event,
- ctx->flags, ctx->family, ctx->report, GFP_KERNEL);
+ __nft_obj_notify(ctx->net, ctx->table, obj, ctx->portid,
+ ctx->seq, event, ctx->flags, ctx->family,
+ ctx->report, GFP_KERNEL);
}

/*
diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c
index 487572dcd614..2660ceab3759 100644
--- a/net/netfilter/nft_set_rbtree.c
+++ b/net/netfilter/nft_set_rbtree.c
@@ -233,10 +233,9 @@ static void nft_rbtree_gc_remove(struct net *net, struct nft_set *set,
rb_erase(&rbe->node, &priv->root);
}

-static int nft_rbtree_gc_elem(const struct nft_set *__set,
- struct nft_rbtree *priv,
- struct nft_rbtree_elem *rbe,
- u8 genmask)
+static const struct nft_rbtree_elem *
+nft_rbtree_gc_elem(const struct nft_set *__set, struct nft_rbtree *priv,
+ struct nft_rbtree_elem *rbe, u8 genmask)
{
struct nft_set *set = (struct nft_set *)__set;
struct rb_node *prev = rb_prev(&rbe->node);
@@ -246,7 +245,7 @@ static int nft_rbtree_gc_elem(const struct nft_set *__set,

gc = nft_trans_gc_alloc(set, 0, GFP_ATOMIC);
if (!gc)
- return -ENOMEM;
+ return ERR_PTR(-ENOMEM);

/* search for end interval coming before this element.
* end intervals don't carry a timeout extension, they
@@ -261,6 +260,7 @@ static int nft_rbtree_gc_elem(const struct nft_set *__set,
prev = rb_prev(prev);
}

+ rbe_prev = NULL;
if (prev) {
rbe_prev = rb_entry(prev, struct nft_rbtree_elem, node);
nft_rbtree_gc_remove(net, set, priv, rbe_prev);
@@ -272,7 +272,7 @@ static int nft_rbtree_gc_elem(const struct nft_set *__set,
*/
gc = nft_trans_gc_queue_sync(gc, GFP_ATOMIC);
if (WARN_ON_ONCE(!gc))
- return -ENOMEM;
+ return ERR_PTR(-ENOMEM);

nft_trans_gc_elem_add(gc, rbe_prev);
}
@@ -280,13 +280,13 @@ static int nft_rbtree_gc_elem(const struct nft_set *__set,
nft_rbtree_gc_remove(net, set, priv, rbe);
gc = nft_trans_gc_queue_sync(gc, GFP_ATOMIC);
if (WARN_ON_ONCE(!gc))
- return -ENOMEM;
+ return ERR_PTR(-ENOMEM);

nft_trans_gc_elem_add(gc, rbe);

nft_trans_gc_queue_sync_done(gc);

- return 0;
+ return rbe_prev;
}

static bool nft_rbtree_update_first(const struct nft_set *set,
@@ -314,7 +314,7 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
struct nft_rbtree *priv = nft_set_priv(set);
u8 cur_genmask = nft_genmask_cur(net);
u8 genmask = nft_genmask_next(net);
- int d, err;
+ int d;

/* Descend the tree to search for an existing element greater than the
* key value to insert that is greater than the new element. This is the
@@ -363,9 +363,14 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
*/
if (nft_set_elem_expired(&rbe->ext) &&
nft_set_elem_active(&rbe->ext, cur_genmask)) {
- err = nft_rbtree_gc_elem(set, priv, rbe, genmask);
- if (err < 0)
- return err;
+ const struct nft_rbtree_elem *removed_end;
+
+ removed_end = nft_rbtree_gc_elem(set, priv, rbe, genmask);
+ if (IS_ERR(removed_end))
+ return PTR_ERR(removed_end);
+
+ if (removed_end == rbe_le || removed_end == rbe_ge)
+ return -EAGAIN;

continue;
}
@@ -486,11 +491,18 @@ static int nft_rbtree_insert(const struct net *net, const struct nft_set *set,
struct nft_rbtree_elem *rbe = elem->priv;
int err;

- write_lock_bh(&priv->lock);
- write_seqcount_begin(&priv->count);
- err = __nft_rbtree_insert(net, set, rbe, ext);
- write_seqcount_end(&priv->count);
- write_unlock_bh(&priv->lock);
+ do {
+ if (fatal_signal_pending(current))
+ return -EINTR;
+
+ cond_resched();
+
+ write_lock_bh(&priv->lock);
+ write_seqcount_begin(&priv->count);
+ err = __nft_rbtree_insert(net, set, rbe, ext);
+ write_seqcount_end(&priv->count);
+ write_unlock_bh(&priv->lock);
+ } while (err == -EAGAIN);

return err;
}
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 387e430a35cc..cb833302270a 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -352,7 +352,7 @@ static void netlink_overrun(struct sock *sk)
if (!nlk_test_bit(RECV_NO_ENOBUFS, sk)) {
if (!test_and_set_bit(NETLINK_S_CONGESTED,
&nlk_sk(sk)->state)) {
- sk->sk_err = ENOBUFS;
+ WRITE_ONCE(sk->sk_err, ENOBUFS);
sk_error_report(sk);
}
}
@@ -1566,7 +1566,7 @@ static int do_one_set_err(struct sock *sk, struct netlink_set_err_data *p)
goto out;
}

- sk->sk_err = p->code;
+ WRITE_ONCE(sk->sk_err, p->code);
sk_error_report(sk);
out:
return ret;
@@ -1955,7 +1955,7 @@ static int netlink_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf / 2) {
ret = netlink_dump(sk);
if (ret) {
- sk->sk_err = -ret;
+ WRITE_ONCE(sk->sk_err, -ret);
sk_error_report(sk);
}
}
@@ -2443,19 +2443,24 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err,
flags |= NLM_F_ACK_TLVS;

skb = nlmsg_new(payload + tlvlen, GFP_KERNEL);
- if (!skb) {
- NETLINK_CB(in_skb).sk->sk_err = ENOBUFS;
- sk_error_report(NETLINK_CB(in_skb).sk);
- return;
- }
+ if (!skb)
+ goto err_skb;

rep = nlmsg_put(skb, NETLINK_CB(in_skb).portid, nlh->nlmsg_seq,
- NLMSG_ERROR, payload, flags);
+ NLMSG_ERROR, sizeof(*errmsg), flags);
+ if (!rep)
+ goto err_bad_put;
errmsg = nlmsg_data(rep);
errmsg->error = err;
- unsafe_memcpy(&errmsg->msg, nlh, payload > sizeof(*errmsg)
- ? nlh->nlmsg_len : sizeof(*nlh),
- /* Bounds checked by the skb layer. */);
+ errmsg->msg = *nlh;
+
+ if (!(flags & NLM_F_CAPPED)) {
+ if (!nlmsg_append(skb, nlmsg_len(nlh)))
+ goto err_bad_put;
+
+ memcpy(nlmsg_data(&errmsg->msg), nlmsg_data(nlh),
+ nlmsg_len(nlh));
+ }

if (tlvlen)
netlink_ack_tlv_fill(in_skb, skb, nlh, err, extack);
@@ -2463,6 +2468,14 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err,
nlmsg_end(skb, rep);

nlmsg_unicast(in_skb->sk, skb, NETLINK_CB(in_skb).portid);
+
+ return;
+
+err_bad_put:
+ nlmsg_free(skb);
+err_skb:
+ WRITE_ONCE(NETLINK_CB(in_skb).sk->sk_err, ENOBUFS);
+ sk_error_report(NETLINK_CB(in_skb).sk);
}
EXPORT_SYMBOL(netlink_ack);

diff --git a/net/nfc/llcp_core.c b/net/nfc/llcp_core.c
index f60e424e0607..6705bb895e23 100644
--- a/net/nfc/llcp_core.c
+++ b/net/nfc/llcp_core.c
@@ -1636,7 +1636,9 @@ int nfc_llcp_register_device(struct nfc_dev *ndev)
timer_setup(&local->sdreq_timer, nfc_llcp_sdreq_timer, 0);
INIT_WORK(&local->sdreq_timeout_work, nfc_llcp_sdreq_timeout_work);

+ spin_lock(&llcp_devices_lock);
list_add(&local->list, &llcp_devices);
+ spin_unlock(&llcp_devices_lock);

return 0;
}
diff --git a/net/rds/tcp_connect.c b/net/rds/tcp_connect.c
index f0c477c5d1db..d788c6d28986 100644
--- a/net/rds/tcp_connect.c
+++ b/net/rds/tcp_connect.c
@@ -173,7 +173,7 @@ int rds_tcp_conn_path_connect(struct rds_conn_path *cp)
* own the socket
*/
rds_tcp_set_callbacks(sock, cp);
- ret = sock->ops->connect(sock, addr, addrlen, O_NONBLOCK);
+ ret = kernel_connect(sock, addr, addrlen, O_NONBLOCK);

rdsdebug("connect to address %pI6c returned %d\n", &conn->c_faddr, ret);
if (ret == -EINPROGRESS)
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index 3460abceba44..2965a12fe8aa 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -1161,8 +1161,7 @@ int sctp_assoc_update(struct sctp_association *asoc,
/* Add any peer addresses from the new association. */
list_for_each_entry(trans, &new->peer.transport_addr_list,
transports)
- if (!sctp_assoc_lookup_paddr(asoc, &trans->ipaddr) &&
- !sctp_assoc_add_peer(asoc, &trans->ipaddr,
+ if (!sctp_assoc_add_peer(asoc, &trans->ipaddr,
GFP_ATOMIC, trans->state))
return -ENOMEM;

diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 32e3669adf14..e25dc1709131 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -2449,6 +2449,7 @@ static int sctp_apply_peer_addr_params(struct sctp_paddrparams *params,
if (trans) {
trans->hbinterval =
msecs_to_jiffies(params->spp_hbinterval);
+ sctp_transport_reset_hb_timer(trans);
} else if (asoc) {
asoc->hbinterval =
msecs_to_jiffies(params->spp_hbinterval);
diff --git a/net/socket.c b/net/socket.c
index d281a7ef4b1d..b0169168e3f4 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -720,6 +720,14 @@ static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg)
return ret;
}

+static int __sock_sendmsg(struct socket *sock, struct msghdr *msg)
+{
+ int err = security_socket_sendmsg(sock, msg,
+ msg_data_left(msg));
+
+ return err ?: sock_sendmsg_nosec(sock, msg);
+}
+
/**
* sock_sendmsg - send a message through @sock
* @sock: socket
@@ -730,10 +738,19 @@ static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg)
*/
int sock_sendmsg(struct socket *sock, struct msghdr *msg)
{
- int err = security_socket_sendmsg(sock, msg,
- msg_data_left(msg));
+ struct sockaddr_storage *save_addr = (struct sockaddr_storage *)msg->msg_name;
+ struct sockaddr_storage address;
+ int ret;

- return err ?: sock_sendmsg_nosec(sock, msg);
+ if (msg->msg_name) {
+ memcpy(&address, msg->msg_name, msg->msg_namelen);
+ msg->msg_name = &address;
+ }
+
+ ret = __sock_sendmsg(sock, msg);
+ msg->msg_name = save_addr;
+
+ return ret;
}
EXPORT_SYMBOL(sock_sendmsg);

@@ -1110,7 +1127,7 @@ static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from)
if (sock->type == SOCK_SEQPACKET)
msg.msg_flags |= MSG_EOR;

- res = sock_sendmsg(sock, &msg);
+ res = __sock_sendmsg(sock, &msg);
*from = msg.msg_iter;
return res;
}
@@ -2114,7 +2131,7 @@ int __sys_sendto(int fd, void __user *buff, size_t len, unsigned int flags,
if (sock->file->f_flags & O_NONBLOCK)
flags |= MSG_DONTWAIT;
msg.msg_flags = flags;
- err = sock_sendmsg(sock, &msg);
+ err = __sock_sendmsg(sock, &msg);

out_put:
fput_light(sock->file, fput_needed);
@@ -2479,7 +2496,7 @@ static int ____sys_sendmsg(struct socket *sock, struct msghdr *msg_sys,
err = sock_sendmsg_nosec(sock, msg_sys);
goto out_freectl;
}
- err = sock_sendmsg(sock, msg_sys);
+ err = __sock_sendmsg(sock, msg_sys);
/*
* If this is sendmmsg() and sending to current destination address was
* successful, remember it.
diff --git a/net/tipc/crypto.c b/net/tipc/crypto.c
index 2b236d95a646..65f59739a041 100644
--- a/net/tipc/crypto.c
+++ b/net/tipc/crypto.c
@@ -1441,14 +1441,14 @@ static int tipc_crypto_key_revoke(struct net *net, u8 tx_key)
struct tipc_crypto *tx = tipc_net(net)->crypto_tx;
struct tipc_key key;

- spin_lock(&tx->lock);
+ spin_lock_bh(&tx->lock);
key = tx->key;
WARN_ON(!key.active || tx_key != key.active);

/* Free the active key */
tipc_crypto_key_set_state(tx, key.passive, 0, key.pending);
tipc_crypto_key_detach(tx->aead[key.active], &tx->lock);
- spin_unlock(&tx->lock);
+ spin_unlock_bh(&tx->lock);

pr_warn("%s: key is revoked\n", tx->name);
return -EKEYREVOKED;
diff --git a/net/wireless/core.c b/net/wireless/core.c
index 609b79fe4a74..2c7960467206 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -408,6 +408,34 @@ static void cfg80211_propagate_cac_done_wk(struct work_struct *work)
rtnl_unlock();
}

+/* Worker: run at most one queued wiphy_work per invocation, under the wiphy mutex. */
+static void cfg80211_wiphy_work(struct work_struct *work)
+{
+	struct cfg80211_registered_device *rdev =
+		container_of(work, struct cfg80211_registered_device, wiphy_work);
+	struct wiphy_work *item;
+
+	wiphy_lock(&rdev->wiphy);
+	if (rdev->suspended)
+		goto out;
+
+	spin_lock_irq(&rdev->wiphy_work_lock);
+	item = list_first_entry_or_null(&rdev->wiphy_work_list,
+					struct wiphy_work, entry);
+	if (item) {
+		list_del_init(&item->entry);
+		/* more entries pending: reschedule ourselves before dropping the lock */
+		if (!list_empty(&rdev->wiphy_work_list))
+			schedule_work(work);
+	}
+	spin_unlock_irq(&rdev->wiphy_work_lock);
+
+	if (item)
+		item->func(&rdev->wiphy, item);
+out:
+	wiphy_unlock(&rdev->wiphy);
+}
+
/* exported functions */

struct wiphy *wiphy_new_nm(const struct cfg80211_ops *ops, int sizeof_priv,
@@ -533,6 +561,9 @@ struct wiphy *wiphy_new_nm(const struct cfg80211_ops *ops, int sizeof_priv,
return NULL;
}

+ INIT_WORK(&rdev->wiphy_work, cfg80211_wiphy_work);
+ INIT_LIST_HEAD(&rdev->wiphy_work_list);
+ spin_lock_init(&rdev->wiphy_work_lock);
INIT_WORK(&rdev->rfkill_block, cfg80211_rfkill_block_work);
INIT_WORK(&rdev->conn_work, cfg80211_conn_work);
INIT_WORK(&rdev->event_work, cfg80211_event_work);
@@ -1011,6 +1042,31 @@ void wiphy_rfkill_start_polling(struct wiphy *wiphy)
}
EXPORT_SYMBOL(wiphy_rfkill_start_polling);

+/* Synchronously drain the wiphy work list; the caller must hold the wiphy mutex. */
+void cfg80211_process_wiphy_works(struct cfg80211_registered_device *rdev)
+{
+	struct list_head *pending = &rdev->wiphy_work_list;
+	unsigned int budget = 100;
+	struct wiphy_work *item;
+	unsigned long flags;
+
+	lockdep_assert_held(&rdev->wiphy.mtx);
+
+	spin_lock_irqsave(&rdev->wiphy_work_lock, flags);
+	while (!list_empty(pending)) {
+		item = list_first_entry(pending, struct wiphy_work, entry);
+		list_del_init(&item->entry);
+		spin_unlock_irqrestore(&rdev->wiphy_work_lock, flags);
+
+		item->func(&rdev->wiphy, item);
+
+		spin_lock_irqsave(&rdev->wiphy_work_lock, flags);
+		if (WARN_ON(--budget == 0))
+			INIT_LIST_HEAD(pending);
+	}
+	spin_unlock_irqrestore(&rdev->wiphy_work_lock, flags);
+}
+
void wiphy_unregister(struct wiphy *wiphy)
{
struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
@@ -1049,9 +1105,19 @@ void wiphy_unregister(struct wiphy *wiphy)
cfg80211_rdev_list_generation++;
device_del(&rdev->wiphy.dev);

+#ifdef CONFIG_PM
+ if (rdev->wiphy.wowlan_config && rdev->ops->set_wakeup)
+ rdev_set_wakeup(rdev, false);
+#endif
+
+ /* surely nothing is reachable now, clean up work */
+ cfg80211_process_wiphy_works(rdev);
wiphy_unlock(&rdev->wiphy);
rtnl_unlock();

+ /* this has nothing to do now but make sure it's gone */
+ cancel_work_sync(&rdev->wiphy_work);
+
flush_work(&rdev->scan_done_wk);
cancel_work_sync(&rdev->conn_work);
flush_work(&rdev->event_work);
@@ -1064,10 +1130,6 @@ void wiphy_unregister(struct wiphy *wiphy)
flush_work(&rdev->mgmt_registrations_update_wk);
flush_work(&rdev->background_cac_abort_wk);

-#ifdef CONFIG_PM
- if (rdev->wiphy.wowlan_config && rdev->ops->set_wakeup)
- rdev_set_wakeup(rdev, false);
-#endif
cfg80211_rdev_free_wowlan(rdev);
cfg80211_rdev_free_coalesce(rdev);
}
@@ -1114,16 +1176,11 @@ void wiphy_rfkill_set_hw_state_reason(struct wiphy *wiphy, bool blocked,
}
EXPORT_SYMBOL(wiphy_rfkill_set_hw_state_reason);

-void cfg80211_cqm_config_free(struct wireless_dev *wdev)
-{
- kfree(wdev->cqm_config);
- wdev->cqm_config = NULL;
-}
-
static void _cfg80211_unregister_wdev(struct wireless_dev *wdev,
bool unregister_netdev)
{
struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
+ struct cfg80211_cqm_config *cqm_config;
unsigned int link_id;

ASSERT_RTNL();
@@ -1162,11 +1219,10 @@ static void _cfg80211_unregister_wdev(struct wireless_dev *wdev,
kfree_sensitive(wdev->wext.keys);
wdev->wext.keys = NULL;
#endif
- /* only initialized if we have a netdev */
- if (wdev->netdev)
- flush_work(&wdev->disconnect_wk);
-
- cfg80211_cqm_config_free(wdev);
+ wiphy_work_cancel(wdev->wiphy, &wdev->cqm_rssi_work);
+ /* deleted from the list, so can't be found from nl80211 any more */
+ cqm_config = rcu_access_pointer(wdev->cqm_config);
+ kfree_rcu(cqm_config, rcu_head);

/*
* Ensure that all events have been processed and
@@ -1318,6 +1374,8 @@ void cfg80211_init_wdev(struct wireless_dev *wdev)
wdev->wext.connect.auth_type = NL80211_AUTHTYPE_AUTOMATIC;
#endif

+ wiphy_work_init(&wdev->cqm_rssi_work, cfg80211_cqm_rssi_notify_work);
+
if (wdev->wiphy->flags & WIPHY_FLAG_PS_ON_BY_DEFAULT)
wdev->ps = true;
else
@@ -1439,6 +1497,8 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb,
cfg80211_leave(rdev, wdev);
cfg80211_remove_links(wdev);
wiphy_unlock(&rdev->wiphy);
+ /* since we just did cfg80211_leave() nothing to do there */
+ cancel_work_sync(&wdev->disconnect_wk);
break;
case NETDEV_DOWN:
wiphy_lock(&rdev->wiphy);
@@ -1548,6 +1608,66 @@ static struct pernet_operations cfg80211_pernet_ops = {
.exit = cfg80211_pernet_exit,
};

+/* Append @work to the wiphy's pending list unless already linked, then kick the worker. */
+void wiphy_work_queue(struct wiphy *wiphy, struct wiphy_work *work)
+{
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
+	unsigned long irqflags;
+
+	spin_lock_irqsave(&rdev->wiphy_work_lock, irqflags);
+	if (list_empty(&work->entry))
+		list_add_tail(&work->entry, &rdev->wiphy_work_list);
+	spin_unlock_irqrestore(&rdev->wiphy_work_lock, irqflags);
+	schedule_work(&rdev->wiphy_work);
+}
+EXPORT_SYMBOL_GPL(wiphy_work_queue);
+
+/* Unlink @work from the pending list if it is queued; requires the wiphy mutex. */
+void wiphy_work_cancel(struct wiphy *wiphy, struct wiphy_work *work)
+{
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
+	unsigned long irqflags;
+
+	lockdep_assert_held(&wiphy->mtx);
+	spin_lock_irqsave(&rdev->wiphy_work_lock, irqflags);
+	if (!list_empty(&work->entry))
+		list_del_init(&work->entry);
+	spin_unlock_irqrestore(&rdev->wiphy_work_lock, irqflags);
+}
+EXPORT_SYMBOL_GPL(wiphy_work_cancel);
+
+void wiphy_delayed_work_timer(struct timer_list *t)
+{
+	struct wiphy_delayed_work *dw = container_of(t, typeof(*dw), timer);
+
+	wiphy_work_queue(dw->wiphy, &dw->work); /* timer fired: queue the embedded work */
+}
+EXPORT_SYMBOL(wiphy_delayed_work_timer);
+
+void wiphy_delayed_work_queue(struct wiphy *wiphy,
+			      struct wiphy_delayed_work *dwork,
+			      unsigned long delay)
+{
+	if (!delay) {
+		del_timer(&dwork->timer); /* else an armed timer would queue a second run */
+		wiphy_work_queue(wiphy, &dwork->work);
+	} else {
+		dwork->wiphy = wiphy; /* read back by wiphy_delayed_work_timer() */
+		mod_timer(&dwork->timer, jiffies + delay);
+	}
+}
+EXPORT_SYMBOL_GPL(wiphy_delayed_work_queue);
+
+/* Stop the timer first so it cannot requeue, then unlink the work; needs the wiphy mutex. */
+void wiphy_delayed_work_cancel(struct wiphy *wiphy,
+			       struct wiphy_delayed_work *dwork)
+{
+	lockdep_assert_held(&wiphy->mtx);
+	del_timer_sync(&dwork->timer);
+	wiphy_work_cancel(wiphy, &dwork->work);
+}
+EXPORT_SYMBOL_GPL(wiphy_delayed_work_cancel);
+
static int __init cfg80211_init(void)
{
int err;
diff --git a/net/wireless/core.h b/net/wireless/core.h
index 775e16cb99ed..86fd79912254 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -108,6 +108,12 @@ struct cfg80211_registered_device {
/* lock for all wdev lists */
spinlock_t mgmt_registrations_lock;

+ struct work_struct wiphy_work;
+ struct list_head wiphy_work_list;
+ /* protects the list above */
+ spinlock_t wiphy_work_lock;
+ bool suspended;
+
/* must be last because of the way we do wiphy_priv(),
* and it should at least be aligned to NETDEV_ALIGN */
struct wiphy wiphy __aligned(NETDEV_ALIGN);
@@ -287,12 +293,17 @@ struct cfg80211_beacon_registration {
};

struct cfg80211_cqm_config {
+ struct rcu_head rcu_head;
u32 rssi_hyst;
s32 last_rssi_event_value;
+ enum nl80211_cqm_rssi_threshold_event last_rssi_event_type;
int n_rssi_thresholds;
s32 rssi_thresholds[];
};

+void cfg80211_cqm_rssi_notify_work(struct wiphy *wiphy,
+ struct wiphy_work *work);
+
void cfg80211_destroy_ifaces(struct cfg80211_registered_device *rdev);

/* free object */
@@ -450,6 +461,7 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev,
struct net_device *dev, enum nl80211_iftype ntype,
struct vif_params *params);
void cfg80211_process_rdev_events(struct cfg80211_registered_device *rdev);
+void cfg80211_process_wiphy_works(struct cfg80211_registered_device *rdev);
void cfg80211_process_wdev_events(struct wireless_dev *wdev);

bool cfg80211_does_bw_fit_range(const struct ieee80211_freq_range *freq_range,
@@ -556,8 +568,6 @@ cfg80211_bss_update(struct cfg80211_registered_device *rdev,
#define CFG80211_DEV_WARN_ON(cond) ({bool __r = (cond); __r; })
#endif

-void cfg80211_cqm_config_free(struct wireless_dev *wdev);
-
void cfg80211_release_pmsr(struct wireless_dev *wdev, u32 portid);
void cfg80211_pmsr_wdev_down(struct wireless_dev *wdev);
void cfg80211_pmsr_free_wk(struct work_struct *work);
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 12c7c89d5be1..1d993a490ac4 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -12565,7 +12565,8 @@ static int nl80211_set_cqm_txe(struct genl_info *info,
}

static int cfg80211_cqm_rssi_update(struct cfg80211_registered_device *rdev,
- struct net_device *dev)
+ struct net_device *dev,
+ struct cfg80211_cqm_config *cqm_config)
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
s32 last, low, high;
@@ -12574,7 +12575,7 @@ static int cfg80211_cqm_rssi_update(struct cfg80211_registered_device *rdev,
int err;

/* RSSI reporting disabled? */
- if (!wdev->cqm_config)
+ if (!cqm_config)
return rdev_set_cqm_rssi_range_config(rdev, dev, 0, 0);

/*
@@ -12583,7 +12584,7 @@ static int cfg80211_cqm_rssi_update(struct cfg80211_registered_device *rdev,
* connection is established and enough beacons received to calculate
* the average.
*/
- if (!wdev->cqm_config->last_rssi_event_value &&
+ if (!cqm_config->last_rssi_event_value &&
wdev->links[0].client.current_bss &&
rdev->ops->get_station) {
struct station_info sinfo = {};
@@ -12597,30 +12598,30 @@ static int cfg80211_cqm_rssi_update(struct cfg80211_registered_device *rdev,

cfg80211_sinfo_release_content(&sinfo);
if (sinfo.filled & BIT_ULL(NL80211_STA_INFO_BEACON_SIGNAL_AVG))
- wdev->cqm_config->last_rssi_event_value =
+ cqm_config->last_rssi_event_value =
(s8) sinfo.rx_beacon_signal_avg;
}

- last = wdev->cqm_config->last_rssi_event_value;
- hyst = wdev->cqm_config->rssi_hyst;
- n = wdev->cqm_config->n_rssi_thresholds;
+ last = cqm_config->last_rssi_event_value;
+ hyst = cqm_config->rssi_hyst;
+ n = cqm_config->n_rssi_thresholds;

for (i = 0; i < n; i++) {
i = array_index_nospec(i, n);
- if (last < wdev->cqm_config->rssi_thresholds[i])
+ if (last < cqm_config->rssi_thresholds[i])
break;
}

low_index = i - 1;
if (low_index >= 0) {
low_index = array_index_nospec(low_index, n);
- low = wdev->cqm_config->rssi_thresholds[low_index] - hyst;
+ low = cqm_config->rssi_thresholds[low_index] - hyst;
} else {
low = S32_MIN;
}
if (i < n) {
i = array_index_nospec(i, n);
- high = wdev->cqm_config->rssi_thresholds[i] + hyst - 1;
+ high = cqm_config->rssi_thresholds[i] + hyst - 1;
} else {
high = S32_MAX;
}
@@ -12633,6 +12634,7 @@ static int nl80211_set_cqm_rssi(struct genl_info *info,
u32 hysteresis)
{
struct cfg80211_registered_device *rdev = info->user_ptr[0];
+ struct cfg80211_cqm_config *cqm_config = NULL, *old;
struct net_device *dev = info->user_ptr[1];
struct wireless_dev *wdev = dev->ieee80211_ptr;
int i, err;
@@ -12650,10 +12652,6 @@ static int nl80211_set_cqm_rssi(struct genl_info *info,
wdev->iftype != NL80211_IFTYPE_P2P_CLIENT)
return -EOPNOTSUPP;

- wdev_lock(wdev);
- cfg80211_cqm_config_free(wdev);
- wdev_unlock(wdev);
-
if (n_thresholds <= 1 && rdev->ops->set_cqm_rssi_config) {
if (n_thresholds == 0 || thresholds[0] == 0) /* Disabling */
return rdev_set_cqm_rssi_config(rdev, dev, 0, 0);
@@ -12670,9 +12668,10 @@ static int nl80211_set_cqm_rssi(struct genl_info *info,
n_thresholds = 0;

wdev_lock(wdev);
- if (n_thresholds) {
- struct cfg80211_cqm_config *cqm_config;
+ old = rcu_dereference_protected(wdev->cqm_config,
+ lockdep_is_held(&wdev->mtx));

+ if (n_thresholds) {
cqm_config = kzalloc(struct_size(cqm_config, rssi_thresholds,
n_thresholds),
GFP_KERNEL);
@@ -12687,11 +12686,18 @@ static int nl80211_set_cqm_rssi(struct genl_info *info,
flex_array_size(cqm_config, rssi_thresholds,
n_thresholds));

- wdev->cqm_config = cqm_config;
+ rcu_assign_pointer(wdev->cqm_config, cqm_config);
+ } else {
+ RCU_INIT_POINTER(wdev->cqm_config, NULL);
}

- err = cfg80211_cqm_rssi_update(rdev, dev);
-
+ err = cfg80211_cqm_rssi_update(rdev, dev, cqm_config);
+ if (err) {
+ rcu_assign_pointer(wdev->cqm_config, old);
+ kfree_rcu(cqm_config, rcu_head);
+ } else {
+ kfree_rcu(old, rcu_head);
+ }
unlock:
wdev_unlock(wdev);

@@ -18719,9 +18725,8 @@ void cfg80211_cqm_rssi_notify(struct net_device *dev,
enum nl80211_cqm_rssi_threshold_event rssi_event,
s32 rssi_level, gfp_t gfp)
{
- struct sk_buff *msg;
struct wireless_dev *wdev = dev->ieee80211_ptr;
- struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
+ struct cfg80211_cqm_config *cqm_config;

trace_cfg80211_cqm_rssi_notify(dev, rssi_event, rssi_level);

@@ -18729,18 +18734,41 @@ void cfg80211_cqm_rssi_notify(struct net_device *dev,
rssi_event != NL80211_CQM_RSSI_THRESHOLD_EVENT_HIGH))
return;

- if (wdev->cqm_config) {
- wdev->cqm_config->last_rssi_event_value = rssi_level;
+ rcu_read_lock();
+ cqm_config = rcu_dereference(wdev->cqm_config);
+ if (cqm_config) {
+ cqm_config->last_rssi_event_value = rssi_level;
+ cqm_config->last_rssi_event_type = rssi_event;
+ wiphy_work_queue(wdev->wiphy, &wdev->cqm_rssi_work);
+ }
+ rcu_read_unlock();
+}
+EXPORT_SYMBOL(cfg80211_cqm_rssi_notify);
+
+void cfg80211_cqm_rssi_notify_work(struct wiphy *wiphy, struct wiphy_work *work)
+{
+ struct wireless_dev *wdev = container_of(work, struct wireless_dev,
+ cqm_rssi_work);
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
+ enum nl80211_cqm_rssi_threshold_event rssi_event;
+ struct cfg80211_cqm_config *cqm_config;
+ struct sk_buff *msg;
+ s32 rssi_level;

- cfg80211_cqm_rssi_update(rdev, dev);
+ wdev_lock(wdev);
+ cqm_config = rcu_dereference_protected(wdev->cqm_config,
+ lockdep_is_held(&wdev->mtx));
+ if (!cqm_config) /* test the pointer fetched through the RCU accessor */
+ goto unlock;

- if (rssi_level == 0)
- rssi_level = wdev->cqm_config->last_rssi_event_value;
- }
+ cfg80211_cqm_rssi_update(rdev, wdev->netdev, cqm_config);

- msg = cfg80211_prepare_cqm(dev, NULL, gfp);
+ rssi_level = cqm_config->last_rssi_event_value;
+ rssi_event = cqm_config->last_rssi_event_type;
+
+ msg = cfg80211_prepare_cqm(wdev->netdev, NULL, GFP_KERNEL);
if (!msg)
- return;
+ goto unlock;

if (nla_put_u32(msg, NL80211_ATTR_CQM_RSSI_THRESHOLD_EVENT,
rssi_event))
@@ -18750,14 +18778,15 @@ void cfg80211_cqm_rssi_notify(struct net_device *dev,
rssi_level))
goto nla_put_failure;

- cfg80211_send_cqm(msg, gfp);
+ cfg80211_send_cqm(msg, GFP_KERNEL);

- return;
+ goto unlock;

nla_put_failure:
nlmsg_free(msg);
+ unlock:
+ wdev_unlock(wdev);
}
-EXPORT_SYMBOL(cfg80211_cqm_rssi_notify);

void cfg80211_cqm_txe_notify(struct net_device *dev,
const u8 *peer, u32 num_packets,
diff --git a/net/wireless/sme.c b/net/wireless/sme.c
index 6e87d2cd8345..b97834284bae 100644
--- a/net/wireless/sme.c
+++ b/net/wireless/sme.c
@@ -5,7 +5,7 @@
* (for nl80211's connect() and wext)
*
* Copyright 2009 Johannes Berg <johannes@xxxxxxxxxxxxxxxx>
- * Copyright (C) 2009, 2020, 2022 Intel Corporation. All rights reserved.
+ * Copyright (C) 2009, 2020, 2022-2023 Intel Corporation. All rights reserved.
* Copyright 2017 Intel Deutschland GmbH
*/

@@ -1555,6 +1555,7 @@ void cfg80211_autodisconnect_wk(struct work_struct *work)
container_of(work, struct wireless_dev, disconnect_wk);
struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);

+ wiphy_lock(wdev->wiphy);
wdev_lock(wdev);

if (wdev->conn_owner_nlportid) {
@@ -1593,4 +1594,5 @@ void cfg80211_autodisconnect_wk(struct work_struct *work)
}

wdev_unlock(wdev);
+ wiphy_unlock(wdev->wiphy);
}
diff --git a/net/wireless/sysfs.c b/net/wireless/sysfs.c
index 0c3f05c9be27..4d3b65803010 100644
--- a/net/wireless/sysfs.c
+++ b/net/wireless/sysfs.c
@@ -5,7 +5,7 @@
*
* Copyright 2005-2006 Jiri Benc <jbenc@xxxxxxx>
* Copyright 2006 Johannes Berg <johannes@xxxxxxxxxxxxxxxx>
- * Copyright (C) 2020-2021 Intel Corporation
+ * Copyright (C) 2020-2021, 2023 Intel Corporation
*/

#include <linux/device.h>
@@ -105,14 +105,18 @@ static int wiphy_suspend(struct device *dev)
cfg80211_leave_all(rdev);
cfg80211_process_rdev_events(rdev);
}
+ cfg80211_process_wiphy_works(rdev);
if (rdev->ops->suspend)
ret = rdev_suspend(rdev, rdev->wiphy.wowlan_config);
if (ret == 1) {
/* Driver refuse to configure wowlan */
cfg80211_leave_all(rdev);
cfg80211_process_rdev_events(rdev);
+ cfg80211_process_wiphy_works(rdev);
ret = rdev_suspend(rdev, NULL);
}
+ if (ret == 0)
+ rdev->suspended = true;
}
wiphy_unlock(&rdev->wiphy);
rtnl_unlock();
@@ -132,6 +136,8 @@ static int wiphy_resume(struct device *dev)
wiphy_lock(&rdev->wiphy);
if (rdev->wiphy.registered && rdev->ops->resume)
ret = rdev_resume(rdev);
+ rdev->suspended = false;
+ schedule_work(&rdev->wiphy_work);
wiphy_unlock(&rdev->wiphy);

if (ret)
diff --git a/scripts/mod/file2alias.c b/scripts/mod/file2alias.c
index 80d973144fde..111d5464c12d 100644
--- a/scripts/mod/file2alias.c
+++ b/scripts/mod/file2alias.c
@@ -1577,7 +1577,7 @@ void handle_moddevtable(struct module *mod, struct elf_info *info,
/* First handle the "special" cases */
if (sym_is(name, namelen, "usb"))
do_usb_table(symval, sym->st_size, mod);
- if (sym_is(name, namelen, "of"))
+ else if (sym_is(name, namelen, "of"))
do_of_table(symval, sym->st_size, mod);
else if (sym_is(name, namelen, "pnp"))
do_pnp_device_entry(symval, sym->st_size, mod);
diff --git a/security/integrity/ima/Kconfig b/security/integrity/ima/Kconfig
index c17660bf5f34..6ef7bde55126 100644
--- a/security/integrity/ima/Kconfig
+++ b/security/integrity/ima/Kconfig
@@ -29,9 +29,11 @@ config IMA
to learn more about IMA.
If unsure, say N.

+if IMA
+
config IMA_KEXEC
bool "Enable carrying the IMA measurement list across a soft boot"
- depends on IMA && TCG_TPM && HAVE_IMA_KEXEC
+ depends on TCG_TPM && HAVE_IMA_KEXEC
default n
help
TPM PCRs are only reset on a hard reboot. In order to validate
@@ -43,7 +45,6 @@ config IMA_KEXEC

config IMA_MEASURE_PCR_IDX
int
- depends on IMA
range 8 14
default 10
help
@@ -53,7 +54,7 @@ config IMA_MEASURE_PCR_IDX

config IMA_LSM_RULES
bool
- depends on IMA && AUDIT && (SECURITY_SELINUX || SECURITY_SMACK || SECURITY_APPARMOR)
+ depends on AUDIT && (SECURITY_SELINUX || SECURITY_SMACK || SECURITY_APPARMOR)
default y
help
Disabling this option will disregard LSM based policy rules.
@@ -61,7 +62,6 @@ config IMA_LSM_RULES
choice
prompt "Default template"
default IMA_NG_TEMPLATE
- depends on IMA
help
Select the default IMA measurement template.

@@ -80,14 +80,12 @@ endchoice

config IMA_DEFAULT_TEMPLATE
string
- depends on IMA
default "ima-ng" if IMA_NG_TEMPLATE
default "ima-sig" if IMA_SIG_TEMPLATE

choice
prompt "Default integrity hash algorithm"
default IMA_DEFAULT_HASH_SHA1
- depends on IMA
help
Select the default hash algorithm used for the measurement
list, integrity appraisal and audit log. The compiled default
@@ -117,7 +115,6 @@ endchoice

config IMA_DEFAULT_HASH
string
- depends on IMA
default "sha1" if IMA_DEFAULT_HASH_SHA1
default "sha256" if IMA_DEFAULT_HASH_SHA256
default "sha512" if IMA_DEFAULT_HASH_SHA512
@@ -126,7 +123,6 @@ config IMA_DEFAULT_HASH

config IMA_WRITE_POLICY
bool "Enable multiple writes to the IMA policy"
- depends on IMA
default n
help
IMA policy can now be updated multiple times. The new rules get
@@ -137,7 +133,6 @@ config IMA_WRITE_POLICY

config IMA_READ_POLICY
bool "Enable reading back the current IMA policy"
- depends on IMA
default y if IMA_WRITE_POLICY
default n if !IMA_WRITE_POLICY
help
@@ -147,7 +142,6 @@ config IMA_READ_POLICY

config IMA_APPRAISE
bool "Appraise integrity measurements"
- depends on IMA
default n
help
This option enables local measurement integrity appraisal.
@@ -268,7 +262,7 @@ config IMA_KEYRINGS_PERMIT_SIGNED_BY_BUILTIN_OR_SECONDARY
config IMA_BLACKLIST_KEYRING
bool "Create IMA machine owner blacklist keyrings (EXPERIMENTAL)"
depends on SYSTEM_TRUSTED_KEYRING
- depends on IMA_TRUSTED_KEYRING
+ depends on INTEGRITY_TRUSTED_KEYRING
default n
help
This option creates an IMA blacklist keyring, which contains all
@@ -278,7 +272,7 @@ config IMA_BLACKLIST_KEYRING

config IMA_LOAD_X509
bool "Load X509 certificate onto the '.ima' trusted keyring"
- depends on IMA_TRUSTED_KEYRING
+ depends on INTEGRITY_TRUSTED_KEYRING
default n
help
File signature verification is based on the public keys
@@ -303,7 +297,6 @@ config IMA_APPRAISE_SIGNED_INIT

config IMA_MEASURE_ASYMMETRIC_KEYS
bool
- depends on IMA
depends on ASYMMETRIC_PUBLIC_KEY_SUBTYPE=y
default y

@@ -322,7 +315,8 @@ config IMA_SECURE_AND_OR_TRUSTED_BOOT

config IMA_DISABLE_HTABLE
bool "Disable htable to allow measurement of duplicate records"
- depends on IMA
default n
help
This option disables htable to allow measurement of duplicate records.
+
+endif
diff --git a/sound/soc/soc-utils.c b/sound/soc/soc-utils.c
index a4dba0b751e7..1bbd1d077dfd 100644
--- a/sound/soc/soc-utils.c
+++ b/sound/soc/soc-utils.c
@@ -217,6 +217,7 @@ int snd_soc_dai_is_dummy(struct snd_soc_dai *dai)
return 1;
return 0;
}
+EXPORT_SYMBOL_GPL(snd_soc_dai_is_dummy);

int snd_soc_component_is_dummy(struct snd_soc_component *component)
{
diff --git a/sound/soc/tegra/tegra_audio_graph_card.c b/sound/soc/tegra/tegra_audio_graph_card.c
index 1f2c5018bf5a..4737e776d383 100644
--- a/sound/soc/tegra/tegra_audio_graph_card.c
+++ b/sound/soc/tegra/tegra_audio_graph_card.c
@@ -10,6 +10,7 @@
#include <linux/platform_device.h>
#include <sound/graph_card.h>
#include <sound/pcm_params.h>
+#include <sound/soc-dai.h>

#define MAX_PLLA_OUT0_DIV 128

@@ -44,6 +45,21 @@ struct tegra_audio_cdata {
unsigned int plla_out0_rates[NUM_RATE_TYPE];
};

+/* True only for DAIs whose driver name marks an external I/O link (I2S, DMIC, DSPK). */
+static bool need_clk_update(struct snd_soc_dai *dai)
+{
+	const char *drv_name = dai->driver->name;
+
+	/* dummy DAIs and drivers lacking ops or a name never qualify */
+	if (snd_soc_dai_is_dummy(dai) || !dai->driver->ops || !drv_name)
+		return false;
+
+	/* substring match on the driver name */
+	return strstr(drv_name, "I2S") ||
+	       strstr(drv_name, "DMIC") ||
+	       strstr(drv_name, "DSPK");
+}
+
/* Setup PLL clock as per the given sample rate */
static int tegra_audio_graph_update_pll(struct snd_pcm_substream *substream,
struct snd_pcm_hw_params *params)
@@ -140,19 +156,7 @@ static int tegra_audio_graph_hw_params(struct snd_pcm_substream *substream,
struct snd_soc_dai *cpu_dai = asoc_rtd_to_cpu(rtd, 0);
int err;

- /*
- * This gets called for each DAI link (FE or BE) when DPCM is used.
- * We may not want to update PLLA rate for each call. So PLLA update
- * must be restricted to external I/O links (I2S, DMIC or DSPK) since
- * they actually depend on it. I/O modules update their clocks in
- * hw_param() of their respective component driver and PLLA rate
- * update here helps them to derive appropriate rates.
- *
- * TODO: When more HW accelerators get added (like sample rate
- * converter, volume gain controller etc., which don't really
- * depend on PLLA) we need a better way to filter here.
- */
- if (cpu_dai->driver->ops && rtd->dai_link->no_pcm) {
+ if (need_clk_update(cpu_dai)) {
err = tegra_audio_graph_update_pll(substream, params);
if (err)
return err;
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 53bc48794719..92dbe89dafbf 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -3112,6 +3112,11 @@ union bpf_attr {
* **BPF_FIB_LOOKUP_OUTPUT**
* Perform lookup from an egress perspective (default is
* ingress).
+ * **BPF_FIB_LOOKUP_SKIP_NEIGH**
+ * Skip the neighbour table lookup. *params*->dmac
+ * and *params*->smac will not be set as output. A common
+ * use case is to call **bpf_redirect_neigh**\ () after
+ * doing **bpf_fib_lookup**\ ().
*
* *ctx* is either **struct xdp_md** for XDP programs or
* **struct sk_buff** tc cls_act programs.
@@ -6678,6 +6683,7 @@ struct bpf_raw_tracepoint_args {
enum {
BPF_FIB_LOOKUP_DIRECT = (1U << 0),
BPF_FIB_LOOKUP_OUTPUT = (1U << 1),
+ BPF_FIB_LOOKUP_SKIP_NEIGH = (1U << 2),
};

enum {
diff --git a/tools/testing/selftests/netfilter/.gitignore b/tools/testing/selftests/netfilter/.gitignore
index 4cb887b57413..4b2928e1c19d 100644
--- a/tools/testing/selftests/netfilter/.gitignore
+++ b/tools/testing/selftests/netfilter/.gitignore
@@ -1,3 +1,4 @@
# SPDX-License-Identifier: GPL-2.0-only
nf-queue
connect_close
+audit_logread
diff --git a/tools/testing/selftests/netfilter/Makefile b/tools/testing/selftests/netfilter/Makefile
index 3686bfa6c58d..321db8850da0 100644
--- a/tools/testing/selftests/netfilter/Makefile
+++ b/tools/testing/selftests/netfilter/Makefile
@@ -6,13 +6,13 @@ TEST_PROGS := nft_trans_stress.sh nft_fib.sh nft_nat.sh bridge_brouter.sh \
nft_concat_range.sh nft_conntrack_helper.sh \
nft_queue.sh nft_meta.sh nf_nat_edemux.sh \
ipip-conntrack-mtu.sh conntrack_tcp_unreplied.sh \
- conntrack_vrf.sh nft_synproxy.sh rpath.sh
+ conntrack_vrf.sh nft_synproxy.sh rpath.sh nft_audit.sh

HOSTPKG_CONFIG := pkg-config

CFLAGS += $(shell $(HOSTPKG_CONFIG) --cflags libmnl 2>/dev/null)
LDLIBS += $(shell $(HOSTPKG_CONFIG) --libs libmnl 2>/dev/null || echo -lmnl)

-TEST_GEN_FILES = nf-queue connect_close
+TEST_GEN_FILES = nf-queue connect_close audit_logread

include ../lib.mk
diff --git a/tools/testing/selftests/netfilter/audit_logread.c b/tools/testing/selftests/netfilter/audit_logread.c
new file mode 100644
index 000000000000..a0a880fc2d9d
--- /dev/null
+++ b/tools/testing/selftests/netfilter/audit_logread.c
@@ -0,0 +1,165 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <fcntl.h>
+#include <poll.h>
+#include <signal.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/socket.h>
+#include <unistd.h>
+#include <linux/audit.h>
+#include <linux/netlink.h>
+
+static int fd;
+
+#define MAX_AUDIT_MESSAGE_LENGTH 8970 /* capacity of the raw payload buffer below */
+struct audit_message {
+ struct nlmsghdr nlh; /* netlink header preceding the payload */
+ union {
+ struct audit_status s; /* structured view, filled in by audit_send() */
+ char data[MAX_AUDIT_MESSAGE_LENGTH]; /* text view, parsed by readlog() */
+ } u;
+};
+
+/* Receive one datagram into @rep, retrying on EINTR; returns its size or -1. */
+int audit_recv(int fd, struct audit_message *rep)
+{
+	struct sockaddr_nl sender;
+	socklen_t sender_len = sizeof(sender);
+	int nread;
+
+	for (;;) {
+		nread = recvfrom(fd, rep, sizeof(*rep), 0,
+				 (struct sockaddr *)&sender, &sender_len);
+		if (nread >= 0 || errno != EINTR)
+			break;
+	}
+	if (nread < 0 || sender_len != sizeof(sender) || sender.nl_pid != 0)
+		return -1;
+	if (rep->nlh.nlmsg_type == NLMSG_ERROR) /* short-cut for now */
+		return -1;
+	return nread;
+}
+
+/*
+ * Send an audit_status request of @type: @key is stored as the mask and selects
+ * which of the enabled/pid fields receives @val. Returns 0 if fully sent, else -1.
+ */
+int audit_send(int fd, uint16_t type, uint32_t key, uint32_t val)
+{
+	static int seq = 0;
+	struct sockaddr_nl dest = { .nl_family = AF_NETLINK };
+	struct audit_message req = { 0 };
+	int sent;
+
+	req.nlh.nlmsg_len = NLMSG_SPACE(sizeof(req.u.s));
+	req.nlh.nlmsg_type = type;
+	req.nlh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
+	req.nlh.nlmsg_seq = ++seq;
+
+	/* only the field named by @key carries @val; the other stays zero */
+	req.u.s.mask = key;
+	if (key == AUDIT_STATUS_ENABLED)
+		req.u.s.enabled = val;
+	if (key == AUDIT_STATUS_PID)
+		req.u.s.pid = val;
+
+	do {
+		sent = sendto(fd, &req, req.nlh.nlmsg_len, 0,
+			      (struct sockaddr *)&dest, sizeof(dest));
+	} while (sent < 0 && errno == EINTR);
+
+	return sent == (int)req.nlh.nlmsg_len ? 0 : -1;
+}
+
+/*
+ * Issue an AUDIT_SET for @key/@val and read one reply; 0 on success, else the error.
+ */
+int audit_set(int fd, uint32_t key, uint32_t val)
+{
+	struct audit_message reply = { 0 };
+	int rc = audit_send(fd, AUDIT_SET, key, val);
+
+	if (rc)
+		return rc;
+
+	rc = audit_recv(fd, &reply);
+	return rc < 0 ? rc : 0;
+}
+
+/* Read one message; for AUDIT_NETFILTER_CFG records print the stable key=value pairs. */
+int readlog(int fd)
+{
+	struct audit_message rep = { 0 };
+	int ret = audit_recv(fd, &rep);
+	const char *sep = "";
+	char *k, *v;
+
+	if (ret < 0)
+		return ret;
+
+	if (rep.nlh.nlmsg_type != AUDIT_NETFILTER_CFG)
+		return 0;
+
+	/* skip the initial "audit(...): " part */
+	strtok(rep.u.data, " ");
+
+	/* stop at a key without a value rather than hand NULL to strchrnul()/printf() */
+	while ((k = strtok(NULL, "=")) && (v = strtok(NULL, " "))) {
+		/* these vary and/or are uninteresting, ignore */
+		if (!strcmp(k, "pid") ||
+		    !strcmp(k, "comm") ||
+		    !strcmp(k, "subj"))
+			continue;
+
+		/* strip the varying sequence number */
+		if (!strcmp(k, "table"))
+			*strchrnul(v, ':') = '\0';
+
+		printf("%s%s=%s", sep, k, v);
+		sep = " ";
+	}
+	if (*sep) {
+		printf("\n");
+		fflush(stdout);
+	}
+	return 0;
+}
+
+void cleanup(int sig)
+{
+	audit_set(fd, AUDIT_STATUS_ENABLED, 0); /* undo the enable done in main() */
+	close(fd);
+	if (sig) /* non-zero when invoked as the SIGTERM/SIGINT handler */
+		_exit(0); /* async-signal-safe, unlike exit() */
+}
+
+/* Open the audit netlink socket, set ENABLED=1 and PID=getpid(), then print records forever. */
+int main(int argc, char **argv)
+{
+	struct sigaction act = { .sa_handler = cleanup };
+
+	fd = socket(PF_NETLINK, SOCK_RAW, NETLINK_AUDIT);
+	if (fd < 0) {
+		perror("Can't open netlink socket");
+		return -1;
+	}
+
+	if (sigaction(SIGTERM, &act, NULL) < 0 ||
+	    sigaction(SIGINT, &act, NULL) < 0) {
+		perror("Can't set signal handler");
+		close(fd);
+		return -1;
+	}
+
+	audit_set(fd, AUDIT_STATUS_ENABLED, 1);
+	audit_set(fd, AUDIT_STATUS_PID, getpid());
+
+	/* no normal return: the process ends from the signal handler */
+	for (;;)
+		readlog(fd);
+}
diff --git a/tools/testing/selftests/netfilter/config b/tools/testing/selftests/netfilter/config
index 4faf2ce021d9..7c42b1b2c69b 100644
--- a/tools/testing/selftests/netfilter/config
+++ b/tools/testing/selftests/netfilter/config
@@ -6,3 +6,4 @@ CONFIG_NFT_REDIR=m
CONFIG_NFT_MASQ=m
CONFIG_NFT_FLOW_OFFLOAD=m
CONFIG_NF_CT_NETLINK=m
+CONFIG_AUDIT=y
diff --git a/tools/testing/selftests/netfilter/nft_audit.sh b/tools/testing/selftests/netfilter/nft_audit.sh
new file mode 100755
index 000000000000..bb34329e02a7
--- /dev/null
+++ b/tools/testing/selftests/netfilter/nft_audit.sh
@@ -0,0 +1,193 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Check that audit logs generated for nft commands are as expected.
+
+SKIP_RC=4
+RC=0
+
+nft --version >/dev/null 2>&1 || {
+ echo "SKIP: missing nft tool"
+ exit $SKIP_RC
+}
+
+logfile=$(mktemp)
+rulefile=$(mktemp)
+echo "logging into $logfile"
+./audit_logread >"$logfile" &
+logread_pid=$!
+trap 'kill $logread_pid; rm -f $logfile $rulefile' EXIT
+exec 3<"$logfile"
+
+# do_test CMD LOG
+#
+# Run CMD (word-split by the shell, so it may carry several nft commands
+# separated by ';') and compare what got appended to the log meanwhile with
+# LOG. Prints "OK", or "FAIL" followed by the differing lines; every failure
+# decrements RC. If CMD itself fails, the whole script exits with status 1.
+do_test() { # (cmd, log)
+	echo -n "testing for cmd: $1 ... "
+
+	# drain fd 3 so that only records produced by CMD remain unread
+	cat <&3 >/dev/null
+
+	if ! $1 >/dev/null; then
+		exit 1
+	fi
+
+	# presumably gives audit_logread time to write out the new records
+	sleep 0.1
+
+	if res=$(diff -a -u <(echo "$2") - <&3); then
+		echo "OK"
+		return
+	fi
+
+	echo "FAIL"
+	# drop diff's file header and hunk marker lines, keep the content
+	grep -v '^\(---\|+++\|@@\)' <<< "$res"
+	((RC--))
+}
+
+# start from a known state
+nft flush ruleset
+
+# adding tables, chains and rules
+
+# Both tables get the same layout: chains c1, c2 and c3 holding three
+# counter rules each. The "entries" value in each expectation matches the
+# number of objects added by the command.
+for table in t1 t2; do
+ do_test "nft add table $table" \
+ "table=$table family=2 entries=1 op=nft_register_table"
+
+ do_test "nft add chain $table c1" \
+ "table=$table family=2 entries=1 op=nft_register_chain"
+
+ do_test "nft add chain $table c2; add chain $table c3" \
+ "table=$table family=2 entries=2 op=nft_register_chain"
+
+ cmd="add rule $table c1 counter"
+
+ # one rule, then two more in a single command: c1 ends up with three
+ do_test "nft $cmd" \
+ "table=$table family=2 entries=1 op=nft_register_rule"
+
+ do_test "nft $cmd; $cmd" \
+ "table=$table family=2 entries=2 op=nft_register_rule"
+
+ # build one command adding three rules to each of c2 and c3 (six total)
+ cmd=""
+ sep=""
+ for chain in c2 c3; do
+ for i in {1..3}; do
+ cmd+="$sep add rule $table $chain counter"
+ sep=";"
+ done
+ done
+ do_test "nft $cmd" \
+ "table=$table family=2 entries=6 op=nft_register_rule"
+done
+
+# Load 500 more rules into t2 c3 from a file. Afterwards t2 c3 holds 503
+# rules and t2 509 in total; later expectations rely on these numbers.
+for ((i = 0; i < 500; i++)); do
+ echo "add rule t2 c3 counter accept comment \"rule $i\""
+done >$rulefile
+do_test "nft -f $rulefile" \
+'table=t2 family=2 entries=500 op=nft_register_rule'
+
+# adding sets and elements
+
+settype='type inet_service; counter'
+setelem='{ 22, 80, 443 }'
+setblock="{ $settype; elements = $setelem; }"
+do_test "nft add set t1 s $setblock" \
+"table=t1 family=2 entries=4 op=nft_register_set"
+
+do_test "nft add set t1 s2 $setblock; add set t1 s3 { $settype; }" \
+"table=t1 family=2 entries=5 op=nft_register_set"
+
+do_test "nft add element t1 s3 $setelem" \
+"table=t1 family=2 entries=3 op=nft_register_setelem"
+
+# adding counters
+
+do_test 'nft add counter t1 c1' \
+'table=t1 family=2 entries=1 op=nft_register_obj'
+
+do_test 'nft add counter t2 c1; add counter t2 c2' \
+'table=t2 family=2 entries=2 op=nft_register_obj'
+
+# adding/updating quotas
+
+do_test 'nft add quota t1 q1 { 10 bytes }' \
+'table=t1 family=2 entries=1 op=nft_register_obj'
+
+do_test 'nft add quota t2 q1 { 10 bytes }; add quota t2 q2 { 10 bytes }' \
+'table=t2 family=2 entries=2 op=nft_register_obj'
+
+# changing the quota value triggers obj update path
+do_test 'nft add quota t1 q1 { 20 bytes }' \
+'table=t1 family=2 entries=1 op=nft_register_obj'
+
+# resetting rules
+
+do_test 'nft reset rules t1 c2' \
+'table=t1 family=2 entries=3 op=nft_reset_rule'
+
+do_test 'nft reset rules table t1' \
+'table=t1 family=2 entries=3 op=nft_reset_rule
+table=t1 family=2 entries=3 op=nft_reset_rule
+table=t1 family=2 entries=3 op=nft_reset_rule'
+
+do_test 'nft reset rules t2 c3' \
+'table=t2 family=2 entries=189 op=nft_reset_rule
+table=t2 family=2 entries=188 op=nft_reset_rule
+table=t2 family=2 entries=126 op=nft_reset_rule'
+
+do_test 'nft reset rules t2' \
+'table=t2 family=2 entries=3 op=nft_reset_rule
+table=t2 family=2 entries=3 op=nft_reset_rule
+table=t2 family=2 entries=186 op=nft_reset_rule
+table=t2 family=2 entries=188 op=nft_reset_rule
+table=t2 family=2 entries=129 op=nft_reset_rule'
+
+do_test 'nft reset rules' \
+'table=t1 family=2 entries=3 op=nft_reset_rule
+table=t1 family=2 entries=3 op=nft_reset_rule
+table=t1 family=2 entries=3 op=nft_reset_rule
+table=t2 family=2 entries=3 op=nft_reset_rule
+table=t2 family=2 entries=3 op=nft_reset_rule
+table=t2 family=2 entries=180 op=nft_reset_rule
+table=t2 family=2 entries=188 op=nft_reset_rule
+table=t2 family=2 entries=135 op=nft_reset_rule'
+
+# resetting sets and elements
+
+# The array holds "22", ",80" and ",443", so relem grows to "22", "22,80"
+# and "22,80,443" while i equals the number of elements being reset.
+elem=(22 ,80 ,443)
+relem=""
+for i in {1..3}; do
+ relem+="${elem[((i - 1))]}"
+ do_test "nft reset element t1 s { $relem }" \
+ "table=t1 family=2 entries=$i op=nft_reset_setelem"
+done
+
+# resetting the whole set reports its three elements in one record
+do_test 'nft reset set t1 s' \
+'table=t1 family=2 entries=3 op=nft_reset_setelem'
+
+# deleting rules
+
+readarray -t handles < <(nft -a list chain t1 c1 | \
+ sed -n 's/.*counter.* handle \(.*\)$/\1/p')
+
+do_test "nft delete rule t1 c1 handle ${handles[0]}" \
+'table=t1 family=2 entries=1 op=nft_unregister_rule'
+
+cmd='delete rule t1 c1 handle'
+do_test "nft $cmd ${handles[1]}; $cmd ${handles[2]}" \
+'table=t1 family=2 entries=2 op=nft_unregister_rule'
+
+do_test 'nft flush chain t1 c2' \
+'table=t1 family=2 entries=3 op=nft_unregister_rule'
+
+do_test 'nft flush table t2' \
+'table=t2 family=2 entries=509 op=nft_unregister_rule'
+
+# deleting chains
+
+do_test 'nft delete chain t2 c2' \
+'table=t2 family=2 entries=1 op=nft_unregister_chain'
+
+# deleting sets and elements
+
+do_test 'nft delete element t1 s { 22 }' \
+'table=t1 family=2 entries=1 op=nft_unregister_setelem'
+
+do_test 'nft delete element t1 s { 80, 443 }' \
+'table=t1 family=2 entries=2 op=nft_unregister_setelem'
+
+do_test 'nft flush set t1 s2' \
+'table=t1 family=2 entries=3 op=nft_unregister_setelem'
+
+do_test 'nft delete set t1 s2' \
+'table=t1 family=2 entries=1 op=nft_unregister_set'
+
+do_test 'nft delete set t1 s3' \
+'table=t1 family=2 entries=1 op=nft_unregister_set'
+
+exit $RC