Re: Linux 4.10.9

From: Greg KH
Date: Sat Apr 08 2017 - 04:09:50 EST


diff --git a/Documentation/devicetree/bindings/rng/omap_rng.txt b/Documentation/devicetree/bindings/rng/omap_rng.txt
index 471477299ece..9cf7876ab434 100644
--- a/Documentation/devicetree/bindings/rng/omap_rng.txt
+++ b/Documentation/devicetree/bindings/rng/omap_rng.txt
@@ -12,7 +12,8 @@ Required properties:
- reg : Offset and length of the register set for the module
- interrupts : the interrupt number for the RNG module.
Used for "ti,omap4-rng" and "inside-secure,safexcel-eip76"
-- clocks: the trng clock source
+- clocks: the trng clock source. Only mandatory for the
+ "inside-secure,safexcel-eip76" compatible.

Example:
/* AM335x */
diff --git a/Makefile b/Makefile
index 82e0809fed9b..4ebd511dee58 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
VERSION = 4
PATCHLEVEL = 10
-SUBLEVEL = 8
+SUBLEVEL = 9
EXTRAVERSION =
NAME = Fearless Coyote

diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c
index d408fa21a07c..928562967f3c 100644
--- a/arch/arc/mm/cache.c
+++ b/arch/arc/mm/cache.c
@@ -633,6 +633,9 @@ noinline static void slc_entire_op(const int op)

write_aux_reg(ARC_REG_SLC_INVALIDATE, 1);

+ /* Make sure "busy" bit reports correct stataus, see STAR 9001165532 */
+ read_aux_reg(r);
+
/* Important to wait for flush to complete */
while (read_aux_reg(r) & SLC_CTRL_BUSY);
}
diff --git a/arch/arm/boot/dts/bcm5301x.dtsi b/arch/arm/boot/dts/bcm5301x.dtsi
index f09a2bb08979..4b6049240ec2 100644
--- a/arch/arm/boot/dts/bcm5301x.dtsi
+++ b/arch/arm/boot/dts/bcm5301x.dtsi
@@ -66,14 +66,14 @@
timer@20200 {
compatible = "arm,cortex-a9-global-timer";
reg = <0x20200 0x100>;
- interrupts = <GIC_PPI 11 IRQ_TYPE_LEVEL_HIGH>;
+ interrupts = <GIC_PPI 11 IRQ_TYPE_EDGE_RISING>;
clocks = <&periph_clk>;
};

local-timer@20600 {
compatible = "arm,cortex-a9-twd-timer";
reg = <0x20600 0x100>;
- interrupts = <GIC_PPI 13 IRQ_TYPE_LEVEL_HIGH>;
+ interrupts = <GIC_PPI 13 IRQ_TYPE_EDGE_RISING>;
clocks = <&periph_clk>;
};

diff --git a/arch/mips/lantiq/irq.c b/arch/mips/lantiq/irq.c
index 8ac0e5994ed2..0ddf3698b85d 100644
--- a/arch/mips/lantiq/irq.c
+++ b/arch/mips/lantiq/irq.c
@@ -269,6 +269,11 @@ static void ltq_hw5_irqdispatch(void)
DEFINE_HWx_IRQDISPATCH(5)
#endif

+static void ltq_hw_irq_handler(struct irq_desc *desc)
+{
+ ltq_hw_irqdispatch(irq_desc_get_irq(desc) - 2);
+}
+
#ifdef CONFIG_MIPS_MT_SMP
void __init arch_init_ipiirq(int irq, struct irqaction *action)
{
@@ -313,23 +318,19 @@ static struct irqaction irq_call = {
asmlinkage void plat_irq_dispatch(void)
{
unsigned int pending = read_c0_status() & read_c0_cause() & ST0_IM;
- unsigned int i;
-
- if ((MIPS_CPU_TIMER_IRQ == 7) && (pending & CAUSEF_IP7)) {
- do_IRQ(MIPS_CPU_TIMER_IRQ);
- goto out;
- } else {
- for (i = 0; i < MAX_IM; i++) {
- if (pending & (CAUSEF_IP2 << i)) {
- ltq_hw_irqdispatch(i);
- goto out;
- }
- }
+ int irq;
+
+ if (!pending) {
+ spurious_interrupt();
+ return;
}
- pr_alert("Spurious IRQ: CAUSE=0x%08x\n", read_c0_status());

-out:
- return;
+ pending >>= CAUSEB_IP;
+ while (pending) {
+ irq = fls(pending) - 1;
+ do_IRQ(MIPS_CPU_IRQ_BASE + irq);
+ pending &= ~BIT(irq);
+ }
}

static int icu_map(struct irq_domain *d, unsigned int irq, irq_hw_number_t hw)
@@ -354,11 +355,6 @@ static const struct irq_domain_ops irq_domain_ops = {
.map = icu_map,
};

-static struct irqaction cascade = {
- .handler = no_action,
- .name = "cascade",
-};
-
int __init icu_of_init(struct device_node *node, struct device_node *parent)
{
struct device_node *eiu_node;
@@ -390,7 +386,7 @@ int __init icu_of_init(struct device_node *node, struct device_node *parent)
mips_cpu_irq_init();

for (i = 0; i < MAX_IM; i++)
- setup_irq(i + 2, &cascade);
+ irq_set_chained_handler(i + 2, ltq_hw_irq_handler);

if (cpu_has_vint) {
pr_info("Setting up vectored interrupts\n");
diff --git a/arch/parisc/include/asm/uaccess.h b/arch/parisc/include/asm/uaccess.h
index 9a2aee1b90fc..7fcf5128996a 100644
--- a/arch/parisc/include/asm/uaccess.h
+++ b/arch/parisc/include/asm/uaccess.h
@@ -68,6 +68,15 @@ struct exception_table_entry {
".previous\n"

/*
+ * ASM_EXCEPTIONTABLE_ENTRY_EFAULT() creates a special exception table entry
+ * (with lowest bit set) for which the fault handler in fixup_exception() will
+ * load -EFAULT into %r8 for a read or write fault, and zeroes the target
+ * register in case of a read fault in get_user().
+ */
+#define ASM_EXCEPTIONTABLE_ENTRY_EFAULT( fault_addr, except_addr )\
+ ASM_EXCEPTIONTABLE_ENTRY( fault_addr, except_addr + 1)
+
+/*
* The page fault handler stores, in a per-cpu area, the following information
* if a fixup routine is available.
*/
@@ -94,7 +103,7 @@ struct exception_data {
#define __get_user(x, ptr) \
({ \
register long __gu_err __asm__ ("r8") = 0; \
- register long __gu_val __asm__ ("r9") = 0; \
+ register long __gu_val; \
\
load_sr2(); \
switch (sizeof(*(ptr))) { \
@@ -110,22 +119,23 @@ struct exception_data {
})

#define __get_user_asm(ldx, ptr) \
- __asm__("\n1:\t" ldx "\t0(%%sr2,%2),%0\n\t" \
- ASM_EXCEPTIONTABLE_ENTRY(1b, fixup_get_user_skip_1)\
+ __asm__("1: " ldx " 0(%%sr2,%2),%0\n" \
+ "9:\n" \
+ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b) \
: "=r"(__gu_val), "=r"(__gu_err) \
- : "r"(ptr), "1"(__gu_err) \
- : "r1");
+ : "r"(ptr), "1"(__gu_err));

#if !defined(CONFIG_64BIT)

#define __get_user_asm64(ptr) \
- __asm__("\n1:\tldw 0(%%sr2,%2),%0" \
- "\n2:\tldw 4(%%sr2,%2),%R0\n\t" \
- ASM_EXCEPTIONTABLE_ENTRY(1b, fixup_get_user_skip_2)\
- ASM_EXCEPTIONTABLE_ENTRY(2b, fixup_get_user_skip_1)\
+ __asm__(" copy %%r0,%R0\n" \
+ "1: ldw 0(%%sr2,%2),%0\n" \
+ "2: ldw 4(%%sr2,%2),%R0\n" \
+ "9:\n" \
+ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b) \
+ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 9b) \
: "=r"(__gu_val), "=r"(__gu_err) \
- : "r"(ptr), "1"(__gu_err) \
- : "r1");
+ : "r"(ptr), "1"(__gu_err));

#endif /* !defined(CONFIG_64BIT) */

@@ -151,32 +161,31 @@ struct exception_data {
* The "__put_user/kernel_asm()" macros tell gcc they read from memory
* instead of writing. This is because they do not write to any memory
* gcc knows about, so there are no aliasing issues. These macros must
- * also be aware that "fixup_put_user_skip_[12]" are executed in the
- * context of the fault, and any registers used there must be listed
- * as clobbers. In this case only "r1" is used by the current routines.
- * r8/r9 are already listed as err/val.
+ * also be aware that fixups are executed in the context of the fault,
+ * and any registers used there must be listed as clobbers.
+ * r8 is already listed as err.
*/

#define __put_user_asm(stx, x, ptr) \
__asm__ __volatile__ ( \
- "\n1:\t" stx "\t%2,0(%%sr2,%1)\n\t" \
- ASM_EXCEPTIONTABLE_ENTRY(1b, fixup_put_user_skip_1)\
+ "1: " stx " %2,0(%%sr2,%1)\n" \
+ "9:\n" \
+ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b) \
: "=r"(__pu_err) \
- : "r"(ptr), "r"(x), "0"(__pu_err) \
- : "r1")
+ : "r"(ptr), "r"(x), "0"(__pu_err))


#if !defined(CONFIG_64BIT)

#define __put_user_asm64(__val, ptr) do { \
__asm__ __volatile__ ( \
- "\n1:\tstw %2,0(%%sr2,%1)" \
- "\n2:\tstw %R2,4(%%sr2,%1)\n\t" \
- ASM_EXCEPTIONTABLE_ENTRY(1b, fixup_put_user_skip_2)\
- ASM_EXCEPTIONTABLE_ENTRY(2b, fixup_put_user_skip_1)\
+ "1: stw %2,0(%%sr2,%1)\n" \
+ "2: stw %R2,4(%%sr2,%1)\n" \
+ "9:\n" \
+ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b) \
+ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 9b) \
: "=r"(__pu_err) \
- : "r"(ptr), "r"(__val), "0"(__pu_err) \
- : "r1"); \
+ : "r"(ptr), "r"(__val), "0"(__pu_err)); \
} while (0)

#endif /* !defined(CONFIG_64BIT) */
diff --git a/arch/parisc/kernel/parisc_ksyms.c b/arch/parisc/kernel/parisc_ksyms.c
index 7484b3d11e0d..c6d6272a934f 100644
--- a/arch/parisc/kernel/parisc_ksyms.c
+++ b/arch/parisc/kernel/parisc_ksyms.c
@@ -47,16 +47,6 @@ EXPORT_SYMBOL(__cmpxchg_u64);
EXPORT_SYMBOL(lclear_user);
EXPORT_SYMBOL(lstrnlen_user);

-/* Global fixups - defined as int to avoid creation of function pointers */
-extern int fixup_get_user_skip_1;
-extern int fixup_get_user_skip_2;
-extern int fixup_put_user_skip_1;
-extern int fixup_put_user_skip_2;
-EXPORT_SYMBOL(fixup_get_user_skip_1);
-EXPORT_SYMBOL(fixup_get_user_skip_2);
-EXPORT_SYMBOL(fixup_put_user_skip_1);
-EXPORT_SYMBOL(fixup_put_user_skip_2);
-
#ifndef CONFIG_64BIT
/* Needed so insmod can set dp value */
extern int $global$;
diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c
index 9e2d98ee6f9c..3286cbc7b934 100644
--- a/arch/parisc/kernel/process.c
+++ b/arch/parisc/kernel/process.c
@@ -140,6 +140,8 @@ void machine_power_off(void)
printk(KERN_EMERG "System shut down completed.\n"
"Please power this system off now.");

+ /* prevent soft lockup/stalled CPU messages for endless loop. */
+ rcu_sysrq_start();
for (;;);
}

diff --git a/arch/parisc/lib/Makefile b/arch/parisc/lib/Makefile
index 8fa92b8d839a..f2dac4d73b1b 100644
--- a/arch/parisc/lib/Makefile
+++ b/arch/parisc/lib/Makefile
@@ -2,7 +2,7 @@
# Makefile for parisc-specific library files
#

-lib-y := lusercopy.o bitops.o checksum.o io.o memset.o fixup.o memcpy.o \
+lib-y := lusercopy.o bitops.o checksum.o io.o memset.o memcpy.o \
ucmpdi2.o delay.o

obj-y := iomap.o
diff --git a/arch/parisc/lib/fixup.S b/arch/parisc/lib/fixup.S
deleted file mode 100644
index a5b72f22c7a6..000000000000
--- a/arch/parisc/lib/fixup.S
+++ /dev/null
@@ -1,98 +0,0 @@
-/*
- * Linux/PA-RISC Project (http://www.parisc-linux.org/)
- *
- * Copyright (C) 2004 Randolph Chung <tausq@xxxxxxxxxx>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- * Fixup routines for kernel exception handling.
- */
-#include <asm/asm-offsets.h>
-#include <asm/assembly.h>
-#include <asm/errno.h>
-#include <linux/linkage.h>
-
-#ifdef CONFIG_SMP
- .macro get_fault_ip t1 t2
- loadgp
- addil LT%__per_cpu_offset,%r27
- LDREG RT%__per_cpu_offset(%r1),\t1
- /* t2 = smp_processor_id() */
- mfctl 30,\t2
- ldw TI_CPU(\t2),\t2
-#ifdef CONFIG_64BIT
- extrd,u \t2,63,32,\t2
-#endif
- /* t2 = &__per_cpu_offset[smp_processor_id()]; */
- LDREGX \t2(\t1),\t2
- addil LT%exception_data,%r27
- LDREG RT%exception_data(%r1),\t1
- /* t1 = this_cpu_ptr(&exception_data) */
- add,l \t1,\t2,\t1
- /* %r27 = t1->fault_gp - restore gp */
- LDREG EXCDATA_GP(\t1), %r27
- /* t1 = t1->fault_ip */
- LDREG EXCDATA_IP(\t1), \t1
- .endm
-#else
- .macro get_fault_ip t1 t2
- loadgp
- /* t1 = this_cpu_ptr(&exception_data) */
- addil LT%exception_data,%r27
- LDREG RT%exception_data(%r1),\t2
- /* %r27 = t2->fault_gp - restore gp */
- LDREG EXCDATA_GP(\t2), %r27
- /* t1 = t2->fault_ip */
- LDREG EXCDATA_IP(\t2), \t1
- .endm
-#endif
-
- .level LEVEL
-
- .text
- .section .fixup, "ax"
-
- /* get_user() fixups, store -EFAULT in r8, and 0 in r9 */
-ENTRY_CFI(fixup_get_user_skip_1)
- get_fault_ip %r1,%r8
- ldo 4(%r1), %r1
- ldi -EFAULT, %r8
- bv %r0(%r1)
- copy %r0, %r9
-ENDPROC_CFI(fixup_get_user_skip_1)
-
-ENTRY_CFI(fixup_get_user_skip_2)
- get_fault_ip %r1,%r8
- ldo 8(%r1), %r1
- ldi -EFAULT, %r8
- bv %r0(%r1)
- copy %r0, %r9
-ENDPROC_CFI(fixup_get_user_skip_2)
-
- /* put_user() fixups, store -EFAULT in r8 */
-ENTRY_CFI(fixup_put_user_skip_1)
- get_fault_ip %r1,%r8
- ldo 4(%r1), %r1
- bv %r0(%r1)
- ldi -EFAULT, %r8
-ENDPROC_CFI(fixup_put_user_skip_1)
-
-ENTRY_CFI(fixup_put_user_skip_2)
- get_fault_ip %r1,%r8
- ldo 8(%r1), %r1
- bv %r0(%r1)
- ldi -EFAULT, %r8
-ENDPROC_CFI(fixup_put_user_skip_2)
-
diff --git a/arch/parisc/lib/lusercopy.S b/arch/parisc/lib/lusercopy.S
index 56845de6b5df..f01188c044ee 100644
--- a/arch/parisc/lib/lusercopy.S
+++ b/arch/parisc/lib/lusercopy.S
@@ -5,6 +5,8 @@
* Copyright (C) 2000 Richard Hirst <rhirst with parisc-linux.org>
* Copyright (C) 2001 Matthieu Delahaye <delahaym at esiee.fr>
* Copyright (C) 2003 Randolph Chung <tausq with parisc-linux.org>
+ * Copyright (C) 2017 Helge Deller <deller@xxxxxx>
+ * Copyright (C) 2017 John David Anglin <dave.anglin@xxxxxxxx>
*
*
* This program is free software; you can redistribute it and/or modify
@@ -132,4 +134,320 @@ ENDPROC_CFI(lstrnlen_user)

.procend

+
+
+/*
+ * unsigned long pa_memcpy(void *dstp, const void *srcp, unsigned long len)
+ *
+ * Inputs:
+ * - sr1 already contains space of source region
+ * - sr2 already contains space of destination region
+ *
+ * Returns:
+ * - number of bytes that could not be copied.
+ * On success, this will be zero.
+ *
+ * This code is based on a C-implementation of a copy routine written by
+ * Randolph Chung, which in turn was derived from the glibc.
+ *
+ * Several strategies are tried to try to get the best performance for various
+ * conditions. In the optimal case, we copy by loops that copy 32- or 16-bytes
+ * at a time using general registers. Unaligned copies are handled either by
+ * aligning the destination and then using shift-and-write method, or in a few
+ * cases by falling back to a byte-at-a-time copy.
+ *
+ * Testing with various alignments and buffer sizes shows that this code is
+ * often >10x faster than a simple byte-at-a-time copy, even for strangely
+ * aligned operands. It is interesting to note that the glibc version of memcpy
+ * (written in C) is actually quite fast already. This routine is able to beat
+ * it by 30-40% for aligned copies because of the loop unrolling, but in some
+ * cases the glibc version is still slightly faster. This lends more
+ * credibility that gcc can generate very good code as long as we are careful.
+ *
+ * Possible optimizations:
+ * - add cache prefetching
+ * - try not to use the post-increment address modifiers; they may create
+ * additional interlocks. Assumption is that those were only efficient on old
+ * machines (pre PA8000 processors)
+ */
+
+ dst = arg0
+ src = arg1
+ len = arg2
+ end = arg3
+ t1 = r19
+ t2 = r20
+ t3 = r21
+ t4 = r22
+ srcspc = sr1
+ dstspc = sr2
+
+ t0 = r1
+ a1 = t1
+ a2 = t2
+ a3 = t3
+ a0 = t4
+
+ save_src = ret0
+ save_dst = ret1
+ save_len = r31
+
+ENTRY_CFI(pa_memcpy)
+ .proc
+ .callinfo NO_CALLS
+ .entry
+
+ /* Last destination address */
+ add dst,len,end
+
+ /* short copy with less than 16 bytes? */
+ cmpib,>>=,n 15,len,.Lbyte_loop
+
+ /* same alignment? */
+ xor src,dst,t0
+ extru t0,31,2,t1
+ cmpib,<>,n 0,t1,.Lunaligned_copy
+
+#ifdef CONFIG_64BIT
+ /* only do 64-bit copies if we can get aligned. */
+ extru t0,31,3,t1
+ cmpib,<>,n 0,t1,.Lalign_loop32
+
+ /* loop until we are 64-bit aligned */
+.Lalign_loop64:
+ extru dst,31,3,t1
+ cmpib,=,n 0,t1,.Lcopy_loop_16
+20: ldb,ma 1(srcspc,src),t1
+21: stb,ma t1,1(dstspc,dst)
+ b .Lalign_loop64
+ ldo -1(len),len
+
+ ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done)
+ ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done)
+
+ ldi 31,t0
+.Lcopy_loop_16:
+ cmpb,COND(>>=),n t0,len,.Lword_loop
+
+10: ldd 0(srcspc,src),t1
+11: ldd 8(srcspc,src),t2
+ ldo 16(src),src
+12: std,ma t1,8(dstspc,dst)
+13: std,ma t2,8(dstspc,dst)
+14: ldd 0(srcspc,src),t1
+15: ldd 8(srcspc,src),t2
+ ldo 16(src),src
+16: std,ma t1,8(dstspc,dst)
+17: std,ma t2,8(dstspc,dst)
+
+ ASM_EXCEPTIONTABLE_ENTRY(10b,.Lcopy_done)
+ ASM_EXCEPTIONTABLE_ENTRY(11b,.Lcopy16_fault)
+ ASM_EXCEPTIONTABLE_ENTRY(12b,.Lcopy_done)
+ ASM_EXCEPTIONTABLE_ENTRY(13b,.Lcopy_done)
+ ASM_EXCEPTIONTABLE_ENTRY(14b,.Lcopy_done)
+ ASM_EXCEPTIONTABLE_ENTRY(15b,.Lcopy16_fault)
+ ASM_EXCEPTIONTABLE_ENTRY(16b,.Lcopy_done)
+ ASM_EXCEPTIONTABLE_ENTRY(17b,.Lcopy_done)
+
+ b .Lcopy_loop_16
+ ldo -32(len),len
+
+.Lword_loop:
+ cmpib,COND(>>=),n 3,len,.Lbyte_loop
+20: ldw,ma 4(srcspc,src),t1
+21: stw,ma t1,4(dstspc,dst)
+ b .Lword_loop
+ ldo -4(len),len
+
+ ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done)
+ ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done)
+
+#endif /* CONFIG_64BIT */
+
+ /* loop until we are 32-bit aligned */
+.Lalign_loop32:
+ extru dst,31,2,t1
+ cmpib,=,n 0,t1,.Lcopy_loop_4
+20: ldb,ma 1(srcspc,src),t1
+21: stb,ma t1,1(dstspc,dst)
+ b .Lalign_loop32
+ ldo -1(len),len
+
+ ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done)
+ ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done)
+
+
+.Lcopy_loop_4:
+ cmpib,COND(>>=),n 15,len,.Lbyte_loop
+
+10: ldw 0(srcspc,src),t1
+11: ldw 4(srcspc,src),t2
+12: stw,ma t1,4(dstspc,dst)
+13: stw,ma t2,4(dstspc,dst)
+14: ldw 8(srcspc,src),t1
+15: ldw 12(srcspc,src),t2
+ ldo 16(src),src
+16: stw,ma t1,4(dstspc,dst)
+17: stw,ma t2,4(dstspc,dst)
+
+ ASM_EXCEPTIONTABLE_ENTRY(10b,.Lcopy_done)
+ ASM_EXCEPTIONTABLE_ENTRY(11b,.Lcopy8_fault)
+ ASM_EXCEPTIONTABLE_ENTRY(12b,.Lcopy_done)
+ ASM_EXCEPTIONTABLE_ENTRY(13b,.Lcopy_done)
+ ASM_EXCEPTIONTABLE_ENTRY(14b,.Lcopy_done)
+ ASM_EXCEPTIONTABLE_ENTRY(15b,.Lcopy8_fault)
+ ASM_EXCEPTIONTABLE_ENTRY(16b,.Lcopy_done)
+ ASM_EXCEPTIONTABLE_ENTRY(17b,.Lcopy_done)
+
+ b .Lcopy_loop_4
+ ldo -16(len),len
+
+.Lbyte_loop:
+ cmpclr,COND(<>) len,%r0,%r0
+ b,n .Lcopy_done
+20: ldb 0(srcspc,src),t1
+ ldo 1(src),src
+21: stb,ma t1,1(dstspc,dst)
+ b .Lbyte_loop
+ ldo -1(len),len
+
+ ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done)
+ ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done)
+
+.Lcopy_done:
+ bv %r0(%r2)
+ sub end,dst,ret0
+
+
+ /* src and dst are not aligned the same way. */
+ /* need to go the hard way */
+.Lunaligned_copy:
+ /* align until dst is 32bit-word-aligned */
+ extru dst,31,2,t1
+ cmpib,COND(=),n 0,t1,.Lcopy_dstaligned
+20: ldb 0(srcspc,src),t1
+ ldo 1(src),src
+21: stb,ma t1,1(dstspc,dst)
+ b .Lunaligned_copy
+ ldo -1(len),len
+
+ ASM_EXCEPTIONTABLE_ENTRY(20b,.Lcopy_done)
+ ASM_EXCEPTIONTABLE_ENTRY(21b,.Lcopy_done)
+
+.Lcopy_dstaligned:
+
+ /* store src, dst and len in safe place */
+ copy src,save_src
+ copy dst,save_dst
+ copy len,save_len
+
+ /* len now needs give number of words to copy */
+ SHRREG len,2,len
+
+ /*
+ * Copy from a not-aligned src to an aligned dst using shifts.
+ * Handles 4 words per loop.
+ */
+
+ depw,z src,28,2,t0
+ subi 32,t0,t0
+ mtsar t0
+ extru len,31,2,t0
+ cmpib,= 2,t0,.Lcase2
+ /* Make src aligned by rounding it down. */
+ depi 0,31,2,src
+
+ cmpiclr,<> 3,t0,%r0
+ b,n .Lcase3
+ cmpiclr,<> 1,t0,%r0
+ b,n .Lcase1
+.Lcase0:
+ cmpb,= %r0,len,.Lcda_finish
+ nop
+
+1: ldw,ma 4(srcspc,src), a3
+ ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
+1: ldw,ma 4(srcspc,src), a0
+ ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
+ b,n .Ldo3
+.Lcase1:
+1: ldw,ma 4(srcspc,src), a2
+ ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
+1: ldw,ma 4(srcspc,src), a3
+ ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
+ ldo -1(len),len
+ cmpb,=,n %r0,len,.Ldo0
+.Ldo4:
+1: ldw,ma 4(srcspc,src), a0
+ ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
+ shrpw a2, a3, %sar, t0
+1: stw,ma t0, 4(dstspc,dst)
+ ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done)
+.Ldo3:
+1: ldw,ma 4(srcspc,src), a1
+ ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
+ shrpw a3, a0, %sar, t0
+1: stw,ma t0, 4(dstspc,dst)
+ ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done)
+.Ldo2:
+1: ldw,ma 4(srcspc,src), a2
+ ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
+ shrpw a0, a1, %sar, t0
+1: stw,ma t0, 4(dstspc,dst)
+ ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done)
+.Ldo1:
+1: ldw,ma 4(srcspc,src), a3
+ ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
+ shrpw a1, a2, %sar, t0
+1: stw,ma t0, 4(dstspc,dst)
+ ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done)
+ ldo -4(len),len
+ cmpb,<> %r0,len,.Ldo4
+ nop
+.Ldo0:
+ shrpw a2, a3, %sar, t0
+1: stw,ma t0, 4(dstspc,dst)
+ ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcopy_done)
+
+.Lcda_rdfault:
+.Lcda_finish:
+ /* calculate new src, dst and len and jump to byte-copy loop */
+ sub dst,save_dst,t0
+ add save_src,t0,src
+ b .Lbyte_loop
+ sub save_len,t0,len
+
+.Lcase3:
+1: ldw,ma 4(srcspc,src), a0
+ ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
+1: ldw,ma 4(srcspc,src), a1
+ ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
+ b .Ldo2
+ ldo 1(len),len
+.Lcase2:
+1: ldw,ma 4(srcspc,src), a1
+ ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
+1: ldw,ma 4(srcspc,src), a2
+ ASM_EXCEPTIONTABLE_ENTRY(1b,.Lcda_rdfault)
+ b .Ldo1
+ ldo 2(len),len
+
+
+ /* fault exception fixup handlers: */
+#ifdef CONFIG_64BIT
+.Lcopy16_fault:
+10: b .Lcopy_done
+ std,ma t1,8(dstspc,dst)
+ ASM_EXCEPTIONTABLE_ENTRY(10b,.Lcopy_done)
+#endif
+
+.Lcopy8_fault:
+10: b .Lcopy_done
+ stw,ma t1,4(dstspc,dst)
+ ASM_EXCEPTIONTABLE_ENTRY(10b,.Lcopy_done)
+
+ .exit
+ENDPROC_CFI(pa_memcpy)
+ .procend
+
.end
diff --git a/arch/parisc/lib/memcpy.c b/arch/parisc/lib/memcpy.c
index f82ff10ed974..b3d47ec1d80a 100644
--- a/arch/parisc/lib/memcpy.c
+++ b/arch/parisc/lib/memcpy.c
@@ -2,7 +2,7 @@
* Optimized memory copy routines.
*
* Copyright (C) 2004 Randolph Chung <tausq@xxxxxxxxxx>
- * Copyright (C) 2013 Helge Deller <deller@xxxxxx>
+ * Copyright (C) 2013-2017 Helge Deller <deller@xxxxxx>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -21,474 +21,21 @@
* Portions derived from the GNU C Library
* Copyright (C) 1991, 1997, 2003 Free Software Foundation, Inc.
*
- * Several strategies are tried to try to get the best performance for various
- * conditions. In the optimal case, we copy 64-bytes in an unrolled loop using
- * fp regs. This is followed by loops that copy 32- or 16-bytes at a time using
- * general registers. Unaligned copies are handled either by aligning the
- * destination and then using shift-and-write method, or in a few cases by
- * falling back to a byte-at-a-time copy.
- *
- * I chose to implement this in C because it is easier to maintain and debug,
- * and in my experiments it appears that the C code generated by gcc (3.3/3.4
- * at the time of writing) is fairly optimal. Unfortunately some of the
- * semantics of the copy routine (exception handling) is difficult to express
- * in C, so we have to play some tricks to get it to work.
- *
- * All the loads and stores are done via explicit asm() code in order to use
- * the right space registers.
- *
- * Testing with various alignments and buffer sizes shows that this code is
- * often >10x faster than a simple byte-at-a-time copy, even for strangely
- * aligned operands. It is interesting to note that the glibc version
- * of memcpy (written in C) is actually quite fast already. This routine is
- * able to beat it by 30-40% for aligned copies because of the loop unrolling,
- * but in some cases the glibc version is still slightly faster. This lends
- * more credibility that gcc can generate very good code as long as we are
- * careful.
- *
- * TODO:
- * - cache prefetching needs more experimentation to get optimal settings
- * - try not to use the post-increment address modifiers; they create additional
- * interlocks
- * - replace byte-copy loops with stybs sequences
*/

-#ifdef __KERNEL__
#include <linux/module.h>
#include <linux/compiler.h>
#include <linux/uaccess.h>
-#define s_space "%%sr1"
-#define d_space "%%sr2"
-#else
-#include "memcpy.h"
-#define s_space "%%sr0"
-#define d_space "%%sr0"
-#define pa_memcpy new2_copy
-#endif

DECLARE_PER_CPU(struct exception_data, exception_data);

-#define preserve_branch(label) do { \
- volatile int dummy = 0; \
- /* The following branch is never taken, it's just here to */ \
- /* prevent gcc from optimizing away our exception code. */ \
- if (unlikely(dummy != dummy)) \
- goto label; \
-} while (0)
-
#define get_user_space() (segment_eq(get_fs(), KERNEL_DS) ? 0 : mfsp(3))
#define get_kernel_space() (0)

-#define MERGE(w0, sh_1, w1, sh_2) ({ \
- unsigned int _r; \
- asm volatile ( \
- "mtsar %3\n" \
- "shrpw %1, %2, %%sar, %0\n" \
- : "=r"(_r) \
- : "r"(w0), "r"(w1), "r"(sh_2) \
- ); \
- _r; \
-})
-#define THRESHOLD 16
-
-#ifdef DEBUG_MEMCPY
-#define DPRINTF(fmt, args...) do { printk(KERN_DEBUG "%s:%d:%s ", __FILE__, __LINE__, __func__ ); printk(KERN_DEBUG fmt, ##args ); } while (0)
-#else
-#define DPRINTF(fmt, args...)
-#endif
-
-#define def_load_ai_insn(_insn,_sz,_tt,_s,_a,_t,_e) \
- __asm__ __volatile__ ( \
- "1:\t" #_insn ",ma " #_sz "(" _s ",%1), %0\n\t" \
- ASM_EXCEPTIONTABLE_ENTRY(1b,_e) \
- : _tt(_t), "+r"(_a) \
- : \
- : "r8")
-
-#define def_store_ai_insn(_insn,_sz,_tt,_s,_a,_t,_e) \
- __asm__ __volatile__ ( \
- "1:\t" #_insn ",ma %1, " #_sz "(" _s ",%0)\n\t" \
- ASM_EXCEPTIONTABLE_ENTRY(1b,_e) \
- : "+r"(_a) \
- : _tt(_t) \
- : "r8")
-
-#define ldbma(_s, _a, _t, _e) def_load_ai_insn(ldbs,1,"=r",_s,_a,_t,_e)
-#define stbma(_s, _t, _a, _e) def_store_ai_insn(stbs,1,"r",_s,_a,_t,_e)
-#define ldwma(_s, _a, _t, _e) def_load_ai_insn(ldw,4,"=r",_s,_a,_t,_e)
-#define stwma(_s, _t, _a, _e) def_store_ai_insn(stw,4,"r",_s,_a,_t,_e)
-#define flddma(_s, _a, _t, _e) def_load_ai_insn(fldd,8,"=f",_s,_a,_t,_e)
-#define fstdma(_s, _t, _a, _e) def_store_ai_insn(fstd,8,"f",_s,_a,_t,_e)
-
-#define def_load_insn(_insn,_tt,_s,_o,_a,_t,_e) \
- __asm__ __volatile__ ( \
- "1:\t" #_insn " " #_o "(" _s ",%1), %0\n\t" \
- ASM_EXCEPTIONTABLE_ENTRY(1b,_e) \
- : _tt(_t) \
- : "r"(_a) \
- : "r8")
-
-#define def_store_insn(_insn,_tt,_s,_t,_o,_a,_e) \
- __asm__ __volatile__ ( \
- "1:\t" #_insn " %0, " #_o "(" _s ",%1)\n\t" \
- ASM_EXCEPTIONTABLE_ENTRY(1b,_e) \
- : \
- : _tt(_t), "r"(_a) \
- : "r8")
-
-#define ldw(_s,_o,_a,_t,_e) def_load_insn(ldw,"=r",_s,_o,_a,_t,_e)
-#define stw(_s,_t,_o,_a,_e) def_store_insn(stw,"r",_s,_t,_o,_a,_e)
-
-#ifdef CONFIG_PREFETCH
-static inline void prefetch_src(const void *addr)
-{
- __asm__("ldw 0(" s_space ",%0), %%r0" : : "r" (addr));
-}
-
-static inline void prefetch_dst(const void *addr)
-{
- __asm__("ldd 0(" d_space ",%0), %%r0" : : "r" (addr));
-}
-#else
-#define prefetch_src(addr) do { } while(0)
-#define prefetch_dst(addr) do { } while(0)
-#endif
-
-#define PA_MEMCPY_OK 0
-#define PA_MEMCPY_LOAD_ERROR 1
-#define PA_MEMCPY_STORE_ERROR 2
-
-/* Copy from a not-aligned src to an aligned dst, using shifts. Handles 4 words
- * per loop. This code is derived from glibc.
- */
-static noinline unsigned long copy_dstaligned(unsigned long dst,
- unsigned long src, unsigned long len)
-{
- /* gcc complains that a2 and a3 may be uninitialized, but actually
- * they cannot be. Initialize a2/a3 to shut gcc up.
- */
- register unsigned int a0, a1, a2 = 0, a3 = 0;
- int sh_1, sh_2;
-
- /* prefetch_src((const void *)src); */
-
- /* Calculate how to shift a word read at the memory operation
- aligned srcp to make it aligned for copy. */
- sh_1 = 8 * (src % sizeof(unsigned int));
- sh_2 = 8 * sizeof(unsigned int) - sh_1;
-
- /* Make src aligned by rounding it down. */
- src &= -sizeof(unsigned int);
-
- switch (len % 4)
- {
- case 2:
- /* a1 = ((unsigned int *) src)[0];
- a2 = ((unsigned int *) src)[1]; */
- ldw(s_space, 0, src, a1, cda_ldw_exc);
- ldw(s_space, 4, src, a2, cda_ldw_exc);
- src -= 1 * sizeof(unsigned int);
- dst -= 3 * sizeof(unsigned int);
- len += 2;
- goto do1;
- case 3:
- /* a0 = ((unsigned int *) src)[0];
- a1 = ((unsigned int *) src)[1]; */
- ldw(s_space, 0, src, a0, cda_ldw_exc);
- ldw(s_space, 4, src, a1, cda_ldw_exc);
- src -= 0 * sizeof(unsigned int);
- dst -= 2 * sizeof(unsigned int);
- len += 1;
- goto do2;
- case 0:
- if (len == 0)
- return PA_MEMCPY_OK;
- /* a3 = ((unsigned int *) src)[0];
- a0 = ((unsigned int *) src)[1]; */
- ldw(s_space, 0, src, a3, cda_ldw_exc);
- ldw(s_space, 4, src, a0, cda_ldw_exc);
- src -=-1 * sizeof(unsigned int);
- dst -= 1 * sizeof(unsigned int);
- len += 0;
- goto do3;
- case 1:
- /* a2 = ((unsigned int *) src)[0];
- a3 = ((unsigned int *) src)[1]; */
- ldw(s_space, 0, src, a2, cda_ldw_exc);
- ldw(s_space, 4, src, a3, cda_ldw_exc);
- src -=-2 * sizeof(unsigned int);
- dst -= 0 * sizeof(unsigned int);
- len -= 1;
- if (len == 0)
- goto do0;
- goto do4; /* No-op. */
- }
-
- do
- {
- /* prefetch_src((const void *)(src + 4 * sizeof(unsigned int))); */
-do4:
- /* a0 = ((unsigned int *) src)[0]; */
- ldw(s_space, 0, src, a0, cda_ldw_exc);
- /* ((unsigned int *) dst)[0] = MERGE (a2, sh_1, a3, sh_2); */
- stw(d_space, MERGE (a2, sh_1, a3, sh_2), 0, dst, cda_stw_exc);
-do3:
- /* a1 = ((unsigned int *) src)[1]; */
- ldw(s_space, 4, src, a1, cda_ldw_exc);
- /* ((unsigned int *) dst)[1] = MERGE (a3, sh_1, a0, sh_2); */
- stw(d_space, MERGE (a3, sh_1, a0, sh_2), 4, dst, cda_stw_exc);
-do2:
- /* a2 = ((unsigned int *) src)[2]; */
- ldw(s_space, 8, src, a2, cda_ldw_exc);
- /* ((unsigned int *) dst)[2] = MERGE (a0, sh_1, a1, sh_2); */
- stw(d_space, MERGE (a0, sh_1, a1, sh_2), 8, dst, cda_stw_exc);
-do1:
- /* a3 = ((unsigned int *) src)[3]; */
- ldw(s_space, 12, src, a3, cda_ldw_exc);
- /* ((unsigned int *) dst)[3] = MERGE (a1, sh_1, a2, sh_2); */
- stw(d_space, MERGE (a1, sh_1, a2, sh_2), 12, dst, cda_stw_exc);
-
- src += 4 * sizeof(unsigned int);
- dst += 4 * sizeof(unsigned int);
- len -= 4;
- }
- while (len != 0);
-
-do0:
- /* ((unsigned int *) dst)[0] = MERGE (a2, sh_1, a3, sh_2); */
- stw(d_space, MERGE (a2, sh_1, a3, sh_2), 0, dst, cda_stw_exc);
-
- preserve_branch(handle_load_error);
- preserve_branch(handle_store_error);
-
- return PA_MEMCPY_OK;
-
-handle_load_error:
- __asm__ __volatile__ ("cda_ldw_exc:\n");
- return PA_MEMCPY_LOAD_ERROR;
-
-handle_store_error:
- __asm__ __volatile__ ("cda_stw_exc:\n");
- return PA_MEMCPY_STORE_ERROR;
-}
-
-
-/* Returns PA_MEMCPY_OK, PA_MEMCPY_LOAD_ERROR or PA_MEMCPY_STORE_ERROR.
- * In case of an access fault the faulty address can be read from the per_cpu
- * exception data struct. */
-static noinline unsigned long pa_memcpy_internal(void *dstp, const void *srcp,
- unsigned long len)
-{
- register unsigned long src, dst, t1, t2, t3;
- register unsigned char *pcs, *pcd;
- register unsigned int *pws, *pwd;
- register double *pds, *pdd;
- unsigned long ret;
-
- src = (unsigned long)srcp;
- dst = (unsigned long)dstp;
- pcs = (unsigned char *)srcp;
- pcd = (unsigned char *)dstp;
-
- /* prefetch_src((const void *)srcp); */
-
- if (len < THRESHOLD)
- goto byte_copy;
-
- /* Check alignment */
- t1 = (src ^ dst);
- if (unlikely(t1 & (sizeof(double)-1)))
- goto unaligned_copy;
-
- /* src and dst have same alignment. */
-
- /* Copy bytes till we are double-aligned. */
- t2 = src & (sizeof(double) - 1);
- if (unlikely(t2 != 0)) {
- t2 = sizeof(double) - t2;
- while (t2 && len) {
- /* *pcd++ = *pcs++; */
- ldbma(s_space, pcs, t3, pmc_load_exc);
- len--;
- stbma(d_space, t3, pcd, pmc_store_exc);
- t2--;
- }
- }
-
- pds = (double *)pcs;
- pdd = (double *)pcd;
-
-#if 0
- /* Copy 8 doubles at a time */
- while (len >= 8*sizeof(double)) {
- register double r1, r2, r3, r4, r5, r6, r7, r8;
- /* prefetch_src((char *)pds + L1_CACHE_BYTES); */
- flddma(s_space, pds, r1, pmc_load_exc);
- flddma(s_space, pds, r2, pmc_load_exc);
- flddma(s_space, pds, r3, pmc_load_exc);
- flddma(s_space, pds, r4, pmc_load_exc);
- fstdma(d_space, r1, pdd, pmc_store_exc);
- fstdma(d_space, r2, pdd, pmc_store_exc);
- fstdma(d_space, r3, pdd, pmc_store_exc);
- fstdma(d_space, r4, pdd, pmc_store_exc);
-
-#if 0
- if (L1_CACHE_BYTES <= 32)
- prefetch_src((char *)pds + L1_CACHE_BYTES);
-#endif
- flddma(s_space, pds, r5, pmc_load_exc);
- flddma(s_space, pds, r6, pmc_load_exc);
- flddma(s_space, pds, r7, pmc_load_exc);
- flddma(s_space, pds, r8, pmc_load_exc);
- fstdma(d_space, r5, pdd, pmc_store_exc);
- fstdma(d_space, r6, pdd, pmc_store_exc);
- fstdma(d_space, r7, pdd, pmc_store_exc);
- fstdma(d_space, r8, pdd, pmc_store_exc);
- len -= 8*sizeof(double);
- }
-#endif
-
- pws = (unsigned int *)pds;
- pwd = (unsigned int *)pdd;
-
-word_copy:
- while (len >= 8*sizeof(unsigned int)) {
- register unsigned int r1,r2,r3,r4,r5,r6,r7,r8;
- /* prefetch_src((char *)pws + L1_CACHE_BYTES); */
- ldwma(s_space, pws, r1, pmc_load_exc);
- ldwma(s_space, pws, r2, pmc_load_exc);
- ldwma(s_space, pws, r3, pmc_load_exc);
- ldwma(s_space, pws, r4, pmc_load_exc);
- stwma(d_space, r1, pwd, pmc_store_exc);
- stwma(d_space, r2, pwd, pmc_store_exc);
- stwma(d_space, r3, pwd, pmc_store_exc);
- stwma(d_space, r4, pwd, pmc_store_exc);
-
- ldwma(s_space, pws, r5, pmc_load_exc);
- ldwma(s_space, pws, r6, pmc_load_exc);
- ldwma(s_space, pws, r7, pmc_load_exc);
- ldwma(s_space, pws, r8, pmc_load_exc);
- stwma(d_space, r5, pwd, pmc_store_exc);
- stwma(d_space, r6, pwd, pmc_store_exc);
- stwma(d_space, r7, pwd, pmc_store_exc);
- stwma(d_space, r8, pwd, pmc_store_exc);
- len -= 8*sizeof(unsigned int);
- }
-
- while (len >= 4*sizeof(unsigned int)) {
- register unsigned int r1,r2,r3,r4;
- ldwma(s_space, pws, r1, pmc_load_exc);
- ldwma(s_space, pws, r2, pmc_load_exc);
- ldwma(s_space, pws, r3, pmc_load_exc);
- ldwma(s_space, pws, r4, pmc_load_exc);
- stwma(d_space, r1, pwd, pmc_store_exc);
- stwma(d_space, r2, pwd, pmc_store_exc);
- stwma(d_space, r3, pwd, pmc_store_exc);
- stwma(d_space, r4, pwd, pmc_store_exc);
- len -= 4*sizeof(unsigned int);
- }
-
- pcs = (unsigned char *)pws;
- pcd = (unsigned char *)pwd;
-
-byte_copy:
- while (len) {
- /* *pcd++ = *pcs++; */
- ldbma(s_space, pcs, t3, pmc_load_exc);
- stbma(d_space, t3, pcd, pmc_store_exc);
- len--;
- }
-
- return PA_MEMCPY_OK;
-
-unaligned_copy:
- /* possibly we are aligned on a word, but not on a double... */
- if (likely((t1 & (sizeof(unsigned int)-1)) == 0)) {
- t2 = src & (sizeof(unsigned int) - 1);
-
- if (unlikely(t2 != 0)) {
- t2 = sizeof(unsigned int) - t2;
- while (t2) {
- /* *pcd++ = *pcs++; */
- ldbma(s_space, pcs, t3, pmc_load_exc);
- stbma(d_space, t3, pcd, pmc_store_exc);
- len--;
- t2--;
- }
- }
-
- pws = (unsigned int *)pcs;
- pwd = (unsigned int *)pcd;
- goto word_copy;
- }
-
- /* Align the destination. */
- if (unlikely((dst & (sizeof(unsigned int) - 1)) != 0)) {
- t2 = sizeof(unsigned int) - (dst & (sizeof(unsigned int) - 1));
- while (t2) {
- /* *pcd++ = *pcs++; */
- ldbma(s_space, pcs, t3, pmc_load_exc);
- stbma(d_space, t3, pcd, pmc_store_exc);
- len--;
- t2--;
- }
- dst = (unsigned long)pcd;
- src = (unsigned long)pcs;
- }
-
- ret = copy_dstaligned(dst, src, len / sizeof(unsigned int));
- if (ret)
- return ret;
-
- pcs += (len & -sizeof(unsigned int));
- pcd += (len & -sizeof(unsigned int));
- len %= sizeof(unsigned int);
-
- preserve_branch(handle_load_error);
- preserve_branch(handle_store_error);
-
- goto byte_copy;
-
-handle_load_error:
- __asm__ __volatile__ ("pmc_load_exc:\n");
- return PA_MEMCPY_LOAD_ERROR;
-
-handle_store_error:
- __asm__ __volatile__ ("pmc_store_exc:\n");
- return PA_MEMCPY_STORE_ERROR;
-}
-
-
/* Returns 0 for success, otherwise, returns number of bytes not transferred. */
-static unsigned long pa_memcpy(void *dstp, const void *srcp, unsigned long len)
-{
- unsigned long ret, fault_addr, reference;
- struct exception_data *d;
-
- ret = pa_memcpy_internal(dstp, srcp, len);
- if (likely(ret == PA_MEMCPY_OK))
- return 0;
-
- /* if a load or store fault occured we can get the faulty addr */
- d = this_cpu_ptr(&exception_data);
- fault_addr = d->fault_addr;
-
- /* error in load or store? */
- if (ret == PA_MEMCPY_LOAD_ERROR)
- reference = (unsigned long) srcp;
- else
- reference = (unsigned long) dstp;
+extern unsigned long pa_memcpy(void *dst, const void *src,
+ unsigned long len);

- DPRINTF("pa_memcpy: fault type = %lu, len=%lu fault_addr=%lu ref=%lu\n",
- ret, len, fault_addr, reference);
-
- if (fault_addr >= reference)
- return len - (fault_addr - reference);
- else
- return len;
-}
-
-#ifdef __KERNEL__
unsigned long __copy_to_user(void __user *dst, const void *src,
unsigned long len)
{
@@ -537,5 +84,3 @@ long probe_kernel_read(void *dst, const void *src, size_t size)

return __probe_kernel_read(dst, src, size);
}
-
-#endif
diff --git a/arch/parisc/mm/fault.c b/arch/parisc/mm/fault.c
index 1a0b4f63f0e9..040c48fc5391 100644
--- a/arch/parisc/mm/fault.c
+++ b/arch/parisc/mm/fault.c
@@ -149,6 +149,23 @@ int fixup_exception(struct pt_regs *regs)
d->fault_space = regs->isr;
d->fault_addr = regs->ior;

+ /*
+ * Fix up get_user() and put_user().
+ * ASM_EXCEPTIONTABLE_ENTRY_EFAULT() sets the least-significant
+ * bit in the relative address of the fixup routine to indicate
+ * that %r8 should be loaded with -EFAULT to report a userspace
+ * access error.
+ */
+ if (fix->fixup & 1) {
+ regs->gr[8] = -EFAULT;
+
+ /* zero target register for get_user() */
+ if (parisc_acctyp(0, regs->iir) == VM_READ) {
+ int treg = regs->iir & 0x1f;
+ regs->gr[treg] = 0;
+ }
+ }
+
regs->iaoq[0] = (unsigned long)&fix->fixup + fix->fixup;
regs->iaoq[0] &= ~3;
/*
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index c989e67dcc9d..9764463ce833 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -10027,7 +10027,6 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
u32 exec_control;
- bool nested_ept_enabled = false;

vmcs_write16(GUEST_ES_SELECTOR, vmcs12->guest_es_selector);
vmcs_write16(GUEST_CS_SELECTOR, vmcs12->guest_cs_selector);
@@ -10192,7 +10191,6 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
vmcs12->guest_intr_status);
}

- nested_ept_enabled = (exec_control & SECONDARY_EXEC_ENABLE_EPT) != 0;
vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control);
}

@@ -10344,7 +10342,7 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
vmx_set_efer(vcpu, vcpu->arch.efer);

/* Shadow page tables on either EPT or shadow page tables. */
- if (nested_vmx_load_cr3(vcpu, vmcs12->guest_cr3, nested_ept_enabled,
+ if (nested_vmx_load_cr3(vcpu, vmcs12->guest_cr3, nested_cpu_has_ept(vmcs12),
entry_failure_code))
return 1;

diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S
index 779782f58324..9a53a06e5a3e 100644
--- a/arch/x86/lib/memcpy_64.S
+++ b/arch/x86/lib/memcpy_64.S
@@ -290,7 +290,7 @@ EXPORT_SYMBOL_GPL(memcpy_mcsafe_unrolled)
_ASM_EXTABLE_FAULT(.L_copy_leading_bytes, .L_memcpy_mcsafe_fail)
_ASM_EXTABLE_FAULT(.L_cache_w0, .L_memcpy_mcsafe_fail)
_ASM_EXTABLE_FAULT(.L_cache_w1, .L_memcpy_mcsafe_fail)
- _ASM_EXTABLE_FAULT(.L_cache_w3, .L_memcpy_mcsafe_fail)
+ _ASM_EXTABLE_FAULT(.L_cache_w2, .L_memcpy_mcsafe_fail)
_ASM_EXTABLE_FAULT(.L_cache_w3, .L_memcpy_mcsafe_fail)
_ASM_EXTABLE_FAULT(.L_cache_w4, .L_memcpy_mcsafe_fail)
_ASM_EXTABLE_FAULT(.L_cache_w5, .L_memcpy_mcsafe_fail)
diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c
index 887e57182716..aed206475aa7 100644
--- a/arch/x86/mm/kaslr.c
+++ b/arch/x86/mm/kaslr.c
@@ -48,7 +48,7 @@ static const unsigned long vaddr_start = __PAGE_OFFSET_BASE;
#if defined(CONFIG_X86_ESPFIX64)
static const unsigned long vaddr_end = ESPFIX_BASE_ADDR;
#elif defined(CONFIG_EFI)
-static const unsigned long vaddr_end = EFI_VA_START;
+static const unsigned long vaddr_end = EFI_VA_END;
#else
static const unsigned long vaddr_end = __START_KERNEL_map;
#endif
@@ -105,7 +105,7 @@ void __init kernel_randomize_memory(void)
*/
BUILD_BUG_ON(vaddr_start >= vaddr_end);
BUILD_BUG_ON(IS_ENABLED(CONFIG_X86_ESPFIX64) &&
- vaddr_end >= EFI_VA_START);
+ vaddr_end >= EFI_VA_END);
BUILD_BUG_ON((IS_ENABLED(CONFIG_X86_ESPFIX64) ||
IS_ENABLED(CONFIG_EFI)) &&
vaddr_end >= __START_KERNEL_map);
diff --git a/block/bio.c b/block/bio.c
index 2b375020fc49..17ece5b40a2f 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -376,10 +376,14 @@ static void punt_bios_to_rescuer(struct bio_set *bs)
bio_list_init(&punt);
bio_list_init(&nopunt);

- while ((bio = bio_list_pop(current->bio_list)))
+ while ((bio = bio_list_pop(&current->bio_list[0])))
bio_list_add(bio->bi_pool == bs ? &punt : &nopunt, bio);
+ current->bio_list[0] = nopunt;

- *current->bio_list = nopunt;
+ bio_list_init(&nopunt);
+ while ((bio = bio_list_pop(&current->bio_list[1])))
+ bio_list_add(bio->bi_pool == bs ? &punt : &nopunt, bio);
+ current->bio_list[1] = nopunt;

spin_lock(&bs->rescue_lock);
bio_list_merge(&bs->rescue_list, &punt);
@@ -466,7 +470,9 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
* we retry with the original gfp_flags.
*/

- if (current->bio_list && !bio_list_empty(current->bio_list))
+ if (current->bio_list &&
+ (!bio_list_empty(&current->bio_list[0]) ||
+ !bio_list_empty(&current->bio_list[1])))
gfp_mask &= ~__GFP_DIRECT_RECLAIM;

p = mempool_alloc(bs->bio_pool, gfp_mask);
diff --git a/block/blk-core.c b/block/blk-core.c
index 61ba08c58b64..9734b5d0d932 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1977,7 +1977,14 @@ generic_make_request_checks(struct bio *bio)
*/
blk_qc_t generic_make_request(struct bio *bio)
{
- struct bio_list bio_list_on_stack;
+ /*
+ * bio_list_on_stack[0] contains bios submitted by the current
+ * make_request_fn.
+ * bio_list_on_stack[1] contains bios that were submitted before
+ * the current make_request_fn, but that haven't been processed
+ * yet.
+ */
+ struct bio_list bio_list_on_stack[2];
blk_qc_t ret = BLK_QC_T_NONE;

if (!generic_make_request_checks(bio))
@@ -1994,7 +2001,7 @@ blk_qc_t generic_make_request(struct bio *bio)
* should be added at the tail
*/
if (current->bio_list) {
- bio_list_add(current->bio_list, bio);
+ bio_list_add(&current->bio_list[0], bio);
goto out;
}

@@ -2013,23 +2020,39 @@ blk_qc_t generic_make_request(struct bio *bio)
* bio_list, and call into ->make_request() again.
*/
BUG_ON(bio->bi_next);
- bio_list_init(&bio_list_on_stack);
- current->bio_list = &bio_list_on_stack;
+ bio_list_init(&bio_list_on_stack[0]);
+ current->bio_list = bio_list_on_stack;
do {
struct request_queue *q = bdev_get_queue(bio->bi_bdev);

if (likely(blk_queue_enter(q, false) == 0)) {
+ struct bio_list lower, same;
+
+ /* Create a fresh bio_list for all subordinate requests */
+ bio_list_on_stack[1] = bio_list_on_stack[0];
+ bio_list_init(&bio_list_on_stack[0]);
ret = q->make_request_fn(q, bio);

blk_queue_exit(q);

- bio = bio_list_pop(current->bio_list);
+ /* sort new bios into those for a lower level
+ * and those for the same level
+ */
+ bio_list_init(&lower);
+ bio_list_init(&same);
+ while ((bio = bio_list_pop(&bio_list_on_stack[0])) != NULL)
+ if (q == bdev_get_queue(bio->bi_bdev))
+ bio_list_add(&same, bio);
+ else
+ bio_list_add(&lower, bio);
+ /* now assemble so we handle the lowest level first */
+ bio_list_merge(&bio_list_on_stack[0], &lower);
+ bio_list_merge(&bio_list_on_stack[0], &same);
+ bio_list_merge(&bio_list_on_stack[0], &bio_list_on_stack[1]);
} else {
- struct bio *bio_next = bio_list_pop(current->bio_list);
-
bio_io_error(bio);
- bio = bio_next;
}
+ bio = bio_list_pop(&bio_list_on_stack[0]);
} while (bio);
current->bio_list = NULL; /* deactivate */

diff --git a/crypto/lrw.c b/crypto/lrw.c
index ecd8474018e3..3ea095adafd9 100644
--- a/crypto/lrw.c
+++ b/crypto/lrw.c
@@ -286,8 +286,11 @@ static int init_crypt(struct skcipher_request *req, crypto_completion_t done)

subreq->cryptlen = LRW_BUFFER_SIZE;
if (req->cryptlen > LRW_BUFFER_SIZE) {
- subreq->cryptlen = min(req->cryptlen, (unsigned)PAGE_SIZE);
- rctx->ext = kmalloc(subreq->cryptlen, gfp);
+ unsigned int n = min(req->cryptlen, (unsigned int)PAGE_SIZE);
+
+ rctx->ext = kmalloc(n, gfp);
+ if (rctx->ext)
+ subreq->cryptlen = n;
}

rctx->src = req->src;
diff --git a/crypto/xts.c b/crypto/xts.c
index baeb34dd8582..c976bfac29da 100644
--- a/crypto/xts.c
+++ b/crypto/xts.c
@@ -230,8 +230,11 @@ static int init_crypt(struct skcipher_request *req, crypto_completion_t done)

subreq->cryptlen = XTS_BUFFER_SIZE;
if (req->cryptlen > XTS_BUFFER_SIZE) {
- subreq->cryptlen = min(req->cryptlen, (unsigned)PAGE_SIZE);
- rctx->ext = kmalloc(subreq->cryptlen, gfp);
+ unsigned int n = min(req->cryptlen, (unsigned int)PAGE_SIZE);
+
+ rctx->ext = kmalloc(n, gfp);
+ if (rctx->ext)
+ subreq->cryptlen = n;
}

rctx->src = req->src;
diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile
index 9ed087853dee..4c5678cfa9c4 100644
--- a/drivers/acpi/Makefile
+++ b/drivers/acpi/Makefile
@@ -2,7 +2,6 @@
# Makefile for the Linux ACPI interpreter
#

-ccflags-y := -Os
ccflags-$(CONFIG_ACPI_DEBUG) += -DACPI_DEBUG_OUTPUT

#
diff --git a/drivers/acpi/acpi_platform.c b/drivers/acpi/acpi_platform.c
index b4c1a6a51da4..03250e1f1103 100644
--- a/drivers/acpi/acpi_platform.c
+++ b/drivers/acpi/acpi_platform.c
@@ -25,9 +25,11 @@
ACPI_MODULE_NAME("platform");

static const struct acpi_device_id forbidden_id_list[] = {
- {"PNP0000", 0}, /* PIC */
- {"PNP0100", 0}, /* Timer */
- {"PNP0200", 0}, /* AT DMA Controller */
+ {"PNP0000", 0}, /* PIC */
+ {"PNP0100", 0}, /* Timer */
+ {"PNP0200", 0}, /* AT DMA Controller */
+ {"ACPI0009", 0}, /* IOxAPIC */
+ {"ACPI000A", 0}, /* IOAPIC */
{"", 0},
};

diff --git a/drivers/crypto/ccp/ccp-dev-v5.c b/drivers/crypto/ccp/ccp-dev-v5.c
index 612898b4aaad..3422f203455d 100644
--- a/drivers/crypto/ccp/ccp-dev-v5.c
+++ b/drivers/crypto/ccp/ccp-dev-v5.c
@@ -1014,6 +1014,7 @@ const struct ccp_vdata ccpv5a = {

const struct ccp_vdata ccpv5b = {
.version = CCP_VERSION(5, 0),
+ .dma_chan_attr = DMA_PRIVATE,
.setup = ccp5other_config,
.perform = &ccp5_actions,
.bar = 2,
diff --git a/drivers/crypto/ccp/ccp-dev.h b/drivers/crypto/ccp/ccp-dev.h
index 649e5610a5ce..cd9a7051da3c 100644
--- a/drivers/crypto/ccp/ccp-dev.h
+++ b/drivers/crypto/ccp/ccp-dev.h
@@ -179,6 +179,10 @@

/* ------------------------ General CCP Defines ------------------------ */

+#define CCP_DMA_DFLT 0x0
+#define CCP_DMA_PRIV 0x1
+#define CCP_DMA_PUB 0x2
+
#define CCP_DMAPOOL_MAX_SIZE 64
#define CCP_DMAPOOL_ALIGN BIT(5)

@@ -635,6 +639,7 @@ struct ccp_actions {
/* Structure to hold CCP version-specific values */
struct ccp_vdata {
const unsigned int version;
+ const unsigned int dma_chan_attr;
void (*setup)(struct ccp_device *);
const struct ccp_actions *perform;
const unsigned int bar;
diff --git a/drivers/crypto/ccp/ccp-dmaengine.c b/drivers/crypto/ccp/ccp-dmaengine.c
index 8d0eeb46d4a2..e00be01fbf5a 100644
--- a/drivers/crypto/ccp/ccp-dmaengine.c
+++ b/drivers/crypto/ccp/ccp-dmaengine.c
@@ -10,6 +10,7 @@
* published by the Free Software Foundation.
*/

+#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/dmaengine.h>
#include <linux/spinlock.h>
@@ -25,6 +26,37 @@
(mask == 0) ? 64 : fls64(mask); \
})

+/* The CCP as a DMA provider can be configured for public or private
+ * channels. Default is specified in the vdata for the device (PCI ID).
+ * This module parameter will override for all channels on all devices:
+ * dma_chan_attr = 0x2 to force all channels public
+ * = 0x1 to force all channels private
+ * = 0x0 to defer to the vdata setting
+ * = any other value: warning, revert to 0x0
+ */
+static unsigned int dma_chan_attr = CCP_DMA_DFLT;
+module_param(dma_chan_attr, uint, 0444);
+MODULE_PARM_DESC(dma_chan_attr, "Set DMA channel visibility: 0 (default) = device defaults, 1 = make private, 2 = make public");
+
+unsigned int ccp_get_dma_chan_attr(struct ccp_device *ccp)
+{
+ switch (dma_chan_attr) {
+ case CCP_DMA_DFLT:
+ return ccp->vdata->dma_chan_attr;
+
+ case CCP_DMA_PRIV:
+ return DMA_PRIVATE;
+
+ case CCP_DMA_PUB:
+ return 0;
+
+ default:
+ dev_info_once(ccp->dev, "Invalid value for dma_chan_attr: %d\n",
+ dma_chan_attr);
+ return ccp->vdata->dma_chan_attr;
+ }
+}
+
static void ccp_free_cmd_resources(struct ccp_device *ccp,
struct list_head *list)
{
@@ -675,6 +707,15 @@ int ccp_dmaengine_register(struct ccp_device *ccp)
dma_cap_set(DMA_SG, dma_dev->cap_mask);
dma_cap_set(DMA_INTERRUPT, dma_dev->cap_mask);

+ /* The DMA channels for this device can be set to public or private,
+ * and overridden by the module parameter dma_chan_attr.
+ * Default: according to the value in vdata (dma_chan_attr=0)
+ * dma_chan_attr=0x1: all channels private (override vdata)
+ * dma_chan_attr=0x2: all channels public (override vdata)
+ */
+ if (ccp_get_dma_chan_attr(ccp) == DMA_PRIVATE)
+ dma_cap_set(DMA_PRIVATE, dma_dev->cap_mask);
+
INIT_LIST_HEAD(&dma_dev->channels);
for (i = 0; i < ccp->cmd_q_count; i++) {
chan = ccp->ccp_dma_chan + i;
diff --git a/drivers/gpu/drm/armada/Makefile b/drivers/gpu/drm/armada/Makefile
index a18f156c8b66..64c0b4546fb2 100644
--- a/drivers/gpu/drm/armada/Makefile
+++ b/drivers/gpu/drm/armada/Makefile
@@ -4,3 +4,5 @@ armada-y += armada_510.o
armada-$(CONFIG_DEBUG_FS) += armada_debugfs.o

obj-$(CONFIG_DRM_ARMADA) := armada.o
+
+CFLAGS_armada_trace.o := -I$(src)
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
index 0a67124bb2a4..db0a43a090d0 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
@@ -1303,6 +1303,8 @@ int etnaviv_gpu_submit(struct etnaviv_gpu *gpu,
goto out_pm_put;
}

+ mutex_lock(&gpu->lock);
+
fence = etnaviv_gpu_fence_alloc(gpu);
if (!fence) {
event_free(gpu, event);
@@ -1310,8 +1312,6 @@ int etnaviv_gpu_submit(struct etnaviv_gpu *gpu,
goto out_pm_put;
}

- mutex_lock(&gpu->lock);
-
gpu->event[event].fence = fence;
submit->fence = fence->seqno;
gpu->active_fence = submit->fence;
diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c
index 3f656e3a6e5a..325cb9b55989 100644
--- a/drivers/gpu/drm/i915/gvt/kvmgt.c
+++ b/drivers/gpu/drm/i915/gvt/kvmgt.c
@@ -1334,6 +1334,7 @@ static int kvmgt_guest_init(struct mdev_device *mdev)
vgpu->handle = (unsigned long)info;
info->vgpu = vgpu;
info->kvm = kvm;
+ kvm_get_kvm(info->kvm);

kvmgt_protect_table_init(info);
gvt_cache_init(vgpu);
@@ -1353,6 +1354,7 @@ static bool kvmgt_guest_exit(struct kvmgt_guest_info *info)
}

kvm_page_track_unregister_notifier(info->kvm, &info->track_node);
+ kvm_put_kvm(info->kvm);
kvmgt_protect_table_destroy(info);
gvt_cache_destroy(info->vgpu);
vfree(info);
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index b4bde1452f2a..6924a8e79da9 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -735,10 +735,9 @@ static bool gen8_ppgtt_clear_pt(struct i915_address_space *vm,
GEM_BUG_ON(pte_end > GEN8_PTES);

bitmap_clear(pt->used_ptes, pte, num_entries);
-
- if (bitmap_empty(pt->used_ptes, GEN8_PTES)) {
- free_pt(to_i915(vm->dev), pt);
- return true;
+ if (USES_FULL_PPGTT(vm->i915)) {
+ if (bitmap_empty(pt->used_ptes, GEN8_PTES))
+ return true;
}

pt_vaddr = kmap_px(pt);
@@ -775,13 +774,12 @@ static bool gen8_ppgtt_clear_pd(struct i915_address_space *vm,
pde_vaddr = kmap_px(pd);
pde_vaddr[pde] = scratch_pde;
kunmap_px(ppgtt, pde_vaddr);
+ free_pt(to_i915(vm->dev), pt);
}
}

- if (bitmap_empty(pd->used_pdes, I915_PDES)) {
- free_pd(to_i915(vm->dev), pd);
+ if (bitmap_empty(pd->used_pdes, I915_PDES))
return true;
- }

return false;
}
@@ -795,7 +793,6 @@ static bool gen8_ppgtt_clear_pdp(struct i915_address_space *vm,
uint64_t length)
{
struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
- struct drm_i915_private *dev_priv = to_i915(vm->dev);
struct i915_page_directory *pd;
uint64_t pdpe;
gen8_ppgtt_pdpe_t *pdpe_vaddr;
@@ -813,16 +810,14 @@ static bool gen8_ppgtt_clear_pdp(struct i915_address_space *vm,
pdpe_vaddr[pdpe] = scratch_pdpe;
kunmap_px(ppgtt, pdpe_vaddr);
}
+ free_pd(to_i915(vm->dev), pd);
}
}

mark_tlbs_dirty(ppgtt);

- if (USES_FULL_48BIT_PPGTT(dev_priv) &&
- bitmap_empty(pdp->used_pdpes, I915_PDPES_PER_PDP(dev_priv))) {
- free_pdp(dev_priv, pdp);
+ if (bitmap_empty(pdp->used_pdpes, I915_PDPES_PER_PDP(dev_priv)))
return true;
- }

return false;
}
@@ -836,6 +831,7 @@ static void gen8_ppgtt_clear_pml4(struct i915_address_space *vm,
uint64_t start,
uint64_t length)
{
+ struct drm_i915_private *dev_priv = to_i915(vm->dev);
struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
struct i915_page_directory_pointer *pdp;
uint64_t pml4e;
@@ -854,6 +850,7 @@ static void gen8_ppgtt_clear_pml4(struct i915_address_space *vm,
pml4e_vaddr = kmap_px(pml4);
pml4e_vaddr[pml4e] = scratch_pml4e;
kunmap_px(ppgtt, pml4e_vaddr);
+ free_pdp(dev_priv, pdp);
}
}
}
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index beabc17e7c8a..2af4522d60e6 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -362,7 +362,8 @@ execlists_update_context_pdps(struct i915_hw_ppgtt *ppgtt, u32 *reg_state)
static u64 execlists_update_context(struct drm_i915_gem_request *rq)
{
struct intel_context *ce = &rq->ctx->engine[rq->engine->id];
- struct i915_hw_ppgtt *ppgtt = rq->ctx->ppgtt;
+ struct i915_hw_ppgtt *ppgtt =
+ rq->ctx->ppgtt ?: rq->i915->mm.aliasing_ppgtt;
u32 *reg_state = ce->lrc_reg_state;

reg_state[CTX_RING_TAIL+1] = rq->tail;
diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c
index 0cf03ccbf0a7..445a907552c1 100644
--- a/drivers/gpu/drm/radeon/radeon_ttm.c
+++ b/drivers/gpu/drm/radeon/radeon_ttm.c
@@ -213,8 +213,8 @@ static void radeon_evict_flags(struct ttm_buffer_object *bo,
rbo->placement.num_busy_placement = 0;
for (i = 0; i < rbo->placement.num_placement; i++) {
if (rbo->placements[i].flags & TTM_PL_FLAG_VRAM) {
- if (rbo->placements[0].fpfn < fpfn)
- rbo->placements[0].fpfn = fpfn;
+ if (rbo->placements[i].fpfn < fpfn)
+ rbo->placements[i].fpfn = fpfn;
} else {
rbo->placement.busy_placement =
&rbo->placements[i];
diff --git a/drivers/gpu/drm/vc4/vc4_crtc.c b/drivers/gpu/drm/vc4/vc4_crtc.c
index 7aadce1f7e7a..c7e6c9839c9a 100644
--- a/drivers/gpu/drm/vc4/vc4_crtc.c
+++ b/drivers/gpu/drm/vc4/vc4_crtc.c
@@ -842,6 +842,17 @@ static void vc4_crtc_destroy_state(struct drm_crtc *crtc,
drm_atomic_helper_crtc_destroy_state(crtc, state);
}

+static void
+vc4_crtc_reset(struct drm_crtc *crtc)
+{
+ if (crtc->state)
+ __drm_atomic_helper_crtc_destroy_state(crtc->state);
+
+ crtc->state = kzalloc(sizeof(struct vc4_crtc_state), GFP_KERNEL);
+ if (crtc->state)
+ crtc->state->crtc = crtc;
+}
+
static const struct drm_crtc_funcs vc4_crtc_funcs = {
.set_config = drm_atomic_helper_set_config,
.destroy = vc4_crtc_destroy,
@@ -849,7 +860,7 @@ static const struct drm_crtc_funcs vc4_crtc_funcs = {
.set_property = NULL,
.cursor_set = NULL, /* handled by drm_mode_cursor_universal */
.cursor_move = NULL, /* handled by drm_mode_cursor_universal */
- .reset = drm_atomic_helper_crtc_reset,
+ .reset = vc4_crtc_reset,
.atomic_duplicate_state = vc4_crtc_duplicate_state,
.atomic_destroy_state = vc4_crtc_destroy_state,
.gamma_set = vc4_crtc_gamma_set,
diff --git a/drivers/hid/wacom_sys.c b/drivers/hid/wacom_sys.c
index 8aeca038cc73..5f282bb0ea10 100644
--- a/drivers/hid/wacom_sys.c
+++ b/drivers/hid/wacom_sys.c
@@ -2081,6 +2081,14 @@ static int wacom_parse_and_register(struct wacom *wacom, bool wireless)

wacom_update_name(wacom, wireless ? " (WL)" : "");

+ /* pen only Bamboo neither support touch nor pad */
+ if ((features->type == BAMBOO_PEN) &&
+ ((features->device_type & WACOM_DEVICETYPE_TOUCH) ||
+ (features->device_type & WACOM_DEVICETYPE_PAD))) {
+ error = -ENODEV;
+ goto fail;
+ }
+
error = wacom_add_shared_data(hdev);
if (error)
goto fail;
@@ -2128,14 +2136,6 @@ static int wacom_parse_and_register(struct wacom *wacom, bool wireless)
goto fail_quirks;
}

- /* pen only Bamboo neither support touch nor pad */
- if ((features->type == BAMBOO_PEN) &&
- ((features->device_type & WACOM_DEVICETYPE_TOUCH) ||
- (features->device_type & WACOM_DEVICETYPE_PAD))) {
- error = -ENODEV;
- goto fail_quirks;
- }
-
if (features->device_type & WACOM_DEVICETYPE_WL_MONITOR)
error = hid_hw_open(hdev);

diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 0ff5469c03d2..b78bc2916664 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -986,26 +986,29 @@ static void flush_current_bio_list(struct blk_plug_cb *cb, bool from_schedule)
struct dm_offload *o = container_of(cb, struct dm_offload, cb);
struct bio_list list;
struct bio *bio;
+ int i;

INIT_LIST_HEAD(&o->cb.list);

if (unlikely(!current->bio_list))
return;

- list = *current->bio_list;
- bio_list_init(current->bio_list);
-
- while ((bio = bio_list_pop(&list))) {
- struct bio_set *bs = bio->bi_pool;
- if (unlikely(!bs) || bs == fs_bio_set) {
- bio_list_add(current->bio_list, bio);
- continue;
+ for (i = 0; i < 2; i++) {
+ list = current->bio_list[i];
+ bio_list_init(&current->bio_list[i]);
+
+ while ((bio = bio_list_pop(&list))) {
+ struct bio_set *bs = bio->bi_pool;
+ if (unlikely(!bs) || bs == fs_bio_set) {
+ bio_list_add(&current->bio_list[i], bio);
+ continue;
+ }
+
+ spin_lock(&bs->rescue_lock);
+ bio_list_add(&bs->rescue_list, bio);
+ queue_work(bs->rescue_workqueue, &bs->rescue_work);
+ spin_unlock(&bs->rescue_lock);
}
-
- spin_lock(&bs->rescue_lock);
- bio_list_add(&bs->rescue_list, bio);
- queue_work(bs->rescue_workqueue, &bs->rescue_work);
- spin_unlock(&bs->rescue_lock);
}
}

diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 87f14080c2cd..41693890e2b8 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -974,7 +974,8 @@ static void wait_barrier(struct r10conf *conf)
!conf->barrier ||
(atomic_read(&conf->nr_pending) &&
current->bio_list &&
- !bio_list_empty(current->bio_list)),
+ (!bio_list_empty(&current->bio_list[0]) ||
+ !bio_list_empty(&current->bio_list[1]))),
conf->resync_lock);
conf->nr_waiting--;
if (!conf->nr_waiting)
diff --git a/drivers/mmc/host/sdhci-of-at91.c b/drivers/mmc/host/sdhci-of-at91.c
index 7fd964256faa..d5430ed02a67 100644
--- a/drivers/mmc/host/sdhci-of-at91.c
+++ b/drivers/mmc/host/sdhci-of-at91.c
@@ -29,6 +29,8 @@

#include "sdhci-pltfm.h"

+#define SDMMC_MC1R 0x204
+#define SDMMC_MC1R_DDR BIT(3)
#define SDMMC_CACR 0x230
#define SDMMC_CACR_CAPWREN BIT(0)
#define SDMMC_CACR_KEY (0x46 << 8)
@@ -103,11 +105,18 @@ static void sdhci_at91_set_power(struct sdhci_host *host, unsigned char mode,
sdhci_set_power_noreg(host, mode, vdd);
}

+void sdhci_at91_set_uhs_signaling(struct sdhci_host *host, unsigned int timing)
+{
+ if (timing == MMC_TIMING_MMC_DDR52)
+ sdhci_writeb(host, SDMMC_MC1R_DDR, SDMMC_MC1R);
+ sdhci_set_uhs_signaling(host, timing);
+}
+
static const struct sdhci_ops sdhci_at91_sama5d2_ops = {
.set_clock = sdhci_at91_set_clock,
.set_bus_width = sdhci_set_bus_width,
.reset = sdhci_reset,
- .set_uhs_signaling = sdhci_set_uhs_signaling,
+ .set_uhs_signaling = sdhci_at91_set_uhs_signaling,
.set_power = sdhci_at91_set_power,
};

diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index d0819d18ad08..d2a4adc50a84 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -1830,6 +1830,9 @@ static void sdhci_enable_sdio_irq(struct mmc_host *mmc, int enable)
struct sdhci_host *host = mmc_priv(mmc);
unsigned long flags;

+ if (enable)
+ pm_runtime_get_noresume(host->mmc->parent);
+
spin_lock_irqsave(&host->lock, flags);
if (enable)
host->flags |= SDHCI_SDIO_IRQ_ENABLED;
@@ -1838,6 +1841,9 @@ static void sdhci_enable_sdio_irq(struct mmc_host *mmc, int enable)

sdhci_enable_sdio_irq_nolock(host, enable);
spin_unlock_irqrestore(&host->lock, flags);
+
+ if (!enable)
+ pm_runtime_put_noidle(host->mmc->parent);
}

static int sdhci_start_signal_voltage_switch(struct mmc_host *mmc,
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 8a3c3e32a704..3818ff609d55 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -2034,9 +2034,9 @@ void nvme_kill_queues(struct nvme_ctrl *ctrl)
* Revalidating a dead namespace sets capacity to 0. This will
* end buffered writers dirtying pages that can't be synced.
*/
- if (ns->disk && !test_and_set_bit(NVME_NS_DEAD, &ns->flags))
- revalidate_disk(ns->disk);
-
+ if (!ns->disk || test_and_set_bit(NVME_NS_DEAD, &ns->flags))
+ continue;
+ revalidate_disk(ns->disk);
blk_set_queue_dying(ns->queue);
blk_mq_abort_requeue_list(ns->queue);
blk_mq_start_stopped_hw_queues(ns->queue, true);
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 3faefabf339c..410c3d15b0cb 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -1990,8 +1990,10 @@ static void nvme_remove(struct pci_dev *pdev)

pci_set_drvdata(pdev, NULL);

- if (!pci_device_is_present(pdev))
+ if (!pci_device_is_present(pdev)) {
nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DEAD);
+ nvme_dev_disable(dev, false);
+ }

flush_work(&dev->reset_work);
nvme_uninit_ctrl(&dev->ctrl);
diff --git a/drivers/pci/host/pci-thunder-pem.c b/drivers/pci/host/pci-thunder-pem.c
index af722eb0ca75..e354010fb006 100644
--- a/drivers/pci/host/pci-thunder-pem.c
+++ b/drivers/pci/host/pci-thunder-pem.c
@@ -331,7 +331,7 @@ static int thunder_pem_acpi_init(struct pci_config_window *cfg)
if (!res_pem)
return -ENOMEM;

- ret = acpi_get_rc_resources(dev, "THRX0002", root->segment, res_pem);
+ ret = acpi_get_rc_resources(dev, "CAVA02B", root->segment, res_pem);
if (ret) {
dev_err(dev, "can't get rc base address\n");
return ret;
diff --git a/drivers/pci/host/pcie-iproc-bcma.c b/drivers/pci/host/pcie-iproc-bcma.c
index bd4c9ec25edc..384c27e664fe 100644
--- a/drivers/pci/host/pcie-iproc-bcma.c
+++ b/drivers/pci/host/pcie-iproc-bcma.c
@@ -44,8 +44,7 @@ static int iproc_pcie_bcma_probe(struct bcma_device *bdev)
{
struct device *dev = &bdev->dev;
struct iproc_pcie *pcie;
- LIST_HEAD(res);
- struct resource res_mem;
+ LIST_HEAD(resources);
int ret;

pcie = devm_kzalloc(dev, sizeof(*pcie), GFP_KERNEL);
@@ -63,22 +62,23 @@ static int iproc_pcie_bcma_probe(struct bcma_device *bdev)

pcie->base_addr = bdev->addr;

- res_mem.start = bdev->addr_s[0];
- res_mem.end = bdev->addr_s[0] + SZ_128M - 1;
- res_mem.name = "PCIe MEM space";
- res_mem.flags = IORESOURCE_MEM;
- pci_add_resource(&res, &res_mem);
+ pcie->mem.start = bdev->addr_s[0];
+ pcie->mem.end = bdev->addr_s[0] + SZ_128M - 1;
+ pcie->mem.name = "PCIe MEM space";
+ pcie->mem.flags = IORESOURCE_MEM;
+ pci_add_resource(&resources, &pcie->mem);

pcie->map_irq = iproc_pcie_bcma_map_irq;

- ret = iproc_pcie_setup(pcie, &res);
- if (ret)
+ ret = iproc_pcie_setup(pcie, &resources);
+ if (ret) {
dev_err(dev, "PCIe controller setup failed\n");
-
- pci_free_resource_list(&res);
+ pci_free_resource_list(&resources);
+ return ret;
+ }

bcma_set_drvdata(bdev, pcie);
- return ret;
+ return 0;
}

static void iproc_pcie_bcma_remove(struct bcma_device *bdev)
diff --git a/drivers/pci/host/pcie-iproc-platform.c b/drivers/pci/host/pcie-iproc-platform.c
index 22d814a78a78..f95564ac37df 100644
--- a/drivers/pci/host/pcie-iproc-platform.c
+++ b/drivers/pci/host/pcie-iproc-platform.c
@@ -52,7 +52,7 @@ static int iproc_pcie_pltfm_probe(struct platform_device *pdev)
struct device_node *np = dev->of_node;
struct resource reg;
resource_size_t iobase = 0;
- LIST_HEAD(res);
+ LIST_HEAD(resources);
int ret;

of_id = of_match_device(iproc_pcie_of_match_table, dev);
@@ -101,10 +101,10 @@ static int iproc_pcie_pltfm_probe(struct platform_device *pdev)
pcie->phy = NULL;
}

- ret = of_pci_get_host_bridge_resources(np, 0, 0xff, &res, &iobase);
+ ret = of_pci_get_host_bridge_resources(np, 0, 0xff, &resources,
+ &iobase);
if (ret) {
- dev_err(dev,
- "unable to get PCI host bridge resources\n");
+ dev_err(dev, "unable to get PCI host bridge resources\n");
return ret;
}

@@ -117,14 +117,15 @@ static int iproc_pcie_pltfm_probe(struct platform_device *pdev)
pcie->map_irq = of_irq_parse_and_map_pci;
}

- ret = iproc_pcie_setup(pcie, &res);
- if (ret)
+ ret = iproc_pcie_setup(pcie, &resources);
+ if (ret) {
dev_err(dev, "PCIe controller setup failed\n");
-
- pci_free_resource_list(&res);
+ pci_free_resource_list(&resources);
+ return ret;
+ }

platform_set_drvdata(pdev, pcie);
- return ret;
+ return 0;
}

static int iproc_pcie_pltfm_remove(struct platform_device *pdev)
diff --git a/drivers/pci/host/pcie-iproc.h b/drivers/pci/host/pcie-iproc.h
index 04fed8e907f1..0bbe2ea44f3e 100644
--- a/drivers/pci/host/pcie-iproc.h
+++ b/drivers/pci/host/pcie-iproc.h
@@ -90,6 +90,7 @@ struct iproc_pcie {
#ifdef CONFIG_ARM
struct pci_sys_data sysdata;
#endif
+ struct resource mem;
struct pci_bus *root_bus;
struct phy *phy;
int (*map_irq)(const struct pci_dev *, u8, u8);
diff --git a/drivers/scsi/device_handler/scsi_dh_alua.c b/drivers/scsi/device_handler/scsi_dh_alua.c
index d704752b6332..6021cb9ea910 100644
--- a/drivers/scsi/device_handler/scsi_dh_alua.c
+++ b/drivers/scsi/device_handler/scsi_dh_alua.c
@@ -113,7 +113,7 @@ struct alua_queue_data {
#define ALUA_POLICY_SWITCH_ALL 1

static void alua_rtpg_work(struct work_struct *work);
-static void alua_rtpg_queue(struct alua_port_group *pg,
+static bool alua_rtpg_queue(struct alua_port_group *pg,
struct scsi_device *sdev,
struct alua_queue_data *qdata, bool force);
static void alua_check(struct scsi_device *sdev, bool force);
@@ -866,7 +866,13 @@ static void alua_rtpg_work(struct work_struct *work)
kref_put(&pg->kref, release_port_group);
}

-static void alua_rtpg_queue(struct alua_port_group *pg,
+/**
+ * alua_rtpg_queue() - cause RTPG to be submitted asynchronously
+ *
+ * Returns true if and only if alua_rtpg_work() will be called asynchronously.
+ * That function is responsible for calling @qdata->fn().
+ */
+static bool alua_rtpg_queue(struct alua_port_group *pg,
struct scsi_device *sdev,
struct alua_queue_data *qdata, bool force)
{
@@ -874,8 +880,8 @@ static void alua_rtpg_queue(struct alua_port_group *pg,
unsigned long flags;
struct workqueue_struct *alua_wq = kaluad_wq;

- if (!pg)
- return;
+ if (!pg || scsi_device_get(sdev))
+ return false;

spin_lock_irqsave(&pg->lock, flags);
if (qdata) {
@@ -888,14 +894,12 @@ static void alua_rtpg_queue(struct alua_port_group *pg,
pg->flags |= ALUA_PG_RUN_RTPG;
kref_get(&pg->kref);
pg->rtpg_sdev = sdev;
- scsi_device_get(sdev);
start_queue = 1;
} else if (!(pg->flags & ALUA_PG_RUN_RTPG) && force) {
pg->flags |= ALUA_PG_RUN_RTPG;
/* Do not queue if the worker is already running */
if (!(pg->flags & ALUA_PG_RUNNING)) {
kref_get(&pg->kref);
- sdev = NULL;
start_queue = 1;
}
}
@@ -904,13 +908,17 @@ static void alua_rtpg_queue(struct alua_port_group *pg,
alua_wq = kaluad_sync_wq;
spin_unlock_irqrestore(&pg->lock, flags);

- if (start_queue &&
- !queue_delayed_work(alua_wq, &pg->rtpg_work,
- msecs_to_jiffies(ALUA_RTPG_DELAY_MSECS))) {
- if (sdev)
- scsi_device_put(sdev);
- kref_put(&pg->kref, release_port_group);
+ if (start_queue) {
+ if (queue_delayed_work(alua_wq, &pg->rtpg_work,
+ msecs_to_jiffies(ALUA_RTPG_DELAY_MSECS)))
+ sdev = NULL;
+ else
+ kref_put(&pg->kref, release_port_group);
}
+ if (sdev)
+ scsi_device_put(sdev);
+
+ return true;
}

/*
@@ -1011,11 +1019,13 @@ static int alua_activate(struct scsi_device *sdev,
mutex_unlock(&h->init_mutex);
goto out;
}
- fn = NULL;
rcu_read_unlock();
mutex_unlock(&h->init_mutex);

- alua_rtpg_queue(pg, sdev, qdata, true);
+ if (alua_rtpg_queue(pg, sdev, qdata, true))
+ fn = NULL;
+ else
+ err = SCSI_DH_DEV_OFFLINED;
kref_put(&pg->kref, release_port_group);
out:
if (fn)
diff --git a/drivers/scsi/libsas/sas_ata.c b/drivers/scsi/libsas/sas_ata.c
index 763f012fdeca..87f5e694dbed 100644
--- a/drivers/scsi/libsas/sas_ata.c
+++ b/drivers/scsi/libsas/sas_ata.c
@@ -221,7 +221,7 @@ static unsigned int sas_ata_qc_issue(struct ata_queued_cmd *qc)
task->num_scatter = qc->n_elem;
} else {
for_each_sg(qc->sg, sg, qc->n_elem, si)
- xfer += sg->length;
+ xfer += sg_dma_len(sg);

task->total_xfer_len = xfer;
task->num_scatter = si;
diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c
index 121de0aaa6ad..f753df25ba34 100644
--- a/drivers/scsi/sg.c
+++ b/drivers/scsi/sg.c
@@ -998,6 +998,8 @@ sg_ioctl(struct file *filp, unsigned int cmd_in, unsigned long arg)
result = get_user(val, ip);
if (result)
return result;
+ if (val > SG_MAX_CDB_SIZE)
+ return -ENOMEM;
sfp->next_cmd_len = (val > 0) ? val : 0;
return 0;
case SG_GET_VERSION_NUM:
diff --git a/drivers/tty/serial/atmel_serial.c b/drivers/tty/serial/atmel_serial.c
index fabbe76203bb..4d079cdaa7a3 100644
--- a/drivers/tty/serial/atmel_serial.c
+++ b/drivers/tty/serial/atmel_serial.c
@@ -1938,6 +1938,11 @@ static void atmel_flush_buffer(struct uart_port *port)
atmel_uart_writel(port, ATMEL_PDC_TCR, 0);
atmel_port->pdc_tx.ofs = 0;
}
+ /*
+ * in uart_flush_buffer(), the xmit circular buffer has just
+ * been cleared, so we have to reset tx_len accordingly.
+ */
+ atmel_port->tx_len = 0;
}

/*
@@ -2471,6 +2476,9 @@ static void atmel_console_write(struct console *co, const char *s, u_int count)
pdc_tx = atmel_uart_readl(port, ATMEL_PDC_PTSR) & ATMEL_PDC_TXTEN;
atmel_uart_writel(port, ATMEL_PDC_PTCR, ATMEL_PDC_TXTDIS);

+ /* Make sure that tx path is actually able to send characters */
+ atmel_uart_writel(port, ATMEL_US_CR, ATMEL_US_TXEN);
+
uart_console_write(port, s, count, atmel_console_putchar);

/*
diff --git a/drivers/tty/serial/mxs-auart.c b/drivers/tty/serial/mxs-auart.c
index 8c1c9112b3fd..181972b03845 100644
--- a/drivers/tty/serial/mxs-auart.c
+++ b/drivers/tty/serial/mxs-auart.c
@@ -1085,7 +1085,7 @@ static void mxs_auart_settermios(struct uart_port *u,
AUART_LINECTRL_BAUD_DIV_MAX);
baud_max = u->uartclk * 32 / AUART_LINECTRL_BAUD_DIV_MIN;
baud = uart_get_baud_rate(u, termios, old, baud_min, baud_max);
- div = u->uartclk * 32 / baud;
+ div = DIV_ROUND_CLOSEST(u->uartclk * 32, baud);
}

ctrl |= AUART_LINECTRL_BAUD_DIVFRAC(div & 0x3F);
diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c
index 479e223f9cff..f029aad67183 100644
--- a/drivers/usb/core/hcd.c
+++ b/drivers/usb/core/hcd.c
@@ -520,8 +520,10 @@ static int rh_call_control (struct usb_hcd *hcd, struct urb *urb)
*/
tbuf_size = max_t(u16, sizeof(struct usb_hub_descriptor), wLength);
tbuf = kzalloc(tbuf_size, GFP_KERNEL);
- if (!tbuf)
- return -ENOMEM;
+ if (!tbuf) {
+ status = -ENOMEM;
+ goto err_alloc;
+ }

bufp = tbuf;

@@ -734,6 +736,7 @@ static int rh_call_control (struct usb_hcd *hcd, struct urb *urb)
}

kfree(tbuf);
+ err_alloc:

/* any errors get returned through the urb completion */
spin_lock_irq(&hcd_root_hub_lock);
diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
index e32029a31ca4..4c101f4161f8 100644
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -2000,6 +2000,9 @@ static int process_ctrl_td(struct xhci_hcd *xhci, struct xhci_td *td,
case TRB_NORMAL:
td->urb->actual_length = requested - remaining;
goto finish_td;
+ case TRB_STATUS:
+ td->urb->actual_length = requested;
+ goto finish_td;
default:
xhci_warn(xhci, "WARN: unexpected TRB Type %d\n",
trb_type);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 0a436c4a28ad..2c48e2528600 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2550,17 +2550,14 @@ static void nfs41_check_delegation_stateid(struct nfs4_state *state)
}

nfs4_stateid_copy(&stateid, &delegation->stateid);
- if (test_bit(NFS_DELEGATION_REVOKED, &delegation->flags)) {
+ if (test_bit(NFS_DELEGATION_REVOKED, &delegation->flags) ||
+ !test_and_clear_bit(NFS_DELEGATION_TEST_EXPIRED,
+ &delegation->flags)) {
rcu_read_unlock();
nfs_finish_clear_delegation_stateid(state, &stateid);
return;
}

- if (!test_and_clear_bit(NFS_DELEGATION_TEST_EXPIRED, &delegation->flags)) {
- rcu_read_unlock();
- return;
- }
-
cred = get_rpccred(delegation->cred);
rcu_read_unlock();
status = nfs41_test_and_free_expired_stateid(server, &stateid, cred);
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index 010aff5c5a79..536009e50387 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -790,6 +790,7 @@ nfserrno (int errno)
{ nfserr_serverfault, -ESERVERFAULT },
{ nfserr_serverfault, -ENFILE },
{ nfserr_io, -EUCLEAN },
+ { nfserr_perm, -ENOKEY },
};
int i;

diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index bfc00de5c6f1..3365ecb9074d 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -769,8 +769,8 @@ xfs_bmap_extents_to_btree(
args.type = XFS_ALLOCTYPE_START_BNO;
args.fsbno = XFS_INO_TO_FSB(mp, ip->i_ino);
} else if (dfops->dop_low) {
-try_another_ag:
args.type = XFS_ALLOCTYPE_START_BNO;
+try_another_ag:
args.fsbno = *firstblock;
} else {
args.type = XFS_ALLOCTYPE_NEAR_BNO;
@@ -796,17 +796,19 @@ xfs_bmap_extents_to_btree(
if (xfs_sb_version_hasreflink(&cur->bc_mp->m_sb) &&
args.fsbno == NULLFSBLOCK &&
args.type == XFS_ALLOCTYPE_NEAR_BNO) {
- dfops->dop_low = true;
+ args.type = XFS_ALLOCTYPE_FIRST_AG;
goto try_another_ag;
}
+ if (WARN_ON_ONCE(args.fsbno == NULLFSBLOCK)) {
+ xfs_iroot_realloc(ip, -1, whichfork);
+ xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
+ return -ENOSPC;
+ }
/*
* Allocation can't fail, the space was reserved.
*/
- ASSERT(args.fsbno != NULLFSBLOCK);
ASSERT(*firstblock == NULLFSBLOCK ||
- args.agno == XFS_FSB_TO_AGNO(mp, *firstblock) ||
- (dfops->dop_low &&
- args.agno > XFS_FSB_TO_AGNO(mp, *firstblock)));
+ args.agno >= XFS_FSB_TO_AGNO(mp, *firstblock));
*firstblock = cur->bc_private.b.firstblock = args.fsbno;
cur->bc_private.b.allocated++;
ip->i_d.di_nblocks++;
@@ -1278,7 +1280,6 @@ xfs_bmap_read_extents(
/* REFERENCED */
xfs_extnum_t room; /* number of entries there's room for */

- bno = NULLFSBLOCK;
mp = ip->i_mount;
ifp = XFS_IFORK_PTR(ip, whichfork);
exntf = (whichfork != XFS_DATA_FORK) ? XFS_EXTFMT_NOSTATE :
@@ -1291,9 +1292,7 @@ xfs_bmap_read_extents(
ASSERT(level > 0);
pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes);
bno = be64_to_cpu(*pp);
- ASSERT(bno != NULLFSBLOCK);
- ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount);
- ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks);
+
/*
* Go down the tree until leaf level is reached, following the first
* pointer (leftmost) at each level.
@@ -1864,6 +1863,7 @@ xfs_bmap_add_extent_delay_real(
*/
trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
xfs_bmbt_set_startblock(ep, new->br_startblock);
+ xfs_bmbt_set_state(ep, new->br_state);
trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);

(*nextents)++;
@@ -2202,6 +2202,7 @@ STATIC int /* error */
xfs_bmap_add_extent_unwritten_real(
struct xfs_trans *tp,
xfs_inode_t *ip, /* incore inode pointer */
+ int whichfork,
xfs_extnum_t *idx, /* extent number to update/insert */
xfs_btree_cur_t **curp, /* if *curp is null, not a btree */
xfs_bmbt_irec_t *new, /* new data to add to file extents */
@@ -2221,12 +2222,14 @@ xfs_bmap_add_extent_unwritten_real(
/* left is 0, right is 1, prev is 2 */
int rval=0; /* return value (logging flags) */
int state = 0;/* state bits, accessed thru macros */
- struct xfs_mount *mp = tp->t_mountp;
+ struct xfs_mount *mp = ip->i_mount;

*logflagsp = 0;

cur = *curp;
- ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
+ ifp = XFS_IFORK_PTR(ip, whichfork);
+ if (whichfork == XFS_COW_FORK)
+ state |= BMAP_COWFORK;

ASSERT(*idx >= 0);
ASSERT(*idx <= xfs_iext_count(ifp));
@@ -2285,7 +2288,7 @@ xfs_bmap_add_extent_unwritten_real(
* Don't set contiguous if the combined extent would be too large.
* Also check for all-three-contiguous being too large.
*/
- if (*idx < xfs_iext_count(&ip->i_df) - 1) {
+ if (*idx < xfs_iext_count(ifp) - 1) {
state |= BMAP_RIGHT_VALID;
xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx + 1), &RIGHT);
if (isnullstartblock(RIGHT.br_startblock))
@@ -2325,7 +2328,8 @@ xfs_bmap_add_extent_unwritten_real(
trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);

xfs_iext_remove(ip, *idx + 1, 2, state);
- ip->i_d.di_nextents -= 2;
+ XFS_IFORK_NEXT_SET(ip, whichfork,
+ XFS_IFORK_NEXTENTS(ip, whichfork) - 2);
if (cur == NULL)
rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
else {
@@ -2368,7 +2372,8 @@ xfs_bmap_add_extent_unwritten_real(
trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);

xfs_iext_remove(ip, *idx + 1, 1, state);
- ip->i_d.di_nextents--;
+ XFS_IFORK_NEXT_SET(ip, whichfork,
+ XFS_IFORK_NEXTENTS(ip, whichfork) - 1);
if (cur == NULL)
rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
else {
@@ -2403,7 +2408,8 @@ xfs_bmap_add_extent_unwritten_real(
xfs_bmbt_set_state(ep, newext);
trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
xfs_iext_remove(ip, *idx + 1, 1, state);
- ip->i_d.di_nextents--;
+ XFS_IFORK_NEXT_SET(ip, whichfork,
+ XFS_IFORK_NEXTENTS(ip, whichfork) - 1);
if (cur == NULL)
rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
else {
@@ -2515,7 +2521,8 @@ xfs_bmap_add_extent_unwritten_real(
trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);

xfs_iext_insert(ip, *idx, 1, new, state);
- ip->i_d.di_nextents++;
+ XFS_IFORK_NEXT_SET(ip, whichfork,
+ XFS_IFORK_NEXTENTS(ip, whichfork) + 1);
if (cur == NULL)
rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
else {
@@ -2593,7 +2600,8 @@ xfs_bmap_add_extent_unwritten_real(
++*idx;
xfs_iext_insert(ip, *idx, 1, new, state);

- ip->i_d.di_nextents++;
+ XFS_IFORK_NEXT_SET(ip, whichfork,
+ XFS_IFORK_NEXTENTS(ip, whichfork) + 1);
if (cur == NULL)
rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
else {
@@ -2641,7 +2649,8 @@ xfs_bmap_add_extent_unwritten_real(
++*idx;
xfs_iext_insert(ip, *idx, 2, &r[0], state);

- ip->i_d.di_nextents += 2;
+ XFS_IFORK_NEXT_SET(ip, whichfork,
+ XFS_IFORK_NEXTENTS(ip, whichfork) + 2);
if (cur == NULL)
rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
else {
@@ -2695,17 +2704,17 @@ xfs_bmap_add_extent_unwritten_real(
}

/* update reverse mappings */
- error = xfs_rmap_convert_extent(mp, dfops, ip, XFS_DATA_FORK, new);
+ error = xfs_rmap_convert_extent(mp, dfops, ip, whichfork, new);
if (error)
goto done;

/* convert to a btree if necessary */
- if (xfs_bmap_needs_btree(ip, XFS_DATA_FORK)) {
+ if (xfs_bmap_needs_btree(ip, whichfork)) {
int tmp_logflags; /* partial log flag return val */

ASSERT(cur == NULL);
error = xfs_bmap_extents_to_btree(tp, ip, first, dfops, &cur,
- 0, &tmp_logflags, XFS_DATA_FORK);
+ 0, &tmp_logflags, whichfork);
*logflagsp |= tmp_logflags;
if (error)
goto done;
@@ -2717,7 +2726,7 @@ xfs_bmap_add_extent_unwritten_real(
*curp = cur;
}

- xfs_bmap_check_leaf_extents(*curp, ip, XFS_DATA_FORK);
+ xfs_bmap_check_leaf_extents(*curp, ip, whichfork);
done:
*logflagsp |= rval;
return error;
@@ -2809,7 +2818,8 @@ xfs_bmap_add_extent_hole_delay(
oldlen = startblockval(left.br_startblock) +
startblockval(new->br_startblock) +
startblockval(right.br_startblock);
- newlen = xfs_bmap_worst_indlen(ip, temp);
+ newlen = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
+ oldlen);
xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, *idx),
nullstartblock((int)newlen));
trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
@@ -2830,7 +2840,8 @@ xfs_bmap_add_extent_hole_delay(
xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx), temp);
oldlen = startblockval(left.br_startblock) +
startblockval(new->br_startblock);
- newlen = xfs_bmap_worst_indlen(ip, temp);
+ newlen = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
+ oldlen);
xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, *idx),
nullstartblock((int)newlen));
trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
@@ -2846,7 +2857,8 @@ xfs_bmap_add_extent_hole_delay(
temp = new->br_blockcount + right.br_blockcount;
oldlen = startblockval(new->br_startblock) +
startblockval(right.br_startblock);
- newlen = xfs_bmap_worst_indlen(ip, temp);
+ newlen = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
+ oldlen);
xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, *idx),
new->br_startoff,
nullstartblock((int)newlen), temp, right.br_state);
@@ -3822,17 +3834,13 @@ xfs_bmap_btalloc(
* the first block that was allocated.
*/
ASSERT(*ap->firstblock == NULLFSBLOCK ||
- XFS_FSB_TO_AGNO(mp, *ap->firstblock) ==
- XFS_FSB_TO_AGNO(mp, args.fsbno) ||
- (ap->dfops->dop_low &&
- XFS_FSB_TO_AGNO(mp, *ap->firstblock) <
- XFS_FSB_TO_AGNO(mp, args.fsbno)));
+ XFS_FSB_TO_AGNO(mp, *ap->firstblock) <=
+ XFS_FSB_TO_AGNO(mp, args.fsbno));

ap->blkno = args.fsbno;
if (*ap->firstblock == NULLFSBLOCK)
*ap->firstblock = args.fsbno;
- ASSERT(nullfb || fb_agno == args.agno ||
- (ap->dfops->dop_low && fb_agno < args.agno));
+ ASSERT(nullfb || fb_agno <= args.agno);
ap->length = args.len;
if (!(ap->flags & XFS_BMAPI_COWFORK))
ap->ip->i_d.di_nblocks += args.len;
@@ -4156,6 +4164,19 @@ xfs_bmapi_read(
return 0;
}

+/*
+ * Add a delayed allocation extent to an inode. Blocks are reserved from the
+ * global pool and the extent inserted into the inode in-core extent tree.
+ *
+ * On entry, got refers to the first extent beyond the offset of the extent to
+ * allocate or eof is specified if no such extent exists. On return, got refers
+ * to the extent record that was inserted to the inode fork.
+ *
+ * Note that the allocated extent may have been merged with contiguous extents
+ * during insertion into the inode fork. Thus, got does not reflect the current
+ * state of the inode fork on return. If necessary, the caller can use lastx to
+ * look up the updated record in the inode fork.
+ */
int
xfs_bmapi_reserve_delalloc(
struct xfs_inode *ip,
@@ -4242,13 +4263,8 @@ xfs_bmapi_reserve_delalloc(
got->br_startblock = nullstartblock(indlen);
got->br_blockcount = alen;
got->br_state = XFS_EXT_NORM;
- xfs_bmap_add_extent_hole_delay(ip, whichfork, lastx, got);

- /*
- * Update our extent pointer, given that xfs_bmap_add_extent_hole_delay
- * might have merged it into one of the neighbouring ones.
- */
- xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *lastx), got);
+ xfs_bmap_add_extent_hole_delay(ip, whichfork, lastx, got);

/*
* Tag the inode if blocks were preallocated. Note that COW fork
@@ -4260,10 +4276,6 @@ xfs_bmapi_reserve_delalloc(
if (whichfork == XFS_COW_FORK && (prealloc || aoff < off || alen > len))
xfs_inode_set_cowblocks_tag(ip);

- ASSERT(got->br_startoff <= aoff);
- ASSERT(got->br_startoff + got->br_blockcount >= aoff + alen);
- ASSERT(isnullstartblock(got->br_startblock));
- ASSERT(got->br_state == XFS_EXT_NORM);
return 0;

out_unreserve_blocks:
@@ -4368,10 +4380,16 @@ xfs_bmapi_allocate(
bma->got.br_state = XFS_EXT_NORM;

/*
- * A wasdelay extent has been initialized, so shouldn't be flagged
- * as unwritten.
+ * In the data fork, a wasdelay extent has been initialized, so
+ * shouldn't be flagged as unwritten.
+ *
+ * For the cow fork, however, we convert delalloc reservations
+ * (extents allocated for speculative preallocation) to
+ * allocated unwritten extents, and only convert the unwritten
+ * extents to real extents when we're about to write the data.
*/
- if (!bma->wasdel && (bma->flags & XFS_BMAPI_PREALLOC) &&
+ if ((!bma->wasdel || (bma->flags & XFS_BMAPI_COWFORK)) &&
+ (bma->flags & XFS_BMAPI_PREALLOC) &&
xfs_sb_version_hasextflgbit(&mp->m_sb))
bma->got.br_state = XFS_EXT_UNWRITTEN;

@@ -4422,8 +4440,6 @@ xfs_bmapi_convert_unwritten(
(XFS_BMAPI_PREALLOC | XFS_BMAPI_CONVERT))
return 0;

- ASSERT(whichfork != XFS_COW_FORK);
-
/*
* Modify (by adding) the state flag, if writing.
*/
@@ -4448,8 +4464,8 @@ xfs_bmapi_convert_unwritten(
return error;
}

- error = xfs_bmap_add_extent_unwritten_real(bma->tp, bma->ip, &bma->idx,
- &bma->cur, mval, bma->firstblock, bma->dfops,
+ error = xfs_bmap_add_extent_unwritten_real(bma->tp, bma->ip, whichfork,
+ &bma->idx, &bma->cur, mval, bma->firstblock, bma->dfops,
&tmp_logflags);
/*
* Log the inode core unconditionally in the unwritten extent conversion
@@ -4458,8 +4474,12 @@ xfs_bmapi_convert_unwritten(
* in the transaction for the sake of fsync(), even if nothing has
* changed, because fsync() will not force the log for this transaction
* unless it sees the inode pinned.
+ *
+ * Note: If we're only converting cow fork extents, there aren't
+ * any on-disk updates to make, so we don't need to log anything.
*/
- bma->logflags |= tmp_logflags | XFS_ILOG_CORE;
+ if (whichfork != XFS_COW_FORK)
+ bma->logflags |= tmp_logflags | XFS_ILOG_CORE;
if (error)
return error;

@@ -4533,15 +4553,15 @@ xfs_bmapi_write(
ASSERT(*nmap >= 1);
ASSERT(*nmap <= XFS_BMAP_MAX_NMAP);
ASSERT(!(flags & XFS_BMAPI_IGSTATE));
- ASSERT(tp != NULL);
+ ASSERT(tp != NULL ||
+ (flags & (XFS_BMAPI_CONVERT | XFS_BMAPI_COWFORK)) ==
+ (XFS_BMAPI_CONVERT | XFS_BMAPI_COWFORK));
ASSERT(len > 0);
ASSERT(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL);
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
ASSERT(!(flags & XFS_BMAPI_REMAP) || whichfork == XFS_DATA_FORK);
ASSERT(!(flags & XFS_BMAPI_PREALLOC) || !(flags & XFS_BMAPI_REMAP));
ASSERT(!(flags & XFS_BMAPI_CONVERT) || !(flags & XFS_BMAPI_REMAP));
- ASSERT(!(flags & XFS_BMAPI_PREALLOC) || whichfork != XFS_COW_FORK);
- ASSERT(!(flags & XFS_BMAPI_CONVERT) || whichfork != XFS_COW_FORK);

/* zeroing is for currently only for data extents, not metadata */
ASSERT((flags & (XFS_BMAPI_METADATA | XFS_BMAPI_ZERO)) !=
@@ -4746,13 +4766,9 @@ xfs_bmapi_write(
if (bma.cur) {
if (!error) {
ASSERT(*firstblock == NULLFSBLOCK ||
- XFS_FSB_TO_AGNO(mp, *firstblock) ==
+ XFS_FSB_TO_AGNO(mp, *firstblock) <=
XFS_FSB_TO_AGNO(mp,
- bma.cur->bc_private.b.firstblock) ||
- (dfops->dop_low &&
- XFS_FSB_TO_AGNO(mp, *firstblock) <
- XFS_FSB_TO_AGNO(mp,
- bma.cur->bc_private.b.firstblock)));
+ bma.cur->bc_private.b.firstblock));
*firstblock = bma.cur->bc_private.b.firstblock;
}
xfs_btree_del_cursor(bma.cur,
@@ -4787,34 +4803,59 @@ xfs_bmap_split_indlen(
xfs_filblks_t len2 = *indlen2;
xfs_filblks_t nres = len1 + len2; /* new total res. */
xfs_filblks_t stolen = 0;
+ xfs_filblks_t resfactor;

/*
* Steal as many blocks as we can to try and satisfy the worst case
* indlen for both new extents.
*/
- while (nres > ores && avail) {
- nres--;
- avail--;
- stolen++;
- }
+ if (ores < nres && avail)
+ stolen = XFS_FILBLKS_MIN(nres - ores, avail);
+ ores += stolen;
+
+ /* nothing else to do if we've satisfied the new reservation */
+ if (ores >= nres)
+ return stolen;
+
+ /*
+ * We can't meet the total required reservation for the two extents.
+ * Calculate the percent of the overall shortage between both extents
+ * and apply this percentage to each of the requested indlen values.
+ * This distributes the shortage fairly and reduces the chances that one
+ * of the two extents is left with nothing when extents are repeatedly
+ * split.
+ */
+ resfactor = (ores * 100);
+ do_div(resfactor, nres);
+ len1 *= resfactor;
+ do_div(len1, 100);
+ len2 *= resfactor;
+ do_div(len2, 100);
+ ASSERT(len1 + len2 <= ores);
+ ASSERT(len1 < *indlen1 && len2 < *indlen2);

/*
- * The only blocks available are those reserved for the original
- * extent and what we can steal from the extent being removed.
- * If this still isn't enough to satisfy the combined
- * requirements for the two new extents, skim blocks off of each
- * of the new reservations until they match what is available.
+ * Hand out the remainder to each extent. If one of the two reservations
+ * is zero, we want to make sure that one gets a block first. The loop
+ * below starts with len1, so hand len2 a block right off the bat if it
+ * is zero.
*/
- while (nres > ores) {
- if (len1) {
- len1--;
- nres--;
+ ores -= (len1 + len2);
+ ASSERT((*indlen1 - len1) + (*indlen2 - len2) >= ores);
+ if (ores && !len2 && *indlen2) {
+ len2++;
+ ores--;
+ }
+ while (ores) {
+ if (len1 < *indlen1) {
+ len1++;
+ ores--;
}
- if (nres == ores)
+ if (!ores)
break;
- if (len2) {
- len2--;
- nres--;
+ if (len2 < *indlen2) {
+ len2++;
+ ores--;
}
}

@@ -5556,8 +5597,8 @@ __xfs_bunmapi(
}
del.br_state = XFS_EXT_UNWRITTEN;
error = xfs_bmap_add_extent_unwritten_real(tp, ip,
- &lastx, &cur, &del, firstblock, dfops,
- &logflags);
+ whichfork, &lastx, &cur, &del,
+ firstblock, dfops, &logflags);
if (error)
goto error0;
goto nodelete;
@@ -5610,8 +5651,9 @@ __xfs_bunmapi(
prev.br_state = XFS_EXT_UNWRITTEN;
lastx--;
error = xfs_bmap_add_extent_unwritten_real(tp,
- ip, &lastx, &cur, &prev,
- firstblock, dfops, &logflags);
+ ip, whichfork, &lastx, &cur,
+ &prev, firstblock, dfops,
+ &logflags);
if (error)
goto error0;
goto nodelete;
@@ -5619,8 +5661,9 @@ __xfs_bunmapi(
ASSERT(del.br_state == XFS_EXT_NORM);
del.br_state = XFS_EXT_UNWRITTEN;
error = xfs_bmap_add_extent_unwritten_real(tp,
- ip, &lastx, &cur, &del,
- firstblock, dfops, &logflags);
+ ip, whichfork, &lastx, &cur,
+ &del, firstblock, dfops,
+ &logflags);
if (error)
goto error0;
goto nodelete;
diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c
index d9be241fc86f..999cc5878890 100644
--- a/fs/xfs/libxfs/xfs_bmap_btree.c
+++ b/fs/xfs/libxfs/xfs_bmap_btree.c
@@ -453,8 +453,8 @@ xfs_bmbt_alloc_block(

if (args.fsbno == NULLFSBLOCK) {
args.fsbno = be64_to_cpu(start->l);
-try_another_ag:
args.type = XFS_ALLOCTYPE_START_BNO;
+try_another_ag:
/*
* Make sure there is sufficient room left in the AG to
* complete a full tree split for an extent insert. If
@@ -494,8 +494,8 @@ xfs_bmbt_alloc_block(
if (xfs_sb_version_hasreflink(&cur->bc_mp->m_sb) &&
args.fsbno == NULLFSBLOCK &&
args.type == XFS_ALLOCTYPE_NEAR_BNO) {
- cur->bc_private.b.dfops->dop_low = true;
args.fsbno = cur->bc_private.b.firstblock;
+ args.type = XFS_ALLOCTYPE_FIRST_AG;
goto try_another_ag;
}

@@ -512,7 +512,7 @@ xfs_bmbt_alloc_block(
goto error0;
cur->bc_private.b.dfops->dop_low = true;
}
- if (args.fsbno == NULLFSBLOCK) {
+ if (WARN_ON_ONCE(args.fsbno == NULLFSBLOCK)) {
XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
*stat = 0;
return 0;
diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c
index 21e6a6ab6b9a..2849d3fa3d0b 100644
--- a/fs/xfs/libxfs/xfs_btree.c
+++ b/fs/xfs/libxfs/xfs_btree.c
@@ -810,7 +810,8 @@ xfs_btree_read_bufl(
xfs_daddr_t d; /* real disk block address */
int error;

- ASSERT(fsbno != NULLFSBLOCK);
+ if (!XFS_FSB_SANITY_CHECK(mp, fsbno))
+ return -EFSCORRUPTED;
d = XFS_FSB_TO_DADDR(mp, fsbno);
error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d,
mp->m_bsize, lock, &bp, ops);
diff --git a/fs/xfs/libxfs/xfs_btree.h b/fs/xfs/libxfs/xfs_btree.h
index b69b947c4c1b..33a8f8694d30 100644
--- a/fs/xfs/libxfs/xfs_btree.h
+++ b/fs/xfs/libxfs/xfs_btree.h
@@ -456,7 +456,7 @@ static inline int xfs_btree_get_level(struct xfs_btree_block *block)
#define XFS_FILBLKS_MAX(a,b) max_t(xfs_filblks_t, (a), (b))

#define XFS_FSB_SANITY_CHECK(mp,fsb) \
- (XFS_FSB_TO_AGNO(mp, fsb) < mp->m_sb.sb_agcount && \
+ (fsb && XFS_FSB_TO_AGNO(mp, fsb) < mp->m_sb.sb_agcount && \
XFS_FSB_TO_AGBNO(mp, fsb) < mp->m_sb.sb_agblocks)

/*
diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c
index f2dc1a950c85..1bdf2888295b 100644
--- a/fs/xfs/libxfs/xfs_da_btree.c
+++ b/fs/xfs/libxfs/xfs_da_btree.c
@@ -2633,7 +2633,7 @@ xfs_da_read_buf(
/*
* Readahead the dir/attr block.
*/
-xfs_daddr_t
+int
xfs_da_reada_buf(
struct xfs_inode *dp,
xfs_dablk_t bno,
@@ -2664,7 +2664,5 @@ xfs_da_reada_buf(
if (mapp != &map)
kmem_free(mapp);

- if (error)
- return -1;
- return mappedbno;
+ return error;
}
diff --git a/fs/xfs/libxfs/xfs_da_btree.h b/fs/xfs/libxfs/xfs_da_btree.h
index 98c75cbe6ac2..4e29cb6a3627 100644
--- a/fs/xfs/libxfs/xfs_da_btree.h
+++ b/fs/xfs/libxfs/xfs_da_btree.h
@@ -201,7 +201,7 @@ int xfs_da_read_buf(struct xfs_trans *trans, struct xfs_inode *dp,
xfs_dablk_t bno, xfs_daddr_t mappedbno,
struct xfs_buf **bpp, int whichfork,
const struct xfs_buf_ops *ops);
-xfs_daddr_t xfs_da_reada_buf(struct xfs_inode *dp, xfs_dablk_t bno,
+int xfs_da_reada_buf(struct xfs_inode *dp, xfs_dablk_t bno,
xfs_daddr_t mapped_bno, int whichfork,
const struct xfs_buf_ops *ops);
int xfs_da_shrink_inode(xfs_da_args_t *args, xfs_dablk_t dead_blkno,
diff --git a/fs/xfs/libxfs/xfs_dir2_node.c b/fs/xfs/libxfs/xfs_dir2_node.c
index 75a557432d0f..bbd1238852b3 100644
--- a/fs/xfs/libxfs/xfs_dir2_node.c
+++ b/fs/xfs/libxfs/xfs_dir2_node.c
@@ -155,6 +155,42 @@ const struct xfs_buf_ops xfs_dir3_free_buf_ops = {
.verify_write = xfs_dir3_free_write_verify,
};

+/* Everything ok in the free block header? */
+static bool
+xfs_dir3_free_header_check(
+ struct xfs_inode *dp,
+ xfs_dablk_t fbno,
+ struct xfs_buf *bp)
+{
+ struct xfs_mount *mp = dp->i_mount;
+ unsigned int firstdb;
+ int maxbests;
+
+ maxbests = dp->d_ops->free_max_bests(mp->m_dir_geo);
+ firstdb = (xfs_dir2_da_to_db(mp->m_dir_geo, fbno) -
+ xfs_dir2_byte_to_db(mp->m_dir_geo, XFS_DIR2_FREE_OFFSET)) *
+ maxbests;
+ if (xfs_sb_version_hascrc(&mp->m_sb)) {
+ struct xfs_dir3_free_hdr *hdr3 = bp->b_addr;
+
+ if (be32_to_cpu(hdr3->firstdb) != firstdb)
+ return false;
+ if (be32_to_cpu(hdr3->nvalid) > maxbests)
+ return false;
+ if (be32_to_cpu(hdr3->nvalid) < be32_to_cpu(hdr3->nused))
+ return false;
+ } else {
+ struct xfs_dir2_free_hdr *hdr = bp->b_addr;
+
+ if (be32_to_cpu(hdr->firstdb) != firstdb)
+ return false;
+ if (be32_to_cpu(hdr->nvalid) > maxbests)
+ return false;
+ if (be32_to_cpu(hdr->nvalid) < be32_to_cpu(hdr->nused))
+ return false;
+ }
+ return true;
+}

static int
__xfs_dir3_free_read(
@@ -168,11 +204,22 @@ __xfs_dir3_free_read(

err = xfs_da_read_buf(tp, dp, fbno, mappedbno, bpp,
XFS_DATA_FORK, &xfs_dir3_free_buf_ops);
+ if (err || !*bpp)
+ return err;
+
+ /* Check things that we can't do in the verifier. */
+ if (!xfs_dir3_free_header_check(dp, fbno, *bpp)) {
+ xfs_buf_ioerror(*bpp, -EFSCORRUPTED);
+ xfs_verifier_error(*bpp);
+ xfs_trans_brelse(tp, *bpp);
+ return -EFSCORRUPTED;
+ }

/* try read returns without an error or *bpp if it lands in a hole */
- if (!err && tp && *bpp)
+ if (tp)
xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_DIR_FREE_BUF);
- return err;
+
+ return 0;
}

int
diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c
index f272abff11e1..d41ade5d293e 100644
--- a/fs/xfs/libxfs/xfs_ialloc.c
+++ b/fs/xfs/libxfs/xfs_ialloc.c
@@ -51,8 +51,7 @@ xfs_ialloc_cluster_alignment(
struct xfs_mount *mp)
{
if (xfs_sb_version_hasalign(&mp->m_sb) &&
- mp->m_sb.sb_inoalignmt >=
- XFS_B_TO_FSBT(mp, mp->m_inode_cluster_size))
+ mp->m_sb.sb_inoalignmt >= xfs_icluster_size_fsb(mp))
return mp->m_sb.sb_inoalignmt;
return 1;
}
diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c
index 222e103356c6..25c1e078aef6 100644
--- a/fs/xfs/libxfs/xfs_inode_fork.c
+++ b/fs/xfs/libxfs/xfs_inode_fork.c
@@ -26,6 +26,7 @@
#include "xfs_inode.h"
#include "xfs_trans.h"
#include "xfs_inode_item.h"
+#include "xfs_btree.h"
#include "xfs_bmap_btree.h"
#include "xfs_bmap.h"
#include "xfs_error.h"
@@ -429,11 +430,13 @@ xfs_iformat_btree(
/* REFERENCED */
int nrecs;
int size;
+ int level;

ifp = XFS_IFORK_PTR(ip, whichfork);
dfp = (xfs_bmdr_block_t *)XFS_DFORK_PTR(dip, whichfork);
size = XFS_BMAP_BROOT_SPACE(mp, dfp);
nrecs = be16_to_cpu(dfp->bb_numrecs);
+ level = be16_to_cpu(dfp->bb_level);

/*
* blow out if -- fork has less extents than can fit in
@@ -446,7 +449,8 @@ xfs_iformat_btree(
XFS_IFORK_MAXEXT(ip, whichfork) ||
XFS_BMDR_SPACE_CALC(nrecs) >
XFS_DFORK_SIZE(dip, mp, whichfork) ||
- XFS_IFORK_NEXTENTS(ip, whichfork) > ip->i_d.di_nblocks)) {
+ XFS_IFORK_NEXTENTS(ip, whichfork) > ip->i_d.di_nblocks) ||
+ level == 0 || level > XFS_BTREE_MAXLEVELS) {
xfs_warn(mp, "corrupt inode %Lu (btree).",
(unsigned long long) ip->i_ino);
XFS_CORRUPTION_ERROR("xfs_iformat_btree", XFS_ERRLEVEL_LOW,
@@ -497,15 +501,14 @@ xfs_iread_extents(
* We know that the size is valid (it's checked in iformat_btree)
*/
ifp->if_bytes = ifp->if_real_bytes = 0;
- ifp->if_flags |= XFS_IFEXTENTS;
xfs_iext_add(ifp, 0, nextents);
error = xfs_bmap_read_extents(tp, ip, whichfork);
if (error) {
xfs_iext_destroy(ifp);
- ifp->if_flags &= ~XFS_IFEXTENTS;
return error;
}
xfs_validate_extents(ifp, nextents, XFS_EXTFMT_INODE(ip));
+ ifp->if_flags |= XFS_IFEXTENTS;
return 0;
}
/*
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 631e7c0e0a29..937d406d3c11 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -274,54 +274,49 @@ xfs_end_io(
struct xfs_ioend *ioend =
container_of(work, struct xfs_ioend, io_work);
struct xfs_inode *ip = XFS_I(ioend->io_inode);
+ xfs_off_t offset = ioend->io_offset;
+ size_t size = ioend->io_size;
int error = ioend->io_bio->bi_error;

/*
- * Set an error if the mount has shut down and proceed with end I/O
- * processing so it can perform whatever cleanups are necessary.
+ * Just clean up the in-memory strutures if the fs has been shut down.
*/
- if (XFS_FORCED_SHUTDOWN(ip->i_mount))
+ if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {
error = -EIO;
+ goto done;
+ }

/*
- * For a CoW extent, we need to move the mapping from the CoW fork
- * to the data fork. If instead an error happened, just dump the
- * new blocks.
+ * Clean up any COW blocks on an I/O error.
*/
- if (ioend->io_type == XFS_IO_COW) {
- if (error)
- goto done;
- if (ioend->io_bio->bi_error) {
- error = xfs_reflink_cancel_cow_range(ip,
- ioend->io_offset, ioend->io_size);
- goto done;
+ if (unlikely(error)) {
+ switch (ioend->io_type) {
+ case XFS_IO_COW:
+ xfs_reflink_cancel_cow_range(ip, offset, size, true);
+ break;
}
- error = xfs_reflink_end_cow(ip, ioend->io_offset,
- ioend->io_size);
- if (error)
- goto done;
+
+ goto done;
}

/*
- * For unwritten extents we need to issue transactions to convert a
- * range to normal written extens after the data I/O has finished.
- * Detecting and handling completion IO errors is done individually
- * for each case as different cleanup operations need to be performed
- * on error.
+ * Success: commit the COW or unwritten blocks if needed.
*/
- if (ioend->io_type == XFS_IO_UNWRITTEN) {
- if (error)
- goto done;
- error = xfs_iomap_write_unwritten(ip, ioend->io_offset,
- ioend->io_size);
- } else if (ioend->io_append_trans) {
- error = xfs_setfilesize_ioend(ioend, error);
- } else {
- ASSERT(!xfs_ioend_is_append(ioend) ||
- ioend->io_type == XFS_IO_COW);
+ switch (ioend->io_type) {
+ case XFS_IO_COW:
+ error = xfs_reflink_end_cow(ip, offset, size);
+ break;
+ case XFS_IO_UNWRITTEN:
+ error = xfs_iomap_write_unwritten(ip, offset, size);
+ break;
+ default:
+ ASSERT(!xfs_ioend_is_append(ioend) || ioend->io_append_trans);
+ break;
}

done:
+ if (ioend->io_append_trans)
+ error = xfs_setfilesize_ioend(ioend, error);
xfs_destroy_ioend(ioend, error);
}

@@ -481,6 +476,12 @@ xfs_submit_ioend(
struct xfs_ioend *ioend,
int status)
{
+ /* Convert CoW extents to regular */
+ if (!status && ioend->io_type == XFS_IO_COW) {
+ status = xfs_reflink_convert_cow(XFS_I(ioend->io_inode),
+ ioend->io_offset, ioend->io_size);
+ }
+
/* Reserve log space if we might write beyond the on-disk inode size. */
if (!status &&
ioend->io_type != XFS_IO_UNWRITTEN &&
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index c1417919ab0a..c516d7158a21 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -917,17 +917,18 @@ xfs_can_free_eofblocks(struct xfs_inode *ip, bool force)
*/
int
xfs_free_eofblocks(
- xfs_mount_t *mp,
- xfs_inode_t *ip,
- bool need_iolock)
+ struct xfs_inode *ip)
{
- xfs_trans_t *tp;
- int error;
- xfs_fileoff_t end_fsb;
- xfs_fileoff_t last_fsb;
- xfs_filblks_t map_len;
- int nimaps;
- xfs_bmbt_irec_t imap;
+ struct xfs_trans *tp;
+ int error;
+ xfs_fileoff_t end_fsb;
+ xfs_fileoff_t last_fsb;
+ xfs_filblks_t map_len;
+ int nimaps;
+ struct xfs_bmbt_irec imap;
+ struct xfs_mount *mp = ip->i_mount;
+
+ ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));

/*
* Figure out if there are any blocks beyond the end
@@ -944,6 +945,10 @@ xfs_free_eofblocks(
error = xfs_bmapi_read(ip, end_fsb, map_len, &imap, &nimaps, 0);
xfs_iunlock(ip, XFS_ILOCK_SHARED);

+ /*
+ * If there are blocks after the end of file, truncate the file to its
+ * current size to free them up.
+ */
if (!error && (nimaps != 0) &&
(imap.br_startblock != HOLESTARTBLOCK ||
ip->i_delayed_blks)) {
@@ -954,22 +959,13 @@ xfs_free_eofblocks(
if (error)
return error;

- /*
- * There are blocks after the end of file.
- * Free them up now by truncating the file to
- * its current size.
- */
- if (need_iolock) {
- if (!xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL))
- return -EAGAIN;
- }
+ /* wait on dio to ensure i_size has settled */
+ inode_dio_wait(VFS_I(ip));

error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0,
&tp);
if (error) {
ASSERT(XFS_FORCED_SHUTDOWN(mp));
- if (need_iolock)
- xfs_iunlock(ip, XFS_IOLOCK_EXCL);
return error;
}

@@ -997,8 +993,6 @@ xfs_free_eofblocks(
}

xfs_iunlock(ip, XFS_ILOCK_EXCL);
- if (need_iolock)
- xfs_iunlock(ip, XFS_IOLOCK_EXCL);
}
return error;
}
@@ -1393,10 +1387,16 @@ xfs_shift_file_space(
xfs_fileoff_t stop_fsb;
xfs_fileoff_t next_fsb;
xfs_fileoff_t shift_fsb;
+ uint resblks;

ASSERT(direction == SHIFT_LEFT || direction == SHIFT_RIGHT);

if (direction == SHIFT_LEFT) {
+ /*
+ * Reserve blocks to cover potential extent merges after left
+ * shift operations.
+ */
+ resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
next_fsb = XFS_B_TO_FSB(mp, offset + len);
stop_fsb = XFS_B_TO_FSB(mp, VFS_I(ip)->i_size);
} else {
@@ -1404,6 +1404,7 @@ xfs_shift_file_space(
* If right shift, delegate the work of initialization of
* next_fsb to xfs_bmap_shift_extent as it has ilock held.
*/
+ resblks = 0;
next_fsb = NULLFSBLOCK;
stop_fsb = XFS_B_TO_FSB(mp, offset);
}
@@ -1415,7 +1416,7 @@ xfs_shift_file_space(
* into the accessible region of the file.
*/
if (xfs_can_free_eofblocks(ip, true)) {
- error = xfs_free_eofblocks(mp, ip, false);
+ error = xfs_free_eofblocks(ip);
if (error)
return error;
}
@@ -1445,21 +1446,14 @@ xfs_shift_file_space(
}

while (!error && !done) {
- /*
- * We would need to reserve permanent block for transaction.
- * This will come into picture when after shifting extent into
- * hole we found that adjacent extents can be merged which
- * may lead to freeing of a block during record update.
- */
- error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write,
- XFS_DIOSTRAT_SPACE_RES(mp, 0), 0, 0, &tp);
+ error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, 0,
+ &tp);
if (error)
break;

xfs_ilock(ip, XFS_ILOCK_EXCL);
error = xfs_trans_reserve_quota(tp, mp, ip->i_udquot,
- ip->i_gdquot, ip->i_pdquot,
- XFS_DIOSTRAT_SPACE_RES(mp, 0), 0,
+ ip->i_gdquot, ip->i_pdquot, resblks, 0,
XFS_QMOPT_RES_REGBLKS);
if (error)
goto out_trans_cancel;
diff --git a/fs/xfs/xfs_bmap_util.h b/fs/xfs/xfs_bmap_util.h
index 68a621a8e0c0..f1005393785c 100644
--- a/fs/xfs/xfs_bmap_util.h
+++ b/fs/xfs/xfs_bmap_util.h
@@ -63,8 +63,7 @@ int xfs_insert_file_space(struct xfs_inode *, xfs_off_t offset,

/* EOF block manipulation functions */
bool xfs_can_free_eofblocks(struct xfs_inode *ip, bool force);
-int xfs_free_eofblocks(struct xfs_mount *mp, struct xfs_inode *ip,
- bool need_iolock);
+int xfs_free_eofblocks(struct xfs_inode *ip);

int xfs_swap_extents(struct xfs_inode *ip, struct xfs_inode *tip,
struct xfs_swapext *sx);
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 2975cb2319f4..0306168af332 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -1162,6 +1162,7 @@ xfs_buf_iodone_callbacks(
*/
bp->b_last_error = 0;
bp->b_retries = 0;
+ bp->b_first_retry_time = 0;

xfs_buf_do_callbacks(bp);
bp->b_fspriv = NULL;
diff --git a/fs/xfs/xfs_extent_busy.c b/fs/xfs/xfs_extent_busy.c
index 162dc186cf04..29c2f997aedf 100644
--- a/fs/xfs/xfs_extent_busy.c
+++ b/fs/xfs/xfs_extent_busy.c
@@ -45,18 +45,7 @@ xfs_extent_busy_insert(
struct rb_node **rbp;
struct rb_node *parent = NULL;

- new = kmem_zalloc(sizeof(struct xfs_extent_busy), KM_MAYFAIL);
- if (!new) {
- /*
- * No Memory! Since it is now not possible to track the free
- * block, make this a synchronous transaction to insure that
- * the block is not reused before this transaction commits.
- */
- trace_xfs_extent_busy_enomem(tp->t_mountp, agno, bno, len);
- xfs_trans_set_sync(tp);
- return;
- }
-
+ new = kmem_zalloc(sizeof(struct xfs_extent_busy), KM_SLEEP);
new->agno = agno;
new->bno = bno;
new->length = len;
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index bbb9eb6811b2..2a695a8f4fe7 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -527,6 +527,15 @@ xfs_file_dio_aio_write(
if ((iocb->ki_pos & mp->m_blockmask) ||
((iocb->ki_pos + count) & mp->m_blockmask)) {
unaligned_io = 1;
+
+ /*
+ * We can't properly handle unaligned direct I/O to reflink
+ * files yet, as we can't unshare a partial block.
+ */
+ if (xfs_is_reflink_inode(ip)) {
+ trace_xfs_reflink_bounce_dio_write(ip, iocb->ki_pos, count);
+ return -EREMCHG;
+ }
iolock = XFS_IOLOCK_EXCL;
} else {
iolock = XFS_IOLOCK_SHARED;
@@ -614,8 +623,10 @@ xfs_file_buffered_aio_write(
struct xfs_inode *ip = XFS_I(inode);
ssize_t ret;
int enospc = 0;
- int iolock = XFS_IOLOCK_EXCL;
+ int iolock;

+write_retry:
+ iolock = XFS_IOLOCK_EXCL;
xfs_ilock(ip, iolock);

ret = xfs_file_aio_write_checks(iocb, from, &iolock);
@@ -625,7 +636,6 @@ xfs_file_buffered_aio_write(
/* We can write back this queue in page reclaim */
current->backing_dev_info = inode_to_bdi(inode);

-write_retry:
trace_xfs_file_buffered_write(ip, iov_iter_count(from), iocb->ki_pos);
ret = iomap_file_buffered_write(iocb, from, &xfs_iomap_ops);
if (likely(ret >= 0))
@@ -641,18 +651,21 @@ xfs_file_buffered_aio_write(
* running at the same time.
*/
if (ret == -EDQUOT && !enospc) {
+ xfs_iunlock(ip, iolock);
enospc = xfs_inode_free_quota_eofblocks(ip);
if (enospc)
goto write_retry;
enospc = xfs_inode_free_quota_cowblocks(ip);
if (enospc)
goto write_retry;
+ iolock = 0;
} else if (ret == -ENOSPC && !enospc) {
struct xfs_eofblocks eofb = {0};

enospc = 1;
xfs_flush_inodes(ip->i_mount);
- eofb.eof_scan_owner = ip->i_ino; /* for locking */
+
+ xfs_iunlock(ip, iolock);
eofb.eof_flags = XFS_EOF_FLAGS_SYNC;
xfs_icache_free_eofblocks(ip->i_mount, &eofb);
goto write_retry;
@@ -660,7 +673,8 @@ xfs_file_buffered_aio_write(

current->backing_dev_info = NULL;
out:
- xfs_iunlock(ip, iolock);
+ if (iolock)
+ xfs_iunlock(ip, iolock);
return ret;
}

@@ -908,9 +922,9 @@ xfs_dir_open(
*/
mode = xfs_ilock_data_map_shared(ip);
if (ip->i_d.di_nextents > 0)
- xfs_dir3_data_readahead(ip, 0, -1);
+ error = xfs_dir3_data_readahead(ip, 0, -1);
xfs_iunlock(ip, mode);
- return 0;
+ return error;
}

STATIC int
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index 70ca4f608321..3531f8f72fa5 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -1322,13 +1322,10 @@ xfs_inode_free_eofblocks(
int flags,
void *args)
{
- int ret;
+ int ret = 0;
struct xfs_eofblocks *eofb = args;
- bool need_iolock = true;
int match;

- ASSERT(!eofb || (eofb && eofb->eof_scan_owner != 0));
-
if (!xfs_can_free_eofblocks(ip, false)) {
/* inode could be preallocated or append-only */
trace_xfs_inode_free_eofblocks_invalid(ip);
@@ -1356,21 +1353,19 @@ xfs_inode_free_eofblocks(
if (eofb->eof_flags & XFS_EOF_FLAGS_MINFILESIZE &&
XFS_ISIZE(ip) < eofb->eof_min_file_size)
return 0;
-
- /*
- * A scan owner implies we already hold the iolock. Skip it in
- * xfs_free_eofblocks() to avoid deadlock. This also eliminates
- * the possibility of EAGAIN being returned.
- */
- if (eofb->eof_scan_owner == ip->i_ino)
- need_iolock = false;
}

- ret = xfs_free_eofblocks(ip->i_mount, ip, need_iolock);
-
- /* don't revisit the inode if we're not waiting */
- if (ret == -EAGAIN && !(flags & SYNC_WAIT))
- ret = 0;
+ /*
+ * If the caller is waiting, return -EAGAIN to keep the background
+ * scanner moving and revisit the inode in a subsequent pass.
+ */
+ if (!xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) {
+ if (flags & SYNC_WAIT)
+ ret = -EAGAIN;
+ return ret;
+ }
+ ret = xfs_free_eofblocks(ip);
+ xfs_iunlock(ip, XFS_IOLOCK_EXCL);

return ret;
}
@@ -1417,15 +1412,10 @@ __xfs_inode_free_quota_eofblocks(
struct xfs_eofblocks eofb = {0};
struct xfs_dquot *dq;

- ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
-
/*
- * Set the scan owner to avoid a potential livelock. Otherwise, the scan
- * can repeatedly trylock on the inode we're currently processing. We
- * run a sync scan to increase effectiveness and use the union filter to
+ * Run a sync scan to increase effectiveness and use the union filter to
* cover all applicable quotas in a single scan.
*/
- eofb.eof_scan_owner = ip->i_ino;
eofb.eof_flags = XFS_EOF_FLAGS_UNION|XFS_EOF_FLAGS_SYNC;

if (XFS_IS_UQUOTA_ENFORCED(ip->i_mount)) {
@@ -1577,12 +1567,9 @@ xfs_inode_free_cowblocks(
{
int ret;
struct xfs_eofblocks *eofb = args;
- bool need_iolock = true;
int match;
struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);

- ASSERT(!eofb || (eofb && eofb->eof_scan_owner != 0));
-
/*
* Just clear the tag if we have an empty cow fork or none at all. It's
* possible the inode was fully unshared since it was originally tagged.
@@ -1615,28 +1602,16 @@ xfs_inode_free_cowblocks(
if (eofb->eof_flags & XFS_EOF_FLAGS_MINFILESIZE &&
XFS_ISIZE(ip) < eofb->eof_min_file_size)
return 0;
-
- /*
- * A scan owner implies we already hold the iolock. Skip it in
- * xfs_free_eofblocks() to avoid deadlock. This also eliminates
- * the possibility of EAGAIN being returned.
- */
- if (eofb->eof_scan_owner == ip->i_ino)
- need_iolock = false;
}

/* Free the CoW blocks */
- if (need_iolock) {
- xfs_ilock(ip, XFS_IOLOCK_EXCL);
- xfs_ilock(ip, XFS_MMAPLOCK_EXCL);
- }
+ xfs_ilock(ip, XFS_IOLOCK_EXCL);
+ xfs_ilock(ip, XFS_MMAPLOCK_EXCL);

- ret = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF);
+ ret = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF, false);

- if (need_iolock) {
- xfs_iunlock(ip, XFS_MMAPLOCK_EXCL);
- xfs_iunlock(ip, XFS_IOLOCK_EXCL);
- }
+ xfs_iunlock(ip, XFS_MMAPLOCK_EXCL);
+ xfs_iunlock(ip, XFS_IOLOCK_EXCL);

return ret;
}
diff --git a/fs/xfs/xfs_icache.h b/fs/xfs/xfs_icache.h
index a1e02f4708ab..8a7c849b4dea 100644
--- a/fs/xfs/xfs_icache.h
+++ b/fs/xfs/xfs_icache.h
@@ -27,7 +27,6 @@ struct xfs_eofblocks {
kgid_t eof_gid;
prid_t eof_prid;
__u64 eof_min_file_size;
- xfs_ino_t eof_scan_owner;
};

#define SYNC_WAIT 0x0001 /* wait for i/o to complete */
@@ -102,7 +101,6 @@ xfs_fs_eofblocks_from_user(
dst->eof_flags = src->eof_flags;
dst->eof_prid = src->eof_prid;
dst->eof_min_file_size = src->eof_min_file_size;
- dst->eof_scan_owner = NULLFSINO;

dst->eof_uid = INVALID_UID;
if (src->eof_flags & XFS_EOF_FLAGS_UID) {
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index de32f0fe47c8..7eaf1ef74e3c 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1615,7 +1615,7 @@ xfs_itruncate_extents(

/* Remove all pending CoW reservations. */
error = xfs_reflink_cancel_cow_blocks(ip, &tp, first_unmap_block,
- last_block);
+ last_block, true);
if (error)
goto out;

@@ -1692,32 +1692,34 @@ xfs_release(
if (xfs_can_free_eofblocks(ip, false)) {

/*
+ * Check if the inode is being opened, written and closed
+ * frequently and we have delayed allocation blocks outstanding
+ * (e.g. streaming writes from the NFS server), truncating the
+ * blocks past EOF will cause fragmentation to occur.
+ *
+ * In this case don't do the truncation, but we have to be
+ * careful how we detect this case. Blocks beyond EOF show up as
+ * i_delayed_blks even when the inode is clean, so we need to
+ * truncate them away first before checking for a dirty release.
+ * Hence on the first dirty close we will still remove the
+ * speculative allocation, but after that we will leave it in
+ * place.
+ */
+ if (xfs_iflags_test(ip, XFS_IDIRTY_RELEASE))
+ return 0;
+ /*
* If we can't get the iolock just skip truncating the blocks
* past EOF because we could deadlock with the mmap_sem
- * otherwise. We'll get another chance to drop them once the
+ * otherwise. We'll get another chance to drop them once the
* last reference to the inode is dropped, so we'll never leak
* blocks permanently.
- *
- * Further, check if the inode is being opened, written and
- * closed frequently and we have delayed allocation blocks
- * outstanding (e.g. streaming writes from the NFS server),
- * truncating the blocks past EOF will cause fragmentation to
- * occur.
- *
- * In this case don't do the truncation, either, but we have to
- * be careful how we detect this case. Blocks beyond EOF show
- * up as i_delayed_blks even when the inode is clean, so we
- * need to truncate them away first before checking for a dirty
- * release. Hence on the first dirty close we will still remove
- * the speculative allocation, but after that we will leave it
- * in place.
*/
- if (xfs_iflags_test(ip, XFS_IDIRTY_RELEASE))
- return 0;
-
- error = xfs_free_eofblocks(mp, ip, true);
- if (error && error != -EAGAIN)
- return error;
+ if (xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) {
+ error = xfs_free_eofblocks(ip);
+ xfs_iunlock(ip, XFS_IOLOCK_EXCL);
+ if (error)
+ return error;
+ }

/* delalloc blocks after truncation means it really is dirty */
if (ip->i_delayed_blks)
@@ -1904,8 +1906,11 @@ xfs_inactive(
* cache. Post-eof blocks must be freed, lest we end up with
* broken free space accounting.
*/
- if (xfs_can_free_eofblocks(ip, true))
- xfs_free_eofblocks(mp, ip, false);
+ if (xfs_can_free_eofblocks(ip, true)) {
+ xfs_ilock(ip, XFS_IOLOCK_EXCL);
+ xfs_free_eofblocks(ip);
+ xfs_iunlock(ip, XFS_IOLOCK_EXCL);
+ }

return;
}
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index fdecf79d2fa4..2326a6913fde 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -637,6 +637,11 @@ xfs_file_iomap_begin_delay(
goto out_unlock;
}

+ /*
+ * Flag newly allocated delalloc blocks with IOMAP_F_NEW so we punch
+ * them out if the write happens to fail.
+ */
+ iomap->flags = IOMAP_F_NEW;
trace_xfs_iomap_alloc(ip, offset, count, 0, &got);
done:
if (isnullstartblock(got.br_startblock))
@@ -685,7 +690,7 @@ xfs_iomap_write_allocate(
int nres;

if (whichfork == XFS_COW_FORK)
- flags |= XFS_BMAPI_COWFORK;
+ flags |= XFS_BMAPI_COWFORK | XFS_BMAPI_PREALLOC;

/*
* Make sure that the dquots are there.
@@ -1026,17 +1031,7 @@ xfs_file_iomap_begin(
if (error)
goto out_unlock;

- /*
- * We're here because we're trying to do a directio write to a
- * region that isn't aligned to a filesystem block. If the
- * extent is shared, fall back to buffered mode to handle the
- * RMW.
- */
- if (!(flags & IOMAP_REPORT) && shared) {
- trace_xfs_reflink_bounce_dio_write(ip, &imap);
- error = -EREMCHG;
- goto out_unlock;
- }
+ ASSERT((flags & IOMAP_REPORT) || !shared);
}

if ((flags & (IOMAP_WRITE | IOMAP_ZERO)) && xfs_is_reflink_inode(ip)) {
@@ -1095,7 +1090,8 @@ xfs_file_iomap_end_delalloc(
struct xfs_inode *ip,
loff_t offset,
loff_t length,
- ssize_t written)
+ ssize_t written,
+ struct iomap *iomap)
{
struct xfs_mount *mp = ip->i_mount;
xfs_fileoff_t start_fsb;
@@ -1114,14 +1110,14 @@ xfs_file_iomap_end_delalloc(
end_fsb = XFS_B_TO_FSB(mp, offset + length);

/*
- * Trim back delalloc blocks if we didn't manage to write the whole
- * range reserved.
+ * Trim delalloc blocks if they were allocated by this write and we
+ * didn't manage to write the whole range.
*
* We don't need to care about racing delalloc as we hold i_mutex
* across the reserve/allocate/unreserve calls. If there are delalloc
* blocks in the range, they are ours.
*/
- if (start_fsb < end_fsb) {
+ if ((iomap->flags & IOMAP_F_NEW) && start_fsb < end_fsb) {
truncate_pagecache_range(VFS_I(ip), XFS_FSB_TO_B(mp, start_fsb),
XFS_FSB_TO_B(mp, end_fsb) - 1);

@@ -1151,7 +1147,7 @@ xfs_file_iomap_end(
{
if ((flags & IOMAP_WRITE) && iomap->type == IOMAP_DELALLOC)
return xfs_file_iomap_end_delalloc(XFS_I(inode), offset,
- length, written);
+ length, written, iomap);
return 0;
}

diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 9b9540db17a6..52d27cc4370a 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -187,7 +187,7 @@ xfs_initialize_perag(
xfs_agnumber_t *maxagi)
{
xfs_agnumber_t index;
- xfs_agnumber_t first_initialised = 0;
+ xfs_agnumber_t first_initialised = NULLAGNUMBER;
xfs_perag_t *pag;
int error = -ENOMEM;

@@ -202,22 +202,20 @@ xfs_initialize_perag(
xfs_perag_put(pag);
continue;
}
- if (!first_initialised)
- first_initialised = index;

pag = kmem_zalloc(sizeof(*pag), KM_MAYFAIL);
if (!pag)
- goto out_unwind;
+ goto out_unwind_new_pags;
pag->pag_agno = index;
pag->pag_mount = mp;
spin_lock_init(&pag->pag_ici_lock);
mutex_init(&pag->pag_ici_reclaim_lock);
INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC);
if (xfs_buf_hash_init(pag))
- goto out_unwind;
+ goto out_free_pag;

if (radix_tree_preload(GFP_NOFS))
- goto out_unwind;
+ goto out_hash_destroy;

spin_lock(&mp->m_perag_lock);
if (radix_tree_insert(&mp->m_perag_tree, index, pag)) {
@@ -225,10 +223,13 @@ xfs_initialize_perag(
spin_unlock(&mp->m_perag_lock);
radix_tree_preload_end();
error = -EEXIST;
- goto out_unwind;
+ goto out_hash_destroy;
}
spin_unlock(&mp->m_perag_lock);
radix_tree_preload_end();
+ /* first new pag is fully initialized */
+ if (first_initialised == NULLAGNUMBER)
+ first_initialised = index;
}

index = xfs_set_inode_alloc(mp, agcount);
@@ -239,11 +240,16 @@ xfs_initialize_perag(
mp->m_ag_prealloc_blocks = xfs_prealloc_blocks(mp);
return 0;

-out_unwind:
+out_hash_destroy:
xfs_buf_hash_destroy(pag);
+out_free_pag:
kmem_free(pag);
- for (; index > first_initialised; index--) {
+out_unwind_new_pags:
+ /* unwind any prior newly initialized pags */
+ for (index = first_initialised; index < agcount; index++) {
pag = radix_tree_delete(&mp->m_perag_tree, index);
+ if (!pag)
+ break;
xfs_buf_hash_destroy(pag);
kmem_free(pag);
}
@@ -505,8 +511,7 @@ STATIC void
xfs_set_inoalignment(xfs_mount_t *mp)
{
if (xfs_sb_version_hasalign(&mp->m_sb) &&
- mp->m_sb.sb_inoalignmt >=
- XFS_B_TO_FSBT(mp, mp->m_inode_cluster_size))
+ mp->m_sb.sb_inoalignmt >= xfs_icluster_size_fsb(mp))
mp->m_inoalign_mask = mp->m_sb.sb_inoalignmt - 1;
else
mp->m_inoalign_mask = 0;
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index 07593a362cd0..a72cd2e3c048 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -82,11 +82,22 @@
* mappings are a reservation against the free space in the filesystem;
* adjacent mappings can also be combined into fewer larger mappings.
*
+ * As an optimization, the CoW extent size hint (cowextsz) creates
+ * outsized aligned delalloc reservations in the hope of landing out of
+ * order nearby CoW writes in a single extent on disk, thereby reducing
+ * fragmentation and improving future performance.
+ *
+ * D: --RRRRRRSSSRRRRRRRR--- (data fork)
+ * C: ------DDDDDDD--------- (CoW fork)
+ *
* When dirty pages are being written out (typically in writepage), the
- * delalloc reservations are converted into real mappings by allocating
- * blocks and replacing the delalloc mapping with real ones. A delalloc
- * mapping can be replaced by several real ones if the free space is
- * fragmented.
+ * delalloc reservations are converted into unwritten mappings by
+ * allocating blocks and replacing the delalloc mapping with real ones.
+ * A delalloc mapping can be replaced by several unwritten ones if the
+ * free space is fragmented.
+ *
+ * D: --RRRRRRSSSRRRRRRRR---
+ * C: ------UUUUUUU---------
*
* We want to adapt the delalloc mechanism for copy-on-write, since the
* write paths are similar. The first two steps (creating the reservation
@@ -101,13 +112,29 @@
* Block-aligned directio writes will use the same mechanism as buffered
* writes.
*
+ * Just prior to submitting the actual disk write requests, we convert
+ * the extents representing the range of the file actually being written
+ * (as opposed to extra pieces created for the cowextsize hint) to real
+ * extents. This will become important in the next step:
+ *
+ * D: --RRRRRRSSSRRRRRRRR---
+ * C: ------UUrrUUU---------
+ *
* CoW remapping must be done after the data block write completes,
* because we don't want to destroy the old data fork map until we're sure
* the new block has been written. Since the new mappings are kept in a
* separate fork, we can simply iterate these mappings to find the ones
* that cover the file blocks that we just CoW'd. For each extent, simply
* unmap the corresponding range in the data fork, map the new range into
- * the data fork, and remove the extent from the CoW fork.
+ * the data fork, and remove the extent from the CoW fork. Because of
+ * the presence of the cowextsize hint, however, we must be careful
+ * only to remap the blocks that we've actually written out -- we must
+ * never remap delalloc reservations nor CoW staging blocks that have
+ * yet to be written. This corresponds exactly to the real extents in
+ * the CoW fork:
+ *
+ * D: --RRRRRRrrSRRRRRRRR---
+ * C: ------UU--UUU---------
*
* Since the remapping operation can be applied to an arbitrary file
* range, we record the need for the remap step as a flag in the ioend
@@ -296,6 +323,65 @@ xfs_reflink_reserve_cow(
return 0;
}

+/* Convert part of an unwritten CoW extent to a real one. */
+STATIC int
+xfs_reflink_convert_cow_extent(
+ struct xfs_inode *ip,
+ struct xfs_bmbt_irec *imap,
+ xfs_fileoff_t offset_fsb,
+ xfs_filblks_t count_fsb,
+ struct xfs_defer_ops *dfops)
+{
+ struct xfs_bmbt_irec irec = *imap;
+ xfs_fsblock_t first_block;
+ int nimaps = 1;
+
+ if (imap->br_state == XFS_EXT_NORM)
+ return 0;
+
+ xfs_trim_extent(&irec, offset_fsb, count_fsb);
+ trace_xfs_reflink_convert_cow(ip, &irec);
+ if (irec.br_blockcount == 0)
+ return 0;
+ return xfs_bmapi_write(NULL, ip, irec.br_startoff, irec.br_blockcount,
+ XFS_BMAPI_COWFORK | XFS_BMAPI_CONVERT, &first_block,
+ 0, &irec, &nimaps, dfops);
+}
+
+/* Convert all of the unwritten CoW extents in a file's range to real ones. */
+int
+xfs_reflink_convert_cow(
+ struct xfs_inode *ip,
+ xfs_off_t offset,
+ xfs_off_t count)
+{
+ struct xfs_bmbt_irec got;
+ struct xfs_defer_ops dfops;
+ struct xfs_mount *mp = ip->i_mount;
+ struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
+ xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset);
+ xfs_fileoff_t end_fsb = XFS_B_TO_FSB(mp, offset + count);
+ xfs_extnum_t idx;
+ bool found;
+ int error = 0;
+
+ xfs_ilock(ip, XFS_ILOCK_EXCL);
+
+ /* Convert all the extents to real from unwritten. */
+ for (found = xfs_iext_lookup_extent(ip, ifp, offset_fsb, &idx, &got);
+ found && got.br_startoff < end_fsb;
+ found = xfs_iext_get_extent(ifp, ++idx, &got)) {
+ error = xfs_reflink_convert_cow_extent(ip, &got, offset_fsb,
+ end_fsb - offset_fsb, &dfops);
+ if (error)
+ break;
+ }
+
+ /* Finish up. */
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+ return error;
+}
+
/* Allocate all CoW reservations covering a range of blocks in a file. */
static int
__xfs_reflink_allocate_cow(
@@ -328,6 +414,7 @@ __xfs_reflink_allocate_cow(
goto out_unlock;
ASSERT(nimaps == 1);

+ /* Make sure there's a CoW reservation for it. */
error = xfs_reflink_reserve_cow(ip, &imap, &shared);
if (error)
goto out_trans_cancel;
@@ -337,14 +424,16 @@ __xfs_reflink_allocate_cow(
goto out_trans_cancel;
}

+ /* Allocate the entire reservation as unwritten blocks. */
xfs_trans_ijoin(tp, ip, 0);
error = xfs_bmapi_write(tp, ip, imap.br_startoff, imap.br_blockcount,
- XFS_BMAPI_COWFORK, &first_block,
+ XFS_BMAPI_COWFORK | XFS_BMAPI_PREALLOC, &first_block,
XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK),
&imap, &nimaps, &dfops);
if (error)
goto out_trans_cancel;

+ /* Finish up. */
error = xfs_defer_finish(&tp, &dfops, NULL);
if (error)
goto out_trans_cancel;
@@ -389,11 +478,12 @@ xfs_reflink_allocate_cow_range(
if (error) {
trace_xfs_reflink_allocate_cow_range_error(ip, error,
_RET_IP_);
- break;
+ return error;
}
}

- return error;
+ /* Convert the CoW extents to regular. */
+ return xfs_reflink_convert_cow(ip, offset, count);
}

/*
@@ -459,14 +549,18 @@ xfs_reflink_trim_irec_to_next_cow(
}

/*
- * Cancel all pending CoW reservations for some block range of an inode.
+ * Cancel CoW reservations for some block range of an inode.
+ *
+ * If cancel_real is true this function cancels all COW fork extents for the
+ * inode; if cancel_real is false, real extents are not cleared.
*/
int
xfs_reflink_cancel_cow_blocks(
struct xfs_inode *ip,
struct xfs_trans **tpp,
xfs_fileoff_t offset_fsb,
- xfs_fileoff_t end_fsb)
+ xfs_fileoff_t end_fsb,
+ bool cancel_real)
{
struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
struct xfs_bmbt_irec got, del;
@@ -490,7 +584,7 @@ xfs_reflink_cancel_cow_blocks(
&idx, &got, &del);
if (error)
break;
- } else {
+ } else if (del.br_state == XFS_EXT_UNWRITTEN || cancel_real) {
xfs_trans_ijoin(*tpp, ip, 0);
xfs_defer_init(&dfops, &firstfsb);

@@ -532,13 +626,17 @@ xfs_reflink_cancel_cow_blocks(
}

/*
- * Cancel all pending CoW reservations for some byte range of an inode.
+ * Cancel CoW reservations for some byte range of an inode.
+ *
+ * If cancel_real is true this function cancels all COW fork extents for the
+ * inode; if cancel_real is false, real extents are not cleared.
*/
int
xfs_reflink_cancel_cow_range(
struct xfs_inode *ip,
xfs_off_t offset,
- xfs_off_t count)
+ xfs_off_t count,
+ bool cancel_real)
{
struct xfs_trans *tp;
xfs_fileoff_t offset_fsb;
@@ -564,7 +662,8 @@ xfs_reflink_cancel_cow_range(
xfs_trans_ijoin(tp, ip, 0);

/* Scrape out the old CoW reservations */
- error = xfs_reflink_cancel_cow_blocks(ip, &tp, offset_fsb, end_fsb);
+ error = xfs_reflink_cancel_cow_blocks(ip, &tp, offset_fsb, end_fsb,
+ cancel_real);
if (error)
goto out_cancel;

@@ -641,6 +740,16 @@ xfs_reflink_end_cow(

ASSERT(!isnullstartblock(got.br_startblock));

+ /*
+ * Don't remap unwritten extents; these are
+ * speculatively preallocated CoW extents that have been
+ * allocated but have not yet been involved in a write.
+ */
+ if (got.br_state == XFS_EXT_UNWRITTEN) {
+ idx--;
+ goto next_extent;
+ }
+
/* Unmap the old blocks in the data fork. */
xfs_defer_init(&dfops, &firstfsb);
rlen = del.br_blockcount;
@@ -855,13 +964,14 @@ STATIC int
xfs_reflink_update_dest(
struct xfs_inode *dest,
xfs_off_t newlen,
- xfs_extlen_t cowextsize)
+ xfs_extlen_t cowextsize,
+ bool is_dedupe)
{
struct xfs_mount *mp = dest->i_mount;
struct xfs_trans *tp;
int error;

- if (newlen <= i_size_read(VFS_I(dest)) && cowextsize == 0)
+ if (is_dedupe && newlen <= i_size_read(VFS_I(dest)) && cowextsize == 0)
return 0;

error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ichange, 0, 0, 0, &tp);
@@ -882,6 +992,10 @@ xfs_reflink_update_dest(
dest->i_d.di_flags2 |= XFS_DIFLAG2_COWEXTSIZE;
}

+ if (!is_dedupe) {
+ xfs_trans_ichgtime(tp, dest,
+ XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
+ }
xfs_trans_log_inode(tp, dest, XFS_ILOG_CORE);

error = xfs_trans_commit(tp);
@@ -1195,7 +1309,8 @@ xfs_reflink_remap_range(
!(dest->i_d.di_flags2 & XFS_DIFLAG2_COWEXTSIZE))
cowextsize = src->i_d.di_cowextsize;

- ret = xfs_reflink_update_dest(dest, pos_out + len, cowextsize);
+ ret = xfs_reflink_update_dest(dest, pos_out + len, cowextsize,
+ is_dedupe);

out_unlock:
xfs_iunlock(src, XFS_MMAPLOCK_EXCL);
@@ -1345,7 +1460,7 @@ xfs_reflink_clear_inode_flag(
* We didn't find any shared blocks so turn off the reflink flag.
* First, get rid of any leftover CoW mappings.
*/
- error = xfs_reflink_cancel_cow_blocks(ip, tpp, 0, NULLFILEOFF);
+ error = xfs_reflink_cancel_cow_blocks(ip, tpp, 0, NULLFILEOFF, true);
if (error)
return error;

diff --git a/fs/xfs/xfs_reflink.h b/fs/xfs/xfs_reflink.h
index aa6a4d64bd35..b715bacb2ea2 100644
--- a/fs/xfs/xfs_reflink.h
+++ b/fs/xfs/xfs_reflink.h
@@ -30,6 +30,8 @@ extern int xfs_reflink_reserve_cow(struct xfs_inode *ip,
struct xfs_bmbt_irec *imap, bool *shared);
extern int xfs_reflink_allocate_cow_range(struct xfs_inode *ip,
xfs_off_t offset, xfs_off_t count);
+extern int xfs_reflink_convert_cow(struct xfs_inode *ip, xfs_off_t offset,
+ xfs_off_t count);
extern bool xfs_reflink_find_cow_mapping(struct xfs_inode *ip, xfs_off_t offset,
struct xfs_bmbt_irec *imap);
extern void xfs_reflink_trim_irec_to_next_cow(struct xfs_inode *ip,
@@ -37,9 +39,9 @@ extern void xfs_reflink_trim_irec_to_next_cow(struct xfs_inode *ip,

extern int xfs_reflink_cancel_cow_blocks(struct xfs_inode *ip,
struct xfs_trans **tpp, xfs_fileoff_t offset_fsb,
- xfs_fileoff_t end_fsb);
+ xfs_fileoff_t end_fsb, bool cancel_real);
extern int xfs_reflink_cancel_cow_range(struct xfs_inode *ip, xfs_off_t offset,
- xfs_off_t count);
+ xfs_off_t count, bool cancel_real);
extern int xfs_reflink_end_cow(struct xfs_inode *ip, xfs_off_t offset,
xfs_off_t count);
extern int xfs_reflink_recover_cow(struct xfs_mount *mp);
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index eecbaac08eba..d80187b0e726 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -953,7 +953,7 @@ xfs_fs_destroy_inode(
XFS_STATS_INC(ip->i_mount, vn_remove);

if (xfs_is_reflink_inode(ip)) {
- error = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF);
+ error = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF, true);
if (error && !XFS_FORCED_SHUTDOWN(ip->i_mount))
xfs_warn(ip->i_mount,
"Error %d while evicting CoW blocks for inode %llu.",
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 69c5bcd9a51b..375c5e030e5b 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -3089,6 +3089,7 @@ DECLARE_EVENT_CLASS(xfs_inode_irec_class,
__field(xfs_fileoff_t, lblk)
__field(xfs_extlen_t, len)
__field(xfs_fsblock_t, pblk)
+ __field(int, state)
),
TP_fast_assign(
__entry->dev = VFS_I(ip)->i_sb->s_dev;
@@ -3096,13 +3097,15 @@ DECLARE_EVENT_CLASS(xfs_inode_irec_class,
__entry->lblk = irec->br_startoff;
__entry->len = irec->br_blockcount;
__entry->pblk = irec->br_startblock;
+ __entry->state = irec->br_state;
),
- TP_printk("dev %d:%d ino 0x%llx lblk 0x%llx len 0x%x pblk %llu",
+ TP_printk("dev %d:%d ino 0x%llx lblk 0x%llx len 0x%x pblk %llu st %d",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->ino,
__entry->lblk,
__entry->len,
- __entry->pblk)
+ __entry->pblk,
+ __entry->state)
);
#define DEFINE_INODE_IREC_EVENT(name) \
DEFINE_EVENT(xfs_inode_irec_class, name, \
@@ -3242,11 +3245,12 @@ DEFINE_INODE_IREC_EVENT(xfs_reflink_trim_around_shared);
DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_alloc);
DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_found);
DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_enospc);
+DEFINE_INODE_IREC_EVENT(xfs_reflink_convert_cow);

DEFINE_RW_EVENT(xfs_reflink_reserve_cow);
DEFINE_RW_EVENT(xfs_reflink_allocate_cow_range);

-DEFINE_INODE_IREC_EVENT(xfs_reflink_bounce_dio_write);
+DEFINE_SIMPLE_IO_EVENT(xfs_reflink_bounce_dio_write);
DEFINE_IOMAP_EVENT(xfs_reflink_find_cow_mapping);
DEFINE_INODE_IREC_EVENT(xfs_reflink_trim_irec);

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 1c5190dab2c1..e3d146dadceb 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -162,8 +162,8 @@ int kvm_io_bus_read(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr,
int len, void *val);
int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
int len, struct kvm_io_device *dev);
-int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx,
- struct kvm_io_device *dev);
+void kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx,
+ struct kvm_io_device *dev);
struct kvm_io_device *kvm_io_bus_get_dev(struct kvm *kvm, enum kvm_bus bus_idx,
gpa_t addr);

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 254698856b8f..8b35bdbdc214 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -739,6 +739,12 @@ static inline bool mem_cgroup_oom_synchronize(bool wait)
return false;
}

+static inline void mem_cgroup_update_page_stat(struct page *page,
+ enum mem_cgroup_stat_index idx,
+ int nr)
+{
+}
+
static inline void mem_cgroup_inc_page_stat(struct page *page,
enum mem_cgroup_stat_index idx)
{
diff --git a/kernel/padata.c b/kernel/padata.c
index 05316c9f32da..3202aa17492c 100644
--- a/kernel/padata.c
+++ b/kernel/padata.c
@@ -186,19 +186,20 @@ static struct padata_priv *padata_get_next(struct parallel_data *pd)

reorder = &next_queue->reorder;

+ spin_lock(&reorder->lock);
if (!list_empty(&reorder->list)) {
padata = list_entry(reorder->list.next,
struct padata_priv, list);

- spin_lock(&reorder->lock);
list_del_init(&padata->list);
atomic_dec(&pd->reorder_objects);
- spin_unlock(&reorder->lock);

pd->processed++;

+ spin_unlock(&reorder->lock);
goto out;
}
+ spin_unlock(&reorder->lock);

if (__this_cpu_read(pd->pqueue->cpu_index) == next_queue->cpu_index) {
padata = ERR_PTR(-ENODATA);
diff --git a/lib/syscall.c b/lib/syscall.c
index 63239e097b13..a72cd0996230 100644
--- a/lib/syscall.c
+++ b/lib/syscall.c
@@ -11,6 +11,7 @@ static int collect_syscall(struct task_struct *target, long *callno,

if (!try_get_task_stack(target)) {
/* Task has no stack, so the task isn't in a syscall. */
+ *sp = *pc = 0;
*callno = -1;
return 0;
}
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index c7025c132670..968b547f3b90 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -4474,6 +4474,7 @@ follow_huge_pmd(struct mm_struct *mm, unsigned long address,
{
struct page *page = NULL;
spinlock_t *ptl;
+ pte_t pte;
retry:
ptl = pmd_lockptr(mm, pmd);
spin_lock(ptl);
@@ -4483,12 +4484,13 @@ follow_huge_pmd(struct mm_struct *mm, unsigned long address,
*/
if (!pmd_huge(*pmd))
goto out;
- if (pmd_present(*pmd)) {
+ pte = huge_ptep_get((pte_t *)pmd);
+ if (pte_present(pte)) {
page = pmd_page(*pmd) + ((address & ~PMD_MASK) >> PAGE_SHIFT);
if (flags & FOLL_GET)
get_page(page);
} else {
- if (is_hugetlb_entry_migration(huge_ptep_get((pte_t *)pmd))) {
+ if (is_hugetlb_entry_migration(pte)) {
spin_unlock(ptl);
__migration_entry_wait(mm, (pte_t *)pmd, ptl);
goto retry;
diff --git a/mm/rmap.c b/mm/rmap.c
index 91619fd70939..a40d990eede0 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1294,7 +1294,7 @@ void page_add_file_rmap(struct page *page, bool compound)
goto out;
}
__mod_node_page_state(page_pgdat(page), NR_FILE_MAPPED, nr);
- mem_cgroup_inc_page_stat(page, MEM_CGROUP_STAT_FILE_MAPPED);
+ mem_cgroup_update_page_stat(page, MEM_CGROUP_STAT_FILE_MAPPED, nr);
out:
unlock_page_memcg(page);
}
@@ -1334,7 +1334,7 @@ static void page_remove_file_rmap(struct page *page, bool compound)
* pte lock(a spinlock) is held, which implies preemption disabled.
*/
__mod_node_page_state(page_pgdat(page), NR_FILE_MAPPED, -nr);
- mem_cgroup_dec_page_stat(page, MEM_CGROUP_STAT_FILE_MAPPED);
+ mem_cgroup_update_page_stat(page, MEM_CGROUP_STAT_FILE_MAPPED, -nr);

if (unlikely(PageMlocked(page)))
clear_page_mlock(page);
diff --git a/mm/workingset.c b/mm/workingset.c
index a67f5796b995..dda16cf9599f 100644
--- a/mm/workingset.c
+++ b/mm/workingset.c
@@ -533,7 +533,7 @@ static int __init workingset_init(void)
pr_info("workingset: timestamp_bits=%d max_order=%d bucket_order=%u\n",
timestamp_bits, max_order, bucket_order);

- ret = list_lru_init_key(&shadow_nodes, &shadow_nodes_key);
+ ret = __list_lru_init(&shadow_nodes, true, &shadow_nodes_key);
if (ret)
goto err;
ret = register_shrinker(&workingset_shadow_shrinker);
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 770c52701efa..140b067d5d57 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -7,6 +7,7 @@
#include <linux/kthread.h>
#include <linux/net.h>
#include <linux/nsproxy.h>
+#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/socket.h>
#include <linux/string.h>
@@ -469,11 +470,16 @@ static int ceph_tcp_connect(struct ceph_connection *con)
{
struct sockaddr_storage *paddr = &con->peer_addr.in_addr;
struct socket *sock;
+ unsigned int noio_flag;
int ret;

BUG_ON(con->sock);
+
+ /* sock_create_kern() allocates with GFP_KERNEL */
+ noio_flag = memalloc_noio_save();
ret = sock_create_kern(read_pnet(&con->msgr->net), paddr->ss_family,
SOCK_STREAM, IPPROTO_TCP, &sock);
+ memalloc_noio_restore(noio_flag);
if (ret)
return ret;
sock->sk->sk_allocation = GFP_NOFS;
diff --git a/sound/core/seq/seq_fifo.c b/sound/core/seq/seq_fifo.c
index 3f4efcb85df5..3490d21ab9e7 100644
--- a/sound/core/seq/seq_fifo.c
+++ b/sound/core/seq/seq_fifo.c
@@ -265,6 +265,10 @@ int snd_seq_fifo_resize(struct snd_seq_fifo *f, int poolsize)
/* NOTE: overflow flag is not cleared */
spin_unlock_irqrestore(&f->lock, flags);

+ /* close the old pool and wait until all users are gone */
+ snd_seq_pool_mark_closing(oldpool);
+ snd_use_lock_sync(&f->use_lock);
+
/* release cells in old pool */
for (cell = oldhead; cell; cell = next) {
next = cell->next;
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index c813ad857650..152c7ed65254 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -4846,6 +4846,7 @@ enum {
ALC292_FIXUP_DISABLE_AAMIX,
ALC293_FIXUP_DISABLE_AAMIX_MULTIJACK,
ALC298_FIXUP_DELL1_MIC_NO_PRESENCE,
+ ALC298_FIXUP_DELL_AIO_MIC_NO_PRESENCE,
ALC275_FIXUP_DELL_XPS,
ALC256_FIXUP_DELL_XPS_13_HEADPHONE_NOISE,
ALC293_FIXUP_LENOVO_SPK_NOISE,
@@ -5446,6 +5447,15 @@ static const struct hda_fixup alc269_fixups[] = {
.chained = true,
.chain_id = ALC269_FIXUP_HEADSET_MODE
},
+ [ALC298_FIXUP_DELL_AIO_MIC_NO_PRESENCE] = {
+ .type = HDA_FIXUP_PINS,
+ .v.pins = (const struct hda_pintbl[]) {
+ { 0x18, 0x01a1913c }, /* use as headset mic, without its own jack detect */
+ { }
+ },
+ .chained = true,
+ .chain_id = ALC269_FIXUP_HEADSET_MODE
+ },
[ALC275_FIXUP_DELL_XPS] = {
.type = HDA_FIXUP_VERBS,
.v.verbs = (const struct hda_verb[]) {
@@ -5518,7 +5528,7 @@ static const struct hda_fixup alc269_fixups[] = {
.type = HDA_FIXUP_FUNC,
.v.func = alc298_fixup_speaker_volume,
.chained = true,
- .chain_id = ALC298_FIXUP_DELL1_MIC_NO_PRESENCE,
+ .chain_id = ALC298_FIXUP_DELL_AIO_MIC_NO_PRESENCE,
},
[ALC256_FIXUP_DELL_INSPIRON_7559_SUBWOOFER] = {
.type = HDA_FIXUP_PINS,
diff --git a/sound/soc/atmel/atmel-classd.c b/sound/soc/atmel/atmel-classd.c
index 89ac5f5a93eb..7ae46c2647d4 100644
--- a/sound/soc/atmel/atmel-classd.c
+++ b/sound/soc/atmel/atmel-classd.c
@@ -349,7 +349,7 @@ static int atmel_classd_codec_dai_digital_mute(struct snd_soc_dai *codec_dai,
}

#define CLASSD_ACLK_RATE_11M2896_MPY_8 (112896 * 100 * 8)
-#define CLASSD_ACLK_RATE_12M288_MPY_8 (12228 * 1000 * 8)
+#define CLASSD_ACLK_RATE_12M288_MPY_8 (12288 * 1000 * 8)

static struct {
int rate;
diff --git a/sound/soc/codecs/rt5665.c b/sound/soc/codecs/rt5665.c
index 324461e985b3..fe2cf1ed8237 100644
--- a/sound/soc/codecs/rt5665.c
+++ b/sound/soc/codecs/rt5665.c
@@ -1241,7 +1241,7 @@ static irqreturn_t rt5665_irq(int irq, void *data)
static void rt5665_jd_check_handler(struct work_struct *work)
{
struct rt5665_priv *rt5665 = container_of(work, struct rt5665_priv,
- calibrate_work.work);
+ jd_check_work.work);

if (snd_soc_read(rt5665->codec, RT5665_AJD1_CTRL) & 0x0010) {
/* jack out */
diff --git a/sound/soc/intel/skylake/skl-topology.c b/sound/soc/intel/skylake/skl-topology.c
index bd313c907b20..172d7db1653c 100644
--- a/sound/soc/intel/skylake/skl-topology.c
+++ b/sound/soc/intel/skylake/skl-topology.c
@@ -486,7 +486,7 @@ static int skl_tplg_set_module_init_data(struct snd_soc_dapm_widget *w)
if (bc->set_params != SKL_PARAM_INIT)
continue;

- mconfig->formats_config.caps = (u32 *)&bc->params;
+ mconfig->formats_config.caps = (u32 *)bc->params;
mconfig->formats_config.caps_size = bc->size;

break;
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index a29786dd9522..4d28a9ddbee0 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -870,7 +870,8 @@ kvm_deassign_ioeventfd_idx(struct kvm *kvm, enum kvm_bus bus_idx,
continue;

kvm_io_bus_unregister_dev(kvm, bus_idx, &p->dev);
- kvm->buses[bus_idx]->ioeventfd_count--;
+ if (kvm->buses[bus_idx])
+ kvm->buses[bus_idx]->ioeventfd_count--;
ioeventfd_release(p);
ret = 0;
break;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 482612b4e496..da5db473afb0 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -723,8 +723,11 @@ static void kvm_destroy_vm(struct kvm *kvm)
list_del(&kvm->vm_list);
spin_unlock(&kvm_lock);
kvm_free_irq_routing(kvm);
- for (i = 0; i < KVM_NR_BUSES; i++)
- kvm_io_bus_destroy(kvm->buses[i]);
+ for (i = 0; i < KVM_NR_BUSES; i++) {
+ if (kvm->buses[i])
+ kvm_io_bus_destroy(kvm->buses[i]);
+ kvm->buses[i] = NULL;
+ }
kvm_coalesced_mmio_free(kvm);
#if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
mmu_notifier_unregister(&kvm->mmu_notifier, kvm->mm);
@@ -3473,6 +3476,8 @@ int kvm_io_bus_write(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr,
};

bus = srcu_dereference(vcpu->kvm->buses[bus_idx], &vcpu->kvm->srcu);
+ if (!bus)
+ return -ENOMEM;
r = __kvm_io_bus_write(vcpu, bus, &range, val);
return r < 0 ? r : 0;
}
@@ -3490,6 +3495,8 @@ int kvm_io_bus_write_cookie(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx,
};

bus = srcu_dereference(vcpu->kvm->buses[bus_idx], &vcpu->kvm->srcu);
+ if (!bus)
+ return -ENOMEM;

/* First try the device referenced by cookie. */
if ((cookie >= 0) && (cookie < bus->dev_count) &&
@@ -3540,6 +3547,8 @@ int kvm_io_bus_read(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr,
};

bus = srcu_dereference(vcpu->kvm->buses[bus_idx], &vcpu->kvm->srcu);
+ if (!bus)
+ return -ENOMEM;
r = __kvm_io_bus_read(vcpu, bus, &range, val);
return r < 0 ? r : 0;
}
@@ -3552,6 +3561,9 @@ int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
struct kvm_io_bus *new_bus, *bus;

bus = kvm->buses[bus_idx];
+ if (!bus)
+ return -ENOMEM;
+
/* exclude ioeventfd which is limited by maximum fd */
if (bus->dev_count - bus->ioeventfd_count > NR_IOBUS_DEVS - 1)
return -ENOSPC;
@@ -3571,37 +3583,41 @@ int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
}

/* Caller must hold slots_lock. */
-int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx,
- struct kvm_io_device *dev)
+void kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx,
+ struct kvm_io_device *dev)
{
- int i, r;
+ int i;
struct kvm_io_bus *new_bus, *bus;

bus = kvm->buses[bus_idx];
- r = -ENOENT;
+ if (!bus)
+ return;
+
for (i = 0; i < bus->dev_count; i++)
if (bus->range[i].dev == dev) {
- r = 0;
break;
}

- if (r)
- return r;
+ if (i == bus->dev_count)
+ return;

new_bus = kmalloc(sizeof(*bus) + ((bus->dev_count - 1) *
sizeof(struct kvm_io_range)), GFP_KERNEL);
- if (!new_bus)
- return -ENOMEM;
+ if (!new_bus) {
+ pr_err("kvm: failed to shrink bus, removing it completely\n");
+ goto broken;
+ }

memcpy(new_bus, bus, sizeof(*bus) + i * sizeof(struct kvm_io_range));
new_bus->dev_count--;
memcpy(new_bus->range + i, bus->range + i + 1,
(new_bus->dev_count - i) * sizeof(struct kvm_io_range));

+broken:
rcu_assign_pointer(kvm->buses[bus_idx], new_bus);
synchronize_srcu_expedited(&kvm->srcu);
kfree(bus);
- return r;
+ return;
}

struct kvm_io_device *kvm_io_bus_get_dev(struct kvm *kvm, enum kvm_bus bus_idx,
@@ -3614,6 +3630,8 @@ struct kvm_io_device *kvm_io_bus_get_dev(struct kvm *kvm, enum kvm_bus bus_idx,
srcu_idx = srcu_read_lock(&kvm->srcu);

bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu);
+ if (!bus)
+ goto out_unlock;

dev_idx = kvm_io_bus_get_first_dev(bus, addr, 1);
if (dev_idx < 0)