Re: [3.13.y.z extended stable] Linux 3.13.11.11

From: Kamal Mostafa
Date: Tue Nov 11 2014 - 13:21:25 EST


diff --git a/Makefile b/Makefile
index 7b3f255..f1e0442 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
VERSION = 3
PATCHLEVEL = 13
SUBLEVEL = 11
-EXTRAVERSION = .10
+EXTRAVERSION = .11
NAME = King of Alienated Frog Porn

# *DOCUMENTATION*
diff --git a/arch/arc/boot/dts/nsimosci.dts b/arch/arc/boot/dts/nsimosci.dts
index 4f31b2e..398064c 100644
--- a/arch/arc/boot/dts/nsimosci.dts
+++ b/arch/arc/boot/dts/nsimosci.dts
@@ -20,7 +20,7 @@
/* this is for console on PGU */
/* bootargs = "console=tty0 consoleblank=0"; */
/* this is for console on serial */
- bootargs = "earlycon=uart8250,mmio32,0xc0000000,115200n8 console=ttyS0,115200n8 consoleblank=0 debug";
+ bootargs = "earlycon=uart8250,mmio32,0xc0000000,115200n8 console=tty0 console=ttyS0,115200n8 consoleblank=0 debug";
};

aliases {
diff --git a/arch/arc/include/asm/kgdb.h b/arch/arc/include/asm/kgdb.h
index b65fca7..fea9316 100644
--- a/arch/arc/include/asm/kgdb.h
+++ b/arch/arc/include/asm/kgdb.h
@@ -19,7 +19,7 @@
* register API yet */
#undef DBG_MAX_REG_NUM

-#define GDB_MAX_REGS 39
+#define GDB_MAX_REGS 87

#define BREAK_INSTR_SIZE 2
#define CACHE_FLUSH_IS_SAFE 1
@@ -33,23 +33,27 @@ static inline void arch_kgdb_breakpoint(void)

extern void kgdb_trap(struct pt_regs *regs);

-enum arc700_linux_regnums {
+/* This is the numbering of registers according to the GDB. See GDB's
+ * arc-tdep.h for details.
+ *
+ * Registers are ordered for GDB 7.5. It is incompatible with GDB 6.8. */
+enum arc_linux_regnums {
_R0 = 0,
_R1, _R2, _R3, _R4, _R5, _R6, _R7, _R8, _R9, _R10, _R11, _R12, _R13,
_R14, _R15, _R16, _R17, _R18, _R19, _R20, _R21, _R22, _R23, _R24,
_R25, _R26,
- _BTA = 27,
- _LP_START = 28,
- _LP_END = 29,
- _LP_COUNT = 30,
- _STATUS32 = 31,
- _BLINK = 32,
- _FP = 33,
- __SP = 34,
- _EFA = 35,
- _RET = 36,
- _ORIG_R8 = 37,
- _STOP_PC = 38
+ _FP = 27,
+ __SP = 28,
+ _R30 = 30,
+ _BLINK = 31,
+ _LP_COUNT = 60,
+ _STOP_PC = 64,
+ _RET = 64,
+ _LP_START = 65,
+ _LP_END = 66,
+ _STATUS32 = 67,
+ _ECR = 76,
+ _BTA = 82,
};

#else
diff --git a/arch/arm64/include/asm/compat.h b/arch/arm64/include/asm/compat.h
index fda2704..e72289a 100644
--- a/arch/arm64/include/asm/compat.h
+++ b/arch/arm64/include/asm/compat.h
@@ -37,8 +37,8 @@ typedef s32 compat_ssize_t;
typedef s32 compat_time_t;
typedef s32 compat_clock_t;
typedef s32 compat_pid_t;
-typedef u32 __compat_uid_t;
-typedef u32 __compat_gid_t;
+typedef u16 __compat_uid_t;
+typedef u16 __compat_gid_t;
typedef u16 __compat_uid16_t;
typedef u16 __compat_gid16_t;
typedef u32 __compat_uid32_t;
diff --git a/arch/mips/include/asm/ftrace.h b/arch/mips/include/asm/ftrace.h
index ce35c9a..370ae7c 100644
--- a/arch/mips/include/asm/ftrace.h
+++ b/arch/mips/include/asm/ftrace.h
@@ -24,7 +24,7 @@ do { \
asm volatile ( \
"1: " load " %[" STR(dst) "], 0(%[" STR(src) "])\n"\
" li %[" STR(error) "], 0\n" \
- "2:\n" \
+ "2: .insn\n" \
\
".section .fixup, \"ax\"\n" \
"3: li %[" STR(error) "], 1\n" \
@@ -46,7 +46,7 @@ do { \
asm volatile ( \
"1: " store " %[" STR(src) "], 0(%[" STR(dst) "])\n"\
" li %[" STR(error) "], 0\n" \
- "2:\n" \
+ "2: .insn\n" \
\
".section .fixup, \"ax\"\n" \
"3: li %[" STR(error) "], 1\n" \
diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c
index ec90a27..4b95653 100644
--- a/arch/mips/mm/tlbex.c
+++ b/arch/mips/mm/tlbex.c
@@ -1057,6 +1057,7 @@ static void build_update_entries(u32 **p, unsigned int tmp, unsigned int ptep)
struct mips_huge_tlb_info {
int huge_pte;
int restore_scratch;
+ bool need_reload_pte;
};

static struct mips_huge_tlb_info
@@ -1071,6 +1072,7 @@ build_fast_tlb_refill_handler (u32 **p, struct uasm_label **l,

rv.huge_pte = scratch;
rv.restore_scratch = 0;
+ rv.need_reload_pte = false;

if (check_for_high_segbits) {
UASM_i_MFC0(p, tmp, C0_BADVADDR);
@@ -1259,6 +1261,7 @@ static void build_r4000_tlb_refill_handler(void)
} else {
htlb_info.huge_pte = K0;
htlb_info.restore_scratch = 0;
+ htlb_info.need_reload_pte = true;
vmalloc_mode = refill_noscratch;
/*
* create the plain linear handler
@@ -1295,7 +1298,8 @@ static void build_r4000_tlb_refill_handler(void)
}
#ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT
uasm_l_tlb_huge_update(&l, p);
- UASM_i_LW(&p, K0, 0, K1);
+ if (htlb_info.need_reload_pte)
+ UASM_i_LW(&p, htlb_info.huge_pte, 0, K1);
build_huge_update_entries(&p, htlb_info.huge_pte, K1);
build_huge_tlb_write_entry(&p, &l, &r, K0, tlb_random,
htlb_info.restore_scratch);
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index 94cd9c1..57cf13a 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -66,6 +66,7 @@ config SPARC64
select HAVE_SYSCALL_TRACEPOINTS
select HAVE_CONTEXT_TRACKING
select HAVE_DEBUG_KMEMLEAK
+ select SPARSE_IRQ
select RTC_DRV_CMOS
select RTC_DRV_BQ4802
select RTC_DRV_SUN4V
diff --git a/arch/sparc/include/asm/hypervisor.h b/arch/sparc/include/asm/hypervisor.h
index ca121f0..17be9d6 100644
--- a/arch/sparc/include/asm/hypervisor.h
+++ b/arch/sparc/include/asm/hypervisor.h
@@ -2944,6 +2944,16 @@ extern unsigned long sun4v_vt_set_perfreg(unsigned long reg_num,
unsigned long reg_val);
#endif

+#define HV_FAST_T5_GET_PERFREG 0x1a8
+#define HV_FAST_T5_SET_PERFREG 0x1a9
+
+#ifndef __ASSEMBLY__
+unsigned long sun4v_t5_get_perfreg(unsigned long reg_num,
+ unsigned long *reg_val);
+unsigned long sun4v_t5_set_perfreg(unsigned long reg_num,
+ unsigned long reg_val);
+#endif
+
/* Function numbers for HV_CORE_TRAP. */
#define HV_CORE_SET_VER 0x00
#define HV_CORE_PUTCHAR 0x01
@@ -2975,6 +2985,7 @@ extern unsigned long sun4v_vt_set_perfreg(unsigned long reg_num,
#define HV_GRP_VF_CPU 0x0205
#define HV_GRP_KT_CPU 0x0209
#define HV_GRP_VT_CPU 0x020c
+#define HV_GRP_T5_CPU 0x0211
#define HV_GRP_DIAG 0x0300

#ifndef __ASSEMBLY__
diff --git a/arch/sparc/include/asm/irq_64.h b/arch/sparc/include/asm/irq_64.h
index abf6afe..3deb07f 100644
--- a/arch/sparc/include/asm/irq_64.h
+++ b/arch/sparc/include/asm/irq_64.h
@@ -37,7 +37,7 @@
*
* ino_bucket->irq allocation is made during {sun4v_,}build_irq().
*/
-#define NR_IRQS 255
+#define NR_IRQS (2048)

extern void irq_install_pre_handler(int irq,
void (*func)(unsigned int, void *, void *),
@@ -57,11 +57,8 @@ extern unsigned int sun4u_build_msi(u32 portid, unsigned int *irq_p,
unsigned long iclr_base);
extern void sun4u_destroy_msi(unsigned int irq);

-extern unsigned char irq_alloc(unsigned int dev_handle,
- unsigned int dev_ino);
-#ifdef CONFIG_PCI_MSI
-extern void irq_free(unsigned int irq);
-#endif
+unsigned int irq_alloc(unsigned int dev_handle, unsigned int dev_ino);
+void irq_free(unsigned int irq);

extern void __init init_IRQ(void);
extern void fixup_irqs(void);
diff --git a/arch/sparc/include/asm/ldc.h b/arch/sparc/include/asm/ldc.h
index bdb524a..8732ed3 100644
--- a/arch/sparc/include/asm/ldc.h
+++ b/arch/sparc/include/asm/ldc.h
@@ -53,13 +53,14 @@ struct ldc_channel;
/* Allocate state for a channel. */
extern struct ldc_channel *ldc_alloc(unsigned long id,
const struct ldc_channel_config *cfgp,
- void *event_arg);
+ void *event_arg,
+ const char *name);

/* Shut down and free state for a channel. */
extern void ldc_free(struct ldc_channel *lp);

/* Register TX and RX queues of the link with the hypervisor. */
-extern int ldc_bind(struct ldc_channel *lp, const char *name);
+extern int ldc_bind(struct ldc_channel *lp);

/* For non-RAW protocols we need to complete a handshake before
* communication can proceed. ldc_connect() does that, if the
diff --git a/arch/sparc/include/asm/oplib_64.h b/arch/sparc/include/asm/oplib_64.h
index a12dbe3..e48fdf4 100644
--- a/arch/sparc/include/asm/oplib_64.h
+++ b/arch/sparc/include/asm/oplib_64.h
@@ -62,7 +62,8 @@ struct linux_mem_p1275 {
/* You must call prom_init() before using any of the library services,
* preferably as early as possible. Pass it the romvec pointer.
*/
-extern void prom_init(void *cif_handler, void *cif_stack);
+extern void prom_init(void *cif_handler);
+extern void prom_init_report(void);

/* Boot argument acquisition, returns the boot command line string. */
extern char *prom_getbootargs(void);
diff --git a/arch/sparc/include/asm/page_64.h b/arch/sparc/include/asm/page_64.h
index aac53fc..b18e602 100644
--- a/arch/sparc/include/asm/page_64.h
+++ b/arch/sparc/include/asm/page_64.h
@@ -57,18 +57,21 @@ extern void copy_user_page(void *to, void *from, unsigned long vaddr, struct pag
typedef struct { unsigned long pte; } pte_t;
typedef struct { unsigned long iopte; } iopte_t;
typedef struct { unsigned long pmd; } pmd_t;
+typedef struct { unsigned long pud; } pud_t;
typedef struct { unsigned long pgd; } pgd_t;
typedef struct { unsigned long pgprot; } pgprot_t;

#define pte_val(x) ((x).pte)
#define iopte_val(x) ((x).iopte)
#define pmd_val(x) ((x).pmd)
+#define pud_val(x) ((x).pud)
#define pgd_val(x) ((x).pgd)
#define pgprot_val(x) ((x).pgprot)

#define __pte(x) ((pte_t) { (x) } )
#define __iopte(x) ((iopte_t) { (x) } )
#define __pmd(x) ((pmd_t) { (x) } )
+#define __pud(x) ((pud_t) { (x) } )
#define __pgd(x) ((pgd_t) { (x) } )
#define __pgprot(x) ((pgprot_t) { (x) } )

@@ -77,18 +80,21 @@ typedef struct { unsigned long pgprot; } pgprot_t;
typedef unsigned long pte_t;
typedef unsigned long iopte_t;
typedef unsigned long pmd_t;
+typedef unsigned long pud_t;
typedef unsigned long pgd_t;
typedef unsigned long pgprot_t;

#define pte_val(x) (x)
#define iopte_val(x) (x)
#define pmd_val(x) (x)
+#define pud_val(x) (x)
#define pgd_val(x) (x)
#define pgprot_val(x) (x)

#define __pte(x) (x)
#define __iopte(x) (x)
#define __pmd(x) (x)
+#define __pud(x) (x)
#define __pgd(x) (x)
#define __pgprot(x) (x)

@@ -96,21 +102,14 @@ typedef unsigned long pgprot_t;

typedef pte_t *pgtable_t;

-/* These two values define the virtual address space range in which we
- * must forbid 64-bit user processes from making mappings. It used to
- * represent precisely the virtual address space hole present in most
- * early sparc64 chips including UltraSPARC-I. But now it also is
- * further constrained by the limits of our page tables, which is
- * 43-bits of virtual address.
- */
-#define SPARC64_VA_HOLE_TOP _AC(0xfffffc0000000000,UL)
-#define SPARC64_VA_HOLE_BOTTOM _AC(0x0000040000000000,UL)
+extern unsigned long sparc64_va_hole_top;
+extern unsigned long sparc64_va_hole_bottom;

/* The next two defines specify the actual exclusion region we
* enforce, wherein we use a 4GB red zone on each side of the VA hole.
*/
-#define VA_EXCLUDE_START (SPARC64_VA_HOLE_BOTTOM - (1UL << 32UL))
-#define VA_EXCLUDE_END (SPARC64_VA_HOLE_TOP + (1UL << 32UL))
+#define VA_EXCLUDE_START (sparc64_va_hole_bottom - (1UL << 32UL))
+#define VA_EXCLUDE_END (sparc64_va_hole_top + (1UL << 32UL))

#define TASK_UNMAPPED_BASE (test_thread_flag(TIF_32BIT) ? \
_AC(0x0000000070000000,UL) : \
@@ -118,20 +117,16 @@ typedef pte_t *pgtable_t;

#include <asm-generic/memory_model.h>

-#define PAGE_OFFSET_BY_BITS(X) (-(_AC(1,UL) << (X)))
extern unsigned long PAGE_OFFSET;

#endif /* !(__ASSEMBLY__) */

-/* The maximum number of physical memory address bits we support, this
- * is used to size various tables used to manage kernel TLB misses and
- * also the sparsemem code.
+/* The maximum number of physical memory address bits we support. The
+ * largest value we can support is whatever "KPGD_SHIFT + KPTE_BITS"
+ * evaluates to.
*/
-#define MAX_PHYS_ADDRESS_BITS 47
+#define MAX_PHYS_ADDRESS_BITS 53

-/* These two shift counts are used when indexing sparc64_valid_addr_bitmap
- * and kpte_linear_bitmap.
- */
#define ILOG2_4MB 22
#define ILOG2_256MB 28

diff --git a/arch/sparc/include/asm/pgalloc_64.h b/arch/sparc/include/asm/pgalloc_64.h
index bcfe063..2c8d41f 100644
--- a/arch/sparc/include/asm/pgalloc_64.h
+++ b/arch/sparc/include/asm/pgalloc_64.h
@@ -15,6 +15,13 @@

extern struct kmem_cache *pgtable_cache;

+static inline void __pgd_populate(pgd_t *pgd, pud_t *pud)
+{
+ pgd_set(pgd, pud);
+}
+
+#define pgd_populate(MM, PGD, PUD) __pgd_populate(PGD, PUD)
+
static inline pgd_t *pgd_alloc(struct mm_struct *mm)
{
return kmem_cache_alloc(pgtable_cache, GFP_KERNEL);
@@ -25,7 +32,23 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
kmem_cache_free(pgtable_cache, pgd);
}

-#define pud_populate(MM, PUD, PMD) pud_set(PUD, PMD)
+static inline void __pud_populate(pud_t *pud, pmd_t *pmd)
+{
+ pud_set(pud, pmd);
+}
+
+#define pud_populate(MM, PUD, PMD) __pud_populate(PUD, PMD)
+
+static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
+{
+ return kmem_cache_alloc(pgtable_cache,
+ GFP_KERNEL|__GFP_REPEAT);
+}
+
+static inline void pud_free(struct mm_struct *mm, pud_t *pud)
+{
+ kmem_cache_free(pgtable_cache, pud);
+}

static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
{
@@ -91,4 +114,7 @@ static inline void __pte_free_tlb(struct mmu_gather *tlb, pte_t *pte,
#define __pmd_free_tlb(tlb, pmd, addr) \
pgtable_free_tlb(tlb, pmd, false)

+#define __pud_free_tlb(tlb, pud, addr) \
+ pgtable_free_tlb(tlb, pud, false)
+
#endif /* _SPARC64_PGALLOC_H */
diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h
index 1a49ffd..e8dfabf 100644
--- a/arch/sparc/include/asm/pgtable_64.h
+++ b/arch/sparc/include/asm/pgtable_64.h
@@ -20,8 +20,6 @@
#include <asm/page.h>
#include <asm/processor.h>

-#include <asm-generic/pgtable-nopud.h>
-
/* The kernel image occupies 0x4000000 to 0x6000000 (4MB --> 96MB).
* The page copy blockops can use 0x6000000 to 0x8000000.
* The 8K TSB is mapped in the 0x8000000 to 0x8400000 range.
@@ -42,10 +40,7 @@
#define LOW_OBP_ADDRESS _AC(0x00000000f0000000,UL)
#define HI_OBP_ADDRESS _AC(0x0000000100000000,UL)
#define VMALLOC_START _AC(0x0000000100000000,UL)
-#define VMALLOC_END _AC(0x0000010000000000,UL)
-#define VMEMMAP_BASE _AC(0x0000010000000000,UL)
-
-#define vmemmap ((struct page *)VMEMMAP_BASE)
+#define VMEMMAP_BASE VMALLOC_END

/* PMD_SHIFT determines the size of the area a second-level page
* table can map
@@ -55,13 +50,25 @@
#define PMD_MASK (~(PMD_SIZE-1))
#define PMD_BITS (PAGE_SHIFT - 3)

-/* PGDIR_SHIFT determines what a third-level page table entry can map */
-#define PGDIR_SHIFT (PAGE_SHIFT + (PAGE_SHIFT-3) + PMD_BITS)
+/* PUD_SHIFT determines the size of the area a third-level page
+ * table can map
+ */
+#define PUD_SHIFT (PMD_SHIFT + PMD_BITS)
+#define PUD_SIZE (_AC(1,UL) << PUD_SHIFT)
+#define PUD_MASK (~(PUD_SIZE-1))
+#define PUD_BITS (PAGE_SHIFT - 3)
+
+/* PGDIR_SHIFT determines what a fourth-level page table entry can map */
+#define PGDIR_SHIFT (PUD_SHIFT + PUD_BITS)
#define PGDIR_SIZE (_AC(1,UL) << PGDIR_SHIFT)
#define PGDIR_MASK (~(PGDIR_SIZE-1))
#define PGDIR_BITS (PAGE_SHIFT - 3)

-#if (PGDIR_SHIFT + PGDIR_BITS) != 43
+#if (MAX_PHYS_ADDRESS_BITS > PGDIR_SHIFT + PGDIR_BITS)
+#error MAX_PHYS_ADDRESS_BITS exceeds what kernel page tables can support
+#endif
+
+#if (PGDIR_SHIFT + PGDIR_BITS) != 53
#error Page table parameters do not cover virtual address space properly.
#endif

@@ -71,28 +78,18 @@

#ifndef __ASSEMBLY__

-#include <linux/sched.h>
-
-extern unsigned long sparc64_valid_addr_bitmap[];
+extern unsigned long VMALLOC_END;

-/* Needs to be defined here and not in linux/mm.h, as it is arch dependent */
-static inline bool __kern_addr_valid(unsigned long paddr)
-{
- if ((paddr >> MAX_PHYS_ADDRESS_BITS) != 0UL)
- return false;
- return test_bit(paddr >> ILOG2_4MB, sparc64_valid_addr_bitmap);
-}
+#define vmemmap ((struct page *)VMEMMAP_BASE)

-static inline bool kern_addr_valid(unsigned long addr)
-{
- unsigned long paddr = __pa(addr);
+#include <linux/sched.h>

- return __kern_addr_valid(paddr);
-}
+bool kern_addr_valid(unsigned long addr);

/* Entries per page directory level. */
#define PTRS_PER_PTE (1UL << (PAGE_SHIFT-3))
#define PTRS_PER_PMD (1UL << PMD_BITS)
+#define PTRS_PER_PUD (1UL << PUD_BITS)
#define PTRS_PER_PGD (1UL << PGDIR_BITS)

/* Kernel has a separate 44bit address space. */
@@ -101,6 +98,9 @@ static inline bool kern_addr_valid(unsigned long addr)
#define pmd_ERROR(e) \
pr_err("%s:%d: bad pmd %p(%016lx) seen at (%pS)\n", \
__FILE__, __LINE__, &(e), pmd_val(e), __builtin_return_address(0))
+#define pud_ERROR(e) \
+ pr_err("%s:%d: bad pud %p(%016lx) seen at (%pS)\n", \
+ __FILE__, __LINE__, &(e), pud_val(e), __builtin_return_address(0))
#define pgd_ERROR(e) \
pr_err("%s:%d: bad pgd %p(%016lx) seen at (%pS)\n", \
__FILE__, __LINE__, &(e), pgd_val(e), __builtin_return_address(0))
@@ -112,6 +112,7 @@ static inline bool kern_addr_valid(unsigned long addr)
#define _PAGE_R _AC(0x8000000000000000,UL) /* Keep ref bit uptodate*/
#define _PAGE_SPECIAL _AC(0x0200000000000000,UL) /* Special page */
#define _PAGE_PMD_HUGE _AC(0x0100000000000000,UL) /* Huge page */
+#define _PAGE_PUD_HUGE _PAGE_PMD_HUGE

/* Advertise support for _PAGE_SPECIAL */
#define __HAVE_ARCH_PTE_SPECIAL
@@ -658,26 +659,26 @@ static inline unsigned long pmd_large(pmd_t pmd)
return pte_val(pte) & _PAGE_PMD_HUGE;
}

-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-static inline unsigned long pmd_young(pmd_t pmd)
+static inline unsigned long pmd_pfn(pmd_t pmd)
{
pte_t pte = __pte(pmd_val(pmd));

- return pte_young(pte);
+ return pte_pfn(pte);
}

-static inline unsigned long pmd_write(pmd_t pmd)
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+static inline unsigned long pmd_young(pmd_t pmd)
{
pte_t pte = __pte(pmd_val(pmd));

- return pte_write(pte);
+ return pte_young(pte);
}

-static inline unsigned long pmd_pfn(pmd_t pmd)
+static inline unsigned long pmd_write(pmd_t pmd)
{
pte_t pte = __pte(pmd_val(pmd));

- return pte_pfn(pte);
+ return pte_write(pte);
}

static inline unsigned long pmd_trans_huge(pmd_t pmd)
@@ -771,13 +772,15 @@ static inline int pmd_present(pmd_t pmd)
* the top bits outside of the range of any physical address size we
* support are clear as well. We also validate the physical itself.
*/
-#define pmd_bad(pmd) ((pmd_val(pmd) & ~PAGE_MASK) || \
- !__kern_addr_valid(pmd_val(pmd)))
+#define pmd_bad(pmd) (pmd_val(pmd) & ~PAGE_MASK)

#define pud_none(pud) (!pud_val(pud))

-#define pud_bad(pud) ((pud_val(pud) & ~PAGE_MASK) || \
- !__kern_addr_valid(pud_val(pud)))
+#define pud_bad(pud) (pud_val(pud) & ~PAGE_MASK)
+
+#define pgd_none(pgd) (!pgd_val(pgd))
+
+#define pgd_bad(pgd) (pgd_val(pgd) & ~PAGE_MASK)

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
extern void set_pmd_at(struct mm_struct *mm, unsigned long addr,
@@ -815,10 +818,31 @@ static inline unsigned long __pmd_page(pmd_t pmd)
#define pmd_clear(pmdp) (pmd_val(*(pmdp)) = 0UL)
#define pud_present(pud) (pud_val(pud) != 0U)
#define pud_clear(pudp) (pud_val(*(pudp)) = 0UL)
+#define pgd_page_vaddr(pgd) \
+ ((unsigned long) __va(pgd_val(pgd)))
+#define pgd_present(pgd) (pgd_val(pgd) != 0U)
+#define pgd_clear(pgdp) (pgd_val(*(pgd)) = 0UL)
+
+static inline unsigned long pud_large(pud_t pud)
+{
+ pte_t pte = __pte(pud_val(pud));
+
+ return pte_val(pte) & _PAGE_PMD_HUGE;
+}
+
+static inline unsigned long pud_pfn(pud_t pud)
+{
+ pte_t pte = __pte(pud_val(pud));
+
+ return pte_pfn(pte);
+}

/* Same in both SUN4V and SUN4U. */
#define pte_none(pte) (!pte_val(pte))

+#define pgd_set(pgdp, pudp) \
+ (pgd_val(*(pgdp)) = (__pa((unsigned long) (pudp))))
+
/* to find an entry in a page-table-directory. */
#define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1))
#define pgd_offset(mm, address) ((mm)->pgd + pgd_index(address))
@@ -826,6 +850,11 @@ static inline unsigned long __pmd_page(pmd_t pmd)
/* to find an entry in a kernel page-table-directory */
#define pgd_offset_k(address) pgd_offset(&init_mm, address)

+/* Find an entry in the third-level page table.. */
+#define pud_index(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD - 1))
+#define pud_offset(pgdp, address) \
+ ((pud_t *) pgd_page_vaddr(*(pgdp)) + pud_index(address))
+
/* Find an entry in the second-level page table.. */
#define pmd_offset(pudp, address) \
((pmd_t *) pud_page_vaddr(*(pudp)) + \
@@ -898,7 +927,6 @@ static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr,
#endif

extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
-extern pmd_t swapper_low_pmd_dir[PTRS_PER_PMD];

extern void paging_init(void);
extern unsigned long find_ecache_flush_span(unsigned long size);
diff --git a/arch/sparc/include/asm/setup.h b/arch/sparc/include/asm/setup.h
index 5e35e05..acd6146 100644
--- a/arch/sparc/include/asm/setup.h
+++ b/arch/sparc/include/asm/setup.h
@@ -24,6 +24,10 @@ static inline int con_is_present(void)
}
#endif

+#ifdef CONFIG_SPARC64
+extern void __init start_early_boot(void);
+#endif
+
extern void sun_do_break(void);
extern int stop_a_enabled;
extern int scons_pwroff;
diff --git a/arch/sparc/include/asm/spitfire.h b/arch/sparc/include/asm/spitfire.h
index 6b67e50..69424d4 100644
--- a/arch/sparc/include/asm/spitfire.h
+++ b/arch/sparc/include/asm/spitfire.h
@@ -45,6 +45,8 @@
#define SUN4V_CHIP_NIAGARA3 0x03
#define SUN4V_CHIP_NIAGARA4 0x04
#define SUN4V_CHIP_NIAGARA5 0x05
+#define SUN4V_CHIP_SPARC_M6 0x06
+#define SUN4V_CHIP_SPARC_M7 0x07
#define SUN4V_CHIP_SPARC64X 0x8a
#define SUN4V_CHIP_UNKNOWN 0xff

diff --git a/arch/sparc/include/asm/thread_info_64.h b/arch/sparc/include/asm/thread_info_64.h
index a5f01ac..cc6275c 100644
--- a/arch/sparc/include/asm/thread_info_64.h
+++ b/arch/sparc/include/asm/thread_info_64.h
@@ -63,7 +63,8 @@ struct thread_info {
struct pt_regs *kern_una_regs;
unsigned int kern_una_insn;

- unsigned long fpregs[0] __attribute__ ((aligned(64)));
+ unsigned long fpregs[(7 * 256) / sizeof(unsigned long)]
+ __attribute__ ((aligned(64)));
};

#endif /* !(__ASSEMBLY__) */
@@ -102,6 +103,7 @@ struct thread_info {
#define FAULT_CODE_ITLB 0x04 /* Miss happened in I-TLB */
#define FAULT_CODE_WINFIXUP 0x08 /* Miss happened during spill/fill */
#define FAULT_CODE_BLKCOMMIT 0x10 /* Use blk-commit ASI in copy_page */
+#define FAULT_CODE_BAD_RA 0x20 /* Bad RA for sun4v */

#if PAGE_SHIFT == 13
#define THREAD_SIZE (2*PAGE_SIZE)
diff --git a/arch/sparc/include/asm/tsb.h b/arch/sparc/include/asm/tsb.h
index 90916f9..ecb49cf 100644
--- a/arch/sparc/include/asm/tsb.h
+++ b/arch/sparc/include/asm/tsb.h
@@ -133,9 +133,24 @@ extern struct tsb_phys_patch_entry __tsb_phys_patch, __tsb_phys_patch_end;
sub TSB, 0x8, TSB; \
TSB_STORE(TSB, TAG);

- /* Do a kernel page table walk. Leaves physical PTE pointer in
- * REG1. Jumps to FAIL_LABEL on early page table walk termination.
- * VADDR will not be clobbered, but REG2 will.
+ /* Do a kernel page table walk. Leaves valid PTE value in
+ * REG1. Jumps to FAIL_LABEL on early page table walk
+ * termination. VADDR will not be clobbered, but REG2 will.
+ *
+ * There are two masks we must apply to propagate bits from
+ * the virtual address into the PTE physical address field
+ * when dealing with huge pages. This is because the page
+ * table boundaries do not match the huge page size(s) the
+ * hardware supports.
+ *
+ * In these cases we propagate the bits that are below the
+ * page table level where we saw the huge page mapping, but
+ * are still within the relevant physical bits for the huge
+ * page size in question. So for PMD mappings (which fall on
+ * bit 23, for 8MB per PMD) we must propagate bit 22 for a
+ * 4MB huge page. For huge PUDs (which fall on bit 33, for
+ * 8GB per PUD), we have to accomodate 256MB and 2GB huge
+ * pages. So for those we propagate bits 32 to 28.
*/
#define KERN_PGTABLE_WALK(VADDR, REG1, REG2, FAIL_LABEL) \
sethi %hi(swapper_pg_dir), REG1; \
@@ -145,15 +160,40 @@ extern struct tsb_phys_patch_entry __tsb_phys_patch, __tsb_phys_patch_end;
andn REG2, 0x7, REG2; \
ldx [REG1 + REG2], REG1; \
brz,pn REG1, FAIL_LABEL; \
- sllx VADDR, 64 - (PMD_SHIFT + PMD_BITS), REG2; \
+ sllx VADDR, 64 - (PUD_SHIFT + PUD_BITS), REG2; \
srlx REG2, 64 - PAGE_SHIFT, REG2; \
andn REG2, 0x7, REG2; \
ldxa [REG1 + REG2] ASI_PHYS_USE_EC, REG1; \
brz,pn REG1, FAIL_LABEL; \
- sllx VADDR, 64 - PMD_SHIFT, REG2; \
+ sethi %uhi(_PAGE_PUD_HUGE), REG2; \
+ brz,pn REG1, FAIL_LABEL; \
+ sllx REG2, 32, REG2; \
+ andcc REG1, REG2, %g0; \
+ sethi %hi(0xf8000000), REG2; \
+ bne,pt %xcc, 697f; \
+ sllx REG2, 1, REG2; \
+ sllx VADDR, 64 - (PMD_SHIFT + PMD_BITS), REG2; \
srlx REG2, 64 - PAGE_SHIFT, REG2; \
andn REG2, 0x7, REG2; \
- add REG1, REG2, REG1;
+ ldxa [REG1 + REG2] ASI_PHYS_USE_EC, REG1; \
+ sethi %uhi(_PAGE_PMD_HUGE), REG2; \
+ brz,pn REG1, FAIL_LABEL; \
+ sllx REG2, 32, REG2; \
+ andcc REG1, REG2, %g0; \
+ be,pn %xcc, 698f; \
+ sethi %hi(0x400000), REG2; \
+697: brgez,pn REG1, FAIL_LABEL; \
+ andn REG1, REG2, REG1; \
+ and VADDR, REG2, REG2; \
+ ba,pt %xcc, 699f; \
+ or REG1, REG2, REG1; \
+698: sllx VADDR, 64 - PMD_SHIFT, REG2; \
+ srlx REG2, 64 - PAGE_SHIFT, REG2; \
+ andn REG2, 0x7, REG2; \
+ ldxa [REG1 + REG2] ASI_PHYS_USE_EC, REG1; \
+ brgez,pn REG1, FAIL_LABEL; \
+ nop; \
+699:

/* PMD has been loaded into REG1, interpret the value, seeing
* if it is a HUGE PMD or a normal one. If it is not valid
@@ -198,6 +238,11 @@ extern struct tsb_phys_patch_entry __tsb_phys_patch, __tsb_phys_patch_end;
andn REG2, 0x7, REG2; \
ldxa [PHYS_PGD + REG2] ASI_PHYS_USE_EC, REG1; \
brz,pn REG1, FAIL_LABEL; \
+ sllx VADDR, 64 - (PUD_SHIFT + PUD_BITS), REG2; \
+ srlx REG2, 64 - PAGE_SHIFT, REG2; \
+ andn REG2, 0x7, REG2; \
+ ldxa [REG1 + REG2] ASI_PHYS_USE_EC, REG1; \
+ brz,pn REG1, FAIL_LABEL; \
sllx VADDR, 64 - (PMD_SHIFT + PMD_BITS), REG2; \
srlx REG2, 64 - PAGE_SHIFT, REG2; \
andn REG2, 0x7, REG2; \
@@ -246,8 +291,6 @@ extern struct tsb_phys_patch_entry __tsb_phys_patch, __tsb_phys_patch_end;
(KERNEL_TSB_SIZE_BYTES / 16)
#define KERNEL_TSB4M_NENTRIES 4096

-#define KTSB_PHYS_SHIFT 15
-
/* Do a kernel TSB lookup at tl>0 on VADDR+TAG, branch to OK_LABEL
* on TSB hit. REG1, REG2, REG3, and REG4 are used as temporaries
* and the found TTE will be left in REG1. REG3 and REG4 must
@@ -256,17 +299,15 @@ extern struct tsb_phys_patch_entry __tsb_phys_patch, __tsb_phys_patch_end;
* VADDR and TAG will be preserved and not clobbered by this macro.
*/
#define KERN_TSB_LOOKUP_TL1(VADDR, TAG, REG1, REG2, REG3, REG4, OK_LABEL) \
-661: sethi %hi(swapper_tsb), REG1; \
- or REG1, %lo(swapper_tsb), REG1; \
+661: sethi %uhi(swapper_tsb), REG1; \
+ sethi %hi(swapper_tsb), REG2; \
+ or REG1, %ulo(swapper_tsb), REG1; \
+ or REG2, %lo(swapper_tsb), REG2; \
.section .swapper_tsb_phys_patch, "ax"; \
.word 661b; \
.previous; \
-661: nop; \
- .section .tsb_ldquad_phys_patch, "ax"; \
- .word 661b; \
- sllx REG1, KTSB_PHYS_SHIFT, REG1; \
- sllx REG1, KTSB_PHYS_SHIFT, REG1; \
- .previous; \
+ sllx REG1, 32, REG1; \
+ or REG1, REG2, REG1; \
srlx VADDR, PAGE_SHIFT, REG2; \
and REG2, (KERNEL_TSB_NENTRIES - 1), REG2; \
sllx REG2, 4, REG2; \
@@ -281,17 +322,15 @@ extern struct tsb_phys_patch_entry __tsb_phys_patch, __tsb_phys_patch_end;
* we can make use of that for the index computation.
*/
#define KERN_TSB4M_LOOKUP_TL1(TAG, REG1, REG2, REG3, REG4, OK_LABEL) \
-661: sethi %hi(swapper_4m_tsb), REG1; \
- or REG1, %lo(swapper_4m_tsb), REG1; \
+661: sethi %uhi(swapper_4m_tsb), REG1; \
+ sethi %hi(swapper_4m_tsb), REG2; \
+ or REG1, %ulo(swapper_4m_tsb), REG1; \
+ or REG2, %lo(swapper_4m_tsb), REG2; \
.section .swapper_4m_tsb_phys_patch, "ax"; \
.word 661b; \
.previous; \
-661: nop; \
- .section .tsb_ldquad_phys_patch, "ax"; \
- .word 661b; \
- sllx REG1, KTSB_PHYS_SHIFT, REG1; \
- sllx REG1, KTSB_PHYS_SHIFT, REG1; \
- .previous; \
+ sllx REG1, 32, REG1; \
+ or REG1, REG2, REG1; \
and TAG, (KERNEL_TSB4M_NENTRIES - 1), REG2; \
sllx REG2, 4, REG2; \
add REG1, REG2, REG2; \
diff --git a/arch/sparc/include/asm/visasm.h b/arch/sparc/include/asm/visasm.h
index 39ca301..11fdf0e 100644
--- a/arch/sparc/include/asm/visasm.h
+++ b/arch/sparc/include/asm/visasm.h
@@ -39,6 +39,14 @@
297: wr %o5, FPRS_FEF, %fprs; \
298:

+#define VISEntryHalfFast(fail_label) \
+ rd %fprs, %o5; \
+ andcc %o5, FPRS_FEF, %g0; \
+ be,pt %icc, 297f; \
+ nop; \
+ ba,a,pt %xcc, fail_label; \
+297: wr %o5, FPRS_FEF, %fprs;
+
#define VISExitHalf \
wr %o5, 0, %fprs;

diff --git a/arch/sparc/kernel/cpu.c b/arch/sparc/kernel/cpu.c
index 5c51258..52e10de 100644
--- a/arch/sparc/kernel/cpu.c
+++ b/arch/sparc/kernel/cpu.c
@@ -493,6 +493,18 @@ static void __init sun4v_cpu_probe(void)
sparc_pmu_type = "niagara5";
break;

+ case SUN4V_CHIP_SPARC_M6:
+ sparc_cpu_type = "SPARC-M6";
+ sparc_fpu_type = "SPARC-M6 integrated FPU";
+ sparc_pmu_type = "sparc-m6";
+ break;
+
+ case SUN4V_CHIP_SPARC_M7:
+ sparc_cpu_type = "SPARC-M7";
+ sparc_fpu_type = "SPARC-M7 integrated FPU";
+ sparc_pmu_type = "sparc-m7";
+ break;
+
case SUN4V_CHIP_SPARC64X:
sparc_cpu_type = "SPARC64-X";
sparc_fpu_type = "SPARC64-X integrated FPU";
diff --git a/arch/sparc/kernel/cpumap.c b/arch/sparc/kernel/cpumap.c
index cb5d272..b031c9c 100644
--- a/arch/sparc/kernel/cpumap.c
+++ b/arch/sparc/kernel/cpumap.c
@@ -327,6 +327,8 @@ static int iterate_cpu(struct cpuinfo_tree *t, unsigned int root_index)
case SUN4V_CHIP_NIAGARA3:
case SUN4V_CHIP_NIAGARA4:
case SUN4V_CHIP_NIAGARA5:
+ case SUN4V_CHIP_SPARC_M6:
+ case SUN4V_CHIP_SPARC_M7:
case SUN4V_CHIP_SPARC64X:
rover_inc_table = niagara_iterate_method;
break;
diff --git a/arch/sparc/kernel/ds.c b/arch/sparc/kernel/ds.c
index dff60ab..f87a55d 100644
--- a/arch/sparc/kernel/ds.c
+++ b/arch/sparc/kernel/ds.c
@@ -1200,14 +1200,14 @@ static int ds_probe(struct vio_dev *vdev, const struct vio_device_id *id)
ds_cfg.tx_irq = vdev->tx_irq;
ds_cfg.rx_irq = vdev->rx_irq;

- lp = ldc_alloc(vdev->channel_id, &ds_cfg, dp);
+ lp = ldc_alloc(vdev->channel_id, &ds_cfg, dp, "DS");
if (IS_ERR(lp)) {
err = PTR_ERR(lp);
goto out_free_ds_states;
}
dp->lp = lp;

- err = ldc_bind(lp, "DS");
+ err = ldc_bind(lp);
if (err)
goto out_free_ldc;

diff --git a/arch/sparc/kernel/dtlb_prot.S b/arch/sparc/kernel/dtlb_prot.S
index b2c2c5b..d668ca14 100644
--- a/arch/sparc/kernel/dtlb_prot.S
+++ b/arch/sparc/kernel/dtlb_prot.S
@@ -24,11 +24,11 @@
mov TLB_TAG_ACCESS, %g4 ! For reload of vaddr

/* PROT ** ICACHE line 2: More real fault processing */
+ ldxa [%g4] ASI_DMMU, %g5 ! Put tagaccess in %g5
bgu,pn %xcc, winfix_trampoline ! Yes, perform winfixup
- ldxa [%g4] ASI_DMMU, %g5 ! Put tagaccess in %g5
- ba,pt %xcc, sparc64_realfault_common ! Nope, normal fault
mov FAULT_CODE_DTLB | FAULT_CODE_WRITE, %g4
- nop
+ ba,pt %xcc, sparc64_realfault_common ! Nope, normal fault
+ nop
nop
nop
nop
diff --git a/arch/sparc/kernel/entry.h b/arch/sparc/kernel/entry.h
index 140966f..c88ffb9 100644
--- a/arch/sparc/kernel/entry.h
+++ b/arch/sparc/kernel/entry.h
@@ -66,13 +66,10 @@ struct pause_patch_entry {
extern struct pause_patch_entry __pause_3insn_patch,
__pause_3insn_patch_end;

-extern void __init per_cpu_patch(void);
extern void sun4v_patch_1insn_range(struct sun4v_1insn_patch_entry *,
struct sun4v_1insn_patch_entry *);
extern void sun4v_patch_2insn_range(struct sun4v_2insn_patch_entry *,
struct sun4v_2insn_patch_entry *);
-extern void __init sun4v_patch(void);
-extern void __init boot_cpu_id_too_large(int cpu);
extern unsigned int dcache_parity_tl1_occurred;
extern unsigned int icache_parity_tl1_occurred;

diff --git a/arch/sparc/kernel/head_64.S b/arch/sparc/kernel/head_64.S
index 452f04f..3d61fca 100644
--- a/arch/sparc/kernel/head_64.S
+++ b/arch/sparc/kernel/head_64.S
@@ -427,6 +427,12 @@ sun4v_chip_type:
cmp %g2, '5'
be,pt %xcc, 5f
mov SUN4V_CHIP_NIAGARA5, %g4
+ cmp %g2, '6'
+ be,pt %xcc, 5f
+ mov SUN4V_CHIP_SPARC_M6, %g4
+ cmp %g2, '7'
+ be,pt %xcc, 5f
+ mov SUN4V_CHIP_SPARC_M7, %g4
ba,pt %xcc, 49f
nop

@@ -585,6 +591,12 @@ niagara_tlb_fixup:
cmp %g1, SUN4V_CHIP_NIAGARA5
be,pt %xcc, niagara4_patch
nop
+ cmp %g1, SUN4V_CHIP_SPARC_M6
+ be,pt %xcc, niagara4_patch
+ nop
+ cmp %g1, SUN4V_CHIP_SPARC_M7
+ be,pt %xcc, niagara4_patch
+ nop

call generic_patch_copyops
nop
@@ -660,14 +672,12 @@ tlb_fixup_done:
sethi %hi(init_thread_union), %g6
or %g6, %lo(init_thread_union), %g6
ldx [%g6 + TI_TASK], %g4
- mov %sp, %l6

wr %g0, ASI_P, %asi
mov 1, %g1
sllx %g1, THREAD_SHIFT, %g1
sub %g1, (STACKFRAME_SZ + STACK_BIAS), %g1
add %g6, %g1, %sp
- mov 0, %fp

/* Set per-cpu pointer initially to zero, this makes
* the boot-cpu use the in-kernel-image per-cpu areas
@@ -694,44 +704,14 @@ tlb_fixup_done:
nop
#endif

- mov %l6, %o1 ! OpenPROM stack
call prom_init
mov %l7, %o0 ! OpenPROM cif handler

- /* Initialize current_thread_info()->cpu as early as possible.
- * In order to do that accurately we have to patch up the get_cpuid()
- * assembler sequences. And that, in turn, requires that we know
- * if we are on a Starfire box or not. While we're here, patch up
- * the sun4v sequences as well.
+ /* To create a one-register-window buffer between the kernel's
+ * initial stack and the last stack frame we use from the firmware,
+ * do the rest of the boot from a C helper function.
*/
- call check_if_starfire
- nop
- call per_cpu_patch
- nop
- call sun4v_patch
- nop
-
-#ifdef CONFIG_SMP
- call hard_smp_processor_id
- nop
- cmp %o0, NR_CPUS
- blu,pt %xcc, 1f
- nop
- call boot_cpu_id_too_large
- nop
- /* Not reached... */
-
-1:
-#else
- mov 0, %o0
-#endif
- sth %o0, [%g6 + TI_CPU]
-
- call prom_init_report
- nop
-
- /* Off we go.... */
- call start_kernel
+ call start_early_boot
nop
/* Not reached... */

diff --git a/arch/sparc/kernel/hvapi.c b/arch/sparc/kernel/hvapi.c
index c0a2de0..5c55145 100644
--- a/arch/sparc/kernel/hvapi.c
+++ b/arch/sparc/kernel/hvapi.c
@@ -46,6 +46,7 @@ static struct api_info api_table[] = {
{ .group = HV_GRP_VF_CPU, },
{ .group = HV_GRP_KT_CPU, },
{ .group = HV_GRP_VT_CPU, },
+ { .group = HV_GRP_T5_CPU, },
{ .group = HV_GRP_DIAG, .flags = FLAG_PRE_API },
};

diff --git a/arch/sparc/kernel/hvcalls.S b/arch/sparc/kernel/hvcalls.S
index f3ab509..caedf83 100644
--- a/arch/sparc/kernel/hvcalls.S
+++ b/arch/sparc/kernel/hvcalls.S
@@ -821,3 +821,19 @@ ENTRY(sun4v_vt_set_perfreg)
retl
nop
ENDPROC(sun4v_vt_set_perfreg)
+
+ENTRY(sun4v_t5_get_perfreg)
+ mov %o1, %o4
+ mov HV_FAST_T5_GET_PERFREG, %o5
+ ta HV_FAST_TRAP
+ stx %o1, [%o4]
+ retl
+ nop
+ENDPROC(sun4v_t5_get_perfreg)
+
+ENTRY(sun4v_t5_set_perfreg)
+ mov HV_FAST_T5_SET_PERFREG, %o5
+ ta HV_FAST_TRAP
+ retl
+ nop
+ENDPROC(sun4v_t5_set_perfreg)
diff --git a/arch/sparc/kernel/hvtramp.S b/arch/sparc/kernel/hvtramp.S
index 4eb1a5a..4ad8138 100644
--- a/arch/sparc/kernel/hvtramp.S
+++ b/arch/sparc/kernel/hvtramp.S
@@ -110,7 +110,6 @@ hv_cpu_startup:
sllx %g5, THREAD_SHIFT, %g5
sub %g5, (STACKFRAME_SZ + STACK_BIAS), %g5
add %g6, %g5, %sp
- mov 0, %fp

call init_irqwork_curcpu
nop
diff --git a/arch/sparc/kernel/ioport.c b/arch/sparc/kernel/ioport.c
index e7e215d..c2d81ad 100644
--- a/arch/sparc/kernel/ioport.c
+++ b/arch/sparc/kernel/ioport.c
@@ -278,7 +278,8 @@ static void *sbus_alloc_coherent(struct device *dev, size_t len,
}

order = get_order(len_total);
- if ((va = __get_free_pages(GFP_KERNEL|__GFP_COMP, order)) == 0)
+ va = __get_free_pages(gfp, order);
+ if (va == 0)
goto err_nopages;

if ((res = kzalloc(sizeof(struct resource), GFP_KERNEL)) == NULL)
@@ -443,7 +444,7 @@ static void *pci32_alloc_coherent(struct device *dev, size_t len,
}

order = get_order(len_total);
- va = (void *) __get_free_pages(GFP_KERNEL, order);
+ va = (void *) __get_free_pages(gfp, order);
if (va == NULL) {
printk("pci_alloc_consistent: no %ld pages\n", len_total>>PAGE_SHIFT);
goto err_nopages;
diff --git a/arch/sparc/kernel/irq_64.c b/arch/sparc/kernel/irq_64.c
index 666193f..4033c23 100644
--- a/arch/sparc/kernel/irq_64.c
+++ b/arch/sparc/kernel/irq_64.c
@@ -47,8 +47,6 @@
#include "cpumap.h"
#include "kstack.h"

-#define NUM_IVECS (IMAP_INR + 1)
-
struct ino_bucket *ivector_table;
unsigned long ivector_table_pa;

@@ -107,55 +105,196 @@ static void bucket_set_irq(unsigned long bucket_pa, unsigned int irq)

#define irq_work_pa(__cpu) &(trap_block[(__cpu)].irq_worklist_pa)

-static struct {
- unsigned int dev_handle;
- unsigned int dev_ino;
- unsigned int in_use;
-} irq_table[NR_IRQS];
-static DEFINE_SPINLOCK(irq_alloc_lock);
+static unsigned long hvirq_major __initdata;
+static int __init early_hvirq_major(char *p)
+{
+ int rc = kstrtoul(p, 10, &hvirq_major);
+
+ return rc;
+}
+early_param("hvirq", early_hvirq_major);
+
+static int hv_irq_version;
+
+/* Major version 2.0 of HV_GRP_INTR added support for the VIRQ cookie
+ * based interfaces, but:
+ *
+ * 1) Several OSs, Solaris and Linux included, use them even when only
+ * negotiating version 1.0 (or failing to negotiate at all). So the
+ * hypervisor has a workaround that provides the VIRQ interfaces even
+ * when only verion 1.0 of the API is in use.
+ *
+ * 2) Second, and more importantly, with major version 2.0 these VIRQ
+ * interfaces only were actually hooked up for LDC interrupts, even
+ * though the Hypervisor specification clearly stated:
+ *
+ * The new interrupt API functions will be available to a guest
+ * when it negotiates version 2.0 in the interrupt API group 0x2. When
+ * a guest negotiates version 2.0, all interrupt sources will only
+ * support using the cookie interface, and any attempt to use the
+ * version 1.0 interrupt APIs numbered 0xa0 to 0xa6 will result in the
+ * ENOTSUPPORTED error being returned.
+ *
+ * with an emphasis on "all interrupt sources".
+ *
+ * To correct this, major version 3.0 was created which does actually
+ * support VIRQs for all interrupt sources (not just LDC devices). So
+ * if we want to move completely over the cookie based VIRQs we must
+ * negotiate major version 3.0 or later of HV_GRP_INTR.
+ */
+static bool sun4v_cookie_only_virqs(void)
+{
+ if (hv_irq_version >= 3)
+ return true;
+ return false;
+}

-unsigned char irq_alloc(unsigned int dev_handle, unsigned int dev_ino)
+static void __init irq_init_hv(void)
{
- unsigned long flags;
- unsigned char ent;
+ unsigned long hv_error, major, minor = 0;
+
+ if (tlb_type != hypervisor)
+ return;

- BUILD_BUG_ON(NR_IRQS >= 256);
+ if (hvirq_major)
+ major = hvirq_major;
+ else
+ major = 3;

- spin_lock_irqsave(&irq_alloc_lock, flags);
+ hv_error = sun4v_hvapi_register(HV_GRP_INTR, major, &minor);
+ if (!hv_error)
+ hv_irq_version = major;
+ else
+ hv_irq_version = 1;

- for (ent = 1; ent < NR_IRQS; ent++) {
- if (!irq_table[ent].in_use)
+ pr_info("SUN4V: Using IRQ API major %d, cookie only virqs %s\n",
+ hv_irq_version,
+ sun4v_cookie_only_virqs() ? "enabled" : "disabled");
+}
+
+/* This function is for the timer interrupt.*/
+int __init arch_probe_nr_irqs(void)
+{
+ return 1;
+}
+
+#define DEFAULT_NUM_IVECS (0xfffU)
+static unsigned int nr_ivec = DEFAULT_NUM_IVECS;
+#define NUM_IVECS (nr_ivec)
+
+static unsigned int __init size_nr_ivec(void)
+{
+ if (tlb_type == hypervisor) {
+ switch (sun4v_chip_type) {
+ /* Athena's devhandle|devino is large.*/
+ case SUN4V_CHIP_SPARC64X:
+ nr_ivec = 0xffff;
break;
+ }
}
- if (ent >= NR_IRQS) {
- printk(KERN_ERR "IRQ: Out of virtual IRQs.\n");
- ent = 0;
- } else {
- irq_table[ent].dev_handle = dev_handle;
- irq_table[ent].dev_ino = dev_ino;
- irq_table[ent].in_use = 1;
- }
+ return nr_ivec;
+}
+
+struct irq_handler_data {
+ union {
+ struct {
+ unsigned int dev_handle;
+ unsigned int dev_ino;
+ };
+ unsigned long sysino;
+ };
+ struct ino_bucket bucket;
+ unsigned long iclr;
+ unsigned long imap;
+};
+
+static inline unsigned int irq_data_to_handle(struct irq_data *data)
+{
+ struct irq_handler_data *ihd = data->handler_data;
+
+ return ihd->dev_handle;
+}
+
+static inline unsigned int irq_data_to_ino(struct irq_data *data)
+{
+ struct irq_handler_data *ihd = data->handler_data;

- spin_unlock_irqrestore(&irq_alloc_lock, flags);
+ return ihd->dev_ino;
+}
+
+static inline unsigned long irq_data_to_sysino(struct irq_data *data)
+{
+ struct irq_handler_data *ihd = data->handler_data;

- return ent;
+ return ihd->sysino;
}

-#ifdef CONFIG_PCI_MSI
void irq_free(unsigned int irq)
{
- unsigned long flags;
+ void *data = irq_get_handler_data(irq);

- if (irq >= NR_IRQS)
- return;
+ kfree(data);
+ irq_set_handler_data(irq, NULL);
+ irq_free_descs(irq, 1);
+}

- spin_lock_irqsave(&irq_alloc_lock, flags);
+unsigned int irq_alloc(unsigned int dev_handle, unsigned int dev_ino)
+{
+ int irq;

- irq_table[irq].in_use = 0;
+ irq = __irq_alloc_descs(-1, 1, 1, numa_node_id(), NULL);
+ if (irq <= 0)
+ goto out;

- spin_unlock_irqrestore(&irq_alloc_lock, flags);
+ return irq;
+out:
+ return 0;
+}
+
+static unsigned int cookie_exists(u32 devhandle, unsigned int devino)
+{
+ unsigned long hv_err, cookie;
+ struct ino_bucket *bucket;
+ unsigned int irq = 0U;
+
+ hv_err = sun4v_vintr_get_cookie(devhandle, devino, &cookie);
+ if (hv_err) {
+ pr_err("HV get cookie failed hv_err = %ld\n", hv_err);
+ goto out;
+ }
+
+ if (cookie & ((1UL << 63UL))) {
+ cookie = ~cookie;
+ bucket = (struct ino_bucket *) __va(cookie);
+ irq = bucket->__irq;
+ }
+out:
+ return irq;
+}
+
+static unsigned int sysino_exists(u32 devhandle, unsigned int devino)
+{
+ unsigned long sysino = sun4v_devino_to_sysino(devhandle, devino);
+ struct ino_bucket *bucket;
+ unsigned int irq;
+
+ bucket = &ivector_table[sysino];
+ irq = bucket_get_irq(__pa(bucket));
+
+ return irq;
+}
+
+void ack_bad_irq(unsigned int irq)
+{
+ pr_crit("BAD IRQ ack %d\n", irq);
+}
+
+void irq_install_pre_handler(int irq,
+ void (*func)(unsigned int, void *, void *),
+ void *arg1, void *arg2)
+{
+ pr_warn("IRQ pre handler NOT supported.\n");
}
-#endif

/*
* /proc/interrupts printing:
@@ -206,15 +345,6 @@ static unsigned int sun4u_compute_tid(unsigned long imap, unsigned long cpuid)
return tid;
}

-struct irq_handler_data {
- unsigned long iclr;
- unsigned long imap;
-
- void (*pre_handler)(unsigned int, void *, void *);
- void *arg1;
- void *arg2;
-};
-
#ifdef CONFIG_SMP
static int irq_choose_cpu(unsigned int irq, const struct cpumask *affinity)
{
@@ -316,8 +446,8 @@ static void sun4u_irq_eoi(struct irq_data *data)

static void sun4v_irq_enable(struct irq_data *data)
{
- unsigned int ino = irq_table[data->irq].dev_ino;
unsigned long cpuid = irq_choose_cpu(data->irq, data->affinity);
+ unsigned int ino = irq_data_to_sysino(data);
int err;

err = sun4v_intr_settarget(ino, cpuid);
@@ -337,8 +467,8 @@ static void sun4v_irq_enable(struct irq_data *data)
static int sun4v_set_affinity(struct irq_data *data,
const struct cpumask *mask, bool force)
{
- unsigned int ino = irq_table[data->irq].dev_ino;
unsigned long cpuid = irq_choose_cpu(data->irq, mask);
+ unsigned int ino = irq_data_to_sysino(data);
int err;

err = sun4v_intr_settarget(ino, cpuid);
@@ -351,7 +481,7 @@ static int sun4v_set_affinity(struct irq_data *data,

static void sun4v_irq_disable(struct irq_data *data)
{
- unsigned int ino = irq_table[data->irq].dev_ino;
+ unsigned int ino = irq_data_to_sysino(data);
int err;

err = sun4v_intr_setenabled(ino, HV_INTR_DISABLED);
@@ -362,7 +492,7 @@ static void sun4v_irq_disable(struct irq_data *data)

static void sun4v_irq_eoi(struct irq_data *data)
{
- unsigned int ino = irq_table[data->irq].dev_ino;
+ unsigned int ino = irq_data_to_sysino(data);
int err;

err = sun4v_intr_setstate(ino, HV_INTR_STATE_IDLE);
@@ -373,14 +503,13 @@ static void sun4v_irq_eoi(struct irq_data *data)

static void sun4v_virq_enable(struct irq_data *data)
{
- unsigned long cpuid, dev_handle, dev_ino;
+ unsigned long dev_handle = irq_data_to_handle(data);
+ unsigned long dev_ino = irq_data_to_ino(data);
+ unsigned long cpuid;
int err;

cpuid = irq_choose_cpu(data->irq, data->affinity);

- dev_handle = irq_table[data->irq].dev_handle;
- dev_ino = irq_table[data->irq].dev_ino;
-
err = sun4v_vintr_set_target(dev_handle, dev_ino, cpuid);
if (err != HV_EOK)
printk(KERN_ERR "sun4v_vintr_set_target(%lx,%lx,%lu): "
@@ -403,14 +532,13 @@ static void sun4v_virq_enable(struct irq_data *data)
static int sun4v_virt_set_affinity(struct irq_data *data,
const struct cpumask *mask, bool force)
{
- unsigned long cpuid, dev_handle, dev_ino;
+ unsigned long dev_handle = irq_data_to_handle(data);
+ unsigned long dev_ino = irq_data_to_ino(data);
+ unsigned long cpuid;
int err;

cpuid = irq_choose_cpu(data->irq, mask);

- dev_handle = irq_table[data->irq].dev_handle;
- dev_ino = irq_table[data->irq].dev_ino;
-
err = sun4v_vintr_set_target(dev_handle, dev_ino, cpuid);
if (err != HV_EOK)
printk(KERN_ERR "sun4v_vintr_set_target(%lx,%lx,%lu): "
@@ -422,11 +550,10 @@ static int sun4v_virt_set_affinity(struct irq_data *data,

static void sun4v_virq_disable(struct irq_data *data)
{
- unsigned long dev_handle, dev_ino;
+ unsigned long dev_handle = irq_data_to_handle(data);
+ unsigned long dev_ino = irq_data_to_ino(data);
int err;

- dev_handle = irq_table[data->irq].dev_handle;
- dev_ino = irq_table[data->irq].dev_ino;

err = sun4v_vintr_set_valid(dev_handle, dev_ino,
HV_INTR_DISABLED);
@@ -438,12 +565,10 @@ static void sun4v_virq_disable(struct irq_data *data)

static void sun4v_virq_eoi(struct irq_data *data)
{
- unsigned long dev_handle, dev_ino;
+ unsigned long dev_handle = irq_data_to_handle(data);
+ unsigned long dev_ino = irq_data_to_ino(data);
int err;

- dev_handle = irq_table[data->irq].dev_handle;
- dev_ino = irq_table[data->irq].dev_ino;
-
err = sun4v_vintr_set_state(dev_handle, dev_ino,
HV_INTR_STATE_IDLE);
if (err != HV_EOK)
@@ -479,31 +604,10 @@ static struct irq_chip sun4v_virq = {
.flags = IRQCHIP_EOI_IF_HANDLED,
};

-static void pre_flow_handler(struct irq_data *d)
-{
- struct irq_handler_data *handler_data = irq_data_get_irq_handler_data(d);
- unsigned int ino = irq_table[d->irq].dev_ino;
-
- handler_data->pre_handler(ino, handler_data->arg1, handler_data->arg2);
-}
-
-void irq_install_pre_handler(int irq,
- void (*func)(unsigned int, void *, void *),
- void *arg1, void *arg2)
-{
- struct irq_handler_data *handler_data = irq_get_handler_data(irq);
-
- handler_data->pre_handler = func;
- handler_data->arg1 = arg1;
- handler_data->arg2 = arg2;
-
- __irq_set_preflow_handler(irq, pre_flow_handler);
-}
-
unsigned int build_irq(int inofixup, unsigned long iclr, unsigned long imap)
{
- struct ino_bucket *bucket;
struct irq_handler_data *handler_data;
+ struct ino_bucket *bucket;
unsigned int irq;
int ino;

@@ -537,119 +641,166 @@ out:
return irq;
}

-static unsigned int sun4v_build_common(unsigned long sysino,
- struct irq_chip *chip)
+static unsigned int sun4v_build_common(u32 devhandle, unsigned int devino,
+ void (*handler_data_init)(struct irq_handler_data *data,
+ u32 devhandle, unsigned int devino),
+ struct irq_chip *chip)
{
- struct ino_bucket *bucket;
- struct irq_handler_data *handler_data;
+ struct irq_handler_data *data;
unsigned int irq;

- BUG_ON(tlb_type != hypervisor);
+ irq = irq_alloc(devhandle, devino);
+ if (!irq)
+ goto out;

- bucket = &ivector_table[sysino];
- irq = bucket_get_irq(__pa(bucket));
- if (!irq) {
- irq = irq_alloc(0, sysino);
- bucket_set_irq(__pa(bucket), irq);
- irq_set_chip_and_handler_name(irq, chip, handle_fasteoi_irq,
- "IVEC");
+ data = kzalloc(sizeof(struct irq_handler_data), GFP_ATOMIC);
+ if (unlikely(!data)) {
+ pr_err("IRQ handler data allocation failed.\n");
+ irq_free(irq);
+ irq = 0;
+ goto out;
}

- handler_data = irq_get_handler_data(irq);
- if (unlikely(handler_data))
- goto out;
+ irq_set_handler_data(irq, data);
+ handler_data_init(data, devhandle, devino);
+ irq_set_chip_and_handler_name(irq, chip, handle_fasteoi_irq, "IVEC");
+ data->imap = ~0UL;
+ data->iclr = ~0UL;
+out:
+ return irq;
+}

- handler_data = kzalloc(sizeof(struct irq_handler_data), GFP_ATOMIC);
- if (unlikely(!handler_data)) {
- prom_printf("IRQ: kzalloc(irq_handler_data) failed.\n");
- prom_halt();
- }
- irq_set_handler_data(irq, handler_data);
+static unsigned long cookie_assign(unsigned int irq, u32 devhandle,
+ unsigned int devino)
+{
+ struct irq_handler_data *ihd = irq_get_handler_data(irq);
+ unsigned long hv_error, cookie;

- /* Catch accidental accesses to these things. IMAP/ICLR handling
- * is done by hypervisor calls on sun4v platforms, not by direct
- * register accesses.
+ /* handler_irq needs to find the irq. cookie is seen signed in
+ * sun4v_dev_mondo and treated as a non ivector_table delivery.
*/
- handler_data->imap = ~0UL;
- handler_data->iclr = ~0UL;
+ ihd->bucket.__irq = irq;
+ cookie = ~__pa(&ihd->bucket);

-out:
- return irq;
+ hv_error = sun4v_vintr_set_cookie(devhandle, devino, cookie);
+ if (hv_error)
+ pr_err("HV vintr set cookie failed = %ld\n", hv_error);
+
+ return hv_error;
}

-unsigned int sun4v_build_irq(u32 devhandle, unsigned int devino)
+static void cookie_handler_data(struct irq_handler_data *data,
+ u32 devhandle, unsigned int devino)
{
- unsigned long sysino = sun4v_devino_to_sysino(devhandle, devino);
+ data->dev_handle = devhandle;
+ data->dev_ino = devino;
+}

- return sun4v_build_common(sysino, &sun4v_irq);
+static unsigned int cookie_build_irq(u32 devhandle, unsigned int devino,
+ struct irq_chip *chip)
+{
+ unsigned long hv_error;
+ unsigned int irq;
+
+ irq = sun4v_build_common(devhandle, devino, cookie_handler_data, chip);
+
+ hv_error = cookie_assign(irq, devhandle, devino);
+ if (hv_error) {
+ irq_free(irq);
+ irq = 0;
+ }
+
+ return irq;
}

-unsigned int sun4v_build_virq(u32 devhandle, unsigned int devino)
+static unsigned int sun4v_build_cookie(u32 devhandle, unsigned int devino)
{
- struct irq_handler_data *handler_data;
- unsigned long hv_err, cookie;
- struct ino_bucket *bucket;
unsigned int irq;

- bucket = kzalloc(sizeof(struct ino_bucket), GFP_ATOMIC);
- if (unlikely(!bucket))
- return 0;
+ irq = cookie_exists(devhandle, devino);
+ if (irq)
+ goto out;

- /* The only reference we store to the IRQ bucket is
- * by physical address which kmemleak can't see, tell
- * it that this object explicitly is not a leak and
- * should be scanned.
- */
- kmemleak_not_leak(bucket);
+ irq = cookie_build_irq(devhandle, devino, &sun4v_virq);

- __flush_dcache_range((unsigned long) bucket,
- ((unsigned long) bucket +
- sizeof(struct ino_bucket)));
+out:
+ return irq;
+}

- irq = irq_alloc(devhandle, devino);
+static void sysino_set_bucket(unsigned int irq)
+{
+ struct irq_handler_data *ihd = irq_get_handler_data(irq);
+ struct ino_bucket *bucket;
+ unsigned long sysino;
+
+ sysino = sun4v_devino_to_sysino(ihd->dev_handle, ihd->dev_ino);
+ BUG_ON(sysino >= nr_ivec);
+ bucket = &ivector_table[sysino];
bucket_set_irq(__pa(bucket), irq);
+}

- irq_set_chip_and_handler_name(irq, &sun4v_virq, handle_fasteoi_irq,
- "IVEC");
+static void sysino_handler_data(struct irq_handler_data *data,
+ u32 devhandle, unsigned int devino)
+{
+ unsigned long sysino;

- handler_data = kzalloc(sizeof(struct irq_handler_data), GFP_ATOMIC);
- if (unlikely(!handler_data))
- return 0;
+ sysino = sun4v_devino_to_sysino(devhandle, devino);
+ data->sysino = sysino;
+}

- /* In order to make the LDC channel startup sequence easier,
- * especially wrt. locking, we do not let request_irq() enable
- * the interrupt.
- */
- irq_set_status_flags(irq, IRQ_NOAUTOEN);
- irq_set_handler_data(irq, handler_data);
+static unsigned int sysino_build_irq(u32 devhandle, unsigned int devino,
+ struct irq_chip *chip)
+{
+ unsigned int irq;

- /* Catch accidental accesses to these things. IMAP/ICLR handling
- * is done by hypervisor calls on sun4v platforms, not by direct
- * register accesses.
- */
- handler_data->imap = ~0UL;
- handler_data->iclr = ~0UL;
+ irq = sun4v_build_common(devhandle, devino, sysino_handler_data, chip);
+ if (!irq)
+ goto out;

- cookie = ~__pa(bucket);
- hv_err = sun4v_vintr_set_cookie(devhandle, devino, cookie);
- if (hv_err) {
- prom_printf("IRQ: Fatal, cannot set cookie for [%x:%x] "
- "err=%lu\n", devhandle, devino, hv_err);
- prom_halt();
- }
+ sysino_set_bucket(irq);
+out:
+ return irq;
+}

+static int sun4v_build_sysino(u32 devhandle, unsigned int devino)
+{
+ int irq;
+
+ irq = sysino_exists(devhandle, devino);
+ if (irq)
+ goto out;
+
+ irq = sysino_build_irq(devhandle, devino, &sun4v_irq);
+out:
return irq;
}

-void ack_bad_irq(unsigned int irq)
+unsigned int sun4v_build_irq(u32 devhandle, unsigned int devino)
{
- unsigned int ino = irq_table[irq].dev_ino;
+ unsigned int irq;

- if (!ino)
- ino = 0xdeadbeef;
+ if (sun4v_cookie_only_virqs())
+ irq = sun4v_build_cookie(devhandle, devino);
+ else
+ irq = sun4v_build_sysino(devhandle, devino);

- printk(KERN_CRIT "Unexpected IRQ from ino[%x] irq[%u]\n",
- ino, irq);
+ return irq;
+}
+
+unsigned int sun4v_build_virq(u32 devhandle, unsigned int devino)
+{
+ int irq;
+
+ irq = cookie_build_irq(devhandle, devino, &sun4v_virq);
+ if (!irq)
+ goto out;
+
+ /* This is borrowed from the original function.
+ */
+ irq_set_status_flags(irq, IRQ_NOAUTOEN);
+
+out:
+ return irq;
}

void *hardirq_stack[NR_CPUS];
@@ -720,9 +871,12 @@ void fixup_irqs(void)

for (irq = 0; irq < NR_IRQS; irq++) {
struct irq_desc *desc = irq_to_desc(irq);
- struct irq_data *data = irq_desc_get_irq_data(desc);
+ struct irq_data *data;
unsigned long flags;

+ if (!desc)
+ continue;
+ data = irq_desc_get_irq_data(desc);
raw_spin_lock_irqsave(&desc->lock, flags);
if (desc->action && !irqd_is_per_cpu(data)) {
if (data->chip->irq_set_affinity)
@@ -922,16 +1076,22 @@ static struct irqaction timer_irq_action = {
.name = "timer",
};

-/* Only invoked on boot processor. */
-void __init init_IRQ(void)
+static void __init irq_ivector_init(void)
{
- unsigned long size;
+ unsigned long size, order;
+ unsigned int ivecs;

- map_prom_timers();
- kill_prom_timer();
+ /* If we are doing cookie only VIRQs then we do not need the ivector
+ * table to process interrupts.
+ */
+ if (sun4v_cookie_only_virqs())
+ return;

- size = sizeof(struct ino_bucket) * NUM_IVECS;
- ivector_table = kzalloc(size, GFP_KERNEL);
+ ivecs = size_nr_ivec();
+ size = sizeof(struct ino_bucket) * ivecs;
+ order = get_order(size);
+ ivector_table = (struct ino_bucket *)
+ __get_free_pages(GFP_KERNEL | __GFP_ZERO, order);
if (!ivector_table) {
prom_printf("Fatal error, cannot allocate ivector_table\n");
prom_halt();
@@ -940,6 +1100,15 @@ void __init init_IRQ(void)
((unsigned long) ivector_table) + size);

ivector_table_pa = __pa(ivector_table);
+}
+
+/* Only invoked on boot processor.*/
+void __init init_IRQ(void)
+{
+ irq_init_hv();
+ irq_ivector_init();
+ map_prom_timers();
+ kill_prom_timer();

if (tlb_type == hypervisor)
sun4v_init_mondo_queues();
diff --git a/arch/sparc/kernel/ktlb.S b/arch/sparc/kernel/ktlb.S
index 605d492..ef0d8e9 100644
--- a/arch/sparc/kernel/ktlb.S
+++ b/arch/sparc/kernel/ktlb.S
@@ -47,14 +47,6 @@ kvmap_itlb_vmalloc_addr:
KERN_PGTABLE_WALK(%g4, %g5, %g2, kvmap_itlb_longpath)

TSB_LOCK_TAG(%g1, %g2, %g7)
-
- /* Load and check PTE. */
- ldxa [%g5] ASI_PHYS_USE_EC, %g5
- mov 1, %g7
- sllx %g7, TSB_TAG_INVALID_BIT, %g7
- brgez,a,pn %g5, kvmap_itlb_longpath
- TSB_STORE(%g1, %g7)
-
TSB_WRITE(%g1, %g5, %g6)

/* fallthrough to TLB load */
@@ -118,6 +110,12 @@ kvmap_dtlb_obp:
ba,pt %xcc, kvmap_dtlb_load
nop

+kvmap_linear_early:
+ sethi %hi(kern_linear_pte_xor), %g7
+ ldx [%g7 + %lo(kern_linear_pte_xor)], %g2
+ ba,pt %xcc, kvmap_dtlb_tsb4m_load
+ xor %g2, %g4, %g5
+
.align 32
kvmap_dtlb_tsb4m_load:
TSB_LOCK_TAG(%g1, %g2, %g7)
@@ -146,105 +144,17 @@ kvmap_dtlb_4v:
/* Correct TAG_TARGET is already in %g6, check 4mb TSB. */
KERN_TSB4M_LOOKUP_TL1(%g6, %g5, %g1, %g2, %g3, kvmap_dtlb_load)
#endif
- /* TSB entry address left in %g1, lookup linear PTE.
- * Must preserve %g1 and %g6 (TAG).
- */
-kvmap_dtlb_tsb4m_miss:
- /* Clear the PAGE_OFFSET top virtual bits, shift
- * down to get PFN, and make sure PFN is in range.
- */
-661: sllx %g4, 0, %g5
- .section .page_offset_shift_patch, "ax"
- .word 661b
- .previous
-
- /* Check to see if we know about valid memory at the 4MB
- * chunk this physical address will reside within.
+ /* Linear mapping TSB lookup failed. Fallthrough to kernel
+ * page table based lookup.
*/
-661: srlx %g5, MAX_PHYS_ADDRESS_BITS, %g2
- .section .page_offset_shift_patch, "ax"
- .word 661b
- .previous
-
- brnz,pn %g2, kvmap_dtlb_longpath
- nop
-
- /* This unconditional branch and delay-slot nop gets patched
- * by the sethi sequence once the bitmap is properly setup.
- */
- .globl valid_addr_bitmap_insn
-valid_addr_bitmap_insn:
- ba,pt %xcc, 2f
- nop
- .subsection 2
- .globl valid_addr_bitmap_patch
-valid_addr_bitmap_patch:
- sethi %hi(sparc64_valid_addr_bitmap), %g7
- or %g7, %lo(sparc64_valid_addr_bitmap), %g7
- .previous
-
-661: srlx %g5, ILOG2_4MB, %g2
- .section .page_offset_shift_patch, "ax"
- .word 661b
- .previous
-
- srlx %g2, 6, %g5
- and %g2, 63, %g2
- sllx %g5, 3, %g5
- ldx [%g7 + %g5], %g5
- mov 1, %g7
- sllx %g7, %g2, %g7
- andcc %g5, %g7, %g0
- be,pn %xcc, kvmap_dtlb_longpath
-
-2: sethi %hi(kpte_linear_bitmap), %g2
-
- /* Get the 256MB physical address index. */
-661: sllx %g4, 0, %g5
- .section .page_offset_shift_patch, "ax"
- .word 661b
- .previous
-
- or %g2, %lo(kpte_linear_bitmap), %g2
-
-661: srlx %g5, ILOG2_256MB, %g5
- .section .page_offset_shift_patch, "ax"
- .word 661b
- .previous
-
- and %g5, (32 - 1), %g7
-
- /* Divide by 32 to get the offset into the bitmask. */
- srlx %g5, 5, %g5
- add %g7, %g7, %g7
- sllx %g5, 3, %g5
-
- /* kern_linear_pte_xor[(mask >> shift) & 3)] */
- ldx [%g2 + %g5], %g2
- srlx %g2, %g7, %g7
- sethi %hi(kern_linear_pte_xor), %g5
- and %g7, 3, %g7
- or %g5, %lo(kern_linear_pte_xor), %g5
- sllx %g7, 3, %g7
- ldx [%g5 + %g7], %g2
-
.globl kvmap_linear_patch
kvmap_linear_patch:
- ba,pt %xcc, kvmap_dtlb_tsb4m_load
- xor %g2, %g4, %g5
+ ba,a,pt %xcc, kvmap_linear_early

kvmap_dtlb_vmalloc_addr:
KERN_PGTABLE_WALK(%g4, %g5, %g2, kvmap_dtlb_longpath)

TSB_LOCK_TAG(%g1, %g2, %g7)
-
- /* Load and check PTE. */
- ldxa [%g5] ASI_PHYS_USE_EC, %g5
- mov 1, %g7
- sllx %g7, TSB_TAG_INVALID_BIT, %g7
- brgez,a,pn %g5, kvmap_dtlb_longpath
- TSB_STORE(%g1, %g7)
-
TSB_WRITE(%g1, %g5, %g6)

/* fallthrough to TLB load */
@@ -276,13 +186,8 @@ kvmap_dtlb_load:

#ifdef CONFIG_SPARSEMEM_VMEMMAP
kvmap_vmemmap:
- sub %g4, %g5, %g5
- srlx %g5, ILOG2_4MB, %g5
- sethi %hi(vmemmap_table), %g1
- sllx %g5, 3, %g5
- or %g1, %lo(vmemmap_table), %g1
- ba,pt %xcc, kvmap_dtlb_load
- ldx [%g1 + %g5], %g5
+ KERN_PGTABLE_WALK(%g4, %g5, %g2, kvmap_dtlb_longpath)
+ ba,a,pt %xcc, kvmap_dtlb_load
#endif

kvmap_dtlb_nonlinear:
@@ -294,8 +199,8 @@ kvmap_dtlb_nonlinear:

#ifdef CONFIG_SPARSEMEM_VMEMMAP
/* Do not use the TSB for vmemmap. */
- mov (VMEMMAP_BASE >> 40), %g5
- sllx %g5, 40, %g5
+ sethi %hi(VMEMMAP_BASE), %g5
+ ldx [%g5 + %lo(VMEMMAP_BASE)], %g5
cmp %g4,%g5
bgeu,pn %xcc, kvmap_vmemmap
nop
@@ -307,8 +212,8 @@ kvmap_dtlb_tsbmiss:
sethi %hi(MODULES_VADDR), %g5
cmp %g4, %g5
blu,pn %xcc, kvmap_dtlb_longpath
- mov (VMALLOC_END >> 40), %g5
- sllx %g5, 40, %g5
+ sethi %hi(VMALLOC_END), %g5
+ ldx [%g5 + %lo(VMALLOC_END)], %g5
cmp %g4, %g5
bgeu,pn %xcc, kvmap_dtlb_longpath
nop
diff --git a/arch/sparc/kernel/ldc.c b/arch/sparc/kernel/ldc.c
index 66dacd5..27bb554 100644
--- a/arch/sparc/kernel/ldc.c
+++ b/arch/sparc/kernel/ldc.c
@@ -1078,7 +1078,8 @@ static void ldc_iommu_release(struct ldc_channel *lp)

struct ldc_channel *ldc_alloc(unsigned long id,
const struct ldc_channel_config *cfgp,
- void *event_arg)
+ void *event_arg,
+ const char *name)
{
struct ldc_channel *lp;
const struct ldc_mode_ops *mops;
@@ -1093,6 +1094,8 @@ struct ldc_channel *ldc_alloc(unsigned long id,
err = -EINVAL;
if (!cfgp)
goto out_err;
+ if (!name)
+ goto out_err;

switch (cfgp->mode) {
case LDC_MODE_RAW:
@@ -1185,6 +1188,21 @@ struct ldc_channel *ldc_alloc(unsigned long id,

INIT_HLIST_HEAD(&lp->mh_list);

+ snprintf(lp->rx_irq_name, LDC_IRQ_NAME_MAX, "%s RX", name);
+ snprintf(lp->tx_irq_name, LDC_IRQ_NAME_MAX, "%s TX", name);
+
+ err = request_irq(lp->cfg.rx_irq, ldc_rx, 0,
+ lp->rx_irq_name, lp);
+ if (err)
+ goto out_free_txq;
+
+ err = request_irq(lp->cfg.tx_irq, ldc_tx, 0,
+ lp->tx_irq_name, lp);
+ if (err) {
+ free_irq(lp->cfg.rx_irq, lp);
+ goto out_free_txq;
+ }
+
return lp;

out_free_txq:
@@ -1237,31 +1255,14 @@ EXPORT_SYMBOL(ldc_free);
* state. This does not initiate a handshake, ldc_connect() does
* that.
*/
-int ldc_bind(struct ldc_channel *lp, const char *name)
+int ldc_bind(struct ldc_channel *lp)
{
unsigned long hv_err, flags;
int err = -EINVAL;

- if (!name ||
- (lp->state != LDC_STATE_INIT))
+ if (lp->state != LDC_STATE_INIT)
return -EINVAL;

- snprintf(lp->rx_irq_name, LDC_IRQ_NAME_MAX, "%s RX", name);
- snprintf(lp->tx_irq_name, LDC_IRQ_NAME_MAX, "%s TX", name);
-
- err = request_irq(lp->cfg.rx_irq, ldc_rx, 0,
- lp->rx_irq_name, lp);
- if (err)
- return err;
-
- err = request_irq(lp->cfg.tx_irq, ldc_tx, 0,
- lp->tx_irq_name, lp);
- if (err) {
- free_irq(lp->cfg.rx_irq, lp);
- return err;
- }
-
-
spin_lock_irqsave(&lp->lock, flags);

enable_irq(lp->cfg.rx_irq);
diff --git a/arch/sparc/kernel/nmi.c b/arch/sparc/kernel/nmi.c
index 6479256..fce8ab1 100644
--- a/arch/sparc/kernel/nmi.c
+++ b/arch/sparc/kernel/nmi.c
@@ -141,7 +141,6 @@ static inline unsigned int get_nmi_count(int cpu)

static __init void nmi_cpu_busy(void *data)
{
- local_irq_enable_in_hardirq();
while (endflag == 0)
mb();
}
diff --git a/arch/sparc/kernel/pcr.c b/arch/sparc/kernel/pcr.c
index 269af58..7e967c8 100644
--- a/arch/sparc/kernel/pcr.c
+++ b/arch/sparc/kernel/pcr.c
@@ -191,12 +191,41 @@ static const struct pcr_ops n4_pcr_ops = {
.pcr_nmi_disable = PCR_N4_PICNPT,
};

+static u64 n5_pcr_read(unsigned long reg_num)
+{
+ unsigned long val;
+
+ (void) sun4v_t5_get_perfreg(reg_num, &val);
+
+ return val;
+}
+
+static void n5_pcr_write(unsigned long reg_num, u64 val)
+{
+ (void) sun4v_t5_set_perfreg(reg_num, val);
+}
+
+static const struct pcr_ops n5_pcr_ops = {
+ .read_pcr = n5_pcr_read,
+ .write_pcr = n5_pcr_write,
+ .read_pic = n4_pic_read,
+ .write_pic = n4_pic_write,
+ .nmi_picl_value = n4_picl_value,
+ .pcr_nmi_enable = (PCR_N4_PICNPT | PCR_N4_STRACE |
+ PCR_N4_UTRACE | PCR_N4_TOE |
+ (26 << PCR_N4_SL_SHIFT)),
+ .pcr_nmi_disable = PCR_N4_PICNPT,
+};
+
+
static unsigned long perf_hsvc_group;
static unsigned long perf_hsvc_major;
static unsigned long perf_hsvc_minor;

static int __init register_perf_hsvc(void)
{
+ unsigned long hverror;
+
if (tlb_type == hypervisor) {
switch (sun4v_chip_type) {
case SUN4V_CHIP_NIAGARA1:
@@ -215,6 +244,10 @@ static int __init register_perf_hsvc(void)
perf_hsvc_group = HV_GRP_VT_CPU;
break;

+ case SUN4V_CHIP_NIAGARA5:
+ perf_hsvc_group = HV_GRP_T5_CPU;
+ break;
+
default:
return -ENODEV;
}
@@ -222,10 +255,12 @@ static int __init register_perf_hsvc(void)

perf_hsvc_major = 1;
perf_hsvc_minor = 0;
- if (sun4v_hvapi_register(perf_hsvc_group,
- perf_hsvc_major,
- &perf_hsvc_minor)) {
- printk("perfmon: Could not register hvapi.\n");
+ hverror = sun4v_hvapi_register(perf_hsvc_group,
+ perf_hsvc_major,
+ &perf_hsvc_minor);
+ if (hverror) {
+ pr_err("perfmon: Could not register hvapi(0x%lx).\n",
+ hverror);
return -ENODEV;
}
}
@@ -254,6 +289,10 @@ static int __init setup_sun4v_pcr_ops(void)
pcr_ops = &n4_pcr_ops;
break;

+ case SUN4V_CHIP_NIAGARA5:
+ pcr_ops = &n5_pcr_ops;
+ break;
+
default:
ret = -ENODEV;
break;
diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c
index b5c38fa..617b9fe 100644
--- a/arch/sparc/kernel/perf_event.c
+++ b/arch/sparc/kernel/perf_event.c
@@ -1662,7 +1662,8 @@ static bool __init supported_pmu(void)
sparc_pmu = &niagara2_pmu;
return true;
}
- if (!strcmp(sparc_pmu_type, "niagara4")) {
+ if (!strcmp(sparc_pmu_type, "niagara4") ||
+ !strcmp(sparc_pmu_type, "niagara5")) {
sparc_pmu = &niagara4_pmu;
return true;
}
@@ -1671,9 +1672,12 @@ static bool __init supported_pmu(void)

int __init init_hw_perf_events(void)
{
+ int err;
+
pr_info("Performance events: ");

- if (!supported_pmu()) {
+ err = pcr_arch_init();
+ if (err || !supported_pmu()) {
pr_cont("No support for PMU type '%s'\n", sparc_pmu_type);
return 0;
}
@@ -1685,7 +1689,7 @@ int __init init_hw_perf_events(void)

return 0;
}
-early_initcall(init_hw_perf_events);
+pure_initcall(init_hw_perf_events);

void perf_callchain_kernel(struct perf_callchain_entry *entry,
struct pt_regs *regs)
diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c
index d7b4967..c6f7113 100644
--- a/arch/sparc/kernel/process_64.c
+++ b/arch/sparc/kernel/process_64.c
@@ -306,6 +306,9 @@ static void __global_pmu_self(int this_cpu)
struct global_pmu_snapshot *pp;
int i, num;

+ if (!pcr_ops)
+ return;
+
pp = &global_cpu_snapshot[this_cpu].pmu;

num = 1;
diff --git a/arch/sparc/kernel/setup_64.c b/arch/sparc/kernel/setup_64.c
index 3fdb455..61a5198 100644
--- a/arch/sparc/kernel/setup_64.c
+++ b/arch/sparc/kernel/setup_64.c
@@ -30,6 +30,7 @@
#include <linux/cpu.h>
#include <linux/initrd.h>
#include <linux/module.h>
+#include <linux/start_kernel.h>

#include <asm/io.h>
#include <asm/processor.h>
@@ -174,7 +175,7 @@ char reboot_command[COMMAND_LINE_SIZE];

static struct pt_regs fake_swapper_regs = { { 0, }, 0, 0, 0, 0 };

-void __init per_cpu_patch(void)
+static void __init per_cpu_patch(void)
{
struct cpuid_patch_entry *p;
unsigned long ver;
@@ -266,7 +267,7 @@ void sun4v_patch_2insn_range(struct sun4v_2insn_patch_entry *start,
}
}

-void __init sun4v_patch(void)
+static void __init sun4v_patch(void)
{
extern void sun4v_hvapi_init(void);

@@ -335,14 +336,25 @@ static void __init pause_patch(void)
}
}

-#ifdef CONFIG_SMP
-void __init boot_cpu_id_too_large(int cpu)
+void __init start_early_boot(void)
{
- prom_printf("Serious problem, boot cpu id (%d) >= NR_CPUS (%d)\n",
- cpu, NR_CPUS);
- prom_halt();
+ int cpu;
+
+ check_if_starfire();
+ per_cpu_patch();
+ sun4v_patch();
+
+ cpu = hard_smp_processor_id();
+ if (cpu >= NR_CPUS) {
+ prom_printf("Serious problem, boot cpu id (%d) >= NR_CPUS (%d)\n",
+ cpu, NR_CPUS);
+ prom_halt();
+ }
+ current_thread_info()->cpu = cpu;
+
+ prom_init_report();
+ start_kernel();
}
-#endif

/* On Ultra, we support all of the v8 capabilities. */
unsigned long sparc64_elf_hwcap = (HWCAP_SPARC_FLUSH | HWCAP_SPARC_STBAR |
@@ -500,12 +512,16 @@ static void __init init_sparc64_elf_hwcap(void)
sun4v_chip_type == SUN4V_CHIP_NIAGARA3 ||
sun4v_chip_type == SUN4V_CHIP_NIAGARA4 ||
sun4v_chip_type == SUN4V_CHIP_NIAGARA5 ||
+ sun4v_chip_type == SUN4V_CHIP_SPARC_M6 ||
+ sun4v_chip_type == SUN4V_CHIP_SPARC_M7 ||
sun4v_chip_type == SUN4V_CHIP_SPARC64X)
cap |= HWCAP_SPARC_BLKINIT;
if (sun4v_chip_type == SUN4V_CHIP_NIAGARA2 ||
sun4v_chip_type == SUN4V_CHIP_NIAGARA3 ||
sun4v_chip_type == SUN4V_CHIP_NIAGARA4 ||
sun4v_chip_type == SUN4V_CHIP_NIAGARA5 ||
+ sun4v_chip_type == SUN4V_CHIP_SPARC_M6 ||
+ sun4v_chip_type == SUN4V_CHIP_SPARC_M7 ||
sun4v_chip_type == SUN4V_CHIP_SPARC64X)
cap |= HWCAP_SPARC_N2;
}
@@ -533,6 +549,8 @@ static void __init init_sparc64_elf_hwcap(void)
sun4v_chip_type == SUN4V_CHIP_NIAGARA3 ||
sun4v_chip_type == SUN4V_CHIP_NIAGARA4 ||
sun4v_chip_type == SUN4V_CHIP_NIAGARA5 ||
+ sun4v_chip_type == SUN4V_CHIP_SPARC_M6 ||
+ sun4v_chip_type == SUN4V_CHIP_SPARC_M7 ||
sun4v_chip_type == SUN4V_CHIP_SPARC64X)
cap |= (AV_SPARC_VIS | AV_SPARC_VIS2 |
AV_SPARC_ASI_BLK_INIT |
@@ -540,6 +558,8 @@ static void __init init_sparc64_elf_hwcap(void)
if (sun4v_chip_type == SUN4V_CHIP_NIAGARA3 ||
sun4v_chip_type == SUN4V_CHIP_NIAGARA4 ||
sun4v_chip_type == SUN4V_CHIP_NIAGARA5 ||
+ sun4v_chip_type == SUN4V_CHIP_SPARC_M6 ||
+ sun4v_chip_type == SUN4V_CHIP_SPARC_M7 ||
sun4v_chip_type == SUN4V_CHIP_SPARC64X)
cap |= (AV_SPARC_VIS3 | AV_SPARC_HPC |
AV_SPARC_FMAF);
diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c
index 8416d7f..50c3dd03 100644
--- a/arch/sparc/kernel/smp_64.c
+++ b/arch/sparc/kernel/smp_64.c
@@ -1395,7 +1395,6 @@ void __cpu_die(unsigned int cpu)

void __init smp_cpus_done(unsigned int max_cpus)
{
- pcr_arch_init();
}

void smp_send_reschedule(int cpu)
@@ -1480,6 +1479,13 @@ static void __init pcpu_populate_pte(unsigned long addr)
pud_t *pud;
pmd_t *pmd;

+ if (pgd_none(*pgd)) {
+ pud_t *new;
+
+ new = __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
+ pgd_populate(&init_mm, pgd, new);
+ }
+
pud = pud_offset(pgd, addr);
if (pud_none(*pud)) {
pmd_t *new;
diff --git a/arch/sparc/kernel/sun4v_tlb_miss.S b/arch/sparc/kernel/sun4v_tlb_miss.S
index e0c09bf8..6179e19 100644
--- a/arch/sparc/kernel/sun4v_tlb_miss.S
+++ b/arch/sparc/kernel/sun4v_tlb_miss.S
@@ -195,6 +195,11 @@ sun4v_tsb_miss_common:
ldx [%g2 + TRAP_PER_CPU_PGD_PADDR], %g7

sun4v_itlb_error:
+ rdpr %tl, %g1
+ cmp %g1, 1
+ ble,pt %icc, sun4v_bad_ra
+ or %g0, FAULT_CODE_BAD_RA | FAULT_CODE_ITLB, %g1
+
sethi %hi(sun4v_err_itlb_vaddr), %g1
stx %g4, [%g1 + %lo(sun4v_err_itlb_vaddr)]
sethi %hi(sun4v_err_itlb_ctx), %g1
@@ -206,15 +211,10 @@ sun4v_itlb_error:
sethi %hi(sun4v_err_itlb_error), %g1
stx %o0, [%g1 + %lo(sun4v_err_itlb_error)]

+ sethi %hi(1f), %g7
rdpr %tl, %g4
- cmp %g4, 1
- ble,pt %icc, 1f
- sethi %hi(2f), %g7
ba,pt %xcc, etraptl1
- or %g7, %lo(2f), %g7
-
-1: ba,pt %xcc, etrap
-2: or %g7, %lo(2b), %g7
+1: or %g7, %lo(1f), %g7
mov %l4, %o1
call sun4v_itlb_error_report
add %sp, PTREGS_OFF, %o0
@@ -222,6 +222,11 @@ sun4v_itlb_error:
/* NOTREACHED */

sun4v_dtlb_error:
+ rdpr %tl, %g1
+ cmp %g1, 1
+ ble,pt %icc, sun4v_bad_ra
+ or %g0, FAULT_CODE_BAD_RA | FAULT_CODE_DTLB, %g1
+
sethi %hi(sun4v_err_dtlb_vaddr), %g1
stx %g4, [%g1 + %lo(sun4v_err_dtlb_vaddr)]
sethi %hi(sun4v_err_dtlb_ctx), %g1
@@ -233,21 +238,23 @@ sun4v_dtlb_error:
sethi %hi(sun4v_err_dtlb_error), %g1
stx %o0, [%g1 + %lo(sun4v_err_dtlb_error)]

+ sethi %hi(1f), %g7
rdpr %tl, %g4
- cmp %g4, 1
- ble,pt %icc, 1f
- sethi %hi(2f), %g7
ba,pt %xcc, etraptl1
- or %g7, %lo(2f), %g7
-
-1: ba,pt %xcc, etrap
-2: or %g7, %lo(2b), %g7
+1: or %g7, %lo(1f), %g7
mov %l4, %o1
call sun4v_dtlb_error_report
add %sp, PTREGS_OFF, %o0

/* NOTREACHED */

+sun4v_bad_ra:
+ or %g0, %g4, %g5
+ ba,pt %xcc, sparc64_realfault_common
+ or %g1, %g0, %g4
+
+ /* NOTREACHED */
+
/* Instruction Access Exception, tl0. */
sun4v_iacc:
ldxa [%g0] ASI_SCRATCHPAD, %g2
diff --git a/arch/sparc/kernel/trampoline_64.S b/arch/sparc/kernel/trampoline_64.S
index ad4bde3..092a39d 100644
--- a/arch/sparc/kernel/trampoline_64.S
+++ b/arch/sparc/kernel/trampoline_64.S
@@ -110,10 +110,13 @@ startup_continue:
brnz,pn %g1, 1b
nop

- sethi %hi(p1275buf), %g2
- or %g2, %lo(p1275buf), %g2
- ldx [%g2 + 0x10], %l2
- add %l2, -(192 + 128), %sp
+ /* Get onto temporary stack which will be in the locked
+ * kernel image.
+ */
+ sethi %hi(tramp_stack), %g1
+ or %g1, %lo(tramp_stack), %g1
+ add %g1, TRAMP_STACK_SIZE, %g1
+ sub %g1, STACKFRAME_SZ + STACK_BIAS + 256, %sp
flushw

/* Setup the loop variables:
@@ -395,7 +398,6 @@ after_lock_tlb:
sllx %g5, THREAD_SHIFT, %g5
sub %g5, (STACKFRAME_SZ + STACK_BIAS), %g5
add %g6, %g5, %sp
- mov 0, %fp

rdpr %pstate, %o1
or %o1, PSTATE_IE, %o1
diff --git a/arch/sparc/kernel/traps_64.c b/arch/sparc/kernel/traps_64.c
index 4ced92f..25d0c7e 100644
--- a/arch/sparc/kernel/traps_64.c
+++ b/arch/sparc/kernel/traps_64.c
@@ -2102,6 +2102,11 @@ void sun4v_nonresum_overflow(struct pt_regs *regs)
atomic_inc(&sun4v_nonresum_oflow_cnt);
}

+static void sun4v_tlb_error(struct pt_regs *regs)
+{
+ die_if_kernel("TLB/TSB error", regs);
+}
+
unsigned long sun4v_err_itlb_vaddr;
unsigned long sun4v_err_itlb_ctx;
unsigned long sun4v_err_itlb_pte;
@@ -2109,8 +2114,7 @@ unsigned long sun4v_err_itlb_error;

void sun4v_itlb_error_report(struct pt_regs *regs, int tl)
{
- if (tl > 1)
- dump_tl1_traplog((struct tl1_traplog *)(regs + 1));
+ dump_tl1_traplog((struct tl1_traplog *)(regs + 1));

printk(KERN_EMERG "SUN4V-ITLB: Error at TPC[%lx], tl %d\n",
regs->tpc, tl);
@@ -2123,7 +2127,7 @@ void sun4v_itlb_error_report(struct pt_regs *regs, int tl)
sun4v_err_itlb_vaddr, sun4v_err_itlb_ctx,
sun4v_err_itlb_pte, sun4v_err_itlb_error);

- prom_halt();
+ sun4v_tlb_error(regs);
}

unsigned long sun4v_err_dtlb_vaddr;
@@ -2133,8 +2137,7 @@ unsigned long sun4v_err_dtlb_error;

void sun4v_dtlb_error_report(struct pt_regs *regs, int tl)
{
- if (tl > 1)
- dump_tl1_traplog((struct tl1_traplog *)(regs + 1));
+ dump_tl1_traplog((struct tl1_traplog *)(regs + 1));

printk(KERN_EMERG "SUN4V-DTLB: Error at TPC[%lx], tl %d\n",
regs->tpc, tl);
@@ -2147,7 +2150,7 @@ void sun4v_dtlb_error_report(struct pt_regs *regs, int tl)
sun4v_err_dtlb_vaddr, sun4v_err_dtlb_ctx,
sun4v_err_dtlb_pte, sun4v_err_dtlb_error);

- prom_halt();
+ sun4v_tlb_error(regs);
}

void hypervisor_tlbop_error(unsigned long err, unsigned long op)
diff --git a/arch/sparc/kernel/tsb.S b/arch/sparc/kernel/tsb.S
index 14158d4..be98685 100644
--- a/arch/sparc/kernel/tsb.S
+++ b/arch/sparc/kernel/tsb.S
@@ -162,10 +162,10 @@ tsb_miss_page_table_walk_sun4v_fastpath:
nop
.previous

- rdpr %tl, %g3
- cmp %g3, 1
+ rdpr %tl, %g7
+ cmp %g7, 1
bne,pn %xcc, winfix_trampoline
- nop
+ mov %g3, %g4
ba,pt %xcc, etrap
rd %pc, %g7
call hugetlb_setup
diff --git a/arch/sparc/kernel/viohs.c b/arch/sparc/kernel/viohs.c
index f8e7dd5..9c5fbd0 100644
--- a/arch/sparc/kernel/viohs.c
+++ b/arch/sparc/kernel/viohs.c
@@ -714,7 +714,7 @@ int vio_ldc_alloc(struct vio_driver_state *vio,
cfg.tx_irq = vio->vdev->tx_irq;
cfg.rx_irq = vio->vdev->rx_irq;

- lp = ldc_alloc(vio->vdev->channel_id, &cfg, event_arg);
+ lp = ldc_alloc(vio->vdev->channel_id, &cfg, event_arg, vio->name);
if (IS_ERR(lp))
return PTR_ERR(lp);

@@ -746,7 +746,7 @@ void vio_port_up(struct vio_driver_state *vio)

err = 0;
if (state == LDC_STATE_INIT) {
- err = ldc_bind(vio->lp, vio->name);
+ err = ldc_bind(vio->lp);
if (err)
printk(KERN_WARNING "%s: Port %lu bind failed, "
"err=%d\n",
diff --git a/arch/sparc/kernel/vmlinux.lds.S b/arch/sparc/kernel/vmlinux.lds.S
index 932ff90..0924305 100644
--- a/arch/sparc/kernel/vmlinux.lds.S
+++ b/arch/sparc/kernel/vmlinux.lds.S
@@ -35,8 +35,9 @@ jiffies = jiffies_64;

SECTIONS
{
- /* swapper_low_pmd_dir is sparc64 only */
- swapper_low_pmd_dir = 0x0000000000402000;
+#ifdef CONFIG_SPARC64
+ swapper_pg_dir = 0x0000000000402000;
+#endif
. = INITIAL_ADDRESS;
.text TEXTSTART :
{
@@ -122,11 +123,6 @@ SECTIONS
*(.swapper_4m_tsb_phys_patch)
__swapper_4m_tsb_phys_patch_end = .;
}
- .page_offset_shift_patch : {
- __page_offset_shift_patch = .;
- *(.page_offset_shift_patch)
- __page_offset_shift_patch_end = .;
- }
.popc_3insn_patch : {
__popc_3insn_patch = .;
*(.popc_3insn_patch)
diff --git a/arch/sparc/lib/NG4memcpy.S b/arch/sparc/lib/NG4memcpy.S
index 9cf2ee0..140527a 100644
--- a/arch/sparc/lib/NG4memcpy.S
+++ b/arch/sparc/lib/NG4memcpy.S
@@ -41,6 +41,10 @@
#endif
#endif

+#if !defined(EX_LD) && !defined(EX_ST)
+#define NON_USER_COPY
+#endif
+
#ifndef EX_LD
#define EX_LD(x) x
#endif
@@ -197,9 +201,13 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
mov EX_RETVAL(%o3), %o0

.Llarge_src_unaligned:
+#ifdef NON_USER_COPY
+ VISEntryHalfFast(.Lmedium_vis_entry_fail)
+#else
+ VISEntryHalf
+#endif
andn %o2, 0x3f, %o4
sub %o2, %o4, %o2
- VISEntryHalf
alignaddr %o1, %g0, %g1
add %o1, %o4, %o1
EX_LD(LOAD(ldd, %g1 + 0x00, %f0))
@@ -240,6 +248,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
nop
ba,a,pt %icc, .Lmedium_unaligned

+#ifdef NON_USER_COPY
+.Lmedium_vis_entry_fail:
+ or %o0, %o1, %g2
+#endif
.Lmedium:
LOAD(prefetch, %o1 + 0x40, #n_reads_strong)
andcc %g2, 0x7, %g0
diff --git a/arch/sparc/lib/memset.S b/arch/sparc/lib/memset.S
index 99c017b..f75e690 100644
--- a/arch/sparc/lib/memset.S
+++ b/arch/sparc/lib/memset.S
@@ -3,8 +3,9 @@
* Copyright (C) 1996,1997 Jakub Jelinek (jj@xxxxxxxxxxxxxxxxxxx)
* Copyright (C) 1996 David S. Miller (davem@xxxxxxxxxxxxxxxx)
*
- * Returns 0, if ok, and number of bytes not yet set if exception
- * occurs and we were called as clear_user.
+ * Calls to memset returns initial %o0. Calls to bzero returns 0, if ok, and
+ * number of bytes not yet set if exception occurs and we were called as
+ * clear_user.
*/

#include <asm/ptrace.h>
@@ -65,6 +66,8 @@ __bzero_begin:
.globl __memset_start, __memset_end
__memset_start:
memset:
+ mov %o0, %g1
+ mov 1, %g4
and %o1, 0xff, %g3
sll %g3, 8, %g2
or %g3, %g2, %g3
@@ -89,6 +92,7 @@ memset:
sub %o0, %o2, %o0

__bzero:
+ clr %g4
mov %g0, %g3
1:
cmp %o1, 7
@@ -151,8 +155,8 @@ __bzero:
bne,a 8f
EX(stb %g3, [%o0], and %o1, 1)
8:
- retl
- clr %o0
+ b 0f
+ nop
7:
be 13b
orcc %o1, 0, %g0
@@ -164,6 +168,12 @@ __bzero:
bne 8b
EX(stb %g3, [%o0 - 1], add %o1, 1)
0:
+ andcc %g4, 1, %g0
+ be 5f
+ nop
+ retl
+ mov %g1, %o0
+5:
retl
clr %o0
__memset_end:
diff --git a/arch/sparc/mm/fault_64.c b/arch/sparc/mm/fault_64.c
index 4ced3fc..45a413e 100644
--- a/arch/sparc/mm/fault_64.c
+++ b/arch/sparc/mm/fault_64.c
@@ -348,6 +348,9 @@ retry:
down_read(&mm->mmap_sem);
}

+ if (fault_code & FAULT_CODE_BAD_RA)
+ goto do_sigbus;
+
vma = find_vma(mm, address);
if (!vma)
goto bad_area;
diff --git a/arch/sparc/mm/gup.c b/arch/sparc/mm/gup.c
index 1aed043..ae6ce38 100644
--- a/arch/sparc/mm/gup.c
+++ b/arch/sparc/mm/gup.c
@@ -160,6 +160,36 @@ static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end,
return 1;
}

+int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
+ struct page **pages)
+{
+ struct mm_struct *mm = current->mm;
+ unsigned long addr, len, end;
+ unsigned long next, flags;
+ pgd_t *pgdp;
+ int nr = 0;
+
+ start &= PAGE_MASK;
+ addr = start;
+ len = (unsigned long) nr_pages << PAGE_SHIFT;
+ end = start + len;
+
+ local_irq_save(flags);
+ pgdp = pgd_offset(mm, addr);
+ do {
+ pgd_t pgd = *pgdp;
+
+ next = pgd_addr_end(addr, end);
+ if (pgd_none(pgd))
+ break;
+ if (!gup_pud_range(pgd, addr, next, write, pages, &nr))
+ break;
+ } while (pgdp++, addr = next, addr != end);
+ local_irq_restore(flags);
+
+ return nr;
+}
+
int get_user_pages_fast(unsigned long start, int nr_pages, int write,
struct page **pages)
{
diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
index e275cde..39c52fc 100644
--- a/arch/sparc/mm/init_64.c
+++ b/arch/sparc/mm/init_64.c
@@ -73,7 +73,6 @@ unsigned long kern_linear_pte_xor[4] __read_mostly;
* 'cpu' properties, but we need to have this table setup before the
* MDESC is initialized.
*/
-unsigned long kpte_linear_bitmap[KPTE_BITMAP_BYTES / sizeof(unsigned long)];

#ifndef CONFIG_DEBUG_PAGEALLOC
/* A special kernel TSB for 4MB, 256MB, 2GB and 16GB linear mappings.
@@ -82,10 +81,11 @@ unsigned long kpte_linear_bitmap[KPTE_BITMAP_BYTES / sizeof(unsigned long)];
*/
extern struct tsb swapper_4m_tsb[KERNEL_TSB4M_NENTRIES];
#endif
+extern struct tsb swapper_tsb[KERNEL_TSB_NENTRIES];

static unsigned long cpu_pgsz_mask;

-#define MAX_BANKS 32
+#define MAX_BANKS 1024

static struct linux_prom64_registers pavail[MAX_BANKS];
static int pavail_ents;
@@ -163,10 +163,6 @@ static void __init read_obp_memory(const char *property,
cmp_p64, NULL);
}

-unsigned long sparc64_valid_addr_bitmap[VALID_ADDR_BITMAP_BYTES /
- sizeof(unsigned long)];
-EXPORT_SYMBOL(sparc64_valid_addr_bitmap);
-
/* Kernel physical address base and size in bytes. */
unsigned long kern_base __read_mostly;
unsigned long kern_size __read_mostly;
@@ -838,7 +834,10 @@ static int find_node(unsigned long addr)
if ((addr & p->mask) == p->val)
return i;
}
- return -1;
+ /* The following condition has been observed on LDOM guests.*/
+ WARN_ONCE(1, "find_node: A physical address doesn't match a NUMA node"
+ " rule. Some physical memory will be owned by node 0.");
+ return 0;
}

static u64 memblock_nid_range(u64 start, u64 end, int *nid)
@@ -1359,9 +1358,144 @@ static unsigned long __init bootmem_init(unsigned long phys_base)
static struct linux_prom64_registers pall[MAX_BANKS] __initdata;
static int pall_ents __initdata;

-#ifdef CONFIG_DEBUG_PAGEALLOC
+static unsigned long max_phys_bits = 40;
+
+bool kern_addr_valid(unsigned long addr)
+{
+ pgd_t *pgd;
+ pud_t *pud;
+ pmd_t *pmd;
+ pte_t *pte;
+
+ if ((long)addr < 0L) {
+ unsigned long pa = __pa(addr);
+
+ if ((addr >> max_phys_bits) != 0UL)
+ return false;
+
+ return pfn_valid(pa >> PAGE_SHIFT);
+ }
+
+ if (addr >= (unsigned long) KERNBASE &&
+ addr < (unsigned long)&_end)
+ return true;
+
+ pgd = pgd_offset_k(addr);
+ if (pgd_none(*pgd))
+ return 0;
+
+ pud = pud_offset(pgd, addr);
+ if (pud_none(*pud))
+ return 0;
+
+ if (pud_large(*pud))
+ return pfn_valid(pud_pfn(*pud));
+
+ pmd = pmd_offset(pud, addr);
+ if (pmd_none(*pmd))
+ return 0;
+
+ if (pmd_large(*pmd))
+ return pfn_valid(pmd_pfn(*pmd));
+
+ pte = pte_offset_kernel(pmd, addr);
+ if (pte_none(*pte))
+ return 0;
+
+ return pfn_valid(pte_pfn(*pte));
+}
+EXPORT_SYMBOL(kern_addr_valid);
+
+static unsigned long __ref kernel_map_hugepud(unsigned long vstart,
+ unsigned long vend,
+ pud_t *pud)
+{
+ const unsigned long mask16gb = (1UL << 34) - 1UL;
+ u64 pte_val = vstart;
+
+ /* Each PUD is 8GB */
+ if ((vstart & mask16gb) ||
+ (vend - vstart <= mask16gb)) {
+ pte_val ^= kern_linear_pte_xor[2];
+ pud_val(*pud) = pte_val | _PAGE_PUD_HUGE;
+
+ return vstart + PUD_SIZE;
+ }
+
+ pte_val ^= kern_linear_pte_xor[3];
+ pte_val |= _PAGE_PUD_HUGE;
+
+ vend = vstart + mask16gb + 1UL;
+ while (vstart < vend) {
+ pud_val(*pud) = pte_val;
+
+ pte_val += PUD_SIZE;
+ vstart += PUD_SIZE;
+ pud++;
+ }
+ return vstart;
+}
+
+static bool kernel_can_map_hugepud(unsigned long vstart, unsigned long vend,
+ bool guard)
+{
+ if (guard && !(vstart & ~PUD_MASK) && (vend - vstart) >= PUD_SIZE)
+ return true;
+
+ return false;
+}
+
+static unsigned long __ref kernel_map_hugepmd(unsigned long vstart,
+ unsigned long vend,
+ pmd_t *pmd)
+{
+ const unsigned long mask256mb = (1UL << 28) - 1UL;
+ const unsigned long mask2gb = (1UL << 31) - 1UL;
+ u64 pte_val = vstart;
+
+ /* Each PMD is 8MB */
+ if ((vstart & mask256mb) ||
+ (vend - vstart <= mask256mb)) {
+ pte_val ^= kern_linear_pte_xor[0];
+ pmd_val(*pmd) = pte_val | _PAGE_PMD_HUGE;
+
+ return vstart + PMD_SIZE;
+ }
+
+ if ((vstart & mask2gb) ||
+ (vend - vstart <= mask2gb)) {
+ pte_val ^= kern_linear_pte_xor[1];
+ pte_val |= _PAGE_PMD_HUGE;
+ vend = vstart + mask256mb + 1UL;
+ } else {
+ pte_val ^= kern_linear_pte_xor[2];
+ pte_val |= _PAGE_PMD_HUGE;
+ vend = vstart + mask2gb + 1UL;
+ }
+
+ while (vstart < vend) {
+ pmd_val(*pmd) = pte_val;
+
+ pte_val += PMD_SIZE;
+ vstart += PMD_SIZE;
+ pmd++;
+ }
+
+ return vstart;
+}
+
+static bool kernel_can_map_hugepmd(unsigned long vstart, unsigned long vend,
+ bool guard)
+{
+ if (guard && !(vstart & ~PMD_MASK) && (vend - vstart) >= PMD_SIZE)
+ return true;
+
+ return false;
+}
+
static unsigned long __ref kernel_map_range(unsigned long pstart,
- unsigned long pend, pgprot_t prot)
+ unsigned long pend, pgprot_t prot,
+ bool use_huge)
{
unsigned long vstart = PAGE_OFFSET + pstart;
unsigned long vend = PAGE_OFFSET + pend;
@@ -1380,19 +1514,34 @@ static unsigned long __ref kernel_map_range(unsigned long pstart,
pmd_t *pmd;
pte_t *pte;

+ if (pgd_none(*pgd)) {
+ pud_t *new;
+
+ new = __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
+ alloc_bytes += PAGE_SIZE;
+ pgd_populate(&init_mm, pgd, new);
+ }
pud = pud_offset(pgd, vstart);
if (pud_none(*pud)) {
pmd_t *new;

+ if (kernel_can_map_hugepud(vstart, vend, use_huge)) {
+ vstart = kernel_map_hugepud(vstart, vend, pud);
+ continue;
+ }
new = __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
alloc_bytes += PAGE_SIZE;
pud_populate(&init_mm, pud, new);
}

pmd = pmd_offset(pud, vstart);
- if (!pmd_present(*pmd)) {
+ if (pmd_none(*pmd)) {
pte_t *new;

+ if (kernel_can_map_hugepmd(vstart, vend, use_huge)) {
+ vstart = kernel_map_hugepmd(vstart, vend, pmd);
+ continue;
+ }
new = __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
alloc_bytes += PAGE_SIZE;
pmd_populate_kernel(&init_mm, pmd, new);
@@ -1415,100 +1564,34 @@ static unsigned long __ref kernel_map_range(unsigned long pstart,
return alloc_bytes;
}

-extern unsigned int kvmap_linear_patch[1];
-#endif /* CONFIG_DEBUG_PAGEALLOC */
-
-static void __init kpte_set_val(unsigned long index, unsigned long val)
+static void __init flush_all_kernel_tsbs(void)
{
- unsigned long *ptr = kpte_linear_bitmap;
-
- val <<= ((index % (BITS_PER_LONG / 2)) * 2);
- ptr += (index / (BITS_PER_LONG / 2));
-
- *ptr |= val;
-}
-
-static const unsigned long kpte_shift_min = 28; /* 256MB */
-static const unsigned long kpte_shift_max = 34; /* 16GB */
-static const unsigned long kpte_shift_incr = 3;
-
-static unsigned long kpte_mark_using_shift(unsigned long start, unsigned long end,
- unsigned long shift)
-{
- unsigned long size = (1UL << shift);
- unsigned long mask = (size - 1UL);
- unsigned long remains = end - start;
- unsigned long val;
-
- if (remains < size || (start & mask))
- return start;
-
- /* VAL maps:
- *
- * shift 28 --> kern_linear_pte_xor index 1
- * shift 31 --> kern_linear_pte_xor index 2
- * shift 34 --> kern_linear_pte_xor index 3
- */
- val = ((shift - kpte_shift_min) / kpte_shift_incr) + 1;
-
- remains &= ~mask;
- if (shift != kpte_shift_max)
- remains = size;
-
- while (remains) {
- unsigned long index = start >> kpte_shift_min;
+ int i;

- kpte_set_val(index, val);
+ for (i = 0; i < KERNEL_TSB_NENTRIES; i++) {
+ struct tsb *ent = &swapper_tsb[i];

- start += 1UL << kpte_shift_min;
- remains -= 1UL << kpte_shift_min;
+ ent->tag = (1UL << TSB_TAG_INVALID_BIT);
}
+#ifndef CONFIG_DEBUG_PAGEALLOC
+ for (i = 0; i < KERNEL_TSB4M_NENTRIES; i++) {
+ struct tsb *ent = &swapper_4m_tsb[i];

- return start;
-}
-
-static void __init mark_kpte_bitmap(unsigned long start, unsigned long end)
-{
- unsigned long smallest_size, smallest_mask;
- unsigned long s;
-
- smallest_size = (1UL << kpte_shift_min);
- smallest_mask = (smallest_size - 1UL);
-
- while (start < end) {
- unsigned long orig_start = start;
-
- for (s = kpte_shift_max; s >= kpte_shift_min; s -= kpte_shift_incr) {
- start = kpte_mark_using_shift(start, end, s);
-
- if (start != orig_start)
- break;
- }
-
- if (start == orig_start)
- start = (start + smallest_size) & ~smallest_mask;
+ ent->tag = (1UL << TSB_TAG_INVALID_BIT);
}
+#endif
}

-static void __init init_kpte_bitmap(void)
-{
- unsigned long i;
-
- for (i = 0; i < pall_ents; i++) {
- unsigned long phys_start, phys_end;
-
- phys_start = pall[i].phys_addr;
- phys_end = phys_start + pall[i].reg_size;
-
- mark_kpte_bitmap(phys_start, phys_end);
- }
-}
+extern unsigned int kvmap_linear_patch[1];

static void __init kernel_physical_mapping_init(void)
{
-#ifdef CONFIG_DEBUG_PAGEALLOC
unsigned long i, mem_alloced = 0UL;
+ bool use_huge = true;

+#ifdef CONFIG_DEBUG_PAGEALLOC
+ use_huge = false;
+#endif
for (i = 0; i < pall_ents; i++) {
unsigned long phys_start, phys_end;

@@ -1516,7 +1599,7 @@ static void __init kernel_physical_mapping_init(void)
phys_end = phys_start + pall[i].reg_size;

mem_alloced += kernel_map_range(phys_start, phys_end,
- PAGE_KERNEL);
+ PAGE_KERNEL, use_huge);
}

printk("Allocated %ld bytes for kernel page tables.\n",
@@ -1525,8 +1608,9 @@ static void __init kernel_physical_mapping_init(void)
kvmap_linear_patch[0] = 0x01000000; /* nop */
flushi(&kvmap_linear_patch[0]);

+ flush_all_kernel_tsbs();
+
__flush_tlb_all();
-#endif
}

#ifdef CONFIG_DEBUG_PAGEALLOC
@@ -1536,7 +1620,7 @@ void kernel_map_pages(struct page *page, int numpages, int enable)
unsigned long phys_end = phys_start + (numpages * PAGE_SIZE);

kernel_map_range(phys_start, phys_end,
- (enable ? PAGE_KERNEL : __pgprot(0)));
+ (enable ? PAGE_KERNEL : __pgprot(0)), false);

flush_tsb_kernel_range(PAGE_OFFSET + phys_start,
PAGE_OFFSET + phys_end);
@@ -1564,76 +1648,56 @@ unsigned long __init find_ecache_flush_span(unsigned long size)
unsigned long PAGE_OFFSET;
EXPORT_SYMBOL(PAGE_OFFSET);

-static void __init page_offset_shift_patch_one(unsigned int *insn, unsigned long phys_bits)
-{
- unsigned long final_shift;
- unsigned int val = *insn;
- unsigned int cnt;
-
- /* We are patching in ilog2(max_supported_phys_address), and
- * we are doing so in a manner similar to a relocation addend.
- * That is, we are adding the shift value to whatever value
- * is in the shift instruction count field already.
- */
- cnt = (val & 0x3f);
- val &= ~0x3f;
-
- /* If we are trying to shift >= 64 bits, clear the destination
- * register. This can happen when phys_bits ends up being equal
- * to MAX_PHYS_ADDRESS_BITS.
- */
- final_shift = (cnt + (64 - phys_bits));
- if (final_shift >= 64) {
- unsigned int rd = (val >> 25) & 0x1f;
-
- val = 0x80100000 | (rd << 25);
- } else {
- val |= final_shift;
- }
- *insn = val;
-
- __asm__ __volatile__("flush %0"
- : /* no outputs */
- : "r" (insn));
-}
-
-static void __init page_offset_shift_patch(unsigned long phys_bits)
-{
- extern unsigned int __page_offset_shift_patch;
- extern unsigned int __page_offset_shift_patch_end;
- unsigned int *p;
-
- p = &__page_offset_shift_patch;
- while (p < &__page_offset_shift_patch_end) {
- unsigned int *insn = (unsigned int *)(unsigned long)*p;
+unsigned long VMALLOC_END = 0x0000010000000000UL;
+EXPORT_SYMBOL(VMALLOC_END);

- page_offset_shift_patch_one(insn, phys_bits);
-
- p++;
- }
-}
+unsigned long sparc64_va_hole_top = 0xfffff80000000000UL;
+unsigned long sparc64_va_hole_bottom = 0x0000080000000000UL;

static void __init setup_page_offset(void)
{
- unsigned long max_phys_bits = 40;
-
if (tlb_type == cheetah || tlb_type == cheetah_plus) {
+ /* Cheetah/Panther support a full 64-bit virtual
+ * address, so we can use all that our page tables
+ * support.
+ */
+ sparc64_va_hole_top = 0xfff0000000000000UL;
+ sparc64_va_hole_bottom = 0x0010000000000000UL;
+
max_phys_bits = 42;
} else if (tlb_type == hypervisor) {
switch (sun4v_chip_type) {
case SUN4V_CHIP_NIAGARA1:
case SUN4V_CHIP_NIAGARA2:
+ /* T1 and T2 support 48-bit virtual addresses. */
+ sparc64_va_hole_top = 0xffff800000000000UL;
+ sparc64_va_hole_bottom = 0x0000800000000000UL;
+
max_phys_bits = 39;
break;
case SUN4V_CHIP_NIAGARA3:
+ /* T3 supports 48-bit virtual addresses. */
+ sparc64_va_hole_top = 0xffff800000000000UL;
+ sparc64_va_hole_bottom = 0x0000800000000000UL;
+
max_phys_bits = 43;
break;
case SUN4V_CHIP_NIAGARA4:
case SUN4V_CHIP_NIAGARA5:
case SUN4V_CHIP_SPARC64X:
- default:
+ case SUN4V_CHIP_SPARC_M6:
+ /* T4 and later support 52-bit virtual addresses. */
+ sparc64_va_hole_top = 0xfff8000000000000UL;
+ sparc64_va_hole_bottom = 0x0008000000000000UL;
max_phys_bits = 47;
break;
+ case SUN4V_CHIP_SPARC_M7:
+ default:
+ /* M7 and later support 52-bit virtual addresses. */
+ sparc64_va_hole_top = 0xfff8000000000000UL;
+ sparc64_va_hole_bottom = 0x0008000000000000UL;
+ max_phys_bits = 49;
+ break;
}
}

@@ -1643,12 +1707,16 @@ static void __init setup_page_offset(void)
prom_halt();
}

- PAGE_OFFSET = PAGE_OFFSET_BY_BITS(max_phys_bits);
+ PAGE_OFFSET = sparc64_va_hole_top;
+ VMALLOC_END = ((sparc64_va_hole_bottom >> 1) +
+ (sparc64_va_hole_bottom >> 2));

- pr_info("PAGE_OFFSET is 0x%016lx (max_phys_bits == %lu)\n",
+ pr_info("MM: PAGE_OFFSET is 0x%016lx (max_phys_bits == %lu)\n",
PAGE_OFFSET, max_phys_bits);
-
- page_offset_shift_patch(max_phys_bits);
+ pr_info("MM: VMALLOC [0x%016lx --> 0x%016lx]\n",
+ VMALLOC_START, VMALLOC_END);
+ pr_info("MM: VMEMMAP [0x%016lx --> 0x%016lx]\n",
+ VMEMMAP_BASE, VMEMMAP_BASE << 1);
}

static void __init tsb_phys_patch(void)
@@ -1693,21 +1761,42 @@ static void __init tsb_phys_patch(void)
#define NUM_KTSB_DESCR 1
#endif
static struct hv_tsb_descr ktsb_descr[NUM_KTSB_DESCR];
-extern struct tsb swapper_tsb[KERNEL_TSB_NENTRIES];
+
+/* The swapper TSBs are loaded with a base sequence of:
+ *
+ * sethi %uhi(SYMBOL), REG1
+ * sethi %hi(SYMBOL), REG2
+ * or REG1, %ulo(SYMBOL), REG1
+ * or REG2, %lo(SYMBOL), REG2
+ * sllx REG1, 32, REG1
+ * or REG1, REG2, REG1
+ *
+ * When we use physical addressing for the TSB accesses, we patch the
+ * first four instructions in the above sequence.
+ */

static void patch_one_ktsb_phys(unsigned int *start, unsigned int *end, unsigned long pa)
{
- pa >>= KTSB_PHYS_SHIFT;
+ unsigned long high_bits, low_bits;
+
+ high_bits = (pa >> 32) & 0xffffffff;
+ low_bits = (pa >> 0) & 0xffffffff;

while (start < end) {
unsigned int *ia = (unsigned int *)(unsigned long)*start;

- ia[0] = (ia[0] & ~0x3fffff) | (pa >> 10);
+ ia[0] = (ia[0] & ~0x3fffff) | (high_bits >> 10);
__asm__ __volatile__("flush %0" : : "r" (ia));

- ia[1] = (ia[1] & ~0x3ff) | (pa & 0x3ff);
+ ia[1] = (ia[1] & ~0x3fffff) | (low_bits >> 10);
__asm__ __volatile__("flush %0" : : "r" (ia + 1));

+ ia[2] = (ia[2] & ~0x1fff) | (high_bits & 0x3ff);
+ __asm__ __volatile__("flush %0" : : "r" (ia + 2));
+
+ ia[3] = (ia[3] & ~0x1fff) | (low_bits & 0x3ff);
+ __asm__ __volatile__("flush %0" : : "r" (ia + 3));
+
start++;
}
}
@@ -1846,7 +1935,6 @@ static void __init sun4v_linear_pte_xor_finalize(void)
/* paging_init() sets up the page tables */

static unsigned long last_valid_pfn;
-pgd_t swapper_pg_dir[PTRS_PER_PGD];

static void sun4u_pgprot_init(void);
static void sun4v_pgprot_init(void);
@@ -1949,16 +2037,10 @@ void __init paging_init(void)
*/
init_mm.pgd += ((shift) / (sizeof(pgd_t)));

- memset(swapper_low_pmd_dir, 0, sizeof(swapper_low_pmd_dir));
+ memset(swapper_pg_dir, 0, sizeof(swapper_pg_dir));

- /* Now can init the kernel/bad page tables. */
- pud_set(pud_offset(&swapper_pg_dir[0], 0),
- swapper_low_pmd_dir + (shift / sizeof(pgd_t)));
-
inherit_prom_mappings();

- init_kpte_bitmap();
-
/* Ok, we can use our TLB miss and window trap handlers safely. */
setup_tba();

@@ -2065,70 +2147,6 @@ int page_in_phys_avail(unsigned long paddr)
return 0;
}

-static struct linux_prom64_registers pavail_rescan[MAX_BANKS] __initdata;
-static int pavail_rescan_ents __initdata;
-
-/* Certain OBP calls, such as fetching "available" properties, can
- * claim physical memory. So, along with initializing the valid
- * address bitmap, what we do here is refetch the physical available
- * memory list again, and make sure it provides at least as much
- * memory as 'pavail' does.
- */
-static void __init setup_valid_addr_bitmap_from_pavail(unsigned long *bitmap)
-{
- int i;
-
- read_obp_memory("available", &pavail_rescan[0], &pavail_rescan_ents);
-
- for (i = 0; i < pavail_ents; i++) {
- unsigned long old_start, old_end;
-
- old_start = pavail[i].phys_addr;
- old_end = old_start + pavail[i].reg_size;
- while (old_start < old_end) {
- int n;
-
- for (n = 0; n < pavail_rescan_ents; n++) {
- unsigned long new_start, new_end;
-
- new_start = pavail_rescan[n].phys_addr;
- new_end = new_start +
- pavail_rescan[n].reg_size;
-
- if (new_start <= old_start &&
- new_end >= (old_start + PAGE_SIZE)) {
- set_bit(old_start >> ILOG2_4MB, bitmap);
- goto do_next_page;
- }
- }
-
- prom_printf("mem_init: Lost memory in pavail\n");
- prom_printf("mem_init: OLD start[%lx] size[%lx]\n",
- pavail[i].phys_addr,
- pavail[i].reg_size);
- prom_printf("mem_init: NEW start[%lx] size[%lx]\n",
- pavail_rescan[i].phys_addr,
- pavail_rescan[i].reg_size);
- prom_printf("mem_init: Cannot continue, aborting.\n");
- prom_halt();
-
- do_next_page:
- old_start += PAGE_SIZE;
- }
- }
-}
-
-static void __init patch_tlb_miss_handler_bitmap(void)
-{
- extern unsigned int valid_addr_bitmap_insn[];
- extern unsigned int valid_addr_bitmap_patch[];
-
- valid_addr_bitmap_insn[1] = valid_addr_bitmap_patch[1];
- mb();
- valid_addr_bitmap_insn[0] = valid_addr_bitmap_patch[0];
- flushi(&valid_addr_bitmap_insn[0]);
-}
-
static void __init register_page_bootmem_info(void)
{
#ifdef CONFIG_NEED_MULTIPLE_NODES
@@ -2141,18 +2159,6 @@ static void __init register_page_bootmem_info(void)
}
void __init mem_init(void)
{
- unsigned long addr, last;
-
- addr = PAGE_OFFSET + kern_base;
- last = PAGE_ALIGN(kern_size) + addr;
- while (addr < last) {
- set_bit(__pa(addr) >> ILOG2_4MB, sparc64_valid_addr_bitmap);
- addr += PAGE_SIZE;
- }
-
- setup_valid_addr_bitmap_from_pavail(sparc64_valid_addr_bitmap);
- patch_tlb_miss_handler_bitmap();
-
high_memory = __va(last_valid_pfn << PAGE_SHIFT);

register_page_bootmem_info();
@@ -2242,18 +2248,9 @@ unsigned long _PAGE_CACHE __read_mostly;
EXPORT_SYMBOL(_PAGE_CACHE);

#ifdef CONFIG_SPARSEMEM_VMEMMAP
-unsigned long vmemmap_table[VMEMMAP_SIZE];
-
-static long __meminitdata addr_start, addr_end;
-static int __meminitdata node_start;
-
int __meminit vmemmap_populate(unsigned long vstart, unsigned long vend,
int node)
{
- unsigned long phys_start = (vstart - VMEMMAP_BASE);
- unsigned long phys_end = (vend - VMEMMAP_BASE);
- unsigned long addr = phys_start & VMEMMAP_CHUNK_MASK;
- unsigned long end = VMEMMAP_ALIGN(phys_end);
unsigned long pte_base;

pte_base = (_PAGE_VALID | _PAGE_SZ4MB_4U |
@@ -2264,47 +2261,52 @@ int __meminit vmemmap_populate(unsigned long vstart, unsigned long vend,
_PAGE_CP_4V | _PAGE_CV_4V |
_PAGE_P_4V | _PAGE_W_4V);

- for (; addr < end; addr += VMEMMAP_CHUNK) {
- unsigned long *vmem_pp =
- vmemmap_table + (addr >> VMEMMAP_CHUNK_SHIFT);
- void *block;
+ pte_base |= _PAGE_PMD_HUGE;

- if (!(*vmem_pp & _PAGE_VALID)) {
- block = vmemmap_alloc_block(1UL << ILOG2_4MB, node);
- if (!block)
+ vstart = vstart & PMD_MASK;
+ vend = ALIGN(vend, PMD_SIZE);
+ for (; vstart < vend; vstart += PMD_SIZE) {
+ pgd_t *pgd = pgd_offset_k(vstart);
+ unsigned long pte;
+ pud_t *pud;
+ pmd_t *pmd;
+
+ if (pgd_none(*pgd)) {
+ pud_t *new = vmemmap_alloc_block(PAGE_SIZE, node);
+
+ if (!new)
return -ENOMEM;
+ pgd_populate(&init_mm, pgd, new);
+ }

- *vmem_pp = pte_base | __pa(block);
+ pud = pud_offset(pgd, vstart);
+ if (pud_none(*pud)) {
+ pmd_t *new = vmemmap_alloc_block(PAGE_SIZE, node);

- /* check to see if we have contiguous blocks */
- if (addr_end != addr || node_start != node) {
- if (addr_start)
- printk(KERN_DEBUG " [%lx-%lx] on node %d\n",
- addr_start, addr_end-1, node_start);
- addr_start = addr;
- node_start = node;
- }
- addr_end = addr + VMEMMAP_CHUNK;
+ if (!new)
+ return -ENOMEM;
+ pud_populate(&init_mm, pud, new);
}
- }
- return 0;
-}

-void __meminit vmemmap_populate_print_last(void)
-{
- if (addr_start) {
- printk(KERN_DEBUG " [%lx-%lx] on node %d\n",
- addr_start, addr_end-1, node_start);
- addr_start = 0;
- addr_end = 0;
- node_start = 0;
+ pmd = pmd_offset(pud, vstart);
+
+ pte = pmd_val(*pmd);
+ if (!(pte & _PAGE_VALID)) {
+ void *block = vmemmap_alloc_block(PMD_SIZE, node);
+
+ if (!block)
+ return -ENOMEM;
+
+ pmd_val(*pmd) = pte_base | __pa(block);
+ }
}
+
+ return 0;
}

void vmemmap_free(unsigned long start, unsigned long end)
{
}
-
#endif /* CONFIG_SPARSEMEM_VMEMMAP */

static void prot_init_common(unsigned long page_none,
@@ -2716,8 +2718,8 @@ void flush_tlb_kernel_range(unsigned long start, unsigned long end)
do_flush_tlb_kernel_range(start, LOW_OBP_ADDRESS);
}
if (end > HI_OBP_ADDRESS) {
- flush_tsb_kernel_range(end, HI_OBP_ADDRESS);
- do_flush_tlb_kernel_range(end, HI_OBP_ADDRESS);
+ flush_tsb_kernel_range(HI_OBP_ADDRESS, end);
+ do_flush_tlb_kernel_range(HI_OBP_ADDRESS, end);
}
} else {
flush_tsb_kernel_range(start, end);
diff --git a/arch/sparc/mm/init_64.h b/arch/sparc/mm/init_64.h
index 5d3782de..ac49119 100644
--- a/arch/sparc/mm/init_64.h
+++ b/arch/sparc/mm/init_64.h
@@ -8,15 +8,8 @@
*/

#define MAX_PHYS_ADDRESS (1UL << MAX_PHYS_ADDRESS_BITS)
-#define KPTE_BITMAP_CHUNK_SZ (256UL * 1024UL * 1024UL)
-#define KPTE_BITMAP_BYTES \
- ((MAX_PHYS_ADDRESS / KPTE_BITMAP_CHUNK_SZ) / 4)
-#define VALID_ADDR_BITMAP_CHUNK_SZ (4UL * 1024UL * 1024UL)
-#define VALID_ADDR_BITMAP_BYTES \
- ((MAX_PHYS_ADDRESS / VALID_ADDR_BITMAP_CHUNK_SZ) / 8)

extern unsigned long kern_linear_pte_xor[4];
-extern unsigned long kpte_linear_bitmap[KPTE_BITMAP_BYTES / sizeof(unsigned long)];
extern unsigned int sparc64_highest_unlocked_tlb_ent;
extern unsigned long sparc64_kern_pri_context;
extern unsigned long sparc64_kern_pri_nuc_bits;
@@ -38,15 +31,4 @@ extern unsigned long kern_locked_tte_data;

extern void prom_world(int enter);

-#ifdef CONFIG_SPARSEMEM_VMEMMAP
-#define VMEMMAP_CHUNK_SHIFT 22
-#define VMEMMAP_CHUNK (1UL << VMEMMAP_CHUNK_SHIFT)
-#define VMEMMAP_CHUNK_MASK ~(VMEMMAP_CHUNK - 1UL)
-#define VMEMMAP_ALIGN(x) (((x)+VMEMMAP_CHUNK-1UL)&VMEMMAP_CHUNK_MASK)
-
-#define VMEMMAP_SIZE ((((1UL << MAX_PHYSADDR_BITS) >> PAGE_SHIFT) * \
- sizeof(struct page)) >> VMEMMAP_CHUNK_SHIFT)
-extern unsigned long vmemmap_table[VMEMMAP_SIZE];
-#endif
-
#endif /* _SPARC64_MM_INIT_H */
diff --git a/arch/sparc/power/hibernate_asm.S b/arch/sparc/power/hibernate_asm.S
index 7994216..d7d9017 100644
--- a/arch/sparc/power/hibernate_asm.S
+++ b/arch/sparc/power/hibernate_asm.S
@@ -54,8 +54,8 @@ ENTRY(swsusp_arch_resume)
nop

/* Write PAGE_OFFSET to %g7 */
- sethi %uhi(PAGE_OFFSET), %g7
- sllx %g7, 32, %g7
+ sethi %hi(PAGE_OFFSET), %g7
+ ldx [%g7 + %lo(PAGE_OFFSET)], %g7

setuw (PAGE_SIZE-8), %g3

diff --git a/arch/sparc/prom/bootstr_64.c b/arch/sparc/prom/bootstr_64.c
index ab9ccc6..7149e77 100644
--- a/arch/sparc/prom/bootstr_64.c
+++ b/arch/sparc/prom/bootstr_64.c
@@ -14,7 +14,10 @@
* the .bss section or it will break things.
*/

-#define BARG_LEN 256
+/* We limit BARG_LEN to 1024 because this is the size of the
+ * 'barg_out' command line buffer in the SILO bootloader.
+ */
+#define BARG_LEN 1024
struct {
int bootstr_len;
int bootstr_valid;
diff --git a/arch/sparc/prom/cif.S b/arch/sparc/prom/cif.S
index 9c86b4b..8050f38 100644
--- a/arch/sparc/prom/cif.S
+++ b/arch/sparc/prom/cif.S
@@ -11,11 +11,10 @@
.text
.globl prom_cif_direct
prom_cif_direct:
+ save %sp, -192, %sp
sethi %hi(p1275buf), %o1
or %o1, %lo(p1275buf), %o1
- ldx [%o1 + 0x0010], %o2 ! prom_cif_stack
- save %o2, -192, %sp
- ldx [%i1 + 0x0008], %l2 ! prom_cif_handler
+ ldx [%o1 + 0x0008], %l2 ! prom_cif_handler
mov %g4, %l0
mov %g5, %l1
mov %g6, %l3
diff --git a/arch/sparc/prom/init_64.c b/arch/sparc/prom/init_64.c
index d95db75..110b0d7 100644
--- a/arch/sparc/prom/init_64.c
+++ b/arch/sparc/prom/init_64.c
@@ -26,13 +26,13 @@ phandle prom_chosen_node;
* It gets passed the pointer to the PROM vector.
*/

-extern void prom_cif_init(void *, void *);
+extern void prom_cif_init(void *);

-void __init prom_init(void *cif_handler, void *cif_stack)
+void __init prom_init(void *cif_handler)
{
phandle node;

- prom_cif_init(cif_handler, cif_stack);
+ prom_cif_init(cif_handler);

prom_chosen_node = prom_finddevice(prom_chosen_path);
if (!prom_chosen_node || (s32)prom_chosen_node == -1)
diff --git a/arch/sparc/prom/p1275.c b/arch/sparc/prom/p1275.c
index 04a4540..fda23e6 100644
--- a/arch/sparc/prom/p1275.c
+++ b/arch/sparc/prom/p1275.c
@@ -10,6 +10,7 @@
#include <linux/smp.h>
#include <linux/string.h>
#include <linux/spinlock.h>
+#include <linux/irqflags.h>

#include <asm/openprom.h>
#include <asm/oplib.h>
@@ -20,7 +21,6 @@
struct {
long prom_callback; /* 0x00 */
void (*prom_cif_handler)(long *); /* 0x08 */
- unsigned long prom_cif_stack; /* 0x10 */
} p1275buf;

extern void prom_world(int);
@@ -37,8 +37,8 @@ void p1275_cmd_direct(unsigned long *args)
{
unsigned long flags;

- raw_local_save_flags(flags);
- raw_local_irq_restore((unsigned long)PIL_NMI);
+ local_save_flags(flags);
+ local_irq_restore((unsigned long)PIL_NMI);
raw_spin_lock(&prom_entry_lock);

prom_world(1);
@@ -46,11 +46,10 @@ void p1275_cmd_direct(unsigned long *args)
prom_world(0);

raw_spin_unlock(&prom_entry_lock);
- raw_local_irq_restore(flags);
+ local_irq_restore(flags);
}

void prom_cif_init(void *cif_handler, void *cif_stack)
{
p1275buf.prom_cif_handler = (void (*)(long *))cif_handler;
- p1275buf.prom_cif_stack = (unsigned long)cif_stack;
}
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index 711de08..92a2e93 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -157,7 +157,7 @@ ENTRY(ia32_sysenter_target)
* ourselves. To save a few cycles, we can check whether
* NT was set instead of doing an unconditional popfq.
*/
- testl $X86_EFLAGS_NT,EFLAGS(%rsp) /* saved EFLAGS match cpu */
+ testl $X86_EFLAGS_NT,EFLAGS-ARGOFFSET(%rsp)
jnz sysenter_fix_flags
sysenter_flags_fixed:

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 1ee50a4..dba6f79 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -981,6 +981,20 @@ static inline void kvm_inject_gp(struct kvm_vcpu *vcpu, u32 error_code)
kvm_queue_exception_e(vcpu, GP_VECTOR, error_code);
}

+static inline u64 get_canonical(u64 la)
+{
+ return ((int64_t)la << 16) >> 16;
+}
+
+static inline bool is_noncanonical_address(u64 la)
+{
+#ifdef CONFIG_X86_64
+ return get_canonical(la) != la;
+#else
+ return false;
+#endif
+}
+
#define TSS_IOPB_BASE_OFFSET 0x66
#define TSS_BASE_SIZE 0x68
#define TSS_IOPB_SIZE (65536 / 8)
@@ -1039,7 +1053,7 @@ int kvm_cpu_get_interrupt(struct kvm_vcpu *v);
void kvm_vcpu_reset(struct kvm_vcpu *vcpu);

void kvm_define_shared_msr(unsigned index, u32 msr);
-void kvm_set_shared_msr(unsigned index, u64 val, u64 mask);
+int kvm_set_shared_msr(unsigned index, u64 val, u64 mask);

bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip);

diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h
index 0e79420..990a2fe 100644
--- a/arch/x86/include/uapi/asm/vmx.h
+++ b/arch/x86/include/uapi/asm/vmx.h
@@ -67,6 +67,7 @@
#define EXIT_REASON_EPT_MISCONFIG 49
#define EXIT_REASON_INVEPT 50
#define EXIT_REASON_PREEMPTION_TIMER 52
+#define EXIT_REASON_INVVPID 53
#define EXIT_REASON_WBINVD 54
#define EXIT_REASON_XSETBV 55
#define EXIT_REASON_APIC_WRITE 56
@@ -114,6 +115,7 @@
{ EXIT_REASON_EOI_INDUCED, "EOI_INDUCED" }, \
{ EXIT_REASON_INVALID_STATE, "INVALID_STATE" }, \
{ EXIT_REASON_INVD, "INVD" }, \
+ { EXIT_REASON_INVVPID, "INVVPID" }, \
{ EXIT_REASON_INVPCID, "INVPCID" }

#endif /* _UAPIVMX_H */
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index d278736..f9e7786 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -1283,7 +1283,7 @@ void setup_local_APIC(void)
unsigned int value, queued;
int i, j, acked = 0;
unsigned long long tsc = 0, ntsc;
- long long max_loops = cpu_khz;
+ long long max_loops = cpu_khz ? cpu_khz : 1000000;

if (cpu_has_tsc)
rdtscll(tsc);
@@ -1380,7 +1380,7 @@ void setup_local_APIC(void)
break;
}
if (queued) {
- if (cpu_has_tsc) {
+ if (cpu_has_tsc && cpu_khz) {
rdtscll(ntsc);
max_loops = (cpu_khz << 10) - (ntsc - tsc);
} else
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 66af9af..dffebdd 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -974,14 +974,17 @@ void __init tsc_init(void)

x86_init.timers.tsc_pre_init();

- if (!cpu_has_tsc)
+ if (!cpu_has_tsc) {
+ setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER);
return;
+ }

tsc_khz = x86_platform.calibrate_tsc();
cpu_khz = tsc_khz;

if (!tsc_khz) {
mark_tsc_unstable("could not calculate TSC khz");
+ setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER);
return;
}

diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 7bff3e2..4ae37e7 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -498,11 +498,6 @@ static void rsp_increment(struct x86_emulate_ctxt *ctxt, int inc)
masked_increment(reg_rmw(ctxt, VCPU_REGS_RSP), stack_mask(ctxt), inc);
}

-static inline void jmp_rel(struct x86_emulate_ctxt *ctxt, int rel)
-{
- register_address_increment(ctxt, &ctxt->_eip, rel);
-}
-
static u32 desc_limit_scaled(struct desc_struct *desc)
{
u32 limit = get_desc_limit(desc);
@@ -576,6 +571,40 @@ static int emulate_nm(struct x86_emulate_ctxt *ctxt)
return emulate_exception(ctxt, NM_VECTOR, 0, false);
}

+static inline int assign_eip_far(struct x86_emulate_ctxt *ctxt, ulong dst,
+ int cs_l)
+{
+ switch (ctxt->op_bytes) {
+ case 2:
+ ctxt->_eip = (u16)dst;
+ break;
+ case 4:
+ ctxt->_eip = (u32)dst;
+ break;
+#ifdef CONFIG_X86_64
+ case 8:
+ if ((cs_l && is_noncanonical_address(dst)) ||
+ (!cs_l && (dst >> 32) != 0))
+ return emulate_gp(ctxt, 0);
+ ctxt->_eip = dst;
+ break;
+#endif
+ default:
+ WARN(1, "unsupported eip assignment size\n");
+ }
+ return X86EMUL_CONTINUE;
+}
+
+static inline int assign_eip_near(struct x86_emulate_ctxt *ctxt, ulong dst)
+{
+ return assign_eip_far(ctxt, dst, ctxt->mode == X86EMUL_MODE_PROT64);
+}
+
+static inline int jmp_rel(struct x86_emulate_ctxt *ctxt, int rel)
+{
+ return assign_eip_near(ctxt, ctxt->_eip + rel);
+}
+
static u16 get_segment_selector(struct x86_emulate_ctxt *ctxt, unsigned seg)
{
u16 selector;
@@ -1409,11 +1438,12 @@ static int write_segment_descriptor(struct x86_emulate_ctxt *ctxt,
}

/* Does not support long mode */
-static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
- u16 selector, int seg)
+static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
+ u16 selector, int seg, u8 cpl,
+ struct desc_struct *desc)
{
struct desc_struct seg_desc, old_desc;
- u8 dpl, rpl, cpl;
+ u8 dpl, rpl;
unsigned err_vec = GP_VECTOR;
u32 err_code = 0;
bool null_selector = !(selector & ~0x3); /* 0000-0003 are null */
@@ -1441,7 +1471,6 @@ static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
}

rpl = selector & 3;
- cpl = ctxt->ops->cpl(ctxt);

/* NULL selector is not valid for TR, CS and SS (except for long mode) */
if ((seg == VCPU_SREG_CS
@@ -1537,12 +1566,21 @@ static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
}
load:
ctxt->ops->set_segment(ctxt, selector, &seg_desc, 0, seg);
+ if (desc)
+ *desc = seg_desc;
return X86EMUL_CONTINUE;
exception:
emulate_exception(ctxt, err_vec, err_code, true);
return X86EMUL_PROPAGATE_FAULT;
}

+static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
+ u16 selector, int seg)
+{
+ u8 cpl = ctxt->ops->cpl(ctxt);
+ return __load_segment_descriptor(ctxt, selector, seg, cpl, NULL);
+}
+
static void write_register_operand(struct operand *op)
{
/* The 4-byte case *is* correct: in 64-bit mode we zero-extend. */
@@ -1937,17 +1975,31 @@ static int em_iret(struct x86_emulate_ctxt *ctxt)
static int em_jmp_far(struct x86_emulate_ctxt *ctxt)
{
int rc;
- unsigned short sel;
+ unsigned short sel, old_sel;
+ struct desc_struct old_desc, new_desc;
+ const struct x86_emulate_ops *ops = ctxt->ops;
+ u8 cpl = ctxt->ops->cpl(ctxt);
+
+ /* Assignment of RIP may only fail in 64-bit mode */
+ if (ctxt->mode == X86EMUL_MODE_PROT64)
+ ops->get_segment(ctxt, &old_sel, &old_desc, NULL,
+ VCPU_SREG_CS);

memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2);

- rc = load_segment_descriptor(ctxt, sel, VCPU_SREG_CS);
+ rc = __load_segment_descriptor(ctxt, sel, VCPU_SREG_CS, cpl,
+ &new_desc);
if (rc != X86EMUL_CONTINUE)
return rc;

- ctxt->_eip = 0;
- memcpy(&ctxt->_eip, ctxt->src.valptr, ctxt->op_bytes);
- return X86EMUL_CONTINUE;
+ rc = assign_eip_far(ctxt, ctxt->src.val, new_desc.l);
+ if (rc != X86EMUL_CONTINUE) {
+ WARN_ON(!ctxt->mode != X86EMUL_MODE_PROT64);
+ /* assigning eip failed; restore the old cs */
+ ops->set_segment(ctxt, old_sel, &old_desc, 0, VCPU_SREG_CS);
+ return rc;
+ }
+ return rc;
}

static int em_grp45(struct x86_emulate_ctxt *ctxt)
@@ -1958,13 +2010,15 @@ static int em_grp45(struct x86_emulate_ctxt *ctxt)
case 2: /* call near abs */ {
long int old_eip;
old_eip = ctxt->_eip;
- ctxt->_eip = ctxt->src.val;
+ rc = assign_eip_near(ctxt, ctxt->src.val);
+ if (rc != X86EMUL_CONTINUE)
+ break;
ctxt->src.val = old_eip;
rc = em_push(ctxt);
break;
}
case 4: /* jmp abs */
- ctxt->_eip = ctxt->src.val;
+ rc = assign_eip_near(ctxt, ctxt->src.val);
break;
case 5: /* jmp far */
rc = em_jmp_far(ctxt);
@@ -1996,30 +2050,47 @@ static int em_cmpxchg8b(struct x86_emulate_ctxt *ctxt)

static int em_ret(struct x86_emulate_ctxt *ctxt)
{
- ctxt->dst.type = OP_REG;
- ctxt->dst.addr.reg = &ctxt->_eip;
- ctxt->dst.bytes = ctxt->op_bytes;
- return em_pop(ctxt);
+ int rc;
+ unsigned long eip;
+
+ rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
+ if (rc != X86EMUL_CONTINUE)
+ return rc;
+
+ return assign_eip_near(ctxt, eip);
}

static int em_ret_far(struct x86_emulate_ctxt *ctxt)
{
int rc;
- unsigned long cs;
+ unsigned long eip, cs;
+ u16 old_cs;
int cpl = ctxt->ops->cpl(ctxt);
+ struct desc_struct old_desc, new_desc;
+ const struct x86_emulate_ops *ops = ctxt->ops;

- rc = emulate_pop(ctxt, &ctxt->_eip, ctxt->op_bytes);
+ if (ctxt->mode == X86EMUL_MODE_PROT64)
+ ops->get_segment(ctxt, &old_cs, &old_desc, NULL,
+ VCPU_SREG_CS);
+
+ rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
if (rc != X86EMUL_CONTINUE)
return rc;
- if (ctxt->op_bytes == 4)
- ctxt->_eip = (u32)ctxt->_eip;
rc = emulate_pop(ctxt, &cs, ctxt->op_bytes);
if (rc != X86EMUL_CONTINUE)
return rc;
/* Outer-privilege level return is not implemented */
if (ctxt->mode >= X86EMUL_MODE_PROT16 && (cs & 3) > cpl)
return X86EMUL_UNHANDLEABLE;
- rc = load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS);
+ rc = __load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS, 0,
+ &new_desc);
+ if (rc != X86EMUL_CONTINUE)
+ return rc;
+ rc = assign_eip_far(ctxt, eip, new_desc.l);
+ if (rc != X86EMUL_CONTINUE) {
+ WARN_ON(!ctxt->mode != X86EMUL_MODE_PROT64);
+ ops->set_segment(ctxt, old_cs, &old_desc, 0, VCPU_SREG_CS);
+ }
return rc;
}

@@ -2277,7 +2348,7 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt)
{
const struct x86_emulate_ops *ops = ctxt->ops;
struct desc_struct cs, ss;
- u64 msr_data;
+ u64 msr_data, rcx, rdx;
int usermode;
u16 cs_sel = 0, ss_sel = 0;

@@ -2293,6 +2364,9 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt)
else
usermode = X86EMUL_MODE_PROT32;

+ rcx = reg_read(ctxt, VCPU_REGS_RCX);
+ rdx = reg_read(ctxt, VCPU_REGS_RDX);
+
cs.dpl = 3;
ss.dpl = 3;
ops->get_msr(ctxt, MSR_IA32_SYSENTER_CS, &msr_data);
@@ -2310,6 +2384,9 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt)
ss_sel = cs_sel + 8;
cs.d = 0;
cs.l = 1;
+ if (is_noncanonical_address(rcx) ||
+ is_noncanonical_address(rdx))
+ return emulate_gp(ctxt, 0);
break;
}
cs_sel |= SELECTOR_RPL_MASK;
@@ -2318,8 +2395,8 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt)
ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS);
ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS);

- ctxt->_eip = reg_read(ctxt, VCPU_REGS_RDX);
- *reg_write(ctxt, VCPU_REGS_RSP) = reg_read(ctxt, VCPU_REGS_RCX);
+ ctxt->_eip = rdx;
+ *reg_write(ctxt, VCPU_REGS_RSP) = rcx;

return X86EMUL_CONTINUE;
}
@@ -2408,6 +2485,7 @@ static int load_state_from_tss16(struct x86_emulate_ctxt *ctxt,
struct tss_segment_16 *tss)
{
int ret;
+ u8 cpl;

ctxt->_eip = tss->ip;
ctxt->eflags = tss->flag | 2;
@@ -2430,23 +2508,30 @@ static int load_state_from_tss16(struct x86_emulate_ctxt *ctxt,
set_segment_selector(ctxt, tss->ss, VCPU_SREG_SS);
set_segment_selector(ctxt, tss->ds, VCPU_SREG_DS);

+ cpl = tss->cs & 3;
+
/*
* Now load segment descriptors. If fault happens at this stage
* it is handled in a context of new task
*/
- ret = load_segment_descriptor(ctxt, tss->ldt, VCPU_SREG_LDTR);
+ ret = __load_segment_descriptor(ctxt, tss->ldt, VCPU_SREG_LDTR, cpl,
+ NULL);
if (ret != X86EMUL_CONTINUE)
return ret;
- ret = load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES);
+ ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl,
+ NULL);
if (ret != X86EMUL_CONTINUE)
return ret;
- ret = load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS);
+ ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl,
+ NULL);
if (ret != X86EMUL_CONTINUE)
return ret;
- ret = load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS);
+ ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl,
+ NULL);
if (ret != X86EMUL_CONTINUE)
return ret;
- ret = load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS);
+ ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl,
+ NULL);
if (ret != X86EMUL_CONTINUE)
return ret;

@@ -2525,6 +2610,7 @@ static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt,
struct tss_segment_32 *tss)
{
int ret;
+ u8 cpl;

if (ctxt->ops->set_cr(ctxt, 3, tss->cr3))
return emulate_gp(ctxt, 0);
@@ -2543,7 +2629,8 @@ static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt,

/*
* SDM says that segment selectors are loaded before segment
- * descriptors
+ * descriptors. This is important because CPL checks will
+ * use CS.RPL.
*/
set_segment_selector(ctxt, tss->ldt_selector, VCPU_SREG_LDTR);
set_segment_selector(ctxt, tss->es, VCPU_SREG_ES);
@@ -2557,43 +2644,45 @@ static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt,
* If we're switching between Protected Mode and VM86, we need to make
* sure to update the mode before loading the segment descriptors so
* that the selectors are interpreted correctly.
- *
- * Need to get rflags to the vcpu struct immediately because it
- * influences the CPL which is checked at least when loading the segment
- * descriptors and when pushing an error code to the new kernel stack.
- *
- * TODO Introduce a separate ctxt->ops->set_cpl callback
*/
- if (ctxt->eflags & X86_EFLAGS_VM)
+ if (ctxt->eflags & X86_EFLAGS_VM) {
ctxt->mode = X86EMUL_MODE_VM86;
- else
+ cpl = 3;
+ } else {
ctxt->mode = X86EMUL_MODE_PROT32;
-
- ctxt->ops->set_rflags(ctxt, ctxt->eflags);
+ cpl = tss->cs & 3;
+ }

/*
* Now load segment descriptors. If fault happenes at this stage
* it is handled in a context of new task
*/
- ret = load_segment_descriptor(ctxt, tss->ldt_selector, VCPU_SREG_LDTR);
+ ret = __load_segment_descriptor(ctxt, tss->ldt_selector, VCPU_SREG_LDTR,
+ cpl, NULL);
if (ret != X86EMUL_CONTINUE)
return ret;
- ret = load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES);
+ ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl,
+ NULL);
if (ret != X86EMUL_CONTINUE)
return ret;
- ret = load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS);
+ ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl,
+ NULL);
if (ret != X86EMUL_CONTINUE)
return ret;
- ret = load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS);
+ ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl,
+ NULL);
if (ret != X86EMUL_CONTINUE)
return ret;
- ret = load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS);
+ ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl,
+ NULL);
if (ret != X86EMUL_CONTINUE)
return ret;
- ret = load_segment_descriptor(ctxt, tss->fs, VCPU_SREG_FS);
+ ret = __load_segment_descriptor(ctxt, tss->fs, VCPU_SREG_FS, cpl,
+ NULL);
if (ret != X86EMUL_CONTINUE)
return ret;
- ret = load_segment_descriptor(ctxt, tss->gs, VCPU_SREG_GS);
+ ret = __load_segment_descriptor(ctxt, tss->gs, VCPU_SREG_GS, cpl,
+ NULL);
if (ret != X86EMUL_CONTINUE)
return ret;

@@ -2858,10 +2947,13 @@ static int em_aad(struct x86_emulate_ctxt *ctxt)

static int em_call(struct x86_emulate_ctxt *ctxt)
{
+ int rc;
long rel = ctxt->src.val;

ctxt->src.val = (unsigned long)ctxt->_eip;
- jmp_rel(ctxt, rel);
+ rc = jmp_rel(ctxt, rel);
+ if (rc != X86EMUL_CONTINUE)
+ return rc;
return em_push(ctxt);
}

@@ -2870,34 +2962,50 @@ static int em_call_far(struct x86_emulate_ctxt *ctxt)
u16 sel, old_cs;
ulong old_eip;
int rc;
+ struct desc_struct old_desc, new_desc;
+ const struct x86_emulate_ops *ops = ctxt->ops;
+ int cpl = ctxt->ops->cpl(ctxt);

- old_cs = get_segment_selector(ctxt, VCPU_SREG_CS);
old_eip = ctxt->_eip;
+ ops->get_segment(ctxt, &old_cs, &old_desc, NULL, VCPU_SREG_CS);

memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2);
- if (load_segment_descriptor(ctxt, sel, VCPU_SREG_CS))
+ rc = __load_segment_descriptor(ctxt, sel, VCPU_SREG_CS, cpl,
+ &new_desc);
+ if (rc != X86EMUL_CONTINUE)
return X86EMUL_CONTINUE;

- ctxt->_eip = 0;
- memcpy(&ctxt->_eip, ctxt->src.valptr, ctxt->op_bytes);
+ rc = assign_eip_far(ctxt, ctxt->src.val, new_desc.l);
+ if (rc != X86EMUL_CONTINUE)
+ goto fail;

ctxt->src.val = old_cs;
rc = em_push(ctxt);
if (rc != X86EMUL_CONTINUE)
- return rc;
+ goto fail;

ctxt->src.val = old_eip;
- return em_push(ctxt);
+ rc = em_push(ctxt);
+ /* If we failed, we tainted the memory, but the very least we should
+ restore cs */
+ if (rc != X86EMUL_CONTINUE)
+ goto fail;
+ return rc;
+fail:
+ ops->set_segment(ctxt, old_cs, &old_desc, 0, VCPU_SREG_CS);
+ return rc;
+
}

static int em_ret_near_imm(struct x86_emulate_ctxt *ctxt)
{
int rc;
+ unsigned long eip;

- ctxt->dst.type = OP_REG;
- ctxt->dst.addr.reg = &ctxt->_eip;
- ctxt->dst.bytes = ctxt->op_bytes;
- rc = emulate_pop(ctxt, &ctxt->dst.val, ctxt->op_bytes);
+ rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
+ if (rc != X86EMUL_CONTINUE)
+ return rc;
+ rc = assign_eip_near(ctxt, eip);
if (rc != X86EMUL_CONTINUE)
return rc;
rsp_increment(ctxt, ctxt->src.val);
@@ -3227,20 +3335,24 @@ static int em_lmsw(struct x86_emulate_ctxt *ctxt)

static int em_loop(struct x86_emulate_ctxt *ctxt)
{
+ int rc = X86EMUL_CONTINUE;
+
register_address_increment(ctxt, reg_rmw(ctxt, VCPU_REGS_RCX), -1);
if ((address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) != 0) &&
(ctxt->b == 0xe2 || test_cc(ctxt->b ^ 0x5, ctxt->eflags)))
- jmp_rel(ctxt, ctxt->src.val);
+ rc = jmp_rel(ctxt, ctxt->src.val);

- return X86EMUL_CONTINUE;
+ return rc;
}

static int em_jcxz(struct x86_emulate_ctxt *ctxt)
{
+ int rc = X86EMUL_CONTINUE;
+
if (address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) == 0)
- jmp_rel(ctxt, ctxt->src.val);
+ rc = jmp_rel(ctxt, ctxt->src.val);

- return X86EMUL_CONTINUE;
+ return rc;
}

static int em_in(struct x86_emulate_ctxt *ctxt)
@@ -4637,7 +4749,7 @@ special_insn:
break;
case 0x70 ... 0x7f: /* jcc (short) */
if (test_cc(ctxt->b, ctxt->eflags))
- jmp_rel(ctxt, ctxt->src.val);
+ rc = jmp_rel(ctxt, ctxt->src.val);
break;
case 0x8d: /* lea r16/r32, m */
ctxt->dst.val = ctxt->src.addr.mem.ea;
@@ -4666,7 +4778,7 @@ special_insn:
break;
case 0xe9: /* jmp rel */
case 0xeb: /* jmp rel short */
- jmp_rel(ctxt, ctxt->src.val);
+ rc = jmp_rel(ctxt, ctxt->src.val);
ctxt->dst.type = OP_NONE; /* Disable writeback. */
break;
case 0xf4: /* hlt */
@@ -4786,7 +4898,7 @@ twobyte_insn:
break;
case 0x80 ... 0x8f: /* jnz rel, etc*/
if (test_cc(ctxt->b, ctxt->eflags))
- jmp_rel(ctxt, ctxt->src.val);
+ rc = jmp_rel(ctxt, ctxt->src.val);
break;
case 0x90 ... 0x9f: /* setcc r/m8 */
ctxt->dst.val = test_cc(ctxt->b, ctxt->eflags);
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index 518d864..298781d 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -262,8 +262,10 @@ void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu)
return;

timer = &pit->pit_state.timer;
+ mutex_lock(&pit->pit_state.lock);
if (hrtimer_cancel(timer))
hrtimer_start_expires(timer, HRTIMER_MODE_ABS);
+ mutex_unlock(&pit->pit_state.lock);
}

static void destroy_pit_timer(struct kvm_pit *pit)
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 532add1..1f5faa5 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -3200,7 +3200,7 @@ static int wrmsr_interception(struct vcpu_svm *svm)
msr.host_initiated = false;

svm->next_rip = kvm_rip_read(&svm->vcpu) + 2;
- if (svm_set_msr(&svm->vcpu, &msr)) {
+ if (kvm_set_msr(&svm->vcpu, &msr)) {
trace_kvm_msr_write_ex(ecx, data);
kvm_inject_gp(&svm->vcpu, 0);
} else {
@@ -3482,9 +3482,9 @@ static int handle_exit(struct kvm_vcpu *vcpu)

if (exit_code >= ARRAY_SIZE(svm_exit_handlers)
|| !svm_exit_handlers[exit_code]) {
- kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
- kvm_run->hw.hardware_exit_reason = exit_code;
- return 0;
+ WARN_ONCE(1, "vmx: unexpected exit reason 0x%x\n", exit_code);
+ kvm_queue_exception(vcpu, UD_VECTOR);
+ return 1;
}

return svm_exit_handlers[exit_code](svm);
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index c11b1ad..f5c384c 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2546,12 +2546,15 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
break;
msr = find_msr_entry(vmx, msr_index);
if (msr) {
+ u64 old_msr_data = msr->data;
msr->data = data;
if (msr - vmx->guest_msrs < vmx->save_nmsrs) {
preempt_disable();
- kvm_set_shared_msr(msr->index, msr->data,
- msr->mask);
+ ret = kvm_set_shared_msr(msr->index, msr->data,
+ msr->mask);
preempt_enable();
+ if (ret)
+ msr->data = old_msr_data;
}
break;
}
@@ -5135,7 +5138,7 @@ static int handle_wrmsr(struct kvm_vcpu *vcpu)
msr.data = data;
msr.index = ecx;
msr.host_initiated = false;
- if (vmx_set_msr(vcpu, &msr) != 0) {
+ if (kvm_set_msr(vcpu, &msr) != 0) {
trace_kvm_msr_write_ex(ecx, data);
kvm_inject_gp(vcpu, 0);
return 1;
@@ -6407,6 +6410,12 @@ static int handle_invept(struct kvm_vcpu *vcpu)
return 1;
}

+static int handle_invvpid(struct kvm_vcpu *vcpu)
+{
+ kvm_queue_exception(vcpu, UD_VECTOR);
+ return 1;
+}
+
/*
* The exit handlers return 1 if the exit was handled fully and guest execution
* may resume. Otherwise they set the kvm_run parameter to indicate what needs
@@ -6452,6 +6461,7 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
[EXIT_REASON_MWAIT_INSTRUCTION] = handle_invalid_op,
[EXIT_REASON_MONITOR_INSTRUCTION] = handle_invalid_op,
[EXIT_REASON_INVEPT] = handle_invept,
+ [EXIT_REASON_INVVPID] = handle_invvpid,
};

static const int kvm_vmx_max_exit_handlers =
@@ -6681,7 +6691,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
case EXIT_REASON_VMPTRST: case EXIT_REASON_VMREAD:
case EXIT_REASON_VMRESUME: case EXIT_REASON_VMWRITE:
case EXIT_REASON_VMOFF: case EXIT_REASON_VMON:
- case EXIT_REASON_INVEPT:
+ case EXIT_REASON_INVEPT: case EXIT_REASON_INVVPID:
/*
* VMX instructions trap unconditionally. This allows L1 to
* emulate them for its L2 guest, i.e., allows 3-level nesting!
@@ -6844,10 +6854,10 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
&& kvm_vmx_exit_handlers[exit_reason])
return kvm_vmx_exit_handlers[exit_reason](vcpu);
else {
- vcpu->run->exit_reason = KVM_EXIT_UNKNOWN;
- vcpu->run->hw.hardware_exit_reason = exit_reason;
+ WARN_ONCE(1, "vmx: unexpected exit reason 0x%x\n", exit_reason);
+ kvm_queue_exception(vcpu, UD_VECTOR);
+ return 1;
}
- return 0;
}

static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 4e33b85..450c5c5 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -225,20 +225,25 @@ static void kvm_shared_msr_cpu_online(void)
shared_msr_update(i, shared_msrs_global.msrs[i]);
}

-void kvm_set_shared_msr(unsigned slot, u64 value, u64 mask)
+int kvm_set_shared_msr(unsigned slot, u64 value, u64 mask)
{
unsigned int cpu = smp_processor_id();
struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu);
+ int err;

if (((value ^ smsr->values[slot].curr) & mask) == 0)
- return;
+ return 0;
smsr->values[slot].curr = value;
- wrmsrl(shared_msrs_global.msrs[slot], value);
+ err = wrmsrl_safe(shared_msrs_global.msrs[slot], value);
+ if (err)
+ return 1;
+
if (!smsr->registered) {
smsr->urn.on_user_return = kvm_on_user_return;
user_return_notifier_register(&smsr->urn);
smsr->registered = true;
}
+ return 0;
}
EXPORT_SYMBOL_GPL(kvm_set_shared_msr);

@@ -919,7 +924,6 @@ void kvm_enable_efer_bits(u64 mask)
}
EXPORT_SYMBOL_GPL(kvm_enable_efer_bits);

-
/*
* Writes msr value into into the appropriate "register".
* Returns 0 on success, non-0 otherwise.
@@ -927,8 +931,34 @@ EXPORT_SYMBOL_GPL(kvm_enable_efer_bits);
*/
int kvm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
{
+ switch (msr->index) {
+ case MSR_FS_BASE:
+ case MSR_GS_BASE:
+ case MSR_KERNEL_GS_BASE:
+ case MSR_CSTAR:
+ case MSR_LSTAR:
+ if (is_noncanonical_address(msr->data))
+ return 1;
+ break;
+ case MSR_IA32_SYSENTER_EIP:
+ case MSR_IA32_SYSENTER_ESP:
+ /*
+ * IA32_SYSENTER_ESP and IA32_SYSENTER_EIP cause #GP if
+ * non-canonical address is written on Intel but not on
+ * AMD (which ignores the top 32-bits, because it does
+ * not implement 64-bit SYSENTER).
+ *
+ * 64-bit code should hence be able to write a non-canonical
+ * value on AMD. Making the address canonical ensures that
+ * vmentry does not fail on Intel after writing a non-canonical
+ * value, and that something deterministic happens if the guest
+ * invokes 64-bit SYSENTER.
+ */
+ msr->data = get_canonical(msr->data);
+ }
return kvm_x86_ops->set_msr(vcpu, msr);
}
+EXPORT_SYMBOL_GPL(kvm_set_msr);

/*
* Adapt set_msr() to msr_io()'s calling convention
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index bb32480..aabdf76 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -389,7 +389,7 @@ phys_addr_t slow_virt_to_phys(void *__virt_addr)
psize = page_level_size(level);
pmask = page_level_mask(level);
offset = virt_addr & ~pmask;
- phys_addr = pte_pfn(*pte) << PAGE_SHIFT;
+ phys_addr = (phys_addr_t)pte_pfn(*pte) << PAGE_SHIFT;
return (phys_addr | offset);
}
EXPORT_SYMBOL_GPL(slow_virt_to_phys);
diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c
index 625e3e4..91cceb2 100644
--- a/block/scsi_ioctl.c
+++ b/block/scsi_ioctl.c
@@ -487,7 +487,7 @@ int sg_scsi_ioctl(struct request_queue *q, struct gendisk *disk, fmode_t mode,

if (bytes && blk_rq_map_kern(q, rq, buffer, bytes, __GFP_WAIT)) {
err = DRIVER_ERROR << 24;
- goto out;
+ goto error;
}

memset(sense, 0, sizeof(sense));
@@ -497,7 +497,6 @@ int sg_scsi_ioctl(struct request_queue *q, struct gendisk *disk, fmode_t mode,

blk_execute_rq(q, disk, rq, 0);

-out:
err = rq->errors & 0xff; /* only 8 bit SCSI status */
if (err) {
if (rq->sense_len && rq->sense) {
diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c
index 3b20294..8c02f84 100644
--- a/drivers/acpi/ec.c
+++ b/drivers/acpi/ec.c
@@ -127,6 +127,7 @@ static int EC_FLAGS_MSI; /* Out-of-spec MSI controller */
static int EC_FLAGS_VALIDATE_ECDT; /* ASUStec ECDTs need to be validated */
static int EC_FLAGS_SKIP_DSDT_SCAN; /* Not all BIOS survive early DSDT scan */
static int EC_FLAGS_CLEAR_ON_RESUME; /* Needs acpi_ec_clear() on boot/resume */
+static int EC_FLAGS_QUERY_HANDSHAKE; /* Needs QR_EC issued when SCI_EVT set */

/* --------------------------------------------------------------------------
Transaction Management
@@ -191,6 +192,8 @@ static bool advance_transaction(struct acpi_ec *ec)
t->rdata[t->ri++] = acpi_ec_read_data(ec);
if (t->rlen == t->ri) {
t->flags |= ACPI_EC_COMMAND_COMPLETE;
+ if (t->command == ACPI_EC_COMMAND_QUERY)
+ pr_debug("hardware QR_EC completion\n");
wakeup = true;
}
} else
@@ -202,7 +205,15 @@ static bool advance_transaction(struct acpi_ec *ec)
}
return wakeup;
} else {
- if ((status & ACPI_EC_FLAG_IBF) == 0) {
+ if (EC_FLAGS_QUERY_HANDSHAKE &&
+ !(status & ACPI_EC_FLAG_SCI) &&
+ (t->command == ACPI_EC_COMMAND_QUERY)) {
+ t->flags |= ACPI_EC_COMMAND_POLL;
+ t->rdata[t->ri++] = 0x00;
+ t->flags |= ACPI_EC_COMMAND_COMPLETE;
+ pr_debug("software QR_EC completion\n");
+ wakeup = true;
+ } else if ((status & ACPI_EC_FLAG_IBF) == 0) {
acpi_ec_write_cmd(ec, t->command);
t->flags |= ACPI_EC_COMMAND_POLL;
} else
@@ -981,6 +992,18 @@ static int ec_enlarge_storm_threshold(const struct dmi_system_id *id)
}

/*
+ * Acer EC firmware refuses to respond QR_EC when SCI_EVT is not set, for
+ * which case, we complete the QR_EC without issuing it to the firmware.
+ * https://bugzilla.kernel.org/show_bug.cgi?id=86211
+ */
+static int ec_flag_query_handshake(const struct dmi_system_id *id)
+{
+ pr_debug("Detected the EC firmware requiring QR_EC issued when SCI_EVT set\n");
+ EC_FLAGS_QUERY_HANDSHAKE = 1;
+ return 0;
+}
+
+/*
* On some hardware it is necessary to clear events accumulated by the EC during
* sleep. These ECs stop reporting GPEs until they are manually polled, if too
* many events are accumulated. (e.g. Samsung Series 5/9 notebooks)
@@ -1050,6 +1073,9 @@ static struct dmi_system_id ec_dmi_table[] __initdata = {
{
ec_clear_on_resume, "Samsung hardware", {
DMI_MATCH(DMI_SYS_VENDOR, "SAMSUNG ELECTRONICS CO., LTD.")}, NULL},
+ {
+ ec_flag_query_handshake, "Acer hardware", {
+ DMI_MATCH(DMI_SYS_VENDOR, "Acer"), }, NULL},
{},
};

diff --git a/drivers/char/random.c b/drivers/char/random.c
index 429b75b..8a64dbe 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -1063,8 +1063,8 @@ static void extract_buf(struct entropy_store *r, __u8 *out)
* pool while mixing, and hash one final time.
*/
sha_transform(hash.w, extract, workspace);
- memset(extract, 0, sizeof(extract));
- memset(workspace, 0, sizeof(workspace));
+ memzero_explicit(extract, sizeof(extract));
+ memzero_explicit(workspace, sizeof(workspace));

/*
* In case the hash function has some recognizable output
@@ -1076,7 +1076,7 @@ static void extract_buf(struct entropy_store *r, __u8 *out)
hash.w[2] ^= rol32(hash.w[2], 16);

memcpy(out, &hash, EXTRACT_SIZE);
- memset(&hash, 0, sizeof(hash));
+ memzero_explicit(&hash, sizeof(hash));
}

static ssize_t extract_entropy(struct entropy_store *r, void *buf,
@@ -1124,7 +1124,7 @@ static ssize_t extract_entropy(struct entropy_store *r, void *buf,
}

/* Wipe data just returned from memory */
- memset(tmp, 0, sizeof(tmp));
+ memzero_explicit(tmp, sizeof(tmp));

return ret;
}
@@ -1162,7 +1162,7 @@ static ssize_t extract_entropy_user(struct entropy_store *r, void __user *buf,
}

/* Wipe data just returned from memory */
- memset(tmp, 0, sizeof(tmp));
+ memzero_explicit(tmp, sizeof(tmp));

return ret;
}
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index c166b4a..a9cd300 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -405,7 +405,18 @@ show_one(cpuinfo_max_freq, cpuinfo.max_freq);
show_one(cpuinfo_transition_latency, cpuinfo.transition_latency);
show_one(scaling_min_freq, min);
show_one(scaling_max_freq, max);
-show_one(scaling_cur_freq, cur);
+
+static ssize_t show_scaling_cur_freq(
+ struct cpufreq_policy *policy, char *buf)
+{
+ ssize_t ret;
+
+ if (cpufreq_driver && cpufreq_driver->setpolicy && cpufreq_driver->get)
+ ret = sprintf(buf, "%u\n", cpufreq_driver->get(policy->cpu));
+ else
+ ret = sprintf(buf, "%u\n", policy->cur);
+ return ret;
+}

static int cpufreq_set_policy(struct cpufreq_policy *policy,
struct cpufreq_policy *new_policy);
@@ -799,11 +810,11 @@ static int cpufreq_add_dev_interface(struct cpufreq_policy *policy,
if (ret)
goto err_out_kobj_put;
}
- if (has_target()) {
- ret = sysfs_create_file(&policy->kobj, &scaling_cur_freq.attr);
- if (ret)
- goto err_out_kobj_put;
- }
+
+ ret = sysfs_create_file(&policy->kobj, &scaling_cur_freq.attr);
+ if (ret)
+ goto err_out_kobj_put;
+
if (cpufreq_driver->bios_limit) {
ret = sysfs_create_file(&policy->kobj, &bios_limit.attr);
if (ret)
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 2d26563..4e8949d 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -54,6 +54,17 @@ static inline int32_t div_fp(int32_t x, int32_t y)
return div_s64((int64_t)x << FRAC_BITS, (int64_t)y);
}

+static inline int ceiling_fp(int32_t x)
+{
+ int mask, ret;
+
+ ret = fp_toint(x);
+ mask = (1 << FRAC_BITS) - 1;
+ if (x & mask)
+ ret += 1;
+ return ret;
+}
+
struct sample {
int32_t core_pct_busy;
u64 aperf;
@@ -65,6 +76,7 @@ struct pstate_data {
int current_pstate;
int min_pstate;
int max_pstate;
+ int scaling;
int turbo_pstate;
};

@@ -118,6 +130,7 @@ struct pstate_funcs {
int (*get_max)(void);
int (*get_min)(void);
int (*get_turbo)(void);
+ int (*get_scaling)(void);
void (*set)(struct cpudata*, int pstate);
void (*get_vid)(struct cpudata *);
};
@@ -143,6 +156,7 @@ struct perf_limits {

static struct perf_limits limits = {
.no_turbo = 0,
+ .turbo_disabled = 0,
.max_perf_pct = 100,
.max_perf = int_tofp(1),
.min_perf_pct = 0,
@@ -227,6 +241,18 @@ static inline void intel_pstate_reset_all_pid(void)
}
}

+static inline void update_turbo_state(void)
+{
+ u64 misc_en;
+ struct cpudata *cpu;
+
+ cpu = all_cpu_data[0];
+ rdmsrl(MSR_IA32_MISC_ENABLE, misc_en);
+ limits.turbo_disabled =
+ (misc_en & MSR_IA32_MISC_ENABLE_TURBO_DISABLE ||
+ cpu->pstate.max_pstate == cpu->pstate.turbo_pstate);
+}
+
/************************** debugfs begin ************************/
static int pid_param_set(void *data, u64 val)
{
@@ -283,6 +309,20 @@ static void intel_pstate_debug_expose_params(void)
return sprintf(buf, "%u\n", limits.object); \
}

+static ssize_t show_no_turbo(struct kobject *kobj,
+ struct attribute *attr, char *buf)
+{
+ ssize_t ret;
+
+ update_turbo_state();
+ if (limits.turbo_disabled)
+ ret = sprintf(buf, "%u\n", limits.turbo_disabled);
+ else
+ ret = sprintf(buf, "%u\n", limits.no_turbo);
+
+ return ret;
+}
+
static ssize_t store_no_turbo(struct kobject *a, struct attribute *b,
const char *buf, size_t count)
{
@@ -291,11 +331,14 @@ static ssize_t store_no_turbo(struct kobject *a, struct attribute *b,
ret = sscanf(buf, "%u", &input);
if (ret != 1)
return -EINVAL;
- limits.no_turbo = clamp_t(int, input, 0 , 1);
+
+ update_turbo_state();
if (limits.turbo_disabled) {
pr_warn("Turbo disabled by BIOS or unavailable on processor\n");
- limits.no_turbo = limits.turbo_disabled;
+ return -EPERM;
}
+ limits.no_turbo = clamp_t(int, input, 0, 1);
+
return count;
}

@@ -328,7 +371,6 @@ static ssize_t store_min_perf_pct(struct kobject *a, struct attribute *b,
return count;
}

-show_one(no_turbo, no_turbo);
show_one(max_perf_pct, max_perf_pct);
show_one(min_perf_pct, min_perf_pct);

@@ -397,7 +439,7 @@ static void byt_set_pstate(struct cpudata *cpudata, int pstate)
cpudata->vid.ratio);

vid_fp = clamp_t(int32_t, vid_fp, cpudata->vid.min, cpudata->vid.max);
- vid = fp_toint(vid_fp);
+ vid = ceiling_fp(vid_fp);

if (pstate > cpudata->pstate.max_pstate)
vid = cpudata->vid.turbo;
@@ -407,6 +449,22 @@ static void byt_set_pstate(struct cpudata *cpudata, int pstate)
wrmsrl(MSR_IA32_PERF_CTL, val);
}

+#define BYT_BCLK_FREQS 5
+static int byt_freq_table[BYT_BCLK_FREQS] = { 833, 1000, 1333, 1167, 800};
+
+static int byt_get_scaling(void)
+{
+ u64 value;
+ int i;
+
+ rdmsrl(MSR_FSB_FREQ, value);
+ i = value & 0x3;
+
+ BUG_ON(i > BYT_BCLK_FREQS);
+
+ return byt_freq_table[i] * 100;
+}
+
static void byt_get_vid(struct cpudata *cpudata)
{
u64 value;
@@ -451,6 +509,11 @@ static int core_get_turbo_pstate(void)
return ret;
}

+static inline int core_get_scaling(void)
+{
+ return 100000;
+}
+
static void core_set_pstate(struct cpudata *cpudata, int pstate)
{
u64 val;
@@ -475,6 +538,7 @@ static struct cpu_defaults core_params = {
.get_max = core_get_max_pstate,
.get_min = core_get_min_pstate,
.get_turbo = core_get_turbo_pstate,
+ .get_scaling = core_get_scaling,
.set = core_set_pstate,
},
};
@@ -493,6 +557,7 @@ static struct cpu_defaults byt_params = {
.get_min = byt_get_min_pstate,
.get_turbo = byt_get_turbo_pstate,
.set = byt_set_pstate,
+ .get_scaling = byt_get_scaling,
.get_vid = byt_get_vid,
},
};
@@ -503,7 +568,8 @@ static void intel_pstate_get_min_max(struct cpudata *cpu, int *min, int *max)
int max_perf = cpu->pstate.turbo_pstate;
int max_perf_adj;
int min_perf;
- if (limits.no_turbo)
+
+ if (limits.no_turbo || limits.turbo_disabled)
max_perf = cpu->pstate.max_pstate;

max_perf_adj = fp_toint(mul_fp(int_tofp(max_perf), limits.max_perf));
@@ -519,6 +585,8 @@ static void intel_pstate_set_pstate(struct cpudata *cpu, int pstate)
{
int max_perf, min_perf;

+ update_turbo_state();
+
intel_pstate_get_min_max(cpu, &min_perf, &max_perf);

pstate = clamp_t(int, pstate, min_perf, max_perf);
@@ -526,7 +594,7 @@ static void intel_pstate_set_pstate(struct cpudata *cpu, int pstate)
if (pstate == cpu->pstate.current_pstate)
return;

- trace_cpu_frequency(pstate * 100000, cpu->cpu);
+ trace_cpu_frequency(pstate * cpu->pstate.scaling, cpu->cpu);

cpu->pstate.current_pstate = pstate;

@@ -555,6 +623,7 @@ static void intel_pstate_get_cpu_pstates(struct cpudata *cpu)
cpu->pstate.min_pstate = pstate_funcs.get_min();
cpu->pstate.max_pstate = pstate_funcs.get_max();
cpu->pstate.turbo_pstate = pstate_funcs.get_turbo();
+ cpu->pstate.scaling = pstate_funcs.get_scaling();

if (pstate_funcs.get_vid)
pstate_funcs.get_vid(cpu);
@@ -572,7 +641,10 @@ static inline void intel_pstate_calc_busy(struct cpudata *cpu,
u64 core_pct;
core_pct = div64_u64(int_tofp(sample->aperf * 100),
sample->mperf);
- sample->freq = fp_toint(cpu->pstate.max_pstate * core_pct * 1000);
+ sample->freq = fp_toint(
+ mul_fp(int_tofp(
+ cpu->pstate.max_pstate * cpu->pstate.scaling / 100),
+ core_pct));

sample->core_pct_busy = core_pct;
}
@@ -679,7 +751,9 @@ static int intel_pstate_init_cpu(unsigned int cpunum)
if (!id)
return -ENODEV;

- all_cpu_data[cpunum] = kzalloc(sizeof(struct cpudata), GFP_KERNEL);
+ if (!all_cpu_data[cpunum])
+ all_cpu_data[cpunum] = kzalloc(sizeof(struct cpudata),
+ GFP_KERNEL);
if (!all_cpu_data[cpunum])
return -ENOMEM;

@@ -733,9 +807,10 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy)
if (policy->policy == CPUFREQ_POLICY_PERFORMANCE) {
limits.min_perf_pct = 100;
limits.min_perf = int_tofp(1);
+ limits.max_policy_pct = 100;
limits.max_perf_pct = 100;
limits.max_perf = int_tofp(1);
- limits.no_turbo = limits.turbo_disabled;
+ limits.no_turbo = 0;
return 0;
}
limits.min_perf_pct = (policy->min * 100) / policy->cpuinfo.max_freq;
@@ -766,8 +841,6 @@ static int intel_pstate_cpu_exit(struct cpufreq_policy *policy)
int cpu = policy->cpu;

del_timer(&all_cpu_data[cpu]->timer);
- kfree(all_cpu_data[cpu]);
- all_cpu_data[cpu] = NULL;
return 0;
}

@@ -775,7 +848,6 @@ static int intel_pstate_cpu_init(struct cpufreq_policy *policy)
{
struct cpudata *cpu;
int rc;
- u64 misc_en;

rc = intel_pstate_init_cpu(policy->cpu);
if (rc)
@@ -783,23 +855,18 @@ static int intel_pstate_cpu_init(struct cpufreq_policy *policy)

cpu = all_cpu_data[policy->cpu];

- rdmsrl(MSR_IA32_MISC_ENABLE, misc_en);
- if (misc_en & MSR_IA32_MISC_ENABLE_TURBO_DISABLE ||
- cpu->pstate.max_pstate == cpu->pstate.turbo_pstate) {
- limits.turbo_disabled = 1;
- limits.no_turbo = 1;
- }
if (limits.min_perf_pct == 100 && limits.max_perf_pct == 100)
policy->policy = CPUFREQ_POLICY_PERFORMANCE;
else
policy->policy = CPUFREQ_POLICY_POWERSAVE;

- policy->min = cpu->pstate.min_pstate * 100000;
- policy->max = cpu->pstate.turbo_pstate * 100000;
+ policy->min = cpu->pstate.min_pstate * cpu->pstate.scaling;
+ policy->max = cpu->pstate.turbo_pstate * cpu->pstate.scaling;

/* cpuinfo and default policy values */
- policy->cpuinfo.min_freq = cpu->pstate.min_pstate * 100000;
- policy->cpuinfo.max_freq = cpu->pstate.turbo_pstate * 100000;
+ policy->cpuinfo.min_freq = cpu->pstate.min_pstate * cpu->pstate.scaling;
+ policy->cpuinfo.max_freq =
+ cpu->pstate.turbo_pstate * cpu->pstate.scaling;
policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
cpumask_set_cpu(policy->cpu, policy->cpus);

@@ -857,6 +924,7 @@ static void copy_cpu_funcs(struct pstate_funcs *funcs)
pstate_funcs.get_max = funcs->get_max;
pstate_funcs.get_min = funcs->get_min;
pstate_funcs.get_turbo = funcs->get_turbo;
+ pstate_funcs.get_scaling = funcs->get_scaling;
pstate_funcs.set = funcs->set;
pstate_funcs.get_vid = funcs->get_vid;
}
diff --git a/drivers/edac/cpc925_edac.c b/drivers/edac/cpc925_edac.c
index df6575f..682288c 100644
--- a/drivers/edac/cpc925_edac.c
+++ b/drivers/edac/cpc925_edac.c
@@ -562,7 +562,7 @@ static void cpc925_mc_check(struct mem_ctl_info *mci)

if (apiexcp & UECC_EXCP_DETECTED) {
cpc925_mc_printk(mci, KERN_INFO, "DRAM UECC Fault\n");
- edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
+ edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1,
pfn, offset, 0,
csrow, -1, -1,
mci->ctl_name, "");
diff --git a/drivers/edac/e7xxx_edac.c b/drivers/edac/e7xxx_edac.c
index 1c4056a..2697dea 100644
--- a/drivers/edac/e7xxx_edac.c
+++ b/drivers/edac/e7xxx_edac.c
@@ -226,7 +226,7 @@ static void process_ce(struct mem_ctl_info *mci, struct e7xxx_error_info *info)
static void process_ce_no_info(struct mem_ctl_info *mci)
{
edac_dbg(3, "\n");
- edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, 0, 0, 0, -1, -1, -1,
+ edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1, 0, 0, 0, -1, -1, -1,
"e7xxx CE log register overflow", "");
}

diff --git a/drivers/edac/i3200_edac.c b/drivers/edac/i3200_edac.c
index be10a74..7d5b369 100644
--- a/drivers/edac/i3200_edac.c
+++ b/drivers/edac/i3200_edac.c
@@ -242,11 +242,11 @@ static void i3200_process_error_info(struct mem_ctl_info *mci,
-1, -1,
"i3000 UE", "");
} else if (log & I3200_ECCERRLOG_CE) {
- edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1,
+ edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
0, 0, eccerrlog_syndrome(log),
eccerrlog_row(channel, log),
-1, -1,
- "i3000 UE", "");
+ "i3000 CE", "");
}
}
}
diff --git a/drivers/edac/i82860_edac.c b/drivers/edac/i82860_edac.c
index 3e3e431..b93b0d0 100644
--- a/drivers/edac/i82860_edac.c
+++ b/drivers/edac/i82860_edac.c
@@ -124,7 +124,7 @@ static int i82860_process_error_info(struct mem_ctl_info *mci,
dimm->location[0], dimm->location[1], -1,
"i82860 UE", "");
else
- edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1,
+ edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
info->eap, 0, info->derrsyn,
dimm->location[0], dimm->location[1], -1,
"i82860 CE", "");
diff --git a/drivers/gpu/drm/cirrus/cirrus_drv.c b/drivers/gpu/drm/cirrus/cirrus_drv.c
index 953fc8a..8555548 100644
--- a/drivers/gpu/drm/cirrus/cirrus_drv.c
+++ b/drivers/gpu/drm/cirrus/cirrus_drv.c
@@ -31,6 +31,8 @@ static struct drm_driver driver;
static DEFINE_PCI_DEVICE_TABLE(pciidlist) = {
{ PCI_VENDOR_ID_CIRRUS, PCI_DEVICE_ID_CIRRUS_5446, 0x1af4, 0x1100, 0,
0, 0 },
+ { PCI_VENDOR_ID_CIRRUS, PCI_DEVICE_ID_CIRRUS_5446, PCI_VENDOR_ID_XEN,
+ 0x0001, 0, 0, 0 },
{0,}
};

diff --git a/drivers/gpu/drm/qxl/qxl_display.c b/drivers/gpu/drm/qxl/qxl_display.c
index d70aafb..fb71c10 100644
--- a/drivers/gpu/drm/qxl/qxl_display.c
+++ b/drivers/gpu/drm/qxl/qxl_display.c
@@ -516,7 +516,6 @@ static int qxl_crtc_mode_set(struct drm_crtc *crtc,
struct qxl_framebuffer *qfb;
struct qxl_bo *bo, *old_bo = NULL;
struct qxl_crtc *qcrtc = to_qxl_crtc(crtc);
- uint32_t width, height, base_offset;
bool recreate_primary = false;
int ret;
int surf_id;
@@ -546,9 +545,10 @@ static int qxl_crtc_mode_set(struct drm_crtc *crtc,
if (qcrtc->index == 0)
recreate_primary = true;

- width = mode->hdisplay;
- height = mode->vdisplay;
- base_offset = 0;
+ if (bo->surf.stride * bo->surf.height > qdev->vram_size) {
+ DRM_ERROR("Mode doesn't fit in vram size (vgamem)");
+ return -EINVAL;
+ }

ret = qxl_bo_reserve(bo, false);
if (ret != 0)
@@ -562,10 +562,10 @@ static int qxl_crtc_mode_set(struct drm_crtc *crtc,
if (recreate_primary) {
qxl_io_destroy_primary(qdev);
qxl_io_log(qdev,
- "recreate primary: %dx%d (was %dx%d,%d,%d)\n",
- width, height, bo->surf.width,
- bo->surf.height, bo->surf.stride, bo->surf.format);
- qxl_io_create_primary(qdev, base_offset, bo);
+ "recreate primary: %dx%d,%d,%d\n",
+ bo->surf.width, bo->surf.height,
+ bo->surf.stride, bo->surf.format);
+ qxl_io_create_primary(qdev, 0, bo);
bo->is_primary = true;
surf_id = 0;
} else {
diff --git a/drivers/gpu/drm/radeon/cik_sdma.c b/drivers/gpu/drm/radeon/cik_sdma.c
index 9fd95c7..376502d 100644
--- a/drivers/gpu/drm/radeon/cik_sdma.c
+++ b/drivers/gpu/drm/radeon/cik_sdma.c
@@ -508,16 +508,19 @@ int cik_sdma_ring_test(struct radeon_device *rdev,
{
unsigned i;
int r;
- void __iomem *ptr = (void *)rdev->vram_scratch.ptr;
+ unsigned index;
u32 tmp;
+ u64 gpu_addr;

- if (!ptr) {
- DRM_ERROR("invalid vram scratch pointer\n");
- return -EINVAL;
- }
+ if (ring->idx == R600_RING_TYPE_DMA_INDEX)
+ index = R600_WB_DMA_RING_TEST_OFFSET;
+ else
+ index = CAYMAN_WB_DMA1_RING_TEST_OFFSET;
+
+ gpu_addr = rdev->wb.gpu_addr + index;

tmp = 0xCAFEDEAD;
- writel(tmp, ptr);
+ rdev->wb.wb[index/4] = cpu_to_le32(tmp);

r = radeon_ring_lock(rdev, ring, 5);
if (r) {
@@ -525,14 +528,14 @@ int cik_sdma_ring_test(struct radeon_device *rdev,
return r;
}
radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
- radeon_ring_write(ring, rdev->vram_scratch.gpu_addr & 0xfffffffc);
- radeon_ring_write(ring, upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xffffffff);
+ radeon_ring_write(ring, lower_32_bits(gpu_addr));
+ radeon_ring_write(ring, upper_32_bits(gpu_addr));
radeon_ring_write(ring, 1); /* number of DWs to follow */
radeon_ring_write(ring, 0xDEADBEEF);
radeon_ring_unlock_commit(rdev, ring);

for (i = 0; i < rdev->usec_timeout; i++) {
- tmp = readl(ptr);
+ tmp = le32_to_cpu(rdev->wb.wb[index/4]);
if (tmp == 0xDEADBEEF)
break;
DRM_UDELAY(1);
diff --git a/drivers/gpu/drm/radeon/dce6_afmt.c b/drivers/gpu/drm/radeon/dce6_afmt.c
index ff0001c..3483828 100644
--- a/drivers/gpu/drm/radeon/dce6_afmt.c
+++ b/drivers/gpu/drm/radeon/dce6_afmt.c
@@ -174,9 +174,9 @@ void dce6_afmt_write_speaker_allocation(struct drm_encoder *encoder)
}

sad_count = drm_edid_to_speaker_allocation(radeon_connector->edid, &sadb);
- if (sad_count <= 0) {
- DRM_ERROR("Couldn't read Speaker Allocation Data Block: %d\n", sad_count);
- return;
+ if (sad_count < 0) {
+ DRM_DEBUG("Couldn't read Speaker Allocation Data Block: %d\n", sad_count);
+ sad_count = 0;
}

/* program the speaker allocation */
diff --git a/drivers/gpu/drm/radeon/evergreen_hdmi.c b/drivers/gpu/drm/radeon/evergreen_hdmi.c
index 0c6d5ce..738c1ec 100644
--- a/drivers/gpu/drm/radeon/evergreen_hdmi.c
+++ b/drivers/gpu/drm/radeon/evergreen_hdmi.c
@@ -118,9 +118,9 @@ static void dce4_afmt_write_speaker_allocation(struct drm_encoder *encoder)
}

sad_count = drm_edid_to_speaker_allocation(radeon_connector->edid, &sadb);
- if (sad_count <= 0) {
- DRM_ERROR("Couldn't read Speaker Allocation Data Block: %d\n", sad_count);
- return;
+ if (sad_count < 0) {
+ DRM_DEBUG("Couldn't read Speaker Allocation Data Block: %d\n", sad_count);
+ sad_count = 0;
}

/* program the speaker allocation */
diff --git a/drivers/gpu/drm/radeon/kv_dpm.c b/drivers/gpu/drm/radeon/kv_dpm.c
index 1b0331c..8fcc491 100644
--- a/drivers/gpu/drm/radeon/kv_dpm.c
+++ b/drivers/gpu/drm/radeon/kv_dpm.c
@@ -2620,7 +2620,11 @@ int kv_dpm_init(struct radeon_device *rdev)
if (rdev->family == CHIP_KABINI)
pi->high_voltage_t = 4001;

- pi->enable_nb_dpm = true;
+ /* Enabling nb dpm on an asrock system prevents dpm from working */
+ if (rdev->pdev->subsystem_vendor == 0x1849)
+ pi->enable_nb_dpm = false;
+ else
+ pi->enable_nb_dpm = true;

pi->caps_power_containment = true;
pi->caps_cac = true;
@@ -2635,10 +2639,19 @@ int kv_dpm_init(struct radeon_device *rdev)
pi->caps_sclk_ds = true;
pi->enable_auto_thermal_throttling = true;
pi->disable_nb_ps3_in_battery = false;
- if (radeon_bapm == 0)
+ if (radeon_bapm == -1) {
+ /* There are stability issues reported on with
+ * bapm enabled on an asrock system.
+ */
+ if (rdev->pdev->subsystem_vendor == 0x1849)
+ pi->bapm_enable = false;
+ else
+ pi->bapm_enable = true;
+ } else if (radeon_bapm == 0) {
pi->bapm_enable = false;
- else
+ } else {
pi->bapm_enable = true;
+ }
pi->voltage_drop_t = 0;
pi->caps_sclk_throttle_low_notification = false;
pi->caps_fps = false; /* true? */
diff --git a/drivers/gpu/drm/radeon/r600_dma.c b/drivers/gpu/drm/radeon/r600_dma.c
index 616d37a..cf5e181 100644
--- a/drivers/gpu/drm/radeon/r600_dma.c
+++ b/drivers/gpu/drm/radeon/r600_dma.c
@@ -227,16 +227,19 @@ int r600_dma_ring_test(struct radeon_device *rdev,
{
unsigned i;
int r;
- void __iomem *ptr = (void *)rdev->vram_scratch.ptr;
+ unsigned index;
u32 tmp;
+ u64 gpu_addr;

- if (!ptr) {
- DRM_ERROR("invalid vram scratch pointer\n");
- return -EINVAL;
- }
+ if (ring->idx == R600_RING_TYPE_DMA_INDEX)
+ index = R600_WB_DMA_RING_TEST_OFFSET;
+ else
+ index = CAYMAN_WB_DMA1_RING_TEST_OFFSET;
+
+ gpu_addr = rdev->wb.gpu_addr + index;

tmp = 0xCAFEDEAD;
- writel(tmp, ptr);
+ rdev->wb.wb[index/4] = cpu_to_le32(tmp);

r = radeon_ring_lock(rdev, ring, 4);
if (r) {
@@ -244,13 +247,13 @@ int r600_dma_ring_test(struct radeon_device *rdev,
return r;
}
radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1));
- radeon_ring_write(ring, rdev->vram_scratch.gpu_addr & 0xfffffffc);
- radeon_ring_write(ring, upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xff);
+ radeon_ring_write(ring, lower_32_bits(gpu_addr));
+ radeon_ring_write(ring, upper_32_bits(gpu_addr) & 0xff);
radeon_ring_write(ring, 0xDEADBEEF);
radeon_ring_unlock_commit(rdev, ring);

for (i = 0; i < rdev->usec_timeout; i++) {
- tmp = readl(ptr);
+ tmp = le32_to_cpu(rdev->wb.wb[index/4]);
if (tmp == 0xDEADBEEF)
break;
DRM_UDELAY(1);
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index fd2526c..ee3be48 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -1081,6 +1081,8 @@ struct radeon_wb {
#define R600_WB_EVENT_OFFSET 3072
#define CIK_WB_CP1_WPTR_OFFSET 3328
#define CIK_WB_CP2_WPTR_OFFSET 3584
+#define R600_WB_DMA_RING_TEST_OFFSET 3588
+#define CAYMAN_WB_DMA1_RING_TEST_OFFSET 3592

/**
* struct radeon_pm - power management datas
diff --git a/drivers/gpu/drm/radeon/si_dpm.c b/drivers/gpu/drm/radeon/si_dpm.c
index 0d50df3..b044e19 100644
--- a/drivers/gpu/drm/radeon/si_dpm.c
+++ b/drivers/gpu/drm/radeon/si_dpm.c
@@ -6224,7 +6224,7 @@ static void si_parse_pplib_clock_info(struct radeon_device *rdev,
if ((rps->class2 & ATOM_PPLIB_CLASSIFICATION2_ULV) &&
index == 0) {
/* XXX disable for A0 tahiti */
- si_pi->ulv.supported = true;
+ si_pi->ulv.supported = false;
si_pi->ulv.pl = *pl;
si_pi->ulv.one_pcie_lane_in_ulv = false;
si_pi->ulv.volt_change_delay = SISLANDS_ULVVOLTAGECHANGEDELAY_DFLT;
diff --git a/drivers/iio/common/st_sensors/st_sensors_buffer.c b/drivers/iio/common/st_sensors/st_sensors_buffer.c
index 1665c8e..e18bc67 100644
--- a/drivers/iio/common/st_sensors/st_sensors_buffer.c
+++ b/drivers/iio/common/st_sensors/st_sensors_buffer.c
@@ -71,7 +71,7 @@ int st_sensors_get_buffer_element(struct iio_dev *indio_dev, u8 *buf)
goto st_sensors_free_memory;
}

- for (i = 0; i < n * num_data_channels; i++) {
+ for (i = 0; i < n * byte_for_channel; i++) {
if (i < n)
buf[i] = rx_array[i];
else
diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c
index 44b883f..9b2a108 100644
--- a/drivers/infiniband/ulp/isert/ib_isert.c
+++ b/drivers/infiniband/ulp/isert/ib_isert.c
@@ -1909,7 +1909,7 @@ isert_put_response(struct iscsi_conn *conn, struct iscsi_cmd *cmd)
isert_cmd->tx_desc.num_sge = 2;
}

- isert_init_send_wr(isert_conn, isert_cmd, send_wr, true);
+ isert_init_send_wr(isert_conn, isert_cmd, send_wr, false);

pr_debug("Posting SCSI Response IB_WR_SEND >>>>>>>>>>>>>>>>>>>>>>\n");

@@ -2432,7 +2432,7 @@ isert_put_datain(struct iscsi_conn *conn, struct iscsi_cmd *cmd)
&isert_cmd->tx_desc.iscsi_header);
isert_init_tx_hdrs(isert_conn, &isert_cmd->tx_desc);
isert_init_send_wr(isert_conn, isert_cmd,
- &isert_cmd->tx_desc.send_wr, true);
+ &isert_cmd->tx_desc.send_wr, false);

atomic_add(wr->send_wr_num + 1, &isert_conn->post_send_buf_count);

diff --git a/drivers/input/serio/i8042-x86ia64io.h b/drivers/input/serio/i8042-x86ia64io.h
index 40ff494..ce715b1 100644
--- a/drivers/input/serio/i8042-x86ia64io.h
+++ b/drivers/input/serio/i8042-x86ia64io.h
@@ -615,6 +615,22 @@ static const struct dmi_system_id __initconst i8042_dmi_notimeout_table[] = {
},
},
{
+ /* Fujitsu A544 laptop */
+ /* https://bugzilla.redhat.com/show_bug.cgi?id=1111138 */
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "LIFEBOOK A544"),
+ },
+ },
+ {
+ /* Fujitsu AH544 laptop */
+ /* https://bugzilla.kernel.org/show_bug.cgi?id=69731 */
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "LIFEBOOK AH544"),
+ },
+ },
+ {
/* Fujitsu U574 laptop */
/* https://bugzilla.kernel.org/show_bug.cgi?id=69731 */
.matches = {
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 1091fa2..a656d4c 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -3630,8 +3630,14 @@ static int bond_xmit_roundrobin(struct sk_buff *skb, struct net_device *bond_dev
else
bond_xmit_slave_id(bond, skb, 0);
} else {
- slave_id = bond_rr_gen_slave_id(bond);
- bond_xmit_slave_id(bond, skb, slave_id % bond->slave_cnt);
+ int slave_cnt = ACCESS_ONCE(bond->slave_cnt);
+
+ if (likely(slave_cnt)) {
+ slave_id = bond_rr_gen_slave_id(bond);
+ bond_xmit_slave_id(bond, skb, slave_id % slave_cnt);
+ } else {
+ dev_kfree_skb_any(skb);
+ }
}

return NETDEV_TX_OK;
@@ -3662,8 +3668,13 @@ static int bond_xmit_activebackup(struct sk_buff *skb, struct net_device *bond_d
static int bond_xmit_xor(struct sk_buff *skb, struct net_device *bond_dev)
{
struct bonding *bond = netdev_priv(bond_dev);
+ int slave_cnt = ACCESS_ONCE(bond->slave_cnt);

- bond_xmit_slave_id(bond, skb, bond_xmit_hash(bond, skb, bond->slave_cnt));
+ if (likely(slave_cnt))
+ bond_xmit_slave_id(bond, skb,
+ bond_xmit_hash(bond, skb, bond->slave_cnt));
+ else
+ dev_kfree_skb_any(skb);

return NETDEV_TX_OK;
}
diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c
index e71ee9f..1b84139 100644
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -6907,7 +6907,8 @@ static int tg3_rx(struct tg3_napi *tnapi, int budget)
skb->protocol = eth_type_trans(skb, tp->dev);

if (len > (tp->dev->mtu + ETH_HLEN) &&
- skb->protocol != htons(ETH_P_8021Q)) {
+ skb->protocol != htons(ETH_P_8021Q) &&
+ skb->protocol != htons(ETH_P_8021AD)) {
dev_kfree_skb(skb);
goto drop_it_no_recycle;
}
@@ -7899,8 +7900,6 @@ static netdev_tx_t tg3_start_xmit(struct sk_buff *skb, struct net_device *dev)

entry = tnapi->tx_prod;
base_flags = 0;
- if (skb->ip_summed == CHECKSUM_PARTIAL)
- base_flags |= TXD_FLAG_TCPUDP_CSUM;

mss = skb_shinfo(skb)->gso_size;
if (mss) {
@@ -7916,6 +7915,13 @@ static netdev_tx_t tg3_start_xmit(struct sk_buff *skb, struct net_device *dev)

hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb) - ETH_HLEN;

+ /* HW/FW can not correctly segment packets that have been
+ * vlan encapsulated.
+ */
+ if (skb->protocol == htons(ETH_P_8021Q) ||
+ skb->protocol == htons(ETH_P_8021AD))
+ return tg3_tso_bug(tp, skb);
+
if (!skb_is_gso_v6(skb)) {
iph->check = 0;
iph->tot_len = htons(mss + hdr_len);
@@ -7962,6 +7968,17 @@ static netdev_tx_t tg3_start_xmit(struct sk_buff *skb, struct net_device *dev)
base_flags |= tsflags << 12;
}
}
+ } else if (skb->ip_summed == CHECKSUM_PARTIAL) {
+ /* HW/FW can not correctly checksum packets that have been
+ * vlan encapsulated.
+ */
+ if (skb->protocol == htons(ETH_P_8021Q) ||
+ skb->protocol == htons(ETH_P_8021AD)) {
+ if (skb_checksum_help(skb))
+ goto drop;
+ } else {
+ base_flags |= TXD_FLAG_TCPUDP_CSUM;
+ }
}

if (tg3_flag(tp, USE_JUMBO_BDFLAG) &&
diff --git a/drivers/net/ethernet/cadence/macb.c b/drivers/net/ethernet/cadence/macb.c
index 9257869..b020d1c 100644
--- a/drivers/net/ethernet/cadence/macb.c
+++ b/drivers/net/ethernet/cadence/macb.c
@@ -29,7 +29,6 @@
#include <linux/of_device.h>
#include <linux/of_mdio.h>
#include <linux/of_net.h>
-#include <linux/pinctrl/consumer.h>

#include "macb.h"

@@ -1755,7 +1754,6 @@ static int __init macb_probe(struct platform_device *pdev)
struct phy_device *phydev;
u32 config;
int err = -ENXIO;
- struct pinctrl *pinctrl;
const char *mac;

regs = platform_get_resource(pdev, IORESOURCE_MEM, 0);
@@ -1764,15 +1762,6 @@ static int __init macb_probe(struct platform_device *pdev)
goto err_out;
}

- pinctrl = devm_pinctrl_get_select_default(&pdev->dev);
- if (IS_ERR(pinctrl)) {
- err = PTR_ERR(pinctrl);
- if (err == -EPROBE_DEFER)
- goto err_out;
-
- dev_warn(&pdev->dev, "No pinctrl provided\n");
- }
-
err = -ENOMEM;
dev = alloc_etherdev(sizeof(*bp));
if (!dev)
diff --git a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c
index 68026f7..4a474dd 100644
--- a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c
+++ b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c
@@ -872,6 +872,10 @@ static int myri10ge_dma_test(struct myri10ge_priv *mgp, int test_type)
return -ENOMEM;
dmatest_bus = pci_map_page(mgp->pdev, dmatest_page, 0, PAGE_SIZE,
DMA_BIDIRECTIONAL);
+ if (unlikely(pci_dma_mapping_error(mgp->pdev, dmatest_bus))) {
+ __free_page(dmatest_page);
+ return -ENOMEM;
+ }

/* Run a small DMA test.
* The magic multipliers to the length tell the firmware
@@ -1293,6 +1297,7 @@ myri10ge_alloc_rx_pages(struct myri10ge_priv *mgp, struct myri10ge_rx_buf *rx,
int bytes, int watchdog)
{
struct page *page;
+ dma_addr_t bus;
int idx;
#if MYRI10GE_ALLOC_SIZE > 4096
int end_offset;
@@ -1317,11 +1322,21 @@ myri10ge_alloc_rx_pages(struct myri10ge_priv *mgp, struct myri10ge_rx_buf *rx,
rx->watchdog_needed = 1;
return;
}
+
+ bus = pci_map_page(mgp->pdev, page, 0,
+ MYRI10GE_ALLOC_SIZE,
+ PCI_DMA_FROMDEVICE);
+ if (unlikely(pci_dma_mapping_error(mgp->pdev, bus))) {
+ __free_pages(page, MYRI10GE_ALLOC_ORDER);
+ if (rx->fill_cnt - rx->cnt < 16)
+ rx->watchdog_needed = 1;
+ return;
+ }
+
rx->page = page;
rx->page_offset = 0;
- rx->bus = pci_map_page(mgp->pdev, page, 0,
- MYRI10GE_ALLOC_SIZE,
- PCI_DMA_FROMDEVICE);
+ rx->bus = bus;
+
}
rx->info[idx].page = rx->page;
rx->info[idx].page_offset = rx->page_offset;
@@ -2765,6 +2780,35 @@ myri10ge_submit_req(struct myri10ge_tx_buf *tx, struct mcp_kreq_ether_send *src,
mb();
}

+static void myri10ge_unmap_tx_dma(struct myri10ge_priv *mgp,
+ struct myri10ge_tx_buf *tx, int idx)
+{
+ unsigned int len;
+ int last_idx;
+
+ /* Free any DMA resources we've alloced and clear out the skb slot */
+ last_idx = (idx + 1) & tx->mask;
+ idx = tx->req & tx->mask;
+ do {
+ len = dma_unmap_len(&tx->info[idx], len);
+ if (len) {
+ if (tx->info[idx].skb != NULL)
+ pci_unmap_single(mgp->pdev,
+ dma_unmap_addr(&tx->info[idx],
+ bus), len,
+ PCI_DMA_TODEVICE);
+ else
+ pci_unmap_page(mgp->pdev,
+ dma_unmap_addr(&tx->info[idx],
+ bus), len,
+ PCI_DMA_TODEVICE);
+ dma_unmap_len_set(&tx->info[idx], len, 0);
+ tx->info[idx].skb = NULL;
+ }
+ idx = (idx + 1) & tx->mask;
+ } while (idx != last_idx);
+}
+
/*
* Transmit a packet. We need to split the packet so that a single
* segment does not cross myri10ge->tx_boundary, so this makes segment
@@ -2788,7 +2832,7 @@ static netdev_tx_t myri10ge_xmit(struct sk_buff *skb,
u32 low;
__be32 high_swapped;
unsigned int len;
- int idx, last_idx, avail, frag_cnt, frag_idx, count, mss, max_segments;
+ int idx, avail, frag_cnt, frag_idx, count, mss, max_segments;
u16 pseudo_hdr_offset, cksum_offset, queue;
int cum_len, seglen, boundary, rdma_count;
u8 flags, odd_flag;
@@ -2885,9 +2929,12 @@ again:

/* map the skb for DMA */
len = skb_headlen(skb);
+ bus = pci_map_single(mgp->pdev, skb->data, len, PCI_DMA_TODEVICE);
+ if (unlikely(pci_dma_mapping_error(mgp->pdev, bus)))
+ goto drop;
+
idx = tx->req & tx->mask;
tx->info[idx].skb = skb;
- bus = pci_map_single(mgp->pdev, skb->data, len, PCI_DMA_TODEVICE);
dma_unmap_addr_set(&tx->info[idx], bus, bus);
dma_unmap_len_set(&tx->info[idx], len, len);

@@ -2986,12 +3033,16 @@ again:
break;

/* map next fragment for DMA */
- idx = (count + tx->req) & tx->mask;
frag = &skb_shinfo(skb)->frags[frag_idx];
frag_idx++;
len = skb_frag_size(frag);
bus = skb_frag_dma_map(&mgp->pdev->dev, frag, 0, len,
DMA_TO_DEVICE);
+ if (unlikely(pci_dma_mapping_error(mgp->pdev, bus))) {
+ myri10ge_unmap_tx_dma(mgp, tx, idx);
+ goto drop;
+ }
+ idx = (count + tx->req) & tx->mask;
dma_unmap_addr_set(&tx->info[idx], bus, bus);
dma_unmap_len_set(&tx->info[idx], len, len);
}
@@ -3022,31 +3073,8 @@ again:
return NETDEV_TX_OK;

abort_linearize:
- /* Free any DMA resources we've alloced and clear out the skb
- * slot so as to not trip up assertions, and to avoid a
- * double-free if linearizing fails */
+ myri10ge_unmap_tx_dma(mgp, tx, idx);

- last_idx = (idx + 1) & tx->mask;
- idx = tx->req & tx->mask;
- tx->info[idx].skb = NULL;
- do {
- len = dma_unmap_len(&tx->info[idx], len);
- if (len) {
- if (tx->info[idx].skb != NULL)
- pci_unmap_single(mgp->pdev,
- dma_unmap_addr(&tx->info[idx],
- bus), len,
- PCI_DMA_TODEVICE);
- else
- pci_unmap_page(mgp->pdev,
- dma_unmap_addr(&tx->info[idx],
- bus), len,
- PCI_DMA_TODEVICE);
- dma_unmap_len_set(&tx->info[idx], len, 0);
- tx->info[idx].skb = NULL;
- }
- idx = (idx + 1) & tx->mask;
- } while (idx != last_idx);
if (skb_is_gso(skb)) {
netdev_err(mgp->dev, "TSO but wanted to linearize?!?!?\n");
goto drop;
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index 6ea06a8..f4aef42 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -147,6 +147,7 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net)
struct hv_netvsc_packet *packet;
int ret;
unsigned int i, num_pages, npg_data;
+ u32 skb_length = skb->len;

/* Add multipages for skb->data and additional 2 for RNDIS */
npg_data = (((unsigned long)skb->data + skb_headlen(skb) - 1)
@@ -217,7 +218,7 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net)
ret = rndis_filter_send(net_device_ctx->device_ctx,
packet);
if (ret == 0) {
- net->stats.tx_bytes += skb->len;
+ net->stats.tx_bytes += skb_length;
net->stats.tx_packets++;
} else {
kfree(packet);
diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index a430b99..0831e2f 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -710,6 +710,7 @@ static netdev_features_t macvlan_fix_features(struct net_device *dev,
features,
mask);
features |= ALWAYS_ON_FEATURES;
+ features &= ~NETIF_F_NETNS_LOCAL;

return features;
}
diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c
index 73ffad8..eed7544 100644
--- a/drivers/net/macvtap.c
+++ b/drivers/net/macvtap.c
@@ -108,17 +108,15 @@ out:
return err;
}

+/* Requires RTNL */
static int macvtap_set_queue(struct net_device *dev, struct file *file,
struct macvtap_queue *q)
{
struct macvlan_dev *vlan = netdev_priv(dev);
- int err = -EBUSY;

- rtnl_lock();
if (vlan->numqueues == MAX_MACVTAP_QUEUES)
- goto out;
+ return -EBUSY;

- err = 0;
rcu_assign_pointer(q->vlan, vlan);
rcu_assign_pointer(vlan->taps[vlan->numvtaps], q);
sock_hold(&q->sk);
@@ -132,9 +130,7 @@ static int macvtap_set_queue(struct net_device *dev, struct file *file,
vlan->numvtaps++;
vlan->numqueues++;

-out:
- rtnl_unlock();
- return err;
+ return 0;
}

static int macvtap_disable_queue(struct macvtap_queue *q)
@@ -450,11 +446,12 @@ static void macvtap_sock_destruct(struct sock *sk)
static int macvtap_open(struct inode *inode, struct file *file)
{
struct net *net = current->nsproxy->net_ns;
- struct net_device *dev = dev_get_by_macvtap_minor(iminor(inode));
+ struct net_device *dev;
struct macvtap_queue *q;
- int err;
+ int err = -ENODEV;

- err = -ENODEV;
+ rtnl_lock();
+ dev = dev_get_by_macvtap_minor(iminor(inode));
if (!dev)
goto out;

@@ -494,6 +491,7 @@ out:
if (dev)
dev_put(dev);

+ rtnl_unlock();
return err;
}

diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c
index 99500d1..2898c2a 100644
--- a/drivers/net/team/team.c
+++ b/drivers/net/team/team.c
@@ -647,7 +647,7 @@ static void team_notify_peers(struct team *team)
{
if (!team->notify_peers.count || !netif_running(team->dev))
return;
- atomic_set(&team->notify_peers.count_pending, team->notify_peers.count);
+ atomic_add(team->notify_peers.count, &team->notify_peers.count_pending);
schedule_delayed_work(&team->notify_peers.dw, 0);
}

@@ -687,7 +687,7 @@ static void team_mcast_rejoin(struct team *team)
{
if (!team->mcast_rejoin.count || !netif_running(team->dev))
return;
- atomic_set(&team->mcast_rejoin.count_pending, team->mcast_rejoin.count);
+ atomic_add(team->mcast_rejoin.count, &team->mcast_rejoin.count_pending);
schedule_delayed_work(&team->mcast_rejoin.dw, 0);
}

diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index 7e748a1..5543eb1 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -1224,7 +1224,7 @@ static int arp_reduce(struct net_device *dev, struct sk_buff *skb)
} else if (vxlan->flags & VXLAN_F_L3MISS) {
union vxlan_addr ipa = {
.sin.sin_addr.s_addr = tip,
- .sa.sa_family = AF_INET,
+ .sin.sin_family = AF_INET,
};

vxlan_ip_miss(dev, &ipa);
@@ -1385,7 +1385,7 @@ static int neigh_reduce(struct net_device *dev, struct sk_buff *skb)
} else if (vxlan->flags & VXLAN_F_L3MISS) {
union vxlan_addr ipa = {
.sin6.sin6_addr = msg->target,
- .sa.sa_family = AF_INET6,
+ .sin6.sin6_family = AF_INET6,
};

vxlan_ip_miss(dev, &ipa);
@@ -1418,7 +1418,7 @@ static bool route_shortcircuit(struct net_device *dev, struct sk_buff *skb)
if (!n && (vxlan->flags & VXLAN_F_L3MISS)) {
union vxlan_addr ipa = {
.sin.sin_addr.s_addr = pip->daddr,
- .sa.sa_family = AF_INET,
+ .sin.sin_family = AF_INET,
};

vxlan_ip_miss(dev, &ipa);
@@ -1439,7 +1439,7 @@ static bool route_shortcircuit(struct net_device *dev, struct sk_buff *skb)
if (!n && (vxlan->flags & VXLAN_F_L3MISS)) {
union vxlan_addr ipa = {
.sin6.sin6_addr = pip6->daddr,
- .sa.sa_family = AF_INET6,
+ .sin6.sin6_family = AF_INET6,
};

vxlan_ip_miss(dev, &ipa);
diff --git a/drivers/net/wireless/ath/ar5523/ar5523.c b/drivers/net/wireless/ath/ar5523/ar5523.c
index 9cc2b91..5d219a4 100644
--- a/drivers/net/wireless/ath/ar5523/ar5523.c
+++ b/drivers/net/wireless/ath/ar5523/ar5523.c
@@ -1091,7 +1091,8 @@ static int ar5523_set_rts_threshold(struct ieee80211_hw *hw, u32 value)
return ret;
}

-static void ar5523_flush(struct ieee80211_hw *hw, u32 queues, bool drop)
+static void ar5523_flush(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
+ u32 queues, bool drop)
{
struct ar5523 *ar = hw->priv;

diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c
index 97ac8c8..6f8e6ad 100644
--- a/drivers/net/wireless/ath/ath10k/mac.c
+++ b/drivers/net/wireless/ath/ath10k/mac.c
@@ -2939,7 +2939,8 @@ static int ath10k_set_frag_threshold(struct ieee80211_hw *hw, u32 value)
return ret;
}

-static void ath10k_flush(struct ieee80211_hw *hw, u32 queues, bool drop)
+static void ath10k_flush(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
+ u32 queues, bool drop)
{
struct ath10k *ar = hw->priv;
bool skip;
diff --git a/drivers/net/wireless/ath/ath9k/main.c b/drivers/net/wireless/ath/ath9k/main.c
index 21aa09e..5db6849 100644
--- a/drivers/net/wireless/ath/ath9k/main.c
+++ b/drivers/net/wireless/ath/ath9k/main.c
@@ -1818,7 +1818,8 @@ static void ath9k_set_coverage_class(struct ieee80211_hw *hw, u8 coverage_class)
mutex_unlock(&sc->mutex);
}

-static void ath9k_flush(struct ieee80211_hw *hw, u32 queues, bool drop)
+static void ath9k_flush(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
+ u32 queues, bool drop)
{
struct ath_softc *sc = hw->priv;
struct ath_hw *ah = sc->sc_ah;
diff --git a/drivers/net/wireless/ath/carl9170/main.c b/drivers/net/wireless/ath/carl9170/main.c
index 349fa22..b795b18 100644
--- a/drivers/net/wireless/ath/carl9170/main.c
+++ b/drivers/net/wireless/ath/carl9170/main.c
@@ -1708,7 +1708,9 @@ found:
return 0;
}

-static void carl9170_op_flush(struct ieee80211_hw *hw, u32 queues, bool drop)
+static void carl9170_op_flush(struct ieee80211_hw *hw,
+ struct ieee80211_vif *vif,
+ u32 queues, bool drop)
{
struct ar9170 *ar = hw->priv;
unsigned int vid;
diff --git a/drivers/net/wireless/brcm80211/brcmsmac/mac80211_if.c b/drivers/net/wireless/brcm80211/brcmsmac/mac80211_if.c
index edc5d10..778f593 100644
--- a/drivers/net/wireless/brcm80211/brcmsmac/mac80211_if.c
+++ b/drivers/net/wireless/brcm80211/brcmsmac/mac80211_if.c
@@ -899,7 +899,8 @@ static bool brcms_tx_flush_completed(struct brcms_info *wl)
return result;
}

-static void brcms_ops_flush(struct ieee80211_hw *hw, u32 queues, bool drop)
+static void brcms_ops_flush(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
+ u32 queues, bool drop)
{
struct brcms_info *wl = hw->priv;
int ret;
diff --git a/drivers/net/wireless/cw1200/sta.c b/drivers/net/wireless/cw1200/sta.c
index 010b252..949dc4e 100644
--- a/drivers/net/wireless/cw1200/sta.c
+++ b/drivers/net/wireless/cw1200/sta.c
@@ -935,7 +935,8 @@ static int __cw1200_flush(struct cw1200_common *priv, bool drop)
return ret;
}

-void cw1200_flush(struct ieee80211_hw *hw, u32 queues, bool drop)
+void cw1200_flush(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
+ u32 queues, bool drop)
{
struct cw1200_common *priv = hw->priv;

diff --git a/drivers/net/wireless/cw1200/sta.h b/drivers/net/wireless/cw1200/sta.h
index 35babb6..b7e386b 100644
--- a/drivers/net/wireless/cw1200/sta.h
+++ b/drivers/net/wireless/cw1200/sta.h
@@ -40,7 +40,8 @@ int cw1200_set_key(struct ieee80211_hw *dev, enum set_key_cmd cmd,

int cw1200_set_rts_threshold(struct ieee80211_hw *hw, u32 value);

-void cw1200_flush(struct ieee80211_hw *hw, u32 queues, bool drop);
+void cw1200_flush(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
+ u32 queues, bool drop);

u64 cw1200_prepare_multicast(struct ieee80211_hw *hw,
struct netdev_hw_addr_list *mc_list);
diff --git a/drivers/net/wireless/iwlegacy/common.c b/drivers/net/wireless/iwlegacy/common.c
index b03e22e..d9d22e5 100644
--- a/drivers/net/wireless/iwlegacy/common.c
+++ b/drivers/net/wireless/iwlegacy/common.c
@@ -4702,7 +4702,8 @@ out:
}
EXPORT_SYMBOL(il_mac_change_interface);

-void il_mac_flush(struct ieee80211_hw *hw, u32 queues, bool drop)
+void il_mac_flush(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
+ u32 queues, bool drop)
{
struct il_priv *il = hw->priv;
unsigned long timeout = jiffies + msecs_to_jiffies(500);
diff --git a/drivers/net/wireless/iwlegacy/common.h b/drivers/net/wireless/iwlegacy/common.h
index ad123d6..108e037 100644
--- a/drivers/net/wireless/iwlegacy/common.h
+++ b/drivers/net/wireless/iwlegacy/common.h
@@ -1722,7 +1722,8 @@ void il_mac_remove_interface(struct ieee80211_hw *hw,
struct ieee80211_vif *vif);
int il_mac_change_interface(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
enum nl80211_iftype newtype, bool newp2p);
-void il_mac_flush(struct ieee80211_hw *hw, u32 queues, bool drop);
+void il_mac_flush(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
+ u32 queues, bool drop);
int il_alloc_txq_mem(struct il_priv *il);
void il_free_txq_mem(struct il_priv *il);

diff --git a/drivers/net/wireless/iwlwifi/dvm/mac80211.c b/drivers/net/wireless/iwlwifi/dvm/mac80211.c
index d6e6405..7f0846e 100644
--- a/drivers/net/wireless/iwlwifi/dvm/mac80211.c
+++ b/drivers/net/wireless/iwlwifi/dvm/mac80211.c
@@ -1099,9 +1099,11 @@ static void iwlagn_configure_filter(struct ieee80211_hw *hw,
FIF_BCN_PRBRESP_PROMISC | FIF_CONTROL;
}

-static void iwlagn_mac_flush(struct ieee80211_hw *hw, u32 queues, bool drop)
+static void iwlagn_mac_flush(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
+ u32 queues, bool drop)
{
struct iwl_priv *priv = IWL_MAC80211_GET_DVM(hw);
+ u32 scd_queues;

mutex_lock(&priv->mutex);
IWL_DEBUG_MAC80211(priv, "enter\n");
@@ -1115,18 +1117,19 @@ static void iwlagn_mac_flush(struct ieee80211_hw *hw, u32 queues, bool drop)
goto done;
}

- /*
- * mac80211 will not push any more frames for transmit
- * until the flush is completed
- */
- if (drop) {
- IWL_DEBUG_MAC80211(priv, "send flush command\n");
- if (iwlagn_txfifo_flush(priv, 0)) {
- IWL_ERR(priv, "flush request fail\n");
- goto done;
- }
+ scd_queues = BIT(priv->cfg->base_params->num_of_queues) - 1;
+ scd_queues &= ~(BIT(IWL_IPAN_CMD_QUEUE_NUM) |
+ BIT(IWL_DEFAULT_CMD_QUEUE_NUM));
+
+ if (vif)
+ scd_queues &= ~BIT(vif->hw_queue[IEEE80211_AC_VO]);
+
+ IWL_DEBUG_TX_QUEUES(priv, "Flushing SCD queues: 0x%x\n", scd_queues);
+ if (iwlagn_txfifo_flush(priv, scd_queues)) {
+ IWL_ERR(priv, "flush request fail\n");
+ goto done;
}
- IWL_DEBUG_MAC80211(priv, "wait transmit/flush all frames\n");
+ IWL_DEBUG_TX_QUEUES(priv, "wait transmit/flush all frames\n");
iwl_trans_wait_tx_queue_empty(priv->trans);
done:
mutex_unlock(&priv->mutex);
diff --git a/drivers/net/wireless/iwlwifi/iwl-trans.h b/drivers/net/wireless/iwlwifi/iwl-trans.h
index 143292b..2379419 100644
--- a/drivers/net/wireless/iwlwifi/iwl-trans.h
+++ b/drivers/net/wireless/iwlwifi/iwl-trans.h
@@ -484,6 +484,7 @@ enum iwl_trans_state {
* Set during transport allocation.
* @hw_id_str: a string with info about HW ID. Set during transport allocation.
* @pm_support: set to true in start_hw if link pm is supported
+ * @ltr_enabled: set to true if the LTR is enabled
* @dev_cmd_pool: pool for Tx cmd allocation - for internal use only.
* The user should use iwl_trans_{alloc,free}_tx_cmd.
* @dev_cmd_headroom: room needed for the transport's private use before the
@@ -508,6 +509,7 @@ struct iwl_trans {
u8 rx_mpdu_cmd, rx_mpdu_cmd_hdr_size;

bool pm_support;
+ bool ltr_enabled;

/* The following fields are internal only */
struct kmem_cache *dev_cmd_pool;
diff --git a/drivers/net/wireless/iwlwifi/mvm/fw-api-power.h b/drivers/net/wireless/iwlwifi/mvm/fw-api-power.h
index 5cb93ae..71eee38 100644
--- a/drivers/net/wireless/iwlwifi/mvm/fw-api-power.h
+++ b/drivers/net/wireless/iwlwifi/mvm/fw-api-power.h
@@ -66,13 +66,46 @@

/* Power Management Commands, Responses, Notifications */

+/**
+ * enum iwl_ltr_config_flags - masks for LTR config command flags
+ * @LTR_CFG_FLAG_FEATURE_ENABLE: Feature operational status
+ * @LTR_CFG_FLAG_HW_DIS_ON_SHADOW_REG_ACCESS: allow LTR change on shadow
+ * memory access
+ * @LTR_CFG_FLAG_HW_EN_SHRT_WR_THROUGH: allow LTR msg send on ANY LTR
+ * reg change
+ * @LTR_CFG_FLAG_HW_DIS_ON_D0_2_D3: allow LTR msg send on transition from
+ * D0 to D3
+ * @LTR_CFG_FLAG_SW_SET_SHORT: fixed static short LTR register
+ * @LTR_CFG_FLAG_SW_SET_LONG: fixed static short LONG register
+ * @LTR_CFG_FLAG_DENIE_C10_ON_PD: allow going into C10 on PD
+ */
+enum iwl_ltr_config_flags {
+ LTR_CFG_FLAG_FEATURE_ENABLE = BIT(0),
+ LTR_CFG_FLAG_HW_DIS_ON_SHADOW_REG_ACCESS = BIT(1),
+ LTR_CFG_FLAG_HW_EN_SHRT_WR_THROUGH = BIT(2),
+ LTR_CFG_FLAG_HW_DIS_ON_D0_2_D3 = BIT(3),
+ LTR_CFG_FLAG_SW_SET_SHORT = BIT(4),
+ LTR_CFG_FLAG_SW_SET_LONG = BIT(5),
+ LTR_CFG_FLAG_DENIE_C10_ON_PD = BIT(6),
+};
+
+/**
+ * struct iwl_ltr_config_cmd - configures the LTR
+ * @flags: See %enum iwl_ltr_config_flags
+ */
+struct iwl_ltr_config_cmd {
+ __le32 flags;
+ __le32 static_long;
+ __le32 static_short;
+} __packed;
+
/* Radio LP RX Energy Threshold measured in dBm */
#define POWER_LPRX_RSSI_THRESHOLD 75
#define POWER_LPRX_RSSI_THRESHOLD_MAX 94
#define POWER_LPRX_RSSI_THRESHOLD_MIN 30

/**
- * enum iwl_scan_flags - masks for power table command flags
+ * enum iwl_power_flags - masks for power table command flags
* @POWER_FLAGS_POWER_SAVE_ENA_MSK: '1' Allow to save power by turning off
* receiver and transmitter. '0' - does not allow.
* @POWER_FLAGS_POWER_MANAGEMENT_ENA_MSK: '0' Driver disables power management,
diff --git a/drivers/net/wireless/iwlwifi/mvm/fw-api.h b/drivers/net/wireless/iwlwifi/mvm/fw-api.h
index bad5a55..c079783 100644
--- a/drivers/net/wireless/iwlwifi/mvm/fw-api.h
+++ b/drivers/net/wireless/iwlwifi/mvm/fw-api.h
@@ -141,6 +141,7 @@ enum {

/* Power - legacy power table command */
POWER_TABLE_CMD = 0x77,
+ LTR_CONFIG = 0xee,

/* Thermal Throttling*/
REPLY_THERMAL_MNG_BACKOFF = 0x7e,
diff --git a/drivers/net/wireless/iwlwifi/mvm/fw.c b/drivers/net/wireless/iwlwifi/mvm/fw.c
index 70e5297..d6de231 100644
--- a/drivers/net/wireless/iwlwifi/mvm/fw.c
+++ b/drivers/net/wireless/iwlwifi/mvm/fw.c
@@ -431,6 +431,15 @@ int iwl_mvm_up(struct iwl_mvm *mvm)
goto error;
}

+ if (mvm->trans->ltr_enabled) {
+ struct iwl_ltr_config_cmd cmd = {
+ .flags = cpu_to_le32(LTR_CFG_FLAG_FEATURE_ENABLE),
+ };
+
+ WARN_ON(iwl_mvm_send_cmd_pdu(mvm, LTR_CONFIG, 0,
+ sizeof(cmd), &cmd));
+ }
+
ret = iwl_mvm_power_update_device_mode(mvm);
if (ret)
goto error;
diff --git a/drivers/net/wireless/iwlwifi/mvm/ops.c b/drivers/net/wireless/iwlwifi/mvm/ops.c
index bb58342..50620a7 100644
--- a/drivers/net/wireless/iwlwifi/mvm/ops.c
+++ b/drivers/net/wireless/iwlwifi/mvm/ops.c
@@ -310,6 +310,7 @@ static const char *iwl_mvm_cmd_strings[REPLY_MAX] = {
CMD(REPLY_BEACON_FILTERING_CMD),
CMD(REPLY_THERMAL_MNG_BACKOFF),
CMD(MAC_PM_POWER_TABLE),
+ CMD(LTR_CONFIG),
CMD(BT_COEX_CI),
};
#undef CMD
diff --git a/drivers/net/wireless/iwlwifi/mvm/tx.c b/drivers/net/wireless/iwlwifi/mvm/tx.c
index edf9f7b..c8f6974 100644
--- a/drivers/net/wireless/iwlwifi/mvm/tx.c
+++ b/drivers/net/wireless/iwlwifi/mvm/tx.c
@@ -173,14 +173,10 @@ static void iwl_mvm_set_tx_cmd_rate(struct iwl_mvm *mvm,

/*
* for data packets, rate info comes from the table inside the fw. This
- * table is controlled by LINK_QUALITY commands. Exclude ctrl port
- * frames like EAPOLs which should be treated as mgmt frames. This
- * avoids them being sent initially in high rates which increases the
- * chances for completion of the 4-Way handshake.
+ * table is controlled by LINK_QUALITY commands
*/

- if (ieee80211_is_data(fc) && sta &&
- !(info->control.flags & IEEE80211_TX_CTRL_PORT_CTRL_PROTO)) {
+ if (ieee80211_is_data(fc) && sta) {
tx_cmd->initial_rate_index = 0;
tx_cmd->tx_flags |= cpu_to_le32(TX_CMD_FLG_STA_RATE);
return;
diff --git a/drivers/net/wireless/iwlwifi/pcie/trans.c b/drivers/net/wireless/iwlwifi/pcie/trans.c
index f69aeb3..e7b2565 100644
--- a/drivers/net/wireless/iwlwifi/pcie/trans.c
+++ b/drivers/net/wireless/iwlwifi/pcie/trans.c
@@ -121,6 +121,7 @@ static void iwl_pcie_apm_config(struct iwl_trans *trans)
{
struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
u16 lctl;
+ u16 cap;

/*
* HW bug W/A for instability in PCIe bus L0S->L1 transition.
@@ -131,16 +132,17 @@ static void iwl_pcie_apm_config(struct iwl_trans *trans)
* power savings, even without L1.
*/
pcie_capability_read_word(trans_pcie->pci_dev, PCI_EXP_LNKCTL, &lctl);
- if (lctl & PCI_EXP_LNKCTL_ASPM_L1) {
- /* L1-ASPM enabled; disable(!) L0S */
+ if (lctl & PCI_EXP_LNKCTL_ASPM_L1)
iwl_set_bit(trans, CSR_GIO_REG, CSR_GIO_REG_VAL_L0S_ENABLED);
- dev_info(trans->dev, "L1 Enabled; Disabling L0S\n");
- } else {
- /* L1-ASPM disabled; enable(!) L0S */
+ else
iwl_clear_bit(trans, CSR_GIO_REG, CSR_GIO_REG_VAL_L0S_ENABLED);
- dev_info(trans->dev, "L1 Disabled; Enabling L0S\n");
- }
trans->pm_support = !(lctl & PCI_EXP_LNKCTL_ASPM_L0S);
+
+ pcie_capability_read_word(trans_pcie->pci_dev, PCI_EXP_DEVCTL2, &cap);
+ trans->ltr_enabled = cap & PCI_EXP_DEVCTL2_LTR_EN;
+ dev_info(trans->dev, "L1 %sabled - LTR %sabled\n",
+ (lctl & PCI_EXP_LNKCTL_ASPM_L1) ? "En" : "Dis",
+ trans->ltr_enabled ? "En" : "Dis");
}

/*
diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c
index a1b32ee..1a0c2bb 100644
--- a/drivers/net/wireless/mac80211_hwsim.c
+++ b/drivers/net/wireless/mac80211_hwsim.c
@@ -1466,7 +1466,9 @@ static int mac80211_hwsim_ampdu_action(struct ieee80211_hw *hw,
return 0;
}

-static void mac80211_hwsim_flush(struct ieee80211_hw *hw, u32 queues, bool drop)
+static void mac80211_hwsim_flush(struct ieee80211_hw *hw,
+ struct ieee80211_vif *vif,
+ u32 queues, bool drop)
{
/* Not implemented, queues only on kernel side */
}
diff --git a/drivers/net/wireless/p54/main.c b/drivers/net/wireless/p54/main.c
index 067e6f2..c214332 100644
--- a/drivers/net/wireless/p54/main.c
+++ b/drivers/net/wireless/p54/main.c
@@ -670,7 +670,8 @@ static unsigned int p54_flush_count(struct p54_common *priv)
return total;
}

-static void p54_flush(struct ieee80211_hw *dev, u32 queues, bool drop)
+static void p54_flush(struct ieee80211_hw *dev, struct ieee80211_vif *vif,
+ u32 queues, bool drop)
{
struct p54_common *priv = dev->priv;
unsigned int total, i;
diff --git a/drivers/net/wireless/rt2x00/rt2800usb.c b/drivers/net/wireless/rt2x00/rt2800usb.c
index a81ceb6..24da6e8 100644
--- a/drivers/net/wireless/rt2x00/rt2800usb.c
+++ b/drivers/net/wireless/rt2x00/rt2800usb.c
@@ -1064,6 +1064,7 @@ static struct usb_device_id rt2800usb_device_table[] = {
/* Ovislink */
{ USB_DEVICE(0x1b75, 0x3071) },
{ USB_DEVICE(0x1b75, 0x3072) },
+ { USB_DEVICE(0x1b75, 0xa200) },
/* Para */
{ USB_DEVICE(0x20b8, 0x8888) },
/* Pegatron */
diff --git a/drivers/net/wireless/rt2x00/rt2x00.h b/drivers/net/wireless/rt2x00/rt2x00.h
index c06ffb7..6a64211 100644
--- a/drivers/net/wireless/rt2x00/rt2x00.h
+++ b/drivers/net/wireless/rt2x00/rt2x00.h
@@ -1451,7 +1451,8 @@ int rt2x00mac_conf_tx(struct ieee80211_hw *hw,
struct ieee80211_vif *vif, u16 queue,
const struct ieee80211_tx_queue_params *params);
void rt2x00mac_rfkill_poll(struct ieee80211_hw *hw);
-void rt2x00mac_flush(struct ieee80211_hw *hw, u32 queues, bool drop);
+void rt2x00mac_flush(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
+ u32 queues, bool drop);
int rt2x00mac_set_antenna(struct ieee80211_hw *hw, u32 tx_ant, u32 rx_ant);
int rt2x00mac_get_antenna(struct ieee80211_hw *hw, u32 *tx_ant, u32 *rx_ant);
void rt2x00mac_get_ringparam(struct ieee80211_hw *hw,
diff --git a/drivers/net/wireless/rt2x00/rt2x00mac.c b/drivers/net/wireless/rt2x00/rt2x00mac.c
index bc21fae..04896fa 100644
--- a/drivers/net/wireless/rt2x00/rt2x00mac.c
+++ b/drivers/net/wireless/rt2x00/rt2x00mac.c
@@ -753,7 +753,8 @@ void rt2x00mac_rfkill_poll(struct ieee80211_hw *hw)
}
EXPORT_SYMBOL_GPL(rt2x00mac_rfkill_poll);

-void rt2x00mac_flush(struct ieee80211_hw *hw, u32 queues, bool drop)
+void rt2x00mac_flush(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
+ u32 queues, bool drop)
{
struct rt2x00_dev *rt2x00dev = hw->priv;
struct data_queue *queue;
diff --git a/drivers/net/wireless/rtlwifi/core.c b/drivers/net/wireless/rtlwifi/core.c
index 2d337a0..2b16dfb 100644
--- a/drivers/net/wireless/rtlwifi/core.c
+++ b/drivers/net/wireless/rtlwifi/core.c
@@ -1309,7 +1309,8 @@ static void rtl_op_rfkill_poll(struct ieee80211_hw *hw)
* before switch channel or power save, or tx buffer packet
* maybe send after offchannel or rf sleep, this may cause
* dis-association by AP */
-static void rtl_op_flush(struct ieee80211_hw *hw, u32 queues, bool drop)
+static void rtl_op_flush(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
+ u32 queues, bool drop)
{
struct rtl_priv *rtlpriv = rtl_priv(hw);

diff --git a/drivers/net/wireless/ti/wlcore/main.c b/drivers/net/wireless/ti/wlcore/main.c
index 0368b9c..8f63463 100644
--- a/drivers/net/wireless/ti/wlcore/main.c
+++ b/drivers/net/wireless/ti/wlcore/main.c
@@ -5147,7 +5147,8 @@ out:
mutex_unlock(&wl->mutex);
}

-static void wlcore_op_flush(struct ieee80211_hw *hw, u32 queues, bool drop)
+static void wlcore_op_flush(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
+ u32 queues, bool drop)
{
struct wl1271 *wl = hw->priv;

diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c
index 21ba076..6a02775 100644
--- a/drivers/pci/pci-sysfs.c
+++ b/drivers/pci/pci-sysfs.c
@@ -186,7 +186,7 @@ static ssize_t modalias_show(struct device *dev, struct device_attribute *attr,
}
static DEVICE_ATTR_RO(modalias);

-static ssize_t enabled_store(struct device *dev,
+static ssize_t enable_store(struct device *dev,
struct device_attribute *attr, const char *buf,
size_t count)
{
@@ -212,7 +212,7 @@ static ssize_t enabled_store(struct device *dev,
return result < 0 ? result : count;
}

-static ssize_t enabled_show(struct device *dev,
+static ssize_t enable_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct pci_dev *pdev;
@@ -220,7 +220,7 @@ static ssize_t enabled_show(struct device *dev,
pdev = to_pci_dev (dev);
return sprintf (buf, "%u\n", atomic_read(&pdev->enable_cnt));
}
-static DEVICE_ATTR_RW(enabled);
+static DEVICE_ATTR_RW(enable);

#ifdef CONFIG_NUMA
static ssize_t
@@ -531,7 +531,7 @@ static struct attribute *pci_dev_attrs[] = {
#endif
&dev_attr_dma_mask_bits.attr,
&dev_attr_consistent_dma_mask_bits.attr,
- &dev_attr_enabled.attr,
+ &dev_attr_enable.attr,
&dev_attr_broken_parity_status.attr,
&dev_attr_msi_bus.attr,
#if defined(CONFIG_PM_RUNTIME) && defined(CONFIG_ACPI)
diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.c b/drivers/scsi/qla2xxx/tcm_qla2xxx.c
index 7eb19be..b93f24a 100644
--- a/drivers/scsi/qla2xxx/tcm_qla2xxx.c
+++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.c
@@ -740,7 +740,16 @@ static void tcm_qla2xxx_clear_nacl_from_fcport_map(struct qla_tgt_sess *sess)
pr_debug("fc_rport domain: port_id 0x%06x\n", nacl->nport_id);

node = btree_remove32(&lport->lport_fcport_map, nacl->nport_id);
- WARN_ON(node && (node != se_nacl));
+ if (WARN_ON(node && (node != se_nacl))) {
+ /*
+ * The nacl no longer matches what we think it should be.
+ * Most likely a new dynamic acl has been added while
+ * someone dropped the hardware lock. It clearly is a
+ * bug elsewhere, but this bit can't make things worse.
+ */
+ btree_insert32(&lport->lport_fcport_map, nacl->nport_id,
+ node, GFP_ATOMIC);
+ }

pr_debug("Removed from fcport_map: %p for WWNN: 0x%016LX, port_id: 0x%06x\n",
se_nacl, nacl->nport_wwnn, nacl->nport_id);
diff --git a/drivers/spi/spi-pl022.c b/drivers/spi/spi-pl022.c
index 2789b45..971855e 100644
--- a/drivers/spi/spi-pl022.c
+++ b/drivers/spi/spi-pl022.c
@@ -1075,7 +1075,7 @@ err_rxdesc:
pl022->sgt_tx.nents, DMA_TO_DEVICE);
err_tx_sgmap:
dma_unmap_sg(rxchan->device->dev, pl022->sgt_rx.sgl,
- pl022->sgt_tx.nents, DMA_FROM_DEVICE);
+ pl022->sgt_rx.nents, DMA_FROM_DEVICE);
err_rx_sgmap:
sg_free_table(&pl022->sgt_tx);
err_alloc_tx_sg:
diff --git a/drivers/staging/iio/adc/mxs-lradc.c b/drivers/staging/iio/adc/mxs-lradc.c
index 558a76c..da4f0b1 100644
--- a/drivers/staging/iio/adc/mxs-lradc.c
+++ b/drivers/staging/iio/adc/mxs-lradc.c
@@ -1307,14 +1307,16 @@ static int mxs_lradc_probe(struct platform_device *pdev)
/* Grab all IRQ sources */
for (i = 0; i < of_cfg->irq_count; i++) {
lradc->irq[i] = platform_get_irq(pdev, i);
- if (lradc->irq[i] < 0)
- return -EINVAL;
+ if (lradc->irq[i] < 0) {
+ ret = lradc->irq[i];
+ goto err_clk;
+ }

ret = devm_request_irq(dev, lradc->irq[i],
mxs_lradc_handle_irq, 0,
of_cfg->irq_name[i], iio);
if (ret)
- return ret;
+ goto err_clk;
}

platform_set_drvdata(pdev, iio);
@@ -1334,7 +1336,7 @@ static int mxs_lradc_probe(struct platform_device *pdev)
&mxs_lradc_trigger_handler,
&mxs_lradc_buffer_ops);
if (ret)
- return ret;
+ goto err_clk;

ret = mxs_lradc_trigger_init(iio);
if (ret)
@@ -1369,6 +1371,8 @@ err_dev:
mxs_lradc_trigger_remove(iio);
err_trig:
iio_triggered_buffer_cleanup(iio);
+err_clk:
+ clk_disable_unprepare(lradc->clk);
return ret;
}

diff --git a/drivers/staging/iio/impedance-analyzer/ad5933.c b/drivers/staging/iio/impedance-analyzer/ad5933.c
index 2b96665..97d4b3f 100644
--- a/drivers/staging/iio/impedance-analyzer/ad5933.c
+++ b/drivers/staging/iio/impedance-analyzer/ad5933.c
@@ -115,6 +115,7 @@ static const struct iio_chan_spec ad5933_channels[] = {
.channel = 0,
.info_mask_separate = BIT(IIO_CHAN_INFO_PROCESSED),
.address = AD5933_REG_TEMP_DATA,
+ .scan_index = -1,
.scan_type = {
.sign = 's',
.realbits = 14,
@@ -124,9 +125,7 @@ static const struct iio_chan_spec ad5933_channels[] = {
.type = IIO_VOLTAGE,
.indexed = 1,
.channel = 0,
- .extend_name = "real_raw",
- .info_mask_separate = BIT(IIO_CHAN_INFO_RAW) |
- BIT(IIO_CHAN_INFO_SCALE),
+ .extend_name = "real",
.address = AD5933_REG_REAL_DATA,
.scan_index = 0,
.scan_type = {
@@ -138,9 +137,7 @@ static const struct iio_chan_spec ad5933_channels[] = {
.type = IIO_VOLTAGE,
.indexed = 1,
.channel = 0,
- .extend_name = "imag_raw",
- .info_mask_separate = BIT(IIO_CHAN_INFO_RAW) |
- BIT(IIO_CHAN_INFO_SCALE),
+ .extend_name = "imag",
.address = AD5933_REG_IMAG_DATA,
.scan_index = 1,
.scan_type = {
@@ -748,14 +745,14 @@ static int ad5933_probe(struct i2c_client *client,
indio_dev->name = id->name;
indio_dev->modes = INDIO_DIRECT_MODE;
indio_dev->channels = ad5933_channels;
- indio_dev->num_channels = 1; /* only register temp0_input */
+ indio_dev->num_channels = ARRAY_SIZE(ad5933_channels);

ret = ad5933_register_ring_funcs_and_init(indio_dev);
if (ret)
goto error_disable_reg;

- /* skip temp0_input, register in0_(real|imag)_raw */
- ret = iio_buffer_register(indio_dev, &ad5933_channels[1], 2);
+ ret = iio_buffer_register(indio_dev, ad5933_channels,
+ ARRAY_SIZE(ad5933_channels));
if (ret)
goto error_unreg_ring;

diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c
index 04e9288..27bee70 100644
--- a/drivers/target/target_core_device.c
+++ b/drivers/target/target_core_device.c
@@ -1321,7 +1321,8 @@ int core_dev_add_initiator_node_lun_acl(
* Check to see if there are any existing persistent reservation APTPL
* pre-registrations that need to be enabled for this LUN ACL..
*/
- core_scsi3_check_aptpl_registration(lun->lun_se_dev, tpg, lun, lacl);
+ core_scsi3_check_aptpl_registration(lun->lun_se_dev, tpg, lun, nacl,
+ lacl->mapped_lun);
return 0;
}

diff --git a/drivers/target/target_core_pr.c b/drivers/target/target_core_pr.c
index 3013287..1205dbd 100644
--- a/drivers/target/target_core_pr.c
+++ b/drivers/target/target_core_pr.c
@@ -944,10 +944,10 @@ int core_scsi3_check_aptpl_registration(
struct se_device *dev,
struct se_portal_group *tpg,
struct se_lun *lun,
- struct se_lun_acl *lun_acl)
+ struct se_node_acl *nacl,
+ u32 mapped_lun)
{
- struct se_node_acl *nacl = lun_acl->se_lun_nacl;
- struct se_dev_entry *deve = nacl->device_list[lun_acl->mapped_lun];
+ struct se_dev_entry *deve = nacl->device_list[mapped_lun];

if (dev->dev_reservation_flags & DRF_SPC2_RESERVATIONS)
return 0;
diff --git a/drivers/target/target_core_pr.h b/drivers/target/target_core_pr.h
index ed75cdd..14a0a2e 100644
--- a/drivers/target/target_core_pr.h
+++ b/drivers/target/target_core_pr.h
@@ -55,7 +55,7 @@ extern int core_scsi3_alloc_aptpl_registration(
unsigned char *, u16, u32, int, int, u8);
extern int core_scsi3_check_aptpl_registration(struct se_device *,
struct se_portal_group *, struct se_lun *,
- struct se_lun_acl *);
+ struct se_node_acl *, u32);
extern void core_scsi3_free_pr_reg_from_nacl(struct se_device *,
struct se_node_acl *);
extern void core_scsi3_free_all_registrations(struct se_device *);
diff --git a/drivers/target/target_core_tpg.c b/drivers/target/target_core_tpg.c
index 2a573de..6c4596a 100644
--- a/drivers/target/target_core_tpg.c
+++ b/drivers/target/target_core_tpg.c
@@ -40,6 +40,7 @@
#include <target/target_core_fabric.h>

#include "target_core_internal.h"
+#include "target_core_pr.h"

extern struct se_device *g_lun0_dev;

@@ -166,6 +167,13 @@ void core_tpg_add_node_to_devs(

core_enable_device_list_for_node(lun, NULL, lun->unpacked_lun,
lun_access, acl, tpg);
+ /*
+ * Check to see if there are any existing persistent reservation
+ * APTPL pre-registrations that need to be enabled for this dynamic
+ * LUN ACL now..
+ */
+ core_scsi3_check_aptpl_registration(dev, tpg, lun, acl,
+ lun->unpacked_lun);
spin_lock(&tpg->tpg_lun_lock);
}
spin_unlock(&tpg->tpg_lun_lock);
diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c
index ea545f4..3acb125 100644
--- a/drivers/target/target_core_transport.c
+++ b/drivers/target/target_core_transport.c
@@ -1817,8 +1817,7 @@ static void transport_complete_qf(struct se_cmd *cmd)
if (cmd->se_cmd_flags & SCF_TRANSPORT_TASK_SENSE) {
trace_target_cmd_complete(cmd);
ret = cmd->se_tfo->queue_status(cmd);
- if (ret)
- goto out;
+ goto out;
}

switch (cmd->data_direction) {
diff --git a/drivers/usb/dwc3/ep0.c b/drivers/usb/dwc3/ep0.c
index 21a3520..0985ff7 100644
--- a/drivers/usb/dwc3/ep0.c
+++ b/drivers/usb/dwc3/ep0.c
@@ -251,7 +251,7 @@ static void dwc3_ep0_stall_and_restart(struct dwc3 *dwc)

/* stall is always issued on EP0 */
dep = dwc->eps[0];
- __dwc3_gadget_ep_set_halt(dep, 1);
+ __dwc3_gadget_ep_set_halt(dep, 1, false);
dep->flags = DWC3_EP_ENABLED;
dwc->delayed_status = false;

@@ -461,7 +461,7 @@ static int dwc3_ep0_handle_feature(struct dwc3 *dwc,
return -EINVAL;
if (set == 0 && (dep->flags & DWC3_EP_WEDGE))
break;
- ret = __dwc3_gadget_ep_set_halt(dep, set);
+ ret = __dwc3_gadget_ep_set_halt(dep, set, true);
if (ret)
return -EINVAL;
break;
diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
index c37da0c..20e4d2e 100644
--- a/drivers/usb/dwc3/gadget.c
+++ b/drivers/usb/dwc3/gadget.c
@@ -532,12 +532,11 @@ static int __dwc3_gadget_ep_enable(struct dwc3_ep *dep,
if (!usb_endpoint_xfer_isoc(desc))
return 0;

- memset(&trb_link, 0, sizeof(trb_link));
-
/* Link TRB for ISOC. The HWO bit is never reset */
trb_st_hw = &dep->trb_pool[0];

trb_link = &dep->trb_pool[DWC3_TRB_NUM - 1];
+ memset(trb_link, 0, sizeof(*trb_link));

trb_link->bpl = lower_32_bits(dwc3_trb_dma_offset(dep, trb_st_hw));
trb_link->bph = upper_32_bits(dwc3_trb_dma_offset(dep, trb_st_hw));
@@ -588,7 +587,7 @@ static int __dwc3_gadget_ep_disable(struct dwc3_ep *dep)

/* make sure HW endpoint isn't stalled */
if (dep->flags & DWC3_EP_STALL)
- __dwc3_gadget_ep_set_halt(dep, 0);
+ __dwc3_gadget_ep_set_halt(dep, 0, false);

reg = dwc3_readl(dwc->regs, DWC3_DALEPENA);
reg &= ~DWC3_DALEPENA_EP(dep->number);
@@ -1186,7 +1185,7 @@ out0:
return ret;
}

-int __dwc3_gadget_ep_set_halt(struct dwc3_ep *dep, int value)
+int __dwc3_gadget_ep_set_halt(struct dwc3_ep *dep, int value, int protocol)
{
struct dwc3_gadget_ep_cmd_params params;
struct dwc3 *dwc = dep->dwc;
@@ -1195,6 +1194,14 @@ int __dwc3_gadget_ep_set_halt(struct dwc3_ep *dep, int value)
memset(&params, 0x00, sizeof(params));

if (value) {
+ if (!protocol && ((dep->direction && dep->flags & DWC3_EP_BUSY) ||
+ (!list_empty(&dep->req_queued) ||
+ !list_empty(&dep->request_list)))) {
+ dev_dbg(dwc->dev, "%s: pending request, cannot halt\n",
+ dep->name);
+ return -EAGAIN;
+ }
+
ret = dwc3_send_gadget_ep_cmd(dwc, dep->number,
DWC3_DEPCMD_SETSTALL, &params);
if (ret)
@@ -1234,7 +1241,7 @@ static int dwc3_gadget_ep_set_halt(struct usb_ep *ep, int value)
goto out;
}

- ret = __dwc3_gadget_ep_set_halt(dep, value);
+ ret = __dwc3_gadget_ep_set_halt(dep, value, false);
out:
spin_unlock_irqrestore(&dwc->lock, flags);

diff --git a/drivers/usb/dwc3/gadget.h b/drivers/usb/dwc3/gadget.h
index a0ee75b..ac625582 100644
--- a/drivers/usb/dwc3/gadget.h
+++ b/drivers/usb/dwc3/gadget.h
@@ -85,7 +85,7 @@ void dwc3_ep0_out_start(struct dwc3 *dwc);
int dwc3_gadget_ep0_set_halt(struct usb_ep *ep, int value);
int dwc3_gadget_ep0_queue(struct usb_ep *ep, struct usb_request *request,
gfp_t gfp_flags);
-int __dwc3_gadget_ep_set_halt(struct dwc3_ep *dep, int value);
+int __dwc3_gadget_ep_set_halt(struct dwc3_ep *dep, int value, int protocol);

/**
* dwc3_gadget_ep_get_transfer_index - Gets transfer index from HW
diff --git a/drivers/usb/gadget/f_acm.c b/drivers/usb/gadget/f_acm.c
index ab1065a..3384486 100644
--- a/drivers/usb/gadget/f_acm.c
+++ b/drivers/usb/gadget/f_acm.c
@@ -430,11 +430,12 @@ static int acm_set_alt(struct usb_function *f, unsigned intf, unsigned alt)
if (acm->notify->driver_data) {
VDBG(cdev, "reset acm control interface %d\n", intf);
usb_ep_disable(acm->notify);
- } else {
- VDBG(cdev, "init acm ctrl interface %d\n", intf);
+ }
+
+ if (!acm->notify->desc)
if (config_ep_by_speed(cdev->gadget, f, acm->notify))
return -EINVAL;
- }
+
usb_ep_enable(acm->notify);
acm->notify->driver_data = acm;

diff --git a/drivers/usb/gadget/udc-core.c b/drivers/usb/gadget/udc-core.c
index 27768a7..9ce0b13 100644
--- a/drivers/usb/gadget/udc-core.c
+++ b/drivers/usb/gadget/udc-core.c
@@ -456,6 +456,11 @@ static ssize_t usb_udc_softconn_store(struct device *dev,
{
struct usb_udc *udc = container_of(dev, struct usb_udc, dev);

+ if (!udc->driver) {
+ dev_err(dev, "soft-connect without a gadget driver\n");
+ return -EOPNOTSUPP;
+ }
+
if (sysfs_streq(buf, "connect")) {
usb_gadget_udc_start(udc->gadget, udc->driver);
usb_gadget_connect(udc->gadget);
diff --git a/drivers/usb/musb/musb_cppi41.c b/drivers/usb/musb/musb_cppi41.c
index 4a5af5c..3faaeb7 100644
--- a/drivers/usb/musb/musb_cppi41.c
+++ b/drivers/usb/musb/musb_cppi41.c
@@ -190,7 +190,8 @@ static enum hrtimer_restart cppi41_recheck_tx_req(struct hrtimer *timer)
}
}

- if (!list_empty(&controller->early_tx_list)) {
+ if (!list_empty(&controller->early_tx_list) &&
+ !hrtimer_is_queued(&controller->early_tx)) {
ret = HRTIMER_RESTART;
hrtimer_forward_now(&controller->early_tx,
ktime_set(0, 150 * NSEC_PER_USEC));
diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c
index 3c73940..ad85f16 100644
--- a/drivers/usb/serial/cp210x.c
+++ b/drivers/usb/serial/cp210x.c
@@ -155,6 +155,7 @@ static const struct usb_device_id id_table[] = {
{ USB_DEVICE(0x18EF, 0xE00F) }, /* ELV USB-I2C-Interface */
{ USB_DEVICE(0x1ADB, 0x0001) }, /* Schweitzer Engineering C662 Cable */
{ USB_DEVICE(0x1B1C, 0x1C00) }, /* Corsair USB Dongle */
+ { USB_DEVICE(0x1BA4, 0x0002) }, /* Silicon Labs 358x factory default */
{ USB_DEVICE(0x1BE3, 0x07A6) }, /* WAGO 750-923 USB Service Cable */
{ USB_DEVICE(0x1D6F, 0x0010) }, /* Seluxit ApS RF Dongle */
{ USB_DEVICE(0x1E29, 0x0102) }, /* Festo CPX-USB */
diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c
index c903b0b..9e0e29f 100644
--- a/drivers/usb/serial/ftdi_sio.c
+++ b/drivers/usb/serial/ftdi_sio.c
@@ -146,6 +146,7 @@ static struct ftdi_sio_quirk ftdi_8u2232c_quirk = {
* /sys/bus/usb-serial/drivers/ftdi_sio/new_id and send a patch or report.
*/
static struct usb_device_id id_table_combined [] = {
+ { USB_DEVICE(FTDI_VID, FTDI_BRICK_PID) },
{ USB_DEVICE(FTDI_VID, FTDI_ZEITCONTROL_TAGTRACE_MIFARE_PID) },
{ USB_DEVICE(FTDI_VID, FTDI_CTI_MINI_PID) },
{ USB_DEVICE(FTDI_VID, FTDI_CTI_NANO_PID) },
@@ -675,6 +676,8 @@ static struct usb_device_id id_table_combined [] = {
{ USB_DEVICE(FTDI_VID, XSENS_CONVERTER_5_PID) },
{ USB_DEVICE(FTDI_VID, XSENS_CONVERTER_6_PID) },
{ USB_DEVICE(FTDI_VID, XSENS_CONVERTER_7_PID) },
+ { USB_DEVICE(XSENS_VID, XSENS_AWINDA_DONGLE_PID) },
+ { USB_DEVICE(XSENS_VID, XSENS_AWINDA_STATION_PID) },
{ USB_DEVICE(XSENS_VID, XSENS_CONVERTER_PID) },
{ USB_DEVICE(XSENS_VID, XSENS_MTW_PID) },
{ USB_DEVICE(FTDI_VID, FTDI_OMNI1509) },
diff --git a/drivers/usb/serial/ftdi_sio_ids.h b/drivers/usb/serial/ftdi_sio_ids.h
index 5937b2d..6786b70 100644
--- a/drivers/usb/serial/ftdi_sio_ids.h
+++ b/drivers/usb/serial/ftdi_sio_ids.h
@@ -30,6 +30,12 @@

/*** third-party PIDs (using FTDI_VID) ***/

+/*
+ * Certain versions of the official Windows FTDI driver reprogrammed
+ * counterfeit FTDI devices to PID 0. Support these devices anyway.
+ */
+#define FTDI_BRICK_PID 0x0000
+
#define FTDI_LUMEL_PD12_PID 0x6002

/*
@@ -143,8 +149,12 @@
* Xsens Technologies BV products (http://www.xsens.com).
*/
#define XSENS_VID 0x2639
-#define XSENS_CONVERTER_PID 0xD00D /* Xsens USB-serial converter */
+#define XSENS_AWINDA_STATION_PID 0x0101
+#define XSENS_AWINDA_DONGLE_PID 0x0102
#define XSENS_MTW_PID 0x0200 /* Xsens MTw */
+#define XSENS_CONVERTER_PID 0xD00D /* Xsens USB-serial converter */
+
+/* Xsens devices using FTDI VID */
#define XSENS_CONVERTER_0_PID 0xD388 /* Xsens USB converter */
#define XSENS_CONVERTER_1_PID 0xD389 /* Xsens Wireless Receiver */
#define XSENS_CONVERTER_2_PID 0xD38A
diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c
index 454c6d1..edbd457 100644
--- a/drivers/usb/serial/option.c
+++ b/drivers/usb/serial/option.c
@@ -269,6 +269,7 @@ static void option_instat_callback(struct urb *urb);
#define TELIT_PRODUCT_DE910_DUAL 0x1010
#define TELIT_PRODUCT_UE910_V2 0x1012
#define TELIT_PRODUCT_LE920 0x1200
+#define TELIT_PRODUCT_LE910 0x1201

/* ZTE PRODUCTS */
#define ZTE_VENDOR_ID 0x19d2
@@ -361,6 +362,7 @@ static void option_instat_callback(struct urb *urb);

/* Haier products */
#define HAIER_VENDOR_ID 0x201e
+#define HAIER_PRODUCT_CE81B 0x10f8
#define HAIER_PRODUCT_CE100 0x2009

/* Cinterion (formerly Siemens) products */
@@ -588,6 +590,11 @@ static const struct option_blacklist_info zte_1255_blacklist = {
.reserved = BIT(3) | BIT(4),
};

+static const struct option_blacklist_info telit_le910_blacklist = {
+ .sendsetup = BIT(0),
+ .reserved = BIT(1) | BIT(2),
+};
+
static const struct option_blacklist_info telit_le920_blacklist = {
.sendsetup = BIT(0),
.reserved = BIT(1) | BIT(5),
@@ -1137,6 +1144,8 @@ static const struct usb_device_id option_ids[] = {
{ USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_CC864_SINGLE) },
{ USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_DE910_DUAL) },
{ USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_UE910_V2) },
+ { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE910),
+ .driver_info = (kernel_ulong_t)&telit_le910_blacklist },
{ USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE920),
.driver_info = (kernel_ulong_t)&telit_le920_blacklist },
{ USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, ZTE_PRODUCT_MF622, 0xff, 0xff, 0xff) }, /* ZTE WCDMA products */
@@ -1612,6 +1621,7 @@ static const struct usb_device_id option_ids[] = {
{ USB_DEVICE(LONGCHEER_VENDOR_ID, ZOOM_PRODUCT_4597) },
{ USB_DEVICE(LONGCHEER_VENDOR_ID, IBALL_3_5G_CONNECT) },
{ USB_DEVICE(HAIER_VENDOR_ID, HAIER_PRODUCT_CE100) },
+ { USB_DEVICE_AND_INTERFACE_INFO(HAIER_VENDOR_ID, HAIER_PRODUCT_CE81B, 0xff, 0xff, 0xff) },
/* Pirelli */
{ USB_DEVICE_INTERFACE_CLASS(PIRELLI_VENDOR_ID, PIRELLI_PRODUCT_C100_1, 0xff) },
{ USB_DEVICE_INTERFACE_CLASS(PIRELLI_VENDOR_ID, PIRELLI_PRODUCT_C100_2, 0xff) },
diff --git a/fs/buffer.c b/fs/buffer.c
index 4252d82..948aae2 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -2077,6 +2077,7 @@ int generic_write_end(struct file *file, struct address_space *mapping,
struct page *page, void *fsdata)
{
struct inode *inode = mapping->host;
+ loff_t old_size = inode->i_size;
int i_size_changed = 0;

copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);
@@ -2096,6 +2097,8 @@ int generic_write_end(struct file *file, struct address_space *mapping,
unlock_page(page);
page_cache_release(page);

+ if (old_size < pos)
+ pagecache_isize_extended(inode, old_size, pos);
/*
* Don't mark the inode dirty under page lock. First, it unnecessarily
* makes the holding time of page lock longer. Second, it forces lock
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 37fd31e..0498390 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -1354,13 +1354,6 @@ set_qf_format:
"not specified.");
return 0;
}
- } else {
- if (sbi->s_jquota_fmt) {
- ext3_msg(sb, KERN_ERR, "error: journaled quota format "
- "specified with no journaling "
- "enabled.");
- return 0;
- }
}
#endif
return 1;
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 6ea7b14..c8c3cb2 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -176,7 +176,8 @@ static unsigned int num_clusters_in_group(struct super_block *sb,
}

/* Initializes an uninitialized block bitmap */
-void ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
+static int ext4_init_block_bitmap(struct super_block *sb,
+ struct buffer_head *bh,
ext4_group_t block_group,
struct ext4_group_desc *gdp)
{
@@ -191,11 +192,10 @@ void ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
/* If checksum is bad mark all blocks used to prevent allocation
* essentially implementing a per-group read-only flag. */
if (!ext4_group_desc_csum_verify(sb, block_group, gdp)) {
- ext4_error(sb, "Checksum bad for group %u", block_group);
grp = ext4_get_group_info(sb, block_group);
set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, &grp->bb_state);
set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT, &grp->bb_state);
- return;
+ return -EIO;
}
memset(bh->b_data, 0, sb->s_blocksize);

@@ -233,6 +233,7 @@ void ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
sb->s_blocksize * 8, bh->b_data);
ext4_block_bitmap_csum_set(sb, block_group, gdp, bh);
ext4_group_desc_csum_set(sb, block_group, gdp);
+ return 0;
}

/* Return the number of free blocks in a block group. It is used when
@@ -419,11 +420,15 @@ ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group)
}
ext4_lock_group(sb, block_group);
if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
- ext4_init_block_bitmap(sb, bh, block_group, desc);
+ int err;
+
+ err = ext4_init_block_bitmap(sb, bh, block_group, desc);
set_bitmap_uptodate(bh);
set_buffer_uptodate(bh);
ext4_unlock_group(sb, block_group);
unlock_buffer(bh);
+ if (err)
+ ext4_error(sb, "Checksum bad for grp %u", block_group);
return bh;
}
ext4_unlock_group(sb, block_group);
diff --git a/fs/ext4/bitmap.c b/fs/ext4/bitmap.c
index 3285aa5..b610779 100644
--- a/fs/ext4/bitmap.c
+++ b/fs/ext4/bitmap.c
@@ -24,8 +24,7 @@ int ext4_inode_bitmap_csum_verify(struct super_block *sb, ext4_group_t group,
__u32 provided, calculated;
struct ext4_sb_info *sbi = EXT4_SB(sb);

- if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
- EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+ if (!ext4_has_metadata_csum(sb))
return 1;

provided = le16_to_cpu(gdp->bg_inode_bitmap_csum_lo);
@@ -46,8 +45,7 @@ void ext4_inode_bitmap_csum_set(struct super_block *sb, ext4_group_t group,
__u32 csum;
struct ext4_sb_info *sbi = EXT4_SB(sb);

- if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
- EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+ if (!ext4_has_metadata_csum(sb))
return;

csum = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)bh->b_data, sz);
@@ -65,8 +63,7 @@ int ext4_block_bitmap_csum_verify(struct super_block *sb, ext4_group_t group,
struct ext4_sb_info *sbi = EXT4_SB(sb);
int sz = EXT4_CLUSTERS_PER_GROUP(sb) / 8;

- if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
- EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+ if (!ext4_has_metadata_csum(sb))
return 1;

provided = le16_to_cpu(gdp->bg_block_bitmap_csum_lo);
@@ -91,8 +88,7 @@ void ext4_block_bitmap_csum_set(struct super_block *sb, ext4_group_t group,
__u32 csum;
struct ext4_sb_info *sbi = EXT4_SB(sb);

- if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
- EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+ if (!ext4_has_metadata_csum(sb))
return;

csum = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)bh->b_data, sz);
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index e531054..bfb7fed 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1979,10 +1979,6 @@ extern int ext4_wait_block_bitmap(struct super_block *sb,
struct buffer_head *bh);
extern struct buffer_head *ext4_read_block_bitmap(struct super_block *sb,
ext4_group_t block_group);
-extern void ext4_init_block_bitmap(struct super_block *sb,
- struct buffer_head *bh,
- ext4_group_t group,
- struct ext4_group_desc *desc);
extern unsigned ext4_free_clusters_after_init(struct super_block *sb,
ext4_group_t block_group,
struct ext4_group_desc *gdp);
@@ -2110,6 +2106,7 @@ int do_journal_get_write_access(handle_t *handle,
#define CONVERT_INLINE_DATA 2

extern struct inode *ext4_iget(struct super_block *, unsigned long);
+extern struct inode *ext4_iget_normal(struct super_block *, unsigned long);
extern int ext4_write_inode(struct inode *, struct writeback_control *);
extern int ext4_setattr(struct dentry *, struct iattr *);
extern int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry,
@@ -2340,10 +2337,18 @@ extern int ext4_register_li_request(struct super_block *sb,
static inline int ext4_has_group_desc_csum(struct super_block *sb)
{
return EXT4_HAS_RO_COMPAT_FEATURE(sb,
- EXT4_FEATURE_RO_COMPAT_GDT_CSUM |
- EXT4_FEATURE_RO_COMPAT_METADATA_CSUM);
+ EXT4_FEATURE_RO_COMPAT_GDT_CSUM) ||
+ (EXT4_SB(sb)->s_chksum_driver != NULL);
}

+static inline int ext4_has_metadata_csum(struct super_block *sb)
+{
+ WARN_ON_ONCE(EXT4_HAS_RO_COMPAT_FEATURE(sb,
+ EXT4_FEATURE_RO_COMPAT_METADATA_CSUM) &&
+ !EXT4_SB(sb)->s_chksum_driver);
+
+ return (EXT4_SB(sb)->s_chksum_driver != NULL);
+}
static inline ext4_fsblk_t ext4_blocks_count(struct ext4_super_block *es)
{
return ((ext4_fsblk_t)le32_to_cpu(es->s_blocks_count_hi) << 32) |
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 3a09bb7..da05fe2 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -74,8 +74,7 @@ static int ext4_extent_block_csum_verify(struct inode *inode,
{
struct ext4_extent_tail *et;

- if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
- EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+ if (!ext4_has_metadata_csum(inode->i_sb))
return 1;

et = find_ext4_extent_tail(eh);
@@ -89,8 +88,7 @@ static void ext4_extent_block_csum_set(struct inode *inode,
{
struct ext4_extent_tail *et;

- if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
- EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+ if (!ext4_has_metadata_csum(inode->i_sb))
return;

et = find_ext4_extent_tail(eh);
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 64bb32f1..a8d1a64 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -864,6 +864,10 @@ got:
struct buffer_head *block_bitmap_bh;

block_bitmap_bh = ext4_read_block_bitmap(sb, group);
+ if (!block_bitmap_bh) {
+ err = -EIO;
+ goto out;
+ }
BUFFER_TRACE(block_bitmap_bh, "get block bitmap access");
err = ext4_journal_get_write_access(handle, block_bitmap_bh);
if (err) {
@@ -988,8 +992,7 @@ got:
spin_unlock(&sbi->s_next_gen_lock);

/* Precompute checksum seed for inode metadata */
- if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
- EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) {
+ if (ext4_has_metadata_csum(sb)) {
__u32 csum;
__le32 inum = cpu_to_le32(inode->i_ino);
__le32 gen = cpu_to_le32(inode->i_generation);
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
index 7f03208..3af1fc4 100644
--- a/fs/ext4/inline.c
+++ b/fs/ext4/inline.c
@@ -1124,8 +1124,7 @@ static int ext4_finish_convert_inline_dir(handle_t *handle,
memcpy((void *)de, buf + EXT4_INLINE_DOTDOT_SIZE,
inline_size - EXT4_INLINE_DOTDOT_SIZE);

- if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
- EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+ if (ext4_has_metadata_csum(inode->i_sb))
csum_size = sizeof(struct ext4_dir_entry_tail);

inode->i_size = inode->i_sb->s_blocksize;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 2d782aa..f2a02dd 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -83,8 +83,7 @@ static int ext4_inode_csum_verify(struct inode *inode, struct ext4_inode *raw,

if (EXT4_SB(inode->i_sb)->s_es->s_creator_os !=
cpu_to_le32(EXT4_OS_LINUX) ||
- !EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
- EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+ !ext4_has_metadata_csum(inode->i_sb))
return 1;

provided = le16_to_cpu(raw->i_checksum_lo);
@@ -105,8 +104,7 @@ static void ext4_inode_csum_set(struct inode *inode, struct ext4_inode *raw,

if (EXT4_SB(inode->i_sb)->s_es->s_creator_os !=
cpu_to_le32(EXT4_OS_LINUX) ||
- !EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
- EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+ !ext4_has_metadata_csum(inode->i_sb))
return;

csum = ext4_inode_csum(inode, raw, ei);
@@ -221,16 +219,15 @@ void ext4_evict_inode(struct inode *inode)
goto no_delete;
}

- if (!is_bad_inode(inode))
- dquot_initialize(inode);
+ if (is_bad_inode(inode))
+ goto no_delete;
+ dquot_initialize(inode);

if (ext4_should_order_data(inode))
ext4_begin_ordered_truncate(inode, 0);
truncate_inode_pages(&inode->i_data, 0);

WARN_ON(atomic_read(&EXT4_I(inode)->i_ioend_count));
- if (is_bad_inode(inode))
- goto no_delete;

/*
* Protect us against freezing - iput() caller didn't have to have any
@@ -2639,6 +2636,20 @@ static int ext4_nonda_switch(struct super_block *sb)
return 0;
}

+/* We always reserve for an inode update; the superblock could be there too */
+static int ext4_da_write_credits(struct inode *inode, loff_t pos, unsigned len)
+{
+ if (likely(EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
+ EXT4_FEATURE_RO_COMPAT_LARGE_FILE)))
+ return 1;
+
+ if (pos + len <= 0x7fffffffULL)
+ return 1;
+
+ /* We might need to update the superblock to set LARGE_FILE */
+ return 2;
+}
+
static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned flags,
struct page **pagep, void **fsdata)
@@ -2689,7 +2700,8 @@ retry_grab:
* of file which has an already mapped buffer.
*/
retry_journal:
- handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE, 1);
+ handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE,
+ ext4_da_write_credits(inode, pos, len));
if (IS_ERR(handle)) {
page_cache_release(page);
return PTR_ERR(handle);
@@ -4072,8 +4084,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
ei->i_extra_isize = 0;

/* Precompute checksum seed for inode metadata */
- if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
- EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) {
+ if (ext4_has_metadata_csum(sb)) {
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
__u32 csum;
__le32 inum = cpu_to_le32(inode->i_ino);
@@ -4261,6 +4272,13 @@ bad_inode:
return ERR_PTR(ret);
}

+struct inode *ext4_iget_normal(struct super_block *sb, unsigned long ino)
+{
+ if (ino < EXT4_FIRST_INO(sb) && ino != EXT4_ROOT_INO)
+ return ERR_PTR(-EIO);
+ return ext4_iget(sb, ino);
+}
+
static int ext4_inode_blocks_set(handle_t *handle,
struct ext4_inode *raw_inode,
struct ext4_inode_info *ei)
@@ -4656,8 +4674,12 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
ext4_orphan_del(NULL, inode);
goto err_out;
}
- } else
+ } else {
+ loff_t oldsize = inode->i_size;
+
i_size_write(inode, attr->ia_size);
+ pagecache_isize_extended(inode, oldsize, inode->i_size);
+ }

/*
* Blocks are going to be removed from the inode. Wait
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 4a5fe55..fdcadc9 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -347,8 +347,7 @@ flags_out:
if (!inode_owner_or_capable(inode))
return -EPERM;

- if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
- EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) {
+ if (ext4_has_metadata_csum(inode->i_sb)) {
ext4_warning(sb, "Setting inode version is not "
"supported with metadata_csum enabled.");
return -ENOTTY;
@@ -548,9 +547,17 @@ group_add_out:
}

case EXT4_IOC_SWAP_BOOT:
+ {
+ int err;
if (!(filp->f_mode & FMODE_WRITE))
return -EBADF;
- return swap_inode_boot_loader(sb, inode);
+ err = mnt_want_write_file(filp);
+ if (err)
+ return err;
+ err = swap_inode_boot_loader(sb, inode);
+ mnt_drop_write_file(filp);
+ return err;
+ }

case EXT4_IOC_RESIZE_FS: {
ext4_fsblk_t n_blocks_count;
diff --git a/fs/ext4/mmp.c b/fs/ext4/mmp.c
index 04434ad..1268a1b 100644
--- a/fs/ext4/mmp.c
+++ b/fs/ext4/mmp.c
@@ -20,8 +20,7 @@ static __le32 ext4_mmp_csum(struct super_block *sb, struct mmp_struct *mmp)

int ext4_mmp_csum_verify(struct super_block *sb, struct mmp_struct *mmp)
{
- if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
- EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+ if (!ext4_has_metadata_csum(sb))
return 1;

return mmp->mmp_checksum == ext4_mmp_csum(sb, mmp);
@@ -29,8 +28,7 @@ int ext4_mmp_csum_verify(struct super_block *sb, struct mmp_struct *mmp)

void ext4_mmp_csum_set(struct super_block *sb, struct mmp_struct *mmp)
{
- if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
- EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+ if (!ext4_has_metadata_csum(sb))
return;

mmp->mmp_checksum = ext4_mmp_csum(sb, mmp);
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 5a0408d..c8fd7ce 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -123,8 +123,7 @@ static struct buffer_head *__ext4_read_dirblock(struct inode *inode,
"directory leaf block found instead of index block");
return ERR_PTR(-EIO);
}
- if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
- EXT4_FEATURE_RO_COMPAT_METADATA_CSUM) ||
+ if (!ext4_has_metadata_csum(inode->i_sb) ||
buffer_verified(bh))
return bh;

@@ -339,8 +338,7 @@ int ext4_dirent_csum_verify(struct inode *inode, struct ext4_dir_entry *dirent)
{
struct ext4_dir_entry_tail *t;

- if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
- EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+ if (!ext4_has_metadata_csum(inode->i_sb))
return 1;

t = get_dirent_tail(inode, dirent);
@@ -361,8 +359,7 @@ static void ext4_dirent_csum_set(struct inode *inode,
{
struct ext4_dir_entry_tail *t;

- if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
- EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+ if (!ext4_has_metadata_csum(inode->i_sb))
return;

t = get_dirent_tail(inode, dirent);
@@ -437,8 +434,7 @@ static int ext4_dx_csum_verify(struct inode *inode,
struct dx_tail *t;
int count_offset, limit, count;

- if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
- EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+ if (!ext4_has_metadata_csum(inode->i_sb))
return 1;

c = get_dx_countlimit(inode, dirent, &count_offset);
@@ -467,8 +463,7 @@ static void ext4_dx_csum_set(struct inode *inode, struct ext4_dir_entry *dirent)
struct dx_tail *t;
int count_offset, limit, count;

- if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
- EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+ if (!ext4_has_metadata_csum(inode->i_sb))
return;

c = get_dx_countlimit(inode, dirent, &count_offset);
@@ -556,8 +551,7 @@ static inline unsigned dx_root_limit(struct inode *dir, unsigned infosize)
unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(1) -
EXT4_DIR_REC_LEN(2) - infosize;

- if (EXT4_HAS_RO_COMPAT_FEATURE(dir->i_sb,
- EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+ if (ext4_has_metadata_csum(dir->i_sb))
entry_space -= sizeof(struct dx_tail);
return entry_space / sizeof(struct dx_entry);
}
@@ -566,8 +560,7 @@ static inline unsigned dx_node_limit(struct inode *dir)
{
unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(0);

- if (EXT4_HAS_RO_COMPAT_FEATURE(dir->i_sb,
- EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+ if (ext4_has_metadata_csum(dir->i_sb))
entry_space -= sizeof(struct dx_tail);
return entry_space / sizeof(struct dx_entry);
}
@@ -1430,7 +1423,7 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsi
dentry->d_name.name);
return ERR_PTR(-EIO);
}
- inode = ext4_iget(dir->i_sb, ino);
+ inode = ext4_iget_normal(dir->i_sb, ino);
if (inode == ERR_PTR(-ESTALE)) {
EXT4_ERROR_INODE(dir,
"deleted inode referenced: %u",
@@ -1461,7 +1454,7 @@ struct dentry *ext4_get_parent(struct dentry *child)
return ERR_PTR(-EIO);
}

- return d_obtain_alias(ext4_iget(child->d_inode->i_sb, ino));
+ return d_obtain_alias(ext4_iget_normal(child->d_inode->i_sb, ino));
}

/*
@@ -1535,8 +1528,7 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
int csum_size = 0;
int err = 0, i;

- if (EXT4_HAS_RO_COMPAT_FEATURE(dir->i_sb,
- EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+ if (ext4_has_metadata_csum(dir->i_sb))
csum_size = sizeof(struct ext4_dir_entry_tail);

bh2 = ext4_append(handle, dir, &newblock);
@@ -1705,8 +1697,7 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
int csum_size = 0;
int err;

- if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
- EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+ if (ext4_has_metadata_csum(inode->i_sb))
csum_size = sizeof(struct ext4_dir_entry_tail);

if (!de) {
@@ -1773,8 +1764,7 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
struct fake_dirent *fde;
int csum_size = 0;

- if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
- EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+ if (ext4_has_metadata_csum(inode->i_sb))
csum_size = sizeof(struct ext4_dir_entry_tail);

blocksize = dir->i_sb->s_blocksize;
@@ -1839,31 +1829,39 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
hinfo.hash_version += EXT4_SB(dir->i_sb)->s_hash_unsigned;
hinfo.seed = EXT4_SB(dir->i_sb)->s_hash_seed;
ext4fs_dirhash(name, namelen, &hinfo);
+ memset(frames, 0, sizeof(frames));
frame = frames;
frame->entries = entries;
frame->at = entries;
frame->bh = bh;
bh = bh2;

- ext4_handle_dirty_dx_node(handle, dir, frame->bh);
- ext4_handle_dirty_dirent_node(handle, dir, bh);
+ retval = ext4_handle_dirty_dx_node(handle, dir, frame->bh);
+ if (retval)
+ goto out_frames;
+ retval = ext4_handle_dirty_dirent_node(handle, dir, bh);
+ if (retval)
+ goto out_frames;

de = do_split(handle,dir, &bh, frame, &hinfo, &retval);
if (!de) {
- /*
- * Even if the block split failed, we have to properly write
- * out all the changes we did so far. Otherwise we can end up
- * with corrupted filesystem.
- */
- ext4_mark_inode_dirty(handle, dir);
- dx_release(frames);
- return retval;
+ retval = PTR_ERR(de);
+ goto out_frames;
}
dx_release(frames);

retval = add_dirent_to_buf(handle, dentry, inode, de, bh);
brelse(bh);
return retval;
+out_frames:
+ /*
+ * Even if the block split failed, we have to properly write
+ * out all the changes we did so far. Otherwise we can end up
+ * with corrupted filesystem.
+ */
+ ext4_mark_inode_dirty(handle, dir);
+ dx_release(frames);
+ return retval;
}

/*
@@ -1890,8 +1888,7 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
ext4_lblk_t block, blocks;
int csum_size = 0;

- if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
- EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+ if (ext4_has_metadata_csum(inode->i_sb))
csum_size = sizeof(struct ext4_dir_entry_tail);

sb = dir->i_sb;
@@ -2153,8 +2150,7 @@ static int ext4_delete_entry(handle_t *handle,
return err;
}

- if (EXT4_HAS_RO_COMPAT_FEATURE(dir->i_sb,
- EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+ if (ext4_has_metadata_csum(dir->i_sb))
csum_size = sizeof(struct ext4_dir_entry_tail);

BUFFER_TRACE(bh, "get_write_access");
@@ -2373,8 +2369,7 @@ static int ext4_init_new_dir(handle_t *handle, struct inode *dir,
int csum_size = 0;
int err;

- if (EXT4_HAS_RO_COMPAT_FEATURE(dir->i_sb,
- EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+ if (ext4_has_metadata_csum(dir->i_sb))
csum_size = sizeof(struct ext4_dir_entry_tail);

if (ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA)) {
@@ -2554,7 +2549,7 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode)
struct ext4_iloc iloc;
int err = 0, rc;

- if (!EXT4_SB(sb)->s_journal)
+ if (!EXT4_SB(sb)->s_journal || is_bad_inode(inode))
return 0;

mutex_lock(&EXT4_SB(sb)->s_orphan_lock);
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index f3b84cd..2400ad1 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -1071,7 +1071,7 @@ static void update_backups(struct super_block *sb, int blk_off, char *data,
break;

if (meta_bg == 0)
- backup_block = group * bpg + blk_off;
+ backup_block = ((ext4_fsblk_t)group) * bpg + blk_off;
else
backup_block = (ext4_group_first_block_no(sb, group) +
ext4_bg_has_super(sb, group));
@@ -1200,8 +1200,7 @@ static int ext4_set_bitmap_checksums(struct super_block *sb,
{
struct buffer_head *bh;

- if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
- EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+ if (!ext4_has_metadata_csum(sb))
return 0;

bh = ext4_get_bitmap(sb, group_data->inode_bitmap);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index a46030d..9fb3e6c 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -140,8 +140,7 @@ static __le32 ext4_superblock_csum(struct super_block *sb,
int ext4_superblock_csum_verify(struct super_block *sb,
struct ext4_super_block *es)
{
- if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
- EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+ if (!ext4_has_metadata_csum(sb))
return 1;

return es->s_checksum == ext4_superblock_csum(sb, es);
@@ -151,8 +150,7 @@ void ext4_superblock_csum_set(struct super_block *sb)
{
struct ext4_super_block *es = EXT4_SB(sb)->s_es;

- if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
- EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+ if (!ext4_has_metadata_csum(sb))
return;

es->s_checksum = ext4_superblock_csum(sb, es);
@@ -996,7 +994,7 @@ static struct inode *ext4_nfs_get_inode(struct super_block *sb,
* Currently we don't know the generation for parent directory, so
* a generation of 0 means "accept any"
*/
- inode = ext4_iget(sb, ino);
+ inode = ext4_iget_normal(sb, ino);
if (IS_ERR(inode))
return ERR_CAST(inode);
if (generation && inode->i_generation != generation) {
@@ -1706,13 +1704,6 @@ static int parse_options(char *options, struct super_block *sb,
"not specified");
return 0;
}
- } else {
- if (sbi->s_jquota_fmt) {
- ext4_msg(sb, KERN_ERR, "journaled quota format "
- "specified with no journaling "
- "enabled");
- return 0;
- }
}
#endif
if (test_opt(sb, DIOREAD_NOLOCK)) {
@@ -2010,8 +2001,7 @@ static __le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 block_group,
__u16 crc = 0;
__le32 le_group = cpu_to_le32(block_group);

- if ((sbi->s_es->s_feature_ro_compat &
- cpu_to_le32(EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))) {
+ if (ext4_has_metadata_csum(sbi->s_sb)) {
/* Use new metadata_csum algorithm */
__le16 save_csum;
__u32 csum32;
@@ -2029,6 +2019,10 @@ static __le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 block_group,
}

/* old crc16 code */
+ if (!(sbi->s_es->s_feature_ro_compat &
+ cpu_to_le32(EXT4_FEATURE_RO_COMPAT_GDT_CSUM)))
+ return 0;
+
offset = offsetof(struct ext4_group_desc, bg_checksum);

crc = crc16(~0, sbi->s_es->s_uuid, sizeof(sbi->s_es->s_uuid));
@@ -3167,8 +3161,7 @@ static int set_journal_csum_feature_set(struct super_block *sb)
int compat, incompat;
struct ext4_sb_info *sbi = EXT4_SB(sb);

- if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
- EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) {
+ if (ext4_has_metadata_csum(sb)) {
/* journal checksum v3 */
compat = 0;
incompat = JBD2_FEATURE_INCOMPAT_CSUM_V3;
@@ -3475,8 +3468,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
}

/* Precompute checksum seed for all metadata */
- if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
- EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+ if (ext4_has_metadata_csum(sb))
sbi->s_csum_seed = ext4_chksum(sbi, ~0, es->s_uuid,
sizeof(es->s_uuid));

@@ -3494,6 +3486,10 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
#ifdef CONFIG_EXT4_FS_POSIX_ACL
set_opt(sb, POSIX_ACL);
#endif
+ /* don't forget to enable journal_csum when metadata_csum is enabled. */
+ if (ext4_has_metadata_csum(sb))
+ set_opt(sb, JOURNAL_CHECKSUM);
+
if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_DATA)
set_opt(sb, JOURNAL_DATA);
else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_ORDERED)
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 298e9c8..a5d2f1b 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -141,8 +141,7 @@ static int ext4_xattr_block_csum_verify(struct inode *inode,
sector_t block_nr,
struct ext4_xattr_header *hdr)
{
- if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
- EXT4_FEATURE_RO_COMPAT_METADATA_CSUM) &&
+ if (ext4_has_metadata_csum(inode->i_sb) &&
(hdr->h_checksum != ext4_xattr_block_csum(inode, block_nr, hdr)))
return 0;
return 1;
@@ -152,8 +151,7 @@ static void ext4_xattr_block_csum_set(struct inode *inode,
sector_t block_nr,
struct ext4_xattr_header *hdr)
{
- if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
- EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+ if (!ext4_has_metadata_csum(inode->i_sb))
return;

hdr->h_checksum = ext4_xattr_block_csum(inode, block_nr, hdr);
@@ -189,14 +187,28 @@ ext4_listxattr(struct dentry *dentry, char *buffer, size_t size)
}

static int
-ext4_xattr_check_names(struct ext4_xattr_entry *entry, void *end)
+ext4_xattr_check_names(struct ext4_xattr_entry *entry, void *end,
+ void *value_start)
{
- while (!IS_LAST_ENTRY(entry)) {
- struct ext4_xattr_entry *next = EXT4_XATTR_NEXT(entry);
+ struct ext4_xattr_entry *e = entry;
+
+ while (!IS_LAST_ENTRY(e)) {
+ struct ext4_xattr_entry *next = EXT4_XATTR_NEXT(e);
if ((void *)next >= end)
return -EIO;
- entry = next;
+ e = next;
}
+
+ while (!IS_LAST_ENTRY(entry)) {
+ if (entry->e_value_size != 0 &&
+ (value_start + le16_to_cpu(entry->e_value_offs) <
+ (void *)e + sizeof(__u32) ||
+ value_start + le16_to_cpu(entry->e_value_offs) +
+ le32_to_cpu(entry->e_value_size) > end))
+ return -EIO;
+ entry = EXT4_XATTR_NEXT(entry);
+ }
+
return 0;
}

@@ -213,7 +225,8 @@ ext4_xattr_check_block(struct inode *inode, struct buffer_head *bh)
return -EIO;
if (!ext4_xattr_block_csum_verify(inode, bh->b_blocknr, BHDR(bh)))
return -EIO;
- error = ext4_xattr_check_names(BFIRST(bh), bh->b_data + bh->b_size);
+ error = ext4_xattr_check_names(BFIRST(bh), bh->b_data + bh->b_size,
+ bh->b_data);
if (!error)
set_buffer_verified(bh);
return error;
@@ -329,7 +342,7 @@ ext4_xattr_ibody_get(struct inode *inode, int name_index, const char *name,
header = IHDR(inode, raw_inode);
entry = IFIRST(header);
end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size;
- error = ext4_xattr_check_names(entry, end);
+ error = ext4_xattr_check_names(entry, end, entry);
if (error)
goto cleanup;
error = ext4_xattr_find_entry(&entry, name_index, name,
@@ -457,7 +470,7 @@ ext4_xattr_ibody_list(struct dentry *dentry, char *buffer, size_t buffer_size)
raw_inode = ext4_raw_inode(&iloc);
header = IHDR(inode, raw_inode);
end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size;
- error = ext4_xattr_check_names(IFIRST(header), end);
+ error = ext4_xattr_check_names(IFIRST(header), end, IFIRST(header));
if (error)
goto cleanup;
error = ext4_xattr_list_entries(dentry, IFIRST(header),
@@ -972,7 +985,8 @@ int ext4_xattr_ibody_find(struct inode *inode, struct ext4_xattr_info *i,
is->s.here = is->s.first;
is->s.end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size;
if (ext4_test_inode_state(inode, EXT4_STATE_XATTR)) {
- error = ext4_xattr_check_names(IFIRST(header), is->s.end);
+ error = ext4_xattr_check_names(IFIRST(header), is->s.end,
+ IFIRST(header));
if (error)
return error;
/* Find the named attribute. */
diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c
index 9b329b5..bcbef08 100644
--- a/fs/jbd2/recovery.c
+++ b/fs/jbd2/recovery.c
@@ -525,6 +525,7 @@ static int do_one_pass(journal_t *journal,
!jbd2_descr_block_csum_verify(journal,
bh->b_data)) {
err = -EIO;
+ brelse(bh);
goto failed;
}

diff --git a/fs/namei.c b/fs/namei.c
index eaabb52..ddb6721 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -3128,7 +3128,8 @@ static int do_tmpfile(int dfd, struct filename *pathname,
if (error)
goto out2;
audit_inode(pathname, nd->path.dentry, 0);
- error = may_open(&nd->path, op->acc_mode, op->open_flag);
+ /* Don't check for other permissions, the inode was just created */
+ error = may_open(&nd->path, MAY_OPEN, op->open_flag);
if (error)
goto out2;
file->f_path.mnt = nd->path.mnt;
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 08c8e02..25024d5 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -1233,7 +1233,8 @@ static bool need_wrongsec_check(struct svc_rqst *rqstp)
*/
if (argp->opcnt == resp->opcnt)
return false;
-
+ if (next->opnum == OP_ILLEGAL)
+ return false;
nextd = OPDESC(next);
/*
* Rest of 2.6.3.1.1: certain operations will return WRONGSEC
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index ce87c90..89da957 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -637,7 +637,7 @@ int dquot_writeback_dquots(struct super_block *sb, int type)
dqstats_inc(DQST_LOOKUPS);
err = sb->dq_op->write_dquot(dquot);
if (!ret && err)
- err = ret;
+ ret = err;
dqput(dquot);
spin_lock(&dq_list_lock);
}
diff --git a/include/drm/drm_pciids.h b/include/drm/drm_pciids.h
index bcec4c4..ca52de5 100644
--- a/include/drm/drm_pciids.h
+++ b/include/drm/drm_pciids.h
@@ -74,7 +74,6 @@
{0x1002, 0x4C64, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV250|RADEON_IS_MOBILITY}, \
{0x1002, 0x4C66, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV250|RADEON_IS_MOBILITY}, \
{0x1002, 0x4C67, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV250|RADEON_IS_MOBILITY}, \
- {0x1002, 0x4C6E, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV280|RADEON_IS_MOBILITY}, \
{0x1002, 0x4E44, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_R300}, \
{0x1002, 0x4E45, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_R300}, \
{0x1002, 0x4E46, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_R300}, \
diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index 24545cd..02ae99e 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -37,6 +37,9 @@
__asm__ ("" : "=r"(__ptr) : "0"(ptr)); \
(typeof(ptr)) (__ptr + (off)); })

+/* Make the optimizer believe the variable can be manipulated arbitrarily. */
+#define OPTIMIZER_HIDE_VAR(var) __asm__ ("" : "=r" (var) : "0" (var))
+
#ifdef __CHECKER__
#define __must_be_array(arr) 0
#else
diff --git a/include/linux/compiler-intel.h b/include/linux/compiler-intel.h
index dc1bd3d..5529c52 100644
--- a/include/linux/compiler-intel.h
+++ b/include/linux/compiler-intel.h
@@ -15,6 +15,7 @@
*/
#undef barrier
#undef RELOC_HIDE
+#undef OPTIMIZER_HIDE_VAR

#define barrier() __memory_barrier()

@@ -23,6 +24,12 @@
__ptr = (unsigned long) (ptr); \
(typeof(ptr)) (__ptr + (off)); })

+/* This should act as an optimization barrier on var.
+ * Given that this compiler does not have inline assembly, a compiler barrier
+ * is the best we can do.
+ */
+#define OPTIMIZER_HIDE_VAR(var) barrier()
+
/* Intel ECC compiler doesn't support __builtin_types_compatible_p() */
#define __must_be_array(a) 0

diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 92669cd..a2329c5 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -170,6 +170,10 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect);
(typeof(ptr)) (__ptr + (off)); })
#endif

+#ifndef OPTIMIZER_HIDE_VAR
+#define OPTIMIZER_HIDE_VAR(var) barrier()
+#endif
+
/* Not-quite-unique ID. */
#ifndef __UNIQUE_ID
# define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __LINE__)
diff --git a/include/linux/khugepaged.h b/include/linux/khugepaged.h
index 6b394f0..eeb3079 100644
--- a/include/linux/khugepaged.h
+++ b/include/linux/khugepaged.h
@@ -6,7 +6,8 @@
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
extern int __khugepaged_enter(struct mm_struct *mm);
extern void __khugepaged_exit(struct mm_struct *mm);
-extern int khugepaged_enter_vma_merge(struct vm_area_struct *vma);
+extern int khugepaged_enter_vma_merge(struct vm_area_struct *vma,
+ unsigned long vm_flags);

#define khugepaged_enabled() \
(transparent_hugepage_flags & \
@@ -35,13 +36,13 @@ static inline void khugepaged_exit(struct mm_struct *mm)
__khugepaged_exit(mm);
}

-static inline int khugepaged_enter(struct vm_area_struct *vma)
+static inline int khugepaged_enter(struct vm_area_struct *vma,
+ unsigned long vm_flags)
{
if (!test_bit(MMF_VM_HUGEPAGE, &vma->vm_mm->flags))
if ((khugepaged_always() ||
- (khugepaged_req_madv() &&
- vma->vm_flags & VM_HUGEPAGE)) &&
- !(vma->vm_flags & VM_NOHUGEPAGE))
+ (khugepaged_req_madv() && (vm_flags & VM_HUGEPAGE))) &&
+ !(vm_flags & VM_NOHUGEPAGE))
if (__khugepaged_enter(vma->vm_mm))
return -ENOMEM;
return 0;
@@ -54,11 +55,13 @@ static inline int khugepaged_fork(struct mm_struct *mm, struct mm_struct *oldmm)
static inline void khugepaged_exit(struct mm_struct *mm)
{
}
-static inline int khugepaged_enter(struct vm_area_struct *vma)
+static inline int khugepaged_enter(struct vm_area_struct *vma,
+ unsigned long vm_flags)
{
return 0;
}
-static inline int khugepaged_enter_vma_merge(struct vm_area_struct *vma)
+static inline int khugepaged_enter_vma_merge(struct vm_area_struct *vma,
+ unsigned long vm_flags)
{
return 0;
}
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 4506b84..6a47519 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1087,6 +1087,7 @@ static inline void unmap_shared_mapping_range(struct address_space *mapping,

extern void truncate_pagecache(struct inode *inode, loff_t new);
extern void truncate_setsize(struct inode *inode, loff_t newsize);
+void pagecache_isize_extended(struct inode *inode, loff_t from, loff_t to);
void truncate_pagecache_range(struct inode *inode, loff_t offset, loff_t end);
int truncate_inode_page(struct address_space *mapping, struct page *page);
int generic_error_remove_page(struct address_space *mapping, struct page *page);
diff --git a/include/linux/oom.h b/include/linux/oom.h
index 4cd6267..17f0949 100644
--- a/include/linux/oom.h
+++ b/include/linux/oom.h
@@ -50,6 +50,9 @@ static inline bool oom_task_origin(const struct task_struct *p)
extern unsigned long oom_badness(struct task_struct *p,
struct mem_cgroup *memcg, const nodemask_t *nodemask,
unsigned long totalpages);
+
+extern int oom_kills_count(void);
+extern void note_oom_kill(void);
extern void oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
unsigned int points, unsigned long totalpages,
struct mem_cgroup *memcg, nodemask_t *nodemask,
diff --git a/include/linux/string.h b/include/linux/string.h
index ac889c5..0ed878d 100644
--- a/include/linux/string.h
+++ b/include/linux/string.h
@@ -129,7 +129,7 @@ int bprintf(u32 *bin_buf, size_t size, const char *fmt, ...) __printf(3, 4);
#endif

extern ssize_t memory_read_from_buffer(void *to, size_t count, loff_t *ppos,
- const void *from, size_t available);
+ const void *from, size_t available);

/**
* strstarts - does @str start with @prefix?
@@ -141,7 +141,8 @@ static inline bool strstarts(const char *str, const char *prefix)
return strncmp(str, prefix, strlen(prefix)) == 0;
}

-extern size_t memweight(const void *ptr, size_t bytes);
+size_t memweight(const void *ptr, size_t bytes);
+void memzero_explicit(void *s, size_t count);

/**
* kbasename - return the last part of a pathname.
diff --git a/include/net/dst.h b/include/net/dst.h
index 44995c1..4b368ae 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -467,6 +467,7 @@ void dst_init(void);
/* Flags for xfrm_lookup flags argument. */
enum {
XFRM_LOOKUP_ICMP = 1 << 0,
+ XFRM_LOOKUP_QUEUE = 1 << 1,
};

struct flowi;
@@ -477,7 +478,16 @@ static inline struct dst_entry *xfrm_lookup(struct net *net,
int flags)
{
return dst_orig;
-}
+}
+
+static inline struct dst_entry *xfrm_lookup_route(struct net *net,
+ struct dst_entry *dst_orig,
+ const struct flowi *fl,
+ struct sock *sk,
+ int flags)
+{
+ return dst_orig;
+}

static inline struct xfrm_state *dst_xfrm(const struct dst_entry *dst)
{
@@ -489,6 +499,10 @@ struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig,
const struct flowi *fl, struct sock *sk,
int flags);

+struct dst_entry *xfrm_lookup_route(struct net *net, struct dst_entry *dst_orig,
+ const struct flowi *fl, struct sock *sk,
+ int flags);
+
/* skb attached with this dst needs transformation if dst->xfrm is valid */
static inline struct xfrm_state *dst_xfrm(const struct dst_entry *dst)
{
diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index c55aeed..cf92728 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -62,6 +62,7 @@ struct inet_connection_sock_af_ops {
void (*addr2sockaddr)(struct sock *sk, struct sockaddr *);
int (*bind_conflict)(const struct sock *sk,
const struct inet_bind_bucket *tb, bool relax);
+ void (*mtu_reduced)(struct sock *sk);
};

/** inet_connection_sock - INET connection oriented sock
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 7ceed99..3e95bff 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -2546,6 +2546,7 @@ enum ieee80211_roc_type {
* of queues to flush, which is useful if different virtual interfaces
* use different hardware queues; it may also indicate all queues.
* If the parameter @drop is set to %true, pending frames may be dropped.
+ * Note that vif can be NULL.
* The callback can sleep.
*
* @channel_switch: Drivers that need (or want) to offload the channel
@@ -2809,7 +2810,8 @@ struct ieee80211_ops {
struct netlink_callback *cb,
void *data, int len);
#endif
- void (*flush)(struct ieee80211_hw *hw, u32 queues, bool drop);
+ void (*flush)(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
+ u32 queues, bool drop);
void (*channel_switch)(struct ieee80211_hw *hw,
struct ieee80211_channel_switch *ch_switch);
int (*napi_poll)(struct ieee80211_hw *hw, int budget);
diff --git a/include/net/sctp/command.h b/include/net/sctp/command.h
index 832f219..c3f0cd9 100644
--- a/include/net/sctp/command.h
+++ b/include/net/sctp/command.h
@@ -116,7 +116,7 @@ typedef enum {
* analysis of the state functions, but in reality just taken from
* thin air in the hopes othat we don't trigger a kernel panic.
*/
-#define SCTP_MAX_NUM_COMMANDS 14
+#define SCTP_MAX_NUM_COMMANDS 20

typedef union {
__s32 i32;
diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
index c5fe806..d5bc97e 100644
--- a/include/net/sctp/sctp.h
+++ b/include/net/sctp/sctp.h
@@ -453,6 +453,11 @@ static inline void sctp_assoc_pending_pmtu(struct sock *sk, struct sctp_associat
asoc->pmtu_pending = 0;
}

+static inline bool sctp_chunk_pending(const struct sctp_chunk *chunk)
+{
+ return !list_empty(&chunk->list);
+}
+
/* Walk through a list of TLV parameters. Don't trust the
* individual parameter lengths and instead depend on
* the chunk length to indicate when to stop. Make sure
diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h
index 4ef75af..c91b6f5 100644
--- a/include/net/sctp/sm.h
+++ b/include/net/sctp/sm.h
@@ -249,9 +249,9 @@ struct sctp_chunk *sctp_make_asconf_update_ip(struct sctp_association *,
int, __be16);
struct sctp_chunk *sctp_make_asconf_set_prim(struct sctp_association *asoc,
union sctp_addr *addr);
-int sctp_verify_asconf(const struct sctp_association *asoc,
- struct sctp_paramhdr *param_hdr, void *chunk_end,
- struct sctp_paramhdr **errp);
+bool sctp_verify_asconf(const struct sctp_association *asoc,
+ struct sctp_chunk *chunk, bool addr_param_needed,
+ struct sctp_paramhdr **errp);
struct sctp_chunk *sctp_process_asconf(struct sctp_association *asoc,
struct sctp_chunk *asconf);
int sctp_process_asconf_ack(struct sctp_association *asoc,
diff --git a/include/net/sock.h b/include/net/sock.h
index 749bad5..5db5b7f 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -950,7 +950,6 @@ struct proto {
struct sk_buff *skb);

void (*release_cb)(struct sock *sk);
- void (*mtu_reduced)(struct sock *sk);

/* Keeping track of sk's, looking them up, and port selection methods. */
void (*hash)(struct sock *sk);
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 920fc2e..a9b7191 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -450,6 +450,7 @@ const u8 *tcp_parse_md5sig_option(const struct tcphdr *th);
*/

void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb);
+void tcp_v4_mtu_reduced(struct sock *sk);
int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb);
struct sock *tcp_create_openreq_child(struct sock *sk,
struct request_sock *req,
@@ -720,8 +721,10 @@ struct tcp_skb_cb {
#define TCPCB_SACKED_RETRANS 0x02 /* SKB retransmitted */
#define TCPCB_LOST 0x04 /* SKB is lost */
#define TCPCB_TAGBITS 0x07 /* All tag bits */
+#define TCPCB_REPAIRED 0x10 /* SKB repaired (no skb_mstamp) */
#define TCPCB_EVER_RETRANS 0x80 /* Ever retransmitted frame */
-#define TCPCB_RETRANS (TCPCB_SACKED_RETRANS|TCPCB_EVER_RETRANS)
+#define TCPCB_RETRANS (TCPCB_SACKED_RETRANS|TCPCB_EVER_RETRANS| \
+ TCPCB_REPAIRED)

__u8 ip_dsfield; /* IPv4 tos or IPv6 dsfield */
/* 1 byte hole */
diff --git a/kernel/freezer.c b/kernel/freezer.c
index aa6a8aa..8f9279b 100644
--- a/kernel/freezer.c
+++ b/kernel/freezer.c
@@ -42,6 +42,9 @@ bool freezing_slow_path(struct task_struct *p)
if (p->flags & (PF_NOFREEZE | PF_SUSPEND_TASK))
return false;

+ if (test_thread_flag(TIF_MEMDIE))
+ return false;
+
if (pm_nosig_freezing || cgroup_freezing(p))
return true;

diff --git a/kernel/futex.c b/kernel/futex.c
index 2b1583e..7947e4c 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -492,8 +492,14 @@ static struct futex_pi_state * alloc_pi_state(void)
return pi_state;
}

+/*
+ * Must be called with the hb lock held.
+ */
static void free_pi_state(struct futex_pi_state *pi_state)
{
+ if (!pi_state)
+ return;
+
if (!atomic_dec_and_test(&pi_state->refcount))
return;

@@ -1407,15 +1413,6 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags,
}

retry:
- if (pi_state != NULL) {
- /*
- * We will have to lookup the pi_state again, so free this one
- * to keep the accounting correct.
- */
- free_pi_state(pi_state);
- pi_state = NULL;
- }
-
ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1, VERIFY_READ);
if (unlikely(ret != 0))
goto out;
@@ -1503,6 +1500,8 @@ retry_private:
case 0:
break;
case -EFAULT:
+ free_pi_state(pi_state);
+ pi_state = NULL;
double_unlock_hb(hb1, hb2);
put_futex_key(&key2);
put_futex_key(&key1);
@@ -1512,6 +1511,8 @@ retry_private:
goto out;
case -EAGAIN:
/* The owner was exiting, try again. */
+ free_pi_state(pi_state);
+ pi_state = NULL;
double_unlock_hb(hb1, hb2);
put_futex_key(&key2);
put_futex_key(&key1);
@@ -1588,6 +1589,7 @@ retry_private:
}

out_unlock:
+ free_pi_state(pi_state);
double_unlock_hb(hb1, hb2);

/*
@@ -1604,8 +1606,6 @@ out_put_keys:
out_put_key1:
put_futex_key(&key1);
out:
- if (pi_state != NULL)
- free_pi_state(pi_state);
return ret ? ret : task_count;
}

diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c
index 424c2d4..77e6b83 100644
--- a/kernel/posix-timers.c
+++ b/kernel/posix-timers.c
@@ -634,6 +634,7 @@ SYSCALL_DEFINE3(timer_create, const clockid_t, which_clock,
goto out;
}
} else {
+ memset(&event.sigev_value, 0, sizeof(event.sigev_value));
event.sigev_notify = SIGEV_SIGNAL;
event.sigev_signo = SIGALRM;
event.sigev_value.sival_int = new_timer->it_id;
diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
index 0121dab..7ef5244 100644
--- a/kernel/power/hibernate.c
+++ b/kernel/power/hibernate.c
@@ -491,8 +491,14 @@ int hibernation_restore(int platform_mode)
error = dpm_suspend_start(PMSG_QUIESCE);
if (!error) {
error = resume_target_kernel(platform_mode);
- dpm_resume_end(PMSG_RECOVER);
+ /*
+ * The above should either succeed and jump to the new kernel,
+ * or return with an error. Otherwise things are just
+ * undefined, so let's be paranoid.
+ */
+ BUG_ON(!error);
}
+ dpm_resume_end(PMSG_RECOVER);
pm_restore_gfp_mask();
ftrace_start();
resume_console();
diff --git a/kernel/power/process.c b/kernel/power/process.c
index 14f9a8d..f1fe7ec 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -107,6 +107,28 @@ static int try_to_freeze_tasks(bool user_only)
return todo ? -EBUSY : 0;
}

+/*
+ * Returns true if all freezable tasks (except for current) are frozen already
+ */
+static bool check_frozen_processes(void)
+{
+ struct task_struct *g, *p;
+ bool ret = true;
+
+ read_lock(&tasklist_lock);
+ for_each_process_thread(g, p) {
+ if (p != current && !freezer_should_skip(p) &&
+ !frozen(p)) {
+ ret = false;
+ goto done;
+ }
+ }
+done:
+ read_unlock(&tasklist_lock);
+
+ return ret;
+}
+
/**
* freeze_processes - Signal user space processes to enter the refrigerator.
* The current thread will not be frozen. The same process that calls
@@ -117,6 +139,7 @@ static int try_to_freeze_tasks(bool user_only)
int freeze_processes(void)
{
int error;
+ int oom_kills_saved;

error = __usermodehelper_disable(UMH_FREEZING);
if (error)
@@ -130,12 +153,27 @@ int freeze_processes(void)

printk("Freezing user space processes ... ");
pm_freezing = true;
+ oom_kills_saved = oom_kills_count();
error = try_to_freeze_tasks(true);
if (!error) {
- printk("done.");
__usermodehelper_set_disable_depth(UMH_DISABLED);
oom_killer_disable();
+
+ /*
+ * There might have been an OOM kill while we were
+ * freezing tasks and the killed task might be still
+ * on the way out so we have to double check for race.
+ */
+ if (oom_kills_count() != oom_kills_saved &&
+ !check_frozen_processes()) {
+ __usermodehelper_set_disable_depth(UMH_ENABLED);
+ printk("OOM in progress.");
+ error = -EBUSY;
+ goto done;
+ }
+ printk("done.");
}
+done:
printk("\n");
BUG_ON(in_atomic());

diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index ea90eb5..7708669 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -313,7 +313,7 @@ static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id)
int size;

syscall_nr = trace_get_syscall_nr(current, regs);
- if (syscall_nr < 0)
+ if (syscall_nr < 0 || syscall_nr >= NR_syscalls)
return;

/* Here we're inside tp handler's rcu_read_lock_sched (__DO_TRACE) */
@@ -361,7 +361,7 @@ static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret)
int syscall_nr;

syscall_nr = trace_get_syscall_nr(current, regs);
- if (syscall_nr < 0)
+ if (syscall_nr < 0 || syscall_nr >= NR_syscalls)
return;

/* Here we're inside tp handler's rcu_read_lock_sched (__DO_TRACE()) */
@@ -569,7 +569,7 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id)
int size;

syscall_nr = trace_get_syscall_nr(current, regs);
- if (syscall_nr < 0)
+ if (syscall_nr < 0 || syscall_nr >= NR_syscalls)
return;
if (!test_bit(syscall_nr, enabled_perf_enter_syscalls))
return;
@@ -643,7 +643,7 @@ static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
int size;

syscall_nr = trace_get_syscall_nr(current, regs);
- if (syscall_nr < 0)
+ if (syscall_nr < 0 || syscall_nr >= NR_syscalls)
return;
if (!test_bit(syscall_nr, enabled_perf_exit_syscalls))
return;
diff --git a/lib/bitmap.c b/lib/bitmap.c
index 06f7e4f..e5c4ebe 100644
--- a/lib/bitmap.c
+++ b/lib/bitmap.c
@@ -131,7 +131,9 @@ void __bitmap_shift_right(unsigned long *dst,
lower = src[off + k];
if (left && off + k == lim - 1)
lower &= mask;
- dst[k] = upper << (BITS_PER_LONG - rem) | lower >> rem;
+ dst[k] = lower >> rem;
+ if (rem)
+ dst[k] |= upper << (BITS_PER_LONG - rem);
if (left && k == lim - 1)
dst[k] &= mask;
}
@@ -172,7 +174,9 @@ void __bitmap_shift_left(unsigned long *dst,
upper = src[k];
if (left && k == lim - 1)
upper &= (1UL << left) - 1;
- dst[k + off] = lower >> (BITS_PER_LONG - rem) | upper << rem;
+ dst[k + off] = upper << rem;
+ if (rem)
+ dst[k + off] |= lower >> (BITS_PER_LONG - rem);
if (left && k + off == lim - 1)
dst[k + off] &= (1UL << left) - 1;
}
diff --git a/lib/string.c b/lib/string.c
index e5878de..43d0781 100644
--- a/lib/string.c
+++ b/lib/string.c
@@ -586,6 +586,22 @@ void *memset(void *s, int c, size_t count)
EXPORT_SYMBOL(memset);
#endif

+/**
+ * memzero_explicit - Fill a region of memory (e.g. sensitive
+ * keying data) with 0s.
+ * @s: Pointer to the start of the area.
+ * @count: The size of the area.
+ *
+ * memzero_explicit() doesn't need an arch-specific version as
+ * it just invokes the one of memset() implicitly.
+ */
+void memzero_explicit(void *s, size_t count)
+{
+ memset(s, 0, count);
+ OPTIMIZER_HIDE_VAR(s);
+}
+EXPORT_SYMBOL(memzero_explicit);
+
#ifndef __HAVE_ARCH_MEMCPY
/**
* memcpy - Copy one area of memory to another
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 64a7f9c..65aa131 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -193,7 +193,7 @@ retry:
preempt_disable();
if (cmpxchg(&huge_zero_page, NULL, zero_page)) {
preempt_enable();
- __free_page(zero_page);
+ __free_pages(zero_page, compound_order(zero_page));
goto retry;
}

@@ -225,7 +225,7 @@ static unsigned long shrink_huge_zero_page_scan(struct shrinker *shrink,
if (atomic_cmpxchg(&huge_zero_refcount, 1, 0) == 1) {
struct page *zero_page = xchg(&huge_zero_page, NULL);
BUG_ON(zero_page == NULL);
- __free_page(zero_page);
+ __free_pages(zero_page, compound_order(zero_page));
return HPAGE_PMD_NR;
}

@@ -788,7 +788,7 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
return VM_FAULT_FALLBACK;
if (unlikely(anon_vma_prepare(vma)))
return VM_FAULT_OOM;
- if (unlikely(khugepaged_enter(vma)))
+ if (unlikely(khugepaged_enter(vma, vma->vm_flags)))
return VM_FAULT_OOM;
if (!(flags & FAULT_FLAG_WRITE) &&
transparent_hugepage_use_zero_page()) {
@@ -1993,7 +1993,7 @@ int hugepage_madvise(struct vm_area_struct *vma,
* register it here without waiting a page fault that
* may not happen any time soon.
*/
- if (unlikely(khugepaged_enter_vma_merge(vma)))
+ if (unlikely(khugepaged_enter_vma_merge(vma, *vm_flags)))
return -ENOMEM;
break;
case MADV_NOHUGEPAGE:
@@ -2094,7 +2094,8 @@ int __khugepaged_enter(struct mm_struct *mm)
return 0;
}

-int khugepaged_enter_vma_merge(struct vm_area_struct *vma)
+int khugepaged_enter_vma_merge(struct vm_area_struct *vma,
+ unsigned long vm_flags)
{
unsigned long hstart, hend;
if (!vma->anon_vma)
@@ -2106,11 +2107,11 @@ int khugepaged_enter_vma_merge(struct vm_area_struct *vma)
if (vma->vm_ops)
/* khugepaged not yet working on file or special mappings */
return 0;
- VM_BUG_ON(vma->vm_flags & VM_NO_THP);
+ VM_BUG_ON(vm_flags & VM_NO_THP);
hstart = (vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK;
hend = vma->vm_end & HPAGE_PMD_MASK;
if (hstart < hend)
- return khugepaged_enter(vma);
+ return khugepaged_enter(vma, vm_flags);
return 0;
}

diff --git a/mm/mmap.c b/mm/mmap.c
index 546db74..32dbf6d 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1046,7 +1046,7 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
end, prev->vm_pgoff, NULL);
if (err)
return NULL;
- khugepaged_enter_vma_merge(prev);
+ khugepaged_enter_vma_merge(prev, vm_flags);
return prev;
}

@@ -1065,7 +1065,7 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
next->vm_pgoff - pglen, NULL);
if (err)
return NULL;
- khugepaged_enter_vma_merge(area);
+ khugepaged_enter_vma_merge(area, vm_flags);
return area;
}

@@ -2155,7 +2155,7 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address)
}
}
vma_unlock_anon_vma(vma);
- khugepaged_enter_vma_merge(vma);
+ khugepaged_enter_vma_merge(vma, vma->vm_flags);
validate_mm(vma->vm_mm);
return error;
}
@@ -2224,7 +2224,7 @@ int expand_downwards(struct vm_area_struct *vma,
}
}
vma_unlock_anon_vma(vma);
- khugepaged_enter_vma_merge(vma);
+ khugepaged_enter_vma_merge(vma, vma->vm_flags);
validate_mm(vma->vm_mm);
return error;
}
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 616b2eb..9c01afd 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -394,6 +394,23 @@ static void dump_header(struct task_struct *p, gfp_t gfp_mask, int order,
dump_tasks(memcg, nodemask);
}

+/*
+ * Number of OOM killer invocations (including memcg OOM killer).
+ * Primarily used by PM freezer to check for potential races with
+ * OOM killed frozen task.
+ */
+static atomic_t oom_kills = ATOMIC_INIT(0);
+
+int oom_kills_count(void)
+{
+ return atomic_read(&oom_kills);
+}
+
+void note_oom_kill(void)
+{
+ atomic_inc(&oom_kills);
+}
+
#define K(x) ((x) << (PAGE_SHIFT-10))
/*
* Must be called while holding a reference to p, which will be released upon
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 38dca81..83a20df 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2178,6 +2178,14 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
}

/*
+ * PM-freezer should be notified that there might be an OOM killer on
+ * its way to kill and wake somebody up. This is too early and we might
+ * end up not killing anything but false positives are acceptable.
+ * See freeze_processes.
+ */
+ note_oom_kill();
+
+ /*
* Go through the zonelist yet one more time, keep very high watermark
* here, this is only to catch a parallel oom killing, we must fail if
* we're still under heavy pressure.
diff --git a/mm/page_cgroup.c b/mm/page_cgroup.c
index 6d757e3a..e007236 100644
--- a/mm/page_cgroup.c
+++ b/mm/page_cgroup.c
@@ -170,6 +170,7 @@ static void free_page_cgroup(void *addr)
sizeof(struct page_cgroup) * PAGES_PER_SECTION;

BUG_ON(PageReserved(page));
+ kmemleak_free(addr);
free_pages_exact(addr, table_size);
}
}
diff --git a/mm/truncate.c b/mm/truncate.c
index 353b683..855c38c 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -20,6 +20,7 @@
#include <linux/buffer_head.h> /* grr. try_to_release_page,
do_invalidatepage */
#include <linux/cleancache.h>
+#include <linux/rmap.h>
#include "internal.h"


@@ -613,12 +614,68 @@ EXPORT_SYMBOL(truncate_pagecache);
*/
void truncate_setsize(struct inode *inode, loff_t newsize)
{
+ loff_t oldsize = inode->i_size;
+
i_size_write(inode, newsize);
+ if (newsize > oldsize)
+ pagecache_isize_extended(inode, oldsize, newsize);
truncate_pagecache(inode, newsize);
}
EXPORT_SYMBOL(truncate_setsize);

/**
+ * pagecache_isize_extended - update pagecache after extension of i_size
+ * @inode: inode for which i_size was extended
+ * @from: original inode size
+ * @to: new inode size
+ *
+ * Handle extension of inode size either caused by extending truncate or by
+ * write starting after current i_size. We mark the page straddling current
+ * i_size RO so that page_mkwrite() is called on the nearest write access to
+ * the page. This way filesystem can be sure that page_mkwrite() is called on
+ * the page before user writes to the page via mmap after the i_size has been
+ * changed.
+ *
+ * The function must be called after i_size is updated so that page fault
+ * coming after we unlock the page will already see the new i_size.
+ * The function must be called while we still hold i_mutex - this not only
+ * makes sure i_size is stable but also that userspace cannot observe new
+ * i_size value before we are prepared to store mmap writes at new inode size.
+ */
+void pagecache_isize_extended(struct inode *inode, loff_t from, loff_t to)
+{
+ int bsize = 1 << inode->i_blkbits;
+ loff_t rounded_from;
+ struct page *page;
+ pgoff_t index;
+
+ WARN_ON(!mutex_is_locked(&inode->i_mutex));
+ WARN_ON(to > inode->i_size);
+
+ if (from >= to || bsize == PAGE_CACHE_SIZE)
+ return;
+ /* Page straddling @from will not have any hole block created? */
+ rounded_from = round_up(from, bsize);
+ if (to <= rounded_from || !(rounded_from & (PAGE_CACHE_SIZE - 1)))
+ return;
+
+ index = from >> PAGE_CACHE_SHIFT;
+ page = find_lock_page(inode->i_mapping, index);
+ /* Page not cached? Nothing to do */
+ if (!page)
+ return;
+ /*
+ * See clear_page_dirty_for_io() for details why set_page_dirty()
+ * is needed.
+ */
+ if (page_mkclean(page))
+ set_page_dirty(page);
+ unlock_page(page);
+ page_cache_release(page);
+}
+EXPORT_SYMBOL(pagecache_isize_extended);
+
+/**
* truncate_pagecache_range - unmap and remove pagecache that is hole-punched
* @inode: inode
* @lstart: offset of beginning of hole
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 280f601..cd37763 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -309,6 +309,9 @@ struct br_input_skb_cb {
int igmp;
int mrouters_only;
#endif
+#ifdef CONFIG_BRIDGE_VLAN_FILTERING
+ bool vlan_filtered;
+#endif
};

#define BR_INPUT_SKB_CB(__skb) ((struct br_input_skb_cb *)(__skb)->cb)
diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
index 4f5341a..a22abd1 100644
--- a/net/bridge/br_vlan.c
+++ b/net/bridge/br_vlan.c
@@ -141,7 +141,8 @@ struct sk_buff *br_handle_vlan(struct net_bridge *br,
{
u16 vid;

- if (!br->vlan_enabled)
+ /* If this packet was not filtered at input, let it pass */
+ if (!BR_INPUT_SKB_CB(skb)->vlan_filtered)
goto out;

/* At this point, we know that the frame was filtered and contains
@@ -186,8 +187,10 @@ bool br_allowed_ingress(struct net_bridge *br, struct net_port_vlans *v,
/* If VLAN filtering is disabled on the bridge, all packets are
* permitted.
*/
- if (!br->vlan_enabled)
+ if (!br->vlan_enabled) {
+ BR_INPUT_SKB_CB(skb)->vlan_filtered = false;
return true;
+ }

/* If there are no vlan in the permitted list, all packets are
* rejected.
@@ -195,6 +198,8 @@ bool br_allowed_ingress(struct net_bridge *br, struct net_port_vlans *v,
if (!v)
goto drop;

+ BR_INPUT_SKB_CB(skb)->vlan_filtered = true;
+
err = br_vlan_get_tag(skb, vid);
if (!*vid) {
u16 pvid = br_get_pvid(v);
@@ -239,7 +244,8 @@ bool br_allowed_egress(struct net_bridge *br,
{
u16 vid;

- if (!br->vlan_enabled)
+ /* If this packet was not filtered at input, let it pass */
+ if (!BR_INPUT_SKB_CB(skb)->vlan_filtered)
return true;

if (!v)
@@ -258,6 +264,7 @@ bool br_should_learn(struct net_bridge_port *p, struct sk_buff *skb, u16 *vid)
struct net_bridge *br = p->br;
struct net_port_vlans *v;

+ /* If filtering was disabled at input, let it pass. */
if (!br->vlan_enabled)
return true;

diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index a7b6520..8463178 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -739,7 +739,8 @@ static inline int rtnl_vfinfo_size(const struct net_device *dev,
(nla_total_size(sizeof(struct ifla_vf_mac)) +
nla_total_size(sizeof(struct ifla_vf_vlan)) +
nla_total_size(sizeof(struct ifla_vf_tx_rate)) +
- nla_total_size(sizeof(struct ifla_vf_spoofchk)));
+ nla_total_size(sizeof(struct ifla_vf_spoofchk)) +
+ nla_total_size(sizeof(struct ifla_vf_link_state)));
return size;
} else
return 0;
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 808a270..ddbb949 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -3043,6 +3043,9 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb)
NAPI_GRO_CB(skb)->free = NAPI_GRO_FREE_STOLEN_HEAD;
goto done;
}
+ /* switch back to head shinfo */
+ pinfo = skb_shinfo(p);
+
if (pinfo->frag_list)
goto merge;
if (skb_gro_len(p) != pinfo->gso_size)
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index caadba0..4107687 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2270,9 +2270,9 @@ struct rtable *ip_route_output_flow(struct net *net, struct flowi4 *flp4,
return rt;

if (flp4->flowi4_proto)
- rt = (struct rtable *) xfrm_lookup(net, &rt->dst,
- flowi4_to_flowi(flp4),
- sk, 0);
+ rt = (struct rtable *)xfrm_lookup_route(net, &rt->dst,
+ flowi4_to_flowi(flp4),
+ sk, 0);

return rt;
}
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index e7a02d8..e06e1df 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1132,13 +1132,6 @@ new_segment:
goto wait_for_memory;

/*
- * All packets are restored as if they have
- * already been sent.
- */
- if (tp->repair)
- TCP_SKB_CB(skb)->when = tcp_time_stamp;
-
- /*
* Check whether we can use HW checksum.
*/
if (sk->sk_route_caps & NETIF_F_ALL_CSUM)
@@ -1147,6 +1140,13 @@ new_segment:
skb_entail(sk, skb);
copy = size_goal;
max = size_goal;
+
+ /* All packets are restored as if they have
+ * already been sent. skb_mstamp isn't set to
+ * avoid wrong rtt estimation.
+ */
+ if (tp->repair)
+ TCP_SKB_CB(skb)->sacked |= TCPCB_REPAIRED;
}

/* Try to append data to the end of skb. */
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index abd367b..2ab6b821 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2674,7 +2674,6 @@ static void tcp_enter_recovery(struct sock *sk, bool ece_ack)
*/
static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack)
{
- struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
bool recovered = !before(tp->snd_una, tp->high_seq);

@@ -2700,12 +2699,9 @@ static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack)

if (recovered) {
/* F-RTO RFC5682 sec 3.1 step 2.a and 1st part of step 3.a */
- icsk->icsk_retransmits = 0;
tcp_try_undo_recovery(sk);
return;
}
- if (flag & FLAG_DATA_ACKED)
- icsk->icsk_retransmits = 0;
if (tcp_is_reno(tp)) {
/* A Reno DUPACK means new data in F-RTO step 2.b above are
* delivered. Lower inflight to clock out (re)tranmissions.
@@ -3394,8 +3390,10 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)
tcp_rearm_rto(sk);

- if (after(ack, prior_snd_una))
+ if (after(ack, prior_snd_una)) {
flag |= FLAG_SND_UNA_ADVANCED;
+ icsk->icsk_retransmits = 0;
+ }

prior_fackets = tp->fackets_out;
prior_in_flight = tcp_packets_in_flight(tp);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 77fe507..9b726c0 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -268,7 +268,7 @@ EXPORT_SYMBOL(tcp_v4_connect);
* It can be called through tcp_release_cb() if socket was owned by user
* at the time tcp_v4_err() was called to handle ICMP message.
*/
-static void tcp_v4_mtu_reduced(struct sock *sk)
+void tcp_v4_mtu_reduced(struct sock *sk)
{
struct dst_entry *dst;
struct inet_sock *inet = inet_sk(sk);
@@ -299,6 +299,7 @@ static void tcp_v4_mtu_reduced(struct sock *sk)
tcp_simple_retransmit(sk);
} /* else let the usual retransmit timer handle it */
}
+EXPORT_SYMBOL(tcp_v4_mtu_reduced);

static void do_redirect(struct sk_buff *skb, struct sock *sk)
{
@@ -2107,6 +2108,7 @@ const struct inet_connection_sock_af_ops ipv4_specific = {
.compat_setsockopt = compat_ip_setsockopt,
.compat_getsockopt = compat_ip_getsockopt,
#endif
+ .mtu_reduced = tcp_v4_mtu_reduced,
};
EXPORT_SYMBOL(ipv4_specific);

@@ -2721,7 +2723,6 @@ struct proto tcp_prot = {
.sendpage = tcp_sendpage,
.backlog_rcv = tcp_v4_do_rcv,
.release_cb = tcp_release_cb,
- .mtu_reduced = tcp_v4_mtu_reduced,
.hash = inet_hash,
.unhash = inet_unhash,
.get_port = inet_csk_get_port,
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index e3688a4..395f909 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -785,7 +785,7 @@ void tcp_release_cb(struct sock *sk)
__sock_put(sk);
}
if (flags & (1UL << TCP_MTU_REDUCED_DEFERRED)) {
- sk->sk_prot->mtu_reduced(sk);
+ inet_csk(sk)->icsk_af_ops->mtu_reduced(sk);
__sock_put(sk);
}
}
@@ -1856,8 +1856,11 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
tso_segs = tcp_init_tso_segs(sk, skb, mss_now);
BUG_ON(!tso_segs);

- if (unlikely(tp->repair) && tp->repair_queue == TCP_SEND_QUEUE)
+ if (unlikely(tp->repair) && tp->repair_queue == TCP_SEND_QUEUE) {
+ /* "when" is used as a start point for the retransmit timer */
+ TCP_SKB_CB(skb)->when = tcp_time_stamp;
goto repair; /* Skip network transmission */
+ }

cwnd_quota = tcp_cwnd_test(tp, skb);
if (!cwnd_quota) {
@@ -2045,9 +2048,7 @@ void tcp_send_loss_probe(struct sock *sk)
if (WARN_ON(!skb || !tcp_skb_pcount(skb)))
goto rearm_timer;

- /* Probe with zero data doesn't trigger fast recovery. */
- if (skb->len > 0)
- err = __tcp_retransmit_skb(sk, skb);
+ err = __tcp_retransmit_skb(sk, skb);

/* Record snd_nxt for loss detection. */
if (likely(!err))
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 01ee297..bbb5ccd 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -790,7 +790,7 @@ static inline int ip6gre_xmit_ipv4(struct sk_buff *skb, struct net_device *dev)
encap_limit = t->parms.encap_limit;

memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
- fl6.flowi6_proto = IPPROTO_IPIP;
+ fl6.flowi6_proto = IPPROTO_GRE;

dsfield = ipv4_get_dsfield(iph);

@@ -840,7 +840,7 @@ static inline int ip6gre_xmit_ipv6(struct sk_buff *skb, struct net_device *dev)
encap_limit = t->parms.encap_limit;

memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
- fl6.flowi6_proto = IPPROTO_IPV6;
+ fl6.flowi6_proto = IPPROTO_GRE;

dsfield = ipv6_get_dsfield(ipv6h);
if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 6622e14..e34ca3d 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -991,7 +991,7 @@ struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
if (can_sleep)
fl6->flowi6_flags |= FLOWI_FLAG_CAN_SLEEP;

- return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
+ return xfrm_lookup_route(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);

@@ -1027,7 +1027,7 @@ struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
if (can_sleep)
fl6->flowi6_flags |= FLOWI_FLAG_CAN_SLEEP;

- return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
+ return xfrm_lookup_route(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
}
EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);

diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 8f59675..9568c1f 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -101,19 +101,19 @@ static struct ip_tunnel *ipip6_tunnel_lookup(struct net *net,
for_each_ip_tunnel_rcu(t, sitn->tunnels_r_l[h0 ^ h1]) {
if (local == t->parms.iph.saddr &&
remote == t->parms.iph.daddr &&
- (!dev || !t->parms.link || dev->iflink == t->parms.link) &&
+ (!dev || !t->parms.link || dev->ifindex == t->parms.link) &&
(t->dev->flags & IFF_UP))
return t;
}
for_each_ip_tunnel_rcu(t, sitn->tunnels_r[h0]) {
if (remote == t->parms.iph.daddr &&
- (!dev || !t->parms.link || dev->iflink == t->parms.link) &&
+ (!dev || !t->parms.link || dev->ifindex == t->parms.link) &&
(t->dev->flags & IFF_UP))
return t;
}
for_each_ip_tunnel_rcu(t, sitn->tunnels_l[h1]) {
if (local == t->parms.iph.saddr &&
- (!dev || !t->parms.link || dev->iflink == t->parms.link) &&
+ (!dev || !t->parms.link || dev->ifindex == t->parms.link) &&
(t->dev->flags & IFF_UP))
return t;
}
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 282874b..d83045c 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1640,6 +1640,7 @@ static const struct inet_connection_sock_af_ops ipv6_specific = {
.compat_setsockopt = compat_ipv6_setsockopt,
.compat_getsockopt = compat_ipv6_getsockopt,
#endif
+ .mtu_reduced = tcp_v4_mtu_reduced,
};

#ifdef CONFIG_TCP_MD5SIG
@@ -1671,6 +1672,7 @@ static const struct inet_connection_sock_af_ops ipv6_mapped = {
.compat_setsockopt = compat_ipv6_setsockopt,
.compat_getsockopt = compat_ipv6_getsockopt,
#endif
+ .mtu_reduced = tcp_v6_mtu_reduced,
};

#ifdef CONFIG_TCP_MD5SIG
@@ -1907,7 +1909,6 @@ struct proto tcpv6_prot = {
.sendpage = tcp_sendpage,
.backlog_rcv = tcp_v6_do_rcv,
.release_cb = tcp_release_cb,
- .mtu_reduced = tcp_v6_mtu_reduced,
.hash = tcp_v6_hash,
.unhash = inet_unhash,
.get_port = inet_csk_get_port,
diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index cfa1406..e6c6d81 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -756,7 +756,8 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
/* If PMTU discovery was enabled, use the MTU that was discovered */
dst = sk_dst_get(tunnel->sock);
if (dst != NULL) {
- u32 pmtu = dst_mtu(__sk_dst_get(tunnel->sock));
+ u32 pmtu = dst_mtu(dst);
+
if (pmtu != 0)
session->mtu = session->mru = pmtu -
PPPOL2TP_HEADER_OVERHEAD;
diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h
index 5d03c47..8f12800 100644
--- a/net/mac80211/driver-ops.h
+++ b/net/mac80211/driver-ops.h
@@ -722,13 +722,19 @@ static inline void drv_rfkill_poll(struct ieee80211_local *local)
}

static inline void drv_flush(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata,
u32 queues, bool drop)
{
+ struct ieee80211_vif *vif = sdata ? &sdata->vif : NULL;
+
might_sleep();

+ if (sdata)
+ check_sdata_in_driver(sdata);
+
trace_drv_flush(local, queues, drop);
if (local->ops->flush)
- local->ops->flush(&local->hw, queues, drop);
+ local->ops->flush(&local->hw, vif, queues, drop);
trace_drv_return_void(local);
}

diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c
index 22b223f..74350c3 100644
--- a/net/mac80211/rate.c
+++ b/net/mac80211/rate.c
@@ -462,7 +462,7 @@ static void rate_fixup_ratelist(struct ieee80211_vif *vif,
*/
if (!(rates[0].flags & IEEE80211_TX_RC_MCS)) {
u32 basic_rates = vif->bss_conf.basic_rates;
- s8 baserate = basic_rates ? ffs(basic_rates - 1) : 0;
+ s8 baserate = basic_rates ? ffs(basic_rates) - 1 : 0;

rate = &sband->bitrates[rates[0].idx];

diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 9f9b9bd..32cbc9e 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -558,7 +558,7 @@ void ieee80211_flush_queues(struct ieee80211_local *local,
ieee80211_stop_queues_by_reason(&local->hw, IEEE80211_MAX_QUEUE_MAP,
IEEE80211_QUEUE_STOP_REASON_FLUSH);

- drv_flush(local, queues, false);
+ drv_flush(local, sdata, queues, false);

ieee80211_wake_queues_by_reason(&local->hw, IEEE80211_MAX_QUEUE_MAP,
IEEE80211_QUEUE_STOP_REASON_FLUSH);
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index dbba678..cb5b7e0 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -204,7 +204,7 @@ static int __netlink_deliver_tap_skb(struct sk_buff *skb,
if (nskb) {
nskb->dev = dev;
nskb->protocol = htons((u16) sk->sk_protocol);
-
+ skb_reset_network_header(nskb);
ret = dev_queue_xmit(nskb);
if (unlikely(ret > 0))
ret = net_xmit_errno(ret);
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index 65cfaa8..07c4ae3 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -42,6 +42,9 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,

static int make_writable(struct sk_buff *skb, int write_len)
{
+ if (!pskb_may_pull(skb, write_len))
+ return -ENOMEM;
+
if (!skb_cloned(skb) || skb_clone_writable(skb, write_len))
return 0;

@@ -70,6 +73,8 @@ static int __pop_vlan_tci(struct sk_buff *skb, __be16 *current_tci)

vlan_set_encap_proto(skb, vhdr);
skb->mac_header += VLAN_HLEN;
+ if (skb_network_offset(skb) < ETH_HLEN)
+ skb_set_network_header(skb, ETH_HLEN);
skb_reset_mac_len(skb);

return 0;
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 88cfbc1..a846125 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -565,6 +565,7 @@ static void init_prb_bdqc(struct packet_sock *po,
p1->tov_in_jiffies = msecs_to_jiffies(p1->retire_blk_tov);
p1->blk_sizeof_priv = req_u->req3.tp_sizeof_priv;

+ p1->max_frame_len = p1->kblk_size - BLK_PLUS_PRIV(p1->blk_sizeof_priv);
prb_init_ft_ops(p1, req_u);
prb_setup_retire_blk_timer(po, tx_ring);
prb_open_block(p1, pbd);
@@ -1814,6 +1815,18 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
if ((int)snaplen < 0)
snaplen = 0;
}
+ } else if (unlikely(macoff + snaplen >
+ GET_PBDQC_FROM_RB(&po->rx_ring)->max_frame_len)) {
+ u32 nval;
+
+ nval = GET_PBDQC_FROM_RB(&po->rx_ring)->max_frame_len - macoff;
+ pr_err_once("tpacket_rcv: packet too big, clamped from %u to %u. macoff=%u\n",
+ snaplen, nval, macoff);
+ snaplen = nval;
+ if (unlikely((int)snaplen < 0)) {
+ snaplen = 0;
+ macoff = GET_PBDQC_FROM_RB(&po->rx_ring)->max_frame_len;
+ }
}
spin_lock(&sk->sk_receive_queue.lock);
h.raw = packet_current_rx_frame(po, skb,
@@ -3610,6 +3623,10 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
goto out;
if (unlikely(req->tp_block_size & (PAGE_SIZE - 1)))
goto out;
+ if (po->tp_version >= TPACKET_V3 &&
+ (int)(req->tp_block_size -
+ BLK_PLUS_PRIV(req_u->req3.tp_sizeof_priv)) <= 0)
+ goto out;
if (unlikely(req->tp_frame_size < po->tp_hdrlen +
po->tp_reserve))
goto out;
diff --git a/net/packet/internal.h b/net/packet/internal.h
index 1035fa2..ca086c0 100644
--- a/net/packet/internal.h
+++ b/net/packet/internal.h
@@ -29,6 +29,7 @@ struct tpacket_kbdq_core {
char *pkblk_start;
char *pkblk_end;
int kblk_size;
+ unsigned int max_frame_len;
unsigned int knum_blocks;
uint64_t knxt_seq_num;
char *prev;
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index 6a4c5a7..3194f7f 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -1644,6 +1644,8 @@ struct sctp_chunk *sctp_assoc_lookup_asconf_ack(
* ack chunk whose serial number matches that of the request.
*/
list_for_each_entry(ack, &asoc->asconf_ack_list, transmitted_list) {
+ if (sctp_chunk_pending(ack))
+ continue;
if (ack->subh.addip_hdr->serial == serial) {
sctp_chunk_hold(ack);
return ack;
diff --git a/net/sctp/inqueue.c b/net/sctp/inqueue.c
index 5856932..560cd41 100644
--- a/net/sctp/inqueue.c
+++ b/net/sctp/inqueue.c
@@ -141,18 +141,9 @@ struct sctp_chunk *sctp_inq_pop(struct sctp_inq *queue)
} else {
/* Nothing to do. Next chunk in the packet, please. */
ch = (sctp_chunkhdr_t *) chunk->chunk_end;
-
/* Force chunk->skb->data to chunk->chunk_end. */
- skb_pull(chunk->skb,
- chunk->chunk_end - chunk->skb->data);
-
- /* Verify that we have at least chunk headers
- * worth of buffer left.
- */
- if (skb_headlen(chunk->skb) < sizeof(sctp_chunkhdr_t)) {
- sctp_chunk_free(chunk);
- chunk = queue->in_progress = NULL;
- }
+ skb_pull(chunk->skb, chunk->chunk_end - chunk->skb->data);
+ /* We are guaranteed to pull a SCTP header. */
}
}

@@ -188,24 +179,14 @@ struct sctp_chunk *sctp_inq_pop(struct sctp_inq *queue)
skb_pull(chunk->skb, sizeof(sctp_chunkhdr_t));
chunk->subh.v = NULL; /* Subheader is no longer valid. */

- if (chunk->chunk_end < skb_tail_pointer(chunk->skb)) {
+ if (chunk->chunk_end + sizeof(sctp_chunkhdr_t) <
+ skb_tail_pointer(chunk->skb)) {
/* This is not a singleton */
chunk->singleton = 0;
} else if (chunk->chunk_end > skb_tail_pointer(chunk->skb)) {
- /* RFC 2960, Section 6.10 Bundling
- *
- * Partial chunks MUST NOT be placed in an SCTP packet.
- * If the receiver detects a partial chunk, it MUST drop
- * the chunk.
- *
- * Since the end of the chunk is past the end of our buffer
- * (which contains the whole packet, we can freely discard
- * the whole packet.
- */
- sctp_chunk_free(chunk);
- chunk = queue->in_progress = NULL;
-
- return NULL;
+ /* Discard inside state machine. */
+ chunk->pdiscard = 1;
+ chunk->chunk_end = skb_tail_pointer(chunk->skb);
} else {
/* We are at the end of the packet, so mark the chunk
* in case we need to send a SACK.
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 1d71674..e45212e 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -3110,50 +3110,63 @@ static __be16 sctp_process_asconf_param(struct sctp_association *asoc,
return SCTP_ERROR_NO_ERROR;
}

-/* Verify the ASCONF packet before we process it. */
-int sctp_verify_asconf(const struct sctp_association *asoc,
- struct sctp_paramhdr *param_hdr, void *chunk_end,
- struct sctp_paramhdr **errp) {
- sctp_addip_param_t *asconf_param;
+/* Verify the ASCONF packet before we process it. */
+bool sctp_verify_asconf(const struct sctp_association *asoc,
+ struct sctp_chunk *chunk, bool addr_param_needed,
+ struct sctp_paramhdr **errp)
+{
+ sctp_addip_chunk_t *addip = (sctp_addip_chunk_t *) chunk->chunk_hdr;
union sctp_params param;
- int length, plen;
-
- param.v = (sctp_paramhdr_t *) param_hdr;
- while (param.v <= chunk_end - sizeof(sctp_paramhdr_t)) {
- length = ntohs(param.p->length);
- *errp = param.p;
+ bool addr_param_seen = false;

- if (param.v > chunk_end - length ||
- length < sizeof(sctp_paramhdr_t))
- return 0;
+ sctp_walk_params(param, addip, addip_hdr.params) {
+ size_t length = ntohs(param.p->length);

+ *errp = param.p;
switch (param.p->type) {
+ case SCTP_PARAM_ERR_CAUSE:
+ break;
+ case SCTP_PARAM_IPV4_ADDRESS:
+ if (length != sizeof(sctp_ipv4addr_param_t))
+ return false;
+ addr_param_seen = true;
+ break;
+ case SCTP_PARAM_IPV6_ADDRESS:
+ if (length != sizeof(sctp_ipv6addr_param_t))
+ return false;
+ addr_param_seen = true;
+ break;
case SCTP_PARAM_ADD_IP:
case SCTP_PARAM_DEL_IP:
case SCTP_PARAM_SET_PRIMARY:
- asconf_param = (sctp_addip_param_t *)param.v;
- plen = ntohs(asconf_param->param_hdr.length);
- if (plen < sizeof(sctp_addip_param_t) +
- sizeof(sctp_paramhdr_t))
- return 0;
+ /* In ASCONF chunks, these need to be first. */
+ if (addr_param_needed && !addr_param_seen)
+ return false;
+ length = ntohs(param.addip->param_hdr.length);
+ if (length < sizeof(sctp_addip_param_t) +
+ sizeof(sctp_paramhdr_t))
+ return false;
break;
case SCTP_PARAM_SUCCESS_REPORT:
case SCTP_PARAM_ADAPTATION_LAYER_IND:
if (length != sizeof(sctp_addip_param_t))
- return 0;
-
+ return false;
break;
default:
- break;
+ /* This is unkown to us, reject! */
+ return false;
}
-
- param.v += WORD_ROUND(length);
}

- if (param.v != chunk_end)
- return 0;
+ /* Remaining sanity checks. */
+ if (addr_param_needed && !addr_param_seen)
+ return false;
+ if (!addr_param_needed && addr_param_seen)
+ return false;
+ if (param.v != chunk->chunk_end)
+ return false;

- return 1;
+ return true;
}

/* Process an incoming ASCONF chunk with the next expected serial no. and
@@ -3162,16 +3175,17 @@ int sctp_verify_asconf(const struct sctp_association *asoc,
struct sctp_chunk *sctp_process_asconf(struct sctp_association *asoc,
struct sctp_chunk *asconf)
{
+ sctp_addip_chunk_t *addip = (sctp_addip_chunk_t *) asconf->chunk_hdr;
+ bool all_param_pass = true;
+ union sctp_params param;
sctp_addiphdr_t *hdr;
union sctp_addr_param *addr_param;
sctp_addip_param_t *asconf_param;
struct sctp_chunk *asconf_ack;
-
__be16 err_code;
int length = 0;
int chunk_len;
__u32 serial;
- int all_param_pass = 1;

chunk_len = ntohs(asconf->chunk_hdr->length) - sizeof(sctp_chunkhdr_t);
hdr = (sctp_addiphdr_t *)asconf->skb->data;
@@ -3199,9 +3213,14 @@ struct sctp_chunk *sctp_process_asconf(struct sctp_association *asoc,
goto done;

/* Process the TLVs contained within the ASCONF chunk. */
- while (chunk_len > 0) {
+ sctp_walk_params(param, addip, addip_hdr.params) {
+ /* Skip preceeding address parameters. */
+ if (param.p->type == SCTP_PARAM_IPV4_ADDRESS ||
+ param.p->type == SCTP_PARAM_IPV6_ADDRESS)
+ continue;
+
err_code = sctp_process_asconf_param(asoc, asconf,
- asconf_param);
+ param.addip);
/* ADDIP 4.1 A7)
* If an error response is received for a TLV parameter,
* all TLVs with no response before the failed TLV are
@@ -3209,28 +3228,20 @@ struct sctp_chunk *sctp_process_asconf(struct sctp_association *asoc,
* the failed response are considered unsuccessful unless
* a specific success indication is present for the parameter.
*/
- if (SCTP_ERROR_NO_ERROR != err_code)
- all_param_pass = 0;
-
+ if (err_code != SCTP_ERROR_NO_ERROR)
+ all_param_pass = false;
if (!all_param_pass)
- sctp_add_asconf_response(asconf_ack,
- asconf_param->crr_id, err_code,
- asconf_param);
+ sctp_add_asconf_response(asconf_ack, param.addip->crr_id,
+ err_code, param.addip);

/* ADDIP 4.3 D11) When an endpoint receiving an ASCONF to add
* an IP address sends an 'Out of Resource' in its response, it
* MUST also fail any subsequent add or delete requests bundled
* in the ASCONF.
*/
- if (SCTP_ERROR_RSRC_LOW == err_code)
+ if (err_code == SCTP_ERROR_RSRC_LOW)
goto done;
-
- /* Move to the next ASCONF param. */
- length = ntohs(asconf_param->param_hdr.length);
- asconf_param = (void *)asconf_param + length;
- chunk_len -= length;
}
-
done:
asoc->peer.addip_serial++;

diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index cd0d5a2..1c73c33 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -171,6 +171,9 @@ sctp_chunk_length_valid(struct sctp_chunk *chunk,
{
__u16 chunk_length = ntohs(chunk->chunk_hdr->length);

+ /* Previously already marked? */
+ if (unlikely(chunk->pdiscard))
+ return 0;
if (unlikely(chunk_length < required_length))
return 0;

@@ -1776,9 +1779,22 @@ static sctp_disposition_t sctp_sf_do_dupcook_a(struct net *net,
/* Update the content of current association. */
sctp_add_cmd_sf(commands, SCTP_CMD_UPDATE_ASSOC, SCTP_ASOC(new_asoc));
sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, SCTP_ULPEVENT(ev));
- sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE,
- SCTP_STATE(SCTP_STATE_ESTABLISHED));
- sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(repl));
+ if (sctp_state(asoc, SHUTDOWN_PENDING) &&
+ (sctp_sstate(asoc->base.sk, CLOSING) ||
+ sock_flag(asoc->base.sk, SOCK_DEAD))) {
+ /* if were currently in SHUTDOWN_PENDING, but the socket
+ * has been closed by user, don't transition to ESTABLISHED.
+ * Instead trigger SHUTDOWN bundled with COOKIE_ACK.
+ */
+ sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(repl));
+ return sctp_sf_do_9_2_start_shutdown(net, ep, asoc,
+ SCTP_ST_CHUNK(0), NULL,
+ commands);
+ } else {
+ sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE,
+ SCTP_STATE(SCTP_STATE_ESTABLISHED));
+ sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(repl));
+ }
return SCTP_DISPOSITION_CONSUME;

nomem_ev:
@@ -3579,9 +3595,7 @@ sctp_disposition_t sctp_sf_do_asconf(struct net *net,
struct sctp_chunk *asconf_ack = NULL;
struct sctp_paramhdr *err_param = NULL;
sctp_addiphdr_t *hdr;
- union sctp_addr_param *addr_param;
__u32 serial;
- int length;

if (!sctp_vtag_verify(chunk, asoc)) {
sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_BAD_TAG,
@@ -3606,17 +3620,8 @@ sctp_disposition_t sctp_sf_do_asconf(struct net *net,
hdr = (sctp_addiphdr_t *)chunk->skb->data;
serial = ntohl(hdr->serial);

- addr_param = (union sctp_addr_param *)hdr->params;
- length = ntohs(addr_param->p.length);
- if (length < sizeof(sctp_paramhdr_t))
- return sctp_sf_violation_paramlen(net, ep, asoc, type, arg,
- (void *)addr_param, commands);
-
/* Verify the ASCONF chunk before processing it. */
- if (!sctp_verify_asconf(asoc,
- (sctp_paramhdr_t *)((void *)addr_param + length),
- (void *)chunk->chunk_end,
- &err_param))
+ if (!sctp_verify_asconf(asoc, chunk, true, &err_param))
return sctp_sf_violation_paramlen(net, ep, asoc, type, arg,
(void *)err_param, commands);

@@ -3734,10 +3739,7 @@ sctp_disposition_t sctp_sf_do_asconf_ack(struct net *net,
rcvd_serial = ntohl(addip_hdr->serial);

/* Verify the ASCONF-ACK chunk before processing it. */
- if (!sctp_verify_asconf(asoc,
- (sctp_paramhdr_t *)addip_hdr->params,
- (void *)asconf_ack->chunk_end,
- &err_param))
+ if (!sctp_verify_asconf(asoc, asconf_ack, false, &err_param))
return sctp_sf_violation_paramlen(net, ep, asoc, type, arg,
(void *)err_param, commands);

diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 9a91f74..0ee05f0 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -46,6 +46,11 @@ static DEFINE_SPINLOCK(xfrm_policy_sk_bundle_lock);
static struct dst_entry *xfrm_policy_sk_bundles;
static DEFINE_RWLOCK(xfrm_policy_lock);

+struct xfrm_flo {
+ struct dst_entry *dst_orig;
+ u8 flags;
+};
+
static DEFINE_SPINLOCK(xfrm_policy_afinfo_lock);
static struct xfrm_policy_afinfo __rcu *xfrm_policy_afinfo[NPROTO]
__read_mostly;
@@ -1882,13 +1887,14 @@ static int xdst_queue_output(struct sk_buff *skb)
}

static struct xfrm_dst *xfrm_create_dummy_bundle(struct net *net,
- struct dst_entry *dst,
+ struct xfrm_flo *xflo,
const struct flowi *fl,
int num_xfrms,
u16 family)
{
int err;
struct net_device *dev;
+ struct dst_entry *dst;
struct dst_entry *dst1;
struct xfrm_dst *xdst;

@@ -1896,10 +1902,13 @@ static struct xfrm_dst *xfrm_create_dummy_bundle(struct net *net,
if (IS_ERR(xdst))
return xdst;

- if (net->xfrm.sysctl_larval_drop || num_xfrms <= 0 ||
+ if (!(xflo->flags & XFRM_LOOKUP_QUEUE) ||
+ net->xfrm.sysctl_larval_drop ||
+ num_xfrms <= 0 ||
(fl->flowi_flags & FLOWI_FLAG_CAN_SLEEP))
return xdst;

+ dst = xflo->dst_orig;
dst1 = &xdst->u.dst;
dst_hold(dst);
xdst->route = dst;
@@ -1941,7 +1950,7 @@ static struct flow_cache_object *
xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir,
struct flow_cache_object *oldflo, void *ctx)
{
- struct dst_entry *dst_orig = (struct dst_entry *)ctx;
+ struct xfrm_flo *xflo = (struct xfrm_flo *)ctx;
struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
struct xfrm_dst *xdst, *new_xdst;
int num_pols = 0, num_xfrms = 0, i, err, pol_dead;
@@ -1982,7 +1991,8 @@ xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir,
goto make_dummy_bundle;
}

- new_xdst = xfrm_resolve_and_create_bundle(pols, num_pols, fl, family, dst_orig);
+ new_xdst = xfrm_resolve_and_create_bundle(pols, num_pols, fl, family,
+ xflo->dst_orig);
if (IS_ERR(new_xdst)) {
err = PTR_ERR(new_xdst);
if (err != -EAGAIN)
@@ -2016,7 +2026,7 @@ make_dummy_bundle:
/* We found policies, but there's no bundles to instantiate:
* either because the policy blocks, has no transformations or
* we could not build template (no xfrm_states).*/
- xdst = xfrm_create_dummy_bundle(net, dst_orig, fl, num_xfrms, family);
+ xdst = xfrm_create_dummy_bundle(net, xflo, fl, num_xfrms, family);
if (IS_ERR(xdst)) {
xfrm_pols_put(pols, num_pols);
return ERR_CAST(xdst);
@@ -2116,13 +2126,18 @@ restart:
}

if (xdst == NULL) {
+ struct xfrm_flo xflo;
+
+ xflo.dst_orig = dst_orig;
+ xflo.flags = flags;
+
/* To accelerate a bit... */
if ((dst_orig->flags & DST_NOXFRM) ||
!net->xfrm.policy_count[XFRM_POLICY_OUT])
goto nopol;

flo = flow_cache_lookup(net, fl, family, dir,
- xfrm_bundle_lookup, dst_orig);
+ xfrm_bundle_lookup, &xflo);
if (flo == NULL)
goto nopol;
if (IS_ERR(flo)) {
@@ -2150,7 +2165,7 @@ restart:
xfrm_pols_put(pols, drop_pols);
XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES);

- return make_blackhole(net, family, dst_orig);
+ return ERR_PTR(-EREMOTE);
}
if (fl->flowi_flags & FLOWI_FLAG_CAN_SLEEP) {
DECLARE_WAITQUEUE(wait, current);
@@ -2222,6 +2237,23 @@ dropdst:
}
EXPORT_SYMBOL(xfrm_lookup);

+/* Callers of xfrm_lookup_route() must ensure a call to dst_output().
+ * Otherwise we may send out blackholed packets.
+ */
+struct dst_entry *xfrm_lookup_route(struct net *net, struct dst_entry *dst_orig,
+ const struct flowi *fl,
+ struct sock *sk, int flags)
+{
+ struct dst_entry *dst = xfrm_lookup(net, dst_orig, fl, sk,
+ flags | XFRM_LOOKUP_QUEUE);
+
+ if (IS_ERR(dst) && PTR_ERR(dst) == -EREMOTE)
+ return make_blackhole(net, dst_orig->ops->family, dst_orig);
+
+ return dst;
+}
+EXPORT_SYMBOL(xfrm_lookup_route);
+
static inline int
xfrm_secpath_reject(int idx, struct sk_buff *skb, const struct flowi *fl)
{
@@ -2487,7 +2519,7 @@ int __xfrm_route_forward(struct sk_buff *skb, unsigned short family)

skb_dst_force(skb);

- dst = xfrm_lookup(net, skb_dst(skb), &fl, NULL, 0);
+ dst = xfrm_lookup(net, skb_dst(skb), &fl, NULL, XFRM_LOOKUP_QUEUE);
if (IS_ERR(dst)) {
res = 0;
dst = NULL;
diff --git a/security/integrity/evm/evm_main.c b/security/integrity/evm/evm_main.c
index 3c5cbb9..09036f4 100644
--- a/security/integrity/evm/evm_main.c
+++ b/security/integrity/evm/evm_main.c
@@ -296,9 +296,12 @@ int evm_inode_setxattr(struct dentry *dentry, const char *xattr_name,
{
const struct evm_ima_xattr_data *xattr_data = xattr_value;

- if ((strcmp(xattr_name, XATTR_NAME_EVM) == 0)
- && (xattr_data->type == EVM_XATTR_HMAC))
- return -EPERM;
+ if (strcmp(xattr_name, XATTR_NAME_EVM) == 0) {
+ if (!xattr_value_len)
+ return -EINVAL;
+ if (xattr_data->type != EVM_IMA_XATTR_DIGSIG)
+ return -EPERM;
+ }
return evm_protect_xattr(dentry, xattr_name, xattr_value,
xattr_value_len);
}
diff --git a/security/integrity/ima/ima_appraise.c b/security/integrity/ima/ima_appraise.c
index 734e946..c7190b7 100644
--- a/security/integrity/ima/ima_appraise.c
+++ b/security/integrity/ima/ima_appraise.c
@@ -359,11 +359,14 @@ static void ima_reset_appraise_flags(struct inode *inode)
int ima_inode_setxattr(struct dentry *dentry, const char *xattr_name,
const void *xattr_value, size_t xattr_value_len)
{
+ const struct evm_ima_xattr_data *xvalue = xattr_value;
int result;

result = ima_protect_xattr(dentry, xattr_name, xattr_value,
xattr_value_len);
if (result == 1) {
+ if (!xattr_value_len || (xvalue->type >= IMA_XATTR_LAST))
+ return -EINVAL;
ima_reset_appraise_flags(dentry->d_inode);
result = 0;
}
diff --git a/security/integrity/integrity.h b/security/integrity/integrity.h
index 33c0a70..8f2fbdc 100644
--- a/security/integrity/integrity.h
+++ b/security/integrity/integrity.h
@@ -56,6 +56,7 @@ enum evm_ima_xattr_type {
EVM_XATTR_HMAC,
EVM_IMA_XATTR_DIGSIG,
IMA_XATTR_DIGEST_NG,
+ IMA_XATTR_LAST
};

struct evm_ima_xattr_data {
diff --git a/sound/core/pcm_compat.c b/sound/core/pcm_compat.c
index af49721..c4ac3c1 100644
--- a/sound/core/pcm_compat.c
+++ b/sound/core/pcm_compat.c
@@ -206,6 +206,8 @@ static int snd_pcm_status_user_compat(struct snd_pcm_substream *substream,
if (err < 0)
return err;

+ if (clear_user(src, sizeof(*src)))
+ return -EFAULT;
if (put_user(status.state, &src->state) ||
compat_put_timespec(&status.trigger_tstamp, &src->trigger_tstamp) ||
compat_put_timespec(&status.tstamp, &src->tstamp) ||
diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c
index 01a5e05..566b0f6 100644
--- a/sound/core/pcm_native.c
+++ b/sound/core/pcm_native.c
@@ -3189,7 +3189,7 @@ static const struct vm_operations_struct snd_pcm_vm_ops_data_fault = {

#ifndef ARCH_HAS_DMA_MMAP_COHERENT
/* This should be defined / handled globally! */
-#ifdef CONFIG_ARM
+#if defined(CONFIG_ARM) || defined(CONFIG_ARM64)
#define ARCH_HAS_DMA_MMAP_COHERENT
#endif
#endif
diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c
index 9c67b07..c2c80ca 100644
--- a/sound/pci/hda/patch_hdmi.c
+++ b/sound/pci/hda/patch_hdmi.c
@@ -1575,19 +1575,22 @@ static bool hdmi_present_sense(struct hdmi_spec_per_pin *per_pin, int repoll)
}
}

- if (pin_eld->eld_valid && !eld->eld_valid) {
- update_eld = true;
+ if (pin_eld->eld_valid != eld->eld_valid)
eld_changed = true;
- }
+
+ if (pin_eld->eld_valid && !eld->eld_valid)
+ update_eld = true;
+
if (update_eld) {
bool old_eld_valid = pin_eld->eld_valid;
pin_eld->eld_valid = eld->eld_valid;
- eld_changed = pin_eld->eld_size != eld->eld_size ||
+ if (pin_eld->eld_size != eld->eld_size ||
memcmp(pin_eld->eld_buffer, eld->eld_buffer,
- eld->eld_size) != 0;
- if (eld_changed)
+ eld->eld_size) != 0) {
memcpy(pin_eld->eld_buffer, eld->eld_buffer,
eld->eld_size);
+ eld_changed = true;
+ }
pin_eld->eld_size = eld->eld_size;
pin_eld->info = eld->info;

diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index b2a5832..1a24b2c 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -2871,6 +2871,9 @@ static void alc283_shutup(struct hda_codec *codec)

alc_write_coef_idx(codec, 0x43, 0x9004);

+ /*depop hp during suspend*/
+ alc_write_coef_idx(codec, 0x06, 0x2100);
+
snd_hda_codec_write(codec, hp_pin, 0,
AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE);

diff --git a/sound/usb/quirks-table.h b/sound/usb/quirks-table.h
index 0e52836..8590a32 100644
--- a/sound/usb/quirks-table.h
+++ b/sound/usb/quirks-table.h
@@ -386,6 +386,36 @@ YAMAHA_DEVICE(0x105d, NULL),
}
},
{
+ USB_DEVICE(0x0499, 0x1509),
+ .driver_info = (unsigned long) & (const struct snd_usb_audio_quirk) {
+ /* .vendor_name = "Yamaha", */
+ /* .product_name = "Steinberg UR22", */
+ .ifnum = QUIRK_ANY_INTERFACE,
+ .type = QUIRK_COMPOSITE,
+ .data = (const struct snd_usb_audio_quirk[]) {
+ {
+ .ifnum = 1,
+ .type = QUIRK_AUDIO_STANDARD_INTERFACE
+ },
+ {
+ .ifnum = 2,
+ .type = QUIRK_AUDIO_STANDARD_INTERFACE
+ },
+ {
+ .ifnum = 3,
+ .type = QUIRK_MIDI_YAMAHA
+ },
+ {
+ .ifnum = 4,
+ .type = QUIRK_IGNORE_INTERFACE
+ },
+ {
+ .ifnum = -1
+ }
+ }
+ }
+},
+{
USB_DEVICE(0x0499, 0x150a),
.driver_info = (unsigned long) & (const struct snd_usb_audio_quirk) {
/* .vendor_name = "Yamaha", */
diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c
index 714b949..1f0dc1e 100644
--- a/virt/kvm/iommu.c
+++ b/virt/kvm/iommu.c
@@ -43,13 +43,13 @@ static void kvm_iommu_put_pages(struct kvm *kvm,
gfn_t base_gfn, unsigned long npages);

static pfn_t kvm_pin_pages(struct kvm_memory_slot *slot, gfn_t gfn,
- unsigned long size)
+ unsigned long npages)
{
gfn_t end_gfn;
pfn_t pfn;

pfn = gfn_to_pfn_memslot(slot, gfn);
- end_gfn = gfn + (size >> PAGE_SHIFT);
+ end_gfn = gfn + npages;
gfn += 1;

if (is_error_noslot_pfn(pfn))
@@ -119,7 +119,7 @@ int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot)
* Pin all pages we are about to map in memory. This is
* important because we unmap and unpin in 4kb steps later.
*/
- pfn = kvm_pin_pages(slot, gfn, page_size);
+ pfn = kvm_pin_pages(slot, gfn, page_size >> PAGE_SHIFT);
if (is_error_noslot_pfn(pfn)) {
gfn += 1;
continue;
@@ -131,7 +131,7 @@ int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot)
if (r) {
printk(KERN_ERR "kvm_iommu_map_address:"
"iommu failed to map pfn=%llx\n", pfn);
- kvm_unpin_pages(kvm, pfn, page_size);
+ kvm_unpin_pages(kvm, pfn, page_size >> PAGE_SHIFT);
goto unmap_pages;
}

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/