Re: Linux 4.9.258

From: Greg Kroah-Hartman
Date: Tue Feb 23 2021 - 09:01:32 EST


diff --git a/Makefile b/Makefile
index e53096154f81..e5955f122ffd 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
VERSION = 4
PATCHLEVEL = 9
-SUBLEVEL = 257
+SUBLEVEL = 258
EXTRAVERSION =
NAME = Roaring Lionus

@@ -762,6 +762,13 @@ ifdef CONFIG_FUNCTION_TRACER
ifndef CC_FLAGS_FTRACE
CC_FLAGS_FTRACE := -pg
endif
+ifdef CONFIG_FTRACE_MCOUNT_RECORD
+ # gcc 5 supports generating the mcount tables directly
+ ifeq ($(call cc-option-yn,-mrecord-mcount),y)
+ CC_FLAGS_FTRACE += -mrecord-mcount
+ export CC_USING_RECORD_MCOUNT := 1
+ endif
+endif
export CC_FLAGS_FTRACE
ifdef CONFIG_HAVE_FENTRY
CC_USING_FENTRY := $(call cc-option, -mfentry -DCC_USING_FENTRY)
diff --git a/arch/arm/boot/dts/lpc32xx.dtsi b/arch/arm/boot/dts/lpc32xx.dtsi
index 2802c9565b6c..976a75a4eb2c 100644
--- a/arch/arm/boot/dts/lpc32xx.dtsi
+++ b/arch/arm/boot/dts/lpc32xx.dtsi
@@ -323,9 +323,6 @@

clocks = <&xtal_32k>, <&xtal>;
clock-names = "xtal_32k", "xtal";
-
- assigned-clocks = <&clk LPC32XX_CLK_HCLK_PLL>;
- assigned-clock-rates = <208000000>;
};
};

diff --git a/arch/arm/xen/p2m.c b/arch/arm/xen/p2m.c
index 0ed01f2d5ee4..02579e6569f0 100644
--- a/arch/arm/xen/p2m.c
+++ b/arch/arm/xen/p2m.c
@@ -93,8 +93,10 @@ int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops,
for (i = 0; i < count; i++) {
if (map_ops[i].status)
continue;
- set_phys_to_machine(map_ops[i].host_addr >> XEN_PAGE_SHIFT,
- map_ops[i].dev_bus_addr >> XEN_PAGE_SHIFT);
+ if (unlikely(!set_phys_to_machine(map_ops[i].host_addr >> XEN_PAGE_SHIFT,
+ map_ops[i].dev_bus_addr >> XEN_PAGE_SHIFT))) {
+ return -ENOMEM;
+ }
}

return 0;
diff --git a/arch/h8300/kernel/asm-offsets.c b/arch/h8300/kernel/asm-offsets.c
index dc2d16ce8a0d..3e33a9844d99 100644
--- a/arch/h8300/kernel/asm-offsets.c
+++ b/arch/h8300/kernel/asm-offsets.c
@@ -62,6 +62,9 @@ int main(void)
OFFSET(TI_FLAGS, thread_info, flags);
OFFSET(TI_CPU, thread_info, cpu);
OFFSET(TI_PRE, thread_info, preempt_count);
+#ifdef CONFIG_PREEMPTION
+ DEFINE(TI_PRE_COUNT, offsetof(struct thread_info, preempt_count));
+#endif

return 0;
}
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index a95d414663b1..9f0099c46c88 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -61,6 +61,9 @@ endif
KBUILD_CFLAGS += -mno-sse -mno-mmx -mno-sse2 -mno-3dnow
KBUILD_CFLAGS += $(call cc-option,-mno-avx,)

+# Intel CET isn't enabled in the kernel
+KBUILD_CFLAGS += $(call cc-option,-fcf-protection=none)
+
ifeq ($(CONFIG_X86_32),y)
BITS := 32
UTS_MACHINE := i386
@@ -137,9 +140,6 @@ else
KBUILD_CFLAGS += -mno-red-zone
KBUILD_CFLAGS += -mcmodel=kernel

- # Intel CET isn't enabled in the kernel
- KBUILD_CFLAGS += $(call cc-option,-fcf-protection=none)
-
# -funit-at-a-time shrinks the kernel .text considerably
# unfortunately it makes reading oopses harder.
KBUILD_CFLAGS += $(call cc-option,-funit-at-a-time)
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index 37129db76d33..fbf8508e558a 100644
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -725,7 +725,8 @@ int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops,
unsigned long mfn, pfn;

/* Do not add to override if the map failed. */
- if (map_ops[i].status)
+ if (map_ops[i].status != GNTST_okay ||
+ (kmap_ops && kmap_ops[i].status != GNTST_okay))
continue;

if (map_ops[i].flags & GNTMAP_contains_pte) {
@@ -763,17 +764,15 @@ int clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops,
unsigned long mfn = __pfn_to_mfn(page_to_pfn(pages[i]));
unsigned long pfn = page_to_pfn(pages[i]);

- if (mfn == INVALID_P2M_ENTRY || !(mfn & FOREIGN_FRAME_BIT)) {
+ if (mfn != INVALID_P2M_ENTRY && (mfn & FOREIGN_FRAME_BIT))
+ set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
+ else
ret = -EINVAL;
- goto out;
- }
-
- set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
}
if (kunmap_ops)
ret = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
- kunmap_ops, count);
-out:
+ kunmap_ops, count) ?: ret;
+
return ret;
}
EXPORT_SYMBOL_GPL(clear_foreign_p2m_mapping);
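
The "?:" in the last hunk is the GNU "elvis" extension: "x ?: y" yields x
when x is non-zero, else y, evaluating x only once. It lets a failing
GNTTABOP_unmap_grant_ref hypercall override ret, while an earlier per-frame
-EINVAL survives when the hypercall itself succeeds. An illustrative sketch
of the semantics, not part of the patch:

	int frame_err = -EINVAL;	/* earlier loop iteration failed */
	int hcall_ret = 0;		/* hypercall itself succeeded */
	int ret = hcall_ret ?: frame_err;	/* ret == -EINVAL */
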
diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
index 4f643a87f9c7..2b739ba841b1 100644
--- a/drivers/block/xen-blkback/blkback.c
+++ b/drivers/block/xen-blkback/blkback.c
@@ -843,8 +843,11 @@ static int xen_blkbk_map(struct xen_blkif_ring *ring,
pages[i]->page = persistent_gnt->page;
pages[i]->persistent_gnt = persistent_gnt;
} else {
- if (get_free_page(ring, &pages[i]->page))
- goto out_of_memory;
+ if (get_free_page(ring, &pages[i]->page)) {
+ put_free_pages(ring, pages_to_gnt, segs_to_map);
+ ret = -ENOMEM;
+ goto out;
+ }
addr = vaddr(pages[i]->page);
pages_to_gnt[segs_to_map] = pages[i]->page;
pages[i]->persistent_gnt = NULL;
@@ -860,10 +863,8 @@ static int xen_blkbk_map(struct xen_blkif_ring *ring,
break;
}

- if (segs_to_map) {
+ if (segs_to_map)
ret = gnttab_map_refs(map, NULL, pages_to_gnt, segs_to_map);
- BUG_ON(ret);
- }

/*
* Now swizzle the MFN in our domain with the MFN from the other domain
@@ -878,7 +879,7 @@ static int xen_blkbk_map(struct xen_blkif_ring *ring,
pr_debug("invalid buffer -- could not remap it\n");
put_free_pages(ring, &pages[seg_idx]->page, 1);
pages[seg_idx]->handle = BLKBACK_INVALID_HANDLE;
- ret |= 1;
+ ret |= !ret;
goto next;
}
pages[seg_idx]->handle = map[new_map_idx].handle;
@@ -930,17 +931,18 @@ static int xen_blkbk_map(struct xen_blkif_ring *ring,
}
segs_to_map = 0;
last_map = map_until;
- if (map_until != num)
+ if (!ret && map_until != num)
goto again;

- return ret;
-
-out_of_memory:
- pr_alert("%s: out of memory\n", __func__);
- put_free_pages(ring, pages_to_gnt, segs_to_map);
- for (i = last_map; i < num; i++)
+out:
+ for (i = last_map; i < num; i++) {
+ /* Don't zap current batch's valid persistent grants. */
+ if(i >= last_map + segs_to_map)
+ pages[i]->persistent_gnt = NULL;
pages[i]->handle = BLKBACK_INVALID_HANDLE;
- return -ENOMEM;
+ }
+
+ return ret;
}

static int xen_blkbk_map_seg(struct pending_req *pending_req)
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c
index f4d75ffe3d8a..7f01fb91ea66 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c
@@ -518,7 +518,10 @@ static ssize_t iwl_dbgfs_os_device_timediff_read(struct file *file,
const size_t bufsz = sizeof(buf);
int pos = 0;

+ mutex_lock(&mvm->mutex);
iwl_mvm_get_sync_time(mvm, &curr_gp2, &curr_os);
+ mutex_unlock(&mvm->mutex);
+
do_div(curr_os, NSEC_PER_USEC);
diff = curr_os - curr_gp2;
pos += scnprintf(buf + pos, bufsz - pos, "diff=%lld\n", diff);
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c
index 6d38eec3f9d3..a78aaf17116e 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c
@@ -1104,6 +1104,7 @@ static void iwl_mvm_reprobe_wk(struct work_struct *wk)
reprobe = container_of(wk, struct iwl_mvm_reprobe, work);
if (device_reprobe(reprobe->dev))
dev_err(reprobe->dev, "reprobe failed!\n");
+ put_device(reprobe->dev);
kfree(reprobe);
module_put(THIS_MODULE);
}
@@ -1202,7 +1203,7 @@ void iwl_mvm_nic_restart(struct iwl_mvm *mvm, bool fw_error)
module_put(THIS_MODULE);
return;
}
- reprobe->dev = mvm->trans->dev;
+ reprobe->dev = get_device(mvm->trans->dev);
INIT_WORK(&reprobe->work, iwl_mvm_reprobe_wk);
schedule_work(&reprobe->work);
} else if (mvm->cur_ucode == IWL_UCODE_REGULAR) {
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c
index 174e45d78c46..ff564198d2ce 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c
@@ -676,6 +676,11 @@ static void iwl_pcie_txq_unmap(struct iwl_trans *trans, int txq_id)
struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
struct iwl_txq *txq = &trans_pcie->txq[txq_id];

+ if (!txq) {
+ IWL_ERR(trans, "Trying to free a queue that wasn't allocated?\n");
+ return;
+ }
+
spin_lock_bh(&txq->lock);
while (txq->write_ptr != txq->read_ptr) {
IWL_DEBUG_TX_REPLY(trans, "Q %d Free %d\n",
diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index fd2ac6cd0c69..0024200c30ce 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -1328,13 +1328,11 @@ int xenvif_tx_action(struct xenvif_queue *queue, int budget)
return 0;

gnttab_batch_copy(queue->tx_copy_ops, nr_cops);
- if (nr_mops != 0) {
+ if (nr_mops != 0)
ret = gnttab_map_refs(queue->tx_map_ops,
NULL,
queue->pages_to_map,
nr_mops);
- BUG_ON(ret);
- }

work_done = xenvif_tx_submit(queue);

diff --git a/drivers/net/xen-netback/rx.c b/drivers/net/xen-netback/rx.c
index f152246c7dfb..ddfb1cfa2dd9 100644
--- a/drivers/net/xen-netback/rx.c
+++ b/drivers/net/xen-netback/rx.c
@@ -38,10 +38,15 @@ static bool xenvif_rx_ring_slots_available(struct xenvif_queue *queue)
RING_IDX prod, cons;
struct sk_buff *skb;
int needed;
+ unsigned long flags;
+
+ spin_lock_irqsave(&queue->rx_queue.lock, flags);

skb = skb_peek(&queue->rx_queue);
- if (!skb)
+ if (!skb) {
+ spin_unlock_irqrestore(&queue->rx_queue.lock, flags);
return false;
+ }

needed = DIV_ROUND_UP(skb->len, XEN_PAGE_SIZE);
if (skb_is_gso(skb))
@@ -49,6 +54,8 @@ static bool xenvif_rx_ring_slots_available(struct xenvif_queue *queue)
if (skb->sw_hash)
needed++;

+ spin_unlock_irqrestore(&queue->rx_queue.lock, flags);
+
do {
prod = queue->rx.sring->req_prod;
cons = queue->rx.req_cons;
diff --git a/drivers/remoteproc/qcom_q6v5_pil.c b/drivers/remoteproc/qcom_q6v5_pil.c
index 2e0caaaa766a..72fc33bba99c 100644
--- a/drivers/remoteproc/qcom_q6v5_pil.c
+++ b/drivers/remoteproc/qcom_q6v5_pil.c
@@ -193,6 +193,12 @@ static int q6v5_load(struct rproc *rproc, const struct firmware *fw)
{
struct q6v5 *qproc = rproc->priv;

+ /* MBA is restricted to a maximum size of 1M */
+ if (fw->size > qproc->mba_size || fw->size > SZ_1M) {
+ dev_err(qproc->dev, "MBA firmware load failed\n");
+ return -EINVAL;
+ }
+
memcpy(qproc->mba_region, fw->data, fw->size);

return 0;
diff --git a/drivers/scsi/qla2xxx/qla_tmpl.c b/drivers/scsi/qla2xxx/qla_tmpl.c
index 9c2c7fe61280..ba83c36b76bd 100644
--- a/drivers/scsi/qla2xxx/qla_tmpl.c
+++ b/drivers/scsi/qla2xxx/qla_tmpl.c
@@ -878,7 +878,8 @@ qla27xx_template_checksum(void *p, ulong size)
static inline int
qla27xx_verify_template_checksum(struct qla27xx_fwdt_template *tmp)
{
- return qla27xx_template_checksum(tmp, tmp->template_size) == 0;
+ return qla27xx_template_checksum(tmp,
+ le32_to_cpu(tmp->template_size)) == 0;
}

static inline int
@@ -894,7 +895,7 @@ qla27xx_execute_fwdt_template(struct scsi_qla_host *vha)
ulong len;

if (qla27xx_fwdt_template_valid(tmp)) {
- len = tmp->template_size;
+ len = le32_to_cpu(tmp->template_size);
tmp = memcpy(vha->hw->fw_dump, tmp, len);
ql27xx_edit_template(vha, tmp);
qla27xx_walk_template(vha, tmp, tmp, &len);
@@ -910,7 +911,7 @@ qla27xx_fwdt_calculate_dump_size(struct scsi_qla_host *vha)
ulong len = 0;

if (qla27xx_fwdt_template_valid(tmp)) {
- len = tmp->template_size;
+ len = le32_to_cpu(tmp->template_size);
qla27xx_walk_template(vha, tmp, NULL, &len);
}

@@ -922,7 +923,7 @@ qla27xx_fwdt_template_size(void *p)
{
struct qla27xx_fwdt_template *tmp = p;

- return tmp->template_size;
+ return le32_to_cpu(tmp->template_size);
}

ulong
diff --git a/drivers/scsi/qla2xxx/qla_tmpl.h b/drivers/scsi/qla2xxx/qla_tmpl.h
index 141c1c5e73f4..2d3e1a8349b3 100644
--- a/drivers/scsi/qla2xxx/qla_tmpl.h
+++ b/drivers/scsi/qla2xxx/qla_tmpl.h
@@ -13,7 +13,7 @@
struct __packed qla27xx_fwdt_template {
uint32_t template_type;
uint32_t entry_offset;
- uint32_t template_size;
+ __le32 template_size;
uint32_t reserved_1;

uint32_t entry_count;
diff --git a/drivers/usb/dwc3/ulpi.c b/drivers/usb/dwc3/ulpi.c
index bd86f84f3790..3862edf59f7d 100644
--- a/drivers/usb/dwc3/ulpi.c
+++ b/drivers/usb/dwc3/ulpi.c
@@ -10,6 +10,8 @@
* published by the Free Software Foundation.
*/

+#include <linux/delay.h>
+#include <linux/time64.h>
#include <linux/ulpi/regs.h>

#include "core.h"
@@ -20,12 +22,22 @@
DWC3_GUSB2PHYACC_ADDR(ULPI_ACCESS_EXTENDED) | \
DWC3_GUSB2PHYACC_EXTEND_ADDR(a) : DWC3_GUSB2PHYACC_ADDR(a))

-static int dwc3_ulpi_busyloop(struct dwc3 *dwc)
+#define DWC3_ULPI_BASE_DELAY DIV_ROUND_UP(NSEC_PER_SEC, 60000000L)
+
+static int dwc3_ulpi_busyloop(struct dwc3 *dwc, u8 addr, bool read)
{
- unsigned count = 1000;
+ unsigned long ns = 5L * DWC3_ULPI_BASE_DELAY;
+ unsigned int count = 1000;
u32 reg;

+ if (addr >= ULPI_EXT_VENDOR_SPECIFIC)
+ ns += DWC3_ULPI_BASE_DELAY;
+
+ if (read)
+ ns += DWC3_ULPI_BASE_DELAY;
+
while (count--) {
+ ndelay(ns);
reg = dwc3_readl(dwc->regs, DWC3_GUSB2PHYACC(0));
if (!(reg & DWC3_GUSB2PHYACC_BUSY))
return 0;
@@ -44,7 +56,7 @@ static int dwc3_ulpi_read(struct device *dev, u8 addr)
reg = DWC3_GUSB2PHYACC_NEWREGREQ | DWC3_ULPI_ADDR(addr);
dwc3_writel(dwc->regs, DWC3_GUSB2PHYACC(0), reg);

- ret = dwc3_ulpi_busyloop(dwc);
+ ret = dwc3_ulpi_busyloop(dwc, addr, true);
if (ret)
return ret;

@@ -62,7 +74,7 @@ static int dwc3_ulpi_write(struct device *dev, u8 addr, u8 val)
reg |= DWC3_GUSB2PHYACC_WRITE | val;
dwc3_writel(dwc->regs, DWC3_GUSB2PHYACC(0), reg);

- return dwc3_ulpi_busyloop(dwc);
+ return dwc3_ulpi_busyloop(dwc, addr, false);
}

static const struct ulpi_ops dwc3_ulpi_ops = {
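
The delay constant above falls out of the 60 MHz ULPI clock the macro
assumes: DIV_ROUND_UP(NSEC_PER_SEC, 60000000L) = DIV_ROUND_UP(1000000000,
60000000) = 17 ns per clock. Per the new code, a plain access waits 5
clocks, an extended-address access (addr >= ULPI_EXT_VENDOR_SPECIFIC) one
more, and a read another one, so roughly:

	plain write:             5 * 17 ns =  85 ns
	extended-address read:   7 * 17 ns = 119 ns

elapse before the first poll of DWC3_GUSB2PHYACC_BUSY, instead of hammering
the register immediately.
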
diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index 910b5d40c6e9..69d59102ff1b 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -293,36 +293,47 @@ static int map_grant_pages(struct grant_map *map)
* to the kernel linear addresses of the struct pages.
* These ptes are completely different from the user ptes dealt
* with find_grant_ptes.
+ * Note that GNTMAP_device_map isn't needed here: The
+ * dev_bus_addr output field gets consumed only from ->map_ops,
+ * and by not requesting it when mapping we also avoid needing
+ * to mirror dev_bus_addr into ->unmap_ops (and holding an extra
+ * reference to the page in the hypervisor).
*/
+ unsigned int flags = (map->flags & ~GNTMAP_device_map) |
+ GNTMAP_host_map;
+
for (i = 0; i < map->count; i++) {
unsigned long address = (unsigned long)
pfn_to_kaddr(page_to_pfn(map->pages[i]));
BUG_ON(PageHighMem(map->pages[i]));

- gnttab_set_map_op(&map->kmap_ops[i], address,
- map->flags | GNTMAP_host_map,
+ gnttab_set_map_op(&map->kmap_ops[i], address, flags,
map->grants[i].ref,
map->grants[i].domid);
gnttab_set_unmap_op(&map->kunmap_ops[i], address,
- map->flags | GNTMAP_host_map, -1);
+ flags, -1);
}
}

pr_debug("map %d+%d\n", map->index, map->count);
err = gnttab_map_refs(map->map_ops, use_ptemod ? map->kmap_ops : NULL,
map->pages, map->count);
- if (err)
- return err;

for (i = 0; i < map->count; i++) {
- if (map->map_ops[i].status) {
+ if (map->map_ops[i].status == GNTST_okay)
+ map->unmap_ops[i].handle = map->map_ops[i].handle;
+ else if (!err)
err = -EINVAL;
- continue;
- }

- map->unmap_ops[i].handle = map->map_ops[i].handle;
- if (use_ptemod)
- map->kunmap_ops[i].handle = map->kmap_ops[i].handle;
+ if (map->flags & GNTMAP_device_map)
+ map->unmap_ops[i].dev_bus_addr = map->map_ops[i].dev_bus_addr;
+
+ if (use_ptemod) {
+ if (map->kmap_ops[i].status == GNTST_okay)
+ map->kunmap_ops[i].handle = map->kmap_ops[i].handle;
+ else if (!err)
+ err = -EINVAL;
+ }
}
return err;
}
diff --git a/drivers/xen/xen-scsiback.c b/drivers/xen/xen-scsiback.c
index 3243d917651a..4bba877ef547 100644
--- a/drivers/xen/xen-scsiback.c
+++ b/drivers/xen/xen-scsiback.c
@@ -423,12 +423,12 @@ static int scsiback_gnttab_data_map_batch(struct gnttab_map_grant_ref *map,
return 0;

err = gnttab_map_refs(map, NULL, pg, cnt);
- BUG_ON(err);
for (i = 0; i < cnt; i++) {
if (unlikely(map[i].status != GNTST_okay)) {
pr_err("invalid buffer -- could not remap it\n");
map[i].handle = SCSIBACK_INVALID_HANDLE;
- err = -ENOMEM;
+ if (!err)
+ err = -ENOMEM;
} else {
get_page(pg[i]);
}
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index f978ae2bb846..2de656ecc48b 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -1971,7 +1971,7 @@ void wb_workfn(struct work_struct *work)
struct bdi_writeback, dwork);
long pages_written;

- set_worker_desc("flush-%s", dev_name(wb->bdi->dev));
+ set_worker_desc("flush-%s", bdi_dev_name(wb->bdi));
current->flags |= PF_SWAPWRITE;

if (likely(!current_is_workqueue_rescuer() ||
diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
index 299dbf59f28f..3a583aa1fafe 100644
--- a/fs/overlayfs/copy_up.c
+++ b/fs/overlayfs/copy_up.c
@@ -92,6 +92,14 @@ int ovl_copy_xattr(struct dentry *old, struct dentry *new)

if (ovl_is_private_xattr(name))
continue;
+
+ error = security_inode_copy_up_xattr(name);
+ if (error < 0 && error != -EOPNOTSUPP)
+ break;
+ if (error == 1) {
+ error = 0;
+ continue; /* Discard */
+ }
retry:
size = vfs_getxattr(old, name, value, value_size);
if (size == -ERANGE)
@@ -115,13 +123,6 @@ int ovl_copy_xattr(struct dentry *old, struct dentry *new)
goto retry;
}

- error = security_inode_copy_up_xattr(name);
- if (error < 0 && error != -EOPNOTSUPP)
- break;
- if (error == 1) {
- error = 0;
- continue; /* Discard */
- }
error = vfs_setxattr(new, name, value, size, 0);
if (error)
break;
diff --git a/fs/squashfs/export.c b/fs/squashfs/export.c
index 8073b6532cf0..d2a806416c3a 100644
--- a/fs/squashfs/export.c
+++ b/fs/squashfs/export.c
@@ -54,12 +54,17 @@ static long long squashfs_inode_lookup(struct super_block *sb, int ino_num)
struct squashfs_sb_info *msblk = sb->s_fs_info;
int blk = SQUASHFS_LOOKUP_BLOCK(ino_num - 1);
int offset = SQUASHFS_LOOKUP_BLOCK_OFFSET(ino_num - 1);
- u64 start = le64_to_cpu(msblk->inode_lookup_table[blk]);
+ u64 start;
__le64 ino;
int err;

TRACE("Entered squashfs_inode_lookup, inode_number = %d\n", ino_num);

+ if (ino_num == 0 || (ino_num - 1) >= msblk->inodes)
+ return -EINVAL;
+
+ start = le64_to_cpu(msblk->inode_lookup_table[blk]);
+
err = squashfs_read_metadata(sb, &ino, &start, &offset, sizeof(ino));
if (err < 0)
return err;
@@ -124,7 +129,10 @@ __le64 *squashfs_read_inode_lookup_table(struct super_block *sb,
u64 lookup_table_start, u64 next_table, unsigned int inodes)
{
unsigned int length = SQUASHFS_LOOKUP_BLOCK_BYTES(inodes);
+ unsigned int indexes = SQUASHFS_LOOKUP_BLOCKS(inodes);
+ int n;
__le64 *table;
+ u64 start, end;

TRACE("In read_inode_lookup_table, length %d\n", length);

@@ -134,20 +142,37 @@ __le64 *squashfs_read_inode_lookup_table(struct super_block *sb,
if (inodes == 0)
return ERR_PTR(-EINVAL);

- /* length bytes should not extend into the next table - this check
- * also traps instances where lookup_table_start is incorrectly larger
- * than the next table start
+ /*
+ * The computed size of the lookup table (length bytes) should exactly
+ * match the table start and end points
*/
- if (lookup_table_start + length > next_table)
+ if (length != (next_table - lookup_table_start))
return ERR_PTR(-EINVAL);

table = squashfs_read_table(sb, lookup_table_start, length);
+ if (IS_ERR(table))
+ return table;

/*
- * table[0] points to the first inode lookup table metadata block,
- * this should be less than lookup_table_start
+ * table[0], table[1], ... table[indexes - 1] store the locations
+ * of the compressed inode lookup blocks. Each entry should be
+ * less than the next (i.e. table[0] < table[1]), and the difference
+ * between them should be SQUASHFS_METADATA_SIZE or less.
+ * table[indexes - 1] should be less than lookup_table_start, and
+ * again the difference should be SQUASHFS_METADATA_SIZE or less
*/
- if (!IS_ERR(table) && le64_to_cpu(table[0]) >= lookup_table_start) {
+ for (n = 0; n < (indexes - 1); n++) {
+ start = le64_to_cpu(table[n]);
+ end = le64_to_cpu(table[n + 1]);
+
+ if (start >= end || (end - start) > SQUASHFS_METADATA_SIZE) {
+ kfree(table);
+ return ERR_PTR(-EINVAL);
+ }
+ }
+
+ start = le64_to_cpu(table[indexes - 1]);
+ if (start >= lookup_table_start || (lookup_table_start - start) > SQUASHFS_METADATA_SIZE) {
kfree(table);
return ERR_PTR(-EINVAL);
}
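
The new loop encodes the on-disk invariant the old single check missed:
each table entry is the start offset of a compressed metadata block, the
blocks are laid out back to back, and a block occupies at most
SQUASHFS_METADATA_SIZE (8192 bytes), so consecutive entries must be
strictly increasing and at most 8 KiB apart. A worked example with made-up
offsets:

	table[0]=0x1000  table[1]=0x1a00  table[2]=0x3a00
	  0x1a00 - 0x1000 = 0x0a00 <= 8192 -> ok
	  0x3a00 - 0x1a00 = 0x2000 <= 8192 -> ok

	table[0]=0x1000  table[1]=0x1000
	  start >= end -> -EINVAL (corrupt or crafted image)

The same pattern is applied to the id and xattr index tables below.
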
diff --git a/fs/squashfs/id.c b/fs/squashfs/id.c
index d38ea3dab951..8ccc0e3f6ea5 100644
--- a/fs/squashfs/id.c
+++ b/fs/squashfs/id.c
@@ -48,10 +48,15 @@ int squashfs_get_id(struct super_block *sb, unsigned int index,
struct squashfs_sb_info *msblk = sb->s_fs_info;
int block = SQUASHFS_ID_BLOCK(index);
int offset = SQUASHFS_ID_BLOCK_OFFSET(index);
- u64 start_block = le64_to_cpu(msblk->id_table[block]);
+ u64 start_block;
__le32 disk_id;
int err;

+ if (index >= msblk->ids)
+ return -EINVAL;
+
+ start_block = le64_to_cpu(msblk->id_table[block]);
+
err = squashfs_read_metadata(sb, &disk_id, &start_block, &offset,
sizeof(disk_id));
if (err < 0)
@@ -69,7 +74,10 @@ __le64 *squashfs_read_id_index_table(struct super_block *sb,
u64 id_table_start, u64 next_table, unsigned short no_ids)
{
unsigned int length = SQUASHFS_ID_BLOCK_BYTES(no_ids);
+ unsigned int indexes = SQUASHFS_ID_BLOCKS(no_ids);
+ int n;
__le64 *table;
+ u64 start, end;

TRACE("In read_id_index_table, length %d\n", length);

@@ -80,20 +88,36 @@ __le64 *squashfs_read_id_index_table(struct super_block *sb,
return ERR_PTR(-EINVAL);

/*
- * length bytes should not extend into the next table - this check
- * also traps instances where id_table_start is incorrectly larger
- * than the next table start
+ * The computed size of the index table (length bytes) should exactly
+ * match the table start and end points
*/
- if (id_table_start + length > next_table)
+ if (length != (next_table - id_table_start))
return ERR_PTR(-EINVAL);

table = squashfs_read_table(sb, id_table_start, length);
+ if (IS_ERR(table))
+ return table;

/*
- * table[0] points to the first id lookup table metadata block, this
- * should be less than id_table_start
+ * table[0], table[1], ... table[indexes - 1] store the locations
+ * of the compressed id blocks. Each entry should be less than
+ * the next (i.e. table[0] < table[1]), and the difference between them
+ * should be SQUASHFS_METADATA_SIZE or less. table[indexes - 1]
+ * should be less than id_table_start, and again the difference
+ * should be SQUASHFS_METADATA_SIZE or less
*/
- if (!IS_ERR(table) && le64_to_cpu(table[0]) >= id_table_start) {
+ for (n = 0; n < (indexes - 1); n++) {
+ start = le64_to_cpu(table[n]);
+ end = le64_to_cpu(table[n + 1]);
+
+ if (start >= end || (end - start) > SQUASHFS_METADATA_SIZE) {
+ kfree(table);
+ return ERR_PTR(-EINVAL);
+ }
+ }
+
+ start = le64_to_cpu(table[indexes - 1]);
+ if (start >= id_table_start || (id_table_start - start) > SQUASHFS_METADATA_SIZE) {
kfree(table);
return ERR_PTR(-EINVAL);
}
diff --git a/fs/squashfs/squashfs_fs_sb.h b/fs/squashfs/squashfs_fs_sb.h
index ef69c31947bf..5234c19a0eab 100644
--- a/fs/squashfs/squashfs_fs_sb.h
+++ b/fs/squashfs/squashfs_fs_sb.h
@@ -77,5 +77,6 @@ struct squashfs_sb_info {
unsigned int inodes;
unsigned int fragments;
int xattr_ids;
+ unsigned int ids;
};
#endif
diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c
index 1516bb779b8d..5abc9d03397c 100644
--- a/fs/squashfs/super.c
+++ b/fs/squashfs/super.c
@@ -176,6 +176,7 @@ static int squashfs_fill_super(struct super_block *sb, void *data, int silent)
msblk->directory_table = le64_to_cpu(sblk->directory_table_start);
msblk->inodes = le32_to_cpu(sblk->inodes);
msblk->fragments = le32_to_cpu(sblk->fragments);
+ msblk->ids = le16_to_cpu(sblk->no_ids);
flags = le16_to_cpu(sblk->flags);

TRACE("Found valid superblock on %pg\n", sb->s_bdev);
@@ -187,7 +188,7 @@ static int squashfs_fill_super(struct super_block *sb, void *data, int silent)
TRACE("Block size %d\n", msblk->block_size);
TRACE("Number of inodes %d\n", msblk->inodes);
TRACE("Number of fragments %d\n", msblk->fragments);
- TRACE("Number of ids %d\n", le16_to_cpu(sblk->no_ids));
+ TRACE("Number of ids %d\n", msblk->ids);
TRACE("sblk->inode_table_start %llx\n", msblk->inode_table);
TRACE("sblk->directory_table_start %llx\n", msblk->directory_table);
TRACE("sblk->fragment_table_start %llx\n",
@@ -244,8 +245,7 @@ static int squashfs_fill_super(struct super_block *sb, void *data, int silent)
allocate_id_index_table:
/* Allocate and read id index table */
msblk->id_table = squashfs_read_id_index_table(sb,
- le64_to_cpu(sblk->id_table_start), next_table,
- le16_to_cpu(sblk->no_ids));
+ le64_to_cpu(sblk->id_table_start), next_table, msblk->ids);
if (IS_ERR(msblk->id_table)) {
ERROR("unable to read id index table\n");
err = PTR_ERR(msblk->id_table);
diff --git a/fs/squashfs/xattr.h b/fs/squashfs/xattr.h
index afe70f815e3d..86b0a0073e51 100644
--- a/fs/squashfs/xattr.h
+++ b/fs/squashfs/xattr.h
@@ -30,8 +30,16 @@ extern int squashfs_xattr_lookup(struct super_block *, unsigned int, int *,
static inline __le64 *squashfs_read_xattr_id_table(struct super_block *sb,
u64 start, u64 *xattr_table_start, int *xattr_ids)
{
+ struct squashfs_xattr_id_table *id_table;
+
+ id_table = squashfs_read_table(sb, start, sizeof(*id_table));
+ if (IS_ERR(id_table))
+ return (__le64 *) id_table;
+
+ *xattr_table_start = le64_to_cpu(id_table->xattr_table_start);
+ kfree(id_table);
+
ERROR("Xattrs in filesystem, these will be ignored\n");
- *xattr_table_start = start;
return ERR_PTR(-ENOTSUPP);
}

diff --git a/fs/squashfs/xattr_id.c b/fs/squashfs/xattr_id.c
index c89607d690c4..3a655d879600 100644
--- a/fs/squashfs/xattr_id.c
+++ b/fs/squashfs/xattr_id.c
@@ -44,10 +44,15 @@ int squashfs_xattr_lookup(struct super_block *sb, unsigned int index,
struct squashfs_sb_info *msblk = sb->s_fs_info;
int block = SQUASHFS_XATTR_BLOCK(index);
int offset = SQUASHFS_XATTR_BLOCK_OFFSET(index);
- u64 start_block = le64_to_cpu(msblk->xattr_id_table[block]);
+ u64 start_block;
struct squashfs_xattr_id id;
int err;

+ if (index >= msblk->xattr_ids)
+ return -EINVAL;
+
+ start_block = le64_to_cpu(msblk->xattr_id_table[block]);
+
err = squashfs_read_metadata(sb, &id, &start_block, &offset,
sizeof(id));
if (err < 0)
@@ -63,13 +68,17 @@ int squashfs_xattr_lookup(struct super_block *sb, unsigned int index,
/*
* Read uncompressed xattr id lookup table indexes from disk into memory
*/
-__le64 *squashfs_read_xattr_id_table(struct super_block *sb, u64 start,
+__le64 *squashfs_read_xattr_id_table(struct super_block *sb, u64 table_start,
u64 *xattr_table_start, int *xattr_ids)
{
- unsigned int len;
+ struct squashfs_sb_info *msblk = sb->s_fs_info;
+ unsigned int len, indexes;
struct squashfs_xattr_id_table *id_table;
+ __le64 *table;
+ u64 start, end;
+ int n;

- id_table = squashfs_read_table(sb, start, sizeof(*id_table));
+ id_table = squashfs_read_table(sb, table_start, sizeof(*id_table));
if (IS_ERR(id_table))
return (__le64 *) id_table;

@@ -83,13 +92,52 @@ __le64 *squashfs_read_xattr_id_table(struct super_block *sb, u64 start,
if (*xattr_ids == 0)
return ERR_PTR(-EINVAL);

- /* xattr_table should be less than start */
- if (*xattr_table_start >= start)
+ len = SQUASHFS_XATTR_BLOCK_BYTES(*xattr_ids);
+ indexes = SQUASHFS_XATTR_BLOCKS(*xattr_ids);
+
+ /*
+ * The computed size of the index table (len bytes) should exactly
+ * match the table start and end points
+ */
+ start = table_start + sizeof(*id_table);
+ end = msblk->bytes_used;
+
+ if (len != (end - start))
return ERR_PTR(-EINVAL);

- len = SQUASHFS_XATTR_BLOCK_BYTES(*xattr_ids);
+ table = squashfs_read_table(sb, start, len);
+ if (IS_ERR(table))
+ return table;
+
+ /* table[0], table[1], ... table[indexes - 1] store the locations
+ * of the compressed xattr id blocks. Each entry should be less than
+ * the next (i.e. table[0] < table[1]), and the difference between them
+ * should be SQUASHFS_METADATA_SIZE or less. table[indexes - 1]
+ * should be less than table_start, and again the difference
+ * should be SQUASHFS_METADATA_SIZE or less.
+ *
+ * Finally xattr_table_start should be less than table[0].
+ */
+ for (n = 0; n < (indexes - 1); n++) {
+ start = le64_to_cpu(table[n]);
+ end = le64_to_cpu(table[n + 1]);
+
+ if (start >= end || (end - start) > SQUASHFS_METADATA_SIZE) {
+ kfree(table);
+ return ERR_PTR(-EINVAL);
+ }
+ }
+
+ start = le64_to_cpu(table[indexes - 1]);
+ if (start >= table_start || (table_start - start) > SQUASHFS_METADATA_SIZE) {
+ kfree(table);
+ return ERR_PTR(-EINVAL);
+ }

- TRACE("In read_xattr_index_table, length %d\n", len);
+ if (*xattr_table_start >= le64_to_cpu(table[0])) {
+ kfree(table);
+ return ERR_PTR(-EINVAL);
+ }

- return squashfs_read_table(sb, start + sizeof(*id_table), len);
+ return table;
}
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index 63f17b106a4a..57db558c9a61 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -12,6 +12,7 @@
#include <linux/fs.h>
#include <linux/sched.h>
#include <linux/blkdev.h>
+#include <linux/device.h>
#include <linux/writeback.h>
#include <linux/blk-cgroup.h>
#include <linux/backing-dev-defs.h>
@@ -517,4 +518,13 @@ static inline int bdi_rw_congested(struct backing_dev_info *bdi)
(1 << WB_async_congested));
}

+extern const char *bdi_unknown_name;
+
+static inline const char *bdi_dev_name(struct backing_dev_info *bdi)
+{
+ if (!bdi || !bdi->dev)
+ return bdi_unknown_name;
+ return dev_name(bdi->dev);
+}
+
#endif /* _LINUX_BACKING_DEV_H */
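
The helper centralizes a NULL check that the writeback tracepoints below
previously open-coded or skipped entirely. Sketch of the before/after at a
typical call site (taken from the tracepoint changes further down):

	/* before: oopses if the bdi was never registered or is
	 * already torn down (bdi->dev == NULL) */
	strncpy(name, dev_name(wb->bdi->dev), 32);

	/* after: degrades to "(unknown)" instead */
	strscpy_pad(name, bdi_dev_name(wb->bdi), 32);
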
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index b3d34d3e0e7e..9b8b014d13af 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -783,7 +783,9 @@ typedef int (*trace_func_graph_ent_t)(struct ftrace_graph_ent *); /* entry */
#ifdef CONFIG_FUNCTION_GRAPH_TRACER

/* for init task */
-#define INIT_FTRACE_GRAPH .ret_stack = NULL,
+#define INIT_FTRACE_GRAPH \
+ .ret_stack = NULL, \
+ .tracing_graph_pause = ATOMIC_INIT(0),

/*
* Stack of return addresses for functions
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 8b35bdbdc214..fd77f8303ab9 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -490,9 +490,21 @@ bool mem_cgroup_oom_synchronize(bool wait);
extern int do_swap_account;
#endif

-void lock_page_memcg(struct page *page);
+struct mem_cgroup *lock_page_memcg(struct page *page);
+void __unlock_page_memcg(struct mem_cgroup *memcg);
void unlock_page_memcg(struct page *page);

+static inline void __mem_cgroup_update_page_stat(struct page *page,
+ struct mem_cgroup *memcg,
+ enum mem_cgroup_stat_index idx,
+ int val)
+{
+ VM_BUG_ON(!(rcu_read_lock_held() || PageLocked(page)));
+
+ if (memcg && memcg->stat)
+ this_cpu_add(memcg->stat->count[idx], val);
+}
+
/**
* mem_cgroup_update_page_stat - update page state statistics
* @page: the page
@@ -508,13 +520,12 @@ void unlock_page_memcg(struct page *page);
* mem_cgroup_update_page_stat(page, state, -1);
* unlock_page(page) or unlock_page_memcg(page)
*/
+
static inline void mem_cgroup_update_page_stat(struct page *page,
enum mem_cgroup_stat_index idx, int val)
{
- VM_BUG_ON(!(rcu_read_lock_held() || PageLocked(page)));

- if (page->mem_cgroup)
- this_cpu_add(page->mem_cgroup->stat->count[idx], val);
+ __mem_cgroup_update_page_stat(page, page->mem_cgroup, idx, val);
}

static inline void mem_cgroup_inc_page_stat(struct page *page,
@@ -709,7 +720,12 @@ mem_cgroup_print_oom_info(struct mem_cgroup *memcg, struct task_struct *p)
{
}

-static inline void lock_page_memcg(struct page *page)
+static inline struct mem_cgroup *lock_page_memcg(struct page *page)
+{
+ return NULL;
+}
+
+static inline void __unlock_page_memcg(struct mem_cgroup *memcg)
{
}

@@ -745,6 +761,13 @@ static inline void mem_cgroup_update_page_stat(struct page *page,
{
}

+static inline void __mem_cgroup_update_page_stat(struct page *page,
+ struct mem_cgroup *memcg,
+ enum mem_cgroup_stat_index idx,
+ int nr)
+{
+}
+
static inline void mem_cgroup_inc_page_stat(struct page *page,
enum mem_cgroup_stat_index idx)
{
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 4d1b1056ac97..2aacafe2bce5 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3701,6 +3701,7 @@ static inline void netif_tx_disable(struct net_device *dev)

local_bh_disable();
cpu = smp_processor_id();
+ spin_lock(&dev->tx_global_lock);
for (i = 0; i < dev->num_tx_queues; i++) {
struct netdev_queue *txq = netdev_get_tx_queue(dev, i);

@@ -3708,6 +3709,7 @@ static inline void netif_tx_disable(struct net_device *dev)
netif_tx_stop_queue(txq);
__netif_tx_unlock(txq);
}
+ spin_unlock(&dev->tx_global_lock);
local_bh_enable();
}

diff --git a/include/linux/string.h b/include/linux/string.h
index 42eed573ebb6..66a91f5a3449 100644
--- a/include/linux/string.h
+++ b/include/linux/string.h
@@ -29,6 +29,10 @@ size_t strlcpy(char *, const char *, size_t);
#ifndef __HAVE_ARCH_STRSCPY
ssize_t strscpy(char *, const char *, size_t);
#endif
+
+/* Wraps calls to strscpy()/memset(), no arch specific code required */
+ssize_t strscpy_pad(char *dest, const char *src, size_t count);
+
#ifndef __HAVE_ARCH_STRCAT
extern char * strcat(char *, const char *);
#endif
diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h
index 56c48c884a24..bf0db32b40aa 100644
--- a/include/linux/sunrpc/xdr.h
+++ b/include/linux/sunrpc/xdr.h
@@ -23,8 +23,7 @@
#define XDR_QUADLEN(l) (((l) + 3) >> 2)

/*
- * Generic opaque `network object.' At the kernel level, this type
- * is used only by lockd.
+ * Generic opaque `network object.'
*/
#define XDR_MAX_NETOBJ 1024
struct xdr_netobj {
diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h
index ec964a924cd2..49a72adc7135 100644
--- a/include/trace/events/writeback.h
+++ b/include/trace/events/writeback.h
@@ -65,8 +65,9 @@ TRACE_EVENT(writeback_dirty_page,
),

TP_fast_assign(
- strncpy(__entry->name,
- mapping ? dev_name(inode_to_bdi(mapping->host)->dev) : "(unknown)", 32);
+ strscpy_pad(__entry->name,
+ bdi_dev_name(mapping ? inode_to_bdi(mapping->host) :
+ NULL), 32);
__entry->ino = mapping ? mapping->host->i_ino : 0;
__entry->index = page->index;
),
@@ -95,8 +96,7 @@ DECLARE_EVENT_CLASS(writeback_dirty_inode_template,
struct backing_dev_info *bdi = inode_to_bdi(inode);

/* may be called for files on pseudo FSes w/ unregistered bdi */
- strncpy(__entry->name,
- bdi->dev ? dev_name(bdi->dev) : "(unknown)", 32);
+ strscpy_pad(__entry->name, bdi_dev_name(bdi), 32);
__entry->ino = inode->i_ino;
__entry->state = inode->i_state;
__entry->flags = flags;
@@ -175,8 +175,8 @@ DECLARE_EVENT_CLASS(writeback_write_inode_template,
),

TP_fast_assign(
- strncpy(__entry->name,
- dev_name(inode_to_bdi(inode)->dev), 32);
+ strscpy_pad(__entry->name,
+ bdi_dev_name(inode_to_bdi(inode)), 32);
__entry->ino = inode->i_ino;
__entry->sync_mode = wbc->sync_mode;
__entry->cgroup_ino = __trace_wbc_assign_cgroup(wbc);
@@ -219,8 +219,7 @@ DECLARE_EVENT_CLASS(writeback_work_class,
__field(unsigned int, cgroup_ino)
),
TP_fast_assign(
- strncpy(__entry->name,
- wb->bdi->dev ? dev_name(wb->bdi->dev) : "(unknown)", 32);
+ strscpy_pad(__entry->name, bdi_dev_name(wb->bdi), 32);
__entry->nr_pages = work->nr_pages;
__entry->sb_dev = work->sb ? work->sb->s_dev : 0;
__entry->sync_mode = work->sync_mode;
@@ -273,7 +272,7 @@ DECLARE_EVENT_CLASS(writeback_class,
__field(unsigned int, cgroup_ino)
),
TP_fast_assign(
- strncpy(__entry->name, dev_name(wb->bdi->dev), 32);
+ strscpy_pad(__entry->name, bdi_dev_name(wb->bdi), 32);
__entry->cgroup_ino = __trace_wb_assign_cgroup(wb);
),
TP_printk("bdi %s: cgroup_ino=%u",
@@ -296,7 +295,7 @@ TRACE_EVENT(writeback_bdi_register,
__array(char, name, 32)
),
TP_fast_assign(
- strncpy(__entry->name, dev_name(bdi->dev), 32);
+ strscpy_pad(__entry->name, bdi_dev_name(bdi), 32);
),
TP_printk("bdi %s",
__entry->name
@@ -321,7 +320,7 @@ DECLARE_EVENT_CLASS(wbc_class,
),

TP_fast_assign(
- strncpy(__entry->name, dev_name(bdi->dev), 32);
+ strscpy_pad(__entry->name, bdi_dev_name(bdi), 32);
__entry->nr_to_write = wbc->nr_to_write;
__entry->pages_skipped = wbc->pages_skipped;
__entry->sync_mode = wbc->sync_mode;
@@ -372,7 +371,7 @@ TRACE_EVENT(writeback_queue_io,
__field(unsigned int, cgroup_ino)
),
TP_fast_assign(
- strncpy(__entry->name, dev_name(wb->bdi->dev), 32);
+ strscpy_pad(__entry->name, bdi_dev_name(wb->bdi), 32);
__entry->older = dirtied_before;
__entry->age = (jiffies - dirtied_before) * 1000 / HZ;
__entry->moved = moved;
@@ -457,7 +456,7 @@ TRACE_EVENT(bdi_dirty_ratelimit,
),

TP_fast_assign(
- strlcpy(__entry->bdi, dev_name(wb->bdi->dev), 32);
+ strscpy_pad(__entry->bdi, bdi_dev_name(wb->bdi), 32);
__entry->write_bw = KBps(wb->write_bandwidth);
__entry->avg_write_bw = KBps(wb->avg_write_bandwidth);
__entry->dirty_rate = KBps(dirty_rate);
@@ -522,7 +521,7 @@ TRACE_EVENT(balance_dirty_pages,

TP_fast_assign(
unsigned long freerun = (thresh + bg_thresh) / 2;
- strlcpy(__entry->bdi, dev_name(wb->bdi->dev), 32);
+ strscpy_pad(__entry->bdi, bdi_dev_name(wb->bdi), 32);

__entry->limit = global_wb_domain.dirty_limit;
__entry->setpoint = (global_wb_domain.dirty_limit +
@@ -582,8 +581,8 @@ TRACE_EVENT(writeback_sb_inodes_requeue,
),

TP_fast_assign(
- strncpy(__entry->name,
- dev_name(inode_to_bdi(inode)->dev), 32);
+ strscpy_pad(__entry->name,
+ bdi_dev_name(inode_to_bdi(inode)), 32);
__entry->ino = inode->i_ino;
__entry->state = inode->i_state;
__entry->dirtied_when = inode->dirtied_when;
@@ -656,8 +655,8 @@ DECLARE_EVENT_CLASS(writeback_single_inode_template,
),

TP_fast_assign(
- strncpy(__entry->name,
- dev_name(inode_to_bdi(inode)->dev), 32);
+ strscpy_pad(__entry->name,
+ bdi_dev_name(inode_to_bdi(inode)), 32);
__entry->ino = inode->i_ino;
__entry->state = inode->i_state;
__entry->dirtied_when = inode->dirtied_when;
diff --git a/include/xen/grant_table.h b/include/xen/grant_table.h
index 34b1379f9777..f9d8aac170fb 100644
--- a/include/xen/grant_table.h
+++ b/include/xen/grant_table.h
@@ -157,6 +157,7 @@ gnttab_set_map_op(struct gnttab_map_grant_ref *map, phys_addr_t addr,
map->flags = flags;
map->ref = ref;
map->dom = domid;
+ map->status = 1; /* arbitrary positive value */
}

static inline void
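
GNTST_okay is 0, so before this change a gnttab_map_grant_ref entry the
hypervisor never got around to processing would read back as success.
Seeding status with a positive sentinel means the callers fixed elsewhere
in this release (blkback, gntdev, scsiback, netback) can test it
unconditionally, along the lines of:

	/* untouched slots of a partially processed batch still
	 * carry the sentinel 1 and fail the GNTST_okay test */
	if (map_ops[i].status != GNTST_okay)
		continue;	/* skip / unwind, never treat as mapped */
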
diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
index a2a232dec236..2fdf6f96f976 100644
--- a/kernel/bpf/stackmap.c
+++ b/kernel/bpf/stackmap.c
@@ -70,6 +70,8 @@ static struct bpf_map *stack_map_alloc(union bpf_attr *attr)

/* hash table size must be power of 2 */
n_buckets = roundup_pow_of_two(attr->max_entries);
+ if (!n_buckets)
+ return ERR_PTR(-E2BIG);

cost = n_buckets * sizeof(struct stack_map_bucket *) + sizeof(*smap);
if (cost >= U32_MAX - PAGE_SIZE)
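
The !n_buckets test guards the overflow case of roundup_pow_of_two(),
which is essentially 1UL << fls_long(n - 1). On a 32-bit unsigned long:

	roundup_pow_of_two(0x80000001)
	  = 1UL << fls_long(0x80000000)	/* fls_long() == 32 */
	  = 1UL << 32			/* UB; 0 in practice */

so a user-supplied max_entries above 2^31 would previously yield a
zero-sized bucket array; now it is rejected with -E2BIG.
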
diff --git a/kernel/futex.c b/kernel/futex.c
index 83db5787c67e..b65dbb5d60bb 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -1019,6 +1019,39 @@ static void exit_pi_state_list(struct task_struct *curr)
* [10] There is no transient state which leaves owner and user space
* TID out of sync. Except one error case where the kernel is denied
* write access to the user address, see fixup_pi_state_owner().
+ *
+ *
+ * Serialization and lifetime rules:
+ *
+ * hb->lock:
+ *
+ * hb -> futex_q, relation
+ * futex_q -> pi_state, relation
+ *
+ * (cannot be raw because hb can contain arbitrary amount
+ * of futex_q's)
+ *
+ * pi_mutex->wait_lock:
+ *
+ * {uval, pi_state}
+ *
+ * (and pi_mutex 'obviously')
+ *
+ * p->pi_lock:
+ *
+ * p->pi_state_list -> pi_state->list, relation
+ *
+ * pi_state->refcount:
+ *
+ * pi_state lifetime
+ *
+ *
+ * Lock order:
+ *
+ * hb->lock
+ * pi_mutex->wait_lock
+ * p->pi_lock
+ *
*/

/*
@@ -1026,10 +1059,12 @@ static void exit_pi_state_list(struct task_struct *curr)
* the pi_state against the user space value. If correct, attach to
* it.
*/
-static int attach_to_pi_state(u32 uval, struct futex_pi_state *pi_state,
+static int attach_to_pi_state(u32 __user *uaddr, u32 uval,
+ struct futex_pi_state *pi_state,
struct futex_pi_state **ps)
{
pid_t pid = uval & FUTEX_TID_MASK;
+ u32 uval2;
+ int ret;

/*
* Userspace might have messed up non-PI and PI futexes [3]
@@ -1037,8 +1072,33 @@ static int attach_to_pi_state(u32 uval, struct futex_pi_state *pi_state,
if (unlikely(!pi_state))
return -EINVAL;

+ /*
+ * We get here with hb->lock held, and having found a
+ * futex_top_waiter(). This means that futex_lock_pi() of said futex_q
+ * has dropped the hb->lock in between queue_me() and unqueue_me_pi(),
+ * which in turn means that futex_lock_pi() still has a reference on
+ * our pi_state.
+ */
WARN_ON(!atomic_read(&pi_state->refcount));

+ /*
+ * Now that we have a pi_state, we can acquire wait_lock
+ * and do the state validation.
+ */
+ raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
+
+ /*
+ * Since {uval, pi_state} is serialized by wait_lock, and our current
+ * uval was read without holding it, it can have changed. Verify it
+ * still is what we expect it to be, otherwise retry the entire
+ * operation.
+ */
+ if (get_futex_value_locked(&uval2, uaddr))
+ goto out_efault;
+
+ if (uval != uval2)
+ goto out_eagain;
+
/*
* Handle the owner died case:
*/
@@ -1054,11 +1114,11 @@ static int attach_to_pi_state(u32 uval, struct futex_pi_state *pi_state,
* is not 0. Inconsistent state. [5]
*/
if (pid)
- return -EINVAL;
+ goto out_einval;
/*
* Take a ref on the state and return success. [4]
*/
- goto out_state;
+ goto out_attach;
}

/*
@@ -1070,14 +1130,14 @@ static int attach_to_pi_state(u32 uval, struct futex_pi_state *pi_state,
* Take a ref on the state and return success. [6]
*/
if (!pid)
- goto out_state;
+ goto out_attach;
} else {
/*
* If the owner died bit is not set, then the pi_state
* must have an owner. [7]
*/
if (!pi_state->owner)
- return -EINVAL;
+ goto out_einval;
}

/*
@@ -1086,11 +1146,29 @@ static int attach_to_pi_state(u32 uval, struct futex_pi_state *pi_state,
* user space TID. [9/10]
*/
if (pid != task_pid_vnr(pi_state->owner))
- return -EINVAL;
-out_state:
+ goto out_einval;
+
+out_attach:
atomic_inc(&pi_state->refcount);
+ raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
*ps = pi_state;
return 0;
+
+out_einval:
+ ret = -EINVAL;
+ goto out_error;
+
+out_eagain:
+ ret = -EAGAIN;
+ goto out_error;
+
+out_efault:
+ ret = -EFAULT;
+ goto out_error;
+
+out_error:
+ raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
+ return ret;
}

/**
@@ -1123,11 +1201,67 @@ static void wait_for_owner_exiting(int ret, struct task_struct *exiting)
put_task_struct(exiting);
}

+static int handle_exit_race(u32 __user *uaddr, u32 uval,
+ struct task_struct *tsk)
+{
+ u32 uval2;
+
+ /*
+ * If the futex exit state is not yet FUTEX_STATE_DEAD, wait
+ * for it to finish.
+ */
+ if (tsk && tsk->futex_state != FUTEX_STATE_DEAD)
+ return -EAGAIN;
+
+ /*
+ * Reread the user space value to handle the following situation:
+ *
+ *    CPU0                             CPU1
+ *
+ *    sys_exit()                       sys_futex()
+ *     do_exit()                        futex_lock_pi()
+ *                                       futex_lock_pi_atomic()
+ *      exit_signals(tsk)                 No waiters:
+ *       tsk->flags |= PF_EXITING;        *uaddr == 0x00000PID
+ *      mm_release(tsk)                   Set waiter bit
+ *       exit_robust_list(tsk) {          *uaddr = 0x80000PID;
+ *          Set owner died                attach_to_pi_owner() {
+ *        *uaddr = 0xC0000000;             tsk = get_task(PID);
+ *       }                                 if (!tsk->flags & PF_EXITING) {
+ *       ...                                 attach();
+ *       tsk->futex_state =                } else {
+ *           FUTEX_STATE_DEAD;               if (tsk->futex_state !=
+ *                                               FUTEX_STATE_DEAD)
+ *                                             return -EAGAIN;
+ *                                           return -ESRCH; <--- FAIL
+ *                                         }
+ *
+ * Returning ESRCH unconditionally is wrong here because the
+ * user space value has been changed by the exiting task.
+ *
+ * The same logic applies to the case where the exiting task is
+ * already gone.
+ */
+ if (get_futex_value_locked(&uval2, uaddr))
+ return -EFAULT;
+
+ /* If the user space value has changed, try again. */
+ if (uval2 != uval)
+ return -EAGAIN;
+
+ /*
+ * The exiting task did not have a robust list, the robust list was
+ * corrupted or the user space value in *uaddr is simply bogus.
+ * Give up and tell user space.
+ */
+ return -ESRCH;
+}
+
/*
* Lookup the task for the TID provided from user space and attach to
* it after doing proper sanity checks.
*/
-static int attach_to_pi_owner(u32 uval, union futex_key *key,
+static int attach_to_pi_owner(u32 __user *uaddr, u32 uval, union futex_key *key,
struct futex_pi_state **ps,
struct task_struct **exiting)
{
@@ -1138,12 +1272,15 @@ static int attach_to_pi_owner(u32 uval, union futex_key *key,
/*
* We are the first waiter - try to look up the real owner and attach
* the new pi_state to it, but bail out when TID = 0 [1]
+ *
+ * The !pid check is paranoid. None of the call sites should end up
+ * with pid == 0, but better safe than sorry. Let the caller retry
*/
if (!pid)
- return -ESRCH;
+ return -EAGAIN;
p = futex_find_get_task(pid);
if (!p)
- return -ESRCH;
+ return handle_exit_race(uaddr, uval, NULL);

if (unlikely(p->flags & PF_KTHREAD)) {
put_task_struct(p);
@@ -1162,7 +1299,7 @@ static int attach_to_pi_owner(u32 uval, union futex_key *key,
* FUTEX_STATE_DEAD, we know that the task has finished
* the cleanup:
*/
- int ret = (p->futex_state == FUTEX_STATE_DEAD) ? -ESRCH : -EAGAIN;
+ int ret = handle_exit_race(uaddr, uval, p);

raw_spin_unlock_irq(&p->pi_lock);
/*
@@ -1183,6 +1320,9 @@ static int attach_to_pi_owner(u32 uval, union futex_key *key,

/*
* No existing pi state. First waiter. [2]
+ *
+ * This creates pi_state, we have hb->lock held, this means nothing can
+ * observe this state, wait_lock is irrelevant.
*/
pi_state = alloc_pi_state();

@@ -1207,7 +1347,8 @@ static int attach_to_pi_owner(u32 uval, union futex_key *key,
return 0;
}

-static int lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
+static int lookup_pi_state(u32 __user *uaddr, u32 uval,
+ struct futex_hash_bucket *hb,
union futex_key *key, struct futex_pi_state **ps,
struct task_struct **exiting)
{
@@ -1218,13 +1359,13 @@ static int lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
* attach to the pi_state when the validation succeeds.
*/
if (match)
- return attach_to_pi_state(uval, match->pi_state, ps);
+ return attach_to_pi_state(uaddr, uval, match->pi_state, ps);

/*
* We are the first waiter - try to look up the owner based on
* @uval and attach to it.
*/
- return attach_to_pi_owner(uval, key, ps, exiting);
+ return attach_to_pi_owner(uaddr, uval, key, ps, exiting);
}

static int lock_pi_update_atomic(u32 __user *uaddr, u32 uval, u32 newval)
@@ -1237,7 +1378,7 @@ static int lock_pi_update_atomic(u32 __user *uaddr, u32 uval, u32 newval)
if (unlikely(cmpxchg_futex_value_locked(&curval, uaddr, uval, newval)))
return -EFAULT;

- /*If user space value changed, let the caller retry */
+ /* If user space value changed, let the caller retry */
return curval != uval ? -EAGAIN : 0;
}

@@ -1301,7 +1442,7 @@ static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb,
*/
match = futex_top_waiter(hb, key);
if (match)
- return attach_to_pi_state(uval, match->pi_state, ps);
+ return attach_to_pi_state(uaddr, uval, match->pi_state, ps);

/*
* No waiter and user TID is 0. We are here because the
@@ -1340,7 +1481,7 @@ static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb,
* attach to the owner. If that fails, no harm done, we only
* set the FUTEX_WAITERS bit in the user space variable.
*/
- return attach_to_pi_owner(uval, key, ps, exiting);
+ return attach_to_pi_owner(uaddr, newval, key, ps, exiting);
}

/**
@@ -1441,6 +1582,7 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this,

if (cmpxchg_futex_value_locked(&curval, uaddr, uval, newval)) {
ret = -EFAULT;
+
} else if (curval != uval) {
/*
* If a unconditional UNLOCK_PI operation (user space did not
@@ -1977,7 +2119,7 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags,
* If that call succeeds then we have pi_state and an
* initial refcount on it.
*/
- ret = lookup_pi_state(ret, hb2, &key2,
+ ret = lookup_pi_state(uaddr2, ret, hb2, &key2,
&pi_state, &exiting);
}

@@ -2282,7 +2424,6 @@ static int __fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
int err = 0;

oldowner = pi_state->owner;
-
/* Owner died? */
if (!pi_state->owner)
newtid |= FUTEX_OWNER_DIED;
@@ -2305,11 +2446,10 @@ static int __fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
* because we can fault here. Imagine swapped out pages or a fork
* that marked all the anonymous memory readonly for cow.
*
- * Modifying pi_state _before_ the user space value would
- * leave the pi_state in an inconsistent state when we fault
- * here, because we need to drop the hash bucket lock to
- * handle the fault. This might be observed in the PID check
- * in lookup_pi_state.
+ * Modifying pi_state _before_ the user space value would leave the
+ * pi_state in an inconsistent state when we fault here, because we
+ * need to drop the locks to handle the fault. This might be observed
+ * in the PID check in lookup_pi_state.
*/
retry:
if (!argowner) {
@@ -2322,7 +2462,7 @@ static int __fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
}

if (__rt_mutex_futex_trylock(&pi_state->pi_mutex)) {
- /* We got the lock after all, nothing to fix. */
+ /* We got the lock. pi_state is correct. Tell caller. */
return 1;
}

@@ -2364,24 +2504,29 @@ static int __fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
*/
pi_state_update_owner(pi_state, newowner);

- return 0;
+ return argowner == current;

/*
- * To handle the page fault we need to drop the hash bucket
- * lock here. That gives the other task (either the highest priority
- * waiter itself or the task which stole the rtmutex) the
- * chance to try the fixup of the pi_state. So once we are
- * back from handling the fault we need to check the pi_state
- * after reacquiring the hash bucket lock and before trying to
- * do another fixup. When the fixup has been done already we
- * simply return.
+ * To handle the page fault we need to drop the locks here. That gives
+ * the other task (either the highest priority waiter itself or the
+ * task which stole the rtmutex) the chance to try the fixup of the
+ * pi_state. So once we are back from handling the fault we need to
+ * check the pi_state after reacquiring the locks and before trying to
+ * do another fixup. When the fixup has been done already we simply
+ * return.
+ *
+ * Note: we hold both hb->lock and pi_mutex->wait_lock. We can safely
+ * drop hb->lock since the caller owns the hb -> futex_q relation.
+ * Dropping the pi_mutex->wait_lock requires the state revalidate.
*/
handle_fault:
+ raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
spin_unlock(q->lock_ptr);

err = fault_in_user_writeable(uaddr);

spin_lock(q->lock_ptr);
+ raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);

/*
* Check if someone else fixed it for us:
@@ -2447,8 +2592,6 @@ static long futex_wait_restart(struct restart_block *restart);
*/
static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked)
{
- int ret = 0;
-
if (locked) {
/*
* Got the lock. We might not be the anticipated owner if we
@@ -2459,8 +2602,8 @@ static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked)
* stable state, anything else needs more attention.
*/
if (q->pi_state->owner != current)
- ret = fixup_pi_state_owner(uaddr, q, current);
- goto out;
+ return fixup_pi_state_owner(uaddr, q, current);
+ return 1;
}

/*
@@ -2471,10 +2614,8 @@ static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked)
* Another speculative read; pi_state->owner == current is unstable
* but needs our attention.
*/
- if (q->pi_state->owner == current) {
- ret = fixup_pi_state_owner(uaddr, q, NULL);
- goto out;
- }
+ if (q->pi_state->owner == current)
+ return fixup_pi_state_owner(uaddr, q, NULL);

/*
* Paranoia check. If we did not take the lock, then we should not be
@@ -2483,8 +2624,7 @@ static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked)
if (WARN_ON_ONCE(rt_mutex_owner(&q->pi_state->pi_mutex) == current))
return fixup_pi_state_owner(uaddr, q, current);

-out:
- return ret ? ret : locked;
+ return 0;
}

/**
@@ -3106,6 +3246,11 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
*/
put_pi_state(q.pi_state);
spin_unlock(q.lock_ptr);
+ /*
+ * Adjust the return value. It's either -EFAULT or
+ * success (1) but the caller expects 0 for success.
+ */
+ ret = ret < 0 ? ret : 0;
}
} else {
struct rt_mutex *pi_mutex;
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 16ca877745f6..ce49b62b0834 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -5803,7 +5803,6 @@ static int alloc_retstack_tasklist(struct ftrace_ret_stack **ret_stack_list)
}

if (t->ret_stack == NULL) {
- atomic_set(&t->tracing_graph_pause, 0);
atomic_set(&t->trace_overrun, 0);
t->curr_ret_stack = -1;
/* Make sure the tasks see the -1 first: */
@@ -6015,7 +6014,6 @@ static DEFINE_PER_CPU(struct ftrace_ret_stack *, idle_ret_stack);
static void
graph_init_task(struct task_struct *t, struct ftrace_ret_stack *ret_stack)
{
- atomic_set(&t->tracing_graph_pause, 0);
atomic_set(&t->trace_overrun, 0);
t->ftrace_timestamp = 0;
/* make curr_ret_stack visible before we add the ret_stack */
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 005929d13c7d..b87ab105fa22 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -2090,7 +2090,7 @@ trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
(entry = this_cpu_read(trace_buffered_event))) {
/* Try to use the per cpu buffer first */
val = this_cpu_inc_return(trace_buffered_event_cnt);
- if (val == 1) {
+ if ((len < (PAGE_SIZE - sizeof(*entry))) && val == 1) {
trace_event_setup(entry, type, flags, pc);
entry->array[0] = len;
return entry;
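
The added length test matches how the fast path is sized: the per-CPU
trace_buffered_event is a single page holding the trace_entry header
followed by the payload, so only events with

	len < PAGE_SIZE - sizeof(*entry)

may use it. Larger events now fall through to the regular
ring_buffer_lock_reserve() path instead of writing past the page.
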
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 5bf072e437c4..1499b2c2799c 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -1105,7 +1105,8 @@ system_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
mutex_lock(&event_mutex);
list_for_each_entry(file, &tr->events, list) {
call = file->event_call;
- if (!trace_event_name(call) || !call->class || !call->class->reg)
+ if ((call->flags & TRACE_EVENT_FL_IGNORE_ENABLE) ||
+ !trace_event_name(call) || !call->class || !call->class->reg)
continue;

if (system && strcmp(call->class->system, system->name) != 0)
diff --git a/lib/string.c b/lib/string.c
index d099762a9bd6..8fe13371aed7 100644
--- a/lib/string.c
+++ b/lib/string.c
@@ -157,11 +157,9 @@ EXPORT_SYMBOL(strlcpy);
* @src: Where to copy the string from
* @count: Size of destination buffer
*
- * Copy the string, or as much of it as fits, into the dest buffer.
- * The routine returns the number of characters copied (not including
- * the trailing NUL) or -E2BIG if the destination buffer wasn't big enough.
- * The behavior is undefined if the string buffers overlap.
- * The destination buffer is always NUL terminated, unless it's zero-sized.
+ * Copy the string, or as much of it as fits, into the dest buffer. The
+ * behavior is undefined if the string buffers overlap. The destination
+ * buffer is always NUL terminated, unless it's zero-sized.
*
* Preferred to strlcpy() since the API doesn't require reading memory
* from the src string beyond the specified "count" bytes, and since
@@ -171,8 +169,10 @@ EXPORT_SYMBOL(strlcpy);
*
* Preferred to strncpy() since it always returns a valid string, and
* doesn't unnecessarily force the tail of the destination buffer to be
- * zeroed. If the zeroing is desired, it's likely cleaner to use strscpy()
- * with an overflow test, then just memset() the tail of the dest buffer.
+ * zeroed. If zeroing is desired please use strscpy_pad().
+ *
+ * Return: The number of characters copied (not including the trailing
+ * %NUL) or -E2BIG if the destination buffer wasn't big enough.
*/
ssize_t strscpy(char *dest, const char *src, size_t count)
{
@@ -259,6 +259,39 @@ char *stpcpy(char *__restrict__ dest, const char *__restrict__ src)
}
EXPORT_SYMBOL(stpcpy);

+/**
+ * strscpy_pad() - Copy a C-string into a sized buffer
+ * @dest: Where to copy the string to
+ * @src: Where to copy the string from
+ * @count: Size of destination buffer
+ *
+ * Copy the string, or as much of it as fits, into the dest buffer. The
+ * behavior is undefined if the string buffers overlap. The destination
+ * buffer is always %NUL terminated, unless it's zero-sized.
+ *
+ * If the source string is shorter than the destination buffer, zeros
+ * the tail of the destination buffer.
+ *
+ * For full explanation of why you may want to consider using the
+ * 'strscpy' functions please see the function docstring for strscpy().
+ *
+ * Return: The number of characters copied (not including the trailing
+ * %NUL) or -E2BIG if the destination buffer wasn't big enough.
+ */
+ssize_t strscpy_pad(char *dest, const char *src, size_t count)
+{
+ ssize_t written;
+
+ written = strscpy(dest, src, count);
+ if (written < 0 || written == count - 1)
+ return written;
+
+ memset(dest + written + 1, 0, count - written - 1);
+
+ return written;
+}
+EXPORT_SYMBOL(strscpy_pad);
+
#ifndef __HAVE_ARCH_STRCAT
/**
* strcat - Append one %NUL-terminated string to another
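
A short usage sketch of the new helper (hypothetical buffer name):

	char comm[16];
	ssize_t n = strscpy_pad(comm, "kworker", sizeof(comm));
	/* n == 7; comm[7..15] are all NUL, so the full 16 bytes are
	 * deterministic -- e.g. safe to hand to a fixed-width
	 * tracepoint field, which is how the writeback events above
	 * use it */

On truncation it behaves like strscpy(): it returns -E2BIG, and the
destination is already fully written and NUL-terminated, so no padding
pass is needed.
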
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 113b7d317079..aad61d0175a1 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -21,6 +21,7 @@ struct backing_dev_info noop_backing_dev_info = {
EXPORT_SYMBOL_GPL(noop_backing_dev_info);

static struct class *bdi_class;
+const char *bdi_unknown_name = "(unknown)";

/*
* bdi_lock protects updates to bdi_list. bdi_list has RCU reader side
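
The fallback string is consumed by the writeback tracepoints; a sketch
of the intended pattern (bdi_name() is a stand-in here, later kernels
add a bdi_dev_name() helper for this):

    /*
     * Report a stable name even after device unregistration has
     * cleared bdi->dev, instead of crashing in dev_name(NULL).
     */
    static const char *bdi_name(struct backing_dev_info *bdi)
    {
        return bdi->dev ? dev_name(bdi->dev) : bdi_unknown_name;
    }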
diff --git a/mm/memblock.c b/mm/memblock.c
index 42b98af6a415..e43065b13c08 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -186,14 +186,6 @@ __memblock_find_range_top_down(phys_addr_t start, phys_addr_t end,
*
* Find @size free area aligned to @align in the specified range and node.
*
- * When allocation direction is bottom-up, the @start should be greater
- * than the end of the kernel image. Otherwise, it will be trimmed. The
- * reason is that we want the bottom-up allocation just near the kernel
- * image so it is highly likely that the allocated memory and the kernel
- * will reside in the same node.
- *
- * If bottom-up allocation failed, will try to allocate memory top-down.
- *
* RETURNS:
* Found address on success, 0 on failure.
*/
@@ -201,8 +193,6 @@ phys_addr_t __init_memblock memblock_find_in_range_node(phys_addr_t size,
phys_addr_t align, phys_addr_t start,
phys_addr_t end, int nid, ulong flags)
{
- phys_addr_t kernel_end, ret;
-
/* pump up @end */
if (end == MEMBLOCK_ALLOC_ACCESSIBLE)
end = memblock.current_limit;
@@ -210,39 +200,13 @@ phys_addr_t __init_memblock memblock_find_in_range_node(phys_addr_t size,
/* avoid allocating the first page */
start = max_t(phys_addr_t, start, PAGE_SIZE);
end = max(start, end);
- kernel_end = __pa_symbol(_end);
-
- /*
- * try bottom-up allocation only when bottom-up mode
- * is set and @end is above the kernel image.
- */
- if (memblock_bottom_up() && end > kernel_end) {
- phys_addr_t bottom_up_start;
-
- /* make sure we will allocate above the kernel */
- bottom_up_start = max(start, kernel_end);

- /* ok, try bottom-up allocation first */
- ret = __memblock_find_range_bottom_up(bottom_up_start, end,
- size, align, nid, flags);
- if (ret)
- return ret;
-
- /*
- * we always limit bottom-up allocation above the kernel,
- * but top-down allocation doesn't have the limit, so
- * retrying top-down allocation may succeed when bottom-up
- * allocation failed.
- *
- * bottom-up allocation is expected to be fail very rarely,
- * so we use WARN_ONCE() here to see the stack trace if
- * fail happens.
- */
- WARN_ONCE(1, "memblock: bottom-up allocation failed, memory hotunplug may be affected\n");
- }
-
- return __memblock_find_range_top_down(start, end, size, align, nid,
- flags);
+ if (memblock_bottom_up())
+ return __memblock_find_range_bottom_up(start, end, size, align,
+ nid, flags);
+ else
+ return __memblock_find_range_top_down(start, end, size, align,
+ nid, flags);
}

/**
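
The visible semantic change: with memblock_set_bottom_up(true), the
search now honors the caller's @start instead of silently clamping it
above the kernel image. A hypothetical caller wanting the old behavior
must now encode it explicitly:

    /* allocate bottom-up, but still above the kernel image */
    phys_addr_t addr = memblock_find_in_range_node(size, SMP_CACHE_BYTES,
                               __pa_symbol(_end),
                               MEMBLOCK_ALLOC_ACCESSIBLE,
                               NUMA_NO_NODE, MEMBLOCK_NONE);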
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index d4232744c59f..27b0b4f03fcd 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1638,9 +1638,13 @@ bool mem_cgroup_oom_synchronize(bool handle)
* @page: the page
*
* This function protects unlocked LRU pages from being moved to
- * another cgroup and stabilizes their page->mem_cgroup binding.
+ * another cgroup.
+ *
+ * It ensures lifetime of the returned memcg. Caller is responsible
+ * for the lifetime of the page; __unlock_page_memcg() is available
+ * when @page might get freed inside the locked section.
*/
-void lock_page_memcg(struct page *page)
+struct mem_cgroup *lock_page_memcg(struct page *page)
{
struct mem_cgroup *memcg;
unsigned long flags;
@@ -1649,18 +1653,24 @@ void lock_page_memcg(struct page *page)
* The RCU lock is held throughout the transaction. The fast
* path can get away without acquiring the memcg->move_lock
* because page moving starts with an RCU grace period.
- */
+ *
+ * The RCU lock also protects the memcg from being freed when
+ * the page state that is going to change is the only thing
+ * preventing the page itself from being freed. E.g. writeback
+ * doesn't hold a page reference and relies on PG_writeback to
+ * keep off truncation, migration and so forth.
+ */
rcu_read_lock();

if (mem_cgroup_disabled())
- return;
+ return NULL;
again:
memcg = page->mem_cgroup;
if (unlikely(!memcg))
- return;
+ return NULL;

if (atomic_read(&memcg->moving_account) <= 0)
- return;
+ return memcg;

spin_lock_irqsave(&memcg->move_lock, flags);
if (memcg != page->mem_cgroup) {
@@ -1676,18 +1686,18 @@ void lock_page_memcg(struct page *page)
memcg->move_lock_task = current;
memcg->move_lock_flags = flags;

- return;
+ return memcg;
}
EXPORT_SYMBOL(lock_page_memcg);

/**
- * unlock_page_memcg - unlock a page->mem_cgroup binding
- * @page: the page
+ * __unlock_page_memcg - unlock and unpin a memcg
+ * @memcg: the memcg
+ *
+ * Unlock and unpin a memcg returned by lock_page_memcg().
*/
-void unlock_page_memcg(struct page *page)
+void __unlock_page_memcg(struct mem_cgroup *memcg)
{
- struct mem_cgroup *memcg = page->mem_cgroup;
-
if (memcg && memcg->move_lock_task == current) {
unsigned long flags = memcg->move_lock_flags;

@@ -1699,6 +1709,15 @@ void unlock_page_memcg(struct page *page)

rcu_read_unlock();
}
+
+/**
+ * unlock_page_memcg - unlock a page->mem_cgroup binding
+ * @page: the page
+ */
+void unlock_page_memcg(struct page *page)
+{
+ __unlock_page_memcg(page->mem_cgroup);
+}
EXPORT_SYMBOL(unlock_page_memcg);

/*
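
The resulting calling convention, condensed (clear_flag() and
update_stat() are stand-ins for the writeback bits in the next hunk):

    struct mem_cgroup *memcg;

    memcg = lock_page_memcg(page);  /* opens RCU section, pins memcg */
    ret = clear_flag(page);         /* may drop the last pin on page */
    /*
     * page can be freed from here on; memcg, kept alive by the RCU
     * read section, is the only safe handle for the accounting.
     */
    if (ret)
        update_stat(memcg);
    __unlock_page_memcg(memcg);     /* drops move_lock, closes RCU */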
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 462c778b9fb5..498c924f2fcd 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -2717,9 +2717,10 @@ EXPORT_SYMBOL(clear_page_dirty_for_io);
int test_clear_page_writeback(struct page *page)
{
struct address_space *mapping = page_mapping(page);
+ struct mem_cgroup *memcg;
int ret;

- lock_page_memcg(page);
+ memcg = lock_page_memcg(page);
if (mapping && mapping_use_writeback_tags(mapping)) {
struct inode *inode = mapping->host;
struct backing_dev_info *bdi = inode_to_bdi(inode);
@@ -2747,13 +2748,20 @@ int test_clear_page_writeback(struct page *page)
} else {
ret = TestClearPageWriteback(page);
}
+ /*
+ * NOTE: Page might be free now! Writeback doesn't hold a page
+ * reference on its own, it relies on truncation to wait for
+ * the clearing of PG_writeback. The below can only access
+ * page state that is static across allocation cycles.
+ */
if (ret) {
- mem_cgroup_dec_page_stat(page, MEM_CGROUP_STAT_WRITEBACK);
+ __mem_cgroup_update_page_stat(page, memcg,
+ MEM_CGROUP_STAT_WRITEBACK, -1);
dec_node_page_state(page, NR_WRITEBACK);
dec_zone_page_state(page, NR_ZONE_WRITE_PENDING);
inc_node_page_state(page, NR_WRITTEN);
}
- unlock_page_memcg(page);
+ __unlock_page_memcg(memcg);
return ret;
}

diff --git a/net/key/af_key.c b/net/key/af_key.c
index 76a008b1cbe5..adc93329e6aa 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -2933,7 +2933,7 @@ static int count_ah_combs(const struct xfrm_tmpl *t)
break;
if (!aalg->pfkey_supported)
continue;
- if (aalg_tmpl_set(t, aalg) && aalg->available)
+ if (aalg_tmpl_set(t, aalg))
sz += sizeof(struct sadb_comb);
}
return sz + sizeof(struct sadb_prop);
@@ -2951,7 +2951,7 @@ static int count_esp_combs(const struct xfrm_tmpl *t)
if (!ealg->pfkey_supported)
continue;

- if (!(ealg_tmpl_set(t, ealg) && ealg->available))
+ if (!(ealg_tmpl_set(t, ealg)))
continue;

for (k = 1; ; k++) {
@@ -2962,7 +2962,7 @@ static int count_esp_combs(const struct xfrm_tmpl *t)
if (!aalg->pfkey_supported)
continue;

- if (aalg_tmpl_set(t, aalg) && aalg->available)
+ if (aalg_tmpl_set(t, aalg))
sz += sizeof(struct sadb_comb);
}
}
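
The sizing and filling passes must agree: the sadb proposal buffer is
sized by count_*_combs() and written by dump_*_combs(), and ->available
can flip in between (e.g. xfrm_probe_algs() from a concurrent PF_KEY
request). Condensed flow, with hdr_len standing in for the fixed
headers:

    size = count_esp_combs(t);          /* superset: every template match */
    skb  = alloc_skb(size + hdr_len, GFP_ATOMIC);
    if (skb)
        dump_esp_combs(skb, t);         /* now guaranteed to fit in size  */

Counting only currently-available algorithms let the dump write more
combs than were counted, a slab out-of-bounds write.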
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index d507d0fc7858..ddd90a3820d3 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -903,7 +903,8 @@ nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple,
* Let nf_ct_resolve_clash() deal with this later.
*/
if (nf_ct_tuple_equal(&ignored_conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
- &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple))
+ &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple) &&
+ nf_ct_zone_equal(ct, zone, IP_CT_DIR_ORIGINAL))
continue;

NF_CT_STAT_INC_ATOMIC(net, found);
diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c
index 79d7ad621a80..03c8bd854e56 100644
--- a/net/netfilter/xt_recent.c
+++ b/net/netfilter/xt_recent.c
@@ -155,7 +155,8 @@ static void recent_entry_remove(struct recent_table *t, struct recent_entry *e)
/*
* Drop entries with timestamps older than 'time'.
*/
-static void recent_entry_reap(struct recent_table *t, unsigned long time)
+static void recent_entry_reap(struct recent_table *t, unsigned long time,
+ struct recent_entry *working, bool update)
{
struct recent_entry *e;

@@ -164,6 +165,12 @@ static void recent_entry_reap(struct recent_table *t, unsigned long time)
*/
e = list_entry(t->lru_list.next, struct recent_entry, lru_list);

+ /*
+ * Do not reap the entry which is going to be updated.
+ */
+ if (e == working && update)
+ return;
+
/*
* The last time stamp is the most recent.
*/
@@ -306,7 +313,8 @@ recent_mt(const struct sk_buff *skb, struct xt_action_param *par)

/* info->seconds must be non-zero */
if (info->check_set & XT_RECENT_REAP)
- recent_entry_reap(t, time);
+ recent_entry_reap(t, time, e,
+ info->check_set & XT_RECENT_UPDATE && ret);
}

if (info->check_set & XT_RECENT_SET ||
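
The sequence being fixed, condensed from recent_mt() with both
XT_RECENT_UPDATE and XT_RECENT_REAP set in one rule:

    e = recent_entry_lookup(t, &addr, par->family, ttl);
    ...
    if (info->check_set & XT_RECENT_REAP)
        recent_entry_reap(t, time, e, update);  /* e may be the LRU-oldest
                                                 * entry: must not free it */
    ...
    if (update && ret)
        recent_entry_update(t, e);              /* use-after-free if the
                                                 * reap had dropped e      */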
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index 591d378d1a18..1d00f49dfe48 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -53,6 +53,7 @@
#include <asm/uaccess.h>
#include <linux/hashtable.h>

+#include "auth_gss_internal.h"
#include "../netns.h"

static const struct rpc_authops authgss_ops;
@@ -147,35 +148,6 @@ gss_cred_set_ctx(struct rpc_cred *cred, struct gss_cl_ctx *ctx)
clear_bit(RPCAUTH_CRED_NEW, &cred->cr_flags);
}

-static const void *
-simple_get_bytes(const void *p, const void *end, void *res, size_t len)
-{
- const void *q = (const void *)((const char *)p + len);
- if (unlikely(q > end || q < p))
- return ERR_PTR(-EFAULT);
- memcpy(res, p, len);
- return q;
-}
-
-static inline const void *
-simple_get_netobj(const void *p, const void *end, struct xdr_netobj *dest)
-{
- const void *q;
- unsigned int len;
-
- p = simple_get_bytes(p, end, &len, sizeof(len));
- if (IS_ERR(p))
- return p;
- q = (const void *)((const char *)p + len);
- if (unlikely(q > end || q < p))
- return ERR_PTR(-EFAULT);
- dest->data = kmemdup(p, len, GFP_NOFS);
- if (unlikely(dest->data == NULL))
- return ERR_PTR(-ENOMEM);
- dest->len = len;
- return q;
-}
-
static struct gss_cl_ctx *
gss_cred_get_ctx(struct rpc_cred *cred)
{
diff --git a/net/sunrpc/auth_gss/auth_gss_internal.h b/net/sunrpc/auth_gss/auth_gss_internal.h
new file mode 100644
index 000000000000..f6d9631bd9d0
--- /dev/null
+++ b/net/sunrpc/auth_gss/auth_gss_internal.h
@@ -0,0 +1,45 @@
+// SPDX-License-Identifier: BSD-3-Clause
+/*
+ * linux/net/sunrpc/auth_gss/auth_gss_internal.h
+ *
+ * Internal definitions for RPCSEC_GSS client authentication
+ *
+ * Copyright (c) 2000 The Regents of the University of Michigan.
+ * All rights reserved.
+ *
+ */
+#include <linux/err.h>
+#include <linux/string.h>
+#include <linux/sunrpc/xdr.h>
+
+static inline const void *
+simple_get_bytes(const void *p, const void *end, void *res, size_t len)
+{
+ const void *q = (const void *)((const char *)p + len);
+ if (unlikely(q > end || q < p))
+ return ERR_PTR(-EFAULT);
+ memcpy(res, p, len);
+ return q;
+}
+
+static inline const void *
+simple_get_netobj(const void *p, const void *end, struct xdr_netobj *dest)
+{
+ const void *q;
+ unsigned int len;
+
+ p = simple_get_bytes(p, end, &len, sizeof(len));
+ if (IS_ERR(p))
+ return p;
+ q = (const void *)((const char *)p + len);
+ if (unlikely(q > end || q < p))
+ return ERR_PTR(-EFAULT);
+ if (len) {
+ dest->data = kmemdup(p, len, GFP_NOFS);
+ if (unlikely(dest->data == NULL))
+ return ERR_PTR(-ENOMEM);
+ } else
+ dest->data = NULL;
+ dest->len = len;
+ return q;
+}
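
The behavioral fix is the new len == 0 branch: kmemdup(p, 0, GFP_NOFS)
returns ZERO_SIZE_PTR, not NULL, so empty objects used to look present.
Caller-side effect, sketched (process_opaque() is hypothetical):

    struct xdr_netobj obj;

    p = simple_get_netobj(p, end, &obj);
    if (IS_ERR(p))
        return PTR_ERR(p);
    /*
     * obj.data is now NULL for a zero-length object instead of
     * ZERO_SIZE_PTR, so this test no longer misfires; kfree(obj.data)
     * remains safe in both cases.
     */
    if (obj.data)
        process_opaque(obj.data, obj.len);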
diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c
index 60595835317a..ea2f6022b3d5 100644
--- a/net/sunrpc/auth_gss/gss_krb5_mech.c
+++ b/net/sunrpc/auth_gss/gss_krb5_mech.c
@@ -46,6 +46,8 @@
#include <linux/sunrpc/xdr.h>
#include <linux/sunrpc/gss_krb5_enctypes.h>

+#include "auth_gss_internal.h"
+
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
# define RPCDBG_FACILITY RPCDBG_AUTH
#endif
@@ -187,35 +189,6 @@ get_gss_krb5_enctype(int etype)
return NULL;
}

-static const void *
-simple_get_bytes(const void *p, const void *end, void *res, int len)
-{
- const void *q = (const void *)((const char *)p + len);
- if (unlikely(q > end || q < p))
- return ERR_PTR(-EFAULT);
- memcpy(res, p, len);
- return q;
-}
-
-static const void *
-simple_get_netobj(const void *p, const void *end, struct xdr_netobj *res)
-{
- const void *q;
- unsigned int len;
-
- p = simple_get_bytes(p, end, &len, sizeof(len));
- if (IS_ERR(p))
- return p;
- q = (const void *)((const char *)p + len);
- if (unlikely(q > end || q < p))
- return ERR_PTR(-EFAULT);
- res->data = kmemdup(p, len, GFP_NOFS);
- if (unlikely(res->data == NULL))
- return ERR_PTR(-ENOMEM);
- res->len = len;
- return q;
-}
-
static inline const void *
get_key(const void *p, const void *end,
struct krb5_ctx *ctx, struct crypto_skcipher **res)
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index 3a2543b9701a..bd3a5ef8e59b 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -830,10 +830,12 @@ static int vsock_shutdown(struct socket *sock, int mode)
*/

sk = sock->sk;
+
+ lock_sock(sk);
if (sock->state == SS_UNCONNECTED) {
err = -ENOTCONN;
if (sk->sk_type == SOCK_STREAM)
- return err;
+ goto out;
} else {
sock->state = SS_DISCONNECTING;
err = 0;
@@ -842,10 +844,8 @@ static int vsock_shutdown(struct socket *sock, int mode)
/* Receive and send shutdowns are treated alike. */
mode = mode & (RCV_SHUTDOWN | SEND_SHUTDOWN);
if (mode) {
- lock_sock(sk);
sk->sk_shutdown |= mode;
sk->sk_state_change(sk);
- release_sock(sk);

if (sk->sk_type == SOCK_STREAM) {
sock_reset_flag(sk, SOCK_DONE);
@@ -853,6 +853,8 @@ static int vsock_shutdown(struct socket *sock, int mode)
}
}

+out:
+ release_sock(sk);
return err;
}

@@ -1121,7 +1123,6 @@ static void vsock_connect_timeout(struct work_struct *work)
{
struct sock *sk;
struct vsock_sock *vsk;
- int cancel = 0;

vsk = container_of(work, struct vsock_sock, connect_work.work);
sk = sk_vsock(vsk);
@@ -1132,11 +1133,9 @@ static void vsock_connect_timeout(struct work_struct *work)
sk->sk_state = SS_UNCONNECTED;
sk->sk_err = ETIMEDOUT;
sk->sk_error_report(sk);
- cancel = 1;
+ vsock_transport_cancel_pkt(vsk);
}
release_sock(sk);
- if (cancel)
- vsock_transport_cancel_pkt(vsk);

sock_put(sk);
}
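
Both hunks shrink the unlocked windows: shutdown now holds the socket
lock across the whole state check, with the early return turned into
the usual goto-unlock pattern, and the timeout handler cancels the
pending connect packet inside the same locked section that observes the
timeout. The resulting timeout path, condensed:

    lock_sock(sk);
    if (sk->sk_state == SS_CONNECTING &&
        (sk->sk_shutdown != SHUTDOWN_MASK)) {
        sk->sk_state = SS_UNCONNECTED;
        sk->sk_err = ETIMEDOUT;
        sk->sk_error_report(sk);
        vsock_transport_cancel_pkt(vsk); /* no longer after release_sock() */
    }
    release_sock(sk);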
diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
index aa9d1c7780c3..5f7bcc7da460 100644
--- a/net/vmw_vsock/virtio_transport_common.c
+++ b/net/vmw_vsock/virtio_transport_common.c
@@ -959,10 +959,10 @@ void virtio_transport_recv_pkt(struct virtio_vsock_pkt *pkt)

vsk = vsock_sk(sk);

- space_available = virtio_transport_space_update(sk, pkt);
-
lock_sock(sk);

+ space_available = virtio_transport_space_update(sk, pkt);
+
/* Update CID in case it has changed after a transport reset event */
vsk->local_addr.svm_cid = dst.svm_cid;

diff --git a/scripts/Makefile.build b/scripts/Makefile.build
index 6228a83156ea..f8ee4e33a085 100644
--- a/scripts/Makefile.build
+++ b/scripts/Makefile.build
@@ -222,6 +222,8 @@ cmd_modversions_c = \
endif

ifdef CONFIG_FTRACE_MCOUNT_RECORD
+ifndef CC_USING_RECORD_MCOUNT
+# compiler will not generate __mcount_loc; use recordmcount or recordmcount.pl
ifdef BUILD_C_RECORDMCOUNT
ifeq ("$(origin RECORDMCOUNT_WARN)", "command line")
RECORDMCOUNT_FLAGS = -w
@@ -250,6 +252,7 @@ cmd_record_mcount = \
"$(CC_FLAGS_FTRACE)" ]; then \
$(sub_cmd_record_mcount) \
fi;
+endif # CC_USING_RECORD_MCOUNT
endif

ifdef CONFIG_STACK_VALIDATION
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 5bddabb3de7c..db859b595dba 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -382,9 +382,8 @@ static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
*/
kvm->mmu_notifier_count++;
need_tlb_flush = kvm_unmap_hva_range(kvm, start, end);
- need_tlb_flush |= kvm->tlbs_dirty;
/* we've to flush the tlb before the pages can be freed */
- if (need_tlb_flush)
+ if (need_tlb_flush || kvm->tlbs_dirty)
kvm_flush_remote_tlbs(kvm);

spin_unlock(&kvm->mmu_lock);
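
The subtlety here is integer truncation: need_tlb_flush is an int while
kvm->tlbs_dirty is a long, so the OR silently dropped the upper 32 bits
and could skip a needed TLB flush. A stand-alone illustration of that
bug class (plain user-space C, assuming 64-bit long as on the affected
hosts):

    #include <stdio.h>

    int main(void)
    {
        long tlbs_dirty = 1L << 32;   /* only upper bits set */
        int need_tlb_flush = 0;

        need_tlb_flush |= tlbs_dirty; /* truncates to 0      */
        printf("old check: %d\n", need_tlb_flush != 0);          /* 0 */
        printf("new check: %d\n", need_tlb_flush || tlbs_dirty); /* 1 */
        return 0;
    }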