Re: Linux 4.12.1

From: Greg KH
Date: Wed Jul 12 2017 - 11:00:35 EST


diff --git a/Makefile b/Makefile
index 283c6236438e..1286f8cc7b5b 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
VERSION = 4
PATCHLEVEL = 12
-SUBLEVEL = 0
+SUBLEVEL = 1
EXTRAVERSION =
NAME = Fearless Coyote

diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c
index 445f30a2c5ef..0c21747ed7e0 100644
--- a/arch/powerpc/platforms/powernv/idle.c
+++ b/arch/powerpc/platforms/powernv/idle.c
@@ -261,6 +261,7 @@ static u64 pnv_deepest_stop_psscr_val;
static u64 pnv_deepest_stop_psscr_mask;
static bool deepest_stop_found;

+#ifdef CONFIG_HOTPLUG_CPU
/*
* pnv_cpu_offline: A function that puts the CPU into the deepest
* available platform idle state on a CPU-Offline.
@@ -293,6 +294,7 @@ unsigned long pnv_cpu_offline(unsigned int cpu)

return srr1;
}
+#endif

/*
* Power ISA 3.0 idle initialization.
diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S
index c5959576c315..020f75cc8cf6 100644
--- a/arch/x86/lib/copy_user_64.S
+++ b/arch/x86/lib/copy_user_64.S
@@ -37,7 +37,7 @@ ENTRY(copy_user_generic_unrolled)
movl %edx,%ecx
andl $63,%edx
shrl $6,%ecx
- jz 17f
+ jz .L_copy_short_string
1: movq (%rsi),%r8
2: movq 1*8(%rsi),%r9
3: movq 2*8(%rsi),%r10
@@ -58,7 +58,8 @@ ENTRY(copy_user_generic_unrolled)
leaq 64(%rdi),%rdi
decl %ecx
jnz 1b
-17: movl %edx,%ecx
+.L_copy_short_string:
+ movl %edx,%ecx
andl $7,%edx
shrl $3,%ecx
jz 20f
@@ -174,6 +175,8 @@ EXPORT_SYMBOL(copy_user_generic_string)
*/
ENTRY(copy_user_enhanced_fast_string)
ASM_STAC
+ cmpl $64,%edx
+ jb .L_copy_short_string /* less than 64 bytes, avoid the costly 'rep' */
movl %edx,%ecx
1: rep
movsb
diff --git a/crypto/drbg.c b/crypto/drbg.c
index cdb27ac4b226..633a88e93ab0 100644
--- a/crypto/drbg.c
+++ b/crypto/drbg.c
@@ -1691,6 +1691,7 @@ static int drbg_init_sym_kernel(struct drbg_state *drbg)
return PTR_ERR(sk_tfm);
}
drbg->ctr_handle = sk_tfm;
+ init_completion(&drbg->ctr_completion);

req = skcipher_request_alloc(sk_tfm, GFP_KERNEL);
if (!req) {
diff --git a/drivers/base/core.c b/drivers/base/core.c
index bbecaf9293be..d3228cb7d12f 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -2667,7 +2667,11 @@ void device_shutdown(void)
pm_runtime_get_noresume(dev);
pm_runtime_barrier(dev);

- if (dev->bus && dev->bus->shutdown) {
+ if (dev->class && dev->class->shutdown) {
+ if (initcall_debug)
+ dev_info(dev, "shutdown\n");
+ dev->class->shutdown(dev);
+ } else if (dev->bus && dev->bus->shutdown) {
if (initcall_debug)
dev_info(dev, "shutdown\n");
dev->bus->shutdown(dev);
diff --git a/drivers/base/platform.c b/drivers/base/platform.c
index a102152301c8..97332d094fe2 100644
--- a/drivers/base/platform.c
+++ b/drivers/base/platform.c
@@ -866,7 +866,7 @@ static ssize_t driver_override_store(struct device *dev,
const char *buf, size_t count)
{
struct platform_device *pdev = to_platform_device(dev);
- char *driver_override, *old = pdev->driver_override, *cp;
+ char *driver_override, *old, *cp;

if (count > PATH_MAX)
return -EINVAL;
@@ -879,12 +879,15 @@ static ssize_t driver_override_store(struct device *dev,
if (cp)
*cp = '\0';

+ device_lock(dev);
+ old = pdev->driver_override;
if (strlen(driver_override)) {
pdev->driver_override = driver_override;
} else {
kfree(driver_override);
pdev->driver_override = NULL;
}
+ device_unlock(dev);

kfree(old);

@@ -895,8 +898,12 @@ static ssize_t driver_override_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct platform_device *pdev = to_platform_device(dev);
+ ssize_t len;

- return sprintf(buf, "%s\n", pdev->driver_override);
+ device_lock(dev);
+ len = sprintf(buf, "%s\n", pdev->driver_override);
+ device_unlock(dev);
+ return len;
}
static DEVICE_ATTR_RW(driver_override);

diff --git a/drivers/char/tpm/tpm-chip.c b/drivers/char/tpm/tpm-chip.c
index 322b8a51ffc6..67ec9d3d04f5 100644
--- a/drivers/char/tpm/tpm-chip.c
+++ b/drivers/char/tpm/tpm-chip.c
@@ -143,6 +143,39 @@ static void tpm_devs_release(struct device *dev)
}

/**
+ * tpm_class_shutdown() - prepare the TPM device for loss of power.
+ * @dev: device to which the chip is associated.
+ *
+ * Issues a TPM2_Shutdown command prior to loss of power, as required by the
+ * TPM 2.0 spec.
+ * Then, calls bus- and device-specific shutdown code.
+ *
+ * XXX: This codepath relies on the fact that sysfs is not enabled for
+ * TPM2: sysfs uses an implicit lock on chip->ops, so this could race if TPM2
+ * has sysfs support enabled before TPM sysfs's implicit locking is fixed.
+ */
+static int tpm_class_shutdown(struct device *dev)
+{
+ struct tpm_chip *chip = container_of(dev, struct tpm_chip, dev);
+
+ if (chip->flags & TPM_CHIP_FLAG_TPM2) {
+ down_write(&chip->ops_sem);
+ tpm2_shutdown(chip, TPM2_SU_CLEAR);
+ chip->ops = NULL;
+ up_write(&chip->ops_sem);
+ }
+ /* Allow bus- and device-specific code to run. Note: since chip->ops
+ * is NULL, more-specific shutdown code will not be able to issue TPM
+ * commands.
+ */
+ if (dev->bus && dev->bus->shutdown)
+ dev->bus->shutdown(dev);
+ else if (dev->driver && dev->driver->shutdown)
+ dev->driver->shutdown(dev);
+ return 0;
+}
+
+/**
* tpm_chip_alloc() - allocate a new struct tpm_chip instance
* @pdev: device to which the chip is associated
* At this point pdev mst be initialized, but does not have to
@@ -181,6 +214,7 @@ struct tpm_chip *tpm_chip_alloc(struct device *pdev,
device_initialize(&chip->devs);

chip->dev.class = tpm_class;
+ chip->dev.class->shutdown = tpm_class_shutdown;
chip->dev.release = tpm_dev_release;
chip->dev.parent = pdev;
chip->dev.groups = chip->groups;
diff --git a/drivers/char/tpm/tpm-sysfs.c b/drivers/char/tpm/tpm-sysfs.c
index 55405dbe43fa..c6fa9bef11cf 100644
--- a/drivers/char/tpm/tpm-sysfs.c
+++ b/drivers/char/tpm/tpm-sysfs.c
@@ -36,9 +36,10 @@ static ssize_t pubek_show(struct device *dev, struct device_attribute *attr,
ssize_t err;
int i, rc;
char *str = buf;
-
struct tpm_chip *chip = to_tpm_chip(dev);

+ memset(&tpm_cmd, 0, sizeof(tpm_cmd));
+
tpm_cmd.header.in = tpm_readpubek_header;
err = tpm_transmit_cmd(chip, NULL, &tpm_cmd, READ_PUBEK_RESULT_SIZE,
READ_PUBEK_RESULT_MIN_BODY_SIZE, 0,
@@ -294,6 +295,9 @@ static const struct attribute_group tpm_dev_group = {

void tpm_sysfs_add_device(struct tpm_chip *chip)
{
+ /* XXX: If you wish to remove this restriction, you must first update
+ * tpm_sysfs to explicitly lock chip->ops.
+ */
if (chip->flags & TPM_CHIP_FLAG_TPM2)
return;

diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 70b7fb156414..e63f2a13c5e1 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -1931,6 +1931,11 @@ static int modify_qp(struct ib_uverbs_file *file,
goto out;
}

+ if (!rdma_is_port_valid(qp->device, cmd->base.port_num)) {
+ ret = -EINVAL;
+ goto release_qp;
+ }
+
attr->qp_state = cmd->base.qp_state;
attr->cur_qp_state = cmd->base.cur_qp_state;
attr->path_mtu = cmd->base.path_mtu;
@@ -2541,6 +2546,9 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file,
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;

+ if (!rdma_is_port_valid(ib_dev, cmd.attr.port_num))
+ return -EINVAL;
+
INIT_UDATA(&udata, buf + sizeof(cmd),
(unsigned long)cmd.response + sizeof(resp),
in_len - sizeof(cmd), out_len - sizeof(resp));
diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c
index 96b21b0dac1e..3116edfcdc18 100644
--- a/drivers/usb/core/quirks.c
+++ b/drivers/usb/core/quirks.c
@@ -223,6 +223,10 @@ static const struct usb_device_id usb_quirk_list[] = {
/* Blackmagic Design UltraStudio SDI */
{ USB_DEVICE(0x1edb, 0xbd4f), .driver_info = USB_QUIRK_NO_LPM },

+ /* Hauppauge HVR-950q */
+ { USB_DEVICE(0x2040, 0x7200), .driver_info =
+ USB_QUIRK_CONFIG_INTF_STRINGS },
+
/* INTEL VALUE SSD */
{ USB_DEVICE(0x8086, 0xf1a5), .driver_info = USB_QUIRK_RESET_RESUME },

diff --git a/drivers/usb/core/usb.c b/drivers/usb/core/usb.c
index 28b053cacc90..62e1906bb2f3 100644
--- a/drivers/usb/core/usb.c
+++ b/drivers/usb/core/usb.c
@@ -416,6 +416,8 @@ static void usb_release_dev(struct device *dev)

usb_destroy_configuration(udev);
usb_release_bos_descriptor(udev);
+ if (udev->parent)
+ of_node_put(dev->of_node);
usb_put_hcd(hcd);
kfree(udev->product);
kfree(udev->manufacturer);
diff --git a/drivers/usb/dwc3/dwc3-st.c b/drivers/usb/dwc3/dwc3-st.c
index dfbf464eb88c..505676fd3ba4 100644
--- a/drivers/usb/dwc3/dwc3-st.c
+++ b/drivers/usb/dwc3/dwc3-st.c
@@ -230,7 +230,7 @@ static int st_dwc3_probe(struct platform_device *pdev)

dwc3_data->syscfg_reg_off = res->start;

- dev_vdbg(&pdev->dev, "glue-logic addr 0x%p, syscfg-reg offset 0x%x\n",
+ dev_vdbg(&pdev->dev, "glue-logic addr 0x%pK, syscfg-reg offset 0x%x\n",
dwc3_data->glue_base, dwc3_data->syscfg_reg_off);

dwc3_data->rstc_pwrdn =
diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
index aea9a5b948b4..58d20e3decb5 100644
--- a/drivers/usb/dwc3/gadget.c
+++ b/drivers/usb/dwc3/gadget.c
@@ -1215,12 +1215,9 @@ static int __dwc3_gadget_ep_queue(struct dwc3_ep *dep, struct dwc3_request *req)
return -ESHUTDOWN;
}

- if (WARN(req->dep != dep, "request %p belongs to '%s'\n",
- &req->request, req->dep->name)) {
- dev_err(dwc->dev, "%s: request %p belongs to '%s'\n",
- dep->name, &req->request, req->dep->name);
+ if (WARN(req->dep != dep, "request %pK belongs to '%s'\n",
+ &req->request, req->dep->name))
return -EINVAL;
- }

pm_runtime_get(dwc->dev);

@@ -1396,7 +1393,7 @@ static int dwc3_gadget_ep_dequeue(struct usb_ep *ep,
}
goto out1;
}
- dev_err(dwc->dev, "request %p was not queued to %s\n",
+ dev_err(dwc->dev, "request %pK was not queued to %s\n",
request, ep->name);
ret = -EINVAL;
goto out0;
diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c
index 0dde49c35dd2..1adae9eab831 100644
--- a/drivers/usb/host/xhci-hub.c
+++ b/drivers/usb/host/xhci-hub.c
@@ -1461,6 +1461,9 @@ int xhci_bus_suspend(struct usb_hcd *hcd)
t2 |= PORT_WKOC_E | PORT_WKCONN_E;
t2 &= ~PORT_WKDISC_E;
}
+ if ((xhci->quirks & XHCI_U2_DISABLE_WAKE) &&
+ (hcd->speed < HCD_USB3))
+ t2 &= ~PORT_WAKE_BITS;
} else
t2 &= ~PORT_WAKE_BITS;

diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c
index 1bcf971141c0..0965bae95a7b 100644
--- a/drivers/usb/host/xhci-pci.c
+++ b/drivers/usb/host/xhci-pci.c
@@ -54,6 +54,11 @@
#define PCI_DEVICE_ID_INTEL_APL_XHCI 0x5aa8
#define PCI_DEVICE_ID_INTEL_DNV_XHCI 0x19d0

+#define PCI_DEVICE_ID_AMD_PROMONTORYA_4 0x43b9
+#define PCI_DEVICE_ID_AMD_PROMONTORYA_3 0x43ba
+#define PCI_DEVICE_ID_AMD_PROMONTORYA_2 0x43bb
+#define PCI_DEVICE_ID_AMD_PROMONTORYA_1 0x43bc
+
static const char hcd_name[] = "xhci_hcd";

static struct hc_driver __read_mostly xhci_pci_hc_driver;
@@ -135,6 +140,13 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci)
if (pdev->vendor == PCI_VENDOR_ID_AMD)
xhci->quirks |= XHCI_TRUST_TX_LENGTH;

+ if ((pdev->vendor == PCI_VENDOR_ID_AMD) &&
+ ((pdev->device == PCI_DEVICE_ID_AMD_PROMONTORYA_4) ||
+ (pdev->device == PCI_DEVICE_ID_AMD_PROMONTORYA_3) ||
+ (pdev->device == PCI_DEVICE_ID_AMD_PROMONTORYA_2) ||
+ (pdev->device == PCI_DEVICE_ID_AMD_PROMONTORYA_1)))
+ xhci->quirks |= XHCI_U2_DISABLE_WAKE;
+
if (pdev->vendor == PCI_VENDOR_ID_INTEL) {
xhci->quirks |= XHCI_LPM_SUPPORT;
xhci->quirks |= XHCI_INTEL_HOST;
diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h
index 73a28a986d5e..dcd9649808c0 100644
--- a/drivers/usb/host/xhci.h
+++ b/drivers/usb/host/xhci.h
@@ -1819,6 +1819,7 @@ struct xhci_hcd {
/* For controller with a broken Port Disable implementation */
#define XHCI_BROKEN_PORT_PED (1 << 25)
#define XHCI_LIMIT_ENDPOINT_INTERVAL_7 (1 << 26)
+#define XHCI_U2_DISABLE_WAKE (1 << 27)

unsigned int num_active_eps;
unsigned int limit_active_eps;
diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c
index 0c55e7f64269..f64e914a8985 100644
--- a/drivers/usb/serial/cp210x.c
+++ b/drivers/usb/serial/cp210x.c
@@ -141,6 +141,7 @@ static const struct usb_device_id id_table[] = {
{ USB_DEVICE(0x10C4, 0x8977) }, /* CEL MeshWorks DevKit Device */
{ USB_DEVICE(0x10C4, 0x8998) }, /* KCF Technologies PRN */
{ USB_DEVICE(0x10C4, 0x8A2A) }, /* HubZ dual ZigBee and Z-Wave dongle */
+ { USB_DEVICE(0x10C4, 0x8A5E) }, /* CEL EM3588 ZigBee USB Stick Long Range */
{ USB_DEVICE(0x10C4, 0xEA60) }, /* Silicon Labs factory default */
{ USB_DEVICE(0x10C4, 0xEA61) }, /* Silicon Labs factory default */
{ USB_DEVICE(0x10C4, 0xEA70) }, /* Silicon Labs factory default */
diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c
index 3bf61acfc26b..ebe51f11105d 100644
--- a/drivers/usb/serial/option.c
+++ b/drivers/usb/serial/option.c
@@ -1877,6 +1877,10 @@ static const struct usb_device_id option_ids[] = {
.driver_info = (kernel_ulong_t)&four_g_w100_blacklist
},
{ USB_DEVICE_INTERFACE_CLASS(LONGCHEER_VENDOR_ID, SPEEDUP_PRODUCT_SU9800, 0xff) },
+ { USB_DEVICE_INTERFACE_CLASS(LONGCHEER_VENDOR_ID, 0x9801, 0xff),
+ .driver_info = (kernel_ulong_t)&net_intf3_blacklist },
+ { USB_DEVICE_INTERFACE_CLASS(LONGCHEER_VENDOR_ID, 0x9803, 0xff),
+ .driver_info = (kernel_ulong_t)&net_intf4_blacklist },
{ USB_DEVICE(LONGCHEER_VENDOR_ID, ZOOM_PRODUCT_4597) },
{ USB_DEVICE(LONGCHEER_VENDOR_ID, IBALL_3_5G_CONNECT) },
{ USB_DEVICE(HAIER_VENDOR_ID, HAIER_PRODUCT_CE100) },
diff --git a/drivers/usb/serial/qcserial.c b/drivers/usb/serial/qcserial.c
index fd509ed6cf70..652b4334b26d 100644
--- a/drivers/usb/serial/qcserial.c
+++ b/drivers/usb/serial/qcserial.c
@@ -158,6 +158,7 @@ static const struct usb_device_id id_table[] = {
{DEVICE_SWI(0x1199, 0x9056)}, /* Sierra Wireless Modem */
{DEVICE_SWI(0x1199, 0x9060)}, /* Sierra Wireless Modem */
{DEVICE_SWI(0x1199, 0x9061)}, /* Sierra Wireless Modem */
+ {DEVICE_SWI(0x1199, 0x9063)}, /* Sierra Wireless EM7305 */
{DEVICE_SWI(0x1199, 0x9070)}, /* Sierra Wireless MC74xx */
{DEVICE_SWI(0x1199, 0x9071)}, /* Sierra Wireless MC74xx */
{DEVICE_SWI(0x1199, 0x9078)}, /* Sierra Wireless EM74xx */
diff --git a/drivers/usb/usbip/stub_main.c b/drivers/usb/usbip/stub_main.c
index 44ab43fc4fcc..af10f7b131a4 100644
--- a/drivers/usb/usbip/stub_main.c
+++ b/drivers/usb/usbip/stub_main.c
@@ -262,7 +262,11 @@ void stub_device_cleanup_urbs(struct stub_device *sdev)
kmem_cache_free(stub_priv_cache, priv);

kfree(urb->transfer_buffer);
+ urb->transfer_buffer = NULL;
+
kfree(urb->setup_packet);
+ urb->setup_packet = NULL;
+
usb_free_urb(urb);
}
}
diff --git a/drivers/usb/usbip/stub_tx.c b/drivers/usb/usbip/stub_tx.c
index 6b1e8c3f0e4b..be50cef645d8 100644
--- a/drivers/usb/usbip/stub_tx.c
+++ b/drivers/usb/usbip/stub_tx.c
@@ -28,7 +28,11 @@ static void stub_free_priv_and_urb(struct stub_priv *priv)
struct urb *urb = priv->urb;

kfree(urb->setup_packet);
+ urb->setup_packet = NULL;
+
kfree(urb->transfer_buffer);
+ urb->transfer_buffer = NULL;
+
list_del(&priv->list);
kmem_cache_free(stub_priv_cache, priv);
usb_free_urb(urb);
diff --git a/drivers/xen/xenbus/xenbus_comms.c b/drivers/xen/xenbus/xenbus_comms.c
index 856ada5d39c9..5b081a01779d 100644
--- a/drivers/xen/xenbus/xenbus_comms.c
+++ b/drivers/xen/xenbus/xenbus_comms.c
@@ -299,17 +299,7 @@ static int process_msg(void)
mutex_lock(&xb_write_mutex);
list_for_each_entry(req, &xs_reply_list, list) {
if (req->msg.req_id == state.msg.req_id) {
- if (req->state == xb_req_state_wait_reply) {
- req->msg.type = state.msg.type;
- req->msg.len = state.msg.len;
- req->body = state.body;
- req->state = xb_req_state_got_reply;
- list_del(&req->list);
- req->cb(req);
- } else {
- list_del(&req->list);
- kfree(req);
- }
+ list_del(&req->list);
err = 0;
break;
}
@@ -317,6 +307,15 @@ static int process_msg(void)
mutex_unlock(&xb_write_mutex);
if (err)
goto out;
+
+ if (req->state == xb_req_state_wait_reply) {
+ req->msg.type = state.msg.type;
+ req->msg.len = state.msg.len;
+ req->body = state.body;
+ req->state = xb_req_state_got_reply;
+ req->cb(req);
+ } else
+ kfree(req);
}

mutex_unlock(&xs_response_mutex);
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 959a19ced4d5..dd58a2a95bb6 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -80,9 +80,9 @@ static struct rhashtable_params ht_parms = {

static struct rhashtable gl_hash_table;

-void gfs2_glock_free(struct gfs2_glock *gl)
+static void gfs2_glock_dealloc(struct rcu_head *rcu)
{
- struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
+ struct gfs2_glock *gl = container_of(rcu, struct gfs2_glock, gl_rcu);

if (gl->gl_ops->go_flags & GLOF_ASPACE) {
kmem_cache_free(gfs2_glock_aspace_cachep, gl);
@@ -90,6 +90,13 @@ void gfs2_glock_free(struct gfs2_glock *gl)
kfree(gl->gl_lksb.sb_lvbptr);
kmem_cache_free(gfs2_glock_cachep, gl);
}
+}
+
+void gfs2_glock_free(struct gfs2_glock *gl)
+{
+ struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
+
+ call_rcu(&gl->gl_rcu, gfs2_glock_dealloc);
if (atomic_dec_and_test(&sdp->sd_glock_disposal))
wake_up(&sdp->sd_glock_wait);
}
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index b7cf65d13561..7550975c39ab 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -374,6 +374,7 @@ struct gfs2_glock {
loff_t end;
} gl_vm;
};
+ struct rcu_head gl_rcu;
struct rhash_head gl_node;
};

diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index 2404ad238c0b..a21b1fb9a968 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -236,6 +236,23 @@ unsigned int cpumask_local_spread(unsigned int i, int node);
(cpu) = cpumask_next_zero((cpu), (mask)), \
(cpu) < nr_cpu_ids;)

+extern int cpumask_next_wrap(int n, const struct cpumask *mask, int start, bool wrap);
+
+/**
+ * for_each_cpu_wrap - iterate over every cpu in a mask, starting at a specified location
+ * @cpu: the (optionally unsigned) integer iterator
+ * @mask: the cpumask pointer
+ * @start: the start location
+ *
+ * The implementation does not assume any bit in @mask is set (including @start).
+ *
+ * After the loop, cpu is >= nr_cpu_ids.
+ */
+#define for_each_cpu_wrap(cpu, mask, start) \
+ for ((cpu) = cpumask_next_wrap((start)-1, (mask), (start), false); \
+ (cpu) < nr_cpumask_bits; \
+ (cpu) = cpumask_next_wrap((cpu), (mask), (start), true))
+
/**
* for_each_cpu_and - iterate over every cpu in both masks
* @cpu: the (optionally unsigned) integer iterator
diff --git a/include/linux/device.h b/include/linux/device.h
index 9ef518af5515..f240baac2001 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -378,6 +378,7 @@ int subsys_virtual_register(struct bus_type *subsys,
* @suspend: Used to put the device to sleep mode, usually to a low power
* state.
* @resume: Used to bring the device from the sleep mode.
+ * @shutdown: Called at shut-down time to quiesce the device.
* @ns_type: Callbacks so sysfs can detemine namespaces.
* @namespace: Namespace of the device belongs to this class.
* @pm: The default device power management operations of this class.
@@ -407,6 +408,7 @@ struct class {

int (*suspend)(struct device *dev, pm_message_t state);
int (*resume)(struct device *dev);
+ int (*shutdown)(struct device *dev);

const struct kobj_ns_type_operations *ns_type;
const void *(*namespace)(struct device *dev);
diff --git a/include/linux/usb/hcd.h b/include/linux/usb/hcd.h
index 50398b69ca44..a1f03ebfde47 100644
--- a/include/linux/usb/hcd.h
+++ b/include/linux/usb/hcd.h
@@ -565,9 +565,9 @@ extern void usb_ep0_reinit(struct usb_device *);
((USB_DIR_IN|USB_TYPE_STANDARD|USB_RECIP_INTERFACE)<<8)

#define EndpointRequest \
- ((USB_DIR_IN|USB_TYPE_STANDARD|USB_RECIP_INTERFACE)<<8)
+ ((USB_DIR_IN|USB_TYPE_STANDARD|USB_RECIP_ENDPOINT)<<8)
#define EndpointOutRequest \
- ((USB_DIR_OUT|USB_TYPE_STANDARD|USB_RECIP_INTERFACE)<<8)
+ ((USB_DIR_OUT|USB_TYPE_STANDARD|USB_RECIP_ENDPOINT)<<8)

/* class requests from the USB 2.0 hub spec, table 11-15 */
#define HUB_CLASS_REQ(dir, type, request) ((((dir) | (type)) << 8) | (request))
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index c77e4b1d51c0..bafa4e04b850 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1381,7 +1381,6 @@ static unsigned long weighted_cpuload(const int cpu);
static unsigned long source_load(int cpu, int type);
static unsigned long target_load(int cpu, int type);
static unsigned long capacity_of(int cpu);
-static long effective_load(struct task_group *tg, int cpu, long wl, long wg);

/* Cached statistics for all CPUs within a node */
struct numa_stats {
@@ -2469,7 +2468,8 @@ void task_numa_work(struct callback_head *work)
return;


- down_read(&mm->mmap_sem);
+ if (!down_read_trylock(&mm->mmap_sem))
+ return;
vma = find_vma(mm, start);
if (!vma) {
reset_ptenuma_scan(p);
@@ -2584,6 +2584,60 @@ void task_tick_numa(struct rq *rq, struct task_struct *curr)
}
}
}
+
+/*
+ * Can a task be moved from prev_cpu to this_cpu without causing a load
+ * imbalance that would trigger the load balancer?
+ */
+static inline bool numa_wake_affine(struct sched_domain *sd,
+ struct task_struct *p, int this_cpu,
+ int prev_cpu, int sync)
+{
+ struct numa_stats prev_load, this_load;
+ s64 this_eff_load, prev_eff_load;
+
+ update_numa_stats(&prev_load, cpu_to_node(prev_cpu));
+ update_numa_stats(&this_load, cpu_to_node(this_cpu));
+
+ /*
+ * If sync wakeup then subtract the (maximum possible)
+ * effect of the currently running task from the load
+ * of the current CPU:
+ */
+ if (sync) {
+ unsigned long current_load = task_h_load(current);
+
+ if (this_load.load > current_load)
+ this_load.load -= current_load;
+ else
+ this_load.load = 0;
+ }
+
+ /*
+ * In low-load situations, where this_cpu's node is idle due to the
+ * sync cause above having dropped this_load.load to 0, move the task.
+ * Moving to an idle socket will not create a bad imbalance.
+ *
+ * Otherwise check if the nodes are near enough in load to allow this
+ * task to be woken on this_cpu's node.
+ */
+ if (this_load.load > 0) {
+ unsigned long task_load = task_h_load(p);
+
+ this_eff_load = 100;
+ this_eff_load *= prev_load.compute_capacity;
+
+ prev_eff_load = 100 + (sd->imbalance_pct - 100) / 2;
+ prev_eff_load *= this_load.compute_capacity;
+
+ this_eff_load *= this_load.load + task_load;
+ prev_eff_load *= prev_load.load - task_load;
+
+ return this_eff_load <= prev_eff_load;
+ }
+
+ return true;
+}
#else
static void task_tick_numa(struct rq *rq, struct task_struct *curr)
{
@@ -2596,6 +2650,15 @@ static inline void account_numa_enqueue(struct rq *rq, struct task_struct *p)
static inline void account_numa_dequeue(struct rq *rq, struct task_struct *p)
{
}
+
+#ifdef CONFIG_SMP
+static inline bool numa_wake_affine(struct sched_domain *sd,
+ struct task_struct *p, int this_cpu,
+ int prev_cpu, int sync)
+{
+ return true;
+}
+#endif /* CONFIG_SMP */
#endif /* CONFIG_NUMA_BALANCING */

static void
@@ -2982,8 +3045,7 @@ __update_load_avg_cfs_rq(u64 now, int cpu, struct cfs_rq *cfs_rq)
* differential update where we store the last value we propagated. This in
* turn allows skipping updates if the differential is 'small'.
*
- * Updating tg's load_avg is necessary before update_cfs_share() (which is
- * done) and effective_load() (which is not done because it is too costly).
+ * Updating tg's load_avg is necessary before update_cfs_share().
*/
static inline void update_tg_load_avg(struct cfs_rq *cfs_rq, int force)
{
@@ -5215,126 +5277,6 @@ static unsigned long cpu_avg_load_per_task(int cpu)
return 0;
}

-#ifdef CONFIG_FAIR_GROUP_SCHED
-/*
- * effective_load() calculates the load change as seen from the root_task_group
- *
- * Adding load to a group doesn't make a group heavier, but can cause movement
- * of group shares between cpus. Assuming the shares were perfectly aligned one
- * can calculate the shift in shares.
- *
- * Calculate the effective load difference if @wl is added (subtracted) to @tg
- * on this @cpu and results in a total addition (subtraction) of @wg to the
- * total group weight.
- *
- * Given a runqueue weight distribution (rw_i) we can compute a shares
- * distribution (s_i) using:
- *
- * s_i = rw_i / \Sum rw_j (1)
- *
- * Suppose we have 4 CPUs and our @tg is a direct child of the root group and
- * has 7 equal weight tasks, distributed as below (rw_i), with the resulting
- * shares distribution (s_i):
- *
- * rw_i = { 2, 4, 1, 0 }
- * s_i = { 2/7, 4/7, 1/7, 0 }
- *
- * As per wake_affine() we're interested in the load of two CPUs (the CPU the
- * task used to run on and the CPU the waker is running on), we need to
- * compute the effect of waking a task on either CPU and, in case of a sync
- * wakeup, compute the effect of the current task going to sleep.
- *
- * So for a change of @wl to the local @cpu with an overall group weight change
- * of @wl we can compute the new shares distribution (s'_i) using:
- *
- * s'_i = (rw_i + @wl) / (@wg + \Sum rw_j) (2)
- *
- * Suppose we're interested in CPUs 0 and 1, and want to compute the load
- * differences in waking a task to CPU 0. The additional task changes the
- * weight and shares distributions like:
- *
- * rw'_i = { 3, 4, 1, 0 }
- * s'_i = { 3/8, 4/8, 1/8, 0 }
- *
- * We can then compute the difference in effective weight by using:
- *
- * dw_i = S * (s'_i - s_i) (3)
- *
- * Where 'S' is the group weight as seen by its parent.
- *
- * Therefore the effective change in loads on CPU 0 would be 5/56 (3/8 - 2/7)
- * times the weight of the group. The effect on CPU 1 would be -4/56 (4/8 -
- * 4/7) times the weight of the group.
- */
-static long effective_load(struct task_group *tg, int cpu, long wl, long wg)
-{
- struct sched_entity *se = tg->se[cpu];
-
- if (!tg->parent) /* the trivial, non-cgroup case */
- return wl;
-
- for_each_sched_entity(se) {
- struct cfs_rq *cfs_rq = se->my_q;
- long W, w = cfs_rq_load_avg(cfs_rq);
-
- tg = cfs_rq->tg;
-
- /*
- * W = @wg + \Sum rw_j
- */
- W = wg + atomic_long_read(&tg->load_avg);
-
- /* Ensure \Sum rw_j >= rw_i */
- W -= cfs_rq->tg_load_avg_contrib;
- W += w;
-
- /*
- * w = rw_i + @wl
- */
- w += wl;
-
- /*
- * wl = S * s'_i; see (2)
- */
- if (W > 0 && w < W)
- wl = (w * (long)scale_load_down(tg->shares)) / W;
- else
- wl = scale_load_down(tg->shares);
-
- /*
- * Per the above, wl is the new se->load.weight value; since
- * those are clipped to [MIN_SHARES, ...) do so now. See
- * calc_cfs_shares().
- */
- if (wl < MIN_SHARES)
- wl = MIN_SHARES;
-
- /*
- * wl = dw_i = S * (s'_i - s_i); see (3)
- */
- wl -= se->avg.load_avg;
-
- /*
- * Recursively apply this logic to all parent groups to compute
- * the final effective load change on the root group. Since
- * only the @tg group gets extra weight, all parent groups can
- * only redistribute existing shares. @wl is the shift in shares
- * resulting from this level per the above.
- */
- wg = 0;
- }
-
- return wl;
-}
-#else
-
-static long effective_load(struct task_group *tg, int cpu, long wl, long wg)
-{
- return wl;
-}
-
-#endif
-
static void record_wakee(struct task_struct *p)
{
/*
@@ -5385,67 +5327,25 @@ static int wake_wide(struct task_struct *p)
static int wake_affine(struct sched_domain *sd, struct task_struct *p,
int prev_cpu, int sync)
{
- s64 this_load, load;
- s64 this_eff_load, prev_eff_load;
- int idx, this_cpu;
- struct task_group *tg;
- unsigned long weight;
- int balanced;
-
- idx = sd->wake_idx;
- this_cpu = smp_processor_id();
- load = source_load(prev_cpu, idx);
- this_load = target_load(this_cpu, idx);
+ int this_cpu = smp_processor_id();
+ bool affine = false;

/*
- * If sync wakeup then subtract the (maximum possible)
- * effect of the currently running task from the load
- * of the current CPU:
+ * Common case: CPUs are in the same socket, and select_idle_sibling()
+ * will do its thing regardless of what we return:
*/
- if (sync) {
- tg = task_group(current);
- weight = current->se.avg.load_avg;
-
- this_load += effective_load(tg, this_cpu, -weight, -weight);
- load += effective_load(tg, prev_cpu, 0, -weight);
- }
-
- tg = task_group(p);
- weight = p->se.avg.load_avg;
-
- /*
- * In low-load situations, where prev_cpu is idle and this_cpu is idle
- * due to the sync cause above having dropped this_load to 0, we'll
- * always have an imbalance, but there's really nothing you can do
- * about that, so that's good too.
- *
- * Otherwise check if either cpus are near enough in load to allow this
- * task to be woken on this_cpu.
- */
- this_eff_load = 100;
- this_eff_load *= capacity_of(prev_cpu);
-
- prev_eff_load = 100 + (sd->imbalance_pct - 100) / 2;
- prev_eff_load *= capacity_of(this_cpu);
-
- if (this_load > 0) {
- this_eff_load *= this_load +
- effective_load(tg, this_cpu, weight, weight);
-
- prev_eff_load *= load + effective_load(tg, prev_cpu, 0, weight);
- }
-
- balanced = this_eff_load <= prev_eff_load;
+ if (cpus_share_cache(prev_cpu, this_cpu))
+ affine = true;
+ else
+ affine = numa_wake_affine(sd, p, this_cpu, prev_cpu, sync);

schedstat_inc(p->se.statistics.nr_wakeups_affine_attempts);
+ if (affine) {
+ schedstat_inc(sd->ttwu_move_affine);
+ schedstat_inc(p->se.statistics.nr_wakeups_affine);
+ }

- if (!balanced)
- return 0;
-
- schedstat_inc(sd->ttwu_move_affine);
- schedstat_inc(p->se.statistics.nr_wakeups_affine);
-
- return 1;
+ return affine;
}

static inline int task_util(struct task_struct *p);
@@ -5640,43 +5540,6 @@ find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu)
return shallowest_idle_cpu != -1 ? shallowest_idle_cpu : least_loaded_cpu;
}

-/*
- * Implement a for_each_cpu() variant that starts the scan at a given cpu
- * (@start), and wraps around.
- *
- * This is used to scan for idle CPUs; such that not all CPUs looking for an
- * idle CPU find the same CPU. The down-side is that tasks tend to cycle
- * through the LLC domain.
- *
- * Especially tbench is found sensitive to this.
- */
-
-static int cpumask_next_wrap(int n, const struct cpumask *mask, int start, int *wrapped)
-{
- int next;
-
-again:
- next = find_next_bit(cpumask_bits(mask), nr_cpumask_bits, n+1);
-
- if (*wrapped) {
- if (next >= start)
- return nr_cpumask_bits;
- } else {
- if (next >= nr_cpumask_bits) {
- *wrapped = 1;
- n = -1;
- goto again;
- }
- }
-
- return next;
-}
-
-#define for_each_cpu_wrap(cpu, mask, start, wrap) \
- for ((wrap) = 0, (cpu) = (start)-1; \
- (cpu) = cpumask_next_wrap((cpu), (mask), (start), &(wrap)), \
- (cpu) < nr_cpumask_bits; )
-
#ifdef CONFIG_SCHED_SMT

static inline void set_idle_cores(int cpu, int val)
@@ -5736,7 +5599,7 @@ void __update_idle_core(struct rq *rq)
static int select_idle_core(struct task_struct *p, struct sched_domain *sd, int target)
{
struct cpumask *cpus = this_cpu_cpumask_var_ptr(select_idle_mask);
- int core, cpu, wrap;
+ int core, cpu;

if (!static_branch_likely(&sched_smt_present))
return -1;
@@ -5746,7 +5609,7 @@ static int select_idle_core(struct task_struct *p, struct sched_domain *sd, int

cpumask_and(cpus, sched_domain_span(sd), &p->cpus_allowed);

- for_each_cpu_wrap(core, cpus, target, wrap) {
+ for_each_cpu_wrap(core, cpus, target) {
bool idle = true;

for_each_cpu(cpu, cpu_smt_mask(core)) {
@@ -5809,27 +5672,38 @@ static inline int select_idle_smt(struct task_struct *p, struct sched_domain *sd
static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int target)
{
struct sched_domain *this_sd;
- u64 avg_cost, avg_idle = this_rq()->avg_idle;
+ u64 avg_cost, avg_idle;
u64 time, cost;
s64 delta;
- int cpu, wrap;
+ int cpu, nr = INT_MAX;

this_sd = rcu_dereference(*this_cpu_ptr(&sd_llc));
if (!this_sd)
return -1;

- avg_cost = this_sd->avg_scan_cost;
-
/*
* Due to large variance we need a large fuzz factor; hackbench in
* particularly is sensitive here.
*/
- if (sched_feat(SIS_AVG_CPU) && (avg_idle / 512) < avg_cost)
+ avg_idle = this_rq()->avg_idle / 512;
+ avg_cost = this_sd->avg_scan_cost + 1;
+
+ if (sched_feat(SIS_AVG_CPU) && avg_idle < avg_cost)
return -1;

+ if (sched_feat(SIS_PROP)) {
+ u64 span_avg = sd->span_weight * avg_idle;
+ if (span_avg > 4*avg_cost)
+ nr = div_u64(span_avg, avg_cost);
+ else
+ nr = 4;
+ }
+
time = local_clock();

- for_each_cpu_wrap(cpu, sched_domain_span(sd), target, wrap) {
+ for_each_cpu_wrap(cpu, sched_domain_span(sd), target) {
+ if (!--nr)
+ return -1;
if (!cpumask_test_cpu(cpu, &p->cpus_allowed))
continue;
if (idle_cpu(cpu))
@@ -6011,11 +5885,15 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f

if (affine_sd) {
sd = NULL; /* Prefer wake_affine over balance flags */
- if (cpu != prev_cpu && wake_affine(affine_sd, p, prev_cpu, sync))
+ if (cpu == prev_cpu)
+ goto pick_cpu;
+
+ if (wake_affine(affine_sd, p, prev_cpu, sync))
new_cpu = cpu;
}

if (!sd) {
+ pick_cpu:
if (sd_flag & SD_BALANCE_WAKE) /* XXX always ? */
new_cpu = select_idle_sibling(p, prev_cpu, new_cpu);

@@ -6686,6 +6564,10 @@ static int migrate_degrades_locality(struct task_struct *p, struct lb_env *env)
if (dst_nid == p->numa_preferred_nid)
return 0;

+ /* Leaving a core idle is often worse than degrading locality. */
+ if (env->idle != CPU_NOT_IDLE)
+ return -1;
+
if (numa_group) {
src_faults = group_faults(p, src_nid);
dst_faults = group_faults(p, dst_nid);
diff --git a/kernel/sched/features.h b/kernel/sched/features.h
index 11192e0cb122..ce7b4b6ac733 100644
--- a/kernel/sched/features.h
+++ b/kernel/sched/features.h
@@ -55,6 +55,7 @@ SCHED_FEAT(TTWU_QUEUE, true)
* When doing wakeups, attempt to limit superfluous scans of the LLC domain.
*/
SCHED_FEAT(SIS_AVG_CPU, false)
+SCHED_FEAT(SIS_PROP, true)

/*
* Issue a WARN when we do multiple update_rq_clock() calls
diff --git a/lib/cpumask.c b/lib/cpumask.c
index 81dedaab36cc..4731a0895760 100644
--- a/lib/cpumask.c
+++ b/lib/cpumask.c
@@ -43,6 +43,38 @@ int cpumask_any_but(const struct cpumask *mask, unsigned int cpu)
}
EXPORT_SYMBOL(cpumask_any_but);

+/**
+ * cpumask_next_wrap - helper to implement for_each_cpu_wrap
+ * @n: the cpu prior to the place to search
+ * @mask: the cpumask pointer
+ * @start: the start point of the iteration
+ * @wrap: assume @n crossing @start terminates the iteration
+ *
+ * Returns >= nr_cpu_ids on completion
+ *
+ * Note: the @wrap argument is required for the start condition when
+ * we cannot assume @start is set in @mask.
+ */
+int cpumask_next_wrap(int n, const struct cpumask *mask, int start, bool wrap)
+{
+ int next;
+
+again:
+ next = cpumask_next(n, mask);
+
+ if (wrap && n < start && next >= start) {
+ return nr_cpumask_bits;
+
+ } else if (next >= nr_cpumask_bits) {
+ wrap = true;
+ n = -1;
+ goto again;
+ }
+
+ return next;
+}
+EXPORT_SYMBOL(cpumask_next_wrap);
+
/* These are not inline because of header tangles. */
#ifdef CONFIG_CPUMASK_OFFSTACK
/**