[GIT PULL] s390 patches for 4.14-rc2

From: Martin Schwidefsky
Date: Fri Sep 22 2017 - 05:41:02 EST


Hi Linus,

please pull from the 'for-linus' branch of

git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux.git for-linus

to receive the following updates:

- A couple of bug fixes: memory management, perf, cio, dasd and scm_blk.

- A larger change in regard to the CPU topology to improve performance
for systems running under z/VM or KVM.

The shortlog:

Gerald Schaefer (2):
s390/mm: make pmdp_invalidate() do invalidation only
s390/mm: fix write access check in gup_huge_pmd()

Heiko Carstens (2):
s390/topology: alternative topology for topology-less machines
s390/topology: enable / disable topology dynamically

Pu Hou (1):
s390/perf: fix bug when creating per-thread event

Sebastian Ott (2):
s390/scm_blk: consistently use blk_status_t as error type
s390/cio: recover from bad paths

Stefan Haberland (1):
s390/dasd: fix race during dasd initialization

arch/s390/include/asm/pgtable.h | 4 +-
arch/s390/kernel/early.c | 12 ----
arch/s390/kernel/perf_cpum_sf.c | 9 ++-
arch/s390/kernel/topology.c | 148 ++++++++++++++++++++++++++++++++++++----
arch/s390/mm/gup.c | 7 +-
drivers/s390/block/dasd.c | 12 +++-
drivers/s390/block/scm_blk.c | 6 +-
drivers/s390/cio/device.c | 12 +++-
drivers/s390/cio/device.h | 1 +
drivers/s390/cio/device_fsm.c | 12 ++++
drivers/s390/cio/io_sch.h | 2 +
11 files changed, 183 insertions(+), 42 deletions(-)

diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index dce708e..20e75a2 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -1507,7 +1507,9 @@ static inline pmd_t pmdp_huge_clear_flush(struct vm_area_struct *vma,
static inline void pmdp_invalidate(struct vm_area_struct *vma,
unsigned long addr, pmd_t *pmdp)
{
- pmdp_xchg_direct(vma->vm_mm, addr, pmdp, __pmd(_SEGMENT_ENTRY_EMPTY));
+ pmd_t pmd = __pmd(pmd_val(*pmdp) | _SEGMENT_ENTRY_INVALID);
+
+ pmdp_xchg_direct(vma->vm_mm, addr, pmdp, pmd);
}

#define __HAVE_ARCH_PMDP_SET_WRPROTECT
diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c
index ca8cd80..60181ca 100644
--- a/arch/s390/kernel/early.c
+++ b/arch/s390/kernel/early.c
@@ -404,18 +404,6 @@ static inline void save_vector_registers(void)
#endif
}

-static int __init topology_setup(char *str)
-{
- bool enabled;
- int rc;
-
- rc = kstrtobool(str, &enabled);
- if (!rc && !enabled)
- S390_lowcore.machine_flags &= ~MACHINE_FLAG_TOPOLOGY;
- return rc;
-}
-early_param("topology", topology_setup);
-
static int __init disable_vector_extension(char *str)
{
S390_lowcore.machine_flags &= ~MACHINE_FLAG_VX;
diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c
index c1bf75f..7e1e403 100644
--- a/arch/s390/kernel/perf_cpum_sf.c
+++ b/arch/s390/kernel/perf_cpum_sf.c
@@ -823,9 +823,12 @@ static int cpumsf_pmu_event_init(struct perf_event *event)
}

/* Check online status of the CPU to which the event is pinned */
- if ((unsigned int)event->cpu >= nr_cpumask_bits ||
- (event->cpu >= 0 && !cpu_online(event->cpu)))
- return -ENODEV;
+ if (event->cpu >= 0) {
+ if ((unsigned int)event->cpu >= nr_cpumask_bits)
+ return -ENODEV;
+ if (!cpu_online(event->cpu))
+ return -ENODEV;
+ }

/* Force reset of idle/hv excludes regardless of what the
* user requested.
diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c
index bb47c92..ed0bdd2 100644
--- a/arch/s390/kernel/topology.c
+++ b/arch/s390/kernel/topology.c
@@ -8,6 +8,8 @@

#include <linux/workqueue.h>
#include <linux/bootmem.h>
+#include <linux/uaccess.h>
+#include <linux/sysctl.h>
#include <linux/cpuset.h>
#include <linux/device.h>
#include <linux/export.h>
@@ -29,12 +31,20 @@
#define PTF_VERTICAL (1UL)
#define PTF_CHECK (2UL)

+enum {
+ TOPOLOGY_MODE_HW,
+ TOPOLOGY_MODE_SINGLE,
+ TOPOLOGY_MODE_PACKAGE,
+ TOPOLOGY_MODE_UNINITIALIZED
+};
+
struct mask_info {
struct mask_info *next;
unsigned char id;
cpumask_t mask;
};

+static int topology_mode = TOPOLOGY_MODE_UNINITIALIZED;
static void set_topology_timer(void);
static void topology_work_fn(struct work_struct *work);
static struct sysinfo_15_1_x *tl_info;
@@ -59,11 +69,26 @@ static cpumask_t cpu_group_map(struct mask_info *info, unsigned int cpu)
cpumask_t mask;

cpumask_copy(&mask, cpumask_of(cpu));
- if (!MACHINE_HAS_TOPOLOGY)
- return mask;
- for (; info; info = info->next) {
- if (cpumask_test_cpu(cpu, &info->mask))
- return info->mask;
+ switch (topology_mode) {
+ case TOPOLOGY_MODE_HW:
+ while (info) {
+ if (cpumask_test_cpu(cpu, &info->mask)) {
+ mask = info->mask;
+ break;
+ }
+ info = info->next;
+ }
+ if (cpumask_empty(&mask))
+ cpumask_copy(&mask, cpumask_of(cpu));
+ break;
+ case TOPOLOGY_MODE_PACKAGE:
+ cpumask_copy(&mask, cpu_present_mask);
+ break;
+ default:
+ /* fallthrough */
+ case TOPOLOGY_MODE_SINGLE:
+ cpumask_copy(&mask, cpumask_of(cpu));
+ break;
}
return mask;
}
@@ -74,7 +99,7 @@ static cpumask_t cpu_thread_map(unsigned int cpu)
int i;

cpumask_copy(&mask, cpumask_of(cpu));
- if (!MACHINE_HAS_TOPOLOGY)
+ if (topology_mode != TOPOLOGY_MODE_HW)
return mask;
cpu -= cpu % (smp_cpu_mtid + 1);
for (i = 0; i <= smp_cpu_mtid; i++)
@@ -184,10 +209,8 @@ static void topology_update_polarization_simple(void)
{
int cpu;

- mutex_lock(&smp_cpu_state_mutex);
for_each_possible_cpu(cpu)
smp_cpu_set_polarization(cpu, POLARIZATION_HRZ);
- mutex_unlock(&smp_cpu_state_mutex);
}

static int ptf(unsigned long fc)
@@ -223,7 +246,7 @@ int topology_set_cpu_management(int fc)
static void update_cpu_masks(void)
{
struct cpu_topology_s390 *topo;
- int cpu;
+ int cpu, id;

for_each_possible_cpu(cpu) {
topo = &cpu_topology[cpu];
@@ -231,12 +254,13 @@ static void update_cpu_masks(void)
topo->core_mask = cpu_group_map(&socket_info, cpu);
topo->book_mask = cpu_group_map(&book_info, cpu);
topo->drawer_mask = cpu_group_map(&drawer_info, cpu);
- if (!MACHINE_HAS_TOPOLOGY) {
+ if (topology_mode != TOPOLOGY_MODE_HW) {
+ id = topology_mode == TOPOLOGY_MODE_PACKAGE ? 0 : cpu;
topo->thread_id = cpu;
topo->core_id = cpu;
- topo->socket_id = cpu;
- topo->book_id = cpu;
- topo->drawer_id = cpu;
+ topo->socket_id = id;
+ topo->book_id = id;
+ topo->drawer_id = id;
if (cpu_present(cpu))
cpumask_set_cpu(cpu, &cpus_with_topology);
}
@@ -254,6 +278,7 @@ static int __arch_update_cpu_topology(void)
struct sysinfo_15_1_x *info = tl_info;
int rc = 0;

+ mutex_lock(&smp_cpu_state_mutex);
cpumask_clear(&cpus_with_topology);
if (MACHINE_HAS_TOPOLOGY) {
rc = 1;
@@ -263,6 +288,7 @@ static int __arch_update_cpu_topology(void)
update_cpu_masks();
if (!MACHINE_HAS_TOPOLOGY)
topology_update_polarization_simple();
+ mutex_unlock(&smp_cpu_state_mutex);
return rc;
}

@@ -289,6 +315,11 @@ void topology_schedule_update(void)
schedule_work(&topology_work);
}

+static void topology_flush_work(void)
+{
+ flush_work(&topology_work);
+}
+
static void topology_timer_fn(unsigned long ignored)
{
if (ptf(PTF_CHECK))
@@ -459,6 +490,12 @@ void __init topology_init_early(void)
struct sysinfo_15_1_x *info;

set_sched_topology(s390_topology);
+ if (topology_mode == TOPOLOGY_MODE_UNINITIALIZED) {
+ if (MACHINE_HAS_TOPOLOGY)
+ topology_mode = TOPOLOGY_MODE_HW;
+ else
+ topology_mode = TOPOLOGY_MODE_SINGLE;
+ }
if (!MACHINE_HAS_TOPOLOGY)
goto out;
tl_info = memblock_virt_alloc(PAGE_SIZE, PAGE_SIZE);
@@ -474,12 +511,97 @@ void __init topology_init_early(void)
__arch_update_cpu_topology();
}

+static inline int topology_get_mode(int enabled)
+{
+ if (!enabled)
+ return TOPOLOGY_MODE_SINGLE;
+ return MACHINE_HAS_TOPOLOGY ? TOPOLOGY_MODE_HW : TOPOLOGY_MODE_PACKAGE;
+}
+
+static inline int topology_is_enabled(void)
+{
+ return topology_mode != TOPOLOGY_MODE_SINGLE;
+}
+
+static int __init topology_setup(char *str)
+{
+ bool enabled;
+ int rc;
+
+ rc = kstrtobool(str, &enabled);
+ if (rc)
+ return rc;
+ topology_mode = topology_get_mode(enabled);
+ return 0;
+}
+early_param("topology", topology_setup);
+
+static int topology_ctl_handler(struct ctl_table *ctl, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+ unsigned int len;
+ int new_mode;
+ char buf[2];
+
+ if (!*lenp || *ppos) {
+ *lenp = 0;
+ return 0;
+ }
+ if (!write) {
+ strncpy(buf, topology_is_enabled() ? "1\n" : "0\n",
+ ARRAY_SIZE(buf));
+ len = strnlen(buf, ARRAY_SIZE(buf));
+ if (len > *lenp)
+ len = *lenp;
+ if (copy_to_user(buffer, buf, len))
+ return -EFAULT;
+ goto out;
+ }
+ len = *lenp;
+ if (copy_from_user(buf, buffer, len > sizeof(buf) ? sizeof(buf) : len))
+ return -EFAULT;
+ if (buf[0] != '0' && buf[0] != '1')
+ return -EINVAL;
+ mutex_lock(&smp_cpu_state_mutex);
+ new_mode = topology_get_mode(buf[0] == '1');
+ if (topology_mode != new_mode) {
+ topology_mode = new_mode;
+ topology_schedule_update();
+ }
+ mutex_unlock(&smp_cpu_state_mutex);
+ topology_flush_work();
+out:
+ *lenp = len;
+ *ppos += len;
+ return 0;
+}
+
+static struct ctl_table topology_ctl_table[] = {
+ {
+ .procname = "topology",
+ .mode = 0644,
+ .proc_handler = topology_ctl_handler,
+ },
+ { },
+};
+
+static struct ctl_table topology_dir_table[] = {
+ {
+ .procname = "s390",
+ .maxlen = 0,
+ .mode = 0555,
+ .child = topology_ctl_table,
+ },
+ { },
+};
+
static int __init topology_init(void)
{
if (MACHINE_HAS_TOPOLOGY)
set_topology_timer();
else
topology_update_polarization_simple();
+ register_sysctl_table(topology_dir_table);
return device_create_file(cpu_subsys.dev_root, &dev_attr_dispatching);
}
device_initcall(topology_init);
diff --git a/arch/s390/mm/gup.c b/arch/s390/mm/gup.c
index 8ecc25e..98ffe3e 100644
--- a/arch/s390/mm/gup.c
+++ b/arch/s390/mm/gup.c
@@ -56,13 +56,12 @@ static inline int gup_pte_range(pmd_t *pmdp, pmd_t pmd, unsigned long addr,
static inline int gup_huge_pmd(pmd_t *pmdp, pmd_t pmd, unsigned long addr,
unsigned long end, int write, struct page **pages, int *nr)
{
- unsigned long mask, result;
struct page *head, *page;
+ unsigned long mask;
int refs;

- result = write ? 0 : _SEGMENT_ENTRY_PROTECT;
- mask = result | _SEGMENT_ENTRY_INVALID;
- if ((pmd_val(pmd) & mask) != result)
+ mask = (write ? _SEGMENT_ENTRY_PROTECT : 0) | _SEGMENT_ENTRY_INVALID;
+ if ((pmd_val(pmd) & mask) != 0)
return 0;
VM_BUG_ON(!pfn_valid(pmd_val(pmd) >> PAGE_SHIFT));

diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c
index ea19b4f..29f35e2 100644
--- a/drivers/s390/block/dasd.c
+++ b/drivers/s390/block/dasd.c
@@ -1644,7 +1644,9 @@ void dasd_generic_handle_state_change(struct dasd_device *device)
dasd_schedule_device_bh(device);
if (device->block) {
dasd_schedule_block_bh(device->block);
- blk_mq_run_hw_queues(device->block->request_queue, true);
+ if (device->block->request_queue)
+ blk_mq_run_hw_queues(device->block->request_queue,
+ true);
}
}
EXPORT_SYMBOL_GPL(dasd_generic_handle_state_change);
@@ -3759,7 +3761,9 @@ int dasd_generic_path_operational(struct dasd_device *device)
dasd_schedule_device_bh(device);
if (device->block) {
dasd_schedule_block_bh(device->block);
- blk_mq_run_hw_queues(device->block->request_queue, true);
+ if (device->block->request_queue)
+ blk_mq_run_hw_queues(device->block->request_queue,
+ true);
}

if (!device->stopped)
@@ -4025,7 +4029,9 @@ int dasd_generic_restore_device(struct ccw_device *cdev)

if (device->block) {
dasd_schedule_block_bh(device->block);
- blk_mq_run_hw_queues(device->block->request_queue, true);
+ if (device->block->request_queue)
+ blk_mq_run_hw_queues(device->block->request_queue,
+ true);
}

clear_bit(DASD_FLAG_SUSPENDED, &device->flags);
diff --git a/drivers/s390/block/scm_blk.c b/drivers/s390/block/scm_blk.c
index 2e7fd96..eb51893 100644
--- a/drivers/s390/block/scm_blk.c
+++ b/drivers/s390/block/scm_blk.c
@@ -249,7 +249,7 @@ static void scm_request_requeue(struct scm_request *scmrq)
static void scm_request_finish(struct scm_request *scmrq)
{
struct scm_blk_dev *bdev = scmrq->bdev;
- int *error;
+ blk_status_t *error;
int i;

for (i = 0; i < nr_requests_per_io && scmrq->request[i]; i++) {
@@ -415,7 +415,7 @@ void scm_blk_irq(struct scm_device *scmdev, void *data, blk_status_t error)

static void scm_blk_request_done(struct request *req)
{
- int *error = blk_mq_rq_to_pdu(req);
+ blk_status_t *error = blk_mq_rq_to_pdu(req);

blk_mq_end_request(req, *error);
}
@@ -450,7 +450,7 @@ int scm_blk_dev_setup(struct scm_blk_dev *bdev, struct scm_device *scmdev)
atomic_set(&bdev->queued_reqs, 0);

bdev->tag_set.ops = &scm_mq_ops;
- bdev->tag_set.cmd_size = sizeof(int);
+ bdev->tag_set.cmd_size = sizeof(blk_status_t);
bdev->tag_set.nr_hw_queues = nr_requests;
bdev->tag_set.queue_depth = nr_requests_per_io * nr_requests;
bdev->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
diff --git a/drivers/s390/cio/device.c b/drivers/s390/cio/device.c
index 489b583..e5c32f4 100644
--- a/drivers/s390/cio/device.c
+++ b/drivers/s390/cio/device.c
@@ -1225,10 +1225,16 @@ static int device_is_disconnected(struct ccw_device *cdev)
static int recovery_check(struct device *dev, void *data)
{
struct ccw_device *cdev = to_ccwdev(dev);
+ struct subchannel *sch;
int *redo = data;

spin_lock_irq(cdev->ccwlock);
switch (cdev->private->state) {
+ case DEV_STATE_ONLINE:
+ sch = to_subchannel(cdev->dev.parent);
+ if ((sch->schib.pmcw.pam & sch->opm) == sch->vpm)
+ break;
+ /* fall through */
case DEV_STATE_DISCONNECTED:
CIO_MSG_EVENT(3, "recovery: trigger 0.%x.%04x\n",
cdev->private->dev_id.ssid,
@@ -1260,7 +1266,7 @@ static void recovery_work_func(struct work_struct *unused)
}
spin_unlock_irq(&recovery_lock);
} else
- CIO_MSG_EVENT(4, "recovery: end\n");
+ CIO_MSG_EVENT(3, "recovery: end\n");
}

static DECLARE_WORK(recovery_work, recovery_work_func);
@@ -1274,11 +1280,11 @@ static void recovery_func(unsigned long data)
schedule_work(&recovery_work);
}

-static void ccw_device_schedule_recovery(void)
+void ccw_device_schedule_recovery(void)
{
unsigned long flags;

- CIO_MSG_EVENT(4, "recovery: schedule\n");
+ CIO_MSG_EVENT(3, "recovery: schedule\n");
spin_lock_irqsave(&recovery_lock, flags);
if (!timer_pending(&recovery_timer) || (recovery_phase != 0)) {
recovery_phase = 0;
diff --git a/drivers/s390/cio/device.h b/drivers/s390/cio/device.h
index ec497af..69cb70f 100644
--- a/drivers/s390/cio/device.h
+++ b/drivers/s390/cio/device.h
@@ -134,6 +134,7 @@ void ccw_device_set_disconnected(struct ccw_device *cdev);
void ccw_device_set_notoper(struct ccw_device *cdev);

void ccw_device_set_timeout(struct ccw_device *, int);
+void ccw_device_schedule_recovery(void);

/* Channel measurement facility related */
void retry_set_schib(struct ccw_device *cdev);
diff --git a/drivers/s390/cio/device_fsm.c b/drivers/s390/cio/device_fsm.c
index 12016e3..f98ea67 100644
--- a/drivers/s390/cio/device_fsm.c
+++ b/drivers/s390/cio/device_fsm.c
@@ -476,6 +476,17 @@ static void create_fake_irb(struct irb *irb, int type)
}
}

+static void ccw_device_handle_broken_paths(struct ccw_device *cdev)
+{
+ struct subchannel *sch = to_subchannel(cdev->dev.parent);
+ u8 broken_paths = (sch->schib.pmcw.pam & sch->opm) ^ sch->vpm;
+
+ if (broken_paths && (cdev->private->path_broken_mask != broken_paths))
+ ccw_device_schedule_recovery();
+
+ cdev->private->path_broken_mask = broken_paths;
+}
+
void ccw_device_verify_done(struct ccw_device *cdev, int err)
{
struct subchannel *sch;
@@ -508,6 +519,7 @@ void ccw_device_verify_done(struct ccw_device *cdev, int err)
memset(&cdev->private->irb, 0, sizeof(struct irb));
}
ccw_device_report_path_events(cdev);
+ ccw_device_handle_broken_paths(cdev);
break;
case -ETIME:
case -EUSERS:
diff --git a/drivers/s390/cio/io_sch.h b/drivers/s390/cio/io_sch.h
index 220f491..9a1b56b 100644
--- a/drivers/s390/cio/io_sch.h
+++ b/drivers/s390/cio/io_sch.h
@@ -131,6 +131,8 @@ struct ccw_device_private {
not operable */
u8 path_gone_mask; /* mask of paths, that became unavailable */
u8 path_new_mask; /* mask of paths, that became available */
+ u8 path_broken_mask; /* mask of paths, which were found to be
+ unusable */
struct {
unsigned int fast:1; /* post with "channel end" */
unsigned int repall:1; /* report every interrupt status */