[PATCH 4.8 113/140] cxl: Prevent adapter reset if an active context exists

From: Greg Kroah-Hartman
Date: Wed Oct 26 2016 - 09:13:59 EST


4.8-stable review patch. If anyone has any objections, please let me know.

------------------

From: Vaibhav Jain <vaibhav@xxxxxxxxxxxxxxxxxx>

commit 70b565bbdb911023373e035225ab10077e4ab937 upstream.

This patch prevents resetting the cxl adapter via sysfs in presence of
one or more active cxl_context on it. This protects against an
unrecoverable error caused by PSL owning a dirty cache line even after
reset and host tries to touch the same cache line. In case a force reset
of the card is required irrespective of any active contexts, the int
value -1 can be stored in the 'reset' sysfs attribute of the card.

The patch introduces a new atomic_t member named contexts_num inside
struct cxl that holds the number of active context attached to the card
, which is checked against '0' before proceeding with the reset. To
prevent against a race condition where a context is activated just after
reset check is performed, the contexts_num is atomically set to '-1'
after reset-check to indicate that no more contexts can be activated on
the card anymore.

Before activating a context we atomically test if contexts_num is
non-negative and if so, increment its value by one. In case the value of
contexts_num is negative then it indicates that the card is about to be
reset and context activation is error-ed out at that point.

Fixes: 62fa19d4b4fd ("cxl: Add ability to reset the card")
Acked-by: Frederic Barrat <fbarrat@xxxxxxxxxxxxxxxxxx>
Reviewed-by: Andrew Donnellan <andrew.donnellan@xxxxxxxxxxx>
Signed-off-by: Vaibhav Jain <vaibhav@xxxxxxxxxxxxxxxxxx>
Signed-off-by: Michael Ellerman <mpe@xxxxxxxxxxxxxx>
Signed-off-by: Greg Kroah-Hartman <gregkh@xxxxxxxxxxxxxxxxxxx>

---
Documentation/ABI/testing/sysfs-class-cxl | 7 +++--
drivers/misc/cxl/api.c | 9 ++++++
drivers/misc/cxl/context.c | 3 ++
drivers/misc/cxl/cxl.h | 24 +++++++++++++++++
drivers/misc/cxl/file.c | 11 +++++++
drivers/misc/cxl/guest.c | 3 ++
drivers/misc/cxl/main.c | 42 +++++++++++++++++++++++++++++-
drivers/misc/cxl/pci.c | 2 +
drivers/misc/cxl/sysfs.c | 27 ++++++++++++++++---
9 files changed, 121 insertions(+), 7 deletions(-)

--- a/Documentation/ABI/testing/sysfs-class-cxl
+++ b/Documentation/ABI/testing/sysfs-class-cxl
@@ -220,8 +220,11 @@ What: /sys/class/cxl/<card>/re
Date: October 2014
Contact: linuxppc-dev@xxxxxxxxxxxxxxxx
Description: write only
- Writing 1 will issue a PERST to card which may cause the card
- to reload the FPGA depending on load_image_on_perst.
+ Writing 1 will issue a PERST to card provided there are no
+ contexts active on any one of the card AFUs. This may cause
+ the card to reload the FPGA depending on load_image_on_perst.
+ Writing -1 will do a force PERST irrespective of any active
+ contexts on the card AFUs.
Users: https://github.com/ibm-capi/libcxl

What: /sys/class/cxl/<card>/perst_reloads_same_image (not in a guest)
--- a/drivers/misc/cxl/api.c
+++ b/drivers/misc/cxl/api.c
@@ -229,6 +229,14 @@ int cxl_start_context(struct cxl_context
if (ctx->status == STARTED)
goto out; /* already started */

+ /*
+ * Increment the mapped context count for adapter. This also checks
+ * if adapter_context_lock is taken.
+ */
+ rc = cxl_adapter_context_get(ctx->afu->adapter);
+ if (rc)
+ goto out;
+
if (task) {
ctx->pid = get_task_pid(task, PIDTYPE_PID);
ctx->glpid = get_task_pid(task->group_leader, PIDTYPE_PID);
@@ -240,6 +248,7 @@ int cxl_start_context(struct cxl_context

if ((rc = cxl_ops->attach_process(ctx, kernel, wed, 0))) {
put_pid(ctx->pid);
+ cxl_adapter_context_put(ctx->afu->adapter);
cxl_ctx_put();
goto out;
}
--- a/drivers/misc/cxl/context.c
+++ b/drivers/misc/cxl/context.c
@@ -238,6 +238,9 @@ int __detach_context(struct cxl_context
put_pid(ctx->glpid);

cxl_ctx_put();
+
+ /* Decrease the attached context count on the adapter */
+ cxl_adapter_context_put(ctx->afu->adapter);
return 0;
}

--- a/drivers/misc/cxl/cxl.h
+++ b/drivers/misc/cxl/cxl.h
@@ -615,6 +615,14 @@ struct cxl {
bool perst_select_user;
bool perst_same_image;
bool psl_timebase_synced;
+
+ /*
+ * number of contexts mapped on to this card. Possible values are:
+ * >0: Number of contexts mapped and new one can be mapped.
+ * 0: No active contexts and new ones can be mapped.
+ * -1: No contexts mapped and new ones cannot be mapped.
+ */
+ atomic_t contexts_num;
};

int cxl_pci_alloc_one_irq(struct cxl *adapter);
@@ -940,4 +948,20 @@ bool cxl_pci_is_vphb_device(struct pci_d

/* decode AFU error bits in the PSL register PSL_SERR_An */
void cxl_afu_decode_psl_serr(struct cxl_afu *afu, u64 serr);
+
+/*
+ * Increments the number of attached contexts on an adapter.
+ * In case an adapter_context_lock is taken the return -EBUSY.
+ */
+int cxl_adapter_context_get(struct cxl *adapter);
+
+/* Decrements the number of attached contexts on an adapter */
+void cxl_adapter_context_put(struct cxl *adapter);
+
+/* If no active contexts then prevents contexts from being attached */
+int cxl_adapter_context_lock(struct cxl *adapter);
+
+/* Unlock the contexts-lock if taken. Warn and force unlock otherwise */
+void cxl_adapter_context_unlock(struct cxl *adapter);
+
#endif
--- a/drivers/misc/cxl/file.c
+++ b/drivers/misc/cxl/file.c
@@ -205,11 +205,22 @@ static long afu_ioctl_start_work(struct
ctx->pid = get_task_pid(current, PIDTYPE_PID);
ctx->glpid = get_task_pid(current->group_leader, PIDTYPE_PID);

+ /*
+ * Increment the mapped context count for adapter. This also checks
+ * if adapter_context_lock is taken.
+ */
+ rc = cxl_adapter_context_get(ctx->afu->adapter);
+ if (rc) {
+ afu_release_irqs(ctx, ctx);
+ goto out;
+ }
+
trace_cxl_attach(ctx, work.work_element_descriptor, work.num_interrupts, amr);

if ((rc = cxl_ops->attach_process(ctx, false, work.work_element_descriptor,
amr))) {
afu_release_irqs(ctx, ctx);
+ cxl_adapter_context_put(ctx->afu->adapter);
goto out;
}

--- a/drivers/misc/cxl/guest.c
+++ b/drivers/misc/cxl/guest.c
@@ -1152,6 +1152,9 @@ struct cxl *cxl_guest_init_adapter(struc
if ((rc = cxl_sysfs_adapter_add(adapter)))
goto err_put1;

+ /* release the context lock as the adapter is configured */
+ cxl_adapter_context_unlock(adapter);
+
return adapter;

err_put1:
--- a/drivers/misc/cxl/main.c
+++ b/drivers/misc/cxl/main.c
@@ -243,8 +243,10 @@ struct cxl *cxl_alloc_adapter(void)
if (dev_set_name(&adapter->dev, "card%i", adapter->adapter_num))
goto err2;

- return adapter;
+ /* start with context lock taken */
+ atomic_set(&adapter->contexts_num, -1);

+ return adapter;
err2:
cxl_remove_adapter_nr(adapter);
err1:
@@ -286,6 +288,44 @@ int cxl_afu_select_best_mode(struct cxl_
return 0;
}

+int cxl_adapter_context_get(struct cxl *adapter)
+{
+ int rc;
+
+ rc = atomic_inc_unless_negative(&adapter->contexts_num);
+ return rc >= 0 ? 0 : -EBUSY;
+}
+
+void cxl_adapter_context_put(struct cxl *adapter)
+{
+ atomic_dec_if_positive(&adapter->contexts_num);
+}
+
+int cxl_adapter_context_lock(struct cxl *adapter)
+{
+ int rc;
+ /* no active contexts -> contexts_num == 0 */
+ rc = atomic_cmpxchg(&adapter->contexts_num, 0, -1);
+ return rc ? -EBUSY : 0;
+}
+
+void cxl_adapter_context_unlock(struct cxl *adapter)
+{
+ int val = atomic_cmpxchg(&adapter->contexts_num, -1, 0);
+
+ /*
+ * contexts lock taken -> contexts_num == -1
+ * If not true then show a warning and force reset the lock.
+ * This will happen when context_unlock was requested without
+ * doing a context_lock.
+ */
+ if (val != -1) {
+ atomic_set(&adapter->contexts_num, 0);
+ WARN(1, "Adapter context unlocked with %d active contexts",
+ val);
+ }
+}
+
static int __init init_cxl(void)
{
int rc = 0;
--- a/drivers/misc/cxl/pci.c
+++ b/drivers/misc/cxl/pci.c
@@ -1484,6 +1484,8 @@ static int cxl_configure_adapter(struct
if ((rc = cxl_native_register_psl_err_irq(adapter)))
goto err;

+ /* Release the context lock as adapter is configured */
+ cxl_adapter_context_unlock(adapter);
return 0;

err:
--- a/drivers/misc/cxl/sysfs.c
+++ b/drivers/misc/cxl/sysfs.c
@@ -75,12 +75,31 @@ static ssize_t reset_adapter_store(struc
int val;

rc = sscanf(buf, "%i", &val);
- if ((rc != 1) || (val != 1))
+ if ((rc != 1) || (val != 1 && val != -1))
return -EINVAL;

- if ((rc = cxl_ops->adapter_reset(adapter)))
- return rc;
- return count;
+ /*
+ * See if we can lock the context mapping that's only allowed
+ * when there are no contexts attached to the adapter. Once
+ * taken this will also prevent any context from getting activated.
+ */
+ if (val == 1) {
+ rc = cxl_adapter_context_lock(adapter);
+ if (rc)
+ goto out;
+
+ rc = cxl_ops->adapter_reset(adapter);
+ /* In case reset failed release context lock */
+ if (rc)
+ cxl_adapter_context_unlock(adapter);
+
+ } else if (val == -1) {
+ /* Perform a forced adapter reset */
+ rc = cxl_ops->adapter_reset(adapter);
+ }
+
+out:
+ return rc ? rc : count;
}

static ssize_t load_image_on_perst_show(struct device *device,