Re: [PATCH] CXL: Fix PSL error due to duplicate segment table entries
From: Aneesh Kumar K.V
Date: Mon Oct 27 2014 - 10:39:10 EST
Ian Munsie <imunsie@xxxxxxxxxxx> writes:
> From: Ian Munsie <imunsie@xxxxxxxxxxx>
>
> In certain circumstances the PSL can send an interrupt for a segment
> miss that the kernel has already handled. This can happen if multiple
> translations for the same segment are queued in the PSL before the
> kernel has restarted the first translation.
>
> The CXL driver did not expect this situation and did not check if a
> segment had already been handled. This could cause a duplicate segment
> table entry which in turn caused a PSL error taking down the card.
>
> This patch fixes the issue by checking for existing entries in the
> segment table that match the segment it is trying to insert to avoid
> inserting duplicate entries.
>
> Some of the code has been refactored to simplify it - the segment table
> hash has been moved from cxl_load_segment to find_free_sste where it is
> used and we have disabled the secondary hash in the segment table to
> reduce the number of entries that need to be tested from 16 to 8. Due to
> the large segment sizes we use it is extremely unlikely that the
> secondary hash would ever have been used in practice, so this should not
> have any negative impacts and may even improve performance.
>
> copro_calculate_slb will now mask the ESID by the correct mask for 1T vs
> 256M segments. This has no effect by itself as the extra bits were
> ignored, but it makes debugging the segment table entries easier and
> means that we can directly compare the ESID values for duplicates
> without needing to worry about masking in the comparison.
>
> Signed-off-by: Ian Munsie <imunsie@xxxxxxxxxxx>
I guess you are missing too many fixes in one patch.
1) One cleanup
2) Fix for masking ea correctly
3) And fix for not erroring out when a slb is already in the slb cache.
> ---
> arch/powerpc/mm/copro_fault.c | 3 +-
> drivers/misc/cxl/fault.c | 73 ++++++++++++++++++++++---------------------
> drivers/misc/cxl/native.c | 4 +--
> 3 files changed, 41 insertions(+), 39 deletions(-)
>
> diff --git a/arch/powerpc/mm/copro_fault.c b/arch/powerpc/mm/copro_fault.c
> index 0f9939e..5a236f0 100644
> --- a/arch/powerpc/mm/copro_fault.c
> +++ b/arch/powerpc/mm/copro_fault.c
> @@ -99,8 +99,6 @@ int copro_calculate_slb(struct mm_struct *mm, u64 ea, struct copro_slb *slb)
> u64 vsid;
> int psize, ssize;
>
> - slb->esid = (ea & ESID_MASK) | SLB_ESID_V;
> -
> switch (REGION_ID(ea)) {
> case USER_REGION_ID:
> pr_devel("%s: 0x%llx -- USER_REGION_ID\n", __func__, ea);
> @@ -133,6 +131,7 @@ int copro_calculate_slb(struct mm_struct *mm, u64 ea, struct copro_slb *slb)
> vsid |= mmu_psize_defs[psize].sllp |
> ((ssize == MMU_SEGSIZE_1T) ? SLB_VSID_B_1T : 0);
>
> + slb->esid = (ea & (ssize == MMU_SEGSIZE_1T ? ESID_MASK_1T : ESID_MASK)) | SLB_ESID_V;
> slb->vsid = vsid;
>
> return 0;
> diff --git a/drivers/misc/cxl/fault.c b/drivers/misc/cxl/fault.c
> index 69506eb..421cfd6 100644
> --- a/drivers/misc/cxl/fault.c
> +++ b/drivers/misc/cxl/fault.c
> @@ -21,60 +21,63 @@
>
> #include "cxl.h"
>
> -static struct cxl_sste* find_free_sste(struct cxl_sste *primary_group,
> - bool sec_hash,
> - struct cxl_sste *secondary_group,
> - unsigned int *lru)
> +static bool sste_matches(struct cxl_sste *sste, struct copro_slb *slb)
> {
> - unsigned int i, entry;
> - struct cxl_sste *sste, *group = primary_group;
> -
> - for (i = 0; i < 2; i++) {
> - for (entry = 0; entry < 8; entry++) {
> - sste = group + entry;
> - if (!(be64_to_cpu(sste->esid_data) & SLB_ESID_V))
> - return sste;
> - }
> - if (!sec_hash)
> - break;
> - group = secondary_group;
> + return ((sste->vsid_data == cpu_to_be64(slb->vsid)) &&
> + (sste->esid_data == cpu_to_be64(slb->esid)));
> +}
> +
> +/* This finds a free SSTE and checks to see if it's already in table */
> +static struct cxl_sste* find_free_sste(struct cxl_context *ctx,
> + struct copro_slb *slb)
the name is confusing. If you want to keep the name, can you also
specify that it return NULL, if it finds a matching entry. IIUC that
is the real part of the fix for the problem mentioned ?
> +{
> + struct cxl_sste *primary, *sste, *ret = NULL;
> + unsigned int mask = (ctx->sst_size >> 7) - 1; /* SSTP0[SegTableSize] */
> + unsigned int entry;
> + unsigned int hash;
> +
> + if (slb->vsid & SLB_VSID_B_1T)
> + hash = (slb->esid >> SID_SHIFT_1T) & mask;
> + else /* 256M */
> + hash = (slb->esid >> SID_SHIFT) & mask;
> +
> + primary = ctx->sstp + (hash << 3);
> + sste = primary;
> +
> + for (entry = 0; entry < 8; entry++) {
> + if (!ret && !(be64_to_cpu(sste->esid_data) & SLB_ESID_V))
> + ret = sste;
> + if (sste_matches(sste, slb))
> + return NULL;
> + sste++;
> }
> + if (ret)
> + return ret;
> +
> /* Nothing free, select an entry to cast out */
> - if (sec_hash && (*lru & 0x8))
> - sste = secondary_group + (*lru & 0x7);
> - else
> - sste = primary_group + (*lru & 0x7);
> - *lru = (*lru + 1) & 0xf;
> + ret = primary + ctx->sst_lru;
> + ctx->sst_lru = (ctx->sst_lru + 1) & 0x7;
>
> - return sste;
> + return ret;
> }
>
> static void cxl_load_segment(struct cxl_context *ctx, struct copro_slb *slb)
> {
> /* mask is the group index, we search primary and secondary here. */
> - unsigned int mask = (ctx->sst_size >> 7)-1; /* SSTP0[SegTableSize] */
> - bool sec_hash = 1;
> struct cxl_sste *sste;
> - unsigned int hash;
> unsigned long flags;
>
> -
> - sec_hash = !!(cxl_p1n_read(ctx->afu, CXL_PSL_SR_An) & CXL_PSL_SR_An_SC);
> -
> - if (slb->vsid & SLB_VSID_B_1T)
> - hash = (slb->esid >> SID_SHIFT_1T) & mask;
> - else /* 256M */
> - hash = (slb->esid >> SID_SHIFT) & mask;
> -
> spin_lock_irqsave(&ctx->sste_lock, flags);
> - sste = find_free_sste(ctx->sstp + (hash << 3), sec_hash,
> - ctx->sstp + ((~hash & mask) << 3), &ctx->sst_lru);
> + sste = find_free_sste(ctx, slb);
> + if (!sste)
> + goto out_unlock;
>
> pr_devel("CXL Populating SST[%li]: %#llx %#llx\n",
> sste - ctx->sstp, slb->vsid, slb->esid);
>
> sste->vsid_data = cpu_to_be64(slb->vsid);
> sste->esid_data = cpu_to_be64(slb->esid);
> +out_unlock:
> spin_unlock_irqrestore(&ctx->sste_lock, flags);
> }
>
> diff --git a/drivers/misc/cxl/native.c b/drivers/misc/cxl/native.c
> index 623286a..d47532e 100644
> --- a/drivers/misc/cxl/native.c
> +++ b/drivers/misc/cxl/native.c
> @@ -417,7 +417,7 @@ static int attach_afu_directed(struct cxl_context *ctx, u64 wed, u64 amr)
> ctx->elem->haurp = 0; /* disable */
> ctx->elem->sdr = cpu_to_be64(mfspr(SPRN_SDR1));
>
> - sr = CXL_PSL_SR_An_SC;
> + sr = 0;
> if (ctx->master)
> sr |= CXL_PSL_SR_An_MP;
> if (mfspr(SPRN_LPCR) & LPCR_TC)
> @@ -508,7 +508,7 @@ static int attach_dedicated(struct cxl_context *ctx, u64 wed, u64 amr)
> u64 sr;
> int rc;
>
> - sr = CXL_PSL_SR_An_SC;
> + sr = 0;
What is this change about ?
> set_endian(sr);
> if (ctx->master)
> sr |= CXL_PSL_SR_An_MP;
> --
> 2.1.1
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@xxxxxxxxxxxxxxx
> More majordomo info at http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at http://www.tux.org/lkml/
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/