[PATCH v4 3/4] mtd: add ECC error accounting for each read request

From: Michał Kępień
Date: Wed Jun 29 2022 - 09:13:30 EST


Extend struct mtd_req_stats with two new fields holding the number of
corrected bitflips and uncorrectable errors detected during a read
operation. This is a prerequisite for ultimately passing those counters
to user space, where they can be useful to applications for making
better-informed choices about moving data around.

Unlike 'max_bitflips' (which is set - in a common code path - to the
return value of a function called while the MTD device's mutex is held),
these counters have to be maintained in each MTD driver which defines
the '_read_oob' callback because the statistics need to be calculated
while the MTD device's mutex is held.

Suggested-by: Boris Brezillon <boris.brezillon@xxxxxxxxxxxxx>
Signed-off-by: Michał Kępień <kernel@xxxxxxxxxx>
---
drivers/mtd/devices/docg3.c | 8 ++++++++
drivers/mtd/nand/onenand/onenand_base.c | 12 ++++++++++++
drivers/mtd/nand/raw/nand_base.c | 10 ++++++++++
drivers/mtd/nand/spi/core.c | 10 ++++++++++
include/linux/mtd/mtd.h | 2 ++
5 files changed, 42 insertions(+)

diff --git a/drivers/mtd/devices/docg3.c b/drivers/mtd/devices/docg3.c
index 5b0ae5ddad74..3783ae5c6d23 100644
--- a/drivers/mtd/devices/docg3.c
+++ b/drivers/mtd/devices/docg3.c
@@ -871,6 +871,7 @@ static int doc_read_oob(struct mtd_info *mtd, loff_t from,
u8 *buf = ops->datbuf;
size_t len, ooblen, nbdata, nboob;
u8 hwecc[DOC_ECC_BCH_SIZE], eccconf1;
+ struct mtd_ecc_stats old_stats;
int max_bitflips = 0;

if (buf)
@@ -895,6 +896,7 @@ static int doc_read_oob(struct mtd_info *mtd, loff_t from,
ret = 0;
skip = from % DOC_LAYOUT_PAGE_SIZE;
mutex_lock(&docg3->cascade->lock);
+ old_stats = mtd->ecc_stats;
while (ret >= 0 && (len > 0 || ooblen > 0)) {
calc_block_sector(from - skip, &block0, &block1, &page, &ofs,
docg3->reliable);
@@ -966,6 +968,12 @@ static int doc_read_oob(struct mtd_info *mtd, loff_t from,
}

out:
+ if (ops->stats) {
+ ops->stats->uncorrectable_errors +=
+ mtd->ecc_stats.failed - old_stats.failed;
+ ops->stats->corrected_bitflips +=
+ mtd->ecc_stats.corrected - old_stats.corrected;
+ }
mutex_unlock(&docg3->cascade->lock);
return ret;
err_in_read:
diff --git a/drivers/mtd/nand/onenand/onenand_base.c b/drivers/mtd/nand/onenand/onenand_base.c
index 5810104420a2..f66385faf631 100644
--- a/drivers/mtd/nand/onenand/onenand_base.c
+++ b/drivers/mtd/nand/onenand/onenand_base.c
@@ -1440,6 +1440,7 @@ static int onenand_read_oob(struct mtd_info *mtd, loff_t from,
struct mtd_oob_ops *ops)
{
struct onenand_chip *this = mtd->priv;
+ struct mtd_ecc_stats old_stats;
int ret;

switch (ops->mode) {
@@ -1453,12 +1454,23 @@ static int onenand_read_oob(struct mtd_info *mtd, loff_t from,
}

onenand_get_device(mtd, FL_READING);
+
+ old_stats = mtd->ecc_stats;
+
if (ops->datbuf)
ret = ONENAND_IS_4KB_PAGE(this) ?
onenand_mlc_read_ops_nolock(mtd, from, ops) :
onenand_read_ops_nolock(mtd, from, ops);
else
ret = onenand_read_oob_nolock(mtd, from, ops);
+
+ if (ops->stats) {
+ ops->stats->uncorrectable_errors +=
+ mtd->ecc_stats.failed - old_stats.failed;
+ ops->stats->corrected_bitflips +=
+ mtd->ecc_stats.corrected - old_stats.corrected;
+ }
+
onenand_release_device(mtd);

return ret;
diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c
index 6b67b7dfe7ce..3e20de1e145c 100644
--- a/drivers/mtd/nand/raw/nand_base.c
+++ b/drivers/mtd/nand/raw/nand_base.c
@@ -3818,6 +3818,7 @@ static int nand_read_oob(struct mtd_info *mtd, loff_t from,
struct mtd_oob_ops *ops)
{
struct nand_chip *chip = mtd_to_nand(mtd);
+ struct mtd_ecc_stats old_stats;
int ret;

ops->retlen = 0;
@@ -3829,11 +3830,20 @@ static int nand_read_oob(struct mtd_info *mtd, loff_t from,

nand_get_device(chip);

+ old_stats = mtd->ecc_stats;
+
if (!ops->datbuf)
ret = nand_do_read_oob(chip, from, ops);
else
ret = nand_do_read_ops(chip, from, ops);

+ if (ops->stats) {
+ ops->stats->uncorrectable_errors +=
+ mtd->ecc_stats.failed - old_stats.failed;
+ ops->stats->corrected_bitflips +=
+ mtd->ecc_stats.corrected - old_stats.corrected;
+ }
+
nand_release_device(chip);
return ret;
}
diff --git a/drivers/mtd/nand/spi/core.c b/drivers/mtd/nand/spi/core.c
index d5b685d1605e..90d8a88c9326 100644
--- a/drivers/mtd/nand/spi/core.c
+++ b/drivers/mtd/nand/spi/core.c
@@ -635,6 +635,7 @@ static int spinand_mtd_read(struct mtd_info *mtd, loff_t from,
{
struct spinand_device *spinand = mtd_to_spinand(mtd);
struct nand_device *nand = mtd_to_nanddev(mtd);
+ struct mtd_ecc_stats old_stats;
unsigned int max_bitflips = 0;
struct nand_io_iter iter;
bool disable_ecc = false;
@@ -646,6 +647,8 @@ static int spinand_mtd_read(struct mtd_info *mtd, loff_t from,

mutex_lock(&spinand->lock);

+ old_stats = mtd->ecc_stats;
+
nanddev_io_for_each_page(nand, NAND_PAGE_READ, from, ops, &iter) {
if (disable_ecc)
iter.req.mode = MTD_OPS_RAW;
@@ -668,6 +671,13 @@ static int spinand_mtd_read(struct mtd_info *mtd, loff_t from,
ops->oobretlen += iter.req.ooblen;
}

+ if (ops->stats) {
+ ops->stats->uncorrectable_errors +=
+ mtd->ecc_stats.failed - old_stats.failed;
+ ops->stats->corrected_bitflips +=
+ mtd->ecc_stats.corrected - old_stats.corrected;
+ }
+
mutex_unlock(&spinand->lock);

if (ecc_failed && !ret)
diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index fccad1766458..c12a5930f32c 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -41,6 +41,8 @@ struct mtd_erase_region_info {
};

struct mtd_req_stats {
+ unsigned int uncorrectable_errors;
+ unsigned int corrected_bitflips;
unsigned int max_bitflips;
};

--
2.37.0