Re: [PATCH v4 05/12] pstore/blk: Add support for pmsg frontend

From: WeiXiong Liao
Date: Sat May 09 2020 - 00:38:28 EST


hi Kees Cook,

On 2020/5/8 PM 2:39, Kees Cook wrote:
> From: WeiXiong Liao <liaoweixiong@xxxxxxxxxxxxxxxxx>
>
> Add pmsg support to pstore/blk (through pstore/zone). To enable, pmsg_size
> must be greater than 0 and a multiple of 4096.
>
> Signed-off-by: WeiXiong Liao <liaoweixiong@xxxxxxxxxxxxxxxxx>
> Link: https://lore.kernel.org/r/1585126506-18635-5-git-send-email-liaoweixiong@xxxxxxxxxxxxxxxxx
> Signed-off-by: Kees Cook <keescook@xxxxxxxxxxxx>
> ---
> fs/pstore/Kconfig | 12 ++
> fs/pstore/blk.c | 9 ++
> fs/pstore/zone.c | 268 ++++++++++++++++++++++++++++++++++--
> include/linux/pstore_zone.h | 2 +
> 4 files changed, 281 insertions(+), 10 deletions(-)
>
> diff --git a/fs/pstore/Kconfig b/fs/pstore/Kconfig
> index 92ba73bd0b62..f18cd126d83f 100644
> --- a/fs/pstore/Kconfig
> +++ b/fs/pstore/Kconfig
> @@ -224,3 +224,15 @@ config PSTORE_BLK_MAX_REASON
>
> NOTE that, both Kconfig and module parameters can configure
> pstore/blk, but module parameters have priority over Kconfig.
> +
> +config PSTORE_BLK_PMSG_SIZE
> + int "Size in Kbytes of pmsg to store"
> + depends on PSTORE_BLK
> + depends on PSTORE_PMSG
> + default 64
> + help
> + This just sets size of pmsg (pmsg_size) for pstore/blk. The size is
> + in KB and must be a multiple of 4.
> +
> + NOTE that, both Kconfig and module parameters can configure
> + pstore/blk, but module parameters have priority over Kconfig.
> diff --git a/fs/pstore/blk.c b/fs/pstore/blk.c
> index d1c3074aa128..401e5ba66a5f 100644
> --- a/fs/pstore/blk.c
> +++ b/fs/pstore/blk.c
> @@ -24,6 +24,14 @@ module_param(max_reason, int, 0400);
> MODULE_PARM_DESC(max_reason,
> "maximum reason for kmsg dump (default 2: Oops and Panic)");
>
> +#if IS_ENABLED(CONFIG_PSTORE_PMSG)
> +static long pmsg_size = CONFIG_PSTORE_BLK_PMSG_SIZE;
> +#else
> +static long pmsg_size = -1;
> +#endif
> +module_param(pmsg_size, long, 0400);
> +MODULE_PARM_DESC(pmsg_size, "pmsg size in kbytes");
> +
> /*
> * blkdev - The block device to use.
> *
> @@ -124,6 +132,7 @@ static int psblk_register_do(struct psblk_device *dev)
> }
>
> verify_size(kmsg_size, 4096, dev->flags & PSTORE_FLAGS_DMESG);
> + verify_size(pmsg_size, 4096, dev->flags & PSTORE_FLAGS_PMSG);
> #undef verify_size
>
> pstore_zone_info->total_size = dev->total_size;
> diff --git a/fs/pstore/zone.c b/fs/pstore/zone.c
> index 6c25c443c8e2..f472b06a6c14 100644
> --- a/fs/pstore/zone.c
> +++ b/fs/pstore/zone.c
> @@ -23,12 +23,14 @@
> *
> * @sig: signature to indicate header (PSZ_SIG xor PSZONE-type value)
> * @datalen: length of data in @data
> + * @start: offset into @data where the beginning of the stored bytes begin
> * @data: zone data.
> */
> struct psz_buffer {
> #define PSZ_SIG (0x43474244) /* DBGC */
> uint32_t sig;
> atomic_t datalen;
> + atomic_t start;
> uint8_t data[];
> };
>
> @@ -84,9 +86,11 @@ struct pstore_zone {
> * struct psz_context - all about running state of pstore/zone
> *
> * @opszs: oops/panic storage zones
> + * @ppsz: pmsg storage zone
> * @oops_max_cnt: max count of @opszs
> * @oops_read_cnt: counter to read oops zone
> * @oops_write_cnt: counter to write
> + * @pmsg_read_cnt: counter to read pmsg zone
> * @oops_counter: counter to oops
> * @panic_counter: counter to panic
> * @recovered: whether finish recovering data from storage
> @@ -97,9 +101,11 @@ struct pstore_zone {
> */
> struct psz_context {
> struct pstore_zone **opszs;
> + struct pstore_zone *ppsz;
> unsigned int oops_max_cnt;
> unsigned int oops_read_cnt;
> unsigned int oops_write_cnt;
> + unsigned int pmsg_read_cnt;
> /*
> * the counter should be recovered when recover.
> * It records the oops/panic times after burning rather than booting.
> @@ -139,6 +145,11 @@ static inline int buffer_datalen(struct pstore_zone *zone)
> return atomic_read(&zone->buffer->datalen);
> }
>
> +static inline int buffer_start(struct pstore_zone *zone)
> +{
> + return atomic_read(&zone->buffer->start);
> +}
> +
> static inline bool is_on_panic(void)
> {
> struct psz_context *cxt = &psz_cxt;
> @@ -146,10 +157,10 @@ static inline bool is_on_panic(void)
> return atomic_read(&cxt->on_panic);
> }
>
> -static ssize_t psz_zone_read(struct pstore_zone *zone, char *buf,
> +static ssize_t psz_zone_read_buffer(struct pstore_zone *zone, char *buf,
> size_t len, unsigned long off)
> {
> - if (!buf || !zone->buffer)
> + if (!buf || !zone || !zone->buffer)
> return -EINVAL;
> if (off > zone->buffer_size)
> return -EINVAL;
> @@ -158,6 +169,18 @@ static ssize_t psz_zone_read(struct pstore_zone *zone, char *buf,
> return len;
> }
>
> +static int psz_zone_read_oldbuf(struct pstore_zone *zone, char *buf,
> + size_t len, unsigned long off)
> +{
> + if (!buf || !zone || !zone->oldbuf)
> + return -EINVAL;
> + if (off > zone->buffer_size)
> + return -EINVAL;
> + len = min_t(size_t, len, zone->buffer_size - off);
> + memcpy(buf, zone->oldbuf->data + off, len);
> + return 0;
> +}
> +
> static int psz_zone_write(struct pstore_zone *zone,
> enum psz_flush_mode flush_mode, const char *buf,
> size_t len, unsigned long off)
> @@ -413,6 +436,93 @@ static int psz_recover_oops(struct psz_context *cxt)
> return ret;
> }
>
> +static int psz_recover_zone(struct psz_context *cxt, struct pstore_zone *zone)
> +{
> + struct pstore_zone_info *info = cxt->pstore_zone_info;
> + struct psz_buffer *oldbuf, tmpbuf;
> + int ret = 0;
> + char *buf;
> + ssize_t rcnt, len, start, off;
> +
> + if (!zone || zone->oldbuf)
> + return 0;
> +
> + if (is_on_panic()) {
> + /* save data as much as possible */
> + psz_flush_dirty_zone(zone);
> + return 0;
> + }
> +
> + if (unlikely(!info->read))
> + return -EINVAL;
> +
> + len = sizeof(struct psz_buffer);
> + rcnt = info->read((char *)&tmpbuf, len, zone->off);
> + if (rcnt != len) {
> + pr_debug("read zone %s failed\n", zone->name);
> + return (int)rcnt < 0 ? (int)rcnt : -EIO;
> + }
> +
> + if (tmpbuf.sig != zone->buffer->sig) {
> + pr_debug("no valid data in zone %s\n", zone->name);
> + return 0;
> + }
> +
> + if (zone->buffer_size < atomic_read(&tmpbuf.datalen) ||
> + zone->buffer_size < atomic_read(&tmpbuf.start)) {
> + pr_info("found overtop zone: %s: off %lld, size %zu\n",
> + zone->name, zone->off, zone->buffer_size);
> + /* just keep going */
> + return 0;
> + }
> +
> + if (!atomic_read(&tmpbuf.datalen)) {
> + pr_debug("found erased zone: %s: off %lld, size %zu, datalen %d\n",
> + zone->name, zone->off, zone->buffer_size,
> + atomic_read(&tmpbuf.datalen));
> + return 0;
> + }
> +
> + pr_debug("found nice zone: %s: off %lld, size %zu, datalen %d\n",
> + zone->name, zone->off, zone->buffer_size,
> + atomic_read(&tmpbuf.datalen));
> +
> + len = atomic_read(&tmpbuf.datalen) + sizeof(*oldbuf);
> + oldbuf = kzalloc(len, GFP_KERNEL);
> + if (!oldbuf)
> + return -ENOMEM;
> +
> + memcpy(oldbuf, &tmpbuf, sizeof(*oldbuf));
> + buf = (char *)oldbuf + sizeof(*oldbuf);
> + len = atomic_read(&oldbuf->datalen);
> + start = atomic_read(&oldbuf->start);
> + off = zone->off + sizeof(*oldbuf);
> +
> + /* get part of data */
> + rcnt = info->read(buf, len - start, off + start);
> + if (rcnt != len - start) {
> + pr_err("read zone %s failed\n", zone->name);
> + ret = (int)rcnt < 0 ? (int)rcnt : -EIO;
> + goto free_oldbuf;
> + }
> +
> + /* get the rest of data */
> + rcnt = info->read(buf + len - start, start, off);
> + if (rcnt != start) {
> + pr_err("read zone %s failed\n", zone->name);
> + ret = (int)rcnt < 0 ? (int)rcnt : -EIO;
> + goto free_oldbuf;
> + }
> +
> + zone->oldbuf = oldbuf;
> + psz_flush_dirty_zone(zone);
> + return 0;
> +
> +free_oldbuf:
> + kfree(oldbuf);
> + return ret;
> +}
> +
> /**
> * psz_recovery() - recover data from storage
> * @cxt: the context of pstore/zone
> @@ -432,6 +542,10 @@ static inline int psz_recovery(struct psz_context *cxt)
> if (ret)
> goto recover_fail;
>
> + ret = psz_recover_zone(cxt, cxt->ppsz);
> + if (ret)
> + goto recover_fail;
> +
> pr_debug("recover end!\n");
> atomic_set(&cxt->recovered, 1);
> return 0;
> @@ -446,9 +560,17 @@ static int psz_pstore_open(struct pstore_info *psi)
> struct psz_context *cxt = psi->data;
>
> cxt->oops_read_cnt = 0;
> + cxt->pmsg_read_cnt = 0;
> return 0;
> }
>
> +static inline bool psz_old_ok(struct pstore_zone *zone)
> +{
> + if (zone && zone->oldbuf && atomic_read(&zone->oldbuf->datalen))
> + return true;
> + return false;
> +}
> +
> static inline bool psz_ok(struct pstore_zone *zone)
> {
> if (zone && zone->buffer && buffer_datalen(zone))
> @@ -473,6 +595,25 @@ static inline int psz_oops_erase(struct psz_context *cxt,
> return psz_zone_write(zone, FLUSH_META, NULL, 0, 0);
> }
>
> +static inline int psz_record_erase(struct psz_context *cxt,
> + struct pstore_zone *zone)
> +{
> + if (unlikely(!psz_old_ok(zone)))
> + return 0;
> +
> + kfree(zone->oldbuf);
> + zone->oldbuf = NULL;
> + /*
> + * if there are new data in zone buffer, that means the old data
> + * are already invalid. It is no need to flush 0 (erase) to
> + * block device.
> + */
> + if (!buffer_datalen(zone))
> + return psz_zone_write(zone, FLUSH_META, NULL, 0, 0);
> + psz_flush_dirty_zone(zone);
> + return 0;
> +}
> +
> static int psz_pstore_erase(struct pstore_record *record)
> {
> struct psz_context *cxt = record->psi->data;
> @@ -482,6 +623,8 @@ static int psz_pstore_erase(struct pstore_record *record)
> if (record->id >= cxt->oops_max_cnt)
> return -EINVAL;
> return psz_oops_erase(cxt, cxt->opszs[record->id], record);
> + case PSTORE_TYPE_PMSG:
> + return psz_record_erase(cxt, cxt->ppsz);
> default:
> return -EINVAL;
> }
> @@ -502,8 +645,10 @@ static void psz_write_kmsg_hdr(struct pstore_zone *zone,
> hdr->reason = record->reason;
> if (hdr->reason == KMSG_DUMP_OOPS)
> hdr->counter = ++cxt->oops_counter;
> - else
> + else if (hdr->reason == KMSG_DUMP_PANIC)
> hdr->counter = ++cxt->panic_counter;
> + else
> + hdr->counter = 0;
> }
>
> static inline int notrace psz_oops_write_record(struct psz_context *cxt,
> @@ -553,6 +698,53 @@ static int notrace psz_oops_write(struct psz_context *cxt,

I think we should also try to flush pmsg zone if it's dirty in case of panic
and lost data.

@@ -690,8 +690,9 @@ static int notrace psz_oops_write(struct psz_context
*cxt,

ret = psz_oops_write_record(cxt, record);
if (!ret) {
- pr_debug("try to flush other dirty oops zones\n");
+ pr_debug("try to flush other dirty zones\n");
psz_flush_dirty_zones(cxt->opszs, cxt->oops_max_cnt);
+ psz_flush_dirty_zone(cxt->ppsz);
}

/* always return 0 as we had handled it on buffer */

> return 0;
> }
>
> +static int notrace psz_record_write(struct pstore_zone *zone,
> + struct pstore_record *record)
> +{
> + size_t start, rem;
> + int cnt = record->size;
> + bool is_full_data = false;
> + char *buf = record->buf;
> +
> + if (!zone || !record)
> + return -ENOSPC;
> +
> + if (atomic_read(&zone->buffer->datalen) >= zone->buffer_size)
> + is_full_data = true;
> +
> + if (unlikely(cnt > zone->buffer_size)) {
> + buf += cnt - zone->buffer_size;
> + cnt = zone->buffer_size;
> + }
> +
> + start = buffer_start(zone);
> + rem = zone->buffer_size - start;
> + if (unlikely(rem < cnt)) {
> + psz_zone_write(zone, FLUSH_PART, buf, rem, start);
> + buf += rem;
> + cnt -= rem;
> + start = 0;
> + is_full_data = true;
> + }
> +
> + atomic_set(&zone->buffer->start, cnt + start);
> + psz_zone_write(zone, FLUSH_PART, buf, cnt, start);
> +
> + /**
> + * psz_zone_write will set datalen as start + cnt.
> + * It work if actual data length lesser than buffer size.
> + * If data length greater than buffer size, pmsg will rewrite to
> + * beginning of zone, which make buffer->datalen wrongly.
> + * So we should reset datalen as buffer size once actual data length
> + * greater than buffer size.
> + */
> + if (is_full_data) {
> + atomic_set(&zone->buffer->datalen, zone->buffer_size);
> + psz_zone_write(zone, FLUSH_META, NULL, 0, 0);
> + }
> + return 0;
> +}
> +
> static int notrace psz_pstore_write(struct pstore_record *record)
> {
> struct psz_context *cxt = record->psi->data;
> @@ -564,6 +756,8 @@ static int notrace psz_pstore_write(struct pstore_record *record)
> switch (record->type) {
> case PSTORE_TYPE_DMESG:
> return psz_oops_write(cxt, record);
> + case PSTORE_TYPE_PMSG:
> + return psz_record_write(cxt->ppsz, record);
> default:
> return -EINVAL;
> }
> @@ -579,6 +773,13 @@ static struct pstore_zone *psz_read_next_zone(struct psz_context *cxt)
> return zone;
> }
>
> + if (cxt->pmsg_read_cnt == 0) {
> + cxt->pmsg_read_cnt++;
> + zone = cxt->ppsz;
> + if (psz_old_ok(zone))
> + return zone;
> + }
> +
> return NULL;
> }
>
> @@ -629,7 +830,7 @@ static ssize_t psz_oops_read(struct pstore_zone *zone,
> return -ENOMEM;
> }
>
> - size = psz_zone_read(zone, record->buf + hlen, size,
> + size = psz_zone_read_buffer(zone, record->buf + hlen, size,
> sizeof(struct psz_oops_header) < 0);
> if (unlikely(size < 0)) {
> kfree(record->buf);
> @@ -639,6 +840,32 @@ static ssize_t psz_oops_read(struct pstore_zone *zone,
> return size + hlen;
> }
>
> +static ssize_t psz_record_read(struct pstore_zone *zone,
> + struct pstore_record *record)
> +{
> + size_t len;
> + struct psz_buffer *buf;
> +
> + if (!zone || !record)
> + return -ENOSPC;
> +
> + buf = (struct psz_buffer *)zone->oldbuf;
> + if (!buf)
> + return -ENOMSG;
> +
> + len = atomic_read(&buf->datalen);
> + record->buf = kmalloc(len, GFP_KERNEL);
> + if (!record->buf)
> + return -ENOMEM;
> +
> + if (unlikely(psz_zone_read_oldbuf(zone, record->buf, len, 0))) {
> + kfree(record->buf);
> + return -ENOMSG;
> + }
> +
> + return len;
> +}
> +
> static ssize_t psz_pstore_read(struct pstore_record *record)
> {
> struct psz_context *cxt = record->psi->data;
> @@ -663,6 +890,9 @@ static ssize_t psz_pstore_read(struct pstore_record *record)
> readop = psz_oops_read;
> record->id = cxt->oops_read_cnt - 1;
> break;
> + case PSTORE_TYPE_PMSG:
> + readop = psz_record_read;
> + break;
> default:
> goto next_zone;
> }
> @@ -718,8 +948,10 @@ static struct pstore_zone *psz_init_zone(enum pstore_type_id type,
> zone->type = type;
> zone->buffer_size = size - sizeof(struct psz_buffer);
> zone->buffer->sig = type ^ PSZ_SIG;
> + zone->oldbuf = NULL;
> atomic_set(&zone->dirty, 0);
> atomic_set(&zone->buffer->datalen, 0);
> + atomic_set(&zone->buffer->start, 0);
>
> *off += size;
>
> @@ -803,6 +1035,8 @@ static void psz_free_all_zones(struct psz_context *cxt)
> {
> if (cxt->opszs)
> psz_free_zones(&cxt->opszs, &cxt->oops_max_cnt);
> + if (cxt->ppsz)
> + psz_free_zone(&cxt->ppsz);
> }
>
> static int psz_alloc_zones(struct psz_context *cxt)
> @@ -810,18 +1044,26 @@ static int psz_alloc_zones(struct psz_context *cxt)
> struct pstore_zone_info *info = cxt->pstore_zone_info;
> loff_t off = 0;
> int err;
> - size_t size;
> + size_t off_size = 0;
>
> - size = info->total_size;
> - cxt->opszs = psz_init_zones(PSTORE_TYPE_DMESG, &off, size,
> + off_size += info->pmsg_size;
> + cxt->ppsz = psz_init_zone(PSTORE_TYPE_PMSG, &off, info->pmsg_size);
> + if (IS_ERR(cxt->ppsz)) {
> + err = PTR_ERR(cxt->ppsz);
> + goto free_out;
> + }
> +
> + cxt->opszs = psz_init_zones(PSTORE_TYPE_DMESG, &off,
> + info->total_size - off_size,
> info->kmsg_size, &cxt->oops_max_cnt);
> if (IS_ERR(cxt->opszs)) {
> err = PTR_ERR(cxt->opszs);
> - goto fail_out;
> + goto free_out;
> }
>
> return 0;
> -fail_out:
> +free_out:
> + psz_free_all_zones(cxt);
> return err;
> }
>
> @@ -844,7 +1086,7 @@ int register_pstore_zone(struct pstore_zone_info *info)
> return -EINVAL;
> }
>
> - if (!info->kmsg_size) {
> + if (!info->kmsg_size && !info->pmsg_size) {
> pr_warn("at least one of the records be non-zero\n");
> return -EINVAL;
> }
> @@ -871,6 +1113,7 @@ int register_pstore_zone(struct pstore_zone_info *info)
>
> check_size(total_size, 4096);
> check_size(kmsg_size, SECTOR_SIZE);
> + check_size(pmsg_size, SECTOR_SIZE);
>
> #undef check_size
>
> @@ -897,6 +1140,7 @@ int register_pstore_zone(struct pstore_zone_info *info)
> pr_debug("register %s with properties:\n", info->name);
> pr_debug("\ttotal size : %ld Bytes\n", info->total_size);
> pr_debug("\toops size : %ld Bytes\n", info->kmsg_size);
> + pr_debug("\tpmsg size : %ld Bytes\n", info->pmsg_size);
>
> err = psz_alloc_zones(cxt);
> if (err) {
> @@ -925,6 +1169,10 @@ int register_pstore_zone(struct pstore_zone_info *info)
> pr_cont(",panic_write");
> pr_cont(")");
> }
> + if (info->pmsg_size) {
> + cxt->pstore.flags |= PSTORE_FLAGS_PMSG;
> + pr_cont(" pmsg");
> + }
> pr_cont("\n");
>
> err = pstore_register(&cxt->pstore);
> diff --git a/include/linux/pstore_zone.h b/include/linux/pstore_zone.h
> index a6a79ff1351b..39c2cb944123 100644
> --- a/include/linux/pstore_zone.h
> +++ b/include/linux/pstore_zone.h
> @@ -17,6 +17,7 @@ typedef ssize_t (*psz_write_op)(const char *, size_t, loff_t);
> * @kmsg_size: The size of oops/panic zone. Zero means disabled, otherwise,
> * it must be multiple of SECTOR_SIZE(512 Bytes).
> * @max_reason: Maximum kmsg dump reason to store.
> + * @pmsg_size: The size of pmsg zone which is the same as @kmsg_size.
> * @read: The general read operation. Both of the function parameters
> * @size and @offset are relative value to storage.
> * On success, the number of bytes should be returned, others
> @@ -33,6 +34,7 @@ struct pstore_zone_info {
> unsigned long total_size;
> unsigned long kmsg_size;
> int max_reason;
> + unsigned long pmsg_size;
> psz_read_op read;
> psz_write_op write;
> psz_write_op panic_write;
>

--
WeiXiong Liao