[PATCH 02/22] mpool: add in-memory struct definitions
From: nmeeramohide
Date: Mon Sep 28 2020 - 12:47:45 EST
From: Nabeel M Mohamed <nmeeramohide@xxxxxxxxxx>
Add headers containing the basic in-memory structures used by mpool.
- mclass.h: media classes
- mlog.h: mlog objects
- mp.h, mpcore.h: mpool objects
- params.h: mpool parameters
- pd.h: pool drive interface
- pmd.h, pmd_obj.h: Metadata manager
- sb.h: superblock interface
- smap.h: space map interface
Co-developed-by: Greg Becker <gbecker@xxxxxxxxxx>
Signed-off-by: Greg Becker <gbecker@xxxxxxxxxx>
Co-developed-by: Pierre Labat <plabat@xxxxxxxxxx>
Signed-off-by: Pierre Labat <plabat@xxxxxxxxxx>
Co-developed-by: John Groves <jgroves@xxxxxxxxxx>
Signed-off-by: John Groves <jgroves@xxxxxxxxxx>
Signed-off-by: Nabeel M Mohamed <nmeeramohide@xxxxxxxxxx>
---
drivers/mpool/mclass.h | 137 +++++++++++
drivers/mpool/mlog.h | 212 +++++++++++++++++
drivers/mpool/mp.h | 231 +++++++++++++++++++
drivers/mpool/mpcore.h | 354 ++++++++++++++++++++++++++++
drivers/mpool/params.h | 116 ++++++++++
drivers/mpool/pd.h | 202 ++++++++++++++++
drivers/mpool/pmd.h | 379 ++++++++++++++++++++++++++++++
drivers/mpool/pmd_obj.h | 499 ++++++++++++++++++++++++++++++++++++++++
drivers/mpool/sb.h | 162 +++++++++++++
drivers/mpool/smap.h | 334 +++++++++++++++++++++++++++
10 files changed, 2626 insertions(+)
create mode 100644 drivers/mpool/mclass.h
create mode 100644 drivers/mpool/mlog.h
create mode 100644 drivers/mpool/mp.h
create mode 100644 drivers/mpool/mpcore.h
create mode 100644 drivers/mpool/params.h
create mode 100644 drivers/mpool/pd.h
create mode 100644 drivers/mpool/pmd.h
create mode 100644 drivers/mpool/pmd_obj.h
create mode 100644 drivers/mpool/sb.h
create mode 100644 drivers/mpool/smap.h
diff --git a/drivers/mpool/mclass.h b/drivers/mpool/mclass.h
new file mode 100644
index 000000000000..2ecdcd08de9f
--- /dev/null
+++ b/drivers/mpool/mclass.h
@@ -0,0 +1,137 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2015-2020 Micron Technology, Inc. All rights reserved.
+ */
+
+#ifndef MPOOL_MCLASS_H
+#define MPOOL_MCLASS_H
+
+#include "mpool_ioctl.h"
+
+struct omf_devparm_descriptor;
+struct mpool_descriptor;
+struct mpcore_params;
+
+/*
+ * This file contains the media class structures definitions and prototypes
+ * private to mpool core.
+ */
+
+/**
+ * struct mc_parms - media class parameters
+ * @mcp_classp: class performance characteristics, enum mp_media_classp
+ * @mcp_zonepg: zone size in number of zone pages
+ * @mcp_sectorsz: 2^sectorsz is the logical sector size
+ * @mcp_devtype: device type. Enum pd_devtype.
+ * @mcp_features: ored bits from mp_mc_features
+ *
+ * Two PDs can't be placed in the same media class if they have different
+ * mc_parms.
+ */
+struct mc_parms {
+ u8 mcp_classp;
+ u32 mcp_zonepg;
+ u8 mcp_sectorsz;
+ u8 mcp_devtype;
+ u64 mcp_features;
+};
+
+/**
+ * struct mc_smap_parms - media class space map parameters
+ * @mcsp_spzone: percent spare zones for drives.
+ * @mcsp_rgnc: no. of space map zones for drives in each media class
+ * @mcsp_align: space map zone alignment for drives in each media class
+ */
+struct mc_smap_parms {
+ u8 mcsp_spzone;
+ u8 mcsp_rgnc;
+ u8 mcsp_align;
+};
+
+/**
+ * struct media_class - define a media class
+ * @mc_parms: define a media class, content differ for each media class
+ * @mc_sparms: space map params for this media class
+ * @mc_pdmc: active pdv entries grouped by media class array
+ * @mc_uacnt: UNAVAIL status drive count in each media class
+ *
+ * Locking:
+ * Protected by mp.pds_pdvlock.
+ */
+struct media_class {
+ struct mc_parms mc_parms;
+ struct mc_smap_parms mc_sparms;
+ s8 mc_pdmc;
+ u8 mc_uacnt;
+};
+
+/**
+ * mc_pd_prop2mc_parms() - Convert PD properties into media class parameters.
+ * @pd_prop: input, pd properties.
+ * @mc_parms: output, media class parameters.
+ *
+ * Typically used before a lookup (mc_lookup_from_mc_parms()) to know in
+ * which media class a PD belongs to.
+ */
+void mc_pd_prop2mc_parms(struct pd_prop *pd_prop, struct mc_parms *mc_parms);
+
+/**
+ * mc_omf_devparm2mc_parms() - convert a omf_devparm_descriptor into an mc_parms.
+ * @omf_devparm: input
+ * @mc_parms: output
+ */
+void mc_omf_devparm2mc_parms(struct omf_devparm_descriptor *omf_devparm, struct mc_parms *mc_parms);
+
+/**
+ * mc_parms2omf_devparm() - convert a mc_parms in a omf_devparm_descriptor
+ * @mc_parms: input
+ * @omf_devparm: output
+ */
+void mc_parms2omf_devparm(struct mc_parms *mc_parms, struct omf_devparm_descriptor *omf_devparm);
+
+/**
+ * mc_cmp_omf_devparm() - check if two omf_devparm_descriptor corresponds
+ * to the same media class.
+ * @omf_devparm1:
+ * @omf_devparm2:
+ *
+ * Returns 0 if in same media class.
+ */
+int mc_cmp_omf_devparm(struct omf_devparm_descriptor *omfd1, struct omf_devparm_descriptor *omfd2);
+
+/**
+ * mc_init_class() - initialize a media class
+ * @mc:
+ * @mc_parms: parameters of the media class
+ * @mcsp: smap parameters for mc
+ */
+void mc_init_class(struct media_class *mc, struct mc_parms *mc_parms, struct mc_smap_parms *mcsp);
+
+/**
+ * mc_set_spzone() - set the percent spare on the media class mclass.
+ * @mc:
+ * @spzone:
+ *
+ * Return: 0, or -ENOENT if the specified mclass doesn't exist.
+ */
+int mc_set_spzone(struct media_class *mc, u8 spzone);
+
+/**
+ * mclass_isvalid() - Return true if the media class is valid.
+ * @mclass:
+ */
+static inline bool mclass_isvalid(enum mp_media_classp mclass)
+{
+ return (mclass >= 0 && mclass < MP_MED_NUMBER);
+}
+
+/**
+ * mc_smap_parms_get() - get space map params for the specified mclass.
+ * @mp:
+ * @mclass:
+ * @mcsp: (output)
+ */
+int mc_smap_parms_get(struct media_class *mc, struct mpcore_params *params,
+ struct mc_smap_parms *mcsp);
+
+#endif /* MPOOL_MCLASS_H */
diff --git a/drivers/mpool/mlog.h b/drivers/mpool/mlog.h
new file mode 100644
index 000000000000..0de816335d55
--- /dev/null
+++ b/drivers/mpool/mlog.h
@@ -0,0 +1,212 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2015-2020 Micron Technology, Inc. All rights reserved.
+ */
+/*
+ * Defines functions for writing, reading, and managing the lifecycle of mlogs.
+ */
+
+#ifndef MPOOL_MLOG_H
+#define MPOOL_MLOG_H
+
+#include <linux/uio.h>
+
+#include "mpool_ioctl.h"
+
+#define MB (1024 * 1024)
+struct pmd_layout;
+struct mpool_descriptor;
+struct mlog_descriptor;
+
+
+/**
+ * struct mlog_read_iter -
+ * @lri_layout: Layout of log being read
+ * @lri_soff: Sector offset of next log block to read from
+ * @lri_gen: Log generation number at iterator initialization
+ * @lri_roff: Next offset in log block soff to read from
+ * @lri_rbidx: Read buffer page index currently reading from
+ * @lri_sidx: Log block index in lri_rbidx
+ * @lri_valid: 1 if iterator is valid; 0 otherwise
+ */
+struct mlog_read_iter {
+ struct pmd_layout *lri_layout;
+ off_t lri_soff;
+ u64 lri_gen;
+ u16 lri_roff;
+ u16 lri_rbidx;
+ u16 lri_sidx;
+ u8 lri_valid;
+};
+
+/**
+ * struct mlog_fsetparms -
+ *
+ * @mfp_totsec: Total number of log blocks in mlog
+ * @mfp_secpga: Is sector size page-aligned?
+ * @mfp_lpgsz: Size of each page in read/append buffer
+ * @mfp_npgmb: No. of pages in 1 MiB buffer
+ * @mfp_sectsz: Sector size obtained from PD prop
+ * @mfp_nsecmb: No. of sectors/log blocks in 1 MiB buffer
+ * @mfp_nsecpg: No. of sectors/log blocks per page
+ */
+struct mlog_fsetparms {
+ u32 mfp_totsec;
+ bool mfp_secpga;
+ u32 mfp_lpgsz;
+ u16 mfp_nlpgmb;
+ u16 mfp_sectsz;
+ u16 mfp_nsecmb;
+ u16 mfp_nseclpg;
+};
+
+/**
+ * struct mlog_stat - mlog open status (referenced by associated struct pmd_layout)
+ * @lst_citr: Current mlog read iterator
+ * @lst_mfp: Mlog flush set parameters
+ * @lst_abuf: Append buffer, max 1 MiB size
+ * @lst_rbuf: Read buffer, max 1 MiB size - immutable
+ * @lst_rsoff: LB offset of the 1st log block in lst_rbuf
+ * @lst_rseoff: LB offset of the last log block in lst_rbuf
+ * @lst_asoff: LB offset of the 1st log block in CFS
+ * @lst_wsoff: Offset of the accumulating log block
+ * @lst_abdirty: true, if append buffer is dirty
+ * @lst_pfsetid: Prev. fSetID of the first log block in CFS
+ * @lst_cfsetid: Current fSetID of the CFS
+ * @lst_cfssoff: Offset within the 1st log block from where CFS starts
+ * @lst_aoff: Next byte offset[0, sectsz) to fill in the current log block
+ * @lst_abidx: Index of current filling page in lst_abuf
+ * @lst_csem: enforce compaction semantics if true
+ * @lst_cstart: valid compaction start marker in log?
+ * @lst_cend: valid compaction end marker in log?
+ */
+struct mlog_stat {
+ struct mlog_read_iter lst_citr;
+ struct mlog_fsetparms lst_mfp;
+ char **lst_abuf;
+ char **lst_rbuf;
+ off_t lst_rsoff;
+ off_t lst_rseoff;
+ off_t lst_asoff;
+ off_t lst_wsoff;
+ bool lst_abdirty;
+ u32 lst_pfsetid;
+ u32 lst_cfsetid;
+ u16 lst_cfssoff;
+ u16 lst_aoff;
+ u16 lst_abidx;
+ u8 lst_csem;
+ u8 lst_cstart;
+ u8 lst_cend;
+};
+
+#define MLOG_TOTSEC(lstat) ((lstat)->lst_mfp.mfp_totsec)
+#define MLOG_LPGSZ(lstat) ((lstat)->lst_mfp.mfp_lpgsz)
+#define MLOG_NLPGMB(lstat) ((lstat)->lst_mfp.mfp_nlpgmb)
+#define MLOG_SECSZ(lstat) ((lstat)->lst_mfp.mfp_sectsz)
+#define MLOG_NSECMB(lstat) ((lstat)->lst_mfp.mfp_nsecmb)
+#define MLOG_NSECLPG(lstat) ((lstat)->lst_mfp.mfp_nseclpg)
+
+#define IS_SECPGA(lstat) ((lstat)->lst_mfp.mfp_secpga)
+
+/*
+ * mlog API functions
+ */
+
+/*
+ * Error codes: all mlog fns can return one or more of:
+ * -EINVAL = invalid fn args
+ * -ENOENT = log not open or logid not found
+ * -EFBIG = log full
+ * -EMSGSIZE = cstart w/o cend indicating a crash during compaction
+ * -ENODATA = malformed or corrupted log
+ * -EIO = unable to read/write log on media
+ * -ENOMEM = insufficient room in copy-out buffer
+ * -EBUSY = log is in erasing state; wait or retry erase
+ */
+
+int mlog_alloc(struct mpool_descriptor *mp, struct mlog_capacity *capreq,
+ enum mp_media_classp mclassp, struct mlog_props *prop,
+ struct mlog_descriptor **mlh);
+
+int mlog_realloc(struct mpool_descriptor *mp, u64 objid, struct mlog_capacity *capreq,
+ enum mp_media_classp mclassp, struct mlog_props *prop,
+ struct mlog_descriptor **mlh);
+
+int mlog_find_get(struct mpool_descriptor *mp, u64 objid, int which,
+ struct mlog_props *prop, struct mlog_descriptor **mlh);
+
+void mlog_put(struct mlog_descriptor *layout);
+
+void mlog_lookup_rootids(u64 *id1, u64 *id2);
+
+int mlog_commit(struct mpool_descriptor *mp, struct mlog_descriptor *mlh);
+
+int mlog_abort(struct mpool_descriptor *mp, struct mlog_descriptor *mlh);
+
+int mlog_delete(struct mpool_descriptor *mp, struct mlog_descriptor *mlh);
+
+/**
+ * mlog_open() - Open committed log, validate contents, and return its generation number
+ * @mp:
+ * @mlh:
+ * @flags:
+ * @gen: output
+ *
+ * If log is already open just returns gen; if csem is true enforces compaction
+ * semantics so that open fails if valid cstart/cend markers are not present.
+ *
+ * Returns: 0 if successful, -errno otherwise
+ */
+int mlog_open(struct mpool_descriptor *mp, struct mlog_descriptor *mlh, u8 flags, u64 *gen);
+
+int mlog_close(struct mpool_descriptor *mp, struct mlog_descriptor *mlh);
+
+int mlog_gen(struct mlog_descriptor *mlh, u64 *gen);
+
+int mlog_empty(struct mpool_descriptor *mp, struct mlog_descriptor *mlh, bool *empty);
+
+int mlog_erase(struct mpool_descriptor *mp, struct mlog_descriptor *mlh, u64 mingen);
+
+int mlog_append_cstart(struct mpool_descriptor *mp, struct mlog_descriptor *mlh);
+
+int mlog_append_cend(struct mpool_descriptor *mp, struct mlog_descriptor *mlh);
+
+int mlog_append_data(struct mpool_descriptor *mp, struct mlog_descriptor *mlh,
+ char *buf, u64 buflen, int sync);
+
+int mlog_read_data_init(struct mlog_descriptor *mlh);
+
+/**
+ * mlog_read_data_next() -
+ * @mp:
+ * @mlh:
+ * @buf:
+ * @buflen:
+ * @rdlen:
+ *
+ * Returns:
+ * If -EOVERFLOW is returned, then "buf" is too small to
+ * hold the read data. Can be retried with a bigger receive buffer whose
+ * size is returned in rdlen.
+ */
+int mlog_read_data_next(struct mpool_descriptor *mp, struct mlog_descriptor *mlh,
+ char *buf, u64 buflen, u64 *rdlen);
+
+int mlog_get_props_ex(struct mpool_descriptor *mp, struct mlog_descriptor *mlh,
+ struct mlog_props_ex *prop);
+
+void mlog_precompact_alsz(struct mpool_descriptor *mp, struct mlog_descriptor *mlh);
+
+int mlog_rw_raw(struct mpool_descriptor *mp, struct mlog_descriptor *mlh,
+ const struct kvec *iov, int iovcnt, u64 boff, u8 rw);
+
+void mlogutil_closeall(struct mpool_descriptor *mp);
+
+bool mlog_objid(u64 objid);
+
+struct pmd_layout *mlog2layout(struct mlog_descriptor *mlh);
+
+struct mlog_descriptor *layout2mlog(struct pmd_layout *layout);
+
+#endif /* MPOOL_MLOG_H */
diff --git a/drivers/mpool/mp.h b/drivers/mpool/mp.h
new file mode 100644
index 000000000000..e1570f8c8d0c
--- /dev/null
+++ b/drivers/mpool/mp.h
@@ -0,0 +1,231 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2015-2020 Micron Technology, Inc. All rights reserved.
+ */
+
+#ifndef MPOOL_MP_H
+#define MPOOL_MP_H
+
+#include "mpool_ioctl.h"
+#include "uuid.h"
+#include "params.h"
+
+struct mpool_descriptor;
+
+#define MPOOL_OP_READ 0
+#define MPOOL_OP_WRITE 1
+#define PD_DEV_ID_PDUNAVAILABLE "DID_PDUNAVAILABLE"
+
+#define MPOOL_DRIVES_MAX MP_MED_NUMBER
+#define MP_MED_ALL MP_MED_NUMBER
+
+/* Object types */
+enum mp_obj_type {
+ MP_OBJ_UNDEF = 0,
+ MP_OBJ_MBLOCK = 1,
+ MP_OBJ_MLOG = 2,
+};
+
+/**
+ * struct mpool_config -
+ * @mc_oid1:
+ * @mc_oid2:
+ * @mc_uid:
+ * @mc_gid:
+ * @mc_mode:
+ * @mc_mclassp:
+ * @mc_captgt:
+ * @mc_ra_pages_max:
+ * @mc_vma_sz_max:
+ * @mc_utype: user-defined type
+ * @mc_label: user-defined label
+
+ */
+struct mpool_config {
+ u64 mc_oid1;
+ u64 mc_oid2;
+ uid_t mc_uid;
+ gid_t mc_gid;
+ mode_t mc_mode;
+ u32 mc_rsvd0;
+ u64 mc_captgt;
+ u32 mc_ra_pages_max;
+ u32 mc_vma_size_max;
+ u32 mc_rsvd1;
+ u32 mc_rsvd2;
+ u64 mc_rsvd3;
+ u64 mc_rsvd4;
+ uuid_le mc_utype;
+ char mc_label[MPOOL_LABELSZ_MAX];
+};
+
+/*
+ * mpool API functions
+ */
+
+/**
+ * mpool_create() - Create an mpool
+ * @mpname:
+ * @flags: enum mp_mgmt_flags
+ * @dpaths:
+ * @pd_prop: PDs properties obtained by mpool_create() caller.
+ * @params: mpcore parameters
+ * @mlog_cap:
+ *
+ * Create an mpool from dcnt drive paths dpaths; store mpool metadata as
+ * specified by mdparm;
+ *
+ * Return:
+ * %0 if successful, -errno otherwise..
+ * ENODEV if insufficient number of drives meeting mdparm,
+ */
+int mpool_create(const char *name, u32 flags, char **dpaths, struct pd_prop *pd_prop,
+ struct mpcore_params *params, u64 mlog_cap);
+
+/**
+ * mpool_activate() - Activate an mpool
+ * @dcnt:
+ * @dpaths:
+ * @pd_prop: properties of the PDs. dcnt elements.
+ * @mlog_cap:
+ * @params: mpcore parameters
+ * @flags:
+ * @mpp: *mpp is set to NULL if error
+ *
+ * Activate mpool on dcnt drive paths dpaths; if force flag is set tolerate
+ * unavailable drives up to redundancy limit; if successful *mpp is a handle
+ * for the mpool.
+ *
+ * Return:
+ * %0 if successful, -errno otherwise
+ * ENODEV if too many drives unavailable or failed,
+ * ENXIO if device previously removed from mpool and is no longer a member
+ */
+int mpool_activate(u64 dcnt, char **dpaths, struct pd_prop *pd_prop, u64 mlog_cap,
+ struct mpcore_params *params, u32 flags, struct mpool_descriptor **mpp);
+
+
+/**
+ * mpool_deactivate() - Deactivate an mpool.
+ * @mp: mpool descriptor
+ *
+ * Deactivate mpool; caller must ensure no other thread can access mp; mp is
+ * invalid after call.
+ */
+int mpool_deactivate(struct mpool_descriptor *mp);
+
+/**
+ * mpool_destroy() - Destroy an mpool
+ * @dcnt:
+ * @dpaths:
+ * @pd_prop: PD properties.
+ * @flags:
+ *
+ * Destroy mpool on dcnt drive paths dpaths;
+ *
+ * Return:
+ * %0 if successful, -errno otherwise
+ */
+int mpool_destroy(u64 dcnt, char **dpaths, struct pd_prop *pd_prop, u32 flags);
+
+/**
+ * mpool_rename() - Rename mpool to mp_newname
+ * @dcnt:
+ * @dpaths:
+ * @pd_prop: PD properties.
+ * @flags:
+ * @mp_newname:
+ *
+ * Return:
+ * %0 if successful, -errno otherwise
+ */
+int mpool_rename(u64 dcnt, char **dpaths, struct pd_prop *pd_prop, u32 flags,
+ const char *mp_newname);
+
+/**
+ * mpool_drive_add() - Add new drive dpath to mpool.
+ * @mp:
+ * @dpath:
+ * @pd_prop: PD properties.
+ *
+ * Return: %0 if successful; -enno otherwise...
+ */
+int mpool_drive_add(struct mpool_descriptor *mp, char *dpath, struct pd_prop *pd_prop);
+
+/**
+ * mpool_drive_spares() - Set percent spare zones to spzone for drives in media class mclassp.
+ * @mp:
+ * @mclassp:
+ * @spzone:
+ *
+ * Return: 0 if successful, -errno otherwise...
+ */
+int mpool_drive_spares(struct mpool_descriptor *mp, enum mp_media_classp mclassp, u8 spzone);
+
+/**
+ * mpool_mclass_get_cnt() - Get a count of media classes with drives in this mpool
+ * @mp:
+ * @info:
+ */
+void mpool_mclass_get_cnt(struct mpool_descriptor *mp, u32 *cnt);
+
+/**
+ * mpool_mclass_get() - Get a information on mcl_cnt media classes
+ * @mp:
+ * @mcic:
+ * @mciv:
+ *
+ * Return: 0 if successful, -errno otherwise...
+ */
+int mpool_mclass_get(struct mpool_descriptor *mp, u32 *mcxc, struct mpool_mclass_xprops *mcxv);
+
+/**
+ * mpool_get_xprops() - Retrieve extended mpool properties
+ * @mp:
+ * @prop:
+ */
+void mpool_get_xprops(struct mpool_descriptor *mp, struct mpool_xprops *xprops);
+
+/**
+ * mpool_get_devprops_by_name() - Fill in dprop for active drive with name pdname
+ * @mp:
+ * @pdname:
+ * @dprop:
+ *
+ * Return: %0 if success, -errno otherwise...
+ * -ENOENT if device with specified name cannot be found
+ */
+int mpool_get_devprops_by_name(struct mpool_descriptor *mp, char *pdname,
+ struct mpool_devprops *dprop);
+
+/**
+ * mpool_get_usage() - Fill in stats with mpool space usage for the media class mclassp
+ * @mp:
+ * @mclassp:
+ * @usage:
+ *
+ * If mclassp is MCLASS_ALL, report on entire pool (all media classes).
+ *
+ * Return: %0 if successful; err_t otherwise...
+ */
+void
+mpool_get_usage(
+ struct mpool_descriptor *mp,
+ enum mp_media_classp mclassp,
+ struct mpool_usage *usage);
+
+/**
+ * mpool_config_store() - store a config record in MDC0
+ * @mp:
+ * @cfg:
+ */
+int mpool_config_store(struct mpool_descriptor *mp, const struct mpool_config *cfg);
+
+/**
+ * mpool_config_fetch() - fetch the current mpool config
+ * @mp:
+ * @cfg:
+ */
+int mpool_config_fetch(struct mpool_descriptor *mp, struct mpool_config *cfg);
+
+#endif /* MPOOL_MP_H */
diff --git a/drivers/mpool/mpcore.h b/drivers/mpool/mpcore.h
new file mode 100644
index 000000000000..904763d49814
--- /dev/null
+++ b/drivers/mpool/mpcore.h
@@ -0,0 +1,354 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2015-2020 Micron Technology, Inc. All rights reserved.
+ */
+
+#ifndef MPOOL_MPCORE_H
+#define MPOOL_MPCORE_H
+
+#include <linux/rbtree.h>
+#include <linux/workqueue.h>
+#include <linux/mutex.h>
+
+#include "uuid.h"
+
+#include "mp.h"
+#include "pd.h"
+#include "smap.h"
+#include "mclass.h"
+#include "pmd.h"
+#include "params.h"
+
+extern struct rb_root mpool_pools;
+
+struct pmd_layout;
+
+/**
+ * enum mpool_status -
+ * @MPOOL_STAT_UNDEF:
+ * @MPOOL_STAT_OPTIMAL:
+ * @MPOOL_STAT_FAULTED:
+ */
+enum mpool_status {
+ MPOOL_STAT_UNDEF = 0,
+ MPOOL_STAT_OPTIMAL = 1,
+ MPOOL_STAT_FAULTED = 2,
+ MPOOL_STAT_LAST = MPOOL_STAT_FAULTED,
+};
+
+_Static_assert((MPOOL_STAT_LAST < 256), "enum mpool_status must fit in u8");
+
+/**
+ * struct mpool_dev_info - Pool drive state, status, and params
+ * @pdi_devid: UUID for this drive
+ * @pdi_parm: drive parms
+ * @pdi_status: enum pd_status value: drive status
+ * @pdi_ds: drive space allocation info
+ * @pdi_rmap: per allocation zone space maps rbtree array, node:
+ * struct u64_to_u64_rb
+ * @pdi_rmlock: lock protects per zone space maps
+ * @pdi_name: device name (only the last path name component)
+ *
+ * Pool drive state, status, and params
+ *
+ * LOCKING:
+ * devid, mclass : constant; no locking required
+ * parm: constant EXCEPT in rare change of status from UNAVAIL; see below
+ * status: usage does not require locking, but MUST get/set via accessors
+ * state: protected by pdvlock in enclosing mpool_descriptor
+ * ds: protected by ds.dalock defined in smap module
+ * zmap[x]: protected by zmlock[x]
+ *
+ * parm fields are constant except in a rare change of status from UNAVAIL,
+ * during which a subset of the fields are modified. see the pd module for
+ * details on how this is handled w/o requiring locking.
+ */
+struct mpool_dev_info {
+ atomic_t pdi_status; /* Barriers or acq/rel required */
+ struct pd_dev_parm pdi_parm;
+ struct smap_dev_alloc pdi_ds;
+ struct rmbkt *pdi_rmbktv;
+ struct mpool_uuid pdi_devid;
+};
+
+/* Shortcuts */
+#define pdi_didstr pdi_parm.dpr_prop.pdp_didstr
+#define pdi_zonepg pdi_parm.dpr_prop.pdp_zparam.dvb_zonepg
+#define pdi_zonetot pdi_parm.dpr_prop.pdp_zparam.dvb_zonetot
+#define pdi_devtype pdi_parm.dpr_prop.pdp_devtype
+#define pdi_cmdopt pdi_parm.dpr_prop.pdp_cmdopt
+#define pdi_mclass pdi_parm.dpr_prop.pdp_mclassp
+#define pdi_devsz pdi_parm.dpr_prop.pdp_devsz
+#define pdi_sectorsz pdi_parm.dpr_prop.pdp_sectorsz
+#define pdi_optiosz pdi_parm.dpr_prop.pdp_optiosz
+#define pdi_fua pdi_parm.dpr_prop.pdp_fua
+#define pdi_prop pdi_parm.dpr_prop
+#define pdi_name pdi_parm.dpr_name
+
+/**
+ * struct uuid_to_mpdesc_rb -
+ * @utm_node:
+ * @utm_uuid_le:
+ * @utm_md:
+ */
+struct uuid_to_mpdesc_rb {
+ struct rb_node utm_node;
+ struct mpool_uuid utm_uuid_le;
+ struct mpool_descriptor *utm_md;
+};
+
+/**
+ * struct mpdesc_mdparm - parameters used for the MDCs of the mpool.
+ * @md_mclass: media class used for the mpool metadata
+ */
+struct mpdesc_mdparm {
+ u8 md_mclass;
+};
+
+/**
+ * struct pre_compact_ctrl - used to start/stop/control precompaction
+ * @pco_dwork:
+ * @pco_mp:
+ * @pco_nmtoc: next MDC to compact
+
+ * Each time pmd_precompact_cb() runs it will consider the next MDC
+ * for compaction.
+ */
+struct pre_compact_ctrl {
+ struct delayed_work pco_dwork;
+ struct mpool_descriptor *pco_mp;
+ atomic_t pco_nmtoc;
+};
+
+/**
+ * struct mpool_descriptor - Media pool descriptor
+ * @pds_pdvlock: drive membership/state lock
+ * @pds_pdv: per drive info array
+ * @pds_omlock: open mlog index lock
+ * @pds_oml: rbtree of open mlog layouts. indexed by objid
+ * node type: objid_to_layout_rb
+ * @pds_poolid: UUID of pool
+ * @pds_mdparm: mclass id of mclass used for mdc layouts
+ * @pds_cfg: mpool config
+ * @pds_pdvcnt: cnt of valid pdv entries
+ * @pds_mc table of media classes
+ * @pds_uctxt used by user-space mlogs to indicate the context
+ * @pds_node: for linking this object into an rbtree
+ * @pds_params: Per mpool parameters
+ * @pds_workq: Workqueue per mpool.
+ * @pds_sbmdc0: Used to store in RAM the MDC0 metadata. Loaded at activate
+ * time, changed when MDC0 is compacted.
+ * @pds_mda: metadata container array (this thing is huge!)
+ *
+ * LOCKING:
+ * poolid, ospagesz, mdparm: constant; no locking required
+ * mda: protected by internal locks as documented in pmd module
+ * oml: protected by omlock
+ * pdv: see note
+ * pds_mc: protected by pds_pdvlock
+ * Update of pds_mc[].mc_sparams.mc_spzone must also be enclosed
+ * with mpool_s_lock to serialize the spzone updates, because they include
+ * an append of an MDC0 record on top of updating mc_spzone.
+ * all other fields: protected by pds_pdvlock (as is pds_pdv[x].state)
+ * pds_sbmdc0: Used to store in RAM the MDC0 metadata. Loaded when mpool
+ * activated, no lock needed at that time (single) threaded.
+ * Then changed during MDC0 compaction. At that time it is protected by
+ * MDC0 compact lock.
+ *
+ * NOTE:
+ * pds_pdvcnt only ever increases so that pds_pdv[x], x < pdvcnt, can be
+ * accessed without locking, other than as required by the struct
+ * mpool_dev_info.
+ * mc_spzone is written and read only by mpool functions that are serialized
+ * via mpool_s_lock.
+ */
+struct mpool_descriptor {
+ struct rw_semaphore pds_pdvlock;
+
+ ____cacheline_aligned
+ struct mpool_dev_info pds_pdv[MPOOL_DRIVES_MAX];
+
+ ____cacheline_aligned
+ struct mutex pds_oml_lock;
+ struct rb_root pds_oml_root;
+
+ /* Read-mostly fields... */
+ ____cacheline_aligned
+ u16 pds_pdvcnt;
+ struct mpdesc_mdparm pds_mdparm;
+ struct workqueue_struct *pds_workq;
+ struct workqueue_struct *pds_erase_wq;
+ struct workqueue_struct *pds_precompact_wq;
+
+ struct media_class pds_mc[MP_MED_NUMBER];
+ struct mpcore_params pds_params;
+ struct omf_sb_descriptor pds_sbmdc0;
+ struct pre_compact_ctrl pds_pco;
+ struct smap_usage_work pds_smap_usage_work;
+
+ /* Rarey used fields... */
+ struct mpool_config pds_cfg;
+ struct rb_node pds_node;
+ struct mpool_uuid pds_poolid;
+ char pds_name[MPOOL_NAMESZ_MAX];
+
+ /* pds_mda is enormous (91K) */
+ struct pmd_mda_info pds_mda;
+};
+
+/**
+ * mpool_desc_unavail_add() - Add unavailable drive to mpool descriptor.
+ * @mp:
+ * @omf_devparm:
+ *
+ * Add unavailable drive to mpool descriptor; caller must guarantee that
+ * devparm.devid is not already there.
+ * As part of adding the drive to the mpool descriptor, the drive is added
+ * in its media class.
+ *
+ * Return: 0 if successful, -errno (-EINVAL or -ENOMEM) otherwise
+ */
+int mpool_desc_unavail_add(struct mpool_descriptor *mp, struct omf_devparm_descriptor *devparm);
+
+/**
+ * mpool_desc_pdmc_add() - Add a device in its media class.
+ * @mp:
+ * @pdh:
+ * @omf_devparm:
+ * @check_only: if true, the call doesn't change any state, it only check
+ * if the PD could be added in a media class.
+ *
+ * If the media class doesn't exist yet, it is created here.
+ *
+ * This function has two inputs related to the PD it is acting on:
+ * "phd"
+ * and "omf_devparm"
+ *
+ * If omf_devparm is NULL, it means that the media class in which the PD must
+ * be placed is derived from mp->pds_pdv[pdh].pdi_parm.dpr_prop
+ * In that case the PD properties (.dpr_prop) must be updated and
+ * correct when entering this function.
+ * devparm is NULL when the device is available, that means the discovery
+ * was able to update .dpr_prop.
+ *
+ * If omf_devparm is not NULL, it means that the media class in which the PD
+ * must be placed is derived from omf_devparm.
+ * This is used when unavailable PDs are placed in their media class. In this
+ * situation (because the PD is unavailable) the discovery couldn't discover
+ * the PD properties and mp->pds_pdv[pdh].pdi_parm.dpr_prop has not been
+ * updated because of that.
+ * So we can't use .dpr_prop to place the PD in its class, instead we use what
+ * is coming from the persitent metadata (PD state record in MDC0). Aka
+ * omf_devparm.
+ * mp->pds_pdv[pdh].pdi_parm.dpr_prop will be update if/when the PD is available
+ * again.
+ *
+ * Restrictions in placing PDs in media classes
+ * --------------------------------------------
+ * This function enforces these restrictions.
+ * These restrictions are:
+ * a) in a mpool, for a given mclassp (enum mp_media_classp), there is
+ * at maximum one media class.
+ * b) All drives of a media class must checksummed or none, no mix allowed.
+ * c) The STAGING and CAPACITY classes must be both checksummed or both not
+ * checksummed.
+ *
+ * Locking:
+ * -------
+ * Should be called with mp.pds_pdvlock held in write.
+ * Except if mpool is single threaded (during activate for example).
+ */
+int
+mpool_desc_pdmc_add(
+ struct mpool_descriptor *mp,
+ u16 pdh,
+ struct omf_devparm_descriptor *omf_devparm,
+ bool check_only);
+
+int uuid_to_mpdesc_insert(struct rb_root *root, struct mpool_descriptor *data);
+
+int
+mpool_dev_sbwrite(
+ struct mpool_descriptor *mp,
+ struct mpool_dev_info *pd,
+ struct omf_sb_descriptor *sbmdc0);
+
+int
+mpool_mdc0_sb2obj(
+ struct mpool_descriptor *mp,
+ struct omf_sb_descriptor *sb,
+ struct pmd_layout **l1,
+ struct pmd_layout **l2);
+
+int mpool_desc_init_newpool(struct mpool_descriptor *mp, u32 flags);
+
+int
+mpool_dev_init_all(
+ struct mpool_dev_info *pdv,
+ u64 dcnt,
+ char **dpaths,
+ struct pd_prop *pd_prop);
+
+void mpool_mdc_cap_init(struct mpool_descriptor *mp, struct mpool_dev_info *pd);
+
+int
+mpool_desc_init_sb(
+ struct mpool_descriptor *mp,
+ struct omf_sb_descriptor *sbmdc0,
+ u32 flags,
+ bool *mc_resize);
+
+int mpool_dev_sbwrite_newpool(struct mpool_descriptor *mp, struct omf_sb_descriptor *sbmdc0);
+
+int check_for_dups(char **listv, int cnt, int *dup, int *offset);
+
+void fill_in_devprops(struct mpool_descriptor *mp, u64 pdh, struct mpool_devprops *dprop);
+
+int mpool_create_rmlogs(struct mpool_descriptor *mp, u64 mlog_cap);
+
+struct mpool_descriptor *mpool_desc_alloc(void);
+
+void mpool_desc_free(struct mpool_descriptor *mp);
+
+int mpool_dev_check_new(struct mpool_descriptor *mp, struct mpool_dev_info *pd);
+
+static inline enum pd_status mpool_pd_status_get(struct mpool_dev_info *pd)
+{
+ enum pd_status val;
+
+ /* Acquire semantics used so that no reads will be re-ordered from
+ * before to after this read.
+ */
+ val = atomic_read_acquire(&pd->pdi_status);
+
+ return val;
+}
+
+static inline void mpool_pd_status_set(struct mpool_dev_info *pd, enum pd_status status)
+{
+ /* All prior writes must be visible prior to the status change */
+ smp_wmb();
+ atomic_set(&pd->pdi_status, status);
+}
+
+/**
+ * mpool_get_mpname() - Get the mpool name
+ * @mp: mpool descriptor of the mpool
+ * @mpname: buffer to copy the mpool name into
+ * @mplen: buffer length
+ *
+ * Return:
+ * %0 if successful, -EINVAL otherwise
+ */
+static inline int mpool_get_mpname(struct mpool_descriptor *mp, char *mpname, size_t mplen)
+{
+ if (!mp || !mpname)
+ return -EINVAL;
+
+ strlcpy(mpname, mp->pds_name, mplen);
+
+ return 0;
+}
+
+
+#endif /* MPOOL_MPCORE_H */
diff --git a/drivers/mpool/params.h b/drivers/mpool/params.h
new file mode 100644
index 000000000000..5d1f40857a2a
--- /dev/null
+++ b/drivers/mpool/params.h
@@ -0,0 +1,116 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2015-2020 Micron Technology, Inc. All rights reserved.
+ */
+
+#ifndef MPOOL_PARAMS_H
+#define MPOOL_PARAMS_H
+
+#define MPOOL_MDC_SET_SZ 16
+
+/* Mpool metadata container compaction retries; keep relatively small */
+#define MPOOL_MDC_COMPACT_RETRY_DEFAULT 5
+
+/*
+ * Space map allocation zones per drive; bounds number of concurrent obj
+ * allocs
+ */
+#define MPOOL_SMAP_RGNCNT_DEFAULT 4
+
+/*
+ * Space map alignment in number of zones.
+ */
+#define MPOOL_SMAP_ZONEALIGN_DEFAULT 1
+
+/*
+ * Number of concurent jobs for loading user MDC 1~N
+ */
+#define MPOOL_OBJ_LOAD_JOBS_DEFAULT 8
+
+/*
+ * Defaults for MDC1/255 pre-compaction.
+ */
+#define MPOOL_PCO_PCTFULL 70
+#define MPOOL_PCO_PCTGARBAGE 20
+#define MPOOL_PCO_NBNOALLOC 2
+#define MPOOL_PCO_PERIOD 5
+#define MPOOL_PCO_FILLBIAS 1000
+#define MPOOL_PD_USAGE_PERIOD 60000
+#define MPOOL_CREATE_MDC_PCTFULL (MPOOL_PCO_PCTFULL - MPOOL_PCO_PCTGARBAGE)
+#define MPOOL_CREATE_MDC_PCTGRBG MPOOL_PCO_PCTGARBAGE
+
+
+/**
+ * struct mpcore_params - mpool core parameters. Not exported to public API.
+ * @mp_mdc0cap: MDC0 capacity, *ONLY* for testing purpose
+ * @mp_mdcncap: MDCN capacity, *ONLY* for testing purpose
+ * @mp_mdcnnum: Number of MDCs, *ONLY* for testing purpose
+ * @mp_smaprgnc:
+ * @mp_smapalign:
+ * @mp_spare:
+ * @mp_objloadjobs: number of concurrent MDC loading jobs
+ *
+ * The below parameters starting with "pco" are used for the pre-compaction
+ * of MDC1/255
+ * @mp_pcopctfull: % (0-100) of fill of MDCi active mlog that must be reached
+ * before a pre-compaction is attempted.
+ * @mp_pcopctgarbage: % (0-100) of garbage in MDCi active mlog that must be
+ * reached before a pre-compaction is attempted.
+ * @mp_pconbnoalloc: Number of MDCs from which no object is allocated from.
+ * If 0, that disable the background pre compaction.
+ * @mp_pcoperiod: In seconds. Period at which a background thread check if
+ * a MDC needs compaction.
+ * @mp_pcofillbias: If the next mpool MDC has less objects than
+ * (current MDC objects - pcofillbias), then allocate an object
+ * from the next MDC instead of from the current one.
+ * This bias favors object allocation from less filled MDCs (in term
+ * of number of committed objects).
+ * The bigger the number, the less bias.
+ * @mp_crtmdcpctfull: percent full threshold across all MDCs in combination
+ * with crtmdcpctgrbg percent is used as a trigger to create new MDCs
+ * @mp_crtmdcpctgrbg: percent garbage threshold in combination with
+ * @crtmdcpctfull percent is used as a trigger to create new MDCs
+ * @mp_mpusageperiod: period at which a background thread check mpool space
+ * usage, in milliseconds
+ */
+struct mpcore_params {
+ u64 mp_mdcnum;
+ u64 mp_mdc0cap;
+ u64 mp_mdcncap;
+ u64 mp_smaprgnc;
+ u64 mp_smapalign;
+ u64 mp_spare;
+ u64 mp_objloadjobs;
+ u64 mp_pcopctfull;
+ u64 mp_pcopctgarbage;
+ u64 mp_pconbnoalloc;
+ u64 mp_pcoperiod;
+ u64 mp_pcofillbias;
+ u64 mp_crtmdcpctfull;
+ u64 mp_crtmdcpctgrbg;
+ u64 mp_mpusageperiod;
+};
+
+/**
+ * mpcore_params_defaults() -
+ */
+static inline void mpcore_params_defaults(struct mpcore_params *params)
+{
+ params->mp_mdcnum = MPOOL_MDCNUM_DEFAULT;
+ params->mp_mdc0cap = 0;
+ params->mp_mdcncap = 0;
+ params->mp_smaprgnc = MPOOL_SMAP_RGNCNT_DEFAULT;
+ params->mp_smapalign = MPOOL_SMAP_ZONEALIGN_DEFAULT;
+ params->mp_spare = MPOOL_SPARES_DEFAULT;
+ params->mp_pcopctfull = MPOOL_PCO_PCTFULL;
+ params->mp_pcopctgarbage = MPOOL_PCO_PCTGARBAGE;
+ params->mp_pconbnoalloc = MPOOL_PCO_NBNOALLOC;
+ params->mp_pcoperiod = MPOOL_PCO_PERIOD;
+ params->mp_pcofillbias = MPOOL_PCO_FILLBIAS;
+ params->mp_crtmdcpctfull = MPOOL_CREATE_MDC_PCTFULL;
+ params->mp_crtmdcpctgrbg = MPOOL_CREATE_MDC_PCTGRBG;
+ params->mp_mpusageperiod = MPOOL_PD_USAGE_PERIOD;
+ params->mp_objloadjobs = MPOOL_OBJ_LOAD_JOBS_DEFAULT;
+}
+
+#endif /* MPOOL_PARAMS_H */
diff --git a/drivers/mpool/pd.h b/drivers/mpool/pd.h
new file mode 100644
index 000000000000..c8faefc7cf11
--- /dev/null
+++ b/drivers/mpool/pd.h
@@ -0,0 +1,202 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2015-2020 Micron Technology, Inc. All rights reserved.
+ */
+
+#ifndef MPOOL_PD_H
+#define MPOOL_PD_H
+
+#include <linux/uio.h>
+
+#include "uuid.h"
+#include "mpool_ioctl.h"
+
+/* Returns PD length in bytes. */
+#define PD_LEN(_pd_prop) ((_pd_prop)->pdp_devsz)
+
+/* Returns PD sector size (exponent, power of 2) */
+#define PD_SECTORSZ(_pd_prop) ((_pd_prop)->pdp_sectorsz)
+
+/* Return PD sector size mask */
+#define PD_SECTORMASK(_pd_prop) ((uint64_t)(1 << PD_SECTORSZ(_pd_prop)) - 1)
+
+struct omf_devparm_descriptor;
+
+/**
+ * struct pd_dev_parm -
+ * @dpr_prop: drive properties including zone parameters
+ * @dpr_dev_private: private info for implementation
+ * @dpr_name: device name
+ */
+struct pd_dev_parm {
+ struct pd_prop dpr_prop;
+ void *dpr_dev_private;
+ char dpr_name[PD_NAMESZ_MAX];
+};
+
+/* Shortcuts */
+#define dpr_zonepg dpr_prop.pdp_zparam.dvb_zonepg
+#define dpr_zonetot dpr_prop.pdp_zparam.dvb_zonetot
+#define dpr_devsz dpr_prop.pdp_devsz
+#define dpr_didstr dpr_prop.pdp_didstr
+#define dpr_mediachar dpr_prop.pdp_mediachar
+#define dpr_cmdopt dpr_prop.pdp_cmdopt
+#define dpr_optiosz dpr_prop.pdp_optiosz
+
+/**
+ * enum pd_status - Transient drive status.
+ * @PD_STAT_UNDEF: undefined; should never occur
+ * @PD_STAT_ONLINE: drive is responding to I/O requests
+ * @PD_STAT_SUSPECT: drive is failing some I/O requests
+ * @PD_STAT_OFFLINE: drive declared non-responsive to I/O requests
+ * @PD_STAT_UNAVAIL: drive path not provided or open failed when mpool was opened
+ *
+ * Transient drive status, these are stored as atomic_t variable
+ * values
+ */
+enum pd_status {
+ PD_STAT_UNDEF = 0,
+ PD_STAT_ONLINE = 1,
+ PD_STAT_SUSPECT = 2,
+ PD_STAT_OFFLINE = 3,
+ PD_STAT_UNAVAIL = 4
+};
+
+_Static_assert((PD_STAT_UNAVAIL < 256), "enum pd_status must fit in uint8_t");
+
+/**
+ * enum pd_cmd_opt - drive command options
+ * @PD_CMD_DISCARD: the device has TRIM/UNMAP command.
+ * @PD_CMD_SECTOR_UPDATABLE: the device can be read/written with sector granularity.
+ * @PD_CMD_DIF_ENABLED: T10 DIF is used on this device.
+ * @PD_CMD_SED_ENABLED: Self encrypting enabled
+ * @PD_CMD_DISCARD_ZERO: the device supports discard_zero
+ * @PD_CMD_RDONLY: activate mpool with PDs in RDONLY mode,
+ * write/discard commands are No-OPs.
+ * Defined as a bit vector so can combine.
+ * Fields holding such a vector should uint64_t.
+ *
+ * TODO: we need to find a way to detect if SED is enabled on a device
+ */
+enum pd_cmd_opt {
+ PD_CMD_NONE = 0,
+ PD_CMD_DISCARD = 0x1,
+ PD_CMD_SECTOR_UPDATABLE = 0x2,
+ PD_CMD_DIF_ENABLED = 0x4,
+ PD_CMD_SED_ENABLED = 0x8,
+ PD_CMD_DISCARD_ZERO = 0x10,
+ PD_CMD_RDONLY = 0x20,
+};
+
+/**
+ * enum pd_devtype - Device types
+ * @PD_DEV_TYPE_BLOCK_STREAM: Block device implementing streams.
+ * @PD_DEV_TYPE_BLOCK_STD: Standard (non-streams) device (SSD, HDD).
+ * @PD_DEV_TYPE_FILE: File in user space for UT.
+ * @PD_DEV_TYPE_MEM: Memory semantic device, e.g. NVDIMM direct access (raw or dax mode)
+ * @PD_DEV_TYPE_ZONE: zone-like device, e.g., open channel SSD and SMR HDD (using ZBC/ZAC)
+ * @PD_DEV_TYPE_BLOCK_NVDIMM: Standard (non-streams) NVDIMM in sector mode.
+ */
+enum pd_devtype {
+ PD_DEV_TYPE_BLOCK_STREAM = 1,
+ PD_DEV_TYPE_BLOCK_STD,
+ PD_DEV_TYPE_FILE,
+ PD_DEV_TYPE_MEM,
+ PD_DEV_TYPE_ZONE,
+ PD_DEV_TYPE_BLOCK_NVDIMM,
+ PD_DEV_TYPE_LAST = PD_DEV_TYPE_BLOCK_NVDIMM,
+};
+
+_Static_assert((PD_DEV_TYPE_LAST < 256), "enum pd_devtype must fit in uint8_t");
+
+/**
+ * enum pd_state - Device states
+ * @PD_DEV_STATE_AVAIL: Device is available
+ * @PD_DEV_STATE_UNAVAIL: Device is unavailable
+ */
+enum pd_state {
+ PD_DEV_STATE_UNDEFINED = 0,
+ PD_DEV_STATE_AVAIL = 1,
+ PD_DEV_STATE_UNAVAIL = 2,
+ PD_DEV_STATE_LAST = PD_DEV_STATE_UNAVAIL,
+};
+
+_Static_assert((PD_DEV_STATE_LAST < 256), "enum pd_state must fit in uint8_t");
+
+/*
+ * pd API functions -- device-type independent dparm ops
+ */
+
+/*
+ * Error codes: All pd functions can return one or more of:
+ *
+ * -EINVAL invalid fn args
+ * -EBADSLT attempt to read or write a bad zone on a zone device
+ * -EIO all other errors
+ */
+
+int pd_dev_open(const char *path, struct pd_dev_parm *dparm, struct pd_prop *pd_prop);
+int pd_dev_close(struct pd_dev_parm *dparm);
+int pd_dev_flush(struct pd_dev_parm *dparm);
+
+/**
+ * pd_bio_erase() -
+ * @pd:
+ * @zaddr:
+ * @zonecnt:
+ * @reads_erased: whether the data can be read post DISCARD
+ *
+ * Return:
+ */
+int pd_zone_erase(struct pd_dev_parm *dparm, u64 zaddr, u32 zonecnt, bool reads_erased);
+
+/*
+ * pd API functions - device dependent operations
+ */
+
+/**
+ * pd_zone_pwritev() -
+ * @pd:
+ * @iov:
+ * @iovcnt:
+ * @zaddr:
+ * @boff: offset in bytes from the start of "zaddr".
+ * @opflags:
+ *
+ * Return:
+ */
+int pd_zone_pwritev(struct pd_dev_parm *dparm, const struct kvec *iov,
+ int iovcnt, u64 zaddr, loff_t boff, int opflags);
+
+/**
+ * pd_zone_pwritev_sync() -
+ * @pd:
+ * @iov:
+ * @iovcnt:
+ * @zaddr:
+ * @boff: Offset in bytes from the start of zaddr.
+ *
+ * Return:
+ */
+int pd_zone_pwritev_sync(struct pd_dev_parm *dparm, const struct kvec *iov,
+ int iovcnt, u64 zaddr, loff_t boff);
+
+/**
+ * pd_zone_preadv() -
+ * @pd:
+ * @iov:
+ * @iovcnt:
+ * @zaddr: target zone for this I/O
+ * @boff: byte offset into the target zone
+ *
+ * Return:
+ */
+int pd_zone_preadv(struct pd_dev_parm *dparm, const struct kvec *iov,
+ int iovcnt, u64 zaddr, loff_t boff);
+
+void pd_dev_set_unavail(struct pd_dev_parm *dparm, struct omf_devparm_descriptor *omf_devparm);
+
+int pd_init(void) __cold;
+void pd_exit(void) __cold;
+
+#endif /* MPOOL_PD_H */
diff --git a/drivers/mpool/pmd.h b/drivers/mpool/pmd.h
new file mode 100644
index 000000000000..5fd6ca020fd1
--- /dev/null
+++ b/drivers/mpool/pmd.h
@@ -0,0 +1,379 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2015-2020 Micron Technology, Inc. All rights reserved.
+ */
+
+#ifndef MPOOL_PMD_H
+#define MPOOL_PMD_H
+
+#include <linux/atomic.h>
+#include <linux/rbtree.h>
+#include <linux/mutex.h>
+#include <linux/workqueue.h>
+#include <linux/spinlock.h>
+
+#include "mpool_ioctl.h"
+#include "omf_if.h"
+#include "pmd_obj.h"
+
+/**
+ * DOC: Module info.
+ *
+ * Pool metadata (pmd) module.
+ *
+ * Implements functions for mpool metadata management.
+ *
+ */
+
+struct mpool_descriptor;
+struct mpool_dev_info;
+struct mp_mdc;
+struct pmd_layout;
+struct mpool_config;
+
+/**
+ * DOC: Object lifecycle
+ *
+ * +) all mblock/mlog objects are owned by mpool layer users, excepting
+ * mdc mlogs
+ * +) users are responsible for object lifecycle mgmt and must not violate it;
+ * e.g. by using an object handle (layout pointer) after deleting that
+ * object
+ * +) the mpool layer never independently aborts or deletes user objects
+ */
+
+/**
+ * DOC: Object ids
+ * Object ids for mblocks and mlogs are a unit64 of the form:
+ * <uniquifier (52-bits), type (4-bits), slot # (8 bits)>
+ *
+ */
+
+/**
+ * DOC: NOTES
+ * + metadata for a given object is stored in the mdc specified by slot #
+ * + uniquifiers are only guaranteed unique for a given slot #
+ * + metadata for all mdc (except mdc 0) are stored in mdc 0
+ * + mdc 0 is a distinguished container whose metadata is stored in superblocks
+ * + mdc 0 only stores object metadata for mdc 1-255
+ * + mdc N is implemented via mlogs with objids (2N, MLOG, 0) & (2N+1, MLOG, 0)
+ * + mdc 0 mlog objids are (0, MLOG, 0) and (1, MLOG, 0) where a slot # of 0
+ * indicates the mlog metadata is stored in mdc 0 whereas it is actually in
+ * superblocks; see comments in pmd_mdc0_init() for how we exploit this.
+ */
+
+/**
+ * struct pre_compact_ctrs - objects records counters, used for pre compaction of MDC1/255.
+ * @pcc_cr: count of object create records
+ * @pcc_up: count of object update records
+ * @pcc_del: count of object delete records. If the object is shceduled for
+ * deletion in the background, the counter is incremented (while the
+ * delete record has not been written yet).
+ * @pcc_er: count of object erase records
+ * @pcc_cobj: count of committed objects (and not deleted).
+ * @pcc_cap: In bytes, size of each mlog of the MDC
+ * @pcc_len: In bytes, how much is filled the active mlog.
+ *
+ * One such structure per mpool MDC.
+ *
+ * Locking:
+ * Updates are serialized by the MDC compact lock.
+ * The reads by the pre-compaction thread are done without holding any
+ * lock. This is why atomic variables are used.
+ * However because the variables are integers, the atomic read translates
+ * into a simple load and the set translate in a simple store.
+ *
+ * The counters pcc_up, pcc_del, pcc_er are cleared at each compaction.
+ *
+ * Relaxed access is appropriate for all of these atomics
+ */
+struct pre_compact_ctrs {
+ atomic_t pcc_cr;
+ atomic_t pcc_up;
+ atomic_t pcc_del;
+ atomic_t pcc_er;
+ atomic_t pcc_cobj;
+ atomic64_t pcc_cap;
+ atomic64_t pcc_len;
+};
+
+/**
+ * struct credit_info - mdc selector info
+ * @ci_credit: available credit
+ * @ci_free: available free space
+ * @ci_slot: MDC slot number
+ *
+ * Contains information about available credit and a balance. Available
+ * credit is based on an rate at which records can be written to
+ * mdc such that all MDC will fill at the same time.
+ */
+struct credit_info {
+ u64 ci_credit;
+ u64 ci_free;
+ u8 ci_slot;
+};
+
+/**
+ * struct pmd_mdc_stats - per MDC space usage stats
+ * @pms_mblock_alen: mblock alloc len
+ * @pms_mblock_wlen: mblock write len
+ * @pms_mlog_alen: mlog alloc len
+ * @pms_mblock_cnt: mblock count
+ * @pms_mlog_cnt: mlog count
+ */
+struct pmd_mdc_stats {
+ u64 pms_mblock_alen;
+ u64 pms_mblock_wlen;
+ u64 pms_mlog_alen;
+ u32 pms_mblock_cnt;
+ u32 pms_mlog_cnt;
+};
+
+/**
+ * struct pmd_mdc_info - Metadata container (mdc) info.
+ * @mmi_compactlock: compaction lock
+ * @mmi_uc_lock: uncommitted objects tree lock
+ * @mmi_uc_root: uncommitted objects tree root
+ * @mmi_co_lock: committed objects tree lock
+ * @mmi_co_root: committed objects tree root
+ * @mmi_uqlock: uniquifier lock
+ * @mmi_luniq: uniquifier of last object assigned to container
+ * @mmi_mdc: MDC implementing container
+ * @mmi_recbuf: buffer for (un)packing log records
+ * @mmi_lckpt: last objid checkpointed
+ * @mmi_stats: per-MDC usage stats
+ * @mmi_stats_lock: lock for protecting mmi_stats
+ * @mmi_pco_cnt: counters used by the pre compaction of MDC1/255.
+ * @mmi_mdcver: version of the mdc content on media when the mpool was
+ * activated. That may not be the current version on media
+ * if a MDC metadata conversion took place during activate.
+ * @mmi_credit MDC credit info
+ *
+ * LOCKING:
+ * + mmi_luniq: protected by uqlock
+ * + mmi_mdc, recbuf, lckpt: protected by compactlock
+ * + mmi_co_root: protected by co_lock
+ * + mmi_uc_root: protected by uc_lock
+ * + mmi_stats: protected by mmi_stats_lock
+ * + mmi_pco_counters: updates serialized by mmi_compactlock
+ *
+ * NOTE:
+ * + for mdc0 mmi_luniq is the slot # of the last mdc created
+ * + logging to a mdc cannot execute concurrent with compacting
+ * that mdc;
+ * mmi_compactlock is used to enforce this
+ * + compacting a mdc requires freezing both the list of committed
+ * objects in that mdc and the metadata for those objects;
+ * compactlock facilitates this in a way that avoids locking each
+ * object during compaction; as a result object metadata updates
+ * are serialized, but even without mdc compaction this would be
+ * the case because all such metadata updates must be logged to
+ * the object's mdc and mdc logging is inherently serial
+ * + see struct pmd_layout comments for specifics on how
+ * compactlock is used to freeze metadata for committed objects
+ */
+struct pmd_mdc_info {
+ struct mutex mmi_compactlock;
+ char *mmi_recbuf;
+ u64 mmi_lckpt;
+ struct mp_mdc *mmi_mdc;
+
+ ____cacheline_aligned
+ struct mutex mmi_uc_lock;
+ struct rb_root mmi_uc_root;
+
+ ____cacheline_aligned
+ struct rw_semaphore mmi_co_lock;
+ struct rb_root mmi_co_root;
+
+ ____cacheline_aligned
+ struct mutex mmi_uqlock;
+ u64 mmi_luniq;
+
+ ____cacheline_aligned
+ struct credit_info mmi_credit;
+ struct omf_mdcver mmi_mdcver;
+
+ ____cacheline_aligned
+ struct mutex mmi_stats_lock;
+ struct pmd_mdc_stats mmi_stats;
+
+ struct pre_compact_ctrs mmi_pco_cnt;
+};
+
+/**
+ * struct pmd_mdc_selector - Object containing MDC slots for allocation
+ * @mds_tbl_idx: idx of the MDC slot selector in the mds_tbl
+ * @mds_tbl: slot table used for MDC selection
+ * @mds_mdc: scratch pad for sorting mdc by free size
+ *
+ * LOCKING:
+ * + mdi_slotvlock lock will be taken to protect this object.
+ *
+ */
+struct pmd_mdc_selector {
+ atomic_t mds_tbl_idx;
+ u8 mds_tbl[MDC_TBL_SZ];
+ void *mds_smdc[MDC_SLOTS];
+};
+
+/**
+ * struct pmd_mda_info - Metadata container array (mda).
+ * @mdi_slotvlock: it is assumed that this spinlock is NOT taken from interrupt context
+ * @mdi_slotvcnt: number of active slotv entries
+ * @mdi_slotv: per mdc info
+ * @mdi_sel: MDC allocation selector
+ *
+ * LOCKING:
+ * + mdi_slotvcnt: protected by mdi_slotvlock
+ *
+ * NOTE:
+ * + mdi_slotvcnt only ever increases so mdi_slotv[x], x < mdi_slotvcnt, is
+ * always active
+ * + all mdi_slotv[] entries are initialized whether or not active so they
+ * can all be accessed w/o locking except as required by pmd_mdc_info struct
+ */
+struct pmd_mda_info {
+ spinlock_t mdi_slotvlock;
+ u16 mdi_slotvcnt;
+
+ struct pmd_mdc_info mdi_slotv[MDC_SLOTS];
+ struct pmd_mdc_selector mdi_sel;
+};
+
+/**
+ * struct pmd_obj_load_work - work struct for loading MDC 1~N
+ * @olw_work: work struct
+ * @olw_mp:
+ * @olw_progress: Progress index. It is an (atomic_t *) so that multiple
+ * pmd_obj_load_work structs can point to a single atomic_t
+ * for grabbing the next MDC number to be processed.
+ * @olw_err:
+ */
+struct pmd_obj_load_work {
+ struct work_struct olw_work;
+ struct mpool_descriptor *olw_mp;
+ atomic_t *olw_progress; /* relaxed is correct */
+ atomic_t *olw_err;
+};
+
+/**
+ * pmd_mpool_activate() - Load all metadata for mpool mp.
+ * @mp:
+ * @mdc01:
+ * @mdc02:
+ * @create:
+ *
+ * Load all metadata for mpool mp; create flag indicates if is a new pool;
+ * caller must ensure no other thread accesses mp until activation is complete.
+ * note: pmd module owns mdc01/2 memory mgmt whether succeeds or fails
+ *
+ * Return: %0 if successful, -errno otherwise
+ */
+int pmd_mpool_activate(struct mpool_descriptor *mp, struct pmd_layout *mdc01,
+ struct pmd_layout *mdc02, int create);
+
+/**
+ * pmd_mpool_deactivate() - Deactivate mpool mp.
+ * @mp:
+ *
+ * Free all metadata for mpool mp excepting mp itself; caller must ensure
+ * no other thread can access mp during deactivation.
+ */
+void pmd_mpool_deactivate(struct mpool_descriptor *mp);
+
+/**
+ * pmd_mdc_alloc() - Add a metadata container to mpool.
+ * @mp:
+ * @mincap:
+ * @iter: the role of this parameter is to get the active mlogs of the mpool
+ * MDCs uniformely spread on the mpool devices.
+ * When pmd_mdc_alloc() is called in a loop to allocate several mpool MDCs,
+ * iter should be incremented at each subsequent call.
+ *
+ * Add a metadata container (mdc) to mpool with a minimum capacity of mincap
+ * bytes. Once added an mdc can never be deleted.
+ *
+ * Return: %0 if successful, -errno otherwise
+ */
+int pmd_mdc_alloc(struct mpool_descriptor *mp, u64 mincap, u32 iter);
+
+/**
+ * pmd_mdc_cap() - Get metadata container (mdc) capacity stats.
+ * @mp:
+ * @mdcmax:
+ * @mdccap:
+ * @mdc0cap:
+ *
+ * Get metadata container (mdc) stats: count, aggregate capacity ex-mdc0 and
+ * mdc0 cap
+ */
+void pmd_mdc_cap(struct mpool_descriptor *mp, u64 *mdcmax, u64 *mdccap, u64 *mdc0cap);
+
+/**
+ * pmd_prop_mcconfig() -
+ * @mp:
+ * @pd:
+ * @compacting: if true, called by a compaction.
+ *
+ * Persist state (new or update) for drive pd; caller must hold mp.pdvlock
+ * if pd is an in-use member of mp.pdv.
+ *
+ * Locking: caller must hold MDC0 compact lock.
+ *
+ * Return: %0 if successful, -errno otherwise
+ */
+int pmd_prop_mcconfig(struct mpool_descriptor *mp, struct mpool_dev_info *pd, bool compacting);
+
+/**
+ * pmd_prop_mcspare() -
+ * @mp:
+ * @mclassp:
+ * @spzone:
+ * @compacting: if true, called by a compaction.
+ *
+ * Persist spare zone info for drives in media class (new or update).
+ *
+ * Locking: caller must hold MDC0 compact lock.
+ *
+ * Return: %0 if successful, -errno otherwise
+ */
+int pmd_prop_mcspare(struct mpool_descriptor *mp, enum mp_media_classp mclassp,
+ u8 spzone, bool compacting);
+
+int pmd_prop_mpconfig(struct mpool_descriptor *mp, const struct mpool_config *cfg, bool compacting);
+
+/**
+ * pmd_precompact_start() - start MDC1/255 precompaction
+ * @mp:
+ */
+void pmd_precompact_start(struct mpool_descriptor *mp);
+
+/**
+ * pmd_precompact_stop() - stop MDC1/255 precompaction
+ * @mp:
+ */
+void pmd_precompact_stop(struct mpool_descriptor *mp);
+
+/**
+ * pmd_mdc_addrec_version() -add a version record in a mpool MDC.
+ * @mp:
+ * @cslot:
+ */
+int pmd_mdc_addrec_version(struct mpool_descriptor *mp, u8 cslot);
+
+int pmd_log_delete(struct mpool_descriptor *mp, u64 objid);
+
+int pmd_log_create(struct mpool_descriptor *mp, struct pmd_layout *layout);
+
+int pmd_log_erase(struct mpool_descriptor *mp, u64 objid, u64 gen);
+
+int pmd_log_idckpt(struct mpool_descriptor *mp, u64 objid);
+
+#define PMD_MDC0_COMPACTLOCK(_mp) \
+ pmd_mdc_lock(&((_mp)->pds_mda.mdi_slotv[0].mmi_compactlock), 0)
+
+#define PMD_MDC0_COMPACTUNLOCK(_mp) \
+ pmd_mdc_unlock(&((_mp)->pds_mda.mdi_slotv[0].mmi_compactlock))
+
+#endif /* MPOOL_PMD_H */
diff --git a/drivers/mpool/pmd_obj.h b/drivers/mpool/pmd_obj.h
new file mode 100644
index 000000000000..7cf5dea80f9d
--- /dev/null
+++ b/drivers/mpool/pmd_obj.h
@@ -0,0 +1,499 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2015-2020 Micron Technology, Inc. All rights reserved.
+ */
+
+#ifndef MPOOL_PMD_OBJ_H
+#define MPOOL_PMD_OBJ_H
+
+#include <linux/sort.h>
+#include <linux/rbtree.h>
+#include <linux/kref.h>
+#include <linux/rwsem.h>
+#include <linux/workqueue.h>
+
+#include "uuid.h"
+#include "mpool_ioctl.h"
+#include "omf_if.h"
+#include "mlog.h"
+
+struct mpool_descriptor;
+struct pmd_mdc_info;
+
+/*
+ * objid uniquifier checkpoint interval; used to avoid reissuing an outstanding
+ * objid after a crash; supports pmd_{mblock|mlog}_realloc()
+ */
+#define OBJID_UNIQ_POW2 8
+#define OBJID_UNIQ_DELTA (1 << OBJID_UNIQ_POW2)
+
+/* MDC_SLOTS is 256 [0,255] to fit in 8-bit slot field in objid.
+ */
+#define MDC_SLOTS 256
+#define MDC_TBL_SZ (MDC_SLOTS * 4)
+
+#define UROOT_OBJID_LOG1 logid_make(0, 1)
+#define UROOT_OBJID_LOG2 logid_make(1, 1)
+#define UROOT_OBJID_MAX 1
+
+#define MDC0_OBJID_LOG1 logid_make(0, 0)
+#define MDC0_OBJID_LOG2 logid_make(1, 0)
+
+/**
+ * enum pmd_lock_class -
+ * @PMD_NONE:
+ * @PMD_OBJ_CLIENT:
+ * For layout rwlock,
+ * - Object id contains a non-zero slot number
+ * @PMD_MDC_NORMAL:
+ * For layout rwlock,
+ * - Object id contains a zero slot number AND
+ * - Object id is neither of the well-known MDC-0 objids
+ * For pmd_mdc_info.* locks,
+ * - Array index of pmd_mda_info.slov[] is > 0.
+ * @PMD_MDC_ZERO:
+ * For layout rwlock,
+ * - Object id contains a zero slot number AND
+ * - Object id is either of the well-known MDC-0 objids
+ * For pmd_mdc_info.* locks,
+ * - Array index of pmd_mda_info.slov[] is == 0.
+ *
+ * NOTE:
+ * - Object layout rw locks must be acquired before any MDC locks.
+ * - MDC-0 locks of a given class are below MDC-1/255 locks of those same
+ * classes.
+ */
+enum pmd_lock_class {
+ PMD_NONE = 0,
+ PMD_OBJ_CLIENT = 1,
+ PMD_MDC_NORMAL = 2,
+ PMD_MDC_ZERO = 3,
+};
+
+/**
+ * enum pmd_obj_op -
+ * @PMD_OBJ_LOAD:
+ * @PMD_OBJ_ALLOC:
+ * @PMD_OBJ_COMMIT:
+ * @PMD_OBJ_ABORT:
+ * @PMD_OBJ_DELETE:
+ */
+enum pmd_obj_op {
+ PMD_OBJ_LOAD = 1,
+ PMD_OBJ_ALLOC = 2,
+ PMD_OBJ_COMMIT = 3,
+ PMD_OBJ_ABORT = 4,
+ PMD_OBJ_DELETE = 5,
+};
+
+/**
+ * enum pmd_layout_state - object state flags
+ * @PMD_LYT_COMMITTED: object is committed to media
+ * @PMD_LYT_REMOVED: object logically removed (aborted or deleted)
+ */
+enum pmd_layout_state {
+ PMD_LYT_COMMITTED = 0x01,
+ PMD_LYT_REMOVED = 0x02,
+};
+
+/**
+ * struct pmd_layout_mlpriv - mlog private data for pmd_layout
+ * @mlp_uuid: unique ID per mlog
+ * @mlp_lstat: mlog status
+ * @mlp_nodeoml: "open mlog" rbtree linkage
+ */
+struct pmd_layout_mlpriv {
+ struct mpool_uuid mlp_uuid;
+ struct rb_node mlp_nodeoml;
+ struct mlog_stat mlp_lstat;
+};
+
+/**
+ * union pmd_layout_priv - pmd_layout object type specific private data
+ * @mlpriv: mlog private data
+ */
+union pmd_layout_priv {
+ struct pmd_layout_mlpriv mlpriv;
+};
+
+/**
+ * struct pmd_layout - object layout (in-memory version)
+ * @eld_nodemdc: rbtree node for uncommitted and committed objects
+ * @eld_objid: object ID associated with layout
+ * @eld_mblen: Amount of data written in the mblock in bytes (0 for mlogs)
+ * @eld_state: enum pmd_layout_state
+ * @eld_flags: enum mlog_open_flags for mlogs
+ * @eld_gen: object generation
+ * @eld_ld:
+ * @eld_ref: user ref count from alloc/get/put
+ * @eld_rwlock: implements pmd_obj_*lock() for this layout
+ * @dle_mlpriv: mlog private data
+ *
+ * LOCKING:
+ * + objid: constant; no locking required
+ * + lstat: lstat and *lstat are protected by pmd_obj_*lock()
+ * + all other fields: see notes
+ *
+ * NOTE:
+ * + committed object fields (other): to update hold pmd_obj_wrlock()
+ * AND
+ * compactlock for object's mdc; to read hold pmd_obj_*lock()
+ * See the comments associated with struct pmd_mdc_info for
+ * further details.
+ *
+ * eld_priv[] contains exactly one element if the object type
+ * is and mlog, otherwise it contains exactly zero element.
+ */
+struct pmd_layout {
+ struct rb_node eld_nodemdc;
+ u64 eld_objid;
+ u32 eld_mblen;
+ u8 eld_state;
+ u8 eld_flags;
+ u64 eld_gen;
+ struct omf_layout_descriptor eld_ld;
+
+ /* The above fields are read-mostly, while the
+ * following two fields mutate frequently.
+ */
+ struct kref eld_ref;
+ struct rw_semaphore eld_rwlock;
+
+ union pmd_layout_priv eld_priv[];
+};
+
+/* Shortcuts for mlog private data...
+ */
+#define eld_mlpriv eld_priv->mlpriv
+#define eld_uuid eld_mlpriv.mlp_uuid
+#define eld_lstat eld_mlpriv.mlp_lstat
+#define eld_nodeoml eld_mlpriv.mlp_nodeoml
+
+/**
+ * struct pmd_obj_capacity -
+ * @moc_captgt: capacity target for object in bytes
+ * @moc_spare: true, if alloc obj from spare space
+ */
+struct pmd_obj_capacity {
+ u64 moc_captgt;
+ bool moc_spare;
+};
+
+/**
+ * struct pmd_obj_erase_work - workqueue job struct for object erase and free
+ * @oef_mp: mpool
+ * @oef_layout: object layout
+ * @oef_cache: kmem cache to free work (or NULL)
+ * @oef_wqstruct: workq struct
+ */
+struct pmd_obj_erase_work {
+ struct mpool_descriptor *oef_mp;
+ struct pmd_layout *oef_layout;
+ struct kmem_cache *oef_cache;
+ struct work_struct oef_wqstruct;
+};
+
+/**
+ * struct mdc_csm_info - mdc credit set member info
+ * @m_slot: mdc slot number
+ * @ci_credit: available credit
+ */
+struct mdc_csm_info {
+ u8 m_slot;
+ u16 m_credit;
+};
+
+/**
+ * struct mdc_credit_set - mdc credit set
+ * @cs_idx: index of current credit set member
+ * @cs_num_csm: number of credit set members in this credit set
+ * @cs_csm: array of credit set members
+ */
+struct mdc_credit_set {
+ u8 cs_idx;
+ u8 cs_num_csm;
+ struct mdc_csm_info csm[MPOOL_MDC_SET_SZ];
+};
+
+/**
+ * pmd_obj_alloc() - Allocate an object.
+ * @mp:
+ * @otype:
+ * @ocap:
+ * @mclassp: media class
+ * @layoutp:
+ *
+ * Allocate object of type otype with parameters and capacity as specified
+ * by ocap on drives in media class mclassp providing a minimum capacity of
+ * mincap bytes; if successful returns object layout.
+ *
+ * Note:
+ * Object is not persistent until committed; allocation can be aborted.
+ *
+ * Return: %0 if successful, -errno otherwise
+ */
+int pmd_obj_alloc(struct mpool_descriptor *mp, enum obj_type_omf otype,
+ struct pmd_obj_capacity *ocap, enum mp_media_classp mclassp,
+ struct pmd_layout **layoutp);
+
+
+/**
+ * pmd_obj_realloc() - Re-allocate an object.
+ * @mp:
+ * @objid:
+ * @ocap:
+ * @mclassp: media class
+ * @layoutp:
+ *
+ * Allocate object with specified objid to support crash recovery; otherwise
+ * is equivalent to pmd_obj_alloc(); if successful returns object layout.
+ *
+ * Note:
+ * Object is not persistent until committed; allocation can be aborted.
+ *
+ * Return: %0 if successful; -errno otherwise
+ */
+int pmd_obj_realloc(struct mpool_descriptor *mp, u64 objid, struct pmd_obj_capacity *ocap,
+ enum mp_media_classp mclassp, struct pmd_layout **layoutp);
+
+
+/**
+ * pmd_obj_commit() - Commit an object.
+ * @mp:
+ * @layout:
+ *
+ * Make allocated object persistent; if fails object remains uncommitted so
+ * can retry commit or abort; object cannot be committed while in erasing or
+ * aborting state; caller MUST NOT hold pmd_obj_*lock() on layout.
+ *
+ * Return: %0 if successful, -errno otherwise
+ */
+int pmd_obj_commit(struct mpool_descriptor *mp, struct pmd_layout *layout);
+
+/**
+ * pmd_obj_abort() - Discard un-committed object.
+ * @mp:
+ * @layout:
+ *
+ * Discard uncommitted object; caller MUST NOT hold pmd_obj_*lock() on
+ * layout; if successful layout is invalid after call.
+ *
+ * Return: %0 if successful; -errno otherwise
+ */
+int pmd_obj_abort(struct mpool_descriptor *mp, struct pmd_layout *layout);
+
+/**
+ * pmd_obj_delete() - Delete committed object.
+ * @mp:
+ * @layout:
+ *
+ * Delete committed object; caller MUST NOT hold pmd_obj_*lock() on layout;
+ * if successful layout is invalid.
+ *
+ * Return: %0 if successful, -errno otherwise
+ */
+int pmd_obj_delete(struct mpool_descriptor *mp, struct pmd_layout *layout);
+
+/**
+ * pmd_obj_erase() - Log erase for object and set state flag and generation number
+ * @mp:
+ * @layout:
+ * @gen:
+ *
+ * Object must be in committed state; caller MUST hold pmd_obj_wrlock() on layout.
+ *
+ * Return: %0 if successful, -errno otherwise
+ */
+int pmd_obj_erase(struct mpool_descriptor *mp, struct pmd_layout *layout, u64 gen);
+
+/**
+ * pmd_obj_find_get() - Get a reference for a layout for objid.
+ * @mp:
+ * @objid:
+ * @which:
+ *
+ * Get layout for object with specified objid; return NULL either if not found
+ *
+ * Return: pointer to layout if successful, NULL otherwise
+ */
+struct pmd_layout *pmd_obj_find_get(struct mpool_descriptor *mp, u64 objid, int which);
+
+/**
+ * pmd_obj_rdlock() - Read-lock object layout with appropriate nesting level.
+ * @layout:
+ */
+void pmd_obj_rdlock(struct pmd_layout *layout);
+
+/**
+ * pmd_obj_rdunlock() - Release read lock on object layout.
+ * @layout:
+ */
+void pmd_obj_rdunlock(struct pmd_layout *layout);
+
+/**
+ * pmd_obj_wrlock() - Write-lock object layout with appropriate nesting level.
+ * @layout:
+ */
+void pmd_obj_wrlock(struct pmd_layout *layout);
+
+/**
+ * pmd_obj_wrunlock() - Release write lock on object layout.
+ * @layout:
+ */
+void pmd_obj_wrunlock(struct pmd_layout *layout);
+
+/**
+ * pmd_init_credit() - udpates available credit and setup mdc selector table
+ * @mp: mpool object
+ *
+ * Lock: No Lock required
+ *
+ * Used to initialize credit when new MDCs are added and add the mds to
+ * available
+ * credit list.
+ */
+void pmd_update_credit(struct mpool_descriptor *mp);
+
+/**
+ * pmd_mpool_usage() - calculate per-mpool space usage
+ * @mp:
+ * @usage:
+ */
+void pmd_mpool_usage(struct mpool_descriptor *mp, struct mpool_usage *usage);
+
+/**
+ * pmd_precompact_alsz() - Inform MDC1/255 pre-compacting about the active
+ * mlog of an mpool MDCi 0<i<=255.
+ * The size and how much is used are passed in.
+ * "alsz" stands for active mlog size.
+ * @mp:
+ * @objid: objid of the active mlog of the mpool MDCi
+ * @len: In bytes, how much of the active mlog is used.
+ * @cap: In bytes, size of the active mlog.
+ */
+void pmd_precompact_alsz(struct mpool_descriptor *mp, u64 objid, u64 len, u64 cap);
+
+/**
+ * pmd_layout_alloc() - create and initialize an pmd_layout
+ * @objid: mblock/mlog object ID
+ * @gen: generation number
+ * @mblen: mblock written length
+ * @zcnt: number of zones in a strip
+ *
+ * Alloc and init object layout; non-arg fields and all strip descriptor
+ * fields are set to 0/UNDEF/NONE; no auxiliary object info is allocated.
+ *
+ * Return: NULL if allocation fails.
+ */
+struct pmd_layout *pmd_layout_alloc(struct mpool_uuid *uuid, u64 objid,
+ u64 gen, u64 mblen, u32 zcnt);
+
+/**
+ * pmd_layout_release() - free pmd_layout and internal elements
+ * @layout:
+ *
+ * Deallocate all memory associated with object layout.
+ *
+ * Return: void
+ */
+void pmd_layout_release(struct kref *refp);
+
+int pmd_layout_rw(struct mpool_descriptor *mp, struct pmd_layout *layout,
+ const struct kvec *iov, int iovcnt, u64 boff, int flags, u8 rw);
+
+struct mpool_dev_info *pmd_layout_pd_get(struct mpool_descriptor *mp, struct pmd_layout *layout);
+
+u64 pmd_layout_cap_get(struct mpool_descriptor *mp, struct pmd_layout *layout);
+
+int pmd_layout_erase(struct mpool_descriptor *mp, struct pmd_layout *layout);
+
+int pmd_obj_alloc_cmn(struct mpool_descriptor *mp, u64 objid, enum obj_type_omf otype,
+ struct pmd_obj_capacity *ocap, enum mp_media_classp mclass,
+ int realloc, bool needref, struct pmd_layout **layoutp);
+
+void pmd_update_obj_stats(struct mpool_descriptor *mp, struct pmd_layout *layout,
+ struct pmd_mdc_info *cinfo, enum pmd_obj_op op);
+
+void pmd_obj_rdlock(struct pmd_layout *layout);
+void pmd_obj_rdunlock(struct pmd_layout *layout);
+
+void pmd_obj_wrlock(struct pmd_layout *layout);
+void pmd_obj_wrunlock(struct pmd_layout *layout);
+
+void pmd_co_rlock(struct pmd_mdc_info *cinfo, u8 slot);
+void pmd_co_runlock(struct pmd_mdc_info *cinfo);
+
+struct pmd_layout *pmd_co_find(struct pmd_mdc_info *cinfo, u64 objid);
+struct pmd_layout *pmd_co_insert(struct pmd_mdc_info *cinfo, struct pmd_layout *layout);
+struct pmd_layout *pmd_co_remove(struct pmd_mdc_info *cinfo, struct pmd_layout *layout);
+
+int pmd_smap_insert(struct mpool_descriptor *mp, struct pmd_layout *layout);
+
+int pmd_init(void) __cold;
+void pmd_exit(void) __cold;
+
+static inline bool objtype_user(enum obj_type_omf otype)
+{
+ return (otype == OMF_OBJ_MBLOCK || otype == OMF_OBJ_MLOG);
+}
+
+static inline u64 objid_make(u64 uniq, enum obj_type_omf otype, u8 cslot)
+{
+ return ((uniq << 12) | ((otype & 0xF) << 8) | (cslot & 0xFF));
+}
+
+static inline u64 objid_uniq(u64 objid)
+{
+ return (objid >> 12);
+}
+
+static inline u8 objid_slot(u64 objid)
+{
+ return (objid & 0xFF);
+}
+
+static inline bool objid_ckpt(u64 objid)
+{
+ return !(objid_uniq(objid) & (OBJID_UNIQ_DELTA - 1));
+}
+
+static inline u64 logid_make(u64 uniq, u8 cslot)
+{
+ return objid_make(uniq, OMF_OBJ_MLOG, cslot);
+}
+
+static inline bool objid_mdc0log(u64 objid)
+{
+ return ((objid == MDC0_OBJID_LOG1) || (objid == MDC0_OBJID_LOG2));
+}
+
+static inline enum obj_type_omf pmd_objid_type(u64 objid)
+{
+ enum obj_type_omf otype = objid_type(objid);
+
+ return objtype_valid(otype) ? otype : OMF_OBJ_UNDEF;
+}
+
+/* True if objid is an mpool user object (versus mpool metadata object). */
+static inline bool pmd_objid_isuser(u64 objid)
+{
+ return objtype_user(objid_type(objid)) && objid_slot(objid);
+}
+
+static inline void pmd_obj_put(struct pmd_layout *layout)
+{
+ kref_put(&layout->eld_ref, pmd_layout_release);
+}
+
+/* General mdc locking (has external callers...) */
+static inline void pmd_mdc_lock(struct mutex *lock, u8 slot)
+{
+ mutex_lock_nested(lock, slot > 0 ? PMD_MDC_NORMAL : PMD_MDC_ZERO);
+}
+
+static inline void pmd_mdc_unlock(struct mutex *lock)
+{
+ mutex_unlock(lock);
+}
+
+#endif /* MPOOL_PMD_OBJ_H */
diff --git a/drivers/mpool/sb.h b/drivers/mpool/sb.h
new file mode 100644
index 000000000000..673a5f742f7c
--- /dev/null
+++ b/drivers/mpool/sb.h
@@ -0,0 +1,162 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2015-2020 Micron Technology, Inc. All rights reserved.
+ */
+
+#ifndef MPOOL_SB_PRIV_H
+#define MPOOL_SB_PRIV_H
+
+#include "mpool_ioctl.h"
+
+struct pd_dev_parm;
+struct omf_sb_descriptor;
+struct pd_prop;
+
+/*
+ * Drives have 2 superblocks.
+ * + sb0 at byte offset 0
+ * + sb1 at byte offset SB_AREA_SZ
+ *
+ * Read: sb0 is the authoritative copy, other copies are not used.
+ * Updates: sb0 is updated first; if successful sb1 is updated
+ */
+/* Number of superblock per Physical Device. */
+#define SB_SB_COUNT 2
+
+/*
+ * Size in byte of the area occupied by a superblock. The superblock itself
+ * may be smaller, but always starts at the beginning of its area.
+ */
+#define SB_AREA_SZ (4096ULL)
+
+/*
+ * Size in byte of an area located just after the superblock areas.
+ * Not used in 1.0. Later can be used for MDC0 metadata and/or voting sets.
+ */
+#define MDC0MD_AREA_SZ (4096ULL)
+
+/*
+ * sb API functions
+ */
+
+/**
+ * sb_magic_check() - check for sb magic value
+ * @dparm: struct pd_dev_parm *
+ *
+ * Determine if the mpool magic value exists in at least one place where
+ * expected on drive pd. Does NOT imply drive has a valid superblock.
+ *
+ * Note: only pd.status and pd.parm must be set; no other pd fields accessed.
+ *
+ * Return: 1 if found, 0 if not found, -(errno) if error reading
+ */
+int sb_magic_check(struct pd_dev_parm *dparm);
+
+/**
+ * sb_write_new() - write superblock to new drive
+ * @dparm: struct pd_dev_parm *
+ * @sb: struct omf_sb_descriptor *
+ *
+ * Write superblock sb to new (non-pool) drive
+ *
+ * Note: only pd.status and pd.parm must be set; no other pd fields accessed.
+ *
+ * Return: 0 if successful; -errno otherwise
+ */
+int sb_write_new(struct pd_dev_parm *dparm, struct omf_sb_descriptor *sb);
+
+/**
+ * sb_write_update() - update superblock
+ * @dparm: "dparm" info is not used to fill up the super block, only "sb" content is used.
+ * @sb: "sb" content is written in the super block.
+ *
+ * Update superblock on pool drive
+ *
+ * Note: only pd.status and pd.parm must be set; no other pd fields accessed.
+ *
+ * Return: 0 if successful; -errno otherwise
+ */
+int sb_write_update(struct pd_dev_parm *dparm, struct omf_sb_descriptor *sb);
+
+/**
+ * sb_erase() - erase superblock
+ * @dparm: struct pd_dev_parm *
+ *
+ * Erase superblock on drive pd.
+ *
+ * Note: only pd.status and pd.parm must be set; no other pd fields accessed.
+ *
+ * Return: 0 if successful; -errno otherwise
+ */
+int sb_erase(struct pd_dev_parm *dparm);
+
+/**
+ * sb_read() - read superblock
+ * @dparm: struct pd_dev_parm *
+ * @sb: struct omf_sb_descriptor *
+ * @omf_ver: omf sb version
+ * @force:
+ *
+ * Read superblock from drive pd; make repairs as necessary.
+ *
+ * Note: only pd.status and pd.parm must be set; no other pd fields accessed.
+ *
+ * Return: 0 if successful; -errno otherwise
+ */
+int sb_read(struct pd_dev_parm *dparm, struct omf_sb_descriptor *sb, u16 *omf_ver, bool force);
+
+/**
+ * sbutil_mdc0_clear() - clear mdc0 of superblock
+ * @sb: struct omf_sb_descriptor *)
+ *
+ * Clear (set to zeros) mdc0 portion of sb.
+ *
+ * Return: void
+ */
+void sbutil_mdc0_clear(struct omf_sb_descriptor *sb);
+
+/**
+ * sbutil_mdc0_isclear() - Test if mdc0 is clear
+ * @sb: struct omf_sb_descriptor *
+ *
+ * Return: 1 if mdc0 portion of sb is clear.
+ */
+int sbutil_mdc0_isclear(struct omf_sb_descriptor *sb);
+
+/**
+ * sbutil_mdc0_copy() - copy mdc0 from one superblock to another
+ * @tgtsb: struct omf_sb_descriptor *
+ * @srcsb: struct omf_sb_descriptor *
+ *
+ * Copy mdc0 portion of srcsb to tgtsb.
+ *
+ * Return void
+ */
+void sbutil_mdc0_copy(struct omf_sb_descriptor *tgtsb, struct omf_sb_descriptor *srcsb);
+
+/**
+ * sbutil_mdc0_isvalid() - validate mdc0 of a superblock
+ * @sb: struct omf_sb_descriptor *
+ *
+ * Validate mdc0 portion of sb and extract mdparm.
+ * Return: 1 if valid and mdparm set; 0 otherwise.
+ */
+int sbutil_mdc0_isvalid(struct omf_sb_descriptor *sb);
+
+/**
+ * sb_zones_for_sbs() - compute how many zones are needed to contain the superblocks.
+ * @pd_prop:
+ */
+static inline u32 sb_zones_for_sbs(struct pd_prop *pd_prop)
+{
+ u32 zonebyte;
+
+ zonebyte = pd_prop->pdp_zparam.dvb_zonepg << PAGE_SHIFT;
+
+ return (2 * (SB_AREA_SZ + MDC0MD_AREA_SZ) + (zonebyte - 1)) / zonebyte;
+}
+
+int sb_init(void) __cold;
+void sb_exit(void) __cold;
+
+#endif /* MPOOL_SB_PRIV_H */
diff --git a/drivers/mpool/smap.h b/drivers/mpool/smap.h
new file mode 100644
index 000000000000..b9b72d3182c6
--- /dev/null
+++ b/drivers/mpool/smap.h
@@ -0,0 +1,334 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2015-2020 Micron Technology, Inc. All rights reserved.
+ */
+
+#ifndef MPOOL_SMAP_H
+#define MPOOL_SMAP_H
+
+#include <linux/mutex.h>
+#include <linux/spinlock.h>
+#include <linux/rbtree.h>
+#include <linux/workqueue.h>
+
+#include "mpool_ioctl.h"
+
+/* Forward Decls */
+struct mpool_usage;
+struct mpool_devprops;
+struct mc_smap_parms;
+struct mpool_descriptor;
+
+/*
+ * Common defs
+ */
+
+/**
+ * struct rmbkt - region map bucket
+ */
+struct rmbkt {
+ struct mutex pdi_rmlock;
+ struct rb_root pdi_rmroot;
+} ____cacheline_aligned;
+
+/**
+ * struct smap_zone -
+ * @smz_node:
+ * @smz_key:
+ * @smz_value:
+ */
+struct smap_zone {
+ struct rb_node smz_node;
+ u64 smz_key;
+ u64 smz_value;
+};
+
+/**
+ * enum smap_space_type - space allocation policy flag
+ * @SMAP_SPC_UNDEF:
+ * @SMAP_SPC_USABLE_ONLY: allocate from usable space only
+ * @SMAP_SPC_USABLE_2_SPARE: allocate from usable space first then spare
+ * if needed
+ * @SMAP_SPC_SPARE_ONLY: allocate from spare space only
+ * @SMAP_SPC_SPARE_2_USABLE: allocate from spare space first then usable
+ * if needed
+ */
+enum smap_space_type {
+ SMAP_SPC_UNDEF = 0,
+ SMAP_SPC_USABLE_ONLY = 1,
+ SMAP_SPC_USABLE_2_SPARE = 2,
+ SMAP_SPC_SPARE_ONLY = 3,
+ SMAP_SPC_SPARE_2_USABLE = 4
+};
+
+static inline int saptype_valid(enum smap_space_type saptype)
+{
+ return (saptype && saptype <= 4);
+}
+
+/*
+ * drive allocation info
+ *
+ * LOCKING:
+ * + rgnsz, rgnladdr: constants; no locking required
+ * + all other fields: protected by dalock
+ */
+
+/**
+ * struct smap_dev_alloc -
+ * @sda_dalock:
+ * @sda_rgnsz: number of zones per rgn, excepting last
+ * @sda_rgnladdr: address of first zone in last rgn
+ * @sda_rgnalloc: rgn last alloced from
+ * @sda_zoneeff: total zones (zonetot) minus bad zones
+ * @sda_utgt: target max usable zones to allocate
+ * @sda_uact: actual usable zones allocated
+ * @sda_stgt: target max spare zones to allocate
+ * @sda_sact actual spare zones allocated
+ *
+ * NOTE:
+ * + must maintain invariant that sact <= stgt
+ * + however it is possible for uact > utgt due to changing % spare
+ * zones or zone failures. this condition corrects when
+ * sufficient space is freed or if % spare zones is changed
+ * (again).
+ *
+ * Capacity pools and calcs:
+ * + total zones = zonetot
+ * + avail zones = zoneeff
+ * + usable zones = utgt which is (zoneeff * (1 - spzone/100))
+ * + free usable zones = max(0, utgt - uact); max handles uact > utgt
+ * + used zones = uact; possible for used > usable (uact > utgt)
+ * + spare zones = stgt which is (zoneeff - utgt)
+ * + free spare zones = (stgt - sact); guaranteed that sact <= stgt
+ */
+struct smap_dev_alloc {
+ spinlock_t sda_dalock;
+ u32 sda_rgnsz;
+ u32 sda_rgnladdr;
+ u32 sda_rgnalloc;
+ u32 sda_zoneeff;
+ u32 sda_utgt;
+ u32 sda_uact;
+ u32 sda_stgt;
+ u32 sda_sact;
+};
+
+struct smap_dev_znstats {
+ u64 sdv_total;
+ u64 sdv_avail;
+ u64 sdv_usable;
+ u64 sdv_fusable;
+ u64 sdv_spare;
+ u64 sdv_fspare;
+ u64 sdv_used;
+};
+
+/**
+ * smap_usage_work - delayed work struct for checking mpool free usable space usage
+ * @smapu_wstruct:
+ * @smapu_mp:
+ * @smapu_freepct: free space %
+ */
+struct smap_usage_work {
+ struct delayed_work smapu_wstruct;
+ struct mpool_descriptor *smapu_mp;
+ int smapu_freepct;
+};
+
+/*
+ * smap API functions
+ */
+
+/*
+ * Return: all smap fns can return -errno with the following errno values
+ * on failure:
+ * + -EINVAL = invalid fn args
+ * + -ENOSPC = unable to allocate requested space
+ * + -ENOMEM = insufficient memory to complete operation
+ */
+
+/*
+ * smap API usage notes:
+ * + During mpool activation call smap_insert() for all existing objects
+ * before calling smap_alloc() or smap_free().
+ */
+
+/**
+ * smap_mpool_init() - initialize the smaps for an initialized mpool_descriptor
+ * @mp: struct mpool_descriptor *
+ *
+ * smap_mpool_init must be called once per mpool as it is being activated.
+ *
+ * Init space maps for all drives in mpool that are empty except for
+ * superblocks; caller must ensure no other thread can access mp.
+ *
+ * TODO: Traversing smap rbtrees may need fix, since there may be unsafe
+ * erases within loops.
+ *
+ * Return:
+ * 0 if successful, -errno with the following errno values on failure:
+ * -EINVAL if spare zone percentage is > 100%,
+ * -EINVAL if rgn count is 0, or
+ * -EINVAL if zonecnt on one of the drives is < rgn count
+ * -ENOMEM if there is no memory available
+ */
+int smap_mpool_init(struct mpool_descriptor *mp);
+
+/**
+ * smap_mpool_free() - free smap structures in a mpool_descriptor
+ * @mp: struct mpool_descriptor *
+ *
+ * Free space maps for all drives in mpool; caller must ensure no other
+ * thread can access mp.
+ *
+ * Return: void
+ */
+void smap_mpool_free(struct mpool_descriptor *mp);
+
+/**
+ * smap_mpool_usage() - present stats of smap usage
+ * @mp: struct mpool_descriptor *
+ * @mclass: media class or MP_MED_ALL for all classes
+ * @usage: struct mpool_usage *
+ *
+ * Fill in stats with space usage for media class; if MP_MED_ALL
+ * report on all media classes; caller must hold mp.pdvlock.
+ *
+ * Locking: the caller should hold the pds_pdvlock at least in read to
+ * be protected against media classes updates.
+ */
+void smap_mpool_usage(struct mpool_descriptor *mp, u8 mclass, struct mpool_usage *usage);
+
+/**
+ * smap_drive_spares() - Set percentage of zones to set aside as spares
+ * @mp: struct mpool_descriptor *
+ * @mclassp: media class
+ * @spzone: percentage of zones to use as spares
+ *
+ * Set percent spare zones to spzone for drives in media class mclass;
+ * caller must hold mp.pdvlock.
+ *
+ * Locking: the caller should hold the pds_pdvlock at least in read to
+ * be protected against media classes updates.
+ *
+ * Return: 0 if successful; -errno otherwise
+ */
+int smap_drive_spares(struct mpool_descriptor *mp, enum mp_media_classp mclassp, u8 spzone);
+
+/**
+ * smap_drive_usage() - Fill in a given drive's portion of dprop struct.
+ * @mp: struct mpool_descriptor *
+ * @pdh: drive number within the mpool_descriptor
+ * @dprop: struct mpool_devprops *, structure to fill in
+ *
+ * Fill in usage portion of dprop for drive pdh; caller must hold mp.pdvlock
+ *
+ * Return: 0 if successful, -errno otherwise
+ */
+int smap_drive_usage(struct mpool_descriptor *mp, u16 pdh, struct mpool_devprops *dprop);
+
+/**
+ * smap_drive_init() - Initialize a specific drive within a mpool_descriptor
+ * @mp: struct mpool_descriptor *
+ * @mcsp: smap parameters
+ * @pdh: u16, drive number within the mpool_descriptor
+ *
+ * Init space map for pool drive pdh that is empty except for superblocks
+ * with a percent spare zones of spzone; caller must ensure pdh is not in use.
+ *
+ * Return: 0 if successful, -errno otherwise
+ */
+int smap_drive_init(struct mpool_descriptor *mp, struct mc_smap_parms *mcsp, u16 pdh);
+
+/**
+ * smap_drive_free() - Release resources for a specific drive
+ * @mp: struct mpool_descriptor *
+ * @pdh: u16, drive number within the mpool_descriptor
+ *
+ * Free space map for pool drive pdh including partial (failed) inits;
+ * caller must ensure pdh is not in use.
+ *
+ * Return: void
+ */
+void smap_drive_free(struct mpool_descriptor *mp, u16 pdh);
+
+/**
+ * smap_insert() - Inject an entry to an smap for existing object
+ * @mp: struct mpool_descriptor *
+ * @pdh: drive number within the mpool_descriptor
+ * @zoneaddr: starting zone for entry
+ * @zonecnt: number of zones in entry
+ *
+ * Add entry to space map for an existing object with a strip on drive pdh
+ * starting at zoneaddr and continuing for zonecnt blocks.
+ *
+ * Used, in part for superblocks.
+ *
+ * Return: 0 if successful, -errno otherwise
+ */
+int smap_insert(struct mpool_descriptor *mp, u16 pdh, u64 zoneaddr, u32 zonecnt);
+
+/**
+ * smap_alloc() - Allocate a new contiguous zone range on a specific drive
+ * @mp: struct mpool_descriptor
+ * @pdh: u16, drive number within the mpool_descriptor
+ * @zonecnt: u64, the number of zones requested
+ * @sapolicy: enum smap_space_type, usable only, spare only, etc.
+ * @zoneaddr: u64 *, the starting zone for the allocated range
+ * @align: no. of zones (must be a power-of-2)
+ *
+ * Attempt to allocate zonecnt contiguous zones on drive pdh
+ * in accordance with space allocation policy sapolicy.
+ *
+ * Return: 0 if succcessful; -errno otherwise
+ */
+int smap_alloc(struct mpool_descriptor *mp, u16 pdh, u64 zonecnt,
+ enum smap_space_type sapolicy, u64 *zoneaddr, u64 align);
+
+/**
+ * smap_free() - Free a previously allocated range of zones in the smap
+ * @mp: struct mpool_descriptor *
+ * @pdh: u16, number of the disk within the mpool_descriptor
+ * @zoneaddr: u64, starting zone for the range to free
+ * @zonecnt: u16, the number of zones in the range
+ *
+ * Free currently allocated space starting at zoneaddr
+ * and continuing for zonecnt blocks.
+ *
+ * Return: 0 if successful, -errno otherwise
+ */
+int smap_free(struct mpool_descriptor *mp, u16 pdh, u64 zoneaddr, u16 zonecnt);
+
+/*
+ * smap internal functions
+ */
+
+/**
+ * smap_mpool_usage() - Get the media class usage for a given mclass.
+ * @mp:
+ * @mclass: if MP_MED_ALL, return the sum of the stats for all media class,
+ * else the stats only for one media class.
+ * @usage: output
+ *
+ * Locking: the caller should hold the pds_pdvlock at least in read to
+ * be protected against media classes updates.
+ */
+void smap_mclass_usage(struct mpool_descriptor *mp, u8 mclass, struct mpool_usage *usage);
+
+/**
+ * smap_log_mpool_usage() - check drive mpool free usable space %, and log a message if needed
+ * @ws:
+ */
+void smap_log_mpool_usage(struct work_struct *ws);
+
+/**
+ * smap_wait_usage_done() - wait for periodical job for logging pd free usable space % to complete
+ * @mp:
+ */
+void smap_wait_usage_done(struct mpool_descriptor *mp);
+
+int smap_init(void) __cold;
+void smap_exit(void) __cold;
+
+#endif /* MPOOL_SMAP_H */
--
2.17.2