Re: [RFC PATCH v2 1/9] mm/zswap: expose range state for swapin policy

From: Nhat Pham

Date: Fri May 29 2026 - 14:36:20 EST


On Fri, May 29, 2026 at 5:19 AM fujunjie <fujunjie1@xxxxxx> wrote:
>
> Large folio swapin needs to know whether a candidate swap range is fully
> backed by zswap before it can choose an order. That decision should stay
> in common swapin code, not inside zswap.
>
> Export two zswap facts for that caller: a lockless range occupancy snapshot
> and the current zswap reclaim-pressure state. The range state is
> advisory only. Writeback or invalidation can change the backend after the
> snapshot, so users must recheck before issuing large-folio IO.
>
> Signed-off-by: fujunjie <fujunjie1@xxxxxx>
> ---
> include/linux/zswap.h | 26 +++++++++++++++++++++++++
> mm/zswap.c | 44 +++++++++++++++++++++++++++++++++++++++++++
> 2 files changed, 70 insertions(+)
>
> diff --git a/include/linux/zswap.h b/include/linux/zswap.h
> index 30c193a1207e..8f9aee97517c 100644
> --- a/include/linux/zswap.h
> +++ b/include/linux/zswap.h
> @@ -9,6 +9,18 @@ struct lruvec;
>
> extern atomic_long_t zswap_stored_pages;
>
> +/*
> + * Advisory zswap occupancy snapshot for a swap range. This is not a complete
> + * backend classifier; callers must recheck before depending on ALL_ZSWAP for
> + * large-folio IO.
> + */
> +enum zswap_range_state {
> + ZSWAP_RANGE_NEVER_ENABLED,
> + ZSWAP_RANGE_NO_ZSWAP,
> + ZSWAP_RANGE_ALL_ZSWAP,
> + ZSWAP_RANGE_MIXED,
> +};
> +
> #ifdef CONFIG_ZSWAP
>
> struct zswap_lruvec_state {
> @@ -27,6 +39,9 @@ struct zswap_lruvec_state {
> unsigned long zswap_total_pages(void);
> bool zswap_store(struct folio *folio);
> int zswap_load(struct folio *folio);
> +enum zswap_range_state zswap_probe_range(swp_entry_t swp,
> + unsigned int nr_pages);
> +bool zswap_pool_reclaim_pressure(void);
> void zswap_invalidate(swp_entry_t swp);
> int zswap_swapon(int type, unsigned long nr_pages);
> void zswap_swapoff(int type);
> @@ -49,6 +64,17 @@ static inline int zswap_load(struct folio *folio)
> return -ENOENT;
> }
>
> +static inline enum zswap_range_state zswap_probe_range(swp_entry_t swp,
> + unsigned int nr_pages)
> +{
> + return ZSWAP_RANGE_NEVER_ENABLED;
> +}
> +
> +static inline bool zswap_pool_reclaim_pressure(void)
> +{
> + return false;
> +}
> +
> static inline void zswap_invalidate(swp_entry_t swp) {}
> static inline int zswap_swapon(int type, unsigned long nr_pages)
> {
> diff --git a/mm/zswap.c b/mm/zswap.c
> index 761cd699e0a3..da5297f7bd69 100644
> --- a/mm/zswap.c
> +++ b/mm/zswap.c
> @@ -506,6 +506,19 @@ unsigned long zswap_total_pages(void)
> return total;
> }
>
> +/*
> + * Expose whether zswap reclaim pressure is active. This is a backend fact:
> + * zswap_check_limits() sets the state once the pool reaches the hard limit and
> + * keeps it set until the pool falls below the accept threshold.
> + */
> +bool zswap_pool_reclaim_pressure(void)
> +{
> + if (zswap_never_enabled())
> + return false;
> +
> + return READ_ONCE(zswap_pool_reached_full);
> +}
> +
> static bool zswap_check_limits(void)
> {
> unsigned long cur_pages = zswap_total_pages();
> @@ -1559,6 +1572,37 @@ bool zswap_store(struct folio *folio)
> return ret;
> }
>
> +enum zswap_range_state zswap_probe_range(swp_entry_t swp,
> + unsigned int nr_pages)
> +{
> + unsigned int type = swp_type(swp);
> + pgoff_t offset = swp_offset(swp);
> + bool present = false, missing = false;
> + unsigned int i;
> +
> + /*
> + * This is an advisory, lockless snapshot for common swapin admission.
> + * Callers must recheck before depending on an all-zswap range for IO:
> + * concurrent writeback or invalidation can change the backend state.
> + */
> + if (zswap_never_enabled())
> + return ZSWAP_RANGE_NEVER_ENABLED;
> +
> + for (i = 0; i < nr_pages; i++) {
> + struct xarray *tree = swap_zswap_tree(swp_entry(type, offset + i));
> +
> + if (xa_load(tree, offset + i))
> + present = true;
> + else
> + missing = true;
> +
> + if (present && missing)
> + return ZSWAP_RANGE_MIXED;
> + }

Can we use xas_load() (with an XA_STATE cursor) to make this check more
efficient? IIUC, xa_load() walks the tree from the root on every call.

(We used to use a bitmap here back in frontswap days. Good times....)