Re: [RFC PATCH v2 1/9] mm/zswap: expose range state for swapin policy

From: Fujunjie

Date: Sun May 31 2026 - 09:48:07 EST




On 5/30/2026 2:35 AM, Nhat Pham wrote:
> On Fri, May 29, 2026 at 5:19 AM fujunjie <fujunjie1@xxxxxx> wrote:
>>
>> Large folio swapin needs to know whether a candidate swap range is fully
>> backed by zswap before it can choose an order. That decision should stay
>> in common swapin code, not inside zswap.
>>
>> Export two zswap facts for that caller: a lockless range occupancy snapshot
>> and the current zswap reclaim-pressure state. The range state is
>> advisory only. Writeback or invalidation can change the backend after the
>> snapshot, so users must recheck before issuing large-folio IO.
>>
>> Signed-off-by: fujunjie <fujunjie1@xxxxxx>
>> ---
>> include/linux/zswap.h | 26 +++++++++++++++++++++++++
>> mm/zswap.c | 44 +++++++++++++++++++++++++++++++++++++++++++
>> 2 files changed, 70 insertions(+)
>>
>> diff --git a/include/linux/zswap.h b/include/linux/zswap.h
>> index 30c193a1207e..8f9aee97517c 100644
>> --- a/include/linux/zswap.h
>> +++ b/include/linux/zswap.h
>> @@ -9,6 +9,18 @@ struct lruvec;
>>
>> extern atomic_long_t zswap_stored_pages;
>>
>> +/*
>> + * Advisory zswap occupancy snapshot for a swap range. This is not a complete
>> + * backend classifier; callers must recheck before depending on ALL_ZSWAP for
>> + * large-folio IO.
>> + */
>> +enum zswap_range_state {
>> + ZSWAP_RANGE_NEVER_ENABLED,
>> + ZSWAP_RANGE_NO_ZSWAP,
>> + ZSWAP_RANGE_ALL_ZSWAP,
>> + ZSWAP_RANGE_MIXED,
>> +};
>> +
>> #ifdef CONFIG_ZSWAP
>>
>> struct zswap_lruvec_state {
>> @@ -27,6 +39,9 @@ struct zswap_lruvec_state {
>> unsigned long zswap_total_pages(void);
>> bool zswap_store(struct folio *folio);
>> int zswap_load(struct folio *folio);
>> +enum zswap_range_state zswap_probe_range(swp_entry_t swp,
>> + unsigned int nr_pages);
>> +bool zswap_pool_reclaim_pressure(void);
>> void zswap_invalidate(swp_entry_t swp);
>> int zswap_swapon(int type, unsigned long nr_pages);
>> void zswap_swapoff(int type);
>> @@ -49,6 +64,17 @@ static inline int zswap_load(struct folio *folio)
>> return -ENOENT;
>> }
>>
>> +static inline enum zswap_range_state zswap_probe_range(swp_entry_t swp,
>> + unsigned int nr_pages)
>> +{
>> + return ZSWAP_RANGE_NEVER_ENABLED;
>> +}
>> +
>> +static inline bool zswap_pool_reclaim_pressure(void)
>> +{
>> + return false;
>> +}
>> +
>> static inline void zswap_invalidate(swp_entry_t swp) {}
>> static inline int zswap_swapon(int type, unsigned long nr_pages)
>> {
>> diff --git a/mm/zswap.c b/mm/zswap.c
>> index 761cd699e0a3..da5297f7bd69 100644
>> --- a/mm/zswap.c
>> +++ b/mm/zswap.c
>> @@ -506,6 +506,19 @@ unsigned long zswap_total_pages(void)
>> return total;
>> }
>>
>> +/*
>> + * Expose whether zswap reclaim pressure is active. This is a backend fact:
>> + * zswap_check_limits() sets the state once the pool reaches the hard limit and
>> + * keeps it set until the pool falls below the accept threshold.
>> + */
>> +bool zswap_pool_reclaim_pressure(void)
>> +{
>> + if (zswap_never_enabled())
>> + return false;
>> +
>> + return READ_ONCE(zswap_pool_reached_full);
>> +}
>> +
>> static bool zswap_check_limits(void)
>> {
>> unsigned long cur_pages = zswap_total_pages();
>> @@ -1559,6 +1572,37 @@ bool zswap_store(struct folio *folio)
>> return ret;
>> }
>>
>> +enum zswap_range_state zswap_probe_range(swp_entry_t swp,
>> + unsigned int nr_pages)
>> +{
>> + unsigned int type = swp_type(swp);
>> + pgoff_t offset = swp_offset(swp);
>> + bool present = false, missing = false;
>> + unsigned int i;
>> +
>> + /*
>> + * This is an advisory, lockless snapshot for common swapin admission.
>> + * Callers must recheck before depending on an all-zswap range for IO:
>> + * concurrent writeback or invalidation can change the backend state.
>> + */
>> + if (zswap_never_enabled())
>> + return ZSWAP_RANGE_NEVER_ENABLED;
>> +
>> + for (i = 0; i < nr_pages; i++) {
>> + struct xarray *tree = swap_zswap_tree(swp_entry(type, offset + i));
>> +
>> + if (xa_load(tree, offset + i))
>> + present = true;
>> + else
>> + missing = true;
>> +
>> + if (present && missing)
>> + return ZSWAP_RANGE_MIXED;
>> + }
>
> Can we use xas_load() to make this check more efficient? IIUC,
> xa_load() walks the tree every time.
>
> (We used to use a bitmap here back in frontswap days. Good times....)

Thanks for your review.

I'll switch this to xas_load() in v3 so the loop no longer re-walks the tree for every offset.