Re: [RFC PATCH 9/9] mm: zswap: use zswap_entry_free() for partially initialized entries
From: Yosry Ahmed
Date: Thu Mar 28 2024 - 14:49:51 EST
On Thu, Mar 28, 2024 at 1:31 AM Chengming Zhou <chengming.zhou@xxxxxxxxx> wrote:
>
> On 2024/3/26 07:50, Yosry Ahmed wrote:
> > zswap_entry_free() performs four types of cleanups before freeing a
> > zswap_entry:
> > - Deletes the entry from the LRU.
> > - Frees compressed memory.
> > - Puts the pool reference.
> > - Uncharges the compressed memory and puts the objcg.
> >
> > zswap_entry_free() always expects a fully initialized entry. Allow
> > zswap_entry_free() to handle partially initialized entries by making it
> > possible to identify what cleanups are needed as follows:
> > - Allocate entries with __GFP_ZERO and initialize zswap_entry.lru when
> > the entry is allocated. Points are NULL and length is zero upon
> > initialization.
> > - Use zswap_entry.length to identify if there is compressed memory to
> > free. This is possible now that zero-filled pages are handled
> > separately, so a length of zero means we did not successfully compress
> > the page.
> > - Only initialize entry->objcg after the memory is charged in
> > zswap_store().
> >
> > With this in place, use zswap_entry_free() in the failure path of
> > zswap_store() to cleanup partially initialized entries. This simplifies
> > the cleanup code in zswap_store(). While we are at it, rename the
> > remaining cleanup labels to more meaningful names.
>
> Not sure if it's worthwhile to clean up the fail path with the normal path
> gets a little verbose.
I was on the fence about this, so I thought I would just send it and
see what others think.
On one hand it makes the initialization more robust because the
zswap_entry is always in a clean identifiable state, but on the other
hand it adds churn to the normal path as you noticed. Also after
removing handling zero-length entries from the failure path it isn't
that bad without this patch anyway.
So if no one else thinks this is useful, I will drop the patch in the
next version.
Thanks!
>
> >
> > Signed-off-by: Yosry Ahmed <yosryahmed@xxxxxxxxxx>
> > ---
> > mm/zswap.c | 62 ++++++++++++++++++++++++++----------------------------
> > 1 file changed, 30 insertions(+), 32 deletions(-)
> >
> > diff --git a/mm/zswap.c b/mm/zswap.c
> > index 9357328d940af..c50f9df230ca3 100644
> > --- a/mm/zswap.c
> > +++ b/mm/zswap.c
> > @@ -774,12 +774,13 @@ void zswap_memcg_offline_cleanup(struct mem_cgroup *memcg)
> > **********************************/
> > static struct kmem_cache *zswap_entry_cache;
> >
> > -static struct zswap_entry *zswap_entry_cache_alloc(gfp_t gfp, int nid)
> > +static struct zswap_entry *zswap_entry_cache_alloc(int nid)
> > {
> > struct zswap_entry *entry;
> > - entry = kmem_cache_alloc_node(zswap_entry_cache, gfp, nid);
> > - if (!entry)
> > - return NULL;
> > + entry = kmem_cache_alloc_node(zswap_entry_cache,
> > + GFP_KERNEL | __GFP_ZERO, nid);
> > + if (entry)
> > + INIT_LIST_HEAD(&entry->lru);
> > return entry;
> > }
> >
> > @@ -795,9 +796,12 @@ static struct zpool *zswap_find_zpool(struct zswap_entry *entry)
> >
> > static void zswap_entry_free(struct zswap_entry *entry)
> > {
> > - zswap_lru_del(&zswap_list_lru, entry);
> > - zpool_free(zswap_find_zpool(entry), entry->handle);
> > - zswap_pool_put(entry->pool);
> > + if (!list_empty(&entry->lru))
> > + zswap_lru_del(&zswap_list_lru, entry);
> > + if (entry->length)
> > + zpool_free(zswap_find_zpool(entry), entry->handle);
> > + if (entry->pool)
> > + zswap_pool_put(entry->pool);
> > if (entry->objcg) {
> > obj_cgroup_uncharge_zswap(entry->objcg, entry->length);
> > obj_cgroup_put(entry->objcg);
> > @@ -1447,7 +1451,7 @@ bool zswap_store(struct folio *folio)
> > return false;
> >
> > if (!zswap_enabled)
> > - goto check_old;
> > + goto erase_old;
> >
> > /* Check cgroup limits */
> > objcg = get_obj_cgroup_from_folio(folio);
> > @@ -1455,54 +1459,52 @@ bool zswap_store(struct folio *folio)
> > memcg = get_mem_cgroup_from_objcg(objcg);
> > if (shrink_memcg(memcg)) {
> > mem_cgroup_put(memcg);
> > - goto reject;
> > + goto put_objcg;
> > }
> > mem_cgroup_put(memcg);
> > }
> >
> > if (zswap_is_folio_zero_filled(folio)) {
> > if (zswap_store_zero_filled(tree, offset, objcg))
> > - goto reject;
> > + goto put_objcg;
> > goto stored;
> > }
> >
> > if (!zswap_non_zero_filled_pages_enabled)
> > - goto reject;
> > + goto put_objcg;
> >
> > if (!zswap_check_limit())
> > - goto reject;
> > + goto put_objcg;
> >
> > - entry = zswap_entry_cache_alloc(GFP_KERNEL, folio_nid(folio));
> > + entry = zswap_entry_cache_alloc(folio_nid(folio));
> > if (!entry) {
> > zswap_reject_kmemcache_fail++;
> > - goto reject;
> > + goto put_objcg;
> > }
> >
> > - /* if entry is successfully added, it keeps the reference */
> > entry->pool = zswap_pool_current_get();
> > if (!entry->pool)
> > - goto freepage;
> > + goto free_entry;
> >
> > if (objcg) {
> > memcg = get_mem_cgroup_from_objcg(objcg);
> > if (memcg_list_lru_alloc(memcg, &zswap_list_lru, GFP_KERNEL)) {
> > mem_cgroup_put(memcg);
> > - goto put_pool;
> > + goto free_entry;
> > }
> > mem_cgroup_put(memcg);
> > }
> >
> > if (!zswap_compress(folio, entry))
> > - goto put_pool;
> > -
> > - entry->swpentry = swp;
> > - entry->objcg = objcg;
> > + goto free_entry;
> >
> > if (zswap_tree_store(tree, offset, entry))
> > - goto store_failed;
> > + goto free_entry;
> >
> > - if (objcg)
> > + if (objcg) {
> > obj_cgroup_charge_zswap(objcg, entry->length);
> > + entry->objcg = objcg;
> > + }
> >
> > /*
> > * We finish initializing the entry while it's already in xarray.
> > @@ -1514,7 +1516,7 @@ bool zswap_store(struct folio *folio)
> > * The publishing order matters to prevent writeback from seeing
> > * an incoherent entry.
> > */
> > - INIT_LIST_HEAD(&entry->lru);
> > + entry->swpentry = swp;
> > zswap_lru_add(&zswap_list_lru, entry);
> >
> > stored:
> > @@ -1525,17 +1527,13 @@ bool zswap_store(struct folio *folio)
> >
> > return true;
> >
> > -store_failed:
> > - zpool_free(zswap_find_zpool(entry), entry->handle);
> > -put_pool:
> > - zswap_pool_put(entry->pool);
> > -freepage:
> > - zswap_entry_cache_free(entry);
> > -reject:
> > +free_entry:
> > + zswap_entry_free(entry);
> > +put_objcg:
> > obj_cgroup_put(objcg);
> > if (zswap_pool_reached_full)
> > queue_work(shrink_wq, &zswap_shrink_work);
> > -check_old:
> > +erase_old:
> > /*
> > * If the zswap store fails or zswap is disabled, we must invalidate the
> > * possibly stale entry which was previously stored at this offset.