Re: [PATCH v2 07/18] x86/sgx: Use a list to track to-be-reclaimed pages during reclaim

From: Kristen Carlson Accardi
Date: Mon Dec 05 2022 - 11:34:32 EST


On Fri, 2022-12-02 at 14:33 -0800, Dave Hansen wrote:
> On 12/2/22 10:36, Kristen Carlson Accardi wrote:
> > From: Sean Christopherson <sean.j.christopherson@xxxxxxxxx>
> >
> > Change sgx_reclaim_pages() to use a list rather than an array for
> > storing the epc_pages which will be reclaimed. This change is
> > needed
> > to transition to the LRU implementation for EPC cgroup support.
> >
> > This change requires keeping track of whether newly recorded
> > EPC pages are pages for VA Arrays, or for Enclave data. In
> > addition,
> > helper functions are added to move pages from one list to another
> > and
> > enforce a consistent queue like behavior for the LRU lists.
>
> More changelog nit: Please use imperative voice, not passive voice.
> Move from:
>
>         In addition, helper functions are added
>
> to:
>
>         In addition, add helper functions
>
> > diff --git a/arch/x86/kernel/cpu/sgx/encl.c
> > b/arch/x86/kernel/cpu/sgx/encl.c
> > index 4683da9ef4f1..9ee306ac2a8e 100644
> > --- a/arch/x86/kernel/cpu/sgx/encl.c
> > +++ b/arch/x86/kernel/cpu/sgx/encl.c
> > @@ -252,7 +252,7 @@ static struct sgx_encl_page
> > *__sgx_encl_load_page(struct sgx_encl *encl,
> >                 epc_page = sgx_encl_eldu(&encl->secs, NULL);
> >                 if (IS_ERR(epc_page))
> >                         return ERR_CAST(epc_page);
> > -               sgx_record_epc_page(epc_page, 0);
> > +               sgx_record_epc_page(epc_page,
> > SGX_EPC_PAGE_ENCLAVE);
> >         }
>
> This is one of those patches where the first hunk seems like it is
> entirely disconnected from what the changelog made me expect I would
> see.
>
> I don't see sgx_reclaim_pages(), or lists or arrays.
>
> If you need to pass additional data down into a function, then do
> *that*
> in a separate patch.
>
> I'm glad it eventually got fixed up, but I don't really ever like to
> see
> bare integers that don't have obvious meaning:
>
>         sgx_record_epc_page(epc_page, 0);
>
> Even if you had:
>
> #define SGX_EPC_PAGE_RECLAIMER_UNTRACKED 0
>
>         sgx_record_epc_page(epc_page,
> SGX_EPC_PAGE_RECLAIMER_UNTRACKED);
>
> makes a *LOT* of sense compared to other callers that do
>
>         sgx_record_epc_page(epc_page,
> SGX_EPC_PAGE_RECLAIMER_TRACKED);
>
> >         epc_page = sgx_encl_eldu(entry, encl->secs.epc_page);
> > @@ -260,7 +260,8 @@ static struct sgx_encl_page
> > *__sgx_encl_load_page(struct sgx_encl *encl,
> >                 return ERR_CAST(epc_page);
> >  
> >         encl->secs_child_cnt++;
> > -       sgx_record_epc_page(entry->epc_page,
> > SGX_EPC_PAGE_RECLAIMER_TRACKED);
> > +       sgx_record_epc_page(entry->epc_page,
> > +                           (SGX_EPC_PAGE_ENCLAVE |
> > SGX_EPC_PAGE_RECLAIMER_TRACKED));
> >  
> >         return entry;
> >  }
> > @@ -1221,7 +1222,7 @@ struct sgx_epc_page *sgx_alloc_va_page(struct
> > sgx_encl *encl, bool reclaim)
> >                 sgx_encl_free_epc_page(epc_page);
> >                 return ERR_PTR(-EFAULT);
> >         }
> > -       sgx_record_epc_page(epc_page, 0);
> > +       sgx_record_epc_page(epc_page, SGX_EPC_PAGE_VERSION_ARRAY);
> >  
> >         return epc_page;
> >  }
> > diff --git a/arch/x86/kernel/cpu/sgx/ioctl.c
> > b/arch/x86/kernel/cpu/sgx/ioctl.c
> > index aca80a3f38a1..c3a9bffbc37e 100644
> > --- a/arch/x86/kernel/cpu/sgx/ioctl.c
> > +++ b/arch/x86/kernel/cpu/sgx/ioctl.c
> > @@ -114,7 +114,7 @@ static int sgx_encl_create(struct sgx_encl
> > *encl, struct sgx_secs *secs)
> >         encl->attributes = secs->attributes;
> >         encl->attributes_mask = SGX_ATTR_DEBUG | SGX_ATTR_MODE64BIT
> > | SGX_ATTR_KSS;
> >  
> > -       sgx_record_epc_page(encl->secs.epc_page, 0);
> > +       sgx_record_epc_page(encl->secs.epc_page,
> > SGX_EPC_PAGE_ENCLAVE);
> >  
> >         /* Set only after completion, as encl->lock has not been
> > taken. */
> >         set_bit(SGX_ENCL_CREATED, &encl->flags);
> > @@ -325,7 +325,8 @@ static int sgx_encl_add_page(struct sgx_encl
> > *encl, unsigned long src,
> >                         goto err_out;
> >         }
> >  
> > -       sgx_record_epc_page(encl_page->epc_page,
> > SGX_EPC_PAGE_RECLAIMER_TRACKED);
> > +       sgx_record_epc_page(encl_page->epc_page,
> > +                           (SGX_EPC_PAGE_ENCLAVE |
> > SGX_EPC_PAGE_RECLAIMER_TRACKED));
> >         mutex_unlock(&encl->lock);
> >         mmap_read_unlock(current->mm);
> >         return ret;
> > diff --git a/arch/x86/kernel/cpu/sgx/main.c
> > b/arch/x86/kernel/cpu/sgx/main.c
> > index bad72498b0a7..83aaf5cea7b9 100644
> > --- a/arch/x86/kernel/cpu/sgx/main.c
> > +++ b/arch/x86/kernel/cpu/sgx/main.c
> > @@ -288,37 +288,43 @@ static void sgx_reclaimer_write(struct
> > sgx_epc_page *epc_page,
> >   */
> >  static void __sgx_reclaim_pages(void)
> >  {
> > -       struct sgx_epc_page *chunk[SGX_NR_TO_SCAN];
> >         struct sgx_backing backing[SGX_NR_TO_SCAN];
> > +       struct sgx_epc_page *epc_page, *tmp;
> >         struct sgx_encl_page *encl_page;
> > -       struct sgx_epc_page *epc_page;
> >         pgoff_t page_index;
> > -       int cnt = 0;
> > +       LIST_HEAD(iso);
> >         int ret;
> >         int i;
> >  
> >         spin_lock(&sgx_global_lru.lock);
> >         for (i = 0; i < SGX_NR_TO_SCAN; i++) {
> > -               epc_page =
> > sgx_epc_pop_reclaimable(&sgx_global_lru);
> > +               epc_page =
> > sgx_epc_peek_reclaimable(&sgx_global_lru);
> >                 if (!epc_page)
> >                         break;
> >  
> >                 encl_page = epc_page->encl_owner;
> >  
> > +               if (WARN_ON_ONCE(!(epc_page->flags &
> > SGX_EPC_PAGE_ENCLAVE)))
> > +                       continue;
> > +
> >                 if (kref_get_unless_zero(&encl_page->encl-
> > >refcount) != 0) {
> >                         epc_page->flags |=
> > SGX_EPC_PAGE_RECLAIM_IN_PROGRESS;
> > -                       chunk[cnt++] = epc_page;
> > +                       list_move_tail(&epc_page->list, &iso);
> >                 } else {
> > -                       /* The owner is freeing the page. No need
> > to add the
> > -                        * page back to the list of reclaimable
> > pages.
> > +                       /* The owner is freeing the page, remove it
> > from the
> > +                        * LRU list
> >                          */
> >                         epc_page->flags &=
> > ~SGX_EPC_PAGE_RECLAIMER_TRACKED;
> > +                       list_del_init(&epc_page->list);
> >                 }
> >         }
> >         spin_unlock(&sgx_global_lru.lock);
> >  
> > -       for (i = 0; i < cnt; i++) {
> > -               epc_page = chunk[i];
> > +       if (list_empty(&iso))
> > +               return;
> > +
> > +       i = 0;
> > +       list_for_each_entry_safe(epc_page, tmp, &iso, list) {
> >                 encl_page = epc_page->encl_owner;
> >  
> >                 if (!sgx_reclaimer_age(epc_page))
> > @@ -333,6 +339,7 @@ static void __sgx_reclaim_pages(void)
> >                         goto skip;
> >                 }
> >  
> > +               i++;
> >                 encl_page->desc |= SGX_ENCL_PAGE_BEING_RECLAIMED;
> >                 mutex_unlock(&encl_page->encl->lock);
> >                 continue;
> > @@ -340,31 +347,25 @@ static void __sgx_reclaim_pages(void)
> >  skip:
> >                 spin_lock(&sgx_global_lru.lock);
> >                 epc_page->flags &=
> > ~SGX_EPC_PAGE_RECLAIM_IN_PROGRESS;
> > -               sgx_epc_push_reclaimable(&sgx_global_lru,
> > epc_page);
> > +               sgx_epc_move_reclaimable(&sgx_global_lru,
> > epc_page);
> >                 spin_unlock(&sgx_global_lru.lock);
> >  
> >                 kref_put(&encl_page->encl->refcount,
> > sgx_encl_release);
> > -
> > -               chunk[i] = NULL;
> >         }
> >  
> > -       for (i = 0; i < cnt; i++) {
> > -               epc_page = chunk[i];
> > -               if (epc_page)
> > -                       sgx_reclaimer_block(epc_page);
> > -       }
> > -
> > -       for (i = 0; i < cnt; i++) {
> > -               epc_page = chunk[i];
> > -               if (!epc_page)
> > -                       continue;
> > -
> > +       list_for_each_entry(epc_page, &iso, list)
> > +               sgx_reclaimer_block(epc_page);
> > +
> > +       i = 0;
> > +       list_for_each_entry_safe(epc_page, tmp, &iso, list) {
> >                 encl_page = epc_page->encl_owner;
> > -               sgx_reclaimer_write(epc_page, &backing[i]);
> > +               sgx_reclaimer_write(epc_page, &backing[i++]);
> >  
> >                 kref_put(&encl_page->encl->refcount,
> > sgx_encl_release);
> >                 epc_page->flags &= ~(SGX_EPC_PAGE_RECLAIMER_TRACKED
> > |
> > -                                   
> > SGX_EPC_PAGE_RECLAIM_IN_PROGRESS);
> > +                                   
> > SGX_EPC_PAGE_RECLAIM_IN_PROGRESS |
> > +                                    SGX_EPC_PAGE_ENCLAVE |
> > +                                    SGX_EPC_PAGE_VERSION_ARRAY);
> >  
> >                 sgx_free_epc_page(epc_page);
> >         }
> > @@ -505,6 +506,7 @@ struct sgx_epc_page *__sgx_alloc_epc_page(void)
> >  /**
> >   * sgx_record_epc_page() - Add a page to the LRU tracking
> >   * @page:      EPC page
> > + * @flags:     Reclaim flags for the page.
> >   *
> >   * Mark a page with the specified flags and add it to the
> > appropriate
> >   * (un)reclaimable list.
> > @@ -535,18 +537,19 @@ void sgx_record_epc_page(struct sgx_epc_page
> > *page, unsigned long flags)
> >  int sgx_drop_epc_page(struct sgx_epc_page *page)
> >  {
> >         spin_lock(&sgx_global_lru.lock);
> > -       if (page->flags & SGX_EPC_PAGE_RECLAIMER_TRACKED) {
> > -               /* The page is being reclaimed. */
> > -               if (page->flags & SGX_EPC_PAGE_RECLAIM_IN_PROGRESS)
> > {
> > -                       spin_unlock(&sgx_global_lru.lock);
> > -                       return -EBUSY;
> > -               }
> > -
> > -               page->flags &= ~SGX_EPC_PAGE_RECLAIMER_TRACKED;
> > +       if ((page->flags & SGX_EPC_PAGE_RECLAIMER_TRACKED) &&
> > +           (page->flags & SGX_EPC_PAGE_RECLAIM_IN_PROGRESS)) {
> > +               spin_unlock(&sgx_global_lru.lock);
> > +               return -EBUSY;
> >         }
> >         list_del(&page->list);
> >         spin_unlock(&sgx_global_lru.lock);
> >  
> > +       page->flags &= ~(SGX_EPC_PAGE_RECLAIMER_TRACKED |
> > +                        SGX_EPC_PAGE_RECLAIM_IN_PROGRESS |
> > +                        SGX_EPC_PAGE_ENCLAVE |
> > +                        SGX_EPC_PAGE_VERSION_ARRAY);
> > +
> >         return 0;
> >  }
> >  
> > diff --git a/arch/x86/kernel/cpu/sgx/sgx.h
> > b/arch/x86/kernel/cpu/sgx/sgx.h
> > index 37d66bc6ca27..ec8d567cd975 100644
> > --- a/arch/x86/kernel/cpu/sgx/sgx.h
> > +++ b/arch/x86/kernel/cpu/sgx/sgx.h
> > @@ -32,6 +32,8 @@
> >  #define SGX_EPC_PAGE_KVM_GUEST         BIT(2)
> >  /* page flag to indicate reclaim is in progress */
> >  #define SGX_EPC_PAGE_RECLAIM_IN_PROGRESS BIT(3)
> > +#define SGX_EPC_PAGE_ENCLAVE           BIT(4)
> > +#define SGX_EPC_PAGE_VERSION_ARRAY     BIT(5)
>
> Could you please spend some time to clearly document what each bit
> means?
>
> > +static inline void __sgx_epc_page_list_move(struct list_head
> > *list, struct sgx_epc_page *page)
> > +{
> > +       list_move_tail(&page->list, list);
> > +}
>
> I'm not sure I get the point of a helper like this.  Why not just
> have
> the caller call list_move() directly?
>
> >  /*
> >   * Must be called with queue lock acquired
> >   */
> > @@ -157,6 +167,38 @@ static inline void
> > sgx_epc_push_unreclaimable(struct sgx_epc_lru_lists *lrus,
> >         __sgx_epc_page_list_push(&(lrus)->unreclaimable, page);
> >  }
> >  
> > +/*
> > + * Must be called with queue lock acquired
> > + */
> > +static inline struct sgx_epc_page *
> > __sgx_epc_page_list_peek(struct list_head *list)
> > +{
> > +       struct sgx_epc_page *epc_page;
> > +
> > +       if (list_empty(list))
> > +               return NULL;
> > +
> > +       epc_page = list_first_entry(list, struct sgx_epc_page,
> > list);
> > +       return epc_page;
> > +}
>
> list_first_entry_or_null() perhaps?
>
> > +static inline struct sgx_epc_page *
> > +sgx_epc_peek_reclaimable(struct sgx_epc_lru_lists *lrus)
> > +{
> > +       return __sgx_epc_page_list_peek(&(lrus)->reclaimable);
> > +}
> > +
> > +static inline void sgx_epc_move_reclaimable(struct
> > sgx_epc_lru_lists *lru,
> > +                                           struct sgx_epc_page
> > *page)
> > +{
> > +       __sgx_epc_page_list_move(&(lru)->reclaimable, page);
> > +}
> > +
> > +static inline struct sgx_epc_page *
> > +sgx_epc_peek_unreclaimable(struct sgx_epc_lru_lists *lrus)
> > +{
> > +       return __sgx_epc_page_list_peek(&(lrus)->unreclaimable);
> > +}
>
> In general, I'm not becoming more fond of these helpers as the series
> goes along.  My worry is that they're an abstraction where we don't
> *really* need one.  I don't seem them growing much functionality as
> the
> series goes along.
>
> I'll reserve judgement until the end though.
>


The helpers were added because Jarrko requested a queue abstraction for
the sgx_epc_lru_lists data structure in the first round of reviews. the
simple one line inlines are effectively just renaming to make the queue
abstraction more obvious to the reader.