Re: [PATCH v2 07/18] x86/sgx: Use a list to track to-be-reclaimed pages during reclaim
From: Kristen Carlson Accardi
Date: Mon Dec 05 2022 - 11:34:32 EST
On Fri, 2022-12-02 at 14:33 -0800, Dave Hansen wrote:
> On 12/2/22 10:36, Kristen Carlson Accardi wrote:
> > From: Sean Christopherson <sean.j.christopherson@xxxxxxxxx>
> >
> > Change sgx_reclaim_pages() to use a list rather than an array for
> > storing the epc_pages which will be reclaimed. This change is
> > needed
> > to transition to the LRU implementation for EPC cgroup support.
> >
> > This change requires keeping track of whether newly recorded
> > EPC pages are pages for VA Arrays, or for Enclave data. In
> > addition,
> > helper functions are added to move pages from one list to another
> > and
> > enforce a consistent queue like behavior for the LRU lists.
>
> More changelog nit: Please use imperative voice, not passive voice.
> Move from:
>
> In addition, helper functions are added
>
> to:
>
> In addition, add helper functions
>
> > diff --git a/arch/x86/kernel/cpu/sgx/encl.c
> > b/arch/x86/kernel/cpu/sgx/encl.c
> > index 4683da9ef4f1..9ee306ac2a8e 100644
> > --- a/arch/x86/kernel/cpu/sgx/encl.c
> > +++ b/arch/x86/kernel/cpu/sgx/encl.c
> > @@ -252,7 +252,7 @@ static struct sgx_encl_page
> > *__sgx_encl_load_page(struct sgx_encl *encl,
> > epc_page = sgx_encl_eldu(&encl->secs, NULL);
> > if (IS_ERR(epc_page))
> > return ERR_CAST(epc_page);
> > - sgx_record_epc_page(epc_page, 0);
> > + sgx_record_epc_page(epc_page,
> > SGX_EPC_PAGE_ENCLAVE);
> > }
>
> This is one of those patches where the first hunk seems like it is
> entirely disconnected from what the changelog made me expect I would
> see.
>
> I don't see sgx_reclaim_pages(), or lists or arrays.
>
> If you need to pass additional data down into a function, then do
> *that*
> in a separate patch.
>
> I'm glad it eventually got fixed up, but I don't really ever like to
> see
> bare integers that don't have obvious meaning:
>
> sgx_record_epc_page(epc_page, 0);
>
> Even if you had:
>
> #define SGX_EPC_PAGE_RECLAIMER_UNTRACKED 0
>
> sgx_record_epc_page(epc_page,
> SGX_EPC_PAGE_RECLAIMER_UNTRACKED);
>
> makes a *LOT* of sense compared to other callers that do
>
> sgx_record_epc_page(epc_page,
> SGX_EPC_PAGE_RECLAIMER_TRACKED);
>
> > epc_page = sgx_encl_eldu(entry, encl->secs.epc_page);
> > @@ -260,7 +260,8 @@ static struct sgx_encl_page
> > *__sgx_encl_load_page(struct sgx_encl *encl,
> > return ERR_CAST(epc_page);
> >
> > encl->secs_child_cnt++;
> > - sgx_record_epc_page(entry->epc_page,
> > SGX_EPC_PAGE_RECLAIMER_TRACKED);
> > + sgx_record_epc_page(entry->epc_page,
> > + (SGX_EPC_PAGE_ENCLAVE |
> > SGX_EPC_PAGE_RECLAIMER_TRACKED));
> >
> > return entry;
> > }
> > @@ -1221,7 +1222,7 @@ struct sgx_epc_page *sgx_alloc_va_page(struct
> > sgx_encl *encl, bool reclaim)
> > sgx_encl_free_epc_page(epc_page);
> > return ERR_PTR(-EFAULT);
> > }
> > - sgx_record_epc_page(epc_page, 0);
> > + sgx_record_epc_page(epc_page, SGX_EPC_PAGE_VERSION_ARRAY);
> >
> > return epc_page;
> > }
> > diff --git a/arch/x86/kernel/cpu/sgx/ioctl.c
> > b/arch/x86/kernel/cpu/sgx/ioctl.c
> > index aca80a3f38a1..c3a9bffbc37e 100644
> > --- a/arch/x86/kernel/cpu/sgx/ioctl.c
> > +++ b/arch/x86/kernel/cpu/sgx/ioctl.c
> > @@ -114,7 +114,7 @@ static int sgx_encl_create(struct sgx_encl
> > *encl, struct sgx_secs *secs)
> > encl->attributes = secs->attributes;
> > encl->attributes_mask = SGX_ATTR_DEBUG | SGX_ATTR_MODE64BIT
> > | SGX_ATTR_KSS;
> >
> > - sgx_record_epc_page(encl->secs.epc_page, 0);
> > + sgx_record_epc_page(encl->secs.epc_page,
> > SGX_EPC_PAGE_ENCLAVE);
> >
> > /* Set only after completion, as encl->lock has not been
> > taken. */
> > set_bit(SGX_ENCL_CREATED, &encl->flags);
> > @@ -325,7 +325,8 @@ static int sgx_encl_add_page(struct sgx_encl
> > *encl, unsigned long src,
> > goto err_out;
> > }
> >
> > - sgx_record_epc_page(encl_page->epc_page,
> > SGX_EPC_PAGE_RECLAIMER_TRACKED);
> > + sgx_record_epc_page(encl_page->epc_page,
> > + (SGX_EPC_PAGE_ENCLAVE |
> > SGX_EPC_PAGE_RECLAIMER_TRACKED));
> > mutex_unlock(&encl->lock);
> > mmap_read_unlock(current->mm);
> > return ret;
> > diff --git a/arch/x86/kernel/cpu/sgx/main.c
> > b/arch/x86/kernel/cpu/sgx/main.c
> > index bad72498b0a7..83aaf5cea7b9 100644
> > --- a/arch/x86/kernel/cpu/sgx/main.c
> > +++ b/arch/x86/kernel/cpu/sgx/main.c
> > @@ -288,37 +288,43 @@ static void sgx_reclaimer_write(struct
> > sgx_epc_page *epc_page,
> > */
> > static void __sgx_reclaim_pages(void)
> > {
> > - struct sgx_epc_page *chunk[SGX_NR_TO_SCAN];
> > struct sgx_backing backing[SGX_NR_TO_SCAN];
> > + struct sgx_epc_page *epc_page, *tmp;
> > struct sgx_encl_page *encl_page;
> > - struct sgx_epc_page *epc_page;
> > pgoff_t page_index;
> > - int cnt = 0;
> > + LIST_HEAD(iso);
> > int ret;
> > int i;
> >
> > spin_lock(&sgx_global_lru.lock);
> > for (i = 0; i < SGX_NR_TO_SCAN; i++) {
> > - epc_page =
> > sgx_epc_pop_reclaimable(&sgx_global_lru);
> > + epc_page =
> > sgx_epc_peek_reclaimable(&sgx_global_lru);
> > if (!epc_page)
> > break;
> >
> > encl_page = epc_page->encl_owner;
> >
> > + if (WARN_ON_ONCE(!(epc_page->flags &
> > SGX_EPC_PAGE_ENCLAVE)))
> > + continue;
> > +
> > if (kref_get_unless_zero(&encl_page->encl-
> > >refcount) != 0) {
> > epc_page->flags |=
> > SGX_EPC_PAGE_RECLAIM_IN_PROGRESS;
> > - chunk[cnt++] = epc_page;
> > + list_move_tail(&epc_page->list, &iso);
> > } else {
> > - /* The owner is freeing the page. No need
> > to add the
> > - * page back to the list of reclaimable
> > pages.
> > + /* The owner is freeing the page, remove it
> > from the
> > + * LRU list
> > */
> > epc_page->flags &=
> > ~SGX_EPC_PAGE_RECLAIMER_TRACKED;
> > + list_del_init(&epc_page->list);
> > }
> > }
> > spin_unlock(&sgx_global_lru.lock);
> >
> > - for (i = 0; i < cnt; i++) {
> > - epc_page = chunk[i];
> > + if (list_empty(&iso))
> > + return;
> > +
> > + i = 0;
> > + list_for_each_entry_safe(epc_page, tmp, &iso, list) {
> > encl_page = epc_page->encl_owner;
> >
> > if (!sgx_reclaimer_age(epc_page))
> > @@ -333,6 +339,7 @@ static void __sgx_reclaim_pages(void)
> > goto skip;
> > }
> >
> > + i++;
> > encl_page->desc |= SGX_ENCL_PAGE_BEING_RECLAIMED;
> > mutex_unlock(&encl_page->encl->lock);
> > continue;
> > @@ -340,31 +347,25 @@ static void __sgx_reclaim_pages(void)
> > skip:
> > spin_lock(&sgx_global_lru.lock);
> > epc_page->flags &=
> > ~SGX_EPC_PAGE_RECLAIM_IN_PROGRESS;
> > - sgx_epc_push_reclaimable(&sgx_global_lru,
> > epc_page);
> > + sgx_epc_move_reclaimable(&sgx_global_lru,
> > epc_page);
> > spin_unlock(&sgx_global_lru.lock);
> >
> > kref_put(&encl_page->encl->refcount,
> > sgx_encl_release);
> > -
> > - chunk[i] = NULL;
> > }
> >
> > - for (i = 0; i < cnt; i++) {
> > - epc_page = chunk[i];
> > - if (epc_page)
> > - sgx_reclaimer_block(epc_page);
> > - }
> > -
> > - for (i = 0; i < cnt; i++) {
> > - epc_page = chunk[i];
> > - if (!epc_page)
> > - continue;
> > -
> > + list_for_each_entry(epc_page, &iso, list)
> > + sgx_reclaimer_block(epc_page);
> > +
> > + i = 0;
> > + list_for_each_entry_safe(epc_page, tmp, &iso, list) {
> > encl_page = epc_page->encl_owner;
> > - sgx_reclaimer_write(epc_page, &backing[i]);
> > + sgx_reclaimer_write(epc_page, &backing[i++]);
> >
> > kref_put(&encl_page->encl->refcount,
> > sgx_encl_release);
> > epc_page->flags &= ~(SGX_EPC_PAGE_RECLAIMER_TRACKED
> > |
> > -
> > SGX_EPC_PAGE_RECLAIM_IN_PROGRESS);
> > +
> > SGX_EPC_PAGE_RECLAIM_IN_PROGRESS |
> > + SGX_EPC_PAGE_ENCLAVE |
> > + SGX_EPC_PAGE_VERSION_ARRAY);
> >
> > sgx_free_epc_page(epc_page);
> > }
> > @@ -505,6 +506,7 @@ struct sgx_epc_page *__sgx_alloc_epc_page(void)
> > /**
> > * sgx_record_epc_page() - Add a page to the LRU tracking
> > * @page: EPC page
> > + * @flags: Reclaim flags for the page.
> > *
> > * Mark a page with the specified flags and add it to the
> > appropriate
> > * (un)reclaimable list.
> > @@ -535,18 +537,19 @@ void sgx_record_epc_page(struct sgx_epc_page
> > *page, unsigned long flags)
> > int sgx_drop_epc_page(struct sgx_epc_page *page)
> > {
> > spin_lock(&sgx_global_lru.lock);
> > - if (page->flags & SGX_EPC_PAGE_RECLAIMER_TRACKED) {
> > - /* The page is being reclaimed. */
> > - if (page->flags & SGX_EPC_PAGE_RECLAIM_IN_PROGRESS)
> > {
> > - spin_unlock(&sgx_global_lru.lock);
> > - return -EBUSY;
> > - }
> > -
> > - page->flags &= ~SGX_EPC_PAGE_RECLAIMER_TRACKED;
> > + if ((page->flags & SGX_EPC_PAGE_RECLAIMER_TRACKED) &&
> > + (page->flags & SGX_EPC_PAGE_RECLAIM_IN_PROGRESS)) {
> > + spin_unlock(&sgx_global_lru.lock);
> > + return -EBUSY;
> > }
> > list_del(&page->list);
> > spin_unlock(&sgx_global_lru.lock);
> >
> > + page->flags &= ~(SGX_EPC_PAGE_RECLAIMER_TRACKED |
> > + SGX_EPC_PAGE_RECLAIM_IN_PROGRESS |
> > + SGX_EPC_PAGE_ENCLAVE |
> > + SGX_EPC_PAGE_VERSION_ARRAY);
> > +
> > return 0;
> > }
> >
> > diff --git a/arch/x86/kernel/cpu/sgx/sgx.h
> > b/arch/x86/kernel/cpu/sgx/sgx.h
> > index 37d66bc6ca27..ec8d567cd975 100644
> > --- a/arch/x86/kernel/cpu/sgx/sgx.h
> > +++ b/arch/x86/kernel/cpu/sgx/sgx.h
> > @@ -32,6 +32,8 @@
> > #define SGX_EPC_PAGE_KVM_GUEST BIT(2)
> > /* page flag to indicate reclaim is in progress */
> > #define SGX_EPC_PAGE_RECLAIM_IN_PROGRESS BIT(3)
> > +#define SGX_EPC_PAGE_ENCLAVE BIT(4)
> > +#define SGX_EPC_PAGE_VERSION_ARRAY BIT(5)
>
> Could you please spend some time to clearly document what each bit
> means?
>
> > +static inline void __sgx_epc_page_list_move(struct list_head
> > *list, struct sgx_epc_page *page)
> > +{
> > + list_move_tail(&page->list, list);
> > +}
>
> I'm not sure I get the point of a helper like this. Why not just
> have
> the caller call list_move() directly?
>
> > /*
> > * Must be called with queue lock acquired
> > */
> > @@ -157,6 +167,38 @@ static inline void
> > sgx_epc_push_unreclaimable(struct sgx_epc_lru_lists *lrus,
> > __sgx_epc_page_list_push(&(lrus)->unreclaimable, page);
> > }
> >
> > +/*
> > + * Must be called with queue lock acquired
> > + */
> > +static inline struct sgx_epc_page *
> > __sgx_epc_page_list_peek(struct list_head *list)
> > +{
> > + struct sgx_epc_page *epc_page;
> > +
> > + if (list_empty(list))
> > + return NULL;
> > +
> > + epc_page = list_first_entry(list, struct sgx_epc_page,
> > list);
> > + return epc_page;
> > +}
>
> list_first_entry_or_null() perhaps?
>
> > +static inline struct sgx_epc_page *
> > +sgx_epc_peek_reclaimable(struct sgx_epc_lru_lists *lrus)
> > +{
> > + return __sgx_epc_page_list_peek(&(lrus)->reclaimable);
> > +}
> > +
> > +static inline void sgx_epc_move_reclaimable(struct
> > sgx_epc_lru_lists *lru,
> > + struct sgx_epc_page
> > *page)
> > +{
> > + __sgx_epc_page_list_move(&(lru)->reclaimable, page);
> > +}
> > +
> > +static inline struct sgx_epc_page *
> > +sgx_epc_peek_unreclaimable(struct sgx_epc_lru_lists *lrus)
> > +{
> > + return __sgx_epc_page_list_peek(&(lrus)->unreclaimable);
> > +}
>
> In general, I'm not becoming more fond of these helpers as the series
> goes along. My worry is that they're an abstraction where we don't
> *really* need one. I don't seem them growing much functionality as
> the
> series goes along.
>
> I'll reserve judgement until the end though.
>
The helpers were added because Jarrko requested a queue abstraction for
the sgx_epc_lru_lists data structure in the first round of reviews. the
simple one line inlines are effectively just renaming to make the queue
abstraction more obvious to the reader.