Re: [PATCH mm-unstable v1 1/5] mm/kvm: add mmu_notifier_test_clear_young()

From: Yu Zhao
Date: Thu Feb 23 2023 - 12:41:02 EST


On Thu, Feb 23, 2023 at 10:14 AM Sean Christopherson <seanjc@xxxxxxxxxx> wrote:
>
> On Thu, Feb 16, 2023, Yu Zhao wrote:
> > diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> > index 9c60384b5ae0..1b465df4a93d 100644
> > --- a/virt/kvm/kvm_main.c
> > +++ b/virt/kvm/kvm_main.c
> > @@ -875,6 +875,63 @@ static int kvm_mmu_notifier_clear_young(struct mmu_notifier *mn,
> > return kvm_handle_hva_range_no_flush(mn, start, end, kvm_age_gfn);
> > }
> >
> > +static bool kvm_test_clear_young(struct kvm *kvm, unsigned long start,
> > + unsigned long end, unsigned long *bitmap)
> > +{
> > + int i;
> > + int key;
> > + bool success = true;
> > +
> > + trace_kvm_age_hva(start, end);
> > +
> > + key = srcu_read_lock(&kvm->srcu);
> > +
> > + for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
> > + struct interval_tree_node *node;
> > + struct kvm_memslots *slots = __kvm_memslots(kvm, i);
> > +
> > + kvm_for_each_memslot_in_hva_range(node, slots, start, end - 1) {
> > + gfn_t lsb_gfn;
> > + unsigned long hva_start, hva_end;
> > + struct kvm_gfn_range range = {
> > + .slot = container_of(node, struct kvm_memory_slot,
> > + hva_node[slots->node_idx]),
> > + };
> > +
> > + hva_start = max(start, range.slot->userspace_addr);
> > + hva_end = min(end - 1, range.slot->userspace_addr +
> > + range.slot->npages * PAGE_SIZE - 1);
> > +
> > + range.start = hva_to_gfn_memslot(hva_start, range.slot);
> > + range.end = hva_to_gfn_memslot(hva_end, range.slot) + 1;
> > +
> > + if (WARN_ON_ONCE(range.end <= range.start))
> > + continue;
>
> Extend __kvm_handle_hva_range() instead of copy-pasting. At a very quick glance,
> I believe all that is needed is (minus sanity checks):

Yes, will do.

I do need to add one more field to struct kvm_gfn_range, because that's
what the current kvm_arch_test_clear_young() needs, assuming that
function is acceptable.

Also, as a side note: from MM's point of view, the following line in
__kvm_handle_hva_range() does not seem to handle end == 0, if that case
is possible:

  hva_end = min(range->end,
                slot->userspace_addr + (slot->npages << PAGE_SHIFT));

> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index d255964ec331..3296ae2cf6fa 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -544,6 +544,7 @@ struct kvm_hva_range {
> hva_handler_t handler;
> on_lock_fn_t on_lock;
> on_unlock_fn_t on_unlock;
> + bool lockless;
> bool flush_on_ret;
> bool may_block;
> };
> @@ -616,7 +617,7 @@ static __always_inline int __kvm_handle_hva_range(struct kvm *kvm,
> gfn_range.end = hva_to_gfn_memslot(hva_end + PAGE_SIZE - 1, slot);
> gfn_range.slot = slot;
>
> - if (!locked) {
> + if (!range->lockless && !locked) {
> locked = true;
> KVM_MMU_LOCK(kvm);
> if (!IS_KVM_NULL_FN(range->on_lock))
>
> > +
> > + /* see the comments on the generic kvm_arch_has_test_clear_young() */
> > + lsb_gfn = hva_to_gfn_memslot(end - 1, range.slot);
> > +
> > + success = kvm_arch_test_clear_young(kvm, &range, lsb_gfn, bitmap);
> > + if (!success)
> > + break;
> > + }
> > + }
> > +
> > + srcu_read_unlock(&kvm->srcu, key);
> > +
> > + return success;
> > +}