Re: [PATCH RFC v2 3/4] mm/ksm: introduce force_madvise knob

From: Aaron Tomlin
Date: Tue May 14 2019 - 09:24:32 EST


On Tue 2019-05-14 15:16 +0200, Oleksandr Natalenko wrote:
> Present a new sysfs knob to mark task's anonymous memory as mergeable.
>
> To force merging task's VMAs, its PID is echoed in a write-only file:
>
> # echo PID > /sys/kernel/mm/ksm/force_madvise
>
> Force unmerging is done similarly, but with "minus" sign:
>
> # echo -PID > /sys/kernel/mm/ksm/force_madvise
>
> "0" or "-0" can be used to control the current task.
>
> To achieve this, previously introduced ksm_enter()/ksm_leave() helpers
> are used in the "store" handler.
>
> Signed-off-by: Oleksandr Natalenko <oleksandr@xxxxxxxxxx>
> ---
> mm/ksm.c | 68 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
> 1 file changed, 68 insertions(+)
>
> diff --git a/mm/ksm.c b/mm/ksm.c
> index e9f3901168bb..22c59fb03d3a 100644
> --- a/mm/ksm.c
> +++ b/mm/ksm.c
> @@ -2879,10 +2879,77 @@ static void wait_while_offlining(void)
>
> #define KSM_ATTR_RO(_name) \
> static struct kobj_attribute _name##_attr = __ATTR_RO(_name)
> +#define KSM_ATTR_WO(_name) \
> + static struct kobj_attribute _name##_attr = __ATTR_WO(_name)
> #define KSM_ATTR(_name) \
> static struct kobj_attribute _name##_attr = \
> __ATTR(_name, 0644, _name##_show, _name##_store)
>
> +static ssize_t force_madvise_store(struct kobject *kobj,
> + struct kobj_attribute *attr,
> + const char *buf, size_t count)
> +{
> + int err;
> + pid_t pid;
> + bool merge = true;
> + struct task_struct *tsk;
> + struct mm_struct *mm;
> + struct vm_area_struct *vma;
> +
> + err = kstrtoint(buf, 10, &pid);
> + if (err)
> + return -EINVAL;
> +
> + if (pid < 0) {
> + pid = abs(pid);
> + merge = false;
> + }
> +
> + if (!pid && *buf == '-')
> + merge = false;
> +
> + rcu_read_lock();
> + if (pid) {
> + tsk = find_task_by_vpid(pid);
> + if (!tsk) {
> + err = -ESRCH;
> + rcu_read_unlock();
> + goto out;
> + }
> + } else {
> + tsk = current;
> + }
> +
> + tsk = tsk->group_leader;
> +
> + get_task_struct(tsk);
> + rcu_read_unlock();
> +
> + mm = get_task_mm(tsk);
> + if (!mm) {
> + err = -EINVAL;
> + goto out_put_task_struct;
> + }
> + down_write(&mm->mmap_sem);
> + vma = mm->mmap;
> + while (vma) {
> + if (merge)
> + ksm_enter(vma->vm_mm, vma, &vma->vm_flags);
> + else
> + ksm_leave(vma, vma->vm_start, vma->vm_end, &vma->vm_flags);
> + vma = vma->vm_next;
> + }
> + up_write(&mm->mmap_sem);
> + mmput(mm);
> +
> +out_put_task_struct:
> + put_task_struct(tsk);
> +
> +out:
> + return err ? err : count;
> +}
> +KSM_ATTR_WO(force_madvise);
> +
> static ssize_t sleep_millisecs_show(struct kobject *kobj,
> struct kobj_attribute *attr, char *buf)
> {
> @@ -3185,6 +3252,7 @@ static ssize_t full_scans_show(struct kobject *kobj,
> KSM_ATTR_RO(full_scans);
>
> static struct attribute *ksm_attrs[] = {
> + &force_madvise_attr.attr,
> &sleep_millisecs_attr.attr,
> &pages_to_scan_attr.attr,
> &run_attr.attr,

Looks fine to me.

Reviewed-by: Aaron Tomlin <atomlin@xxxxxxxxxx>

--
Aaron Tomlin