Re: [PATCH] mm: thp: Add new kernel parameters transparent_hugepage_defrag/khugepaged_defrag

From: Vlastimil Babka
Date: Wed Jun 03 2020 - 07:17:51 EST



On 6/3/20 8:50 AM, Gavin Guo wrote:
> There is no way to set up the defrag options in boot time. And it's
> useful to set it up by default instead of making it work by a
> systemd/upstart service or put the command to set up defrag inside
> /etc/rc.local.
>
> Signed-off-by: Gavin Guo <gavin.guo@xxxxxxxxxxxxx>

Well, maybe instead of adding these handlers, we could extend the new boot
parameter sysctl support (handling procfs /proc/sys/) to sysfs (/sys) as well,
as Eric already suggested? [1]

[1] https://lore.kernel.org/linux-api/87bloj2skm.fsf@xxxxxxxxxxxxxxxxxxxxx/

> ---
> .../admin-guide/kernel-parameters.txt | 18 ++++++++
> mm/huge_memory.c | 43 +++++++++++++++++++
> mm/khugepaged.c | 21 +++++++++
> 3 files changed, 82 insertions(+)
>
> diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
> index 6253849afac2..a9fd020d78db 100644
> --- a/Documentation/admin-guide/kernel-parameters.txt
> +++ b/Documentation/admin-guide/kernel-parameters.txt
> @@ -2149,6 +2149,16 @@
> kgdbwait [KGDB] Stop kernel execution and enter the
> kernel debugger at the earliest opportunity.
>
> + khugepaged_defrag=
> + [KNL]
> + Format: { "0" | "1" }
> + 0 - disable the defrag
> + 1 - enable the defrag
> + Control the defrag efforts when generating the
> + transparent hugepages through khugepaged.
> + See Documentation/admin-guide/mm/transhuge.rst
> + for more details.
> +
> kmac= [MIPS] korina ethernet MAC address.
> Configure the RouterBoard 532 series on-chip
> Ethernet adapter MAC address.
> @@ -5146,6 +5156,14 @@
> See Documentation/admin-guide/mm/transhuge.rst
> for more details.
>
> + transparent_hugepage_defrag=
> + [KNL]
> + Format: [always|defer|defer+madvise|madvise|never]
> + Control the defrag efforts when generating the
> + transparent hugepages.
> + See Documentation/admin-guide/mm/transhuge.rst
> + for more details.
> +
> tsc= Disable clocksource stability checks for TSC.
> Format: <string>
> [x86] reliable: mark tsc clocksource as reliable, this
> diff --git a/mm/huge_memory.c b/mm/huge_memory.c
> index 8091b780cd7a..86b20a3a1aac 100644
> --- a/mm/huge_memory.c
> +++ b/mm/huge_memory.c
> @@ -481,6 +481,49 @@ static int __init setup_transparent_hugepage(char *str)
> }
> __setup("transparent_hugepage=", setup_transparent_hugepage);
>
> +static int __init setup_transparent_hugepage_defrag(char *str)
> +{
> + int ret = 0;
> + if (!str)
> + goto out;
> + if (!strcmp(str, "always")) {
> + clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags);
> + clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags);
> + clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags);
> + set_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags);
> + ret = 1;
> + } else if (!strcmp(str, "defer+madvise")) {
> + clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags);
> + clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags);
> + clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags);
> + set_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags);
> + ret = 1;
> + } else if (!strcmp(str, "defer")) {
> + clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags);
> + clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags);
> + clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags);
> + set_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags);
> + ret = 1;
> + } else if (!strcmp(str, "madvise")) {
> + clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags);
> + clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags);
> + clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags);
> + set_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags);
> + ret = 1;
> + } else if (!strcmp(str, "never")) {
> + clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags);
> + clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags);
> + clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags);
> + clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags);
> + ret = 1;
> + }
> +out:
> + if (!ret)
> + pr_warn("transparent_hugepage_defrag= cannot parse, ignored\n");
> + return ret;
> +}
> +__setup("transparent_hugepage_defrag=", setup_transparent_hugepage_defrag);
> +
> pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma)
> {
> if (likely(vma->vm_flags & VM_WRITE))
> diff --git a/mm/khugepaged.c b/mm/khugepaged.c
> index b043c40a21d4..39bbf2107a23 100644
> --- a/mm/khugepaged.c
> +++ b/mm/khugepaged.c
> @@ -394,6 +394,27 @@ int __init khugepaged_init(void)
> return 0;
> }
>
> +static int __init setup_khugepaged_defrag(char *str)
> +{
> + int ret = 0;
> + if (!str)
> + goto out;
> + if (!strcmp(str, "0")) {
> + clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KHUGEPAGED_FLAG,
> + &transparent_hugepage_flags);
> + ret = 1;
> + } else if (!strcmp(str, "1")) {
> + set_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KHUGEPAGED_FLAG,
> + &transparent_hugepage_flags);
> + ret = 1;
> + }
> +out:
> + if (!ret)
> + pr_warn("khugepaged_defrag= cannot parse, ignored\n");
> + return ret;
> +}
> +__setup("khugepaged_defrag=", setup_khugepaged_defrag);
> +
> void __init khugepaged_destroy(void)
> {
> kmem_cache_destroy(mm_slot_cache);
>