Re: [PATCH] Make SPLIT_RSS_COUNTING configurable

From: Qian Cai
Date: Wed Oct 02 2019 - 21:56:11 EST




> On Oct 2, 2019, at 4:29 PM, Daniel Colascione <dancol@xxxxxxxxxx> wrote:
>
> Adding the correct linux-mm address.
>
>
>> On Wed, Oct 2, 2019 at 1:25 PM Daniel Colascione <dancol@xxxxxxxxxx> wrote:
>>
>> Using the new config option, users can disable SPLIT_RSS_COUNTING to
>> get increased accuracy in user-visible mm counters.
>>
>> Signed-off-by: Daniel Colascione <dancol@xxxxxxxxxx>
>> ---
>> include/linux/mm.h | 4 ++--
>> include/linux/mm_types_task.h | 5 ++---
>> include/linux/sched.h | 2 +-
>> kernel/fork.c | 2 +-
>> mm/Kconfig | 11 +++++++++++
>> mm/memory.c | 6 +++---
>> 6 files changed, 20 insertions(+), 10 deletions(-)
>>
>> diff --git a/include/linux/mm.h b/include/linux/mm.h
>> index cc292273e6ba..221395de3cb4 100644
>> --- a/include/linux/mm.h
>> +++ b/include/linux/mm.h
>> @@ -1637,7 +1637,7 @@ static inline unsigned long get_mm_counter(struct mm_struct *mm, int member)
>> {
>> long val = atomic_long_read(&mm->rss_stat.count[member]);
>>
>> -#ifdef SPLIT_RSS_COUNTING
>> +#ifdef CONFIG_SPLIT_RSS_COUNTING
>> /*
>> * counter is updated in asynchronous manner and may go to minus.
>> * But it's never be expected number for users.
>> @@ -1723,7 +1723,7 @@ static inline void setmax_mm_hiwater_rss(unsigned long *maxrss,
>> *maxrss = hiwater_rss;
>> }
>>
>> -#if defined(SPLIT_RSS_COUNTING)
>> +#ifdef CONFIG_SPLIT_RSS_COUNTING
>> void sync_mm_rss(struct mm_struct *mm);
>> #else
>> static inline void sync_mm_rss(struct mm_struct *mm)
>> diff --git a/include/linux/mm_types_task.h b/include/linux/mm_types_task.h
>> index c1bc6731125c..d2adc8057e65 100644
>> --- a/include/linux/mm_types_task.h
>> +++ b/include/linux/mm_types_task.h
>> @@ -48,14 +48,13 @@ enum {
>> NR_MM_COUNTERS
>> };
>>
>> -#if USE_SPLIT_PTE_PTLOCKS && defined(CONFIG_MMU)
>> -#define SPLIT_RSS_COUNTING
>> +#ifdef CONFIG_SPLIT_RSS_COUNTING
>> /* per-thread cached information, */
>> struct task_rss_stat {
>> int events; /* for synchronization threshold */
>> int count[NR_MM_COUNTERS];
>> };
>> -#endif /* USE_SPLIT_PTE_PTLOCKS */
>> +#endif /* CONFIG_SPLIT_RSS_COUNTING */
>>
>> struct mm_rss_stat {
>> atomic_long_t count[NR_MM_COUNTERS];
>> diff --git a/include/linux/sched.h b/include/linux/sched.h
>> index 2c2e56bd8913..22f354774540 100644
>> --- a/include/linux/sched.h
>> +++ b/include/linux/sched.h
>> @@ -729,7 +729,7 @@ struct task_struct {
>> /* Per-thread vma caching: */
>> struct vmacache vmacache;
>>
>> -#ifdef SPLIT_RSS_COUNTING
>> +#ifdef CONFIG_SPLIT_RSS_COUNTING
>> struct task_rss_stat rss_stat;
>> #endif
>> int exit_state;
>> diff --git a/kernel/fork.c b/kernel/fork.c
>> index f9572f416126..fc5e0889922b 100644
>> --- a/kernel/fork.c
>> +++ b/kernel/fork.c
>> @@ -1917,7 +1917,7 @@ static __latent_entropy struct task_struct *copy_process(
>> p->vtime.state = VTIME_INACTIVE;
>> #endif
>>
>> -#if defined(SPLIT_RSS_COUNTING)
>> +#ifdef CONFIG_SPLIT_RSS_COUNTING
>> memset(&p->rss_stat, 0, sizeof(p->rss_stat));
>> #endif
>>
>> diff --git a/mm/Kconfig b/mm/Kconfig
>> index a5dae9a7eb51..372ef9449924 100644
>> --- a/mm/Kconfig
>> +++ b/mm/Kconfig
>> @@ -736,4 +736,15 @@ config ARCH_HAS_PTE_SPECIAL
>> config ARCH_HAS_HUGEPD
>> bool
>>
>> +config SPLIT_RSS_COUNTING
>> + bool "Per-thread mm counter caching"
>> + depends on MMU
>> + default y if NR_CPUS >= SPLIT_PTLOCK_CPUS
>> + help
>> + Cache mm counter updates in thread structures and
>> + flush them to visible per-process statistics in batches.
>> + Say Y here to slightly reduce cache contention in processes
>> + with many threads at the expense of decreasing the accuracy
>> + of memory statistics in /proc.
>> +
>> endmenu

All those vague words are going to make developers almost impossible to decide the right selection here. It sounds like we should kill SPLIT_RSS_COUNTING at all to simplify the code as the benefit is so small vs the side-effect?

>> diff --git a/mm/memory.c b/mm/memory.c
>> index b1ca51a079f2..bf557ed5ba23 100644
>> --- a/mm/memory.c
>> +++ b/mm/memory.c
>> @@ -141,7 +141,7 @@ static int __init init_zero_pfn(void)
>> core_initcall(init_zero_pfn);
>>
>>
>> -#if defined(SPLIT_RSS_COUNTING)
>> +#ifdef CONFIG_SPLIT_RSS_COUNTING
>>
>> void sync_mm_rss(struct mm_struct *mm)
>> {
>> @@ -177,7 +177,7 @@ static void check_sync_rss_stat(struct task_struct *task)
>> if (unlikely(task->rss_stat.events++ > TASK_RSS_EVENTS_THRESH))
>> sync_mm_rss(task->mm);
>> }
>> -#else /* SPLIT_RSS_COUNTING */
>> +#else /* CONFIG_SPLIT_RSS_COUNTING */
>>
>> #define inc_mm_counter_fast(mm, member) inc_mm_counter(mm, member)
>> #define dec_mm_counter_fast(mm, member) dec_mm_counter(mm, member)
>> @@ -186,7 +186,7 @@ static void check_sync_rss_stat(struct task_struct *task)
>> {
>> }
>>
>> -#endif /* SPLIT_RSS_COUNTING */
>> +#endif /* CONFIG_SPLIT_RSS_COUNTING */
>>
>> /*
>> * Note: this doesn't free the actual pages themselves. That
>> --
>> 2.23.0.581.g78d2f28ef7-goog
>>