Re: [PATCH] Add slowpath enter/exit trace events

From: peter enderborg
Date: Thu Nov 23 2017 - 07:35:21 EST


Monitoring both enter/exit for all allocations and track down the one that are slow will be a very
big load on mobile devices or embedded device consuming a lot of battery and cpu. With this we can do useful monitoring
on devices on our field tests with real usage.

On 11/23/2017 01:25 PM, Michal Hocko wrote:
> On Thu 23-11-17 11:43:36, peter.enderborg@xxxxxxxx wrote:
>> From: Peter Enderborg <peter.enderborg@xxxxxxxx>
>>
>> The warning of slow allocation has been removed, this is
>> a other way to fetch that information. But you need
>> to enable the trace. The exit function also returns
>> information about the number of retries, how long
>> it was stalled and failure reason if that happened.
> I think this is just too excessive. We already have a tracepoint for the
> allocation exit. All we need is an entry to have a base to compare with.
> Another usecase would be to measure allocation latency. Information you
> are adding can be (partially) covered by existing tracepoints.
>
>> Signed-off-by: Peter Enderborg <peter.enderborg@xxxxxxxx>
>> ---
>> include/trace/events/kmem.h | 68 +++++++++++++++++++++++++++++++++++++++++++++
>> mm/page_alloc.c | 62 +++++++++++++++++++++++++++++++----------
>> 2 files changed, 116 insertions(+), 14 deletions(-)
>>
>> diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h
>> index eb57e30..bb882ca 100644
>> --- a/include/trace/events/kmem.h
>> +++ b/include/trace/events/kmem.h
>> @@ -315,6 +315,74 @@ TRACE_EVENT(mm_page_alloc_extfrag,
>> __entry->change_ownership)
>> );
>>
>> +TRACE_EVENT(mm_page_alloc_slowpath_enter,
>> +
>> + TP_PROTO(int alloc_order,
>> + nodemask_t *nodemask,
>> + gfp_t gfp_flags),
>> +
>> + TP_ARGS(alloc_order, nodemask, gfp_flags),
>> +
>> + TP_STRUCT__entry(
>> + __field(int, alloc_order)
>> + __field(nodemask_t *, nodemask)
>> + __field(gfp_t, gfp_flags)
>> + ),
>> +
>> + TP_fast_assign(
>> + __entry->alloc_order = alloc_order;
>> + __entry->nodemask = nodemask;
>> + __entry->gfp_flags = gfp_flags;
>> + ),
>> +
>> + TP_printk("alloc_order=%d nodemask=%*pbl gfp_flags=%s",
>> + __entry->alloc_order,
>> + nodemask_pr_args(__entry->nodemask),
>> + show_gfp_flags(__entry->gfp_flags))
>> +);
>> +
>> +TRACE_EVENT(mm_page_alloc_slowpath_exit,
>> +
>> + TP_PROTO(struct page *page,
>> + int alloc_order,
>> + nodemask_t *nodemask,
>> + u64 alloc_start,
>> + gfp_t gfp_flags,
>> + int retrys,
>> + int exit),
>> +
>> + TP_ARGS(page, alloc_order, nodemask, alloc_start, gfp_flags,
>> + retrys, exit),
>> +
>> + TP_STRUCT__entry(__field(struct page *, page)
>> + __field(int, alloc_order)
>> + __field(nodemask_t *, nodemask)
>> + __field(u64, msdelay)
>> + __field(gfp_t, gfp_flags)
>> + __field(int, retrys)
>> + __field(int, exit)
>> + ),
>> +
>> + TP_fast_assign(
>> + __entry->page = page;
>> + __entry->alloc_order = alloc_order;
>> + __entry->nodemask = nodemask;
>> + __entry->msdelay = jiffies_to_msecs(jiffies-alloc_start);
>> + __entry->gfp_flags = gfp_flags;
>> + __entry->retrys = retrys;
>> + __entry->exit = exit;
>> + ),
>> +
>> + TP_printk("page=%p alloc_order=%d nodemask=%*pbl msdelay=%llu gfp_flags=%s retrys=%d exit=%d",
>> + __entry->page,
>> + __entry->alloc_order,
>> + nodemask_pr_args(__entry->nodemask),
>> + __entry->msdelay,
>> + show_gfp_flags(__entry->gfp_flags),
>> + __entry->retrys,
>> + __entry->exit)
>> +);
>> +
>> #endif /* _TRACE_KMEM_H */
>>
>> /* This part must be outside protection */
>> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
>> index 48b5b01..bae9cb9 100644
>> --- a/mm/page_alloc.c
>> +++ b/mm/page_alloc.c
>> @@ -104,6 +104,17 @@ DEFINE_PER_CPU(struct work_struct, pcpu_drain);
>> volatile unsigned long latent_entropy __latent_entropy;
>> EXPORT_SYMBOL(latent_entropy);
>> #endif
>> +enum slowpath_exit {
>> + SLOWPATH_NOZONE = -16,
>> + SLOWPATH_COMPACT_DEFERRED,
>> + SLOWPATH_CAN_NOT_DIRECT_RECLAIM,
>> + SLOWPATH_RECURSION,
>> + SLOWPATH_NO_RETRY,
>> + SLOWPATH_COSTLY_ORDER,
>> + SLOWPATH_OOM_VICTIM,
>> + SLOWPATH_NO_DIRECT_RECLAIM,
>> + SLOWPATH_ORDER
>> +};
>>
>> /*
>> * Array of node states.
>> @@ -3908,8 +3919,15 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
>> enum compact_result compact_result;
>> int compaction_retries;
>> int no_progress_loops;
>> + unsigned long alloc_start = jiffies;
>> unsigned int cpuset_mems_cookie;
>> int reserve_flags;
>> + enum slowpath_exit slowpath_exit;
>> + int retry_count = 0;
>> +
>> + trace_mm_page_alloc_slowpath_enter(order,
>> + ac->nodemask,
>> + gfp_mask);
>>
>> /*
>> * In the slowpath, we sanity check order to avoid ever trying to
>> @@ -3919,7 +3937,8 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
>> */
>> if (order >= MAX_ORDER) {
>> WARN_ON_ONCE(!(gfp_mask & __GFP_NOWARN));
>> - return NULL;
>> + slowpath_exit = SLOWPATH_ORDER;
>> + goto fail;
>> }
>>
>> /*
>> @@ -3951,8 +3970,10 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
>> */
>> ac->preferred_zoneref = first_zones_zonelist(ac->zonelist,
>> ac->high_zoneidx, ac->nodemask);
>> - if (!ac->preferred_zoneref->zone)
>> + if (!ac->preferred_zoneref->zone) {
>> + slowpath_exit = SLOWPATH_NOZONE;
>> goto nopage;
>> + }
>>
>> if (gfp_mask & __GFP_KSWAPD_RECLAIM)
>> wake_all_kswapds(order, ac);
>> @@ -3998,8 +4019,10 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
>> * system, so we fail the allocation instead of entering
>> * direct reclaim.
>> */
>> - if (compact_result == COMPACT_DEFERRED)
>> + if (compact_result == COMPACT_DEFERRED) {
>> + slowpath_exit = SLOWPATH_COMPACT_DEFERRED;
>> goto nopage;
>> + }
>>
>> /*
>> * Looks like reclaim/compaction is worth trying, but
>> @@ -4011,6 +4034,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
>> }
>>
>> retry:
>> + retry_count++;
>> /* Ensure kswapd doesn't accidentally go to sleep as long as we loop */
>> if (gfp_mask & __GFP_KSWAPD_RECLAIM)
>> wake_all_kswapds(order, ac);
>> @@ -4036,13 +4060,16 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
>> goto got_pg;
>>
>> /* Caller is not willing to reclaim, we can't balance anything */
>> - if (!can_direct_reclaim)
>> + if (!can_direct_reclaim) {
>> + slowpath_exit = SLOWPATH_CAN_NOT_DIRECT_RECLAIM;
>> goto nopage;
>> + }
>>
>> /* Avoid recursion of direct reclaim */
>> - if (current->flags & PF_MEMALLOC)
>> + if (current->flags & PF_MEMALLOC) {
>> + slowpath_exit = SLOWPATH_RECURSION;
>> goto nopage;
>> -
>> + }
>> /* Try direct reclaim and then allocating */
>> page = __alloc_pages_direct_reclaim(gfp_mask, order, alloc_flags, ac,
>> &did_some_progress);
>> @@ -4056,16 +4083,18 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
>> goto got_pg;
>>
>> /* Do not loop if specifically requested */
>> - if (gfp_mask & __GFP_NORETRY)
>> + if (gfp_mask & __GFP_NORETRY) {
>> + slowpath_exit = SLOWPATH_NO_RETRY;
>> goto nopage;
>> -
>> + }
>> /*
>> * Do not retry costly high order allocations unless they are
>> * __GFP_RETRY_MAYFAIL
>> */
>> - if (costly_order && !(gfp_mask & __GFP_RETRY_MAYFAIL))
>> + if (costly_order && !(gfp_mask & __GFP_RETRY_MAYFAIL)) {
>> + slowpath_exit = SLOWPATH_COSTLY_ORDER;
>> goto nopage;
>> -
>> + }
>> if (should_reclaim_retry(gfp_mask, order, ac, alloc_flags,
>> did_some_progress > 0, &no_progress_loops))
>> goto retry;
>> @@ -4095,9 +4124,10 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
>> /* Avoid allocations with no watermarks from looping endlessly */
>> if (tsk_is_oom_victim(current) &&
>> (alloc_flags == ALLOC_OOM ||
>> - (gfp_mask & __GFP_NOMEMALLOC)))
>> + (gfp_mask & __GFP_NOMEMALLOC))) {
>> + slowpath_exit = SLOWPATH_OOM_VICTIM;
>> goto nopage;
>> -
>> + }
>> /* Retry as long as the OOM killer is making progress */
>> if (did_some_progress) {
>> no_progress_loops = 0;
>> @@ -4118,9 +4148,10 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
>> * All existing users of the __GFP_NOFAIL are blockable, so warn
>> * of any new users that actually require GFP_NOWAIT
>> */
>> - if (WARN_ON_ONCE(!can_direct_reclaim))
>> + if (WARN_ON_ONCE(!can_direct_reclaim)) {
>> + slowpath_exit = SLOWPATH_NO_DIRECT_RECLAIM;
>> goto fail;
>> -
>> + }
>> /*
>> * PF_MEMALLOC request from this context is rather bizarre
>> * because we cannot reclaim anything and only can loop waiting
>> @@ -4153,6 +4184,9 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
>> warn_alloc(gfp_mask, ac->nodemask,
>> "page allocation failure: order:%u", order);
>> got_pg:
>> + trace_mm_page_alloc_slowpath_exit(page, order, ac->nodemask,
>> + alloc_start, gfp_mask, retry_count, slowpath_exit);
>> +
>> return page;
>> }
>>
>> --
>> 2.7.4
>>
>> --
>> To unsubscribe, send a message with 'unsubscribe linux-mm' in
>> the body to majordomo@xxxxxxxxxx For more info on Linux MM,
>> see: http://www.linux-mm.org/ .
>> Don't email: <a href=mailto:"dont@xxxxxxxxx";> email@xxxxxxxxx </a>