Re: [PATCH v2 1/4] mm: Fix multiple evaluvations of totalram_pages and managed_pages

From: Michal Hocko
Date: Wed Nov 07 2018 - 03:20:44 EST

Next message: jianchao.wang: "Re: [LKP] a9f38e1dec [ 245.678853] INFO: task mount:580 blocked for more than 120 seconds."
Previous message: Christoph Hellwig: "Re: [PATCH 1/2] pci: prevent sk hynix nvme from entering D3"
In reply to: Arun KS: "[PATCH v2 1/4] mm: Fix multiple evaluvations of totalram_pages and managed_pages"
Next in thread: Vlastimil Babka: "Re: [PATCH v2 1/4] mm: Fix multiple evaluvations of totalram_pages and managed_pages"
Messages sorted by: [ date ] [ thread ] [ subject ] [ author ]

On Tue 06-11-18 21:51:47, Arun KS wrote:
> This patch is in preparation to a later patch which converts totalram_pages
> and zone->managed_pages to atomic variables. This patch does not introduce
> any functional changes.

I forgot to comment on this one. The patch makes a lot of sense. But I
would be little bit more conservative and won't claim "no functional
changes". As things stand now multiple reads in the same function are
racy (without holding the lock). I do not see any example of an
obviously harmful case but claiming the above is too strong of a
statement. I would simply go with something like "Please note that
re-reading the value might lead to a different value and as such it
could lead to unexpected behavior. There are no known bugs as a result
of the current code but it is better to prevent from them in principle."

> Signed-off-by: Arun KS <arunks@xxxxxxxxxxxxxx>
> Reviewed-by: Konstantin Khlebnikov <khlebnikov@xxxxxxxxxxxxxx>

Other than that
Acked-by: Michal Hocko <mhocko@xxxxxxxx>

> ---
> arch/um/kernel/mem.c | 3 +--
> arch/x86/kernel/cpu/microcode/core.c | 5 +++--
> drivers/hv/hv_balloon.c | 19 ++++++++++---------
> fs/file_table.c | 7 ++++---
> kernel/fork.c | 5 +++--
> kernel/kexec_core.c | 5 +++--
> mm/page_alloc.c | 5 +++--
> mm/shmem.c | 3 ++-
> net/dccp/proto.c | 7 ++++---
> net/netfilter/nf_conntrack_core.c | 7 ++++---
> net/netfilter/xt_hashlimit.c | 5 +++--
> net/sctp/protocol.c | 7 ++++---
> 12 files changed, 44 insertions(+), 34 deletions(-)
>
> diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c
> index 1067469..134d3fd 100644
> --- a/arch/um/kernel/mem.c
> +++ b/arch/um/kernel/mem.c
> @@ -51,8 +51,7 @@ void __init mem_init(void)
>
> /* this will put all low memory onto the freelists */
> memblock_free_all();
> - max_low_pfn = totalram_pages;
> - max_pfn = totalram_pages;
> + max_pfn = max_low_pfn = totalram_pages;
> mem_init_print_info(NULL);
> kmalloc_ok = 1;
> }
> diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
> index 2637ff0..99c67ca 100644
> --- a/arch/x86/kernel/cpu/microcode/core.c
> +++ b/arch/x86/kernel/cpu/microcode/core.c
> @@ -434,9 +434,10 @@ static ssize_t microcode_write(struct file *file, const char __user *buf,
> size_t len, loff_t *ppos)
> {
> ssize_t ret = -EINVAL;
> + unsigned long totalram_pgs = totalram_pages;
>
> - if ((len >> PAGE_SHIFT) > totalram_pages) {
> - pr_err("too much data (max %ld pages)\n", totalram_pages);
> + if ((len >> PAGE_SHIFT) > totalram_pgs) {
> + pr_err("too much data (max %ld pages)\n", totalram_pgs);
> return ret;
> }
>
> diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c
> index 4163151..cac4945 100644
> --- a/drivers/hv/hv_balloon.c
> +++ b/drivers/hv/hv_balloon.c
> @@ -1090,6 +1090,7 @@ static void process_info(struct hv_dynmem_device *dm, struct dm_info_msg *msg)
> static unsigned long compute_balloon_floor(void)
> {
> unsigned long min_pages;
> + unsigned long totalram_pgs = totalram_pages;
> #define MB2PAGES(mb) ((mb) << (20 - PAGE_SHIFT))
> /* Simple continuous piecewiese linear function:
> * max MiB -> min MiB gradient
> @@ -1102,16 +1103,16 @@ static unsigned long compute_balloon_floor(void)
> * 8192 744 (1/16)
> * 32768 1512 (1/32)
> */
> - if (totalram_pages < MB2PAGES(128))
> - min_pages = MB2PAGES(8) + (totalram_pages >> 1);
> - else if (totalram_pages < MB2PAGES(512))
> - min_pages = MB2PAGES(40) + (totalram_pages >> 2);
> - else if (totalram_pages < MB2PAGES(2048))
> - min_pages = MB2PAGES(104) + (totalram_pages >> 3);
> - else if (totalram_pages < MB2PAGES(8192))
> - min_pages = MB2PAGES(232) + (totalram_pages >> 4);
> + if (totalram_pgs < MB2PAGES(128))
> + min_pages = MB2PAGES(8) + (totalram_pgs >> 1);
> + else if (totalram_pgs < MB2PAGES(512))
> + min_pages = MB2PAGES(40) + (totalram_pgs >> 2);
> + else if (totalram_pgs < MB2PAGES(2048))
> + min_pages = MB2PAGES(104) + (totalram_pgs >> 3);
> + else if (totalram_pgs < MB2PAGES(8192))
> + min_pages = MB2PAGES(232) + (totalram_pgs >> 4);
> else
> - min_pages = MB2PAGES(488) + (totalram_pages >> 5);
> + min_pages = MB2PAGES(488) + (totalram_pgs >> 5);
> #undef MB2PAGES
> return min_pages;
> }
> diff --git a/fs/file_table.c b/fs/file_table.c
> index e49af4c..6e3c088 100644
> --- a/fs/file_table.c
> +++ b/fs/file_table.c
> @@ -380,10 +380,11 @@ void __init files_init(void)
> void __init files_maxfiles_init(void)
> {
> unsigned long n;
> - unsigned long memreserve = (totalram_pages - nr_free_pages()) * 3/2;
> + unsigned long totalram_pgs = totalram_pages;
> + unsigned long memreserve = (totalram_pgs - nr_free_pages()) * 3/2;
>
> - memreserve = min(memreserve, totalram_pages - 1);
> - n = ((totalram_pages - memreserve) * (PAGE_SIZE / 1024)) / 10;
> + memreserve = min(memreserve, totalram_pgs - 1);
> + n = ((totalram_pgs - memreserve) * (PAGE_SIZE / 1024)) / 10;
>
> files_stat.max_files = max_t(unsigned long, n, NR_FILE);
> }
> diff --git a/kernel/fork.c b/kernel/fork.c
> index 07cddff..7823f31 100644
> --- a/kernel/fork.c
> +++ b/kernel/fork.c
> @@ -739,15 +739,16 @@ void __init __weak arch_task_cache_init(void) { }
> static void set_max_threads(unsigned int max_threads_suggested)
> {
> u64 threads;
> + unsigned long totalram_pgs = totalram_pages;
>
> /*
> * The number of threads shall be limited such that the thread
> * structures may only consume a small part of the available memory.
> */
> - if (fls64(totalram_pages) + fls64(PAGE_SIZE) > 64)
> + if (fls64(totalram_pgs) + fls64(PAGE_SIZE) > 64)
> threads = MAX_THREADS;
> else
> - threads = div64_u64((u64) totalram_pages * (u64) PAGE_SIZE,
> + threads = div64_u64((u64) totalram_pgs * (u64) PAGE_SIZE,
> (u64) THREAD_SIZE * 8UL);
>
> if (threads > max_threads_suggested)
> diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c
> index 86ef06d..dff217c 100644
> --- a/kernel/kexec_core.c
> +++ b/kernel/kexec_core.c
> @@ -152,6 +152,7 @@ int sanity_check_segment_list(struct kimage *image)
> int i;
> unsigned long nr_segments = image->nr_segments;
> unsigned long total_pages = 0;
> + unsigned long totalram_pgs = totalram_pages;
>
> /*
> * Verify we have good destination addresses. The caller is
> @@ -217,13 +218,13 @@ int sanity_check_segment_list(struct kimage *image)
> * wasted allocating pages, which can cause a soft lockup.
> */
> for (i = 0; i < nr_segments; i++) {
> - if (PAGE_COUNT(image->segment[i].memsz) > totalram_pages / 2)
> + if (PAGE_COUNT(image->segment[i].memsz) > totalram_pgs / 2)
> return -EINVAL;
>
> total_pages += PAGE_COUNT(image->segment[i].memsz);
> }
>
> - if (total_pages > totalram_pages / 2)
> + if (total_pages > totalram_pgs / 2)
> return -EINVAL;
>
> /*
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index a919ba5..173312b 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -7245,6 +7245,7 @@ static void calculate_totalreserve_pages(void)
> for (i = 0; i < MAX_NR_ZONES; i++) {
> struct zone *zone = pgdat->node_zones + i;
> long max = 0;
> + unsigned long managed_pages = zone->managed_pages;
>
> /* Find valid and maximum lowmem_reserve in the zone */
> for (j = i; j < MAX_NR_ZONES; j++) {
> @@ -7255,8 +7256,8 @@ static void calculate_totalreserve_pages(void)
> /* we treat the high watermark as reserved pages. */
> max += high_wmark_pages(zone);
>
> - if (max > zone->managed_pages)
> - max = zone->managed_pages;
> + if (max > managed_pages)
> + max = managed_pages;
>
> pgdat->totalreserve_pages += max;
>
> diff --git a/mm/shmem.c b/mm/shmem.c
> index ea26d7a..6b91eab 100644
> --- a/mm/shmem.c
> +++ b/mm/shmem.c
> @@ -114,7 +114,8 @@ static unsigned long shmem_default_max_blocks(void)
>
> static unsigned long shmem_default_max_inodes(void)
> {
> - return min(totalram_pages - totalhigh_pages, totalram_pages / 2);
> + unsigned long totalram_pgs = totalram_pages;
> + return min(totalram_pgs - totalhigh_pages, totalram_pgs / 2);
> }
> #endif
>
> diff --git a/net/dccp/proto.c b/net/dccp/proto.c
> index 43733ac..f27daa1 100644
> --- a/net/dccp/proto.c
> +++ b/net/dccp/proto.c
> @@ -1131,6 +1131,7 @@ static inline void dccp_mib_exit(void)
> static int __init dccp_init(void)
> {
> unsigned long goal;
> + unsigned long totalram_pgs = totalram_pages;
> int ehash_order, bhash_order, i;
> int rc;
>
> @@ -1154,10 +1155,10 @@ static int __init dccp_init(void)
> *
> * The methodology is similar to that of the buffer cache.
> */
> - if (totalram_pages >= (128 * 1024))
> - goal = totalram_pages >> (21 - PAGE_SHIFT);
> + if (totalram_pgs >= (128 * 1024))
> + goal = totalram_pgs >> (21 - PAGE_SHIFT);
> else
> - goal = totalram_pages >> (23 - PAGE_SHIFT);
> + goal = totalram_pgs >> (23 - PAGE_SHIFT);
>
> if (thash_entries)
> goal = (thash_entries *
> diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
> index ca1168d..0b1801e 100644
> --- a/net/netfilter/nf_conntrack_core.c
> +++ b/net/netfilter/nf_conntrack_core.c
> @@ -2248,6 +2248,7 @@ static __always_inline unsigned int total_extension_size(void)
>
> int nf_conntrack_init_start(void)
> {
> + unsigned long totalram_pgs = totalram_pages;
> int max_factor = 8;
> int ret = -ENOMEM;
> int i;
> @@ -2267,11 +2268,11 @@ int nf_conntrack_init_start(void)
> * >= 4GB machines have 65536 buckets.
> */
> nf_conntrack_htable_size
> - = (((totalram_pages << PAGE_SHIFT) / 16384)
> + = (((totalram_pgs << PAGE_SHIFT) / 16384)
> / sizeof(struct hlist_head));
> - if (totalram_pages > (4 * (1024 * 1024 * 1024 / PAGE_SIZE)))
> + if (totalram_pgs > (4 * (1024 * 1024 * 1024 / PAGE_SIZE)))
> nf_conntrack_htable_size = 65536;
> - else if (totalram_pages > (1024 * 1024 * 1024 / PAGE_SIZE))
> + else if (totalram_pgs > (1024 * 1024 * 1024 / PAGE_SIZE))
> nf_conntrack_htable_size = 16384;
> if (nf_conntrack_htable_size < 32)
> nf_conntrack_htable_size = 32;
> diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
> index 3e7d259..6cb9a74 100644
> --- a/net/netfilter/xt_hashlimit.c
> +++ b/net/netfilter/xt_hashlimit.c
> @@ -274,14 +274,15 @@ static int htable_create(struct net *net, struct hashlimit_cfg3 *cfg,
> struct xt_hashlimit_htable *hinfo;
> const struct seq_operations *ops;
> unsigned int size, i;
> + unsigned long totalram_pgs = totalram_pages;
> int ret;
>
> if (cfg->size) {
> size = cfg->size;
> } else {
> - size = (totalram_pages << PAGE_SHIFT) / 16384 /
> + size = (totalram_pgs << PAGE_SHIFT) / 16384 /
> sizeof(struct hlist_head);
> - if (totalram_pages > 1024 * 1024 * 1024 / PAGE_SIZE)
> + if (totalram_pgs > 1024 * 1024 * 1024 / PAGE_SIZE)
> size = 8192;
> if (size < 16)
> size = 16;
> diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
> index 9b277bd..7128f85 100644
> --- a/net/sctp/protocol.c
> +++ b/net/sctp/protocol.c
> @@ -1368,6 +1368,7 @@ static __init int sctp_init(void)
> int status = -EINVAL;
> unsigned long goal;
> unsigned long limit;
> + unsigned long totalram_pages;
> int max_share;
> int order;
> int num_entries;
> @@ -1426,10 +1427,10 @@ static __init int sctp_init(void)
> * The methodology is similar to that of the tcp hash tables.
> * Though not identical. Start by getting a goal size
> */
> - if (totalram_pages >= (128 * 1024))
> - goal = totalram_pages >> (22 - PAGE_SHIFT);
> + if (totalram_pgs >= (128 * 1024))
> + goal = totalram_pgs >> (22 - PAGE_SHIFT);
> else
> - goal = totalram_pages >> (24 - PAGE_SHIFT);
> + goal = totalram_pgs >> (24 - PAGE_SHIFT);
>
> /* Then compute the page order for said goal */
> order = get_order(goal);
> --
> 1.9.1

--
Michal Hocko
SUSE Labs

Next message: jianchao.wang: "Re: [LKP] a9f38e1dec [ 245.678853] INFO: task mount:580 blocked for more than 120 seconds."
Previous message: Christoph Hellwig: "Re: [PATCH 1/2] pci: prevent sk hynix nvme from entering D3"
In reply to: Arun KS: "[PATCH v2 1/4] mm: Fix multiple evaluvations of totalram_pages and managed_pages"
Next in thread: Vlastimil Babka: "Re: [PATCH v2 1/4] mm: Fix multiple evaluvations of totalram_pages and managed_pages"
Messages sorted by: [ date ] [ thread ] [ subject ] [ author ]