Re: [PATCH RFC] proc/meminfo: add NetBuffers counter for socket buffers

From: Konstantin Khlebnikov
Date: Thu Jun 20 2019 - 08:45:37 EST


On 20.06.2019 15:03, Vlastimil Babka wrote:
On 5/15/19 1:55 PM, Konstantin Khlebnikov wrote:
Socket buffers have always been dark matter that lives by its own rules.

Is the information even exported somewhere e.g. in sysfs or via netlink yet?

Yes, in /proc/self/net/protocols:

protocol size sockets memory press maxhdr slab module cl co di ac io in de sh ss gs se re sp bi br ha uh gp em
PACKET 1408 0 -1 NI 0 no kernel n n n n n n n n n n n n n n n n n n n
PINGv6 1088 0 -1 NI 0 yes kernel y y y n n y n n y y y y n y y y y y n
RAWv6 1088 0 -1 NI 0 yes kernel y y y n y y y n y y y y n y y y y n n
UDPLITEv6 1080 0 -1 NI 0 yes kernel y y y n y y y n y y y y n n y y y y n
UDPv6 1080 21 111 NI 0 yes kernel y y y n y n y n y y y y n n y y y y n
TCPv6 2048 49297 442697 no 304 yes kernel y y y y y y y y y y y y y n y y y y y
UNIX 1024 158 -1 NI 0 yes kernel n n n n n n n n n n n n n n n n n n n
UDP-Lite 920 0 -1 NI 0 yes kernel y y y n y y y n y y y y y n y y y y n
PING 880 0 -1 NI 0 yes kernel y y y n n y n n y y y y n y y y y y n
RAW 888 0 -1 NI 0 yes kernel y y y n y y y n y y y y n y y y y n n
UDP 920 0 111 NI 0 yes kernel y y y n y n y n y y y y y n y y y y n
TCP 1888 0 442697 no 304 yes kernel y y y y y y y y y y y y y n y y y y y
NETLINK 1040 1 -1 NI 0 no kernel n n n n n n n n n n n n n n n n n n n

The 'sockets' column is virtualized, while the 'memory' column is not.


This patch adds a NetBuffers line that exposes the most common kinds of them.

Did you encounter a situation where the number was significant and this
would help finding out why memory is occupied?

Might be. In the example above, TCP buffers take 1.7G.
This is a real server, with 0.5T of RAM though.


TCP and UDP are the most important species.
SCTP is added as an example of a modular protocol.
UNIX has no memory counter for now, but one should be easy to add.

Signed-off-by: Konstantin Khlebnikov <khlebnikov@xxxxxxxxxxxxxx>

Right now it's a sum of a few values, which should be fine wrt
/proc/meminfo overhead. But I guess netdev guys should have a say in
this. Also you should update the corresponding Documentation/ file.

Later I sent another proposal: an even bigger sum, "MemKernel":
https://lore.kernel.org/linux-mm/155853600919.381.8172097084053782598.stgit@buzz/
It gives an estimate for all kinds of 'kernel' memory. It seems more useful to me.


Thanks,
Vlastimil

---
fs/proc/meminfo.c | 5 ++++-
include/linux/mm.h | 6 ++++++
mm/page_alloc.c | 3 ++-
net/core/sock.c | 20 ++++++++++++++++++++
net/sctp/socket.c | 2 +-
5 files changed, 33 insertions(+), 3 deletions(-)

diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index 7bc14716fc5d..0ee2300a916d 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -41,6 +41,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
unsigned long sreclaimable, sunreclaim, misc_reclaimable;
unsigned long kernel_stack_kb, page_tables, percpu_pages;
unsigned long anon_pages, file_pages, swap_cached;
+ unsigned long net_buffers;
long kernel_misc;
int lru;
@@ -66,12 +67,13 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
kernel_stack_kb = global_zone_page_state(NR_KERNEL_STACK_KB);
page_tables = global_zone_page_state(NR_PAGETABLE);
percpu_pages = pcpu_nr_pages();
+ net_buffers = total_netbuffer_pages();
/* all other kinds of kernel memory allocations */
kernel_misc = i.totalram - i.freeram - anon_pages - file_pages
- sreclaimable - sunreclaim - misc_reclaimable
- (kernel_stack_kb >> (PAGE_SHIFT - 10))
- - page_tables - percpu_pages;
+ - page_tables - percpu_pages - net_buffers;
if (kernel_misc < 0)
kernel_misc = 0;
@@ -137,6 +139,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
show_val_kb(m, "VmallocUsed: ", 0ul);
show_val_kb(m, "VmallocChunk: ", 0ul);
show_val_kb(m, "Percpu: ", percpu_pages);
+ show_val_kb(m, "NetBuffers: ", net_buffers);
show_val_kb(m, "KernelMisc: ", kernel_misc);
#ifdef CONFIG_MEMORY_FAILURE
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 0e8834ac32b7..d0a58355bfb7 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2254,6 +2254,12 @@ extern void si_meminfo_node(struct sysinfo *val, int nid);
extern unsigned long arch_reserved_kernel_pages(void);
#endif
+#ifdef CONFIG_NET
+extern unsigned long total_netbuffer_pages(void);
+#else
+static inline unsigned long total_netbuffer_pages(void) { return 0; }
+#endif
+
extern __printf(3, 4)
void warn_alloc(gfp_t gfp_mask, nodemask_t *nodemask, const char *fmt, ...);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 3b13d3914176..fcdd7c6e72b9 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -5166,7 +5166,7 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
" active_file:%lu inactive_file:%lu isolated_file:%lu\n"
" unevictable:%lu dirty:%lu writeback:%lu unstable:%lu\n"
" slab_reclaimable:%lu slab_unreclaimable:%lu\n"
- " mapped:%lu shmem:%lu pagetables:%lu bounce:%lu\n"
+ " mapped:%lu shmem:%lu pagetables:%lu bounce:%lu net_buffers:%lu\n"
" free:%lu free_pcp:%lu free_cma:%lu\n",
global_node_page_state(NR_ACTIVE_ANON),
global_node_page_state(NR_INACTIVE_ANON),
@@ -5184,6 +5184,7 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
global_node_page_state(NR_SHMEM),
global_zone_page_state(NR_PAGETABLE),
global_zone_page_state(NR_BOUNCE),
+ total_netbuffer_pages(),
global_zone_page_state(NR_FREE_PAGES),
free_pcp,
global_zone_page_state(NR_FREE_CMA_PAGES));
diff --git a/net/core/sock.c b/net/core/sock.c
index 75b1c950b49f..dfca4e024b74 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -142,6 +142,7 @@
#include <trace/events/sock.h>
#include <net/tcp.h>
+#include <net/udp.h>
#include <net/busy_poll.h>
static DEFINE_MUTEX(proto_list_mutex);
@@ -3573,3 +3574,22 @@ bool sk_busy_loop_end(void *p, unsigned long start_time)
}
EXPORT_SYMBOL(sk_busy_loop_end);
#endif /* CONFIG_NET_RX_BUSY_POLL */
+
+#if IS_ENABLED(CONFIG_IP_SCTP)
+atomic_long_t sctp_memory_allocated;
+EXPORT_SYMBOL_GPL(sctp_memory_allocated);
+#endif
+
+unsigned long total_netbuffer_pages(void)
+{
+ unsigned long ret = 0;
+
+#if IS_ENABLED(CONFIG_IP_SCTP)
+ ret += atomic_long_read(&sctp_memory_allocated);
+#endif
+#ifdef CONFIG_INET
+ ret += atomic_long_read(&tcp_memory_allocated);
+ ret += atomic_long_read(&udp_memory_allocated);
+#endif
+ return ret;
+}
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index e4e892cc5644..9d11afdeeae4 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -107,7 +107,7 @@ static int sctp_sock_migrate(struct sock *oldsk, struct sock *newsk,
enum sctp_socket_type type);
static unsigned long sctp_memory_pressure;
-static atomic_long_t sctp_memory_allocated;
+extern atomic_long_t sctp_memory_allocated;
struct percpu_counter sctp_sockets_allocated;
static void sctp_enter_memory_pressure(struct sock *sk)