[PATCH bpf-next v4 01/30] mm: support nesting memalloc_use_memcg()

From: Roman Gushchin
Date: Fri Aug 21 2020 - 11:02:38 EST


From: Johannes Weiner <hannes@xxxxxxxxxxx>

Support nesting of memalloc_use_memcg() so that it can also be used
from an interrupt context.

Make memalloc_use_memcg() return the old memcg and convert existing
users to a stacking model. Delete the unused memalloc_unuse_memcg().

Roman: I've rephrased the original commit log, because it was
focused on the accounting problem related to loop devices. I made
it less specific, so that it applies to bpf too. I also rebased the
patch onto the current state of the mm tree.

The original patch can be found here:
https://lkml.org/lkml/2020/5/28/806

Signed-off-by: Johannes Weiner <hannes@xxxxxxxxxxx>
Signed-off-by: Roman Gushchin <guro@xxxxxx>
---
fs/buffer.c | 6 +++---
fs/notify/fanotify/fanotify.c | 5 +++--
fs/notify/inotify/inotify_fsnotify.c | 5 +++--
include/linux/sched/mm.h | 28 +++++++++-------------------
mm/memcontrol.c | 6 +++---
5 files changed, 21 insertions(+), 29 deletions(-)

diff --git a/fs/buffer.c b/fs/buffer.c
index 061dd202979d..97ef480db0da 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -842,13 +842,13 @@ struct buffer_head *alloc_page_buffers(struct page *page, unsigned long size,
struct buffer_head *bh, *head;
gfp_t gfp = GFP_NOFS | __GFP_ACCOUNT;
long offset;
- struct mem_cgroup *memcg;
+ struct mem_cgroup *memcg, *old_memcg;

if (retry)
gfp |= __GFP_NOFAIL;

memcg = get_mem_cgroup_from_page(page);
- memalloc_use_memcg(memcg);
+ old_memcg = memalloc_use_memcg(memcg);

head = NULL;
offset = PAGE_SIZE;
@@ -867,7 +867,7 @@ struct buffer_head *alloc_page_buffers(struct page *page, unsigned long size,
set_bh_page(bh, page, offset);
}
out:
- memalloc_unuse_memcg();
+ memalloc_use_memcg(old_memcg);
mem_cgroup_put(memcg);
return head;
/*
diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index c942910a8649..0e59fa57f6d7 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -531,6 +531,7 @@ static struct fanotify_event *fanotify_alloc_event(struct fsnotify_group *group,
struct inode *dirid = fanotify_dfid_inode(mask, data, data_type, dir);
const struct path *path = fsnotify_data_path(data, data_type);
unsigned int fid_mode = FAN_GROUP_FLAG(group, FANOTIFY_FID_BITS);
+ struct mem_cgroup *old_memcg;
struct inode *child = NULL;
bool name_event = false;

@@ -580,7 +581,7 @@ static struct fanotify_event *fanotify_alloc_event(struct fsnotify_group *group,
gfp |= __GFP_RETRY_MAYFAIL;

/* Whoever is interested in the event, pays for the allocation. */
- memalloc_use_memcg(group->memcg);
+ old_memcg = memalloc_use_memcg(group->memcg);

if (fanotify_is_perm_event(mask)) {
event = fanotify_alloc_perm_event(path, gfp);
@@ -608,7 +609,7 @@ static struct fanotify_event *fanotify_alloc_event(struct fsnotify_group *group,
event->pid = get_pid(task_tgid(current));

out:
- memalloc_unuse_memcg();
+ memalloc_use_memcg(old_memcg);
return event;
}

diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c
index a65cf8c9f600..8017a51561c4 100644
--- a/fs/notify/inotify/inotify_fsnotify.c
+++ b/fs/notify/inotify/inotify_fsnotify.c
@@ -66,6 +66,7 @@ static int inotify_one_event(struct fsnotify_group *group, u32 mask,
int ret;
int len = 0;
int alloc_len = sizeof(struct inotify_event_info);
+ struct mem_cgroup *old_memcg;

if ((inode_mark->mask & FS_EXCL_UNLINK) &&
path && d_unlinked(path->dentry))
@@ -87,9 +88,9 @@ static int inotify_one_event(struct fsnotify_group *group, u32 mask,
* trigger OOM killer in the target monitoring memcg as it may have
* security repercussion.
*/
- memalloc_use_memcg(group->memcg);
+ old_memcg = memalloc_use_memcg(group->memcg);
event = kmalloc(alloc_len, GFP_KERNEL_ACCOUNT | __GFP_RETRY_MAYFAIL);
- memalloc_unuse_memcg();
+ memalloc_use_memcg(old_memcg);

if (unlikely(!event)) {
/*
diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h
index f889e332912f..b8fde48d44a9 100644
--- a/include/linux/sched/mm.h
+++ b/include/linux/sched/mm.h
@@ -312,31 +312,21 @@ static inline void memalloc_nocma_restore(unsigned int flags)
* __GFP_ACCOUNT allocations till the end of the scope will be charged to the
* given memcg.
*
- * NOTE: This function is not nesting safe.
+ * NOTE: This function can nest. Users must save the return value and
+ * restore the previous value after their own charging scope is over.
*/
-static inline void memalloc_use_memcg(struct mem_cgroup *memcg)
+static inline struct mem_cgroup *
+memalloc_use_memcg(struct mem_cgroup *memcg)
{
- WARN_ON_ONCE(current->active_memcg);
+ struct mem_cgroup *old = current->active_memcg;
current->active_memcg = memcg;
-}
-
-/**
- * memalloc_unuse_memcg - Ends the remote memcg charging scope.
- *
- * This function marks the end of the remote memcg charging scope started by
- * memalloc_use_memcg().
- */
-static inline void memalloc_unuse_memcg(void)
-{
- current->active_memcg = NULL;
+ return old;
}
#else
-static inline void memalloc_use_memcg(struct mem_cgroup *memcg)
-{
-}
-
-static inline void memalloc_unuse_memcg(void)
+static inline struct mem_cgroup *
+memalloc_use_memcg(struct mem_cgroup *memcg)
{
+ return NULL;
}
#endif

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index b807952b4d43..b2468c80085d 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -5271,12 +5271,12 @@ static struct cgroup_subsys_state * __ref
mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
{
struct mem_cgroup *parent = mem_cgroup_from_css(parent_css);
- struct mem_cgroup *memcg;
+ struct mem_cgroup *memcg, *old_memcg;
long error = -ENOMEM;

- memalloc_use_memcg(parent);
+ old_memcg = memalloc_use_memcg(parent);
memcg = mem_cgroup_alloc();
- memalloc_unuse_memcg();
+ memalloc_use_memcg(old_memcg);
if (IS_ERR(memcg))
return ERR_CAST(memcg);

--
2.26.2