Re: [PATCH] mm, oom: distinguish blockable mode for mmu notifiers

From: Tetsuo Handa
Date: Sun Aug 26 2018 - 04:41:39 EST


On 2018/08/24 22:52, Michal Hocko wrote:
> @@ -180,11 +180,15 @@ void amdgpu_mn_unlock(struct amdgpu_mn *mn)
> */
> static int amdgpu_mn_read_lock(struct amdgpu_mn *amn, bool blockable)
> {
> - if (blockable)
> - mutex_lock(&amn->read_lock);
> - else if (!mutex_trylock(&amn->read_lock))
> - return -EAGAIN;
> -
> + /*
> + * We can take sleepable lock even on !blockable mode because
> + * read_lock is only ever take from this path and the notifier
> + * lock never really sleeps. In fact the only reason why the
> + * later is sleepable is because the notifier itself might sleep
> + * in amdgpu_mn_invalidate_node but blockable mode is handled
> + * before calling into that path.
> + */
> + mutex_lock(&amn->read_lock);
> if (atomic_inc_return(&amn->recursion) == 1)
> down_read_non_owner(&amn->lock);
> mutex_unlock(&amn->read_lock);
>

I'm not following. Why don't we need to do like below (given that
nobody except amdgpu_mn_read_lock() holds ->read_lock) because e.g.
drm_sched_fence_create() from drm_sched_job_init() from amdgpu_cs_submit()
is doing GFP_KERNEL memory allocation with ->lock held for write?

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
index e55508b..e1cb344 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
@@ -64,8 +64,6 @@
* @node: hash table node to find structure by adev and mn
* @lock: rw semaphore protecting the notifier nodes
* @objects: interval tree containing amdgpu_mn_nodes
- * @read_lock: mutex for recursive locking of @lock
- * @recursion: depth of recursion
*
* Data for each amdgpu device and process address space.
*/
@@ -85,8 +83,6 @@ struct amdgpu_mn {
/* objects protected by lock */
struct rw_semaphore lock;
struct rb_root_cached objects;
- struct mutex read_lock;
- atomic_t recursion;
};

/**
@@ -181,14 +177,9 @@ void amdgpu_mn_unlock(struct amdgpu_mn *mn)
static int amdgpu_mn_read_lock(struct amdgpu_mn *amn, bool blockable)
{
if (blockable)
- mutex_lock(&amn->read_lock);
- else if (!mutex_trylock(&amn->read_lock))
+ down_read(&amn->lock);
+ else if (!down_read_trylock(&amn->lock))
return -EAGAIN;
-
- if (atomic_inc_return(&amn->recursion) == 1)
- down_read_non_owner(&amn->lock);
- mutex_unlock(&amn->read_lock);
-
return 0;
}

@@ -199,8 +190,7 @@ static int amdgpu_mn_read_lock(struct amdgpu_mn *amn, bool blockable)
*/
static void amdgpu_mn_read_unlock(struct amdgpu_mn *amn)
{
- if (atomic_dec_return(&amn->recursion) == 0)
- up_read_non_owner(&amn->lock);
+ up_read(&amn->lock);
}

/**
@@ -410,8 +400,6 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
amn->type = type;
amn->mn.ops = &amdgpu_mn_ops[type];
amn->objects = RB_ROOT_CACHED;
- mutex_init(&amn->read_lock);
- atomic_set(&amn->recursion, 0);

r = __mmu_notifier_register(&amn->mn, mm);
if (r)