Re: [PATCH v3] drm/sched: Fix kernel NULL pointer dereference error
From: Christian König
Date: Tue Oct 18 2022 - 08:35:12 EST
Am 18.10.22 um 14:20 schrieb Yadav, Arvind:
[SNIP]
+ drm_sched_fence_finished(s_fence);
+ dma_fence_put(&s_fence->finished);
+ wake_up_interruptible(&sched->wake_up_worker);
+}
+
+int drm_sched_fence_add_parent_cb(struct dma_fence *fence,
+ struct drm_sched_fence *s_fence)
+{
+ return dma_fence_add_callback(fence, &s_fence->cb,
+ drm_sched_job_done_cb);
+}
+
+bool drm_sched_fence_remove_parent_cb(struct drm_sched_fence *s_fence)
+{
+ return dma_fence_remove_callback(s_fence->parent,
+ &s_fence->cb);
+}
Do we really need separate functions for that?
We can use 'drm_sched_fence_set_parent' but we need to add extra NULL
check before
adding in the callback otherwise add-callback will throw the warning
message every time.
If I add NULL check then will never get any callback warning message
for setting NULL parent fence.
So I have kept separate functions.
I rather prefer having a single function and allowing the parent fence
to be set to NULL.
Alternatively we could have a drm_sched_fence_set_parent() and
drm_sched_fence_clear_parent() function if you really think it's cleaner
that way.
atomic_dec(&sched->hw_rq_count);
@@ -576,15 +562,14 @@ void drm_sched_start(struct drm_gpu_scheduler
*sched, bool full_recovery)
continue;
if (fence) {
- r = dma_fence_add_callback(fence, &s_job->cb,
- drm_sched_job_done_cb);
+ r = drm_sched_fence_add_parent_cb(fence, s_job->s_fence);
if (r == -ENOENT)
- drm_sched_job_done(s_job);
+ drm_sched_job_done(s_job->s_fence);
else if (r)
DRM_DEV_ERROR(sched->dev, "fence add callback
failed (%d)\n",
Completely nuke that here. All of this should be done in the single
drm_sched_fence_set_parent() function.
And an error message is completely superfluous. We just need to
handle the case that the callback can't be installed because the
fence is already signaled.
I will do the changes as per your review comments, Thank you for the
review.
Just to clarify, you should nuke the error message. Error handling is
rather pointless here.
Thanks,
Christian.
Thanks,
~Arvind
Regards,
Christian.
r);
} else
- drm_sched_job_done(s_job);
+ drm_sched_job_done(s_job->s_fence);
}
if (full_recovery) {
@@ -1049,14 +1034,9 @@ static int drm_sched_main(void *param)
drm_sched_fence_scheduled(s_fence);
if (!IS_ERR_OR_NULL(fence)) {
- s_fence->parent = dma_fence_get(fence);
- /* Drop for original kref_init of the fence */
- dma_fence_put(fence);
-
- r = dma_fence_add_callback(fence, &sched_job->cb,
- drm_sched_job_done_cb);
+ r = drm_sched_fence_set_parent(fence, s_fence);
if (r == -ENOENT)
- drm_sched_job_done(sched_job);
+ drm_sched_job_done(s_fence);
else if (r)
DRM_DEV_ERROR(sched->dev, "fence add callback
failed (%d)\n",
r);
@@ -1064,7 +1044,7 @@ static int drm_sched_main(void *param)
if (IS_ERR(fence))
dma_fence_set_error(&s_fence->finished, PTR_ERR(fence));
- drm_sched_job_done(sched_job);
+ drm_sched_job_done(s_fence);
}
wake_up(&sched->job_scheduled);
diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
index 1f7d9dd1a444..7258e2fa195f 100644
--- a/include/drm/gpu_scheduler.h
+++ b/include/drm/gpu_scheduler.h
@@ -281,6 +281,10 @@ struct drm_sched_fence {
* @owner: job owner for debugging
*/
void *owner;
+ /**
+ * @cb: callback
+ */
+ struct dma_fence_cb cb;
};
struct drm_sched_fence *to_drm_sched_fence(struct dma_fence *f);
@@ -300,7 +304,6 @@ struct drm_sched_fence
*to_drm_sched_fence(struct dma_fence *f);
* be scheduled further.
* @s_priority: the priority of the job.
* @entity: the entity to which this job belongs.
- * @cb: the callback for the parent fence in s_fence.
*
* A job is created by the driver using drm_sched_job_init(), and
* should call drm_sched_entity_push_job() once it wants the
scheduler
@@ -325,7 +328,6 @@ struct drm_sched_job {
atomic_t karma;
enum drm_sched_priority s_priority;
struct drm_sched_entity *entity;
- struct dma_fence_cb cb;
/**
* @dependencies:
*
@@ -559,6 +561,12 @@ void drm_sched_fence_free(struct
drm_sched_fence *fence);
void drm_sched_fence_scheduled(struct drm_sched_fence *fence);
void drm_sched_fence_finished(struct drm_sched_fence *fence);
+int drm_sched_fence_add_parent_cb(struct dma_fence *fence,
+ struct drm_sched_fence *s_fence);
+bool drm_sched_fence_remove_parent_cb(struct drm_sched_fence
*s_fence);
+int drm_sched_fence_set_parent(struct dma_fence *fence,
+ struct drm_sched_fence *s_fence);
+
unsigned long drm_sched_suspend_timeout(struct drm_gpu_scheduler
*sched);
void drm_sched_resume_timeout(struct drm_gpu_scheduler *sched,
unsigned long remaining);