[PATCH 2/4] drm/xe: Add fault injection for rebind worker -ENOSPC

From: Thomas Hellström

Date: Fri Jun 12 2026 - 09:54:44 EST


Add fault injection support, using the kernel fault injection
infrastructure, to inject -ENOSPC early in the success path of
preempt_rebind_work_func(), before xe_svm_notifier_lock() is taken.
This allows the error handling paths to be tested without relying
on real resource exhaustion.

Injection is restricted to restartable VMs. When triggered, the
worker deactivates the VM (rebind_deactivated).
Upcoming patches will then also post an error event to userspace.

Enable via debugfs (requires CONFIG_FAULT_INJECTION_DEBUG_FS=y):

echo 1 > /sys/kernel/debug/dri/0/fail_rebind/times
echo 100 > /sys/kernel/debug/dri/0/fail_rebind/probability

Assisted-by: GitHub_Copilot:claude-sonnet-4.6
Signed-off-by: Thomas Hellström <thomas.hellstrom@xxxxxxxxxxxxxxx>
---
drivers/gpu/drm/xe/xe_debugfs.c | 4 +++-
drivers/gpu/drm/xe/xe_vm.c | 32 ++++++++++++++++++++++++++++++++
drivers/gpu/drm/xe/xe_vm.h | 5 +++++
3 files changed, 40 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_debugfs.c b/drivers/gpu/drm/xe/xe_debugfs.c
index 22b471303984..1a92c52ccd83 100644
--- a/drivers/gpu/drm/xe/xe_debugfs.c
+++ b/drivers/gpu/drm/xe/xe_debugfs.c
@@ -35,8 +35,8 @@
#ifdef CONFIG_DRM_XE_DEBUG
#include "xe_bo_evict.h"
#include "xe_migrate.h"
-#include "xe_vm.h"
#endif
+#include "xe_vm.h"

DECLARE_FAULT_ATTR(gt_reset_failure);
DECLARE_FAULT_ATTR(inject_csc_hw_error);
@@ -612,6 +612,8 @@ void xe_debugfs_register(struct xe_device *xe)

fault_create_debugfs_attr("fail_gt_reset", root, &gt_reset_failure);

+ xe_vm_debugfs_register(root);
+
if (IS_SRIOV_PF(xe))
xe_sriov_pf_debugfs_register(xe, root);
else if (IS_SRIOV_VF(xe))
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 86ed8f31a219..b69a2e5bd9c9 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -18,6 +18,9 @@
#include <linux/kthread.h>
#include <linux/mm.h>
#include <linux/swap.h>
+#ifdef CONFIG_DEBUG_FS
+#include <linux/debugfs.h>
+#endif

#include <generated/xe_wa_oob.h>

@@ -43,6 +46,17 @@
#include "xe_vm_madvise.h"
#include "xe_wa.h"

+#ifdef CONFIG_FAULT_INJECTION
+static DECLARE_FAULT_ATTR(rebind_enospc);
+/* Creates "fail_rebind" under @root; only called if CONFIG_DEBUG_FS is set. */
+static void __maybe_unused xe_vm_register_fault_attrs(struct dentry *root)
+{
+ fault_create_debugfs_attr("fail_rebind", root, &rebind_enospc);
+}
+#else
+static inline void xe_vm_register_fault_attrs(struct dentry *root) {}
+#endif
+
static struct drm_gem_object *xe_vm_obj(struct xe_vm *vm)
{
return vm->gpuvm.r_obj;
@@ -529,6 +543,13 @@ static void preempt_rebind_work_func(struct work_struct *w)
goto out_unlock;
}

+#ifdef CONFIG_FAULT_INJECTION
+ if (xe_vm_is_restartable(vm) && should_fail(&rebind_enospc, 1)) {
+ err = -ENOSPC;
+ goto out_unlock;
+ }
+#endif
+
#define retry_required(__tries, __vm) \
(IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT) ? \
(!(__tries)++ || __xe_vm_userptr_needs_repin(__vm)) : \
@@ -5042,3 +5063,14 @@ void xe_vm_remove_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
}
up_write(&vm->exec_queues.lock);
}
+
+#ifdef CONFIG_DEBUG_FS
+/**
+ * xe_vm_debugfs_register() - Register xe_vm fault-injection debugfs entries
+ * @root: debugfs root dentry for this device; parent of the created entries
+ */
+void xe_vm_debugfs_register(struct dentry *root)
+{
+ xe_vm_register_fault_attrs(root);
+}
+#endif
diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h
index 9ee44599cacd..0f9a38d97bf6 100644
--- a/drivers/gpu/drm/xe/xe_vm.h
+++ b/drivers/gpu/drm/xe/xe_vm.h
@@ -216,6 +216,11 @@ int xe_vm_restart_ioctl(struct drm_device *dev, void *data,
struct drm_file *file);
void xe_vm_close_and_put(struct xe_vm *vm);

+#ifdef CONFIG_DEBUG_FS
+struct dentry;
+void xe_vm_debugfs_register(struct dentry *root);
+#endif
+
static inline bool xe_vm_in_fault_mode(struct xe_vm *vm)
{
return vm->flags & XE_VM_FLAG_FAULT_MODE;
--
2.54.0