[PATCH RFC 21/24] kfence: Enable capability analysis

From: Marco Elver
Date: Thu Feb 06 2025 - 13:28:19 EST


Enable capability analysis for the KFENCE subsystem.

Notably, kfence_handle_page_fault() required a minor restructure (the
metadata lock is now taken once, at the point where the error is
reported), which also fixed a subtle race; arguably the function is
more readable now.
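
To illustrate the annotation style the series applies here, a minimal
sketch (not part of this patch; 'example_lock', 'example_count' and the
functions below are made up for illustration, using the
__var_guarded_by() and __must_hold() attributes from the capability
analysis introduced earlier in the series):

  static DEFINE_RAW_SPINLOCK(example_lock);

  /* Accesses to example_count are only valid with example_lock held. */
  static int example_count __var_guarded_by(&example_lock);

  /* Callers must hold example_lock; call sites are checked. */
  static void example_inc(void)
          __must_hold(&example_lock)
  {
          example_count++;        /* OK: lock held per the contract. */
  }

  static void example_user(void)
  {
          unsigned long flags;

          raw_spin_lock_irqsave(&example_lock, flags);
          example_inc();          /* OK: called with the lock held. */
          raw_spin_unlock_irqrestore(&example_lock, flags);
  }

With these annotations the analysis can flag, at compile time, an
access to example_count or a call to example_inc() made without
holding example_lock.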

Signed-off-by: Marco Elver <elver@xxxxxxxxxx>
---
 mm/kfence/Makefile      |  2 ++
 mm/kfence/core.c        | 24 +++++++++++++++++-------
 mm/kfence/kfence.h      | 18 ++++++++++++------
 mm/kfence/kfence_test.c |  4 ++++
 mm/kfence/report.c      |  8 ++++++--
 5 files changed, 41 insertions(+), 15 deletions(-)

diff --git a/mm/kfence/Makefile b/mm/kfence/Makefile
index 2de2a58d11a1..b3640bdc3c69 100644
--- a/mm/kfence/Makefile
+++ b/mm/kfence/Makefile
@@ -1,5 +1,7 @@
# SPDX-License-Identifier: GPL-2.0

+CAPABILITY_ANALYSIS := y
+
obj-y := core.o report.o

CFLAGS_kfence_test.o := -fno-omit-frame-pointer -fno-optimize-sibling-calls
diff --git a/mm/kfence/core.c b/mm/kfence/core.c
index 102048821c22..c2d1ffd20a1f 100644
--- a/mm/kfence/core.c
+++ b/mm/kfence/core.c
@@ -7,6 +7,8 @@

#define pr_fmt(fmt) "kfence: " fmt

+disable_capability_analysis();
+
#include <linux/atomic.h>
#include <linux/bug.h>
#include <linux/debugfs.h>
@@ -34,6 +36,8 @@

#include <asm/kfence.h>

+enable_capability_analysis();
+
#include "kfence.h"

/* Disables KFENCE on the first warning assuming an irrecoverable error. */
@@ -132,8 +136,8 @@ struct kfence_metadata *kfence_metadata __read_mostly;
static struct kfence_metadata *kfence_metadata_init __read_mostly;

/* Freelist with available objects. */
-static struct list_head kfence_freelist = LIST_HEAD_INIT(kfence_freelist);
-static DEFINE_RAW_SPINLOCK(kfence_freelist_lock); /* Lock protecting freelist. */
+DEFINE_RAW_SPINLOCK(kfence_freelist_lock); /* Lock protecting freelist. */
+static struct list_head kfence_freelist __var_guarded_by(&kfence_freelist_lock) = LIST_HEAD_INIT(kfence_freelist);

/*
* The static key to set up a KFENCE allocation; or if static keys are not used
@@ -253,6 +257,7 @@ static bool kfence_unprotect(unsigned long addr)
}

static inline unsigned long metadata_to_pageaddr(const struct kfence_metadata *meta)
+ __must_hold(&meta->lock)
{
unsigned long offset = (meta - kfence_metadata + 1) * PAGE_SIZE * 2;
unsigned long pageaddr = (unsigned long)&__kfence_pool[offset];
@@ -288,6 +293,7 @@ static inline bool kfence_obj_allocated(const struct kfence_metadata *meta)
static noinline void
metadata_update_state(struct kfence_metadata *meta, enum kfence_object_state next,
unsigned long *stack_entries, size_t num_stack_entries)
+ __must_hold(&meta->lock)
{
struct kfence_track *track =
next == KFENCE_OBJECT_ALLOCATED ? &meta->alloc_track : &meta->free_track;
@@ -485,7 +491,7 @@ static void *kfence_guarded_alloc(struct kmem_cache *cache, size_t size, gfp_t g
alloc_covered_add(alloc_stack_hash, 1);

/* Set required slab fields. */
- slab = virt_to_slab((void *)meta->addr);
+ slab = virt_to_slab(addr);
slab->slab_cache = cache;
slab->objects = 1;

@@ -514,6 +520,7 @@ static void *kfence_guarded_alloc(struct kmem_cache *cache, size_t size, gfp_t g
static void kfence_guarded_free(void *addr, struct kfence_metadata *meta, bool zombie)
{
struct kcsan_scoped_access assert_page_exclusive;
+ u32 alloc_stack_hash;
unsigned long flags;
bool init;

@@ -546,9 +553,10 @@ static void kfence_guarded_free(void *addr, struct kfence_metadata *meta, bool z
/* Mark the object as freed. */
metadata_update_state(meta, KFENCE_OBJECT_FREED, NULL, 0);
init = slab_want_init_on_free(meta->cache);
+ alloc_stack_hash = meta->alloc_stack_hash;
raw_spin_unlock_irqrestore(&meta->lock, flags);

- alloc_covered_add(meta->alloc_stack_hash, -1);
+ alloc_covered_add(alloc_stack_hash, -1);

/* Check canary bytes for memory corruption. */
check_canary(meta);
@@ -593,6 +601,7 @@ static void rcu_guarded_free(struct rcu_head *h)
* which partial initialization succeeded.
*/
static unsigned long kfence_init_pool(void)
+ __no_capability_analysis
{
unsigned long addr;
struct page *pages;
@@ -1192,6 +1201,7 @@ bool kfence_handle_page_fault(unsigned long addr, bool is_write, struct pt_regs
{
const int page_index = (addr - (unsigned long)__kfence_pool) / PAGE_SIZE;
struct kfence_metadata *to_report = NULL;
+ unsigned long unprotected_page = 0;
enum kfence_error_type error_type;
unsigned long flags;

@@ -1225,9 +1235,8 @@ bool kfence_handle_page_fault(unsigned long addr, bool is_write, struct pt_regs
if (!to_report)
goto out;

- raw_spin_lock_irqsave(&to_report->lock, flags);
- to_report->unprotected_page = addr;
error_type = KFENCE_ERROR_OOB;
+ unprotected_page = addr;

/*
* If the object was freed before we took the lock we can still
@@ -1239,7 +1248,6 @@ bool kfence_handle_page_fault(unsigned long addr, bool is_write, struct pt_regs
if (!to_report)
goto out;

- raw_spin_lock_irqsave(&to_report->lock, flags);
error_type = KFENCE_ERROR_UAF;
/*
* We may race with __kfence_alloc(), and it is possible that a
@@ -1251,6 +1259,8 @@ bool kfence_handle_page_fault(unsigned long addr, bool is_write, struct pt_regs

out:
if (to_report) {
+ raw_spin_lock_irqsave(&to_report->lock, flags);
+ to_report->unprotected_page = unprotected_page;
kfence_report_error(addr, is_write, regs, to_report, error_type);
raw_spin_unlock_irqrestore(&to_report->lock, flags);
} else {
diff --git a/mm/kfence/kfence.h b/mm/kfence/kfence.h
index dfba5ea06b01..27829d70baf6 100644
--- a/mm/kfence/kfence.h
+++ b/mm/kfence/kfence.h
@@ -9,6 +9,8 @@
#ifndef MM_KFENCE_KFENCE_H
#define MM_KFENCE_KFENCE_H

+disable_capability_analysis();
+
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
@@ -16,6 +18,8 @@

#include "../slab.h" /* for struct kmem_cache */

+enable_capability_analysis();
+
/*
* Get the canary byte pattern for @addr. Use a pattern that varies based on the
* lower 3 bits of the address, to detect memory corruptions with higher
@@ -34,6 +38,8 @@
/* Maximum stack depth for reports. */
#define KFENCE_STACK_DEPTH 64

+extern raw_spinlock_t kfence_freelist_lock;
+
/* KFENCE object states. */
enum kfence_object_state {
KFENCE_OBJECT_UNUSED, /* Object is unused. */
@@ -53,7 +59,7 @@ struct kfence_track {

/* KFENCE metadata per guarded allocation. */
struct kfence_metadata {
- struct list_head list; /* Freelist node; access under kfence_freelist_lock. */
+ struct list_head list __var_guarded_by(&kfence_freelist_lock); /* Freelist node. */
struct rcu_head rcu_head; /* For delayed freeing. */

/*
@@ -91,13 +97,13 @@ struct kfence_metadata {
* In case of an invalid access, the page that was unprotected; we
* optimistically only store one address.
*/
- unsigned long unprotected_page;
+ unsigned long unprotected_page __var_guarded_by(&lock);

/* Allocation and free stack information. */
- struct kfence_track alloc_track;
- struct kfence_track free_track;
+ struct kfence_track alloc_track __var_guarded_by(&lock);
+ struct kfence_track free_track __var_guarded_by(&lock);
/* For updating alloc_covered on frees. */
- u32 alloc_stack_hash;
+ u32 alloc_stack_hash __var_guarded_by(&lock);
#ifdef CONFIG_MEMCG
struct slabobj_ext obj_exts;
#endif
@@ -141,6 +147,6 @@ enum kfence_error_type {
void kfence_report_error(unsigned long address, bool is_write, struct pt_regs *regs,
const struct kfence_metadata *meta, enum kfence_error_type type);

-void kfence_print_object(struct seq_file *seq, const struct kfence_metadata *meta);
+void kfence_print_object(struct seq_file *seq, const struct kfence_metadata *meta) __must_hold(&meta->lock);

#endif /* MM_KFENCE_KFENCE_H */
diff --git a/mm/kfence/kfence_test.c b/mm/kfence/kfence_test.c
index 00034e37bc9f..67eca6e9a8de 100644
--- a/mm/kfence/kfence_test.c
+++ b/mm/kfence/kfence_test.c
@@ -11,6 +11,8 @@
* Marco Elver <elver@xxxxxxxxxx>
*/

+disable_capability_analysis();
+
#include <kunit/test.h>
#include <linux/jiffies.h>
#include <linux/kernel.h>
@@ -26,6 +28,8 @@

#include <asm/kfence.h>

+enable_capability_analysis();
+
#include "kfence.h"

/* May be overridden by <asm/kfence.h>. */
diff --git a/mm/kfence/report.c b/mm/kfence/report.c
index 10e6802a2edf..bbee90d0034d 100644
--- a/mm/kfence/report.c
+++ b/mm/kfence/report.c
@@ -5,6 +5,8 @@
* Copyright (C) 2020, Google LLC.
*/

+disable_capability_analysis();
+
#include <linux/stdarg.h>

#include <linux/kernel.h>
@@ -22,6 +24,8 @@

#include <asm/kfence.h>

+enable_capability_analysis();
+
#include "kfence.h"

/* May be overridden by <asm/kfence.h>. */
@@ -106,6 +110,7 @@ static int get_stack_skipnr(const unsigned long stack_entries[], int num_entries

static void kfence_print_stack(struct seq_file *seq, const struct kfence_metadata *meta,
bool show_alloc)
+ __must_hold(&meta->lock)
{
const struct kfence_track *track = show_alloc ? &meta->alloc_track : &meta->free_track;
u64 ts_sec = track->ts_nsec;
@@ -207,8 +212,6 @@ void kfence_report_error(unsigned long address, bool is_write, struct pt_regs *r
if (WARN_ON(type != KFENCE_ERROR_INVALID && !meta))
return;

- if (meta)
- lockdep_assert_held(&meta->lock);
/*
* Because we may generate reports in printk-unfriendly parts of the
* kernel, such as scheduler code, the use of printk() could deadlock.
@@ -263,6 +266,7 @@ void kfence_report_error(unsigned long address, bool is_write, struct pt_regs *r
stack_trace_print(stack_entries + skipnr, num_stack_entries - skipnr, 0);

if (meta) {
+ lockdep_assert_held(&meta->lock);
pr_err("\n");
kfence_print_object(NULL, meta);
}
--
2.48.1.502.g6dc24dfdaf-goog