Re: [PATCH v2 4/7] sysfs: Add SYSFS_HUGE_BIN_FILE flag for binary attributes larger than PAGE_SIZE

From: M K, Muralidhara

Date: Tue May 12 2026 - 02:35:39 EST


Hi Greg and Rafael,
Could you please review and provide your feedback on this patch?

On 4/27/2026 9:21 PM, Muralidhara M K wrote:
Historically, sysfs read buffers were allocated with get_zeroed_page(),
limiting reads to PAGE_SIZE. Commit 13c589d5b0ac ("sysfs: use seq_file
when reading regular files") transitioned regular (text) attribute reads
to seq_file, which can dynamically grow buffers beyond PAGE_SIZE.
However, the PAGE_SIZE limit was intentionally preserved for
compatibility. When binary attribute handling was later unified into
the same codebase, the non-seq_file read path (kernfs_file_read_iter)
retained this PAGE_SIZE cap for binary files as well.

Drivers that expose binary attributes larger than PAGE_SIZE — such as
the AMD HSMP metric table (~13 KB) — cannot deliver the full content
in a single read() call through the existing path.

Introduce a new opt-in flag SYSFS_HUGE_BIN_FILE (040000) that drivers
can OR into their bin_attribute mode. When set, sysfs selects a new
kernfs_ops (sysfs_bin_kfops_huge_file_ro) whose .seq_show callback
pipes the bin_attribute ->read() result through seq_file, allowing
reads of arbitrary size in one shot. Existing binary attributes
without the flag continue using the legacy capped path.

Co-developed-by: Nayak K Prateek <kprateek.nayak@xxxxxxx>
Signed-off-by: Nayak K Prateek <kprateek.nayak@xxxxxxx>
Signed-off-by: Muralidhara M K <muralidhara.mk@xxxxxxx>
---
Changes v1->v2: New patch

fs/sysfs/file.c | 45 +++++++++++++++++++++++++++++++++++++++++++
fs/sysfs/group.c | 8 ++++----
include/linux/sysfs.h | 1 +
3 files changed, 50 insertions(+), 4 deletions(-)

diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index 5709cede1d75..be42c3c1e056 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -38,6 +38,45 @@ static const struct sysfs_ops *sysfs_file_ops(struct kernfs_node *kn)
return kobj->ktype ? kobj->ktype->sysfs_ops : NULL;
}
+/*
+ * seq_file show callback for huge sysfs bin files: calls the attribute's
+ * ->read() once for the whole inode size (offset 0) into the seq_file
+ * buffer, so a read is not capped at PAGE_SIZE. Returns -EIO without
+ * ->read() or with i_size == 0, a negative ->read() result, else 0.
+ */
+static int sysfs_kf_huge_file_seq_show(struct seq_file *sf, void *v)
+{
+ struct kernfs_open_file *of = sf->private;
+ const struct bin_attribute *battr = of->kn->priv;
+ struct kobject *kobj = sysfs_file_kobj(of->kn);
+ loff_t size = file_inode(of->file)->i_size;
+ ssize_t count;
+ char *buf;
+
+ if (!battr->read)
+ return -EIO;
+
+ if (!size)
+ return -EIO;
+
+ /* need room for size bytes; else commit -1 (presumably seq_file regrows) */
+ count = seq_get_buf(sf, &buf);
+ if (count < size) {
+ seq_commit(sf, -1);
+ return 0;
+ }
+
+ memset(buf, 0, size);
+
+ count = battr->read(of->file, kobj, battr, buf, 0, size);
+ if (count < 0)
+ return count;
+
+ WARN_ON(count > size);
+ seq_commit(sf, min_t(ssize_t, count, size));
+ return 0;
+}
+
/*
* Reads on sysfs are handled through seq_file, which takes care of hairy
* details like buffering and seeking. The following function pipes
@@ -249,6 +288,10 @@ static const struct kernfs_ops sysfs_prealloc_kfops_rw = {
.prealloc = true,
};
+static const struct kernfs_ops sysfs_bin_kfops_huge_file_ro = {
+ .seq_show = sysfs_kf_huge_file_seq_show, /* read-only SYSFS_HUGE_BIN_FILE attrs */
+};
+
static const struct kernfs_ops sysfs_bin_kfops_ro = {
.read = sysfs_kf_bin_read,
};
@@ -333,6 +376,8 @@ int sysfs_add_bin_file_mode_ns(struct kernfs_node *parent,
ops = &sysfs_bin_kfops_mmap;
else if (battr->read && battr->write)
ops = &sysfs_bin_kfops_rw;
+ else if (battr->read && (mode & SYSFS_HUGE_BIN_FILE))
+ ops = &sysfs_bin_kfops_huge_file_ro;
else if (battr->read)
ops = &sysfs_bin_kfops_ro;
else if (battr->write)
diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c
index b3edae0578c0..2d0b01c00a97 100644
--- a/fs/sysfs/group.c
+++ b/fs/sysfs/group.c
@@ -74,11 +74,11 @@ static int create_files(struct kernfs_node *parent, struct kobject *kobj,
continue;
}
- WARN(mode & ~(SYSFS_PREALLOC | 0664),
+ WARN(mode & ~(SYSFS_PREALLOC | SYSFS_HUGE_BIN_FILE | 0664),
"Attribute %s: Invalid permissions 0%o\n",
(*attr)->name, mode);
- mode &= SYSFS_PREALLOC | 0664;
+ mode &= SYSFS_PREALLOC | SYSFS_HUGE_BIN_FILE | 0664;
error = sysfs_add_file_mode_ns(parent, *attr, mode, uid,
gid, NULL);
if (unlikely(error))
@@ -107,11 +107,11 @@ static int create_files(struct kernfs_node *parent, struct kobject *kobj,
if (grp->bin_size)
size = grp->bin_size(kobj, *bin_attr, i);
- WARN(mode & ~(SYSFS_PREALLOC | 0664),
+ WARN(mode & ~(SYSFS_PREALLOC | SYSFS_HUGE_BIN_FILE | 0664),
"Attribute %s: Invalid permissions 0%o\n",
(*bin_attr)->attr.name, mode);
- mode &= SYSFS_PREALLOC | 0664;
+ mode &= SYSFS_PREALLOC | SYSFS_HUGE_BIN_FILE | 0664;
error = sysfs_add_bin_file_mode_ns(parent, *bin_attr,
mode, size, uid, gid,
NULL);
diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h
index b1a3a1e6ad09..78f6c6252cf9 100644
--- a/include/linux/sysfs.h
+++ b/include/linux/sysfs.h
@@ -124,6 +124,7 @@ struct attribute_group {
#define SYSFS_PREALLOC 010000
#define SYSFS_GROUP_INVISIBLE 020000
+#define SYSFS_HUGE_BIN_FILE 040000
/*
* DEFINE_SYSFS_GROUP_VISIBLE(name):