Re: [PATCH 1/6] alloc_tag: add ioctl to /proc/allocinfo
From: Suren Baghdasaryan
Date: Wed Jun 03 2026 - 15:44:20 EST
On Thu, May 21, 2026 at 1:20 AM Hao Ge <hao.ge@xxxxxxxxx> wrote:
>
> On 2026/5/20 01:42, Suren Baghdasaryan wrote:
> > On Mon, May 18, 2026 at 7:53 PM Hao Ge <hao.ge@xxxxxxxxx> wrote:
> >> Hi Abhishek
> >>
> >>
> >> Thanks for the follow-up.
> >>
> >>
> >> On 2026/5/19 07:41, Abhishek Bapat wrote:
> >>> On Wed, May 13, 2026 at 9:38 PM Hao Ge<hao.ge@xxxxxxxxx> wrote:
> >>>> Hi Suren and Abhishek
> >>>>
> >>>>
> >>>> Thanks for the patch! A couple of minor comments below.
> >>>>
> >>>>
> >>>> On 2026/5/5 07:36, Abhishek Bapat wrote:
> >>>>> From: Suren Baghdasaryan<surenb@xxxxxxxxxx>
> >>>>>
> >>>>> Add the following ioctl commands for /proc/allocinfo file:
> >>>>>
> >>>>> ALLOCINFO_IOC_CONTENT_ID - gets content identifier which can be used
> >>>>> to check whether the file content has changed specifically due to module
> >>>>> load/unload. Every time a module is loaded / unloaded, the returned
> >>>>> value will be different. By comparing the identifier value at the
> >>>>> beginning and at the end of the content retrieval operation, users can
> >>>>> validate retrieved information for consistency.
> >>>>>
> >>>>> ALLOCINFO_IOC_GET_AT - gets the record at the specified position. This
> >>>>> is the position of a record in /proc/allocinfo.
> >>>>>
> >>>>> ALLOCINFO_IOC_GET_NEXT - gets the record next to the last retrieved
> >>>>> one. If no records were previously retrieved, returns the first
> >>>>> record.
> >>>>>
> >>>>> Signed-off-by: Suren Baghdasaryan<surenb@xxxxxxxxxx>
> >>>>> Signed-off-by: Abhishek Bapat<abhishekbapat@xxxxxxxxxx>
> >>>>> ---
> >>>>> .../userspace-api/ioctl/ioctl-number.rst | 2 +
> >>>>> include/linux/codetag.h | 1 +
> >>>>> include/uapi/linux/alloc_tag.h | 54 ++++++
> >>>>> lib/alloc_tag.c | 178 +++++++++++++++++-
> >>>>> lib/codetag.c | 11 ++
> >>>>> 5 files changed, 244 insertions(+), 2 deletions(-)
> >>>>> create mode 100644 include/uapi/linux/alloc_tag.h
> >>>>>
> >>>>> diff --git a/Documentation/userspace-api/ioctl/ioctl-number.rst b/Documentation/userspace-api/ioctl/ioctl-number.rst
> >>>>> index 331223761fff..84f6808a8578 100644
> >>>>> --- a/Documentation/userspace-api/ioctl/ioctl-number.rst
> >>>>> +++ b/Documentation/userspace-api/ioctl/ioctl-number.rst
> >>>>> @@ -349,6 +349,8 @@ Code Seq# Include File Comments
> >>>>> <mailto:luzmaximilian@xxxxxxxxx>
> >>>>> 0xA5 20-2F linux/surface_aggregator/dtx.h Microsoft Surface DTX driver
> >>>>> <mailto:luzmaximilian@xxxxxxxxx>
> >>>>> +0xA6 00-0F uapi/linux/alloc_tag.h Memory allocation profiling
> >>>>> +<mailto:surenb@xxxxxxxxxx>
> >>>>> 0xAA 00-3F linux/uapi/linux/userfaultfd.h
> >>>>> 0xAB 00-1F linux/nbd.h
> >>>>> 0xAC 00-1F linux/raw.h
> >>>>> diff --git a/include/linux/codetag.h b/include/linux/codetag.h
> >>>>> index 8ea2a5f7c98a..2bcd4e7c809e 100644
> >>>>> --- a/include/linux/codetag.h
> >>>>> +++ b/include/linux/codetag.h
> >>>>> @@ -76,6 +76,7 @@ struct codetag_iterator {
> >>>>>
> >>>>> void codetag_lock_module_list(struct codetag_type *cttype, bool lock);
> >>>>> bool codetag_trylock_module_list(struct codetag_type *cttype);
> >>>>> +unsigned long codetag_get_content_id(struct codetag_type *cttype);
> >>>>> struct codetag_iterator codetag_get_ct_iter(struct codetag_type *cttype);
> >>>>> struct codetag *codetag_next_ct(struct codetag_iterator *iter);
> >>>>>
> >>>>> diff --git a/include/uapi/linux/alloc_tag.h b/include/uapi/linux/alloc_tag.h
> >>>>> new file mode 100644
> >>>>> index 000000000000..e9a5b55fcc7a
> >>>>> --- /dev/null
> >>>>> +++ b/include/uapi/linux/alloc_tag.h
> >>>>> @@ -0,0 +1,54 @@
> >>>>> +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
> >>>>> +/*
> >>>>> + * include/linux/alloc_tag.h
> >>>>> + */
> >>>>> +
> >>>>> +#ifndef _UAPI_ALLOC_TAG_H
> >>>>> +#define _UAPI_ALLOC_TAG_H
> >>>>> +
> >>>>> +#include <linux/types.h>
> >>>>> +
> >>>>> +#define ALLOCINFO_STR_SIZE 64
> >>>>> +
> >>>>> +struct allocinfo_content_id {
> >>>>> + __u64 id;
> >>>>> +};
> >>>>> +
> >>>>> +struct allocinfo_tag {
> >>>>> + /* Longer names are trimmed */
> >>>>> + char modname[ALLOCINFO_STR_SIZE];
> >>>>> + char function[ALLOCINFO_STR_SIZE];
> >>>>> + char filename[ALLOCINFO_STR_SIZE];
> >>>>> + __u64 lineno;
> >>>>> +};
> >>>>> +
> >>>>> +struct allocinfo_counter {
> >>>>> + __u64 bytes;
> >>>>> + __u64 calls;
> >>>>> + __u8 accurate;
> >>>>> + __u8 pad[7]; /* Add alignment to not break the 32-bit compatible interface */
> >>>>> +};
> >>>>> +
> >>>>> +struct allocinfo_tag_data {
> >>>>> + struct allocinfo_tag tag;
> >>>>> + struct allocinfo_counter counter;
> >>>>> +};
> >>>>> +
> >>>>> +struct allocinfo_get_at {
> >>>>> + __u64 pos; /* input */
> >>>>> + struct allocinfo_tag_data data;
> >>>>> +};
> >>>>> +
> >>>>> +#define _ALLOCINFO_IOC_CONTENT_ID 0
> >>>>> +#define _ALLOCINFO_IOC_GET_AT 1
> >>>>> +#define _ALLOCINFO_IOC_GET_NEXT 2
> >>>>> +
> >>>>> +#define ALLOCINFO_IOC_BASE 0xA6
> >>>>> +#define ALLOCINFO_IOC_CONTENT_ID _IOR(ALLOCINFO_IOC_BASE, _ALLOCINFO_IOC_CONTENT_ID, \
> >>>>> + struct allocinfo_content_id)
> >>>>> +#define ALLOCINFO_IOC_GET_AT _IOWR(ALLOCINFO_IOC_BASE, _ALLOCINFO_IOC_GET_AT, \
> >>>>> + struct allocinfo_get_at)
> >>>>> +#define ALLOCINFO_IOC_GET_NEXT _IOR(ALLOCINFO_IOC_BASE, _ALLOCINFO_IOC_GET_NEXT, \
> >>>>> + struct allocinfo_tag_data)
> >>>>> +
> >>>>> +#endif /* _UAPI_ALLOC_TAG_H */
> >>>>> diff --git a/lib/alloc_tag.c b/lib/alloc_tag.c
> >>>>> index ed1bdcf1f8ab..5c24d2f954d4 100644
> >>>>> --- a/lib/alloc_tag.c
> >>>>> +++ b/lib/alloc_tag.c
> >>>>> @@ -14,6 +14,7 @@
> >>>>> #include <linux/string_choices.h>
> >>>>> #include <linux/vmalloc.h>
> >>>>> #include <linux/kmemleak.h>
> >>>>> +#include <uapi/linux/alloc_tag.h>
> >>>>>
> >>>>> #define ALLOCINFO_FILE_NAME "allocinfo"
> >>>>> #define MODULE_ALLOC_TAG_VMAP_SIZE (100000UL * sizeof(struct alloc_tag))
> >>>>> @@ -46,6 +47,9 @@ int alloc_tag_ref_offs;
> >>>>> struct allocinfo_private {
> >>>>> struct codetag_iterator iter;
> >>>>> bool print_header;
> >>>>> + /* ioctl uses a separate iterator not to interfere with reads */
> >>>>> + struct codetag_iterator ioctl_iter;
> >>>>> + bool positioned; /* seq_open_private() sets to 0 */
> >>>>> };
> >>>>>
> >>>>> static void *allocinfo_start(struct seq_file *m, loff_t *pos)
> >>>>> @@ -125,6 +129,177 @@ static const struct seq_operations allocinfo_seq_op = {
> >>>>> .show = allocinfo_show,
> >>>>> };
> >>>>>
> >>>>> +static int allocinfo_open(struct inode *inode, struct file *file)
> >>>>> +{
> >>>>> + return seq_open_private(file, &allocinfo_seq_op,
> >>>>> + sizeof(struct allocinfo_private));
> >>>>> +}
> >>>>> +
> >>>>> +static int allocinfo_release(struct inode *inode, struct file *file)
> >>>>> +{
> >>>>> + return seq_release_private(inode, file);
> >>>>> +}
> >>>>> +
> >>>>> +static const char *allocinfo_str(const char *str)
> >>>>> +{
> >>>>> + size_t len = strlen(str);
> >>>>> +
> >>>>> + /* Keep an extra space for the trailing NULL. */
> >>>>> + if (len >= ALLOCINFO_STR_SIZE)
> >>>>> + str += (len - ALLOCINFO_STR_SIZE) + 1;
> >>>>> + return str;
> >>>>> +}
> >>>>> +
> >>>>> +/* Copy a string and trim from the beginning if it's too long */
> >>>>> +static void allocinfo_copy_str(char *dest, const char *src)
> >>>>> +{
> >>>>> + strscpy(dest, allocinfo_str(src), ALLOCINFO_STR_SIZE);
> >>>>> +}
> >>>>> +
> >>>>> +static void allocinfo_to_params(struct codetag *ct,
> >>>>> + struct allocinfo_tag_data *data)
> >>>>> +{
> >>>>> + struct alloc_tag *tag = ct_to_alloc_tag(ct);
> >>>>> + struct alloc_tag_counters counter = alloc_tag_read(tag);
> >>>>> +
> >>>>> + if (ct->modname)
> >>>>> + allocinfo_copy_str(data->tag.modname, ct->modname);
> >>>>> + else
> >>>>> + data->tag.modname[0] = '\0';
> >>>> Minor nit about allocinfo_to_params():
> >>>>
> >>>> When modname is NULL (built-in kernel code), the current code sets it
> >>>>
> >>>> to an empty string:
> >>>>
> >>>> if (ct->modname)
> >>>>
> >>>> allocinfo_copy_str(data->tag.modname, ct->modname);
> >>>>
> >>>> else
> >>>>
> >>>> data->tag.modname[0] = '\0';
> >>>>
> >>>> This is of course workable in userspace by checking for an empty
> >>>>
> >>>> string, but I was wondering if it would be cleaner to use "vmlinux"
> >>>>
> >>>> as a default:
> >>>>
> >>>> else
> >>>>
> >>>> allocinfo_copy_str(data->tag.modname, "vmlinux");
> >>>>
> >>>>
> >>>> For some context, in our memory analysis workflow we often group
> >>>>
> >>>> allocations by module to get a quick overview of where memory goes,
> >>>>
> >>>> for example:
> >>>>
> >>>> vmlinux: 2.1 GB (kernel core)
> >>>>
> >>>> nvidia: 1.2 GB (GPU driver)
> >>>>
> >>>> iwlwifi: 800 MB (WiFi driver)
> >>>>
> >>>> ext4: 500 MB (filesystem)
> >>>>
> >>>> Having a consistent identifier for kernel built-in allocations would
> >>>>
> >>>> avoid each userspace tool needing to handle the empty string as a
> >>>>
> >>>> special case. Totally fine if this is intentional though.
> >>>>
> >>> Thanks for bringing this up, I can certainly make this change.
> >>> However, the information is not currently exposed this way through
> >>> /proc/allocinfo. /proc/allocinfo does not categorize kernel non-module
> >>> allocations as vmlinux, so there will be a delta between how IOCTL and
> >>> /proc/allocinfo behave. Suren, could you comment on whether this
> >>> recommendation is fine by you?
> >>>
> >> Right, /proc/allocinfo indeed doesn't categorize them as vmlinux currently.
> >>
> >> It's just that in practice we often group allocations by module, so
> >> having "vmlinux" as a default
> >>
> >> would be convenient. Let's wait for Suren's input.
> > Hi Folks,
> > I would prefer to keep it empty because vmlinux is not really a module
> > and hardcoding this name also seems suboptimal (in case it ever
> > changes). Empty string also aligns with how we output /proc/allocinfo
> > data. If the symbol is in the kernel itself, we do not display the
> > module name at all. So, all in all, unless there is a strong reason
> > against it, I think we should keep it empty.
>
> Hi Suren
>
>
> Thanks for the clarification, that makes sense.
>
> For userspace tools that want to group by module, we can always map an
> empty modname to "vmlinux" at the
>
> presentation layer — no need to hardcode that in the kernel.
>
>
> Hi Abhishek
>
> I noticed the new files (like include/uapi/linux/alloc_tag.h) were added
> in this patchset.
>
> Should they be reflected in the MAINTAINERS file for easier future
> maintenance?
Yes, definitely. Thanks for noticing!
>
> Thanks
>
> Best Regards
>
> Hao
>
> >>>>> + allocinfo_copy_str(data->tag.function, ct->function);
> >>>>> + allocinfo_copy_str(data->tag.filename, ct->filename);
> >>>>> + data->tag.lineno = ct->lineno;
> >>>>> + data->counter.bytes = counter.bytes;
> >>>>> + data->counter.calls = counter.calls;
> >>>>> + data->counter.accurate = !alloc_tag_is_inaccurate(tag);
> >>>>> +}
> >>>>> +
> >>>>> +static int allocinfo_ioctl_get_content_id(struct seq_file *m, void __user *arg)
> >>>>> +{
> >>>>> + struct allocinfo_content_id params;
> >>>>> +
> >>>>> + codetag_lock_module_list(alloc_tag_cttype, true);
> >>>>> + params.id = codetag_get_content_id(alloc_tag_cttype);
> >>>>> + codetag_lock_module_list(alloc_tag_cttype, false);
> >>>>> + if (copy_to_user(arg, &params, sizeof(params)))
> >>>>> + return -EFAULT;
> >>>>> +
> >>>>> + return 0;
> >>>>> +}
> >>>>> +
> >>>>> +static int allocinfo_ioctl_get_at(struct seq_file *m, void __user *arg)
> >>>>> +{
> >>>>> + struct allocinfo_private *priv;
> >>>>> + struct codetag *ct;
> >>>>> + __u64 pos;
> >>>>> + struct allocinfo_get_at params = {0};
> >>>>> +
> >>>>> + if (copy_from_user(&params, arg, sizeof(params)))
> >>>>> + return -EFAULT;
> >>>>> +
> >>>>> + priv = (struct allocinfo_private *)m->private;
> >>>>> + pos = params.pos;
> >>>>> +
> >>>>> + codetag_lock_module_list(alloc_tag_cttype, true);
> >>>>> +
> >>>>> + /* Find the codetag */
> >>>>> + priv->ioctl_iter = codetag_get_ct_iter(alloc_tag_cttype);
> >>>>> + ct = codetag_next_ct(&priv->ioctl_iter);
> >>>>> + while (ct && pos--)
> >>>>> + ct = codetag_next_ct(&priv->ioctl_iter);
> >>>> I noticed that codetag_next_ct(&priv->ioctl_iter) and
> >>>>
> >>>> priv->positioned are accessed without serialization in the ioctl
> >>>>
> >>>> path. Concurrent ioctl calls on the same fd could race on these
> >>>>
> >>>> fields. Just something I spotted while reading the code.
> >>>>
> >>>>
> >>>> Thanks
> >>>>
> >>>> Best Regards
> >>>>
> >>>> Hao
> >>>>
> >>> I believe this should be prevented by `codetag_lock_module_list`; am I
> >>> wrong in my understanding?
> >> Thanks for the explanation! codetag_lock_module_list is designed to
> >> protect the module list from concurrent load/unload, which it does
> >>
> >> correctly. However, it doesn't cover the race between concurrent ioctl
> >> calls on the same fd, since it acquires cttype->mod_lock via
> >>
> >> down_read() and rwsem read locks allow multiple readers to proceed
> >> concurrently:
> >>
> >> Thread A: ALLOCINFO_IOC_GET_AT
> >>
> >> down_read(&cttype->mod_lock) // read lock acquired
> >>
> >> priv->ioctl_iter = codetag_get_ct_iter(...)
> >>
> >> ct = codetag_next_ct(&priv->ioctl_iter)
> >>
> >> priv->positioned = true;
> >>
> >> Thread B: ALLOCINFO_IOC_GET_NEXT // concurrent ioctl on same fd
> >>
> >> down_read(&cttype->mod_lock) // read locks don't exclude
> >> each other
> >>
> >> if (!priv->positioned) { // sees partial state from
> >> Thread A
> >>
> >> priv->ioctl_iter = ... // overwrites Thread A's iterator
> >>
> >> }
> >>
> >> ct = codetag_next_ct(&priv->ioctl_iter) // corrupted iterator
> >>
> >> priv->ioctl_iter and priv->positioned are per-fd state with no
> >> serialization in the ioctl path.
> > Yep, you are right. codetag_lock_module_list() is not enough here to
> > protect from such races. I guess allocinfo_private would need another
> > lock.
> > Thanks,
> > Suren.
> >
> >
> >> Just something I spotted.
> >>
> >> Thanks
> >>
> >> Best Regards
> >>
> >> Hao
> >>
> >>>>> + if (ct) {
> >>>>> + allocinfo_to_params(ct, &params.data);
> >>>>> + priv->positioned = true;
> >>>>> + }
> >>>>> +
> >>>>> + codetag_lock_module_list(alloc_tag_cttype, false);
> >>>>> +
> >>>>> + if (!ct)
> >>>>> + return -ENOENT;
> >>>>> +
> >>>>> + if (copy_to_user(arg, &params, sizeof(params)))
> >>>>> + return -EFAULT;
> >>>>> +
> >>>>> + return 0;
> >>>>> +}
> >>>>> +
> >>>>> +static int allocinfo_ioctl_get_next(struct seq_file *m, void __user *arg)
> >>>>> +{
> >>>>> + struct allocinfo_private *priv;
> >>>>> + struct codetag *ct;
> >>>>> + struct allocinfo_tag_data params = {0};
> >>>>> + int ret = 0;
> >>>>> +
> >>>>> + priv = (struct allocinfo_private *)m->private;
> >>>>> +
> >>>>> + codetag_lock_module_list(alloc_tag_cttype, true);
> >>>>> +
> >>>>> + if (!priv->positioned) {
> >>>>> + priv->ioctl_iter = codetag_get_ct_iter(alloc_tag_cttype);
> >>>>> + priv->positioned = true;
> >>>>> + }
> >>>>> +
> >>>>> + ct = codetag_next_ct(&priv->ioctl_iter);
> >>>>> + if (ct)
> >>>>> + allocinfo_to_params(ct, &params);
> >>>>> +
> >>>>> + if (!ct) {
> >>>>> + priv->positioned = false;
> >>>>> + ret = -ENOENT;
> >>>>> + }
> >>>>> + codetag_lock_module_list(alloc_tag_cttype, false);
> >>>>> +
> >>>>> + if (ret == 0) {
> >>>>> + if (copy_to_user(arg, &params, sizeof(params)))
> >>>>> + return -EFAULT;
> >>>>> + }
> >>>>> + return ret;
> >>>>> +}
> >>>>> +
> >>>>> +static long allocinfo_ioctl(struct file *file, unsigned int cmd,
> >>>>> + unsigned long __arg)
> >>>>> +{
> >>>>> + void __user *arg = (void __user *)__arg;
> >>>>> + int ret;
> >>>>> +
> >>>>> + switch (cmd) {
> >>>>> + case ALLOCINFO_IOC_CONTENT_ID:
> >>>>> + ret = allocinfo_ioctl_get_content_id(file->private_data, arg);
> >>>>> + break;
> >>>>> + case ALLOCINFO_IOC_GET_AT:
> >>>>> + ret = allocinfo_ioctl_get_at(file->private_data, arg);
> >>>>> + break;
> >>>>> + case ALLOCINFO_IOC_GET_NEXT:
> >>>>> + ret = allocinfo_ioctl_get_next(file->private_data, arg);
> >>>>> + break;
> >>>>> + default:
> >>>>> + ret = -ENOIOCTLCMD;
> >>>>> + break;
> >>>>> + }
> >>>>> +
> >>>>> + return ret;
> >>>>> +}
> >>>>> +
> >>>>> +#ifdef CONFIG_COMPAT
> >>>>> +static long allocinfo_compat_ioctl(struct file *file, unsigned int cmd,
> >>>>> + unsigned long arg)
> >>>>> +{
> >>>>> + return allocinfo_ioctl(file, cmd, (unsigned long)compat_ptr(arg));
> >>>>> +}
> >>>>> +#endif
> >>>>> +
> >>>>> +static const struct proc_ops allocinfo_proc_ops = {
> >>>>> + .proc_open = allocinfo_open,
> >>>>> + .proc_read_iter = seq_read_iter,
> >>>>> + .proc_lseek = seq_lseek,
> >>>>> + .proc_release = allocinfo_release,
> >>>>> + .proc_ioctl = allocinfo_ioctl,
> >>>>> +#ifdef CONFIG_COMPAT
> >>>>> + .proc_compat_ioctl = allocinfo_compat_ioctl,
> >>>>> +#endif
> >>>>> +
> >>>>> +};
> >>>>> +
> >>>>> size_t alloc_tag_top_users(struct codetag_bytes *tags, size_t count, bool can_sleep)
> >>>>> {
> >>>>> struct codetag_iterator iter;
> >>>>> @@ -946,8 +1121,7 @@ static int __init alloc_tag_init(void)
> >>>>> return 0;
> >>>>> }
> >>>>>
> >>>>> - if (!proc_create_seq_private(ALLOCINFO_FILE_NAME, 0400, NULL, &allocinfo_seq_op,
> >>>>> - sizeof(struct allocinfo_private), NULL)) {
> >>>>> + if (!proc_create(ALLOCINFO_FILE_NAME, 0400, NULL, &allocinfo_proc_ops)) {
> >>>>> pr_err("Failed to create %s file\n", ALLOCINFO_FILE_NAME);
> >>>>> shutdown_mem_profiling(false);
> >>>>> return -ENOMEM;
> >>>>> diff --git a/lib/codetag.c b/lib/codetag.c
> >>>>> index 304667897ad4..93aa30991563 100644
> >>>>> --- a/lib/codetag.c
> >>>>> +++ b/lib/codetag.c
> >>>>> @@ -48,6 +48,17 @@ bool codetag_trylock_module_list(struct codetag_type *cttype)
> >>>>> return down_read_trylock(&cttype->mod_lock) != 0;
> >>>>> }
> >>>>>
> >>>>> +unsigned long codetag_get_content_id(struct codetag_type *cttype)
> >>>>> +{
> >>>>> + lockdep_assert_held(&cttype->mod_lock);
> >>>>> +
> >>>>> + /*
> >>>>> + * next_mod_seq is updated on every load, so can be used to identify
> >>>>> + * content changes.
> >>>>> + */
> >>>>> + return cttype->next_mod_seq;
> >>>>> +}
> >>>>> +
> >>>>> struct codetag_iterator codetag_get_ct_iter(struct codetag_type *cttype)
> >>>>> {
> >>>>> struct codetag_iterator iter = {
> >>> Note, I will be following up with a v2 patchset with your feedback
> >>> included. Please bring up any other points you'd want to clarify so
> >>> that I can include all the changes in the v2 patchset. Thanks for
> >>> reviewing!