Re: [PATCH] printk: add cpu id information to printk() output

From: Petr Mladek
Date: Fri Sep 15 2023 - 12:33:26 EST


On Fri 2023-09-15 11:53:13, Greg KH wrote:
> On Fri, Sep 15, 2023 at 04:46:02PM +0800, Enlin Mu wrote:
> > John Ogness <john.ogness@xxxxxxxxxxxxx> 于2023年9月15日周五 16:34写道:
> > >
> > > On 2023-09-15, Enlin Mu <enlin.mu@xxxxxxxxxxx> wrote:
> > > > Sometimes we want to print cpu id of printk() messages to consoles
> > > >
> > > > diff --git a/include/linux/threads.h b/include/linux/threads.h
> > > > index c34173e6c5f1..6700bd9a174f 100644
> > > > --- a/include/linux/threads.h
> > > > +++ b/include/linux/threads.h
> > > > @@ -34,6 +34,9 @@
> > > > #define PID_MAX_LIMIT (CONFIG_BASE_SMALL ? PAGE_SIZE * 8 : \
> > > > (sizeof(long) > 4 ? 4 * 1024 * 1024 : PID_MAX_DEFAULT))
> > > >
> > > > +#define CPU_ID_SHIFT 23
> > > > +#define CPU_ID_MASK 0xff800000
> > >
> > > This only supports 256 CPUs. I think it doesn't make sense to try to
> > > squish CPU and Task IDs into 32 bits.
> > Yes, it is not good way,
> > >
> > > What about introducing a caller_id option to always only print the CPU
> > > ID? Or do you really need Task _and_ CPU?
> > Yes, I need it.Because I need to know which CPU is printing the
> > log, so that I can identify the current system operation, such as load
> > situation and CPU busy/idle status
>
> The cpu that is printing the log isn't the one that added the log
> message, so I think you will have incorrect data here, right?

We already store some metadata about the caller:

* All fields are set by the printk code except for @seq, which is
* set by the ringbuffer code.
*/
struct printk_info {
u64 seq; /* sequence number */
u64 ts_nsec; /* timestamp in nanoseconds */
u16 text_len; /* length of text message */
u8 facility; /* syslog facility */
u8 flags:5; /* internal record flags */
u8 level:3; /* syslog level */
u32 caller_id; /* thread id or processor id */

struct dev_printk_info dev_info;
};

The 32-bit caller ID is generated using:

static inline u32 printk_caller_id(void)
{
return in_task() ? task_pid_nr(current) :
0x80000000 + smp_processor_id();
}

We could add more metadata and always store the CPU ID and something
like:

[CTXT][ Tpid][ Ccpu]

for example

[TASK][ T234][ C4]
[ IRQ][ T4567][ C17]
[SIRQ][ T5][ C0]
[ NMI][ T356][ C128]


The biggest problem is that it would change the format of the
ringbuffer so that it would require updating external tools,
working with crashdump, especially crash but there are also
alternative python extensions for gdb.


See below POC of the kernel part. It is not even compile tested. The size
of the buffers is updated by a guess. Comments are not updated, ...

diff --git a/kernel/printk/internal.h b/kernel/printk/internal.h
index 7d4979d5c3ce..e3e0a9fdc0cf 100644
--- a/kernel/printk/internal.h
+++ b/kernel/printk/internal.h
@@ -15,7 +15,7 @@ int devkmsg_sysctl_set_loglvl(struct ctl_table *table, int write,
#ifdef CONFIG_PRINTK

#ifdef CONFIG_PRINTK_CALLER
-#define PRINTK_PREFIX_MAX 48
+#define PRINTK_PREFIX_MAX 64
#else
#define PRINTK_PREFIX_MAX 32
#endif
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index 7e0b4dd02398..bb393ccbd5e9 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -621,16 +621,20 @@ static void append_char(char **pp, char *e, char c)
*(*pp)++ = c;
}

+char printk_caller_ctxt_txt[][] = { "TSK", "IRQ", "SIRQ", "NMI"' };
+
static ssize_t info_print_ext_header(char *buf, size_t size,
struct printk_info *info)
{
u64 ts_usec = info->ts_nsec;
- char caller[20];
+ char caller[40];
#ifdef CONFIG_PRINTK_CALLER
- u32 id = info->caller_id;
+ struct printk_caller_info *ci = info->caller_info;

- snprintf(caller, sizeof(caller), ",caller=%c%u",
- id & 0x80000000 ? 'C' : 'T', id & ~0x80000000);
+ snprintf(caller, sizeof(caller),
+ ",caller.ctxt=%s,caller.pid=%d,caller.cpu=%d",
+ printk_caller_ctxt_txt[ci->ctxt],
+ ci->pid,ci->cpu);
#else
caller[0] = '\0';
#endif
@@ -996,11 +1000,16 @@ void log_buf_vmcoreinfo_setup(void)
VMCOREINFO_OFFSET(prb_data_blk_lpos, begin);
VMCOREINFO_OFFSET(prb_data_blk_lpos, next);

+ VMCOREINFO_STRUCT_SIZE(printk_caller_info);
+ VMCOREINFO_OFFSET(printk_caller_info, ctxt);
+ VMCOREINFO_OFFSET(printk_caller_info, cpu);
+ VMCOREINFO_OFFSET(printk_caller_info, pid);
+
VMCOREINFO_STRUCT_SIZE(printk_info);
VMCOREINFO_OFFSET(printk_info, seq);
VMCOREINFO_OFFSET(printk_info, ts_nsec);
VMCOREINFO_OFFSET(printk_info, text_len);
- VMCOREINFO_OFFSET(printk_info, caller_id);
+ VMCOREINFO_OFFSET(printk_info, caller_info);
VMCOREINFO_OFFSET(printk_info, dev_info);

VMCOREINFO_STRUCT_SIZE(dev_printk_info);
@@ -1111,7 +1120,7 @@ static unsigned int __init add_to_rb(struct printk_ringbuffer *rb,
dest_r.info->level = r->info->level;
dest_r.info->flags = r->info->flags;
dest_r.info->ts_nsec = r->info->ts_nsec;
- dest_r.info->caller_id = r->info->caller_id;
+ dest_r.info->caller_info = r->info->caller_info;
memcpy(&dest_r.info->dev_info, &r->info->dev_info, sizeof(dest_r.info->dev_info));

prb_final_commit(&e);
@@ -1329,13 +1338,16 @@ static size_t print_time(u64 ts, char *buf)
}

#ifdef CONFIG_PRINTK_CALLER
-static size_t print_caller(u32 id, char *buf)
+static size_t print_caller_info(struct printk_caller_info *ci, char *buf)
{
- char caller[12];
+ char pid[12];
+ char cpu[12];

- snprintf(caller, sizeof(caller), "%c%u",
- id & 0x80000000 ? 'C' : 'T', id & ~0x80000000);
- return sprintf(buf, "[%6s]", caller);
+ snprintf(pid, sizeof(pid), "T%u", ci->pid);
+ snprintf(cpu, sizeof(cpu), "C%u", ci->cpu);
+
+ return sprintf(buf, "[%s][%6s][%6s]",
+ printk_caller_ctxt_txt[ci->ctxt], pid, cpu);
}
#else
#define print_caller(id, buf) 0
diff --git a/kernel/printk/printk_ringbuffer.c b/kernel/printk/printk_ringbuffer.c
index fde338606ce8..66be9b47be46 100644
--- a/kernel/printk/printk_ringbuffer.c
+++ b/kernel/printk/printk_ringbuffer.c
@@ -431,7 +431,8 @@ static enum desc_state get_desc_state(unsigned long id,
*/
static enum desc_state desc_read(struct prb_desc_ring *desc_ring,
unsigned long id, struct prb_desc *desc_out,
- u64 *seq_out, u32 *caller_id_out)
+ u64 *seq_out,
+ struct printk_caller_info *ci_out)
{
struct printk_info *info = to_info(desc_ring, id);
struct prb_desc *desc = to_desc(desc_ring, id);
@@ -480,8 +481,8 @@ static enum desc_state desc_read(struct prb_desc_ring *desc_ring,
}
if (seq_out)
*seq_out = info->seq; /* also part of desc_read:C */
- if (caller_id_out)
- *caller_id_out = info->caller_id; /* also part of desc_read:C */
+ if (caller_info_out)
+ *ci_out = info->caller_info; /* also part of desc_read:C */

/*
* 1. Guarantee the descriptor content is loaded before re-checking
@@ -1251,14 +1252,22 @@ static const char *get_data(struct prb_data_ring *data_ring,
return &db->data[0];
}

+static bool same_caller(struct printk_caller_info *cur_ci,
+ struct printk_caller_info *last_ci)
+{
+ return (cur_ci->cpu == last_ci->cpu &&
+ cur_ci->pid == last_ci->pid &&
+ cur_ci->ctxt == last_ci->ctxt)
/*
* Attempt to transition the newest descriptor from committed back to reserved
* so that the record can be modified by a writer again. This is only possible
* if the descriptor is not yet finalized and the provided @caller_id matches.
*/
static struct prb_desc *desc_reopen_last(struct prb_desc_ring *desc_ring,
- u32 caller_id, unsigned long *id_out)
+ struct printk_caller_info *ci,
+ unsigned long *id_out)
{
+ struct printk_caller_info last_ci;
unsigned long prev_state_val;
enum desc_state d_state;
struct prb_desc desc;
@@ -1272,8 +1281,8 @@ static struct prb_desc *desc_reopen_last(struct prb_desc_ring *desc_ring,
* To reduce unnecessarily reopening, first check if the descriptor
* state and caller ID are correct.
*/
- d_state = desc_read(desc_ring, id, &desc, NULL, &cid);
- if (d_state != desc_committed || cid != caller_id)
+ d_state = desc_read(desc_ring, id, &desc, NULL, &last_ci);
+ if (d_state != desc_committed || !same_caller(&last_ci, ci))
return NULL;

d = to_desc(desc_ring, id);
@@ -1348,7 +1357,7 @@ static struct prb_desc *desc_reopen_last(struct prb_desc_ring *desc_ring,
* @r->info->text_len after concatenating.
*/
bool prb_reserve_in_last(struct prb_reserved_entry *e, struct printk_ringbuffer *rb,
- struct printk_record *r, u32 caller_id, unsigned int max_size)
+ struct printk_record *r, struct printk_caller_info *ci, unsigned int max_size)
{
struct prb_desc_ring *desc_ring = &rb->desc_ring;
struct printk_info *info;
@@ -1359,7 +1368,7 @@ bool prb_reserve_in_last(struct prb_reserved_entry *e, struct printk_ringbuffer
local_irq_save(e->irqflags);

/* Transition the newest descriptor back to the reserved state. */
- d = desc_reopen_last(desc_ring, caller_id, &id);
+ d = desc_reopen_last(desc_ring, ci, &id);
if (!d) {
local_irq_restore(e->irqflags);
goto fail_reopen;
@@ -1377,11 +1386,11 @@ bool prb_reserve_in_last(struct prb_reserved_entry *e, struct printk_ringbuffer
e->id = id;

/*
- * desc_reopen_last() checked the caller_id, but there was no
+ * desc_reopen_last() checked the caller_info, but there was no
* exclusive access at that point. The descriptor may have
* changed since then.
*/
- if (caller_id != info->caller_id)
+ if (!match(ci, &info->caller_info)
goto fail;

if (BLK_DATALESS(&d->text_blk_lpos)) {
diff --git a/kernel/printk/printk_ringbuffer.h b/kernel/printk/printk_ringbuffer.h
index 18cd25e489b8..9934353965d4 100644
--- a/kernel/printk/printk_ringbuffer.h
+++ b/kernel/printk/printk_ringbuffer.h
@@ -6,6 +6,18 @@
#include <linux/atomic.h>
#include <linux/dev_printk.h>

+#define PRINTK_CTXT_TASK 0
+#define PRINTK_CTXT_HARDIRQ 1
+#define PRINTK_CTXT_SOFTIRQ 2
+#define PRINTK_CTXT_NMI 3
+
+
+struct printk_caller_info {
+ u32 ctxt:2; /* context */
+ u32 cpu:24; /* CPU number */
+ u32 pid; /* PID of the process */
+};
+
/*
* Meta information about each stored message.
*
@@ -19,8 +31,7 @@ struct printk_info {
u8 facility; /* syslog facility */
u8 flags:5; /* internal record flags */
u8 level:3; /* syslog level */
- u32 caller_id; /* thread id or processor id */
-
+ struct printk_caller_info caller_info;
struct dev_printk_info dev_info;
};