Re: [syzbot] WARNING in batadv_nc_mesh_free

From: Pavel Skripkin
Date: Fri Oct 22 2021 - 14:33:55 EST


On 10/22/21 02:19, syzbot wrote:
Hello,

syzbot found the following issue on:

HEAD commit: 2f111a6fd5b5 Merge tag 'ceph-for-5.15-rc7' of git://github..
git tree: upstream
console output: https://syzkaller.appspot.com/x/log.txt?x=115750acb00000
kernel config: https://syzkaller.appspot.com/x/.config?x=d95853dad8472c91
dashboard link: https://syzkaller.appspot.com/bug?extid=28b0702ada0bf7381f58
compiler: Debian clang version 11.0.1-2, GNU ld (GNU Binutils for Debian) 2.35.2
syz repro: https://syzkaller.appspot.com/x/repro.syz?x=1026ef2cb00000
C reproducer: https://syzkaller.appspot.com/x/repro.c?x=15c9c162b00000

IMPORTANT: if you fix the issue, please add the following tag to the commit:
Reported-by: syzbot+28b0702ada0bf7381f58@xxxxxxxxxxxxxxxxxxxxxxxxx

RBP: 00007ffef262e230 R08: 0000000000000002 R09: 00007fddc8003531
R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000004
R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000
------------[ cut here ]------------
ODEBUG: assert_init not available (active state 0) object type: timer_list hint: 0x0
WARNING: CPU: 0 PID: 6517 at lib/debugobjects.c:508 debug_print_object lib/debugobjects.c:505 [inline]
WARNING: CPU: 0 PID: 6517 at lib/debugobjects.c:508 debug_object_assert_init+0x1fa/0x250 lib/debugobjects.c:895
Modules linked in:
CPU: 0 PID: 6517 Comm: syz-executor011 Not tainted 5.15.0-rc6-syzkaller #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
RIP: 0010:debug_print_object lib/debugobjects.c:505 [inline]
RIP: 0010:debug_object_assert_init+0x1fa/0x250 lib/debugobjects.c:895
Code: e8 4b 15 b8 fd 4c 8b 45 00 48 c7 c7 a0 31 b4 8a 48 c7 c6 00 2e b4 8a 48 c7 c2 e0 33 b4 8a 31 c9 49 89 d9 31 c0 e8 b6 c6 36 fd <0f> 0b ff 05 3a 5c c5 09 48 83 c5 38 48 89 e8 48 c1 e8 03 42 80 3c
RSP: 0018:ffffc90002c7e698 EFLAGS: 00010046
RAX: cffa606352c78700 RBX: 0000000000000000 RCX: ffff888076ce9c80
RDX: 0000000000000000 RSI: 0000000080000000 RDI: 0000000000000000
RBP: ffffffff8a512d00 R08: ffffffff81693402 R09: ffffed1017383f2c
R10: ffffed1017383f2c R11: 0000000000000000 R12: dffffc0000000000
R13: ffff88801bcd1720 R14: 0000000000000002 R15: ffffffff90ba5a20
FS: 0000555557087300(0000) GS:ffff8880b9c00000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00007f5473f3c000 CR3: 0000000070ca6000 CR4: 00000000003506f0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
Call Trace:
debug_timer_assert_init kernel/time/timer.c:739 [inline]
debug_assert_init kernel/time/timer.c:784 [inline]
del_timer+0xa5/0x3d0 kernel/time/timer.c:1204
try_to_grab_pending+0x151/0xbb0 kernel/workqueue.c:1270
__cancel_work_timer+0x14c/0x710 kernel/workqueue.c:3129
batadv_nc_mesh_free+0x4a/0xf0 net/batman-adv/network-coding.c:1869
batadv_mesh_free+0x6f/0x140 net/batman-adv/main.c:245
batadv_mesh_init+0x4e5/0x550 net/batman-adv/main.c:226

It looks like cancel_delayed_work_sync() is called before INIT_DELAYED_WORK(), so the call chain looks like this:

batadv_mesh_init()
batadv_originator_init() <- injected allocation failure
batadv_mesh_free()
batadv_nc_mesh_free()
cancel_delayed_work_sync()


A quick fix could be to move INIT_DELAYED_WORK() from batadv_nc_mesh_init() to batadv_mesh_init(), since there are complex dependencies between the individual mesh parts, if I understood the comments correctly.


Just sharing this for discussion and syzbot testing.

#syz test
git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git master



With regards,
Pavel Skripkin


diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c
index 3ddd66e4c29e..a25c644acd6c 100644
--- a/net/batman-adv/main.c
+++ b/net/batman-adv/main.c
@@ -188,6 +188,10 @@ int batadv_mesh_init(struct net_device *soft_iface)
INIT_HLIST_HEAD(&bat_priv->softif_vlan_list);
INIT_HLIST_HEAD(&bat_priv->tp_list);

+#ifdef CONFIG_BATMAN_ADV_NC
+ INIT_DELAYED_WORK(&bat_priv->nc.work, batadv_nc_worker);
+#endif
+
bat_priv->gw.generation = 0;

ret = batadv_v_mesh_init(bat_priv);
diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c
index 9f06132e007d..eafd9936e021 100644
--- a/net/batman-adv/network-coding.c
+++ b/net/batman-adv/network-coding.c
@@ -47,7 +47,6 @@
static struct lock_class_key batadv_nc_coding_hash_lock_class_key;
static struct lock_class_key batadv_nc_decoding_hash_lock_class_key;

-static void batadv_nc_worker(struct work_struct *work);
static int batadv_nc_recv_coded_packet(struct sk_buff *skb,
struct batadv_hard_iface *recv_if);

@@ -158,7 +157,6 @@ int batadv_nc_mesh_init(struct batadv_priv *bat_priv)
batadv_hash_set_lock_class(bat_priv->nc.decoding_hash,
&batadv_nc_decoding_hash_lock_class_key);

- INIT_DELAYED_WORK(&bat_priv->nc.work, batadv_nc_worker);
batadv_nc_start_timer(bat_priv);

batadv_tvlv_handler_register(bat_priv, batadv_nc_tvlv_ogm_handler_v1,
@@ -707,7 +705,7 @@ batadv_nc_process_nc_paths(struct batadv_priv *bat_priv,
* coding
* @work: kernel work struct
*/
-static void batadv_nc_worker(struct work_struct *work)
+void batadv_nc_worker(struct work_struct *work)
{
struct delayed_work *delayed_work;
struct batadv_priv_nc *priv_nc;
diff --git a/net/batman-adv/network-coding.h b/net/batman-adv/network-coding.h
index 368cc3130e4c..cfcd1223a92b 100644
--- a/net/batman-adv/network-coding.h
+++ b/net/batman-adv/network-coding.h
@@ -37,6 +37,7 @@ void batadv_nc_skb_store_for_decoding(struct batadv_priv *bat_priv,
struct sk_buff *skb);
void batadv_nc_skb_store_sniffed_unicast(struct batadv_priv *bat_priv,
struct sk_buff *skb);
+void batadv_nc_worker(struct work_struct *work);

#else /* ifdef CONFIG_BATMAN_ADV_NC */

@@ -58,6 +59,10 @@ static inline void batadv_nc_mesh_free(struct batadv_priv *bat_priv)
{
}

+static inline void batadv_nc_worker(struct work_struct *work)
+{
+}
+
static inline void
batadv_nc_update_nc_node(struct batadv_priv *bat_priv,
struct batadv_orig_node *orig_node,