Just another reference which make it clear that the reordering of function calls in your patch is likely not correct:
https://elixir.bootlin.com/linux/v5.19.7/source/net/packet/af_packet.c#L4734
static int __init packet_init(void)
{
int rc;
rc = proto_register(&packet_proto, 0);
if (rc)
goto out;
rc = sock_register(&packet_family_ops);
if (rc)
goto out_proto;
rc = register_pernet_subsys(&packet_net_ops);
if (rc)
goto out_sock;
rc = register_netdevice_notifier(&packet_netdev_notifier);
if (rc)
goto out_pernet;
return 0;
out_pernet:
unregister_pernet_subsys(&packet_net_ops);
out_sock:
sock_unregister(PF_PACKET);
out_proto:
proto_unregister(&packet_proto);
out:
return rc;
}
I had a simple test with can_raw. kernel modification as following:
--- a/net/can/af_can.c
+++ b/net/can/af_can.c
@@ -118,6 +118,8 @@ static int can_create(struct net *net, struct socket *sock, int protocol,
const struct can_proto *cp;
int err = 0;
+ printk("%s: protocol: %d\n", __func__, protocol);
+
sock->state = SS_UNCONNECTED;
if (protocol < 0 || protocol >= CAN_NPROTO)
diff --git a/net/can/raw.c b/net/can/raw.c
index 5dca1e9e44cf..6052fd0cc7b2 100644
--- a/net/can/raw.c
+++ b/net/can/raw.c
@@ -943,6 +943,9 @@ static __init int raw_module_init(void)
pr_info("can: raw protocol\n");
err = can_proto_register(&raw_can_proto);
+ printk("%s: can_proto_register done\n", __func__);
+ msleep(5000); // 5s
+ printk("%s: to register_netdevice_notifier\n", __func__);
if (err < 0)
pr_err("can: registration of raw protocol failed\n");
else
I added 5 seconds delay after can_proto_register() and some debugs.
Testcase codes just try to create a CAN_RAW socket in user space as following:
int main(int argc, char **argv)
{
int s;
s = socket(PF_CAN, SOCK_RAW, CAN_RAW);
if (s < 0) {
perror("socket");
return 0;
}
close(s);
return 0;
}
Execute 'modprobe can_raw' and the testcase we can get message as following:
[ 109.312767] can: raw protocol
[ 109.312772] raw_module_init: can_proto_register done
[ 111.296178] can_create: protocol: 1
[ 114.809141] raw_module_init: to register_netdevice_notifier
It proved that it can create CAN_RAW socket and process socket once can_proto_register() successfully.
CAN_BCM is the same.
In the vast majority of cases, creating protocol socket and operating it are after protocol module initialization.
The scenario that I pointed in my patch is a low probability.
af_packet.c and af_key.c do like that doesn't mean it's very correct. I think so.
Thank you for your prompt reply.
On 08.09.22 09:10, Oliver Hartkopp wrote:
.
On 08.09.22 05:04, Ziyang Xuan wrote:
Now, register_netdevice_notifier() and register_pernet_subsys() are both
after can_proto_register(). It can create CAN_BCM socket and process socket
once can_proto_register() successfully, so it is possible missing notifier
event or proc node creation because notifier or bcm proc directory is not
registered or created yet. Although this is a low probability scenario, it
is not impossible.
Move register_pernet_subsys() and register_netdevice_notifier() to the
front of can_proto_register(). In addition, register_pernet_subsys() and
register_netdevice_notifier() may fail, check their results are necessary.
Signed-off-by: Ziyang Xuan <william.xuanziyang@xxxxxxxxxx>
---
net/can/bcm.c | 18 +++++++++++++++---
1 file changed, 15 insertions(+), 3 deletions(-)
diff --git a/net/can/bcm.c b/net/can/bcm.c
index e60161bec850..e2783156bfd1 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -1744,15 +1744,27 @@ static int __init bcm_module_init(void)
pr_info("can: broadcast manager protocol\n");
+ err = register_pernet_subsys(&canbcm_pernet_ops);
+ if (err)
+ return err;
Analogue to your patch for the CAN_RAW socket here (which has been applied to can-next right now) ...
https://lore.kernel.org/linux-can/7af9401f0d2d9fed36c1667b5ac9b8df8f8b87ee.1661584485.git.william.xuanziyang@xxxxxxxxxx/T/#u
... I'm not sure whether this is the right sequence to acquire the different resources here.
E.g. in ipsec_pfkey_init() in af_key.c
https://elixir.bootlin.com/linux/v5.19.7/source/net/key/af_key.c#L3887
proto_register() is executed before register_pernet_subsys()
Which seems to be more natural to me.
Best regards,
Oliver
+
+ err = register_netdevice_notifier(&canbcm_notifier);
+ if (err)
+ goto register_notifier_failed;
+
err = can_proto_register(&bcm_can_proto);
if (err < 0) {
printk(KERN_ERR "can: registration of bcm protocol failed\n");
- return err;
+ goto register_proto_failed;
}
- register_pernet_subsys(&canbcm_pernet_ops);
- register_netdevice_notifier(&canbcm_notifier);
return 0;
+
+register_proto_failed:
+ unregister_netdevice_notifier(&canbcm_notifier);
+register_notifier_failed:
+ unregister_pernet_subsys(&canbcm_pernet_ops);
+ return err;
}
static void __exit bcm_module_exit(void)