Re: [PATCH net 1/3] net/smc: Resolve the race between link group access and termination
From: Karsten Graul
Date: Tue Jan 11 2022 - 03:23:16 EST
On 10/01/2022 10:26, Wen Gu wrote:
> We encountered some crashes caused by the race between the access
> and the termination of link groups.
>
>
> +/* won't be freed until no one accesses to lgr anymore */
> +static void __smc_lgr_free(struct smc_link_group *lgr)
> +{
> + smc_lgr_free_bufs(lgr);
> + if (!lgr->is_smcd)
> + smc_wr_free_lgr_mem(lgr);
> + kfree(lgr);
> +}
> +
> /* remove a link group */
> static void smc_lgr_free(struct smc_link_group *lgr)
> {
> @@ -1298,7 +1326,6 @@ static void smc_lgr_free(struct smc_link_group *lgr)
> smc_llc_lgr_clear(lgr);
> }
>
> - smc_lgr_free_bufs(lgr);
> destroy_workqueue(lgr->tx_wq);
> if (lgr->is_smcd) {
> smc_ism_put_vlan(lgr->smcd, lgr->vlan_id);
> @@ -1306,11 +1333,21 @@ static void smc_lgr_free(struct smc_link_group *lgr)
> if (!atomic_dec_return(&lgr->smcd->lgr_cnt))
> wake_up(&lgr->smcd->lgrs_deleted);
> } else {
> - smc_wr_free_lgr_mem(lgr);
> if (!atomic_dec_return(&lgr_cnt))
> wake_up(&lgrs_deleted);
These waiters (separate ones for smcd and smcr) are used to wait for all lgrs
to be deleted when a module unload or reboot was triggered, so they must only be
woken up when the lgr is actually freed.