Re: iscsi deadlock while login and logout 2 iscsi nodes in tight loop at unstable network environment

From: wangqiang (AY)
Date: Thu Jun 29 2017 - 10:39:53 EST


>Hi all,
>Â
>In the upstream 4.12.0-rc5 kernel, I encounter a deadlock problem while login and logout 2 iscsi nodes in tight loop at unstable network environment, the iscsid will be D state.
>Â
>SysRq-w reported the following call stacks:
>[10660.667133] sysrq: SysRq : Show Blocked State
>[10660.671536]ÂÂ taskÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ PC stackÂÂ pid father
>[10660.672137] iscsidÂÂÂÂÂÂÂÂÂ DÂÂÂ 0 11415ÂÂÂÂÂ 1 0x00000000
>[10660.672139] Call Trace:
>[10660.672141]Â __schedule+0x1d9/0x780
>[10660.672144]Â schedule+0x36/0x80
>[10660.672146]Â async_synchronize_cookie_domain+0xaa/0x150
>[10660.672149]Â ? remove_wait_queue+0x60/0x60
>[10660.672151]Â async_synchronize_full_domain+0x18/0x20
>[10660.672153]Â sd_remove+0x4d/0xc0 [sd_mod]
>[10660.672156]Â device_release_driver_internal+0x141/0x200
>[10660.672158]Â device_release_driver+0x12/0x20
>[10660.672159]Â bus_remove_device+0xfd/0x170
>[10660.672161]Â device_del+0x1e8/0x330
>[10660.672164]Â __scsi_remove_device+0xc6/0xd0
>[10660.672166]Â scsi_remove_device+0x26/0x40
>[10660.672168]Â scsi_remove_target+0x165/0x1b0
>[10660.672174]Â __iscsi_unbind_session+0xbe/0x170 [scsi_transport_iscsi]
>[10660.672180]Â iscsi_remove_session+0xd6/0x190 [scsi_transport_iscsi]
>[10660.672186]Â iscsi_destroy_session+0x12/0x50 [scsi_transport_iscsi]
>[10660.672192]Â iscsi_session_teardown+0xd5/0xf0 [libiscsi]
>[10660.672195]Â iscsi_sw_tcp_session_destroy+0x48/0x70 [iscsi_tcp]
>[10660.672202]Â iscsi_if_recv_msg+0xbb1/0x1390 [scsi_transport_iscsi]
>[10660.672205]Â ? __kmalloc_node_track_caller+0x199/0x280
>[10660.672211]Â iscsi_if_rx+0x8e/0x1f0 [scsi_transport_iscsi]
>[10660.672214]Â netlink_unicast+0x181/0x240
>[10660.672216]Â netlink_sendmsg+0x2c2/0x3b0
>[10660.672220]Â sock_sendmsg+0x38/0x50
>[10660.672222]Â ___sys_sendmsg+0x27a/0x290
>[10660.672224]Â ? kernfs_refresh_inode+0xc8/0xe0
>[10660.672227]Â ? _copy_to_user+0x26/0x40
>[10660.672230]Â __sys_sendmsg+0x54/0x90
>[10660.672232]Â SyS_sendmsg+0x12/0x20
>[10660.672234]Â entry_SYSCALL_64_fastpath+0x1a/0xa5
>
>[10660.672325] kworker/u48:2ÂÂ DÂÂÂ 0 22364ÂÂÂÂÂ 2 0x00000000
>[10660.672332] Workqueue: scsi_wq_7 __iscsi_unbind_session [scsi_transport_iscsi]
>[10660.672333] Call Trace:
>[10660.672336]Â __schedule+0x1d9/0x780
>[10660.672350]Â ? set_next_entity+0x7c/0x7a0
>[10660.672353]Â schedule+0x36/0x80
>[10660.672354]Â schedule_timeout+0x249/0x300
>[10660.672356]Â ? finish_task_switch+0xec/0x240
>[10660.672359]Â wait_for_completion+0x121/0x180
>[10660.672361]Â ? wake_up_q+0x80/0x80
>[10660.672364]Â flush_workqueue+0x16d/0x490
>[10660.672366]Â ? probe_sched_wakeup+0x1c/0x20
>[10660.672369]Â drain_workqueue+0x5b/0x130
>[10660.672371]Â destroy_workqueue+0x1a/0x200
>[10660.672373]Â scsi_host_dev_release+0x6f/0x130
>[10660.672375]Â device_release+0x32/0x90
>[10660.672377]Â kobject_cleanup+0x63/0x170
>[10660.672379]Â kobject_put+0x28/0x50
>[10660.672381]Â put_device+0x17/0x20
>[10660.672383]Â scsi_host_put+0x15/0x20
>[10660.672389]Â iscsi_session_release+0x3d/0x80 [scsi_transport_iscsi]
>[10660.672390]Â device_release+0x32/0x90
>[10660.672393]Â kobject_cleanup+0x63/0x170
>[10660.672395]Â kobject_put+0x28/0x50
>[10660.672396]Â put_device+0x17/0x20
>[10660.672398]Â scsi_target_dev_release+0x1f/0x30
>[10660.672400]Â device_release+0x32/0x90
>[10660.672402]Â kobject_cleanup+0x63/0x170
>[10660.672404]Â kobject_put+0x28/0x50
>[10660.672406]Â put_device+0x17/0x20
>[10660.672407]Â scsi_target_destroy+0xc5/0xe0
>[10660.672409]Â scsi_target_reap_ref_release+0x35/0x40
>[10660.672411]Â scsi_target_reap+0x2e/0x40
>[10660.672413]Â scsi_remove_target+0x197/0x1b0
>[10660.672419]Â __iscsi_unbind_session+0xbe/0x170 [scsi_transport_iscsi]
>[10660.672421]Â process_one_work+0x149/0x360
>[10660.672423]Â worker_thread+0x4d/0x3c0
>[10660.672425]Â kthread+0x109/0x140
>[10660.672427]Â ? rescuer_thread+0x380/0x380
>[10660.672428]Â ? kthread_park+0x60/0x60
>[10660.672430]Â ret_from_fork+0x25/0x30
>Â
>sd_remove() waits for async_synchronize_cookie_domain which is defined in sd_probe() and relate to sd_probe_async(), but due to unstable network environment, the session which process sd_probe_async() became ISCSI_STATE_FAILED state, and IO corresponding to sd_probe_async() doesnt complete, iscsi_conn_failure() send event to iscsid and wait for respond to stop connection, iscsid cannot respond as the sd_remove() is waiting for completion of sd_probe_async(). Hence the deadlock.
>Â
>So, is any method to fix this deadlock?
>Any reply would be thankful.
>Â
>Execute the shell scripts below at the initiator side and wait for 5 ~ 20 minutes, the deadlock can be reproduced.(nodes need at least 10 luns respectively.) #!/bin/bash # discovery 2 nodes iscsiadm -m discovery -p IP_A -t st iscsiadm -m discovery -p IP_B -t st # set network loss rate (eth1 connected to the ipsan) tc qdisc add dev eth1 root netem loss 60% # loop test
>n=1
>while [ 1 ]
>do
>ÂÂÂÂ echo "$(date) ==== test loop $n ===="
>ÂÂÂ iscsiadm -m node -T iqn.xxxx::yyyy:IP_A --login
>ÂÂÂ sleep 5
>ÂÂÂÂ iscsiadm -m node -T iqn.xxxx::yyyy:IP_A --logout&
>ÂÂÂÂ iscsiadm -m node -T iqn.xxxx::yyyz:IP_B --login&
>ÂÂÂÂ sleep 10
>ÂÂÂÂ iscsiadm -m node -u
>ÂÂÂÂ n=$(($n + 1))
>ÂÂÂÂ echo "$(date) ==== continue to test loop ===="
>Done


This problem puzzle me for a long time.

I found 2 factors lead to this stuck:
1. Pid 11415 stack is synchronization operation of iscsid after receIved ISCSI_KEVENT_CONN_ERROR from kernel. Iscsid cannot process any other KEVENT until async_synchronize_cookie_domain complete. In this problem, async_synchronize_cookie_domain is waitting for sd_probe_async function in scsi_sd_probe_domain.
2. Sd_remove() wait for the completion of async threads executing sd_probe_async of disks on unrelated host adapters, rather than just the the async thread associated with the scsi_disk being removed.

I try to fix this problem by 2 patchs below.
Patch 1 make sd_remove() just wait for the the async thread associated with the scsi_disk being removed.
Patch 2 make the operation of iscsid after receIved ISCSI_KEVENT_CONN_ERROR be asynchronous by put the __iscsi_destroy_session() in work queue.

But result in crash as scsi_mq_uninit_cmd() run into BUG_ON(list_empty(&cmd->list)).

Any one has suggestion to fix this problem?


Patch 1:
Date: Wed, 28 Jun 2017 08:57:08 +0800
Subject: [PATCH] fix async_domain

---
drivers/scsi/sd.c | 7 +++++--
drivers/scsi/sd.h | 2 ++
2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 23cb9ee..1359268 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -2986,7 +2986,9 @@ static int sd_probe(struct device *dev)
dev_set_drvdata(dev, sdkp);

get_device(&sdkp->dev); /* prevent release before async_schedule */
- async_schedule_domain(sd_probe_async, sdkp, &scsi_sd_probe_domain);
+ //async_schedule_domain(sd_probe_async, sdkp, &scsi_sd_probe_domain);
+ INIT_LIST_HEAD(&sdkp->sd_async_domain.pending);
+ async_schedule_domain(sd_probe_async, sdkp, &sdkp->sd_async_domain);

return 0;

@@ -3022,7 +3024,8 @@ static int sd_remove(struct device *dev)
devt = disk_devt(sdkp->disk);
scsi_autopm_get_device(sdkp->device);

- async_synchronize_full_domain(&scsi_sd_probe_domain);
+ //async_synchronize_full_domain(&scsi_sd_probe_domain);
+ async_synchronize_full_domain(&sdkp->sd_async_domain);
device_del(&sdkp->dev);
del_gendisk(sdkp->disk);
sd_shutdown(dev);
diff --git a/drivers/scsi/sd.h b/drivers/scsi/sd.h
index 7c27b54..f308988 100644
--- a/drivers/scsi/sd.h
+++ b/drivers/scsi/sd.h
@@ -2,6 +2,7 @@
#define _SCSI_DISK_H

#include <linux/rh_kabi.h>
+#include <linux/async.h>

/*
* More than enough for everybody ;) The huge number of majors
@@ -99,6 +100,7 @@ struct scsi_disk {
* allow extending the structure while preserving ABI.
*/
u32 xcopy_reserved;
+ struct async_domain sd_async_domain;

RH_KABI_RESERVE(1)
RH_KABI_RESERVE(2)


Patch 2:
Date: Sun, 25 Jun 2017 19:29:25 +0800
Subject: [PATCH 1/1] Fix iscsid hang.

---
drivers/scsi/scsi_transport_iscsi.c | 14 +++++++++++++-
include/scsi/scsi_transport_iscsi.h | 1 +
2 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c
index 9a6ed90..b254c1f 100644
--- a/drivers/scsi/scsi_transport_iscsi.c
+++ b/drivers/scsi/scsi_transport_iscsi.c
@@ -1994,6 +1994,16 @@ void iscsi_block_session(struct iscsi_cls_session *session)
}
EXPORT_SYMBOL_GPL(iscsi_block_session);

+static void __iscsi_destroy_session(struct work_struct *work)
+{
+ struct iscsi_cls_session *session =
+ container_of(work, struct iscsi_cls_session,
+ destroy_work);
+ struct iscsi_transport *transport = session->transport;
+
+ transport->destroy_session(session);
+}
+
static void __iscsi_unbind_session(struct work_struct *work)
{
struct iscsi_cls_session *session =
@@ -2049,6 +2059,7 @@ iscsi_alloc_session(struct Scsi_Host *shost, struct iscsi_transport *transport,
INIT_WORK(&session->unblock_work, __iscsi_unblock_session);
INIT_WORK(&session->block_work, __iscsi_block_session);
INIT_WORK(&session->unbind_work, __iscsi_unbind_session);
+ INIT_WORK(&session->destroy_work, __iscsi_destroy_session);
INIT_WORK(&session->scan_work, iscsi_scan_session);
spin_lock_init(&session->lock);

@@ -3538,7 +3549,8 @@ iscsi_if_recv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, uint32_t *group)
case ISCSI_UEVENT_DESTROY_SESSION:
session = iscsi_session_lookup(ev->u.d_session.sid);
if (session)
- transport->destroy_session(session);
+ scsi_queue_work(iscsi_session_to_shost(session),
+ &session->destroy_work);
else
err = -EINVAL;
break;
diff --git a/include/scsi/scsi_transport_iscsi.h b/include/scsi/scsi_transport_iscsi.h
index 6183d20..bbdfed6 100644
--- a/include/scsi/scsi_transport_iscsi.h
+++ b/include/scsi/scsi_transport_iscsi.h
@@ -238,6 +238,7 @@ struct iscsi_cls_session {
struct work_struct unblock_work;
struct work_struct scan_work;
struct work_struct unbind_work;
+ struct work_struct destroy_work;

/* recovery fields */
int recovery_tmo;
--
1.8.3.1