From 8980864315ed2d54700648af0bf83e3735c6233f Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Tue, 18 Oct 2022 13:29:58 -0700
Subject: [PATCH] FROMGIT: scsi: ufs: Fix a deadlock between PM and the SCSI
 error handler

The following deadlock has been observed on multiple test setups:

 * ufshcd_wl_suspend() is waiting for blk_execute_rq(START STOP UNIT) to
   complete while ufshcd_wl_suspend() holds host_sem.

 * The SCSI error handler is activated, changes the host state to
   SHOST_RECOVERY, ufshcd_eh_host_reset_handler() and ufshcd_err_handler()
   are called and the latter function tries to obtain host_sem.

This is a deadlock because blk_execute_rq() can't execute SCSI commands
while the host is in the SHOST_RECOVERY state and because the error handler
cannot make progress because host_sem is held by another thread.

Fix this deadlock as follows:

 * Fail attempts to suspend the system while the SCSI error handler is in
   progress by setting the SCMD_FAIL_IF_RECOVERING flag for START STOP UNIT
   commands.

 * If the system is suspending and a START STOP UNIT command times out,
   handle the SCSI command timeout from inside the context of the SCSI
   timeout handler instead of activating the SCSI error handler.

The runtime power management code is not affected by this deadlock since
hba->host_sem is not touched by the runtime power management functions in
the UFS driver.

Reviewed-by: Adrian Hunter <adrian.hunter@intel.com>
Change-Id: I815c978c7857cc0f2c838d4d5eb68502a1591f8a
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Link: https://lore.kernel.org/r/20221018202958.1902564-11-bvanassche@acm.org
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Bug: 267974767
(cherry picked from commit 7029e2151a7c6a5c60b35996d026528e7d51aae3 git://git.kernel.org/pub/scm/linux/kernel/git/mkp/scsi.git for-next)
Signed-off-by: Bart Van Assche <bvanassche@google.com>
---
 drivers/ufs/core/ufshcd.c | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c
index 146e3327ac64..a1b94637cd94 100644
--- a/drivers/ufs/core/ufshcd.c
+++ b/drivers/ufs/core/ufshcd.c
@@ -8356,6 +8356,28 @@ out:
 	}
 }
 
+static enum scsi_timeout_action ufshcd_eh_timed_out(struct scsi_cmnd *scmd)
+{
+	struct ufs_hba *hba = shost_priv(scmd->device->host);
+
+	if (!hba->system_suspending) {
+		/* Activate the error handler in the SCSI core. */
+		return SCSI_EH_NOT_HANDLED;
+	}
+
+	/*
+	 * If we get here we know that no TMFs are outstanding and also that
+	 * the only pending command is a START STOP UNIT command. Handle the
+	 * timeout of that command directly to prevent a deadlock between
+	 * ufshcd_set_dev_pwr_mode() and ufshcd_err_handler().
+	 */
+	ufshcd_link_recovery(hba);
+	dev_info(hba->dev, "%s() finished; outstanding_tasks = %#lx.\n",
+		 __func__, hba->outstanding_tasks);
+
+	return hba->outstanding_reqs ? SCSI_EH_RESET_TIMER : SCSI_EH_DONE;
+}
+
 static const struct attribute_group *ufshcd_driver_groups[] = {
 	&ufs_sysfs_unit_descriptor_group,
 	&ufs_sysfs_lun_attributes_group,
@@ -8390,6 +8412,7 @@ static struct scsi_host_template ufshcd_driver_template = {
 	.eh_abort_handler	= ufshcd_abort,
 	.eh_device_reset_handler = ufshcd_eh_device_reset_handler,
 	.eh_host_reset_handler   = ufshcd_eh_host_reset_handler,
+	.eh_timed_out		= ufshcd_eh_timed_out,
 	.this_id		= -1,
 	.sg_tablesize		= SG_ALL,
 	.cmd_per_lun		= UFSHCD_CMD_PER_LUN,
@@ -8811,6 +8834,7 @@ static int ufshcd_execute_start_stop(struct scsi_device *sdev,
 	scmd->cmd_len = COMMAND_SIZE(cdb[0]);
 	memcpy(scmd->cmnd, cdb, scmd->cmd_len);
 	scmd->allowed = 0/*retries*/;
+	scmd->flags |= SCMD_FAIL_IF_RECOVERING;
 	req->timeout = 1 * HZ;
 	req->rq_flags |= RQF_PM | RQF_QUIET;