FROMLIST: scsi: ufs: Fix a race condition related to device management commands

If a completion happens after wait_for_completion_timeout() times out
and before ufshcd_clear_cmds() is called then the completion code
may crash on the complete() call in __ufshcd_transfer_req_compl(). This
patch fixes the following crash:

Unable to handle kernel NULL pointer dereference at virtual address 0000000000000008
Call trace:
 complete+0x64/0x178
 __ufshcd_transfer_req_compl+0x30c/0x9c0
 ufshcd_poll+0xf0/0x208
 ufshcd_sl_intr+0xb8/0xf0
 ufshcd_intr+0x168/0x2f4
 __handle_irq_event_percpu+0xa0/0x30c
 handle_irq_event+0x84/0x178
 handle_fasteoi_irq+0x150/0x2e8
 __handle_domain_irq+0x114/0x1e4
 gic_handle_irq.31846+0x58/0x300
 el1_irq+0xe4/0x1c0
 efi_header_end+0x110/0x680
 __irq_exit_rcu+0x108/0x124
 __handle_domain_irq+0x118/0x1e4
 gic_handle_irq.31846+0x58/0x300
 el1_irq+0xe4/0x1c0
 cpuidle_enter_state+0x3ac/0x8c4
 do_idle+0x2fc/0x55c
 cpu_startup_entry+0x84/0x90
 kernel_init+0x0/0x310
 start_kernel+0x0/0x608
 start_kernel+0x4ec/0x608

Bug: 238156478
Link: https://lore.kernel.org/linux-scsi/20220713184008.2232094-1-bvanassche@acm.org/T/#u
Change-Id: Icf646e1e536e2d4918e5625574117f6f42e1ff7d
Signed-off-by: Bart Van Assche <bvanassche@google.com>
This commit is contained in:
Bart Van Assche
2022-07-08 15:06:54 -07:00
committed by Treehugger Robot
parent 561c270725
commit eb4344203d

View File

@@ -3016,37 +3016,58 @@ ufshcd_dev_cmd_completion(struct ufs_hba *hba, struct ufshcd_lrb *lrbp)
static int ufshcd_wait_for_dev_cmd(struct ufs_hba *hba,
struct ufshcd_lrb *lrbp, int max_timeout)
{
int err = 0;
unsigned long time_left;
unsigned long time_left = msecs_to_jiffies(max_timeout);
unsigned long flags;
bool pending;
int err;
retry:
time_left = wait_for_completion_timeout(hba->dev_cmd.complete,
msecs_to_jiffies(max_timeout));
time_left);
spin_lock_irqsave(hba->host->host_lock, flags);
hba->dev_cmd.complete = NULL;
if (likely(time_left)) {
/*
* The caller of this function still owns the @lrbp tag so the
* code below does not trigger any race conditions.
*/
hba->dev_cmd.complete = NULL;
err = ufshcd_get_tr_ocs(lrbp);
if (!err)
err = ufshcd_dev_cmd_completion(hba, lrbp);
}
spin_unlock_irqrestore(hba->host->host_lock, flags);
if (!time_left) {
} else {
err = -ETIMEDOUT;
dev_dbg(hba->dev, "%s: dev_cmd request timedout, tag %d\n",
__func__, lrbp->task_tag);
if (!ufshcd_clear_cmds(hba, 1U << lrbp->task_tag))
if (!ufshcd_clear_cmds(hba, 1U << lrbp->task_tag)) {
/* successfully cleared the command, retry if needed */
err = -EAGAIN;
/*
* in case of an error, after clearing the doorbell,
* we also need to clear the outstanding_request
* field in hba
*/
spin_lock_irqsave(&hba->outstanding_lock, flags);
__clear_bit(lrbp->task_tag, &hba->outstanding_reqs);
spin_unlock_irqrestore(&hba->outstanding_lock, flags);
/*
* Since clearing the command succeeded we also need to
* clear the task tag bit from the outstanding_reqs
* variable.
*/
spin_lock_irqsave(&hba->outstanding_lock, flags);
pending = test_bit(lrbp->task_tag,
&hba->outstanding_reqs);
if (pending) {
hba->dev_cmd.complete = NULL;
__clear_bit(lrbp->task_tag,
&hba->outstanding_reqs);
}
spin_unlock_irqrestore(&hba->outstanding_lock, flags);
if (!pending) {
/*
* A race occurred between this function and the
* completion handler.
*/
time_left = 1;
goto retry;
}
} else {
dev_err(hba->dev, "%s: failed to clear tag %d\n", __func__,
lrbp->task_tag);
}
}
return err;