mirror of
https://github.com/hardkernel/linux.git
synced 2026-06-05 10:31:46 +09:00
scsi: lpfc: Abort outstanding ELS cmds when mailbox timeout error is detected
[ Upstream commit 089ea22e37 ]
A mailbox timeout error usually indicates something has gone wrong, and a
follow-up reset of the HBA is a typical recovery mechanism. Introduce an
MBX_TMO_ERR flag to detect such cases and have lpfc_els_flush_cmd() abort
ELS commands if the MBX_TMO_ERR flag is set. This ensures that none of
the registered SGL resources meant for ELS traffic are leaked after an
HBA reset.
Signed-off-by: Justin Tee <justin.tee@broadcom.com>
Link: https://lore.kernel.org/r/20230712180522.112722-9-justintee8345@gmail.com
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
This commit is contained in:
committed by
Greg Kroah-Hartman
parent
dfcd3c0102
commit
b2a019ec8b
@@ -895,6 +895,7 @@ enum lpfc_irq_chann_mode {
|
|||||||
enum lpfc_hba_bit_flags {
|
enum lpfc_hba_bit_flags {
|
||||||
FABRIC_COMANDS_BLOCKED,
|
FABRIC_COMANDS_BLOCKED,
|
||||||
HBA_PCI_ERR,
|
HBA_PCI_ERR,
|
||||||
|
MBX_TMO_ERR,
|
||||||
};
|
};
|
||||||
|
|
||||||
struct lpfc_hba {
|
struct lpfc_hba {
|
||||||
|
|||||||
@@ -9410,11 +9410,13 @@ void
|
|||||||
lpfc_els_flush_cmd(struct lpfc_vport *vport)
|
lpfc_els_flush_cmd(struct lpfc_vport *vport)
|
||||||
{
|
{
|
||||||
LIST_HEAD(abort_list);
|
LIST_HEAD(abort_list);
|
||||||
|
LIST_HEAD(cancel_list);
|
||||||
struct lpfc_hba *phba = vport->phba;
|
struct lpfc_hba *phba = vport->phba;
|
||||||
struct lpfc_sli_ring *pring;
|
struct lpfc_sli_ring *pring;
|
||||||
struct lpfc_iocbq *tmp_iocb, *piocb;
|
struct lpfc_iocbq *tmp_iocb, *piocb;
|
||||||
u32 ulp_command;
|
u32 ulp_command;
|
||||||
unsigned long iflags = 0;
|
unsigned long iflags = 0;
|
||||||
|
bool mbx_tmo_err;
|
||||||
|
|
||||||
lpfc_fabric_abort_vport(vport);
|
lpfc_fabric_abort_vport(vport);
|
||||||
|
|
||||||
@@ -9436,15 +9438,16 @@ lpfc_els_flush_cmd(struct lpfc_vport *vport)
|
|||||||
if (phba->sli_rev == LPFC_SLI_REV4)
|
if (phba->sli_rev == LPFC_SLI_REV4)
|
||||||
spin_lock(&pring->ring_lock);
|
spin_lock(&pring->ring_lock);
|
||||||
|
|
||||||
|
mbx_tmo_err = test_bit(MBX_TMO_ERR, &phba->bit_flags);
|
||||||
/* First we need to issue aborts to outstanding cmds on txcmpl */
|
/* First we need to issue aborts to outstanding cmds on txcmpl */
|
||||||
list_for_each_entry_safe(piocb, tmp_iocb, &pring->txcmplq, list) {
|
list_for_each_entry_safe(piocb, tmp_iocb, &pring->txcmplq, list) {
|
||||||
if (piocb->cmd_flag & LPFC_IO_LIBDFC)
|
if (piocb->cmd_flag & LPFC_IO_LIBDFC && !mbx_tmo_err)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
if (piocb->vport != vport)
|
if (piocb->vport != vport)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
if (piocb->cmd_flag & LPFC_DRIVER_ABORTED)
|
if (piocb->cmd_flag & LPFC_DRIVER_ABORTED && !mbx_tmo_err)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
/* On the ELS ring we can have ELS_REQUESTs or
|
/* On the ELS ring we can have ELS_REQUESTs or
|
||||||
@@ -9463,8 +9466,8 @@ lpfc_els_flush_cmd(struct lpfc_vport *vport)
|
|||||||
*/
|
*/
|
||||||
if (phba->link_state == LPFC_LINK_DOWN)
|
if (phba->link_state == LPFC_LINK_DOWN)
|
||||||
piocb->cmd_cmpl = lpfc_cmpl_els_link_down;
|
piocb->cmd_cmpl = lpfc_cmpl_els_link_down;
|
||||||
}
|
} else if (ulp_command == CMD_GEN_REQUEST64_CR ||
|
||||||
if (ulp_command == CMD_GEN_REQUEST64_CR)
|
mbx_tmo_err)
|
||||||
list_add_tail(&piocb->dlist, &abort_list);
|
list_add_tail(&piocb->dlist, &abort_list);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -9476,11 +9479,19 @@ lpfc_els_flush_cmd(struct lpfc_vport *vport)
|
|||||||
list_for_each_entry_safe(piocb, tmp_iocb, &abort_list, dlist) {
|
list_for_each_entry_safe(piocb, tmp_iocb, &abort_list, dlist) {
|
||||||
spin_lock_irqsave(&phba->hbalock, iflags);
|
spin_lock_irqsave(&phba->hbalock, iflags);
|
||||||
list_del_init(&piocb->dlist);
|
list_del_init(&piocb->dlist);
|
||||||
lpfc_sli_issue_abort_iotag(phba, pring, piocb, NULL);
|
if (mbx_tmo_err)
|
||||||
|
list_move_tail(&piocb->list, &cancel_list);
|
||||||
|
else
|
||||||
|
lpfc_sli_issue_abort_iotag(phba, pring, piocb, NULL);
|
||||||
|
|
||||||
spin_unlock_irqrestore(&phba->hbalock, iflags);
|
spin_unlock_irqrestore(&phba->hbalock, iflags);
|
||||||
}
|
}
|
||||||
/* Make sure HBA is alive */
|
if (!list_empty(&cancel_list))
|
||||||
lpfc_issue_hb_tmo(phba);
|
lpfc_sli_cancel_iocbs(phba, &cancel_list, IOSTAT_LOCAL_REJECT,
|
||||||
|
IOERR_SLI_ABORTED);
|
||||||
|
else
|
||||||
|
/* Make sure HBA is alive */
|
||||||
|
lpfc_issue_hb_tmo(phba);
|
||||||
|
|
||||||
if (!list_empty(&abort_list))
|
if (!list_empty(&abort_list))
|
||||||
lpfc_printf_vlog(vport, KERN_ERR, LOG_TRACE_EVENT,
|
lpfc_printf_vlog(vport, KERN_ERR, LOG_TRACE_EVENT,
|
||||||
|
|||||||
@@ -7563,6 +7563,8 @@ lpfc_disable_pci_dev(struct lpfc_hba *phba)
|
|||||||
void
|
void
|
||||||
lpfc_reset_hba(struct lpfc_hba *phba)
|
lpfc_reset_hba(struct lpfc_hba *phba)
|
||||||
{
|
{
|
||||||
|
int rc = 0;
|
||||||
|
|
||||||
/* If resets are disabled then set error state and return. */
|
/* If resets are disabled then set error state and return. */
|
||||||
if (!phba->cfg_enable_hba_reset) {
|
if (!phba->cfg_enable_hba_reset) {
|
||||||
phba->link_state = LPFC_HBA_ERROR;
|
phba->link_state = LPFC_HBA_ERROR;
|
||||||
@@ -7573,13 +7575,25 @@ lpfc_reset_hba(struct lpfc_hba *phba)
|
|||||||
if (phba->sli.sli_flag & LPFC_SLI_ACTIVE) {
|
if (phba->sli.sli_flag & LPFC_SLI_ACTIVE) {
|
||||||
lpfc_offline_prep(phba, LPFC_MBX_WAIT);
|
lpfc_offline_prep(phba, LPFC_MBX_WAIT);
|
||||||
} else {
|
} else {
|
||||||
|
if (test_bit(MBX_TMO_ERR, &phba->bit_flags)) {
|
||||||
|
/* Perform a PCI function reset to start from clean */
|
||||||
|
rc = lpfc_pci_function_reset(phba);
|
||||||
|
lpfc_els_flush_all_cmd(phba);
|
||||||
|
}
|
||||||
lpfc_offline_prep(phba, LPFC_MBX_NO_WAIT);
|
lpfc_offline_prep(phba, LPFC_MBX_NO_WAIT);
|
||||||
lpfc_sli_flush_io_rings(phba);
|
lpfc_sli_flush_io_rings(phba);
|
||||||
}
|
}
|
||||||
lpfc_offline(phba);
|
lpfc_offline(phba);
|
||||||
lpfc_sli_brdrestart(phba);
|
clear_bit(MBX_TMO_ERR, &phba->bit_flags);
|
||||||
lpfc_online(phba);
|
if (unlikely(rc)) {
|
||||||
lpfc_unblock_mgmt_io(phba);
|
lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
|
||||||
|
"8888 PCI function reset failed rc %x\n",
|
||||||
|
rc);
|
||||||
|
} else {
|
||||||
|
lpfc_sli_brdrestart(phba);
|
||||||
|
lpfc_online(phba);
|
||||||
|
lpfc_unblock_mgmt_io(phba);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|||||||
@@ -3919,6 +3919,8 @@ void lpfc_poll_eratt(struct timer_list *t)
|
|||||||
uint64_t sli_intr, cnt;
|
uint64_t sli_intr, cnt;
|
||||||
|
|
||||||
phba = from_timer(phba, t, eratt_poll);
|
phba = from_timer(phba, t, eratt_poll);
|
||||||
|
if (!(phba->hba_flag & HBA_SETUP))
|
||||||
|
return;
|
||||||
|
|
||||||
/* Here we will also keep track of interrupts per sec of the hba */
|
/* Here we will also keep track of interrupts per sec of the hba */
|
||||||
sli_intr = phba->sli.slistat.sli_intr;
|
sli_intr = phba->sli.slistat.sli_intr;
|
||||||
@@ -7712,7 +7714,9 @@ lpfc_sli4_repost_sgl_list(struct lpfc_hba *phba,
|
|||||||
spin_unlock_irq(&phba->hbalock);
|
spin_unlock_irq(&phba->hbalock);
|
||||||
} else {
|
} else {
|
||||||
lpfc_printf_log(phba, KERN_ERR, LOG_TRACE_EVENT,
|
lpfc_printf_log(phba, KERN_ERR, LOG_TRACE_EVENT,
|
||||||
"3161 Failure to post sgl to port.\n");
|
"3161 Failure to post sgl to port,status %x "
|
||||||
|
"blkcnt %d totalcnt %d postcnt %d\n",
|
||||||
|
status, block_cnt, total_cnt, post_cnt);
|
||||||
return -EIO;
|
return -EIO;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -8495,6 +8499,7 @@ lpfc_sli4_hba_setup(struct lpfc_hba *phba)
|
|||||||
spin_unlock_irq(&phba->hbalock);
|
spin_unlock_irq(&phba->hbalock);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
phba->hba_flag &= ~HBA_SETUP;
|
||||||
|
|
||||||
lpfc_sli4_dip(phba);
|
lpfc_sli4_dip(phba);
|
||||||
|
|
||||||
@@ -9317,6 +9322,7 @@ lpfc_mbox_timeout_handler(struct lpfc_hba *phba)
|
|||||||
* would get IOCB_ERROR from lpfc_sli_issue_iocb, allowing
|
* would get IOCB_ERROR from lpfc_sli_issue_iocb, allowing
|
||||||
* it to fail all outstanding SCSI IO.
|
* it to fail all outstanding SCSI IO.
|
||||||
*/
|
*/
|
||||||
|
set_bit(MBX_TMO_ERR, &phba->bit_flags);
|
||||||
spin_lock_irq(&phba->pport->work_port_lock);
|
spin_lock_irq(&phba->pport->work_port_lock);
|
||||||
phba->pport->work_port_events &= ~WORKER_MBOX_TMO;
|
phba->pport->work_port_events &= ~WORKER_MBOX_TMO;
|
||||||
spin_unlock_irq(&phba->pport->work_port_lock);
|
spin_unlock_irq(&phba->pport->work_port_lock);
|
||||||
|
|||||||
Reference in New Issue
Block a user