From 50c6b20eff8e10cb91f06262d8003a5d9be3dfab Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Thu, 14 Nov 2019 13:02:40 +0100 Subject: [PATCH 1/8] net/smc: fix final cleanup sequence for SMCD devices If peer announces shutdown, use the link group terminate worker for local cleanup of link groups and connections to terminate link group in proper context. Make sure link groups are cleaned up first before destroying the event queue of the SMCD device, because link group cleanup may raise events. Send signal shutdown only if peer has not done it already. Send socket abort or close only, if peer has not already announced shutdown. Signed-off-by: Ursula Braun Signed-off-by: Karsten Graul Signed-off-by: David S. Miller --- net/smc/smc_cdc.c | 3 +++ net/smc/smc_core.c | 20 ++++++++++++-------- net/smc/smc_core.h | 2 ++ net/smc/smc_ism.c | 7 +++++-- 4 files changed, 22 insertions(+), 10 deletions(-) diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c index 7dc07ec2379b..164f1584861b 100644 --- a/net/smc/smc_cdc.c +++ b/net/smc/smc_cdc.c @@ -131,6 +131,9 @@ int smc_cdc_get_slot_and_msg_send(struct smc_connection *conn) { int rc; + if (!conn->lgr || (conn->lgr->is_smcd && conn->lgr->peer_shutdown)) + return -EPIPE; + if (conn->lgr->is_smcd) { spin_lock_bh(&conn->send_lock); rc = smcd_cdc_msg_send(conn); diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index 0d92456729ab..561f069b30de 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -275,6 +275,7 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini) lgr->smcd = ini->ism_dev; lgr_list = &ini->ism_dev->lgr_list; lgr_lock = &lgr->smcd->lgr_lock; + lgr->peer_shutdown = 0; } else { /* SMC-R specific settings */ get_device(&ini->ib_dev->ibdev->dev); @@ -514,11 +515,16 @@ static void smc_conn_kill(struct smc_connection *conn) { struct smc_sock *smc = container_of(conn, struct smc_sock, conn); - smc_close_abort(conn); + if (conn->lgr->is_smcd && conn->lgr->peer_shutdown) + conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1; + else + smc_close_abort(conn); conn->killed = 1; - smc_sk_wake_ups(smc); - smc_lgr_unregister_conn(conn); smc->sk.sk_err = ECONNABORTED; + smc_sk_wake_ups(smc); + if (conn->lgr->is_smcd) + tasklet_kill(&conn->rx_tsklet); + smc_lgr_unregister_conn(conn); smc_close_active_abort(smc); } @@ -604,6 +610,8 @@ void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid, unsigned short vlan) list_for_each_entry_safe(lgr, l, &dev->lgr_list, list) { if ((!peer_gid || lgr->peer_gid == peer_gid) && (vlan == VLAN_VID_MASK || lgr->vlan_id == vlan)) { + if (peer_gid) /* peer triggered termination */ + lgr->peer_shutdown = 1; list_move(&lgr->list, &lgr_free_list); } } @@ -612,11 +620,7 @@ void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid, unsigned short vlan) /* cancel the regular free workers and actually free lgrs */ list_for_each_entry_safe(lgr, l, &lgr_free_list, list) { list_del_init(&lgr->list); - __smc_lgr_terminate(lgr); - cancel_delayed_work_sync(&lgr->free_work); - if (!peer_gid && vlan == VLAN_VID_MASK) /* dev terminated? */ - smc_ism_signal_shutdown(lgr); - smc_lgr_free(lgr); + schedule_work(&lgr->terminate_work); } } diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h index e6fd1ed42064..097ceba86caf 100644 --- a/net/smc/smc_core.h +++ b/net/smc/smc_core.h @@ -228,6 +228,8 @@ struct smc_link_group { /* Peer GID (remote) */ struct smcd_dev *smcd; /* ISM device for VLAN reg. */ + u8 peer_shutdown : 1; + /* peer triggered shutdownn */ }; }; }; diff --git a/net/smc/smc_ism.c b/net/smc/smc_ism.c index ee7340898cb4..18946e95a3be 100644 --- a/net/smc/smc_ism.c +++ b/net/smc/smc_ism.c @@ -226,6 +226,9 @@ int smc_ism_signal_shutdown(struct smc_link_group *lgr) int rc; union smcd_sw_event_info ev_info; + if (lgr->peer_shutdown) + return 0; + memcpy(ev_info.uid, lgr->id, SMC_LGR_ID_SIZE); ev_info.vlan_id = lgr->vlan_id; ev_info.code = ISM_EVENT_REQUEST; @@ -313,12 +316,12 @@ EXPORT_SYMBOL_GPL(smcd_register_dev); void smcd_unregister_dev(struct smcd_dev *smcd) { spin_lock(&smcd_dev_list.lock); - list_del(&smcd->list); + list_del_init(&smcd->list); spin_unlock(&smcd_dev_list.lock); smcd->going_away = 1; + smc_smcd_terminate(smcd, 0, VLAN_VID_MASK); flush_workqueue(smcd->event_wq); destroy_workqueue(smcd->event_wq); - smc_smcd_terminate(smcd, 0, VLAN_VID_MASK); device_del(&smcd->dev); } From 42bfba9eaa33dd4af0b50b87508062a41ec26653 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Thu, 14 Nov 2019 13:02:41 +0100 Subject: [PATCH 2/8] net/smc: immediate termination for SMCD link groups SMCD link group termination is called when peer signals its shutdown of its corresponding link group. For regular shutdowns no connections exist anymore. For abnormal shutdowns connections must be killed and their DMBs must be unregistered immediately. That means the SMCR method to delay the link group freeing several seconds does not fit. This patch adds immediate termination of a link group and its SMCD connections and makes sure all SMCD link group related cleanup steps are finished. Signed-off-by: Ursula Braun Signed-off-by: Karsten Graul Signed-off-by: David S. Miller --- drivers/s390/net/ism.h | 2 -- include/net/smc.h | 2 ++ net/smc/smc_close.c | 25 +++++++++++++++++------ net/smc/smc_core.c | 46 +++++++++++++++++++++++++++++++++++------- net/smc/smc_ism.c | 14 +++++++++++-- 5 files changed, 72 insertions(+), 17 deletions(-) diff --git a/drivers/s390/net/ism.h b/drivers/s390/net/ism.h index 66eac2b9704d..1901e9c80ed8 100644 --- a/drivers/s390/net/ism.h +++ b/drivers/s390/net/ism.h @@ -32,8 +32,6 @@ #define ISM_UNREG_SBA 0x11 #define ISM_UNREG_IEQ 0x12 -#define ISM_ERROR 0xFFFF - struct ism_req_hdr { u32 cmd; u16 : 16; diff --git a/include/net/smc.h b/include/net/smc.h index 05174ae4f325..7c2082341bb3 100644 --- a/include/net/smc.h +++ b/include/net/smc.h @@ -37,6 +37,8 @@ struct smcd_dmb { #define ISM_EVENT_GID 1 #define ISM_EVENT_SWR 2 +#define ISM_ERROR 0xFFFF + struct smcd_event { u32 type; u32 code; diff --git a/net/smc/smc_close.c b/net/smc/smc_close.c index d34e5adce2eb..d205b2114006 100644 --- a/net/smc/smc_close.c +++ b/net/smc/smc_close.c @@ -110,6 +110,17 @@ int smc_close_abort(struct smc_connection *conn) return smc_cdc_get_slot_and_msg_send(conn); } +static void smc_close_cancel_work(struct smc_sock *smc) +{ + struct sock *sk = &smc->sk; + + release_sock(sk); + cancel_work_sync(&smc->conn.close_work); + cancel_delayed_work_sync(&smc->conn.tx_work); + lock_sock(sk); + sk->sk_state = SMC_CLOSED; +} + /* terminate smc socket abnormally - active abort * link group is terminated, i.e. RDMA communication no longer possible */ @@ -126,23 +137,21 @@ void smc_close_active_abort(struct smc_sock *smc) switch (sk->sk_state) { case SMC_ACTIVE: sk->sk_state = SMC_PEERABORTWAIT; - release_sock(sk); - cancel_delayed_work_sync(&smc->conn.tx_work); - lock_sock(sk); + smc_close_cancel_work(smc); sk->sk_state = SMC_CLOSED; sock_put(sk); /* passive closing */ break; case SMC_APPCLOSEWAIT1: case SMC_APPCLOSEWAIT2: - release_sock(sk); - cancel_delayed_work_sync(&smc->conn.tx_work); - lock_sock(sk); + smc_close_cancel_work(smc); sk->sk_state = SMC_CLOSED; sock_put(sk); /* postponed passive closing */ break; case SMC_PEERCLOSEWAIT1: case SMC_PEERCLOSEWAIT2: case SMC_PEERFINCLOSEWAIT: + sk->sk_state = SMC_PEERABORTWAIT; + smc_close_cancel_work(smc); sk->sk_state = SMC_CLOSED; smc_conn_free(&smc->conn); release_clcsock = true; @@ -150,7 +159,11 @@ void smc_close_active_abort(struct smc_sock *smc) break; case SMC_PROCESSABORT: case SMC_APPFINCLOSEWAIT: + sk->sk_state = SMC_PEERABORTWAIT; + smc_close_cancel_work(smc); sk->sk_state = SMC_CLOSED; + smc_conn_free(&smc->conn); + release_clcsock = true; break; case SMC_INIT: case SMC_PEERABORTWAIT: diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index 561f069b30de..9d6da2c7413d 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -214,7 +214,7 @@ static void smc_lgr_free_work(struct work_struct *work) if (!lgr->is_smcd && lnk->state != SMC_LNK_INACTIVE) smc_llc_link_inactive(lnk); - if (lgr->is_smcd) + if (lgr->is_smcd && !lgr->terminating) smc_ism_signal_shutdown(lgr); smc_lgr_free(lgr); } @@ -381,7 +381,8 @@ void smc_conn_free(struct smc_connection *conn) if (!lgr) return; if (lgr->is_smcd) { - smc_ism_unset_conn(conn); + if (!list_empty(&lgr->list)) + smc_ism_unset_conn(conn); tasklet_kill(&conn->rx_tsklet); } else { smc_cdc_tx_dismiss_slots(conn); @@ -481,8 +482,10 @@ static void smc_lgr_free(struct smc_link_group *lgr) { smc_lgr_free_bufs(lgr); if (lgr->is_smcd) { - smc_ism_put_vlan(lgr->smcd, lgr->vlan_id); - put_device(&lgr->smcd->dev); + if (!lgr->terminating) { + smc_ism_put_vlan(lgr->smcd, lgr->vlan_id); + put_device(&lgr->smcd->dev); + } } else { smc_link_clear(&lgr->lnk[SMC_SINGLE_LINK]); put_device(&lgr->lnk[SMC_SINGLE_LINK].smcibdev->ibdev->dev); @@ -503,6 +506,20 @@ void smc_lgr_forget(struct smc_link_group *lgr) spin_unlock_bh(lgr_lock); } +static void smcd_unregister_all_dmbs(struct smc_link_group *lgr) +{ + int i; + + for (i = 0; i < SMC_RMBE_SIZES; i++) { + struct smc_buf_desc *buf_desc; + + list_for_each_entry(buf_desc, &lgr->rmbs[i], list) { + buf_desc->len += sizeof(struct smcd_cdc_msg); + smc_ism_unregister_dmb(lgr->smcd, buf_desc); + } + } +} + static void smc_sk_wake_ups(struct smc_sock *smc) { smc->sk.sk_write_space(&smc->sk); @@ -522,12 +539,28 @@ static void smc_conn_kill(struct smc_connection *conn) conn->killed = 1; smc->sk.sk_err = ECONNABORTED; smc_sk_wake_ups(smc); - if (conn->lgr->is_smcd) + if (conn->lgr->is_smcd) { + smc_ism_unset_conn(conn); tasklet_kill(&conn->rx_tsklet); + } smc_lgr_unregister_conn(conn); smc_close_active_abort(smc); } +static void smc_lgr_cleanup(struct smc_link_group *lgr) +{ + if (lgr->is_smcd) { + smc_ism_signal_shutdown(lgr); + smcd_unregister_all_dmbs(lgr); + smc_ism_put_vlan(lgr->smcd, lgr->vlan_id); + put_device(&lgr->smcd->dev); + } else { + struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK]; + + wake_up(&lnk->wr_reg_wait); + } +} + /* terminate link group */ static void __smc_lgr_terminate(struct smc_link_group *lgr) { @@ -557,8 +590,7 @@ static void __smc_lgr_terminate(struct smc_link_group *lgr) node = rb_first(&lgr->conns_all); } read_unlock_bh(&lgr->conns_lock); - if (!lgr->is_smcd) - wake_up(&lgr->lnk[SMC_SINGLE_LINK].wr_reg_wait); + smc_lgr_cleanup(lgr); smc_lgr_schedule_free_work_fast(lgr); } diff --git a/net/smc/smc_ism.c b/net/smc/smc_ism.c index 18946e95a3be..903da947b20d 100644 --- a/net/smc/smc_ism.c +++ b/net/smc/smc_ism.c @@ -146,6 +146,10 @@ out: int smc_ism_unregister_dmb(struct smcd_dev *smcd, struct smc_buf_desc *dmb_desc) { struct smcd_dmb dmb; + int rc = 0; + + if (!dmb_desc->dma_addr) + return rc; memset(&dmb, 0, sizeof(dmb)); dmb.dmb_tok = dmb_desc->token; @@ -153,7 +157,13 @@ int smc_ism_unregister_dmb(struct smcd_dev *smcd, struct smc_buf_desc *dmb_desc) dmb.cpu_addr = dmb_desc->cpu_addr; dmb.dma_addr = dmb_desc->dma_addr; dmb.dmb_len = dmb_desc->len; - return smcd->ops->unregister_dmb(smcd, &dmb); + rc = smcd->ops->unregister_dmb(smcd, &dmb); + if (!rc || rc == ISM_ERROR) { + dmb_desc->cpu_addr = NULL; + dmb_desc->dma_addr = 0; + } + + return rc; } int smc_ism_register_dmb(struct smc_link_group *lgr, int dmb_len, @@ -375,7 +385,7 @@ void smcd_handle_irq(struct smcd_dev *smcd, unsigned int dmbno) spin_lock_irqsave(&smcd->lock, flags); conn = smcd->conn[dmbno]; - if (conn) + if (conn && !conn->killed) tasklet_schedule(&conn->rx_tsklet); spin_unlock_irqrestore(&smcd->lock, flags); } From 5421ec281df9dfda4418c02959e1f76097cabd9a Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Thu, 14 Nov 2019 13:02:42 +0100 Subject: [PATCH 3/8] net/smc: abnormal termination of SMCD link groups A final cleanup due to SMCD device removal means immediate freeing of all link groups belonging to this device in interrupt context. This patch introduces a separate SMCD link group termination routine, which terminates all link groups of an SMCD device. This new routine smcd_terminate_all ()is reused if the smc module is unloaded. Signed-off-by: Ursula Braun Signed-off-by: Karsten Graul Signed-off-by: David S. Miller --- net/smc/smc_clc.c | 2 +- net/smc/smc_core.c | 67 +++++++++++++++++++++++++++++++++++----------- net/smc/smc_core.h | 3 ++- net/smc/smc_ism.c | 2 +- net/smc/smc_llc.c | 2 +- net/smc/smc_tx.c | 2 +- 6 files changed, 57 insertions(+), 21 deletions(-) diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c index 49bcebff6378..0879f7bed967 100644 --- a/net/smc/smc_clc.c +++ b/net/smc/smc_clc.c @@ -349,7 +349,7 @@ int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen, smc->peer_diagnosis = ntohl(dclc->peer_diagnosis); if (((struct smc_clc_msg_decline *)buf)->hdr.flag) { smc->conn.lgr->sync_err = 1; - smc_lgr_terminate(smc->conn.lgr); + smc_lgr_terminate(smc->conn.lgr, true); } } diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index 9d6da2c7413d..d79dd78c1cd8 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -224,7 +224,7 @@ static void smc_lgr_terminate_work(struct work_struct *work) struct smc_link_group *lgr = container_of(work, struct smc_link_group, terminate_work); - smc_lgr_terminate(lgr); + smc_lgr_terminate(lgr, true); } /* create a new SMC link group */ @@ -528,7 +528,7 @@ static void smc_sk_wake_ups(struct smc_sock *smc) } /* kill a connection */ -static void smc_conn_kill(struct smc_connection *conn) +static void smc_conn_kill(struct smc_connection *conn, bool soft) { struct smc_sock *smc = container_of(conn, struct smc_sock, conn); @@ -541,7 +541,10 @@ static void smc_conn_kill(struct smc_connection *conn) smc_sk_wake_ups(smc); if (conn->lgr->is_smcd) { smc_ism_unset_conn(conn); - tasklet_kill(&conn->rx_tsklet); + if (soft) + tasklet_kill(&conn->rx_tsklet); + else + tasklet_unlock_wait(&conn->rx_tsklet); } smc_lgr_unregister_conn(conn); smc_close_active_abort(smc); @@ -562,7 +565,7 @@ static void smc_lgr_cleanup(struct smc_link_group *lgr) } /* terminate link group */ -static void __smc_lgr_terminate(struct smc_link_group *lgr) +static void __smc_lgr_terminate(struct smc_link_group *lgr, bool soft) { struct smc_connection *conn; struct smc_sock *smc; @@ -570,6 +573,8 @@ static void __smc_lgr_terminate(struct smc_link_group *lgr) if (lgr->terminating) return; /* lgr already terminating */ + if (!soft) + cancel_delayed_work_sync(&lgr->free_work); lgr->terminating = 1; if (!lgr->is_smcd) smc_llc_link_inactive(&lgr->lnk[SMC_SINGLE_LINK]); @@ -583,7 +588,7 @@ static void __smc_lgr_terminate(struct smc_link_group *lgr) smc = container_of(conn, struct smc_sock, conn); sock_hold(&smc->sk); /* sock_put below */ lock_sock(&smc->sk); - smc_conn_kill(conn); + smc_conn_kill(conn, soft); release_sock(&smc->sk); sock_put(&smc->sk); /* sock_hold above */ read_lock_bh(&lgr->conns_lock); @@ -591,11 +596,17 @@ static void __smc_lgr_terminate(struct smc_link_group *lgr) } read_unlock_bh(&lgr->conns_lock); smc_lgr_cleanup(lgr); - smc_lgr_schedule_free_work_fast(lgr); + if (soft) + smc_lgr_schedule_free_work_fast(lgr); + else + smc_lgr_free(lgr); } -/* unlink and terminate link group */ -void smc_lgr_terminate(struct smc_link_group *lgr) +/* unlink and terminate link group + * @soft: true if link group shutdown can take its time + * false if immediate link group shutdown is required + */ +void smc_lgr_terminate(struct smc_link_group *lgr, bool soft) { spinlock_t *lgr_lock; @@ -605,9 +616,11 @@ void smc_lgr_terminate(struct smc_link_group *lgr) spin_unlock_bh(lgr_lock); return; /* lgr already terminating */ } + if (!soft) + lgr->freeing = 1; list_del_init(&lgr->list); spin_unlock_bh(lgr_lock); - __smc_lgr_terminate(lgr); + __smc_lgr_terminate(lgr, soft); } /* Called when IB port is terminated */ @@ -627,11 +640,11 @@ void smc_port_terminate(struct smc_ib_device *smcibdev, u8 ibport) list_for_each_entry_safe(lgr, l, &lgr_free_list, list) { list_del_init(&lgr->list); - __smc_lgr_terminate(lgr); + __smc_lgr_terminate(lgr, true); } } -/* Called when SMC-D device is terminated or peer is lost */ +/* Called when peer lgr shutdown (regularly or abnormally) is received */ void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid, unsigned short vlan) { struct smc_link_group *lgr, *l; @@ -656,6 +669,24 @@ void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid, unsigned short vlan) } } +/* Called when an SMCD device is removed or the smc module is unloaded */ +void smc_smcd_terminate_all(struct smcd_dev *smcd) +{ + struct smc_link_group *lgr, *lg; + LIST_HEAD(lgr_free_list); + + spin_lock_bh(&smcd->lgr_lock); + list_splice_init(&smcd->lgr_list, &lgr_free_list); + list_for_each_entry(lgr, &lgr_free_list, list) + lgr->freeing = 1; + spin_unlock_bh(&smcd->lgr_lock); + + list_for_each_entry_safe(lgr, lg, &lgr_free_list, list) { + list_del_init(&lgr->list); + __smc_lgr_terminate(lgr, false); + } +} + /* Determine vlan of internal TCP socket. * @vlan_id: address to store the determined vlan id into */ @@ -1173,8 +1204,8 @@ static void smc_core_going_away(void) spin_unlock(&smcd_dev_list.lock); } -/* Called (from smc_exit) when module is removed */ -void smc_core_exit(void) +/* Clean up all SMC link groups */ +static void smc_lgrs_shutdown(void) { struct smc_link_group *lgr, *lg; LIST_HEAD(lgr_freeing_list); @@ -1188,7 +1219,7 @@ void smc_core_exit(void) spin_lock(&smcd_dev_list.lock); list_for_each_entry(smcd, &smcd_dev_list.list, list) - list_splice_init(&smcd->lgr_list, &lgr_freeing_list); + smc_smcd_terminate_all(smcd); spin_unlock(&smcd_dev_list.lock); list_for_each_entry_safe(lgr, lg, &lgr_freeing_list, list) { @@ -1202,8 +1233,12 @@ void smc_core_exit(void) smc_llc_link_inactive(lnk); } cancel_delayed_work_sync(&lgr->free_work); - if (lgr->is_smcd) - smc_ism_signal_shutdown(lgr); smc_lgr_free(lgr); /* free link group */ } } + +/* Called (from smc_exit) when module is removed */ +void smc_core_exit(void) +{ + smc_lgrs_shutdown(); +} diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h index 097ceba86caf..7f34f4d5a514 100644 --- a/net/smc/smc_core.h +++ b/net/smc/smc_core.h @@ -296,10 +296,11 @@ struct smc_clc_msg_accept_confirm; struct smc_clc_msg_local; void smc_lgr_forget(struct smc_link_group *lgr); -void smc_lgr_terminate(struct smc_link_group *lgr); +void smc_lgr_terminate(struct smc_link_group *lgr, bool soft); void smc_port_terminate(struct smc_ib_device *smcibdev, u8 ibport); void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid, unsigned short vlan); +void smc_smcd_terminate_all(struct smcd_dev *dev); int smc_buf_create(struct smc_sock *smc, bool is_smcd); int smc_uncompress_bufsize(u8 compressed); int smc_rmb_rtoken_handling(struct smc_connection *conn, diff --git a/net/smc/smc_ism.c b/net/smc/smc_ism.c index 903da947b20d..56cdab8be1fa 100644 --- a/net/smc/smc_ism.c +++ b/net/smc/smc_ism.c @@ -329,7 +329,7 @@ void smcd_unregister_dev(struct smcd_dev *smcd) list_del_init(&smcd->list); spin_unlock(&smcd_dev_list.lock); smcd->going_away = 1; - smc_smcd_terminate(smcd, 0, VLAN_VID_MASK); + smc_smcd_terminate_all(smcd); flush_workqueue(smcd->event_wq); destroy_workqueue(smcd->event_wq); diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c index e1918ffaf125..26a18c872455 100644 --- a/net/smc/smc_llc.c +++ b/net/smc/smc_llc.c @@ -614,7 +614,7 @@ static void smc_llc_testlink_work(struct work_struct *work) rc = wait_for_completion_interruptible_timeout(&link->llc_testlink_resp, SMC_LLC_WAIT_TIME); if (rc <= 0) { - smc_lgr_terminate(smc_get_lgr(link)); + smc_lgr_terminate(smc_get_lgr(link), true); return; } next_interval = link->llc_testlink_time; diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c index 824f096ee7de..0d42e7716b91 100644 --- a/net/smc/smc_tx.c +++ b/net/smc/smc_tx.c @@ -284,7 +284,7 @@ static int smc_tx_rdma_write(struct smc_connection *conn, int peer_rmbe_offset, rdma_wr->rkey = lgr->rtokens[conn->rtoken_idx][SMC_SINGLE_LINK].rkey; rc = ib_post_send(link->roce_qp, &rdma_wr->wr, NULL); if (rc) - smc_lgr_terminate(lgr); + smc_lgr_terminate(lgr, true); return rc; } From 5edd6b9cb8d7c6c346c93c52a53735591127e879 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Thu, 14 Nov 2019 13:02:43 +0100 Subject: [PATCH 4/8] net/smc: introduce bookkeeping of SMCD link groups If the ism module is unloaded return control from exit routine only, if all link groups are freed. If an IB device is thrown away return control from device removal only, if all link groups belonging to this device are freed. A counters for the total number of SMCD link groups per ISM device is introduced. ism module unloading continues only if the total number of SMCD link groups for all ISM devices is zero. ISM device removal continues only it the total number of SMCD link groups per ISM device has decreased to zero. Signed-off-by: Ursula Braun Signed-off-by: Karsten Graul Signed-off-by: David S. Miller --- include/net/smc.h | 2 ++ net/smc/smc_core.c | 6 ++++++ net/smc/smc_ism.c | 1 + 3 files changed, 9 insertions(+) diff --git a/include/net/smc.h b/include/net/smc.h index 7c2082341bb3..646feb4bc75f 100644 --- a/include/net/smc.h +++ b/include/net/smc.h @@ -79,6 +79,8 @@ struct smcd_dev { bool pnetid_by_user; struct list_head lgr_list; spinlock_t lgr_lock; + atomic_t lgr_cnt; + wait_queue_head_t lgrs_deleted; u8 going_away : 1; }; diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index d79dd78c1cd8..30854acb846c 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -276,6 +276,7 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini) lgr_list = &ini->ism_dev->lgr_list; lgr_lock = &lgr->smcd->lgr_lock; lgr->peer_shutdown = 0; + atomic_inc(&ini->ism_dev->lgr_cnt); } else { /* SMC-R specific settings */ get_device(&ini->ib_dev->ibdev->dev); @@ -486,6 +487,8 @@ static void smc_lgr_free(struct smc_link_group *lgr) smc_ism_put_vlan(lgr->smcd, lgr->vlan_id); put_device(&lgr->smcd->dev); } + if (!atomic_dec_return(&lgr->smcd->lgr_cnt)) + wake_up(&lgr->smcd->lgrs_deleted); } else { smc_link_clear(&lgr->lnk[SMC_SINGLE_LINK]); put_device(&lgr->lnk[SMC_SINGLE_LINK].smcibdev->ibdev->dev); @@ -685,6 +688,9 @@ void smc_smcd_terminate_all(struct smcd_dev *smcd) list_del_init(&lgr->list); __smc_lgr_terminate(lgr, false); } + + if (atomic_read(&smcd->lgr_cnt)) + wait_event(smcd->lgrs_deleted, !atomic_read(&smcd->lgr_cnt)); } /* Determine vlan of internal TCP socket. diff --git a/net/smc/smc_ism.c b/net/smc/smc_ism.c index 56cdab8be1fa..5c4727d5066e 100644 --- a/net/smc/smc_ism.c +++ b/net/smc/smc_ism.c @@ -302,6 +302,7 @@ struct smcd_dev *smcd_alloc_dev(struct device *parent, const char *name, spin_lock_init(&smcd->lgr_lock); INIT_LIST_HEAD(&smcd->vlan); INIT_LIST_HEAD(&smcd->lgr_list); + init_waitqueue_head(&smcd->lgrs_deleted); smcd->event_wq = alloc_ordered_workqueue("ism_evt_wq-%s)", WQ_MEM_RECLAIM, name); if (!smcd->event_wq) { From 15e1b99aadfb2766f9379a23a0fc1d4336c8cd8e Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Thu, 14 Nov 2019 13:02:44 +0100 Subject: [PATCH 5/8] net/smc: no WR buffer wait for terminating link group Avoid waiting for a free work request buffer, if the link group is already terminating. Signed-off-by: Ursula Braun Signed-off-by: Karsten Graul Signed-off-by: David S. Miller --- net/smc/smc_llc.c | 3 +++ net/smc/smc_wr.c | 10 ++++++---- net/smc/smc_wr.h | 10 ++++++++++ 3 files changed, 19 insertions(+), 4 deletions(-) diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c index 26a18c872455..8d1b076021ed 100644 --- a/net/smc/smc_llc.c +++ b/net/smc/smc_llc.c @@ -656,6 +656,7 @@ void smc_llc_link_active(struct smc_link *link, int testlink_time) void smc_llc_link_deleting(struct smc_link *link) { link->state = SMC_LNK_DELETING; + smc_wr_wakeup_tx_wait(link); } /* called in tasklet context */ @@ -663,6 +664,8 @@ void smc_llc_link_inactive(struct smc_link *link) { link->state = SMC_LNK_INACTIVE; cancel_delayed_work(&link->llc_testlink_wrk); + smc_wr_wakeup_reg_wait(link); + smc_wr_wakeup_tx_wait(link); } /* called in worker context */ diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c index 50743dc56c86..619dd89fbac0 100644 --- a/net/smc/smc_wr.c +++ b/net/smc/smc_wr.c @@ -75,7 +75,7 @@ static inline void smc_wr_tx_process_cqe(struct ib_wc *wc) link->wr_reg_state = FAILED; else link->wr_reg_state = CONFIRMED; - wake_up(&link->wr_reg_wait); + smc_wr_wakeup_reg_wait(link); return; } @@ -171,6 +171,7 @@ int smc_wr_tx_get_free_slot(struct smc_link *link, struct smc_rdma_wr **wr_rdma_buf, struct smc_wr_tx_pend_priv **wr_pend_priv) { + struct smc_link_group *lgr = smc_get_lgr(link); struct smc_wr_tx_pend *wr_pend; u32 idx = link->wr_tx_cnt; struct ib_send_wr *wr_ib; @@ -179,19 +180,20 @@ int smc_wr_tx_get_free_slot(struct smc_link *link, *wr_buf = NULL; *wr_pend_priv = NULL; - if (in_softirq()) { + if (in_softirq() || lgr->terminating) { rc = smc_wr_tx_get_free_slot_index(link, &idx); if (rc) return rc; } else { - rc = wait_event_timeout( + rc = wait_event_interruptible_timeout( link->wr_tx_wait, link->state == SMC_LNK_INACTIVE || + lgr->terminating || (smc_wr_tx_get_free_slot_index(link, &idx) != -EBUSY), SMC_WR_TX_WAIT_FREE_SLOT_TIME); if (!rc) { /* timeout - terminate connections */ - smc_lgr_terminate_sched(smc_get_lgr(link)); + smc_lgr_terminate_sched(lgr); return -EPIPE; } if (idx == link->wr_tx_cnt) diff --git a/net/smc/smc_wr.h b/net/smc/smc_wr.h index 09bf32fd3959..3ac99c898418 100644 --- a/net/smc/smc_wr.h +++ b/net/smc/smc_wr.h @@ -60,6 +60,16 @@ static inline void smc_wr_tx_set_wr_id(atomic_long_t *wr_tx_id, long val) atomic_long_set(wr_tx_id, val); } +static inline void smc_wr_wakeup_tx_wait(struct smc_link *lnk) +{ + wake_up_all(&lnk->wr_tx_wait); +} + +static inline void smc_wr_wakeup_reg_wait(struct smc_link *lnk) +{ + wake_up(&lnk->wr_reg_wait); +} + /* post a new receive work request to fill a completed old work request entry */ static inline int smc_wr_rx_post(struct smc_link *link) { From 2c1d3e50302fe3e0bd6873323877c2ad19db3f49 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Thu, 14 Nov 2019 13:02:45 +0100 Subject: [PATCH 6/8] net/smc: abnormal termination without orderly flag For abnormal termination issue an LLC DELETE_LINK without the orderly flag. Signed-off-by: Ursula Braun Signed-off-by: Karsten Graul Signed-off-by: David S. Miller --- net/smc/smc_core.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index 30854acb846c..ee44e8244d0c 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -161,10 +161,10 @@ static void smc_lgr_unregister_conn(struct smc_connection *conn) * of the DELETE LINK sequence from server; or as server to * initiate the delete processing. See smc_llc_rx_delete_link(). */ -static int smc_link_send_delete(struct smc_link *lnk) +static int smc_link_send_delete(struct smc_link *lnk, bool orderly) { if (lnk->state == SMC_LNK_ACTIVE && - !smc_llc_send_delete_link(lnk, SMC_LLC_REQ, true)) { + !smc_llc_send_delete_link(lnk, SMC_LLC_REQ, orderly)) { smc_llc_link_deleting(lnk); return 0; } @@ -201,7 +201,7 @@ static void smc_lgr_free_work(struct work_struct *work) if (!lgr->is_smcd && !lgr->terminating) { /* try to send del link msg, on error free lgr immediately */ if (lnk->state == SMC_LNK_ACTIVE && - !smc_link_send_delete(lnk)) { + !smc_link_send_delete(lnk, true)) { /* reschedule in case we never receive a response */ smc_lgr_schedule_free_work(lgr); spin_unlock_bh(lgr_lock); @@ -1233,9 +1233,7 @@ static void smc_lgrs_shutdown(void) if (!lgr->is_smcd) { struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK]; - if (lnk->state == SMC_LNK_ACTIVE) - smc_llc_send_delete_link(lnk, SMC_LLC_REQ, - false); + smc_link_send_delete(&lgr->lnk[SMC_SINGLE_LINK], false); smc_llc_link_inactive(lnk); } cancel_delayed_work_sync(&lgr->free_work); From 6a37ad3da5d64a632d03a8dc272c65e706cc7160 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Thu, 14 Nov 2019 13:02:46 +0100 Subject: [PATCH 7/8] net/smc: wait for tx completions before link freeing Make sure all pending work requests are completed before freeing a link. Dismiss tx pending slots already when terminating a link group to exploit termination shortcut in tx completion queue handler. And kill the completion queue tasklets after destroy of the completion queues, otherwise there is a time window for another tasklet schedule of an already killed tasklet. Signed-off-by: Ursula Braun Signed-off-by: Karsten Graul Signed-off-by: David S. Miller --- net/smc/smc_core.c | 2 ++ net/smc/smc_ib.c | 2 +- net/smc/smc_wr.c | 27 +++++++++++++++++++++++++-- 3 files changed, 28 insertions(+), 3 deletions(-) diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index ee44e8244d0c..0755bd4b587c 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -548,6 +548,8 @@ static void smc_conn_kill(struct smc_connection *conn, bool soft) tasklet_kill(&conn->rx_tsklet); else tasklet_unlock_wait(&conn->rx_tsklet); + } else { + smc_cdc_tx_dismiss_slots(conn); } smc_lgr_unregister_conn(conn); smc_close_active_abort(smc); diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c index af05daeb0538..c15dcd08dc74 100644 --- a/net/smc/smc_ib.c +++ b/net/smc/smc_ib.c @@ -520,9 +520,9 @@ static void smc_ib_cleanup_per_ibdev(struct smc_ib_device *smcibdev) if (!smcibdev->initialized) return; smcibdev->initialized = 0; - smc_wr_remove_dev(smcibdev); ib_destroy_cq(smcibdev->roce_cq_recv); ib_destroy_cq(smcibdev->roce_cq_send); + smc_wr_remove_dev(smcibdev); } static struct ib_client smc_ib_client; diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c index 619dd89fbac0..337ee52ad3d3 100644 --- a/net/smc/smc_wr.c +++ b/net/smc/smc_wr.c @@ -50,6 +50,26 @@ struct smc_wr_tx_pend { /* control data for a pending send request */ /*------------------------------- completion --------------------------------*/ +/* returns true if at least one tx work request is pending on the given link */ +static inline bool smc_wr_is_tx_pend(struct smc_link *link) +{ + if (find_first_bit(link->wr_tx_mask, link->wr_tx_cnt) != + link->wr_tx_cnt) { + return true; + } + return false; +} + +/* wait till all pending tx work requests on the given link are completed */ +static inline int smc_wr_tx_wait_no_pending_sends(struct smc_link *link) +{ + if (wait_event_timeout(link->wr_tx_wait, !smc_wr_is_tx_pend(link), + SMC_WR_TX_WAIT_PENDING_TIME)) + return 0; + else /* timeout */ + return -EPIPE; +} + static inline int smc_wr_tx_find_pending_index(struct smc_link *link, u64 wr_id) { u32 i; @@ -229,6 +249,7 @@ int smc_wr_tx_put_slot(struct smc_link *link, memset(&link->wr_tx_bufs[idx], 0, sizeof(link->wr_tx_bufs[idx])); test_and_clear_bit(idx, link->wr_tx_mask); + wake_up(&link->wr_tx_wait); return 1; } @@ -512,8 +533,10 @@ void smc_wr_free_link(struct smc_link *lnk) { struct ib_device *ibdev; - memset(lnk->wr_tx_mask, 0, - BITS_TO_LONGS(SMC_WR_BUF_CNT) * sizeof(*lnk->wr_tx_mask)); + if (smc_wr_tx_wait_no_pending_sends(lnk)) + memset(lnk->wr_tx_mask, 0, + BITS_TO_LONGS(SMC_WR_BUF_CNT) * + sizeof(*lnk->wr_tx_mask)); if (!lnk->smcibdev) return; From 0b29ec6436138721acf5844e558f7334a0fa61d5 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Thu, 14 Nov 2019 13:02:47 +0100 Subject: [PATCH 8/8] net/smc: immediate termination for SMCR link groups If the SMC module is unloaded or an IB device is thrown away, the immediate link group freeing introduced for SMCD is exploited for SMCR as well. That means SMCR-specifics are added to smc_conn_kill(). Signed-off-by: Ursula Braun Signed-off-by: Karsten Graul Signed-off-by: David S. Miller --- net/smc/smc_core.c | 58 +++++++++++++++++++++++++++++++--------------- net/smc/smc_core.h | 3 ++- net/smc/smc_ib.c | 3 ++- net/smc/smc_llc.c | 4 +++- 4 files changed, 46 insertions(+), 22 deletions(-) diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index 0755bd4b587c..97e9d21c4d1e 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -566,6 +566,10 @@ static void smc_lgr_cleanup(struct smc_link_group *lgr) struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK]; wake_up(&lnk->wr_reg_wait); + if (lnk->state != SMC_LNK_INACTIVE) { + smc_link_send_delete(lnk, false); + smc_llc_link_inactive(lnk); + } } } @@ -638,14 +642,16 @@ void smc_port_terminate(struct smc_ib_device *smcibdev, u8 ibport) list_for_each_entry_safe(lgr, l, &smc_lgr_list.list, list) { if (!lgr->is_smcd && lgr->lnk[SMC_SINGLE_LINK].smcibdev == smcibdev && - lgr->lnk[SMC_SINGLE_LINK].ibport == ibport) + lgr->lnk[SMC_SINGLE_LINK].ibport == ibport) { list_move(&lgr->list, &lgr_free_list); + lgr->freeing = 1; + } } spin_unlock_bh(&smc_lgr_list.lock); list_for_each_entry_safe(lgr, l, &lgr_free_list, list) { list_del_init(&lgr->list); - __smc_lgr_terminate(lgr, true); + __smc_lgr_terminate(lgr, false); } } @@ -695,6 +701,36 @@ void smc_smcd_terminate_all(struct smcd_dev *smcd) wait_event(smcd->lgrs_deleted, !atomic_read(&smcd->lgr_cnt)); } +/* Called when an SMCR device is removed or the smc module is unloaded. + * If smcibdev is given, all SMCR link groups using this device are terminated. + * If smcibdev is NULL, all SMCR link groups are terminated. + */ +void smc_smcr_terminate_all(struct smc_ib_device *smcibdev) +{ + struct smc_link_group *lgr, *lg; + LIST_HEAD(lgr_free_list); + + spin_lock_bh(&smc_lgr_list.lock); + if (!smcibdev) { + list_splice_init(&smc_lgr_list.list, &lgr_free_list); + list_for_each_entry(lgr, &lgr_free_list, list) + lgr->freeing = 1; + } else { + list_for_each_entry_safe(lgr, lg, &smc_lgr_list.list, list) { + if (lgr->lnk[SMC_SINGLE_LINK].smcibdev == smcibdev) { + list_move(&lgr->list, &lgr_free_list); + lgr->freeing = 1; + } + } + } + spin_unlock_bh(&smc_lgr_list.lock); + + list_for_each_entry_safe(lgr, lg, &lgr_free_list, list) { + list_del_init(&lgr->list); + __smc_lgr_terminate(lgr, false); + } +} + /* Determine vlan of internal TCP socket. * @vlan_id: address to store the determined vlan id into */ @@ -1215,32 +1251,16 @@ static void smc_core_going_away(void) /* Clean up all SMC link groups */ static void smc_lgrs_shutdown(void) { - struct smc_link_group *lgr, *lg; - LIST_HEAD(lgr_freeing_list); struct smcd_dev *smcd; smc_core_going_away(); - spin_lock_bh(&smc_lgr_list.lock); - list_splice_init(&smc_lgr_list.list, &lgr_freeing_list); - spin_unlock_bh(&smc_lgr_list.lock); + smc_smcr_terminate_all(NULL); spin_lock(&smcd_dev_list.lock); list_for_each_entry(smcd, &smcd_dev_list.list, list) smc_smcd_terminate_all(smcd); spin_unlock(&smcd_dev_list.lock); - - list_for_each_entry_safe(lgr, lg, &lgr_freeing_list, list) { - list_del_init(&lgr->list); - if (!lgr->is_smcd) { - struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK]; - - smc_link_send_delete(&lgr->lnk[SMC_SINGLE_LINK], false); - smc_llc_link_inactive(lnk); - } - cancel_delayed_work_sync(&lgr->free_work); - smc_lgr_free(lgr); /* free link group */ - } } /* Called (from smc_exit) when module is removed */ diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h index 7f34f4d5a514..a428db6cd2e2 100644 --- a/net/smc/smc_core.h +++ b/net/smc/smc_core.h @@ -287,7 +287,7 @@ static inline struct smc_connection *smc_lgr_find_conn( static inline void smc_lgr_terminate_sched(struct smc_link_group *lgr) { - if (!lgr->terminating) + if (!lgr->terminating && !lgr->freeing) schedule_work(&lgr->terminate_work); } @@ -301,6 +301,7 @@ void smc_port_terminate(struct smc_ib_device *smcibdev, u8 ibport); void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid, unsigned short vlan); void smc_smcd_terminate_all(struct smcd_dev *dev); +void smc_smcr_terminate_all(struct smc_ib_device *smcibdev); int smc_buf_create(struct smc_sock *smc, bool is_smcd); int smc_uncompress_bufsize(u8 compressed); int smc_rmb_rtoken_handling(struct smc_connection *conn, diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c index c15dcd08dc74..0ab122e66328 100644 --- a/net/smc/smc_ib.c +++ b/net/smc/smc_ib.c @@ -565,7 +565,7 @@ static void smc_ib_add_dev(struct ib_device *ibdev) schedule_work(&smcibdev->port_event_work); } -/* callback function for ib_register_client() */ +/* callback function for ib_unregister_client() */ static void smc_ib_remove_dev(struct ib_device *ibdev, void *client_data) { struct smc_ib_device *smcibdev; @@ -575,6 +575,7 @@ static void smc_ib_remove_dev(struct ib_device *ibdev, void *client_data) spin_lock(&smc_ib_devices.lock); list_del_init(&smcibdev->list); /* remove from smc_ib_devices */ spin_unlock(&smc_ib_devices.lock); + smc_smcr_terminate_all(smcibdev); smc_ib_cleanup_per_ibdev(smcibdev); ib_unregister_event_handler(&smcibdev->event_handler); kfree(smcibdev); diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c index 8d1b076021ed..a9f6431dd69a 100644 --- a/net/smc/smc_llc.c +++ b/net/smc/smc_llc.c @@ -698,9 +698,11 @@ int smc_llc_do_confirm_rkey(struct smc_link *link, int smc_llc_do_delete_rkey(struct smc_link *link, struct smc_buf_desc *rmb_desc) { - int rc; + int rc = 0; mutex_lock(&link->llc_delete_rkey_mutex); + if (link->state != SMC_LNK_ACTIVE) + goto out; reinit_completion(&link->llc_delete_rkey); rc = smc_llc_send_delete_rkey(link, rmb_desc); if (rc)