From ce4626131112e1d0066a890371e14d8091323f99 Mon Sep 17 00:00:00 2001 From: Tony Nguyen Date: Mon, 22 Aug 2022 11:56:54 -0700 Subject: [PATCH 1/5] ice: Allow operation with reduced device MSI-X The driver currently takes an all or nothing approach for device MSI-X vectors. Meaning if it does not get its full allocation, it will fail and not load. There is no reason it can't work with a reduced number of MSI-X vectors. Take a similar approach as commit 741106f7bd8d ("ice: Improve MSI-X fallback logic") and, instead, adjust the MSI-X request to make use of what is available. Signed-off-by: Tony Nguyen Tested-by: Petr Oros Tested-by: Gurucharan (A Contingent worker at Intel) --- drivers/net/ethernet/intel/ice/ice_main.c | 177 ++++++++++++---------- 1 file changed, 96 insertions(+), 81 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 14edf7614406..9d031271cfda 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -3921,88 +3921,135 @@ static int ice_init_pf(struct ice_pf *pf) return 0; } +/** + * ice_reduce_msix_usage - Reduce usage of MSI-X vectors + * @pf: board private structure + * @v_remain: number of remaining MSI-X vectors to be distributed + * + * Reduce the usage of MSI-X vectors when entire request cannot be fulfilled. + * pf->num_lan_msix and pf->num_rdma_msix values are set based on number of + * remaining vectors. + */ +static void ice_reduce_msix_usage(struct ice_pf *pf, int v_remain) +{ + int v_rdma; + + if (!ice_is_rdma_ena(pf)) { + pf->num_lan_msix = v_remain; + return; + } + + /* RDMA needs at least 1 interrupt in addition to AEQ MSIX */ + v_rdma = ICE_RDMA_NUM_AEQ_MSIX + 1; + + if (v_remain < ICE_MIN_LAN_TXRX_MSIX + ICE_MIN_RDMA_MSIX) { + dev_warn(ice_pf_to_dev(pf), "Not enough MSI-X vectors to support RDMA.\n"); + clear_bit(ICE_FLAG_RDMA_ENA, pf->flags); + + pf->num_rdma_msix = 0; + pf->num_lan_msix = ICE_MIN_LAN_TXRX_MSIX; + } else if ((v_remain < ICE_MIN_LAN_TXRX_MSIX + v_rdma) || + (v_remain - v_rdma < v_rdma)) { + /* Support minimum RDMA and give remaining vectors to LAN MSIX */ + pf->num_rdma_msix = ICE_MIN_RDMA_MSIX; + pf->num_lan_msix = v_remain - ICE_MIN_RDMA_MSIX; + } else { + /* Split remaining MSIX with RDMA after accounting for AEQ MSIX + */ + pf->num_rdma_msix = (v_remain - ICE_RDMA_NUM_AEQ_MSIX) / 2 + + ICE_RDMA_NUM_AEQ_MSIX; + pf->num_lan_msix = v_remain - pf->num_rdma_msix; + } +} + /** * ice_ena_msix_range - Request a range of MSIX vectors from the OS * @pf: board private structure * - * compute the number of MSIX vectors required (v_budget) and request from - * the OS. Return the number of vectors reserved or negative on failure + * Compute the number of MSIX vectors wanted and request from the OS. Adjust + * device usage if there are not enough vectors. Return the number of vectors + * reserved or negative on failure. */ static int ice_ena_msix_range(struct ice_pf *pf) { - int num_cpus, v_left, v_actual, v_other, v_budget = 0; + int num_cpus, hw_num_msix, v_other, v_wanted, v_actual; struct device *dev = ice_pf_to_dev(pf); - int needed, err, i; + int err, i; - v_left = pf->hw.func_caps.common_cap.num_msix_vectors; + hw_num_msix = pf->hw.func_caps.common_cap.num_msix_vectors; num_cpus = num_online_cpus(); - /* reserve for LAN miscellaneous handler */ - needed = ICE_MIN_LAN_OICR_MSIX; - if (v_left < needed) - goto no_hw_vecs_left_err; - v_budget += needed; - v_left -= needed; + /* LAN miscellaneous handler */ + v_other = ICE_MIN_LAN_OICR_MSIX; - /* reserve for flow director */ - if (test_bit(ICE_FLAG_FD_ENA, pf->flags)) { - needed = ICE_FDIR_MSIX; - if (v_left < needed) - goto no_hw_vecs_left_err; - v_budget += needed; - v_left -= needed; - } + /* Flow Director */ + if (test_bit(ICE_FLAG_FD_ENA, pf->flags)) + v_other += ICE_FDIR_MSIX; - /* reserve for switchdev */ - needed = ICE_ESWITCH_MSIX; - if (v_left < needed) - goto no_hw_vecs_left_err; - v_budget += needed; - v_left -= needed; + /* switchdev */ + v_other += ICE_ESWITCH_MSIX; - /* total used for non-traffic vectors */ - v_other = v_budget; + v_wanted = v_other; - /* reserve vectors for LAN traffic */ - needed = num_cpus; - if (v_left < needed) - goto no_hw_vecs_left_err; - pf->num_lan_msix = needed; - v_budget += needed; - v_left -= needed; + /* LAN traffic */ + pf->num_lan_msix = num_cpus; + v_wanted += pf->num_lan_msix; - /* reserve vectors for RDMA auxiliary driver */ + /* RDMA auxiliary driver */ if (ice_is_rdma_ena(pf)) { - needed = num_cpus + ICE_RDMA_NUM_AEQ_MSIX; - if (v_left < needed) - goto no_hw_vecs_left_err; - pf->num_rdma_msix = needed; - v_budget += needed; - v_left -= needed; + pf->num_rdma_msix = num_cpus + ICE_RDMA_NUM_AEQ_MSIX; + v_wanted += pf->num_rdma_msix; } - pf->msix_entries = devm_kcalloc(dev, v_budget, + if (v_wanted > hw_num_msix) { + int v_remain; + + dev_warn(dev, "not enough device MSI-X vectors. wanted = %d, available = %d\n", + v_wanted, hw_num_msix); + + if (hw_num_msix < ICE_MIN_MSIX) { + err = -ERANGE; + goto exit_err; + } + + v_remain = hw_num_msix - v_other; + if (v_remain < ICE_MIN_LAN_TXRX_MSIX) { + v_other = ICE_MIN_MSIX - ICE_MIN_LAN_TXRX_MSIX; + v_remain = ICE_MIN_LAN_TXRX_MSIX; + } + + ice_reduce_msix_usage(pf, v_remain); + v_wanted = pf->num_lan_msix + pf->num_rdma_msix + v_other; + + dev_notice(dev, "Reducing request to %d MSI-X vectors for LAN traffic.\n", + pf->num_lan_msix); + if (ice_is_rdma_ena(pf)) + dev_notice(dev, "Reducing request to %d MSI-X vectors for RDMA.\n", + pf->num_rdma_msix); + } + + pf->msix_entries = devm_kcalloc(dev, v_wanted, sizeof(*pf->msix_entries), GFP_KERNEL); if (!pf->msix_entries) { err = -ENOMEM; goto exit_err; } - for (i = 0; i < v_budget; i++) + for (i = 0; i < v_wanted; i++) pf->msix_entries[i].entry = i; /* actually reserve the vectors */ v_actual = pci_enable_msix_range(pf->pdev, pf->msix_entries, - ICE_MIN_MSIX, v_budget); + ICE_MIN_MSIX, v_wanted); if (v_actual < 0) { dev_err(dev, "unable to reserve MSI-X vectors\n"); err = v_actual; goto msix_err; } - if (v_actual < v_budget) { + if (v_actual < v_wanted) { dev_warn(dev, "not enough OS MSI-X vectors. requested = %d, obtained = %d\n", - v_budget, v_actual); + v_wanted, v_actual); if (v_actual < ICE_MIN_MSIX) { /* error if we can't get minimum vectors */ @@ -4011,38 +4058,11 @@ static int ice_ena_msix_range(struct ice_pf *pf) goto msix_err; } else { int v_remain = v_actual - v_other; - int v_rdma = 0, v_min_rdma = 0; - if (ice_is_rdma_ena(pf)) { - /* Need at least 1 interrupt in addition to - * AEQ MSIX - */ - v_rdma = ICE_RDMA_NUM_AEQ_MSIX + 1; - v_min_rdma = ICE_MIN_RDMA_MSIX; - } + if (v_remain < ICE_MIN_LAN_TXRX_MSIX) + v_remain = ICE_MIN_LAN_TXRX_MSIX; - if (v_actual == ICE_MIN_MSIX || - v_remain < ICE_MIN_LAN_TXRX_MSIX + v_min_rdma) { - dev_warn(dev, "Not enough MSI-X vectors to support RDMA.\n"); - clear_bit(ICE_FLAG_RDMA_ENA, pf->flags); - - pf->num_rdma_msix = 0; - pf->num_lan_msix = ICE_MIN_LAN_TXRX_MSIX; - } else if ((v_remain < ICE_MIN_LAN_TXRX_MSIX + v_rdma) || - (v_remain - v_rdma < v_rdma)) { - /* Support minimum RDMA and give remaining - * vectors to LAN MSIX - */ - pf->num_rdma_msix = v_min_rdma; - pf->num_lan_msix = v_remain - v_min_rdma; - } else { - /* Split remaining MSIX with RDMA after - * accounting for AEQ MSIX - */ - pf->num_rdma_msix = (v_remain - ICE_RDMA_NUM_AEQ_MSIX) / 2 + - ICE_RDMA_NUM_AEQ_MSIX; - pf->num_lan_msix = v_remain - pf->num_rdma_msix; - } + ice_reduce_msix_usage(pf, v_remain); dev_notice(dev, "Enabled %d MSI-X vectors for LAN traffic.\n", pf->num_lan_msix); @@ -4057,12 +4077,7 @@ static int ice_ena_msix_range(struct ice_pf *pf) msix_err: devm_kfree(dev, pf->msix_entries); - goto exit_err; -no_hw_vecs_left_err: - dev_err(dev, "not enough device MSI-X vectors. requested = %d, available = %d\n", - needed, v_left); - err = -ERANGE; exit_err: pf->num_rdma_msix = 0; pf->num_lan_msix = 0; From 0b57e0d44299113a59697fc66714d5b3f14615b7 Mon Sep 17 00:00:00 2001 From: Michal Michalik Date: Tue, 23 Aug 2022 13:56:26 +0200 Subject: [PATCH 2/5] ice: Check if reset in progress while waiting for offsets Occasionally while waiting to valid offsets from hardware we get reset. Add check for reset before proceeding to execute scheduled work. Co-developed-by: Karol Kolacinski Signed-off-by: Karol Kolacinski Signed-off-by: Michal Michalik Tested-by: Gurucharan (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_ptp.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.c b/drivers/net/ethernet/intel/ice/ice_ptp.c index 5a2fd4d690f3..26020f3f0a43 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp.c +++ b/drivers/net/ethernet/intel/ice/ice_ptp.c @@ -1242,6 +1242,9 @@ static void ice_ptp_wait_for_offset_valid(struct kthread_work *work) hw = &pf->hw; dev = ice_pf_to_dev(pf); + if (ice_is_reset_in_progress(pf->state)) + return; + if (ice_ptp_check_offset_valid(port)) { /* Offsets not ready yet, try again later */ kthread_queue_delayed_work(pf->ptp.kworker, From 1bd50f2deb19c13834f9f6b2f3c3263e38a47e1a Mon Sep 17 00:00:00 2001 From: Paul Greenwalt Date: Wed, 24 Aug 2022 13:28:40 -0700 Subject: [PATCH 3/5] ice: add helper function to check FW API version Several functions in ice_common.c check the firmware API version to see if the current API version meets some minimum requirement. Improve the readability of these checks by introducing ice_is_fw_api_min_ver, a helper function to perform that check. Signed-off-by: Paul Greenwalt Signed-off-by: Jacob Keller Tested-by: Gurucharan (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_common.c | 63 +++++++++++---------- 1 file changed, 32 insertions(+), 31 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c index 40e4c286649c..bec770e34f39 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.c +++ b/drivers/net/ethernet/intel/ice/ice_common.c @@ -5285,6 +5285,29 @@ ice_aq_get_gpio(struct ice_hw *hw, u16 gpio_ctrl_handle, u8 pin_idx, return 0; } +/** + * ice_is_fw_api_min_ver + * @hw: pointer to the hardware structure + * @maj: major version + * @min: minor version + * @patch: patch version + * + * Checks if the firmware API is minimum version + */ +static bool ice_is_fw_api_min_ver(struct ice_hw *hw, u8 maj, u8 min, u8 patch) +{ + if (hw->api_maj_ver == maj) { + if (hw->api_min_ver > min) + return true; + if (hw->api_min_ver == min && hw->api_patch >= patch) + return true; + } else if (hw->api_maj_ver > maj) { + return true; + } + + return false; +} + /** * ice_fw_supports_link_override * @hw: pointer to the hardware structure @@ -5293,17 +5316,9 @@ ice_aq_get_gpio(struct ice_hw *hw, u16 gpio_ctrl_handle, u8 pin_idx, */ bool ice_fw_supports_link_override(struct ice_hw *hw) { - if (hw->api_maj_ver == ICE_FW_API_LINK_OVERRIDE_MAJ) { - if (hw->api_min_ver > ICE_FW_API_LINK_OVERRIDE_MIN) - return true; - if (hw->api_min_ver == ICE_FW_API_LINK_OVERRIDE_MIN && - hw->api_patch >= ICE_FW_API_LINK_OVERRIDE_PATCH) - return true; - } else if (hw->api_maj_ver > ICE_FW_API_LINK_OVERRIDE_MAJ) { - return true; - } - - return false; + return ice_is_fw_api_min_ver(hw, ICE_FW_API_LINK_OVERRIDE_MAJ, + ICE_FW_API_LINK_OVERRIDE_MIN, + ICE_FW_API_LINK_OVERRIDE_PATCH); } /** @@ -5436,16 +5451,9 @@ bool ice_fw_supports_lldp_fltr_ctrl(struct ice_hw *hw) if (hw->mac_type != ICE_MAC_E810) return false; - if (hw->api_maj_ver == ICE_FW_API_LLDP_FLTR_MAJ) { - if (hw->api_min_ver > ICE_FW_API_LLDP_FLTR_MIN) - return true; - if (hw->api_min_ver == ICE_FW_API_LLDP_FLTR_MIN && - hw->api_patch >= ICE_FW_API_LLDP_FLTR_PATCH) - return true; - } else if (hw->api_maj_ver > ICE_FW_API_LLDP_FLTR_MAJ) { - return true; - } - return false; + return ice_is_fw_api_min_ver(hw, ICE_FW_API_LLDP_FLTR_MAJ, + ICE_FW_API_LLDP_FLTR_MIN, + ICE_FW_API_LLDP_FLTR_PATCH); } /** @@ -5482,14 +5490,7 @@ ice_lldp_fltr_add_remove(struct ice_hw *hw, u16 vsi_num, bool add) */ bool ice_fw_supports_report_dflt_cfg(struct ice_hw *hw) { - if (hw->api_maj_ver == ICE_FW_API_REPORT_DFLT_CFG_MAJ) { - if (hw->api_min_ver > ICE_FW_API_REPORT_DFLT_CFG_MIN) - return true; - if (hw->api_min_ver == ICE_FW_API_REPORT_DFLT_CFG_MIN && - hw->api_patch >= ICE_FW_API_REPORT_DFLT_CFG_PATCH) - return true; - } else if (hw->api_maj_ver > ICE_FW_API_REPORT_DFLT_CFG_MAJ) { - return true; - } - return false; + return ice_is_fw_api_min_ver(hw, ICE_FW_API_REPORT_DFLT_CFG_MAJ, + ICE_FW_API_REPORT_DFLT_CFG_MIN, + ICE_FW_API_REPORT_DFLT_CFG_PATCH); } From 1b9e740dd733d1db4e790e94a4e5021ad17d92f7 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 4 Sep 2022 16:18:02 +0200 Subject: [PATCH 4/5] ice: switch: Simplify memory allocation 'rbuf' is locale to the ice_get_initial_sw_cfg() function. There is no point in using devm_kzalloc()/devm_kfree(). use kzalloc()/kfree() instead. Signed-off-by: Christophe JAILLET Reviewed-by: Michal Swiatkowski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_switch.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_switch.c b/drivers/net/ethernet/intel/ice/ice_switch.c index 697feb89188c..eb6e19deb70d 100644 --- a/drivers/net/ethernet/intel/ice/ice_switch.c +++ b/drivers/net/ethernet/intel/ice/ice_switch.c @@ -2274,9 +2274,7 @@ int ice_get_initial_sw_cfg(struct ice_hw *hw) int status; u16 i; - rbuf = devm_kzalloc(ice_hw_to_dev(hw), ICE_SW_CFG_MAX_BUF_LEN, - GFP_KERNEL); - + rbuf = kzalloc(ICE_SW_CFG_MAX_BUF_LEN, GFP_KERNEL); if (!rbuf) return -ENOMEM; @@ -2324,7 +2322,7 @@ int ice_get_initial_sw_cfg(struct ice_hw *hw) } } while (req_desc && !status); - devm_kfree(ice_hw_to_dev(hw), rbuf); + kfree(rbuf); return status; } From 04cbaa6c08e3974760c7ac5a70256d736444f6f0 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 4 Sep 2022 16:22:46 +0200 Subject: [PATCH 5/5] ice: Simplify memory allocation in ice_sched_init_port() 'buf' is locale to the ice_sched_init_port() function. There is no point in using devm_kzalloc()/devm_kfree(). use kzalloc()/kfree() instead. Signed-off-by: Christophe JAILLET Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_sched.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_sched.c b/drivers/net/ethernet/intel/ice/ice_sched.c index 7947223536e3..118595763bba 100644 --- a/drivers/net/ethernet/intel/ice/ice_sched.c +++ b/drivers/net/ethernet/intel/ice/ice_sched.c @@ -1212,7 +1212,7 @@ int ice_sched_init_port(struct ice_port_info *pi) hw = pi->hw; /* Query the Default Topology from FW */ - buf = devm_kzalloc(ice_hw_to_dev(hw), ICE_AQ_MAX_BUF_LEN, GFP_KERNEL); + buf = kzalloc(ICE_AQ_MAX_BUF_LEN, GFP_KERNEL); if (!buf) return -ENOMEM; @@ -1290,7 +1290,7 @@ err_init_port: pi->root = NULL; } - devm_kfree(ice_hw_to_dev(hw), buf); + kfree(buf); return status; }